# Calculating the density of POIs and categorizing each POI 

In [25]:
import pandas as pd
from collections import defaultdict

def categorize_poi(types):
    """Map a comma-separated string of POI types to the set of categories it belongs to.

    Each token of ``types`` is stripped of surrounding whitespace and compared
    against the known type names below. A category is included in the result
    when at least one token is listed under it; unknown tokens are ignored.

    Args:
        types: Comma-separated type names (must support ``.split``).

    Returns:
        A set of category names; empty when no token is recognised.
    """
    types_by_category = {
        "Restaurants": ("restaurant",),
        "Coffee Shops": ("cafe",),
        "Food & Dining": (
            "restaurant", "cafe", "bar", "bakery", "meal_delivery", "meal_takeaway",
        ),
        "Health": (
            "hospital", "clinic", "pharmacy", "physiotherapist",
            "doctor", "dentist", "veterinary_care",
        ),
        "Education": (
            "school", "university", "college",
            "library", "primary_school", "secondary_school",
        ),
        "Retail & Shopping": (
            "shopping_mall", "department_store", "clothing_store", "shoe_store",
            "electronics_store", "home_goods_store", "furniture_store", "jewelry_store",
            "book_store", "convenience_store", "supermarket", "hardware_store",
            "florist", "pet_store", "store",
        ),
        "Entertainment & Recreation": (
            "amusement_park", "aquarium", "art_gallery", "bowling_alley",
            "casino", "movie_theater", "museum", "night_club",
            "stadium", "tourist_attraction", "zoo", "park",
        ),
        "Hotels & Hospitality": ("hotel", "motel", "guest_house", "lodging"),
        "Finance & Services": (
            "bank", "atm", "insurance_agency", "accounting",
            "real_estate_agency", "lawyer",
        ),
        "Transportation & Travel": (
            "airport", "bus_station", "train_station", "subway_station",
            "transit_station", "taxi_stand", "parking", "car_rental",
            "car_dealer", "car_repair", "car_wash", "moving_company",
            "rv_park", "travel_agency",
        ),
        "Public & Government Services": (
            "police", "fire_station", "sublocality", "locality", "post_office",
            "courthouse", "city_hall", "embassy", "local_government_office",
        ),
        "Religious Institutions": ("church", "mosque", "cemetery"),
        "Beauty & Wellness": ("beauty_salon", "spa", "hair_care", "gym", "laundry"),
        "Home & Construction Services": (
            "electrician", "plumber", "painter", "roofing_contractor", "locksmith",
        ),
    }

    # Normalise once, then test each category for any overlap with the tokens.
    tokens = {token.strip() for token in types.split(",")}
    return {
        category
        for category, members in types_by_category.items()
        if tokens.intersection(members)
    }

def main():
    """Count nearby POIs per category for every restaurant and save the result.

    Reads the cleaned restaurants dataset and ``poi_per_restaurant.csv``, drops
    POI rows that repeat the same ("Business ID", "Name") pair, and for each
    restaurant writes one count column per category plus a "POI Density"
    column holding the number of associated POIs whose "Types" value is not
    null. A POI contributes at most once to each category. The restaurants CSV
    is then overwritten in place.
    """
    dataset_path = "../../Data Preprocessing/Foursquare/final_restaurants_dataset_cleaned.csv"
    restaurants_df = pd.read_csv(dataset_path)
    poi_df = pd.read_csv("poi_per_restaurant.csv")

    poi_df = poi_df.drop_duplicates(subset=["Business ID", "Name"])

    # An ordered tuple rather than a set: iterating a set of strings follows
    # hash order, which made the order of the new CSV columns vary between runs.
    category_columns = (
        "Restaurants", "Coffee Shops", "Health", "Education", "Retail & Shopping", "Entertainment & Recreation",
        "Food & Dining", "Hotels & Hospitality", "Finance & Services",
        "Transportation & Travel", "Public & Government Services", "Religious Institutions",
        "Beauty & Wellness", "Home & Construction Services",
    )

    for category in category_columns:
        restaurants_df[category] = 0
    restaurants_df["POI Density"] = 0

    for index, row in restaurants_df.iterrows():
        associated_pois = poi_df[poi_df["Business ID"] == row["Business ID"]]

        category_counts = defaultdict(int)
        total_poi_count = 0

        for poi_types in associated_pois["Types"]:
            # POIs without type information are neither counted nor categorised.
            if pd.notna(poi_types):
                total_poi_count += 1
                for category in categorize_poi(poi_types):
                    category_counts[category] += 1

        for category in category_columns:
            restaurants_df.at[index, category] = category_counts[category]
        restaurants_df.at[index, "POI Density"] = total_poi_count

    restaurants_df.to_csv(dataset_path, index=False)
    print("Updated dataset saved successfully.")


if __name__ == "__main__":
    main()


Updated dataset saved successfully.


In [43]:
import pandas as pd
from collections import defaultdict

def categorize_poi(types):
    """Return the set of categories matched by a comma-separated POI type string.

    Tokens are split on commas and stripped of surrounding whitespace. Every
    token that appears under one or more categories below contributes those
    categories to the result; tokens that appear nowhere are skipped.

    Args:
        types: Comma-separated type names (must support ``.split``).

    Returns:
        A set of category names; empty when nothing matches.
    """
    category_members = {
        "Restaurants": ["restaurant"],
        "Coffee Shops": ["cafe"],
        "Food & Dining": [
            "restaurant", "cafe", "bar", "bakery", "meal_delivery", "meal_takeaway",
        ],
        "Health": [
            "hospital", "clinic", "pharmacy", "physiotherapist",
            "doctor", "dentist", "veterinary_care",
        ],
        "Education": [
            "school", "university", "college",
            "library", "primary_school", "secondary_school",
        ],
        "Retail & Shopping": [
            "shopping_mall", "department_store", "clothing_store", "shoe_store",
            "electronics_store", "home_goods_store", "furniture_store", "jewelry_store",
            "book_store", "convenience_store", "supermarket", "hardware_store",
            "florist", "pet_store", "store",
        ],
        "Entertainment & Recreation": [
            "amusement_park", "aquarium", "art_gallery", "bowling_alley",
            "casino", "movie_theater", "museum", "night_club",
            "stadium", "tourist_attraction", "zoo", "park",
        ],
        "Hotels & Hospitality": ["hotel", "motel", "guest_house", "lodging"],
        "Finance & Services": [
            "bank", "atm", "insurance_agency", "accounting",
            "real_estate_agency", "lawyer",
        ],
        "Transportation & Travel": [
            "airport", "bus_station", "train_station", "subway_station",
            "transit_station", "taxi_stand", "parking", "car_rental",
            "car_dealer", "car_repair", "car_wash", "moving_company",
            "rv_park", "travel_agency",
        ],
        "Public & Government Services": [
            "police", "fire_station", "sublocality", "locality", "post_office",
            "courthouse", "city_hall", "embassy", "local_government_office",
        ],
        "Religious Institutions": ["church", "mosque", "cemetery"],
        "Beauty & Wellness": ["beauty_salon", "spa", "hair_care", "gym", "laundry"],
        "Home & Construction Services": [
            "electrician", "plumber", "painter", "roofing_contractor", "locksmith",
        ],
    }

    # Flatten the grouping into a type -> categories lookup.
    lookup = {}
    for category, members in category_members.items():
        for member in members:
            lookup.setdefault(member, []).append(category)

    matched = set()
    for raw_token in types.split(","):
        matched.update(lookup.get(raw_token.strip(), ()))
    return matched

def main():
    """Count nearby POIs per category for every coffee shop and save the result.

    Reads the cleaned coffee-shops dataset and ``poi_per_coffeeshop.csv``,
    drops POI rows that repeat the same ("Business ID", "Name") pair, and for
    each coffee shop writes one count column per category plus a "POI Density"
    column holding the number of associated POIs whose "Types" value is not
    null. A POI contributes at most once to each category. The coffee-shops
    CSV is then overwritten in place.
    """
    dataset_path = "../../Data Preprocessing/Foursquare/final_coffeeshops_dataset_cleaned.csv"
    coffeeshops_df = pd.read_csv(dataset_path)
    poi_df = pd.read_csv("poi_per_coffeeshop.csv")

    poi_df = poi_df.drop_duplicates(subset=["Business ID", "Name"])

    # An ordered tuple rather than a set: iterating a set of strings follows
    # hash order, which made the order of the new CSV columns vary between runs.
    category_columns = (
        "Restaurants", "Coffee Shops", "Health", "Education", "Retail & Shopping", "Entertainment & Recreation",
        "Food & Dining", "Hotels & Hospitality", "Finance & Services",
        "Transportation & Travel", "Public & Government Services", "Religious Institutions",
        "Beauty & Wellness", "Home & Construction Services",
    )

    for category in category_columns:
        coffeeshops_df[category] = 0
    coffeeshops_df["POI Density"] = 0

    for index, row in coffeeshops_df.iterrows():
        associated_pois = poi_df[poi_df["Business ID"] == row["Business ID"]]

        category_counts = defaultdict(int)
        total_poi_count = 0

        for poi_types in associated_pois["Types"]:
            # POIs without type information are neither counted nor categorised.
            if pd.notna(poi_types):
                total_poi_count += 1
                for category in categorize_poi(poi_types):
                    category_counts[category] += 1

        for category in category_columns:
            coffeeshops_df.at[index, category] = category_counts[category]
        coffeeshops_df.at[index, "POI Density"] = total_poi_count

    coffeeshops_df.to_csv(dataset_path, index=False)
    print("Updated dataset saved successfully.")


if __name__ == "__main__":
    main()


Updated dataset saved successfully.


# Calculating the average rating for restaurants, in addition to Food & Dining

In [None]:
import pandas as pd
from collections import defaultdict

def categorize_poi(types):
    """Return the food-related categories matched by a comma-separated type string.

    Only the food and drink types are recognised here: any of them yields
    "Food & Dining"; "restaurant" additionally yields "Restaurants" and
    "cafe" additionally yields "Coffee Shops". Tokens are stripped of
    surrounding whitespace before comparison.

    Args:
        types: Comma-separated type names (must support ``.split``).

    Returns:
        A set of category names; empty when no token is recognised.
    """
    food_types = {"restaurant", "cafe", "bar", "bakery", "meal_delivery", "meal_takeaway"}
    tokens = {piece.strip() for piece in types.split(",")}

    labels = set()
    if tokens & food_types:
        labels.add("Food & Dining")
    if "restaurant" in tokens:
        labels.add("Restaurants")
    if "cafe" in tokens:
        labels.add("Coffee Shops")
    return labels

def main():
    """Add average nearby-POI ratings to the restaurants dataset and save it.

    For every restaurant, the ratings of its associated POIs (rows of
    ``poi_per_restaurant.csv`` with the same "Business ID", de-duplicated on
    "Business ID" and "Name") are averaged separately for POIs categorised as
    "Restaurants" and as "Food & Dining". POIs missing "Types" or "Rating" are
    skipped, and a column is left as None when no rating was collected for it.
    The restaurants CSV is overwritten in place.
    """
    dataset_path = "../../Data Preprocessing/Foursquare/final_restaurants_dataset_cleaned.csv"
    restaurants_df = pd.read_csv(dataset_path)
    poi_df = pd.read_csv("poi_per_restaurant.csv").drop_duplicates(subset=["Business ID", "Name"])

    # Category name -> output column that receives its average rating.
    rating_columns = {
        "Restaurants": "Avg Rating - Restaurants",
        "Food & Dining": "Avg Rating - Food & Dining",
    }
    for column in rating_columns.values():
        restaurants_df[column] = None

    for row_label, business in restaurants_df.iterrows():
        nearby = poi_df[poi_df["Business ID"] == business["Business ID"]]
        collected = {label: [] for label in rating_columns}

        for _, place in nearby.iterrows():
            if pd.isna(place["Types"]) or pd.isna(place["Rating"]):
                continue
            labels = categorize_poi(place["Types"])
            for label in collected:
                if label in labels:
                    collected[label].append(place["Rating"])

        # Only write an average when at least one rating was found.
        for label, values in collected.items():
            if values:
                restaurants_df.at[row_label, rating_columns[label]] = sum(values) / len(values)

    restaurants_df.to_csv(dataset_path, index=False)

    print("Updated dataset with average ratings saved successfully.")


if __name__ == "__main__":
    main()


Updated dataset with average ratings saved successfully.


In [44]:
import pandas as pd
from collections import defaultdict

def categorize_poi(types):
    """Return the food-related categories for a comma-separated POI type string.

    A category is included when at least one whitespace-stripped token of
    ``types`` is listed under it below. Types outside the food and drink
    group are not recognised by this function.

    Args:
        types: Comma-separated type names (must support ``.split``).

    Returns:
        A set of category names; empty when nothing matches.
    """
    members_by_category = {
        "Restaurants": frozenset({"restaurant"}),
        "Coffee Shops": frozenset({"cafe"}),
        "Food & Dining": frozenset(
            {"restaurant", "cafe", "bar", "bakery", "meal_delivery", "meal_takeaway"}
        ),
    }

    cleaned = [part.strip() for part in types.split(",")]
    return {
        category
        for category, members in members_by_category.items()
        if any(part in members for part in cleaned)
    }

def main():
    """Add average nearby-POI ratings to the coffee-shops dataset and save it.

    For every coffee shop, the ratings of its associated POIs (rows of
    ``poi_per_coffeeshop.csv`` with the same "Business ID", de-duplicated on
    "Business ID" and "Name") are averaged separately for POIs categorised as
    "Coffee Shops" and as "Food & Dining". POIs missing "Types" or "Rating"
    are skipped, and a column is left as None when no rating was collected for
    it. The coffee-shops CSV is overwritten in place.
    """
    dataset_path = "../../Data Preprocessing/Foursquare/final_coffeeshops_dataset_cleaned.csv"
    coffeeshops_df = pd.read_csv(dataset_path)
    poi_df = pd.read_csv("poi_per_coffeeshop.csv").drop_duplicates(subset=["Business ID", "Name"])

    # Category name -> output column that receives its average rating.
    rating_columns = {
        "Coffee Shops": "Avg Rating - Coffee Shops",
        "Food & Dining": "Avg Rating - Food & Dining",
    }
    for column in rating_columns.values():
        coffeeshops_df[column] = None

    for row_label, business in coffeeshops_df.iterrows():
        nearby = poi_df[poi_df["Business ID"] == business["Business ID"]]
        collected = {label: [] for label in rating_columns}

        for _, place in nearby.iterrows():
            if pd.isna(place["Types"]) or pd.isna(place["Rating"]):
                continue
            labels = categorize_poi(place["Types"])
            for label in collected:
                if label in labels:
                    collected[label].append(place["Rating"])

        # Only write an average when at least one rating was found.
        for label, values in collected.items():
            if values:
                coffeeshops_df.at[row_label, rating_columns[label]] = sum(values) / len(values)

    coffeeshops_df.to_csv(dataset_path, index=False)

    print("Updated dataset with average ratings saved successfully.")


if __name__ == "__main__":
    main()


Updated dataset with average ratings saved successfully.


## Calculating competition three different ways for two different columns = 6 columns in total

number of Restaurants over Area Coverage

In [30]:
import pandas as pd

def load_data(business_file):
    """Read the CSV at ``business_file`` and return it as a DataFrame."""
    business_df = pd.read_csv(business_file)
    return business_df

def calculate_competition(business_df, area_km2=3.1416):
    """Add the restaurants-per-area competition column to ``business_df``.

    Args:
        business_df: DataFrame with a numeric 'Restaurants' column. It is
            modified in place.
        area_km2: Area the counts are divided by. Defaults to 3.1416, the
            value previously hard-coded here (presumably the area in km² of a
            1 km-radius circle; confirm against the POI collection radius).

    Returns:
        The same DataFrame with 'Competition - Restaurants/Area' added.

    Raises:
        ValueError: If ``area_km2`` is not strictly positive.
    """
    if area_km2 <= 0:
        raise ValueError("area_km2 must be positive")

    business_df['Competition - Restaurants/Area'] = business_df['Restaurants'] / area_km2
    return business_df

def main():
    """Add the restaurants-per-area competition column to the restaurants CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_restaurants_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Restaurants over Area Coverage) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Restaurants over Area Coverage) values added and saved


number of Coffee Shops over Area Coverage

In [45]:
import pandas as pd

def load_data(business_file):
    """Load the CSV located at ``business_file`` into a DataFrame."""
    loaded_df = pd.read_csv(business_file)
    return loaded_df

def calculate_competition(business_df, area_km2=3.1416):
    """Add the coffee-shops-per-area competition column to ``business_df``.

    Args:
        business_df: DataFrame with a numeric 'Coffee Shops' column. It is
            modified in place.
        area_km2: Area the counts are divided by. Defaults to 3.1416, the
            value previously hard-coded here (presumably the area in km² of a
            1 km-radius circle; confirm against the POI collection radius).

    Returns:
        The same DataFrame with 'Competition - Coffee Shops/Area' added.

    Raises:
        ValueError: If ``area_km2`` is not strictly positive.
    """
    if area_km2 <= 0:
        raise ValueError("area_km2 must be positive")

    business_df['Competition - Coffee Shops/Area'] = business_df['Coffee Shops'] / area_km2
    return business_df

def main():
    """Add the coffee-shops-per-area competition column to the coffee-shops CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_coffeeshops_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Coffee Shops over Area Coverage) values added and saved")


if __name__ == "__main__":
    main()

Competition (number of Coffee Shops over Area Coverage) values added and saved


number of Food & Dining over Area Coverage

In [31]:
import pandas as pd

def load_data(business_file):
    """Parse the CSV given by ``business_file`` and return the DataFrame."""
    parsed_df = pd.read_csv(business_file)
    return parsed_df

def calculate_competition(business_df, area_km2=3.1416):
    """Add the food-and-dining-per-area competition column to ``business_df``.

    Args:
        business_df: DataFrame with a numeric 'Food & Dining' column. It is
            modified in place.
        area_km2: Area the counts are divided by. Defaults to 3.1416, the
            value previously hard-coded here (presumably the area in km² of a
            1 km-radius circle; confirm against the POI collection radius).

    Returns:
        The same DataFrame with 'Competition - Food & Dining/Area' added.

    Raises:
        ValueError: If ``area_km2`` is not strictly positive.
    """
    if area_km2 <= 0:
        raise ValueError("area_km2 must be positive")

    business_df['Competition - Food & Dining/Area'] = business_df['Food & Dining'] / area_km2
    return business_df

def main():
    """Add the food-and-dining-per-area competition column to the restaurants CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_restaurants_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Food & Dining over Area Coverage) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Food & Dining over Area Coverage) values added and saved


In [46]:
import pandas as pd

def load_data(business_file):
    """Return the contents of the CSV ``business_file`` as a DataFrame."""
    contents = pd.read_csv(business_file)
    return contents

def calculate_competition(business_df, area_km2=3.1416):
    """Add the food-and-dining-per-area competition column to ``business_df``.

    Args:
        business_df: DataFrame with a numeric 'Food & Dining' column. It is
            modified in place.
        area_km2: Area the counts are divided by. Defaults to 3.1416, the
            value previously hard-coded here (presumably the area in km² of a
            1 km-radius circle; confirm against the POI collection radius).

    Returns:
        The same DataFrame with 'Competition - Food & Dining/Area' added.

    Raises:
        ValueError: If ``area_km2`` is not strictly positive.
    """
    if area_km2 <= 0:
        raise ValueError("area_km2 must be positive")

    business_df['Competition - Food & Dining/Area'] = business_df['Food & Dining'] / area_km2
    return business_df

def main():
    """Add the food-and-dining-per-area competition column to the coffee-shops CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_coffeeshops_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Food & Dining over Area Coverage) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Food & Dining over Area Coverage) values added and saved


number of Restaurants over POI Density

In [None]:
import pandas as pd

def load_data(business_file):
    """Read the CSV at ``business_file`` and return it as a DataFrame."""
    business_df = pd.read_csv(business_file)
    return business_df

def calculate_competition(business_df):
    """Add the share of nearby POIs that are restaurants to ``business_df``.

    Divides 'Restaurants' by 'POI Density' row by row. Rows whose
    'POI Density' is 0 get NaN, so a non-zero count over a zero density no
    longer produces ``inf`` in the saved dataset.

    Args:
        business_df: DataFrame with numeric 'Restaurants' and 'POI Density'
            columns. It is modified in place.

    Returns:
        The same DataFrame with 'Competition - Restaurants/POI Density' added.
    """
    poi_density = business_df['POI Density']
    # Mask zero denominators to NaN so the ratio is undefined rather than infinite.
    safe_density = poi_density.where(poi_density != 0)
    business_df['Competition - Restaurants/POI Density'] = business_df['Restaurants'] / safe_density
    return business_df

def main():
    """Add the restaurants-over-POI-density competition column to the restaurants CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_restaurants_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Restaurants over POI Density) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Restaurants over POI Density) values added and saved


number of Coffee Shops over POI Density

In [47]:
import pandas as pd

def load_data(business_file):
    """Load the CSV located at ``business_file`` into a DataFrame."""
    loaded_df = pd.read_csv(business_file)
    return loaded_df

def calculate_competition(business_df):
    """Add the share of nearby POIs that are coffee shops to ``business_df``.

    Divides 'Coffee Shops' by 'POI Density' row by row. Rows whose
    'POI Density' is 0 get NaN, so a non-zero count over a zero density no
    longer produces ``inf`` in the saved dataset.

    Args:
        business_df: DataFrame with numeric 'Coffee Shops' and 'POI Density'
            columns. It is modified in place.

    Returns:
        The same DataFrame with 'Competition - Coffee Shops/POI Density' added.
    """
    poi_density = business_df['POI Density']
    # Mask zero denominators to NaN so the ratio is undefined rather than infinite.
    safe_density = poi_density.where(poi_density != 0)
    business_df['Competition - Coffee Shops/POI Density'] = business_df['Coffee Shops'] / safe_density
    return business_df

def main():
    """Add the coffee-shops-over-POI-density competition column to the coffee-shops CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_coffeeshops_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Coffee Shops over POI Density) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Coffee Shops over POI Density) values added and saved


number of Food & Dining over POI Density

In [None]:
import pandas as pd

def load_data(business_file):
    """Parse the CSV given by ``business_file`` and return the DataFrame."""
    parsed_df = pd.read_csv(business_file)
    return parsed_df

def calculate_competition(business_df):
    """Add the share of nearby POIs that are food and dining places to ``business_df``.

    Divides 'Food & Dining' by 'POI Density' row by row. Rows whose
    'POI Density' is 0 get NaN, so a non-zero count over a zero density no
    longer produces ``inf`` in the saved dataset.

    Args:
        business_df: DataFrame with numeric 'Food & Dining' and 'POI Density'
            columns. It is modified in place.

    Returns:
        The same DataFrame with 'Competition - Food & Dining/POI Density' added.
    """
    poi_density = business_df['POI Density']
    # Mask zero denominators to NaN so the ratio is undefined rather than infinite.
    safe_density = poi_density.where(poi_density != 0)
    business_df['Competition - Food & Dining/POI Density'] = business_df['Food & Dining'] / safe_density
    return business_df

def main():
    """Add the food-and-dining-over-POI-density competition column to the restaurants CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_restaurants_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Food & Dining over POI Density) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Food & Dining over POI Density) values added and saved


In [48]:
import pandas as pd

def load_data(business_file):
    """Return the contents of the CSV ``business_file`` as a DataFrame."""
    contents = pd.read_csv(business_file)
    return contents

def calculate_competition(business_df):
    """Add the share of nearby POIs that are food and dining places to ``business_df``.

    Divides 'Food & Dining' by 'POI Density' row by row. Rows whose
    'POI Density' is 0 get NaN, so a non-zero count over a zero density no
    longer produces ``inf`` in the saved dataset.

    Args:
        business_df: DataFrame with numeric 'Food & Dining' and 'POI Density'
            columns. It is modified in place.

    Returns:
        The same DataFrame with 'Competition - Food & Dining/POI Density' added.
    """
    poi_density = business_df['POI Density']
    # Mask zero denominators to NaN so the ratio is undefined rather than infinite.
    safe_density = poi_density.where(poi_density != 0)
    business_df['Competition - Food & Dining/POI Density'] = business_df['Food & Dining'] / safe_density
    return business_df

def main():
    """Add the food-and-dining-over-POI-density competition column to the coffee-shops CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_coffeeshops_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Food & Dining over POI Density) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Food & Dining over POI Density) values added and saved


number of Restaurants over related POIs (Food, Entertainment, Hotels, Retail)

In [38]:
import pandas as pd

def load_data(business_file):
    """Read the CSV at ``business_file`` and return it as a DataFrame."""
    business_df = pd.read_csv(business_file)
    return business_df

def calculate_competition(business_df):
    """Add the ratio of restaurants to related POIs to ``business_df``.

    The denominator is the row-wise sum of the 'Food & Dining',
    'Entertainment & Recreation', 'Retail & Shopping' and
    'Hotels & Hospitality' columns. ``sum(axis=1)`` skips missing values, so
    the total can be 0; such rows get NaN instead of ``inf``.

    Args:
        business_df: DataFrame with numeric 'Restaurants' and the four related
            category columns. It is modified in place.

    Returns:
        The same DataFrame with 'Competition - Restaurants/related POIs' added.
    """
    columns_to_sum = ['Food & Dining', 'Entertainment & Recreation', 'Retail & Shopping', 'Hotels & Hospitality']
    related_total = business_df[columns_to_sum].sum(axis=1)
    # Mask zero denominators to NaN so the ratio is undefined rather than infinite.
    safe_total = related_total.where(related_total != 0)
    business_df['Competition - Restaurants/related POIs'] = business_df['Restaurants'] / safe_total
    return business_df

def main():
    """Add the restaurants-over-related-POIs competition column to the restaurants CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_restaurants_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Restaurants over related POIs [4 columns]) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Restaurants over related POIs [4 columns]) values added and saved


number of Coffee Shops over related POIs (Food, Entertainment, Hotels, Retail)

In [49]:
import pandas as pd

def load_data(business_file):
    """Load the CSV located at ``business_file`` into a DataFrame."""
    loaded_df = pd.read_csv(business_file)
    return loaded_df

def calculate_competition(business_df):
    """Add the ratio of coffee shops to related POIs to ``business_df``.

    The denominator is the row-wise sum of the 'Food & Dining',
    'Entertainment & Recreation', 'Retail & Shopping' and
    'Hotels & Hospitality' columns. ``sum(axis=1)`` skips missing values, so
    the total can be 0; such rows get NaN instead of ``inf``.

    Args:
        business_df: DataFrame with numeric 'Coffee Shops' and the four related
            category columns. It is modified in place.

    Returns:
        The same DataFrame with 'Competition - Coffee Shops/related POIs' added.
    """
    columns_to_sum = ['Food & Dining', 'Entertainment & Recreation', 'Retail & Shopping', 'Hotels & Hospitality']
    related_total = business_df[columns_to_sum].sum(axis=1)
    # Mask zero denominators to NaN so the ratio is undefined rather than infinite.
    safe_total = related_total.where(related_total != 0)
    business_df['Competition - Coffee Shops/related POIs'] = business_df['Coffee Shops'] / safe_total
    return business_df

def main():
    """Add the coffee-shops-over-related-POIs competition column to the coffee-shops CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_coffeeshops_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Coffee Shops over related POIs [4 columns]) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Coffee Shops over related POIs [4 columns]) values added and saved


number of Food & Dining over related POIs (Food, Entertainment, Hotels, Retail)

In [39]:
import pandas as pd

def load_data(business_file):
    """Parse the CSV given by ``business_file`` and return the DataFrame."""
    parsed_df = pd.read_csv(business_file)
    return parsed_df

def calculate_competition(business_df):
    """Add the ratio of food and dining places to related POIs to ``business_df``.

    The denominator is the row-wise sum of the 'Food & Dining',
    'Entertainment & Recreation', 'Retail & Shopping' and
    'Hotels & Hospitality' columns. The frame is modified in place and also
    returned.
    """
    related = business_df[
        ['Food & Dining', 'Entertainment & Recreation', 'Retail & Shopping', 'Hotels & Hospitality']
    ]
    related_total = related.sum(axis=1)
    business_df['Competition - Food & Dining/related POIs'] = business_df['Food & Dining'].div(related_total)
    return business_df

def main():
    """Add the food-and-dining-over-related-POIs competition column to the restaurants CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_restaurants_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Food & Dining over related POIs [4 columns]) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Food & Dining over related POIs [4 columns]) values added and saved


In [50]:
import pandas as pd

def load_data(business_file):
    """Return the contents of the CSV ``business_file`` as a DataFrame."""
    contents = pd.read_csv(business_file)
    return contents

def calculate_competition(business_df):
    """Add the ratio of food and dining places to related POIs to ``business_df``.

    The denominator is the row-wise sum of the 'Food & Dining',
    'Entertainment & Recreation', 'Retail & Shopping' and
    'Hotels & Hospitality' columns. The frame is modified in place and also
    returned.
    """
    food_and_dining = business_df['Food & Dining']
    related_total = business_df[
        ['Food & Dining', 'Entertainment & Recreation', 'Retail & Shopping', 'Hotels & Hospitality']
    ].sum(axis=1)
    business_df['Competition - Food & Dining/related POIs'] = food_and_dining.div(related_total)
    return business_df

def main():
    """Add the food-and-dining-over-related-POIs competition column to the coffee-shops CSV.

    Loads the dataset, passes it through ``calculate_competition`` and writes
    the result back to the same path without the index.
    """
    dataset_path = '../../Data Preprocessing/Foursquare/final_coffeeshops_dataset_cleaned.csv'

    scored_df = calculate_competition(load_data(dataset_path))
    scored_df.to_csv(dataset_path, index=False)

    print("Competition (number of Food & Dining over related POIs [4 columns]) values added and saved")


if __name__ == "__main__":
    main()


Competition (number of Food & Dining over related POIs [4 columns]) values added and saved
