In [6]:
import json
import pandas as pd
import folium
from sklearn.cluster import DBSCAN
from geopy.distance import geodesic
from math import radians

# Gym Location Data Processing & Cleaning
# 
# This script extracts, cleans, and saves gym location data from a JSON file.
# Key steps include:
# 1. **JSON Data Loading**: Reads raw gym location data from `gym.json`.
# 2. **Data Parsing**: Extracts relevant attributes (ID, Name, Latitude, Longitude) from the JSON structure.
# 3. **Data Cleaning**: Removes entries missing essential information (name, latitude, longitude).
# 4. **Data Formatting**: Converts the cleaned data into a Pandas DataFrame for structured manipulation.
# 5. **CSV Export**: Saves the final cleaned dataset as `clean_gym_locations.csv` for further analysis and visualization.
# 
# This preprocessing ensures gym location data is structured, complete, and ready for clustering or mapping.

In [7]:
gym_file_path = "gym.json"


with open(gym_file_path, "r", encoding="utf-8") as file:
    gym_data = json.load(file)

gym_list = []
for element in gym_data["elements"]:
    tags = element.get("tags", {})
    gym_list.append({
        "ID": element.get("id"),
        "Name": tags.get("name", "Unknown"),
        "Latitude": element.get("lat"),
        "Longitude": element.get("lon")
    })

gym_df = pd.DataFrame(gym_list)

clean_gym_df = gym_df.replace("Unknown", pd.NA).dropna(subset=["Name", "Latitude", "Longitude"])

clean_gym_csv_path = "clean_gym_locations.csv"
clean_gym_df.to_csv(clean_gym_csv_path, index=False)

# Supplement Store Location Data Processing & Cleaning
# 
# This script extracts, cleans, and saves supplement store location data from a JSON file.
# Key steps include:
# 1. **JSON Data Loading**: Reads raw supplement store data from `supplement store.json`.
# 2. **Data Parsing**: Extracts relevant attributes (ID, Name, Latitude, Longitude) from the JSON structure.
# 3. **Handling Missing Coordinates**: Checks for missing latitude or longitude values and retrieves them from the "center" field if available.
# 4. **Data Cleaning**: Removes entries missing essential information (name, latitude, longitude).
# 5. **Data Formatting**: Converts the cleaned data into a Pandas DataFrame for structured manipulation.
# 6. **CSV Export**: Saves the final cleaned dataset as `clean_supplement_store_locations.csv` for further analysis and visualization.
# 
# This preprocessing ensures supplement store data is structured, complete, and ready for clustering or mapping.

In [8]:
supplement_file_path = "supplement store.json"

with open(supplement_file_path, "r", encoding="utf-8") as file:
    supplement_data = json.load(file)

supplement_list = []
for element in supplement_data["elements"]:
    tags = element.get("tags", {})
    supplement_list.append({
        "ID": element.get("id"),
        "Name": tags.get("name", "Unknown"),
        "Latitude": element.get("lat") if "lat" in element else element.get("center", {}).get("lat"),
        "Longitude": element.get("lon") if "lon" in element else element.get("center", {}).get("lon")
    })

supplement_df = pd.DataFrame(supplement_list)

clean_supplement_df = supplement_df.replace("Unknown", pd.NA).dropna(subset=["Name", "Latitude", "Longitude"])

clean_supplement_csv_path = "clean_supplement_store_locations.csv"
clean_supplement_df.to_csv(clean_supplement_csv_path, index=False)

# Gym and Supplement Store Location Mapping
# 
# This script generates an interactive map to visualize gym and supplement store locations using Folium.
# Key steps include:
# 1. **Data Loading**: Reads cleaned gym and supplement store datasets.
# 2. **Map Center Calculation**: Computes the average latitude and longitude of all locations to center the map.
# 3. **Map Creation**: Initializes a Folium map centered at the computed location with an appropriate zoom level.
# 4. **Gym Marker Plotting**: Adds gym locations as blue markers, each displaying its name when clicked.
# 5. **Store Marker Plotting**: Adds supplement store locations as green markers, also with pop-up details.
# 6. **Map Saving**: Exports the final interactive map as an HTML file (`gym_store_map.html`) for easy visualization.
# 
# This map helps analyze spatial distribution, aiding decisions on fitness-related business expansion.

In [9]:
gyms_df = pd.read_csv('clean_gym_locations.csv') 
stores_df = pd.read_csv('clean_supplement_store_locations.csv') 

avg_lat = pd.concat([gyms_df['Latitude'], stores_df['Latitude']]).mean()
avg_lon = pd.concat([gyms_df['Longitude'], stores_df['Longitude']]).mean()
map_center = [avg_lat, avg_lon]

m = folium.Map(location=map_center, zoom_start=12)

for _, row in gyms_df.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"Gym: {row['Name']}",
        icon=folium.Icon(color='blue', icon='dumbbell', prefix='fa')
    ).add_to(m)

for _, row in stores_df.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"Store: {row['Name']}",
        icon=folium.Icon(color='green', icon='shopping-cart', prefix='fa')
    ).add_to(m)

m.save("gym_store_map.html")


# Gym Location Clustering & Smart Recommendations
# 
# This script analyzes gym and supplement store locations to identify optimal regions for new gyms. 
# Key steps include:
# 1. **Data Loading**: Reads cleaned gym and supplement store location datasets.
# 2. **DBSCAN Clustering**: Groups gyms into clusters based on geographic proximity using the Haversine distance metric.
# 3. **Recommendation Scoring**: Assigns scores to clusters based on gym density, proximity to supplement stores, and geographic spread.
# 4. **Filtering & Selection**: Refines clusters based on density thresholds and geographic constraints to ensure viable recommendations.
# 5. **Map Visualization**: Uses Folium to generate an interactive map displaying gyms, stores, and the best cluster recommendations.
# 
# The final output is an HTML map (`smart_recommendation_map.html`) that highlights recommended locations for gym expansion.

In [10]:
gyms = pd.read_csv('clean_gym_locations.csv')
stores = pd.read_csv('clean_supplement_store_locations.csv')

coords = gyms[['Latitude', 'Longitude']].applymap(radians).values
epsilon = 4.5 / 6371
db = DBSCAN(eps=epsilon, min_samples=3, algorithm='ball_tree', metric='haversine').fit(coords)
gyms['Cluster'] = db.labels_
gyms['IsClustered'] = gyms['Cluster'] != -1

recommendations = []

for cluster_id in gyms['Cluster'].unique():
    if cluster_id == -1:
        continue

    gyms_in_cluster = gyms[gyms['Cluster'] == cluster_id]
    center_lat = gyms_in_cluster['Latitude'].mean()
    center_lon = gyms_in_cluster['Longitude'].mean()
    center = (center_lat, center_lon)

    gym_count = len(gyms_in_cluster)
    nearby_store_count = sum(
        geodesic(center, (store['Latitude'], store['Longitude'])).km <= 3
        for _, store in stores.iterrows()
    )
    spread_km = max(
        geodesic(center, (row['Latitude'], row['Longitude'])).km
        for _, row in gyms_in_cluster.iterrows()
    )
    
    dense_gym_count = sum(
        geodesic(center, (row['Latitude'], row['Longitude'])).km <= 3
        for _, row in gyms_in_cluster.iterrows()
    )

    score = (gym_count * 3) - (nearby_store_count * 2) - spread_km

    recommendations.append({
        'Cluster': cluster_id,
        'Latitude': center_lat,
        'Longitude': center_lon,
        'Gym_Count': gym_count,
        'Nearby_Store_Count': nearby_store_count,
        'Dense_Gym_Count': dense_gym_count,
        'Spread_km': spread_km,
        'Score': score
    })

recommend_df = pd.DataFrame(recommendations)

recommend_df = recommend_df[recommend_df['Latitude'] < 30.6]

final_recommend = recommend_df[
    (recommend_df['Gym_Count'] >= 6) &
    (recommend_df['Dense_Gym_Count'] >= 5) &
    (recommend_df['Spread_km'] <= 4.5)
]

if len(final_recommend) < 5:
    final_recommend = recommend_df[
        (recommend_df['Gym_Count'] >= 5) &
        (recommend_df['Dense_Gym_Count'] >= 4) &
        (recommend_df['Spread_km'] <= 6)
    ]

if len(final_recommend) < 5:
    final_recommend = recommend_df[
        (recommend_df['Gym_Count'] >= 4) &
        (recommend_df['Dense_Gym_Count'] >= 3) &
        (recommend_df['Spread_km'] <= 7)
    ]

if len(final_recommend) < 5:
    final_recommend = recommend_df.sort_values(by='Score', ascending=False).head(5)

final_recommend = final_recommend.sort_values(by='Score', ascending=False).head(10)

map_center = [gyms['Latitude'].mean(), gyms['Longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=9)

for _, row in gyms.iterrows():
    color = 'blue' if row['IsClustered'] else 'lightgray'
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"Gym: {row['Name']}",
        icon=folium.Icon(color=color, icon='dumbbell', prefix='fa')
    ).add_to(m)

for _, row in stores.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"Store: {row['Name']}",
        icon=folium.Icon(color='green', icon='shopping-cart', prefix='fa')
    ).add_to(m)

for i, (_, row) in enumerate(final_recommend.iterrows(), start=1):
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=(f"📍 Recommendation #{i}\n"
               f"Gyms: {row['Gym_Count']}\n"
               f"Stores Nearby: {row['Nearby_Store_Count']}\n"
               f"Spread: {row['Spread_km']:.2f} km"),
        icon=folium.Icon(color='red', icon='plus-sign')
    ).add_to(m)

m.save("smart_recommendation_map.html")

  coords = gyms[['Latitude', 'Longitude']].applymap(radians).values
