In [1]:
# ===============================
# STEP 1: Upload the CSV File
# ===============================
from google.colab import files
import io
import pandas as pd

# Opens the browser file picker; the result maps each uploaded file's name
# to its raw bytes.
uploaded = files.upload()

# Expected dataset name. Replace with your exact filename if needed.
file_path = "Shelter_utilization_2017_2024_Final.csv"

if uploaded:
    # Read straight from the bytes that were just uploaded rather than from
    # the on-disk name: when a file with the same name already exists, Colab
    # stores the new upload under a suffixed name, so reading the hard-coded
    # path would silently load the older copy.
    if file_path not in uploaded:
        # The expected name was not among the uploads; use the first file
        # that was actually provided.
        file_path = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[file_path]))
else:
    # Nothing was uploaded (dialog cancelled): fall back to a copy that is
    # already present in the working directory.
    df = pd.read_csv(file_path)

# ===============================
# STEP 2: Clean and Preprocess
# ===============================
# Reduce the table to one row per distinct (name, latitude, longitude)
# combination, then discard rows where either coordinate is missing.
df_unique = (
    df[['SHELTER_NAME', 'Latitude', 'Longitude']]
    .drop_duplicates()
    .dropna(subset=['Latitude', 'Longitude'])
)

# ===============================
# STEP 3: K-Means Clustering
# ===============================
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import numpy as np

# Standardise latitude and longitude before clustering; the fitted scaler is
# kept so cluster centres can be mapped back to degrees later.
scaler = StandardScaler()
coords_scaled = scaler.fit_transform(df_unique[['Latitude', 'Longitude']])

# Adjust number of clusters as needed
k = 5

kmeans = KMeans(
    # KMeans rejects n_clusters larger than the number of samples, so clamp
    # the requested k for small inputs.
    n_clusters=min(k, len(coords_scaled)),
    # Pin n_init explicitly: its default changed from 10 to 'auto' in
    # scikit-learn 1.4, so leaving it unset gives different clusterings on
    # different versions even with a fixed random_state (and a FutureWarning
    # on 1.2/1.3).
    n_init=10,
    random_state=42,
)
df_unique['Cluster'] = kmeans.fit_predict(coords_scaled)

# ===============================
# STEP 4: Assign Zone Names Based on Centroids
# ===============================
# Mirror the raw coordinates into explicitly named "*_orig" columns.
df_unique['Lat_orig'] = df_unique['Latitude']
df_unique['Lon_orig'] = df_unique['Longitude']

# The model was fitted on scaled coordinates, so undo the scaling to express
# each cluster centre in the original latitude/longitude space.
centroids_scaled = kmeans.cluster_centers_
centroids_unscaled = scaler.inverse_transform(centroids_scaled)

# Map each cluster index to the position of its centre.
centroid_lookup = {
    cluster_id: {'Latitude': center[0], 'Longitude': center[1]}
    for cluster_id, center in enumerate(centroids_unscaled)
}

# Define your own logic for labeling cluster zones
# Manually adjust based on Toronto zones
def assign_zone(lat, lon):
    """Return a Toronto zone label for the point (lat, lon).

    The rules are hand-tuned latitude/longitude thresholds checked in
    order; the first rule that holds decides the label. A point matching
    no rule is labelled "East York".
    """
    # Each rule is wrapped in a lambda so it is evaluated lazily and in
    # order, stopping at the first match.
    zone_rules = (
        ("North York", lambda: lat > 43.75 and lon < -79.42),
        ("Downtown", lambda: lat < 43.68 and lon < -79.4),
        ("Scarborough", lambda: lat < 43.7 and lon > -79.3),
        ("Etobicoke", lambda: lat > 43.7 and lon < -79.5),
    )
    return next(
        (zone for zone, applies in zone_rules if applies()),
        "East York",
    )

# Label every cluster with the zone its centre falls in, then give each
# shelter the label of the cluster it belongs to.
cluster_zone_names = {
    cluster_id: assign_zone(center['Latitude'], center['Longitude'])
    for cluster_id, center in centroid_lookup.items()
}
df_unique['Zone'] = df_unique['Cluster'].map(cluster_zone_names)

# ===============================
# STEP 5: Plot on Folium Map
# ===============================
import html

import folium

toronto_map = folium.Map(location=[43.7, -79.4], zoom_start=11)
colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'cadetblue', 'darkgreen']

# One circle marker per shelter, coloured by cluster. itertuples() is used
# instead of iterrows() because it keeps each column's own dtype and avoids
# building a Series per row.
for shelter in df_unique.itertuples(index=False):
    cluster_id = int(shelter.Cluster)
    # Values read from the CSV are inserted into popup HTML, so escape them;
    # otherwise a name containing '&', '<' or quotes would produce broken or
    # injected markup.
    popup_text = f"""
    <b>Shelter:</b> {html.escape(str(shelter.SHELTER_NAME))}<br>
    <b>Cluster:</b> {cluster_id}<br>
    <b>Zone:</b> {html.escape(str(shelter.Zone))}
    """
    folium.CircleMarker(
        location=(shelter.Latitude, shelter.Longitude),
        radius=6,
        popup=folium.Popup(popup_text, max_width=250),
        # Wrap around the palette if there are more clusters than colours.
        color=colors[cluster_id % len(colors)],
        fill=True,
        fill_opacity=0.8
    ).add_to(toronto_map)

# ===============================
# STEP 6: Save and Download Map
# ===============================
from google.colab import files

# Write the map out as an HTML file and report where it went.
output_path = "toronto_shelter_clusters_zones.html"
toronto_map.save(output_path)
print(f"✅ Map saved as {output_path}")

# Optional: hand the saved file to the browser as a download.
files.download(output_path)


Saving Shelter_utilization_2017_2024_Final.csv to Shelter_utilization_2017_2024_Final.csv
✅ Map saved as toronto_shelter_clusters_zones.html


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>