In [2]:
csv_path = "/content/drive/MyDrive/Unique_Building_Identifier.csv"

import pandas as pd

# Bounding box (decimal degrees) used to discard building centroids that fall
# outside the Washington, D.C. area.
DC_LAT_RANGE = (38.8, 39.1)
DC_LON_RANGE = (-77.2, -76.9)

# Load the building CSV. low_memory=False makes pandas infer each column's
# dtype from the whole file instead of chunk by chunk.
# NOTE(review): the recorded run emitted a warning on this read, presumably the
# mixed-type DtypeWarning that this option is meant to avoid -- confirm.
df = pd.read_csv(csv_path, low_memory=False)

# Clean coordinates: drop rows with a missing centroid, then keep only the
# rows whose centroid lies inside the D.C. bounding box (bounds inclusive).
df = df.dropna(subset=["CENTROIDX", "CENTROIDY"])
in_dc = (
    df["CENTROIDY"].between(*DC_LAT_RANGE)
    & df["CENTROIDX"].between(*DC_LON_RANGE)
)
df = df[in_dc]

df.shape


  df = pd.read_csv(csv_path)


(185695, 28)

In [3]:
!pip install h3 folium

Collecting h3
  Downloading h3-4.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Downloading h3-4.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m1.0/1.0 MB[0m [31m27.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: h3
Successfully installed h3-4.2.2


In [4]:
import h3

# Set resolution (9 is good for neighborhood-level)
res = 9

# Apply H3 indexing: tag every building with the H3 cell that contains its
# centroid. Iterating the two coordinate columns directly avoids the per-row
# Series construction of DataFrame.apply(axis=1), and also works when the
# frame is empty.
df["h3_index"] = [
    h3.latlng_to_cell(lat, lng, res)
    for lat, lng in zip(df["CENTROIDY"], df["CENTROIDX"])
]

# Group by H3 cell: count of non-null UBIDs, plus total and mean AREA_NEW.
hex_grouped = df.groupby("h3_index").agg(
    building_count=("UBID", "count"),
    total_area=("AREA_NEW", "sum"),
    avg_area=("AREA_NEW", "mean")
).reset_index()

hex_grouped.head()


Unnamed: 0,h3_index,building_count,total_area,avg_area
0,892aa840823ffff,1,1888.017175,1888.017175
1,892aa840827ffff,64,133912.05656,2092.375884
2,892aa84082fffff,9,22329.885263,2481.098363
3,892aa840903ffff,15,42404.984202,2826.998947
4,892aa840907ffff,40,141756.324182,3543.908105


In [5]:
import h3
import folium
import matplotlib.pyplot as plt

# Convert H3 cell to GeoJSON-style polygon
def h3_to_geojson(h):
    """Return the boundary of H3 cell ``h`` as a GeoJSON Polygon geometry.

    Parameters
    ----------
    h : H3 cell identifier accepted by ``h3.cell_to_boundary``.

    Returns
    -------
    dict
        ``{"type": "Polygon", "coordinates": [ring]}`` where ``ring`` is a
        list of ``[lng, lat]`` positions whose last position repeats the
        first.
    """
    boundary = h3.cell_to_boundary(h)
    # The boundary vertices are (lat, lng); GeoJSON positions are [lng, lat].
    ring = [[lng, lat] for lat, lng in boundary]
    # A GeoJSON linear ring must be closed: repeat the first position at the
    # end unless the boundary already came back closed.
    if ring and ring[0] != ring[-1]:
        ring.append(list(ring[0]))
    return {
        "type": "Polygon",
        "coordinates": [ring],
    }

# Base map centred on Washington, D.C.
m = folium.Map(
    location=[38.89511, -77.03637],
    zoom_start=12,
    tiles="cartodbpositron",
)

# Largest per-cell building count; divides each count to get the colormap input
max_count = hex_grouped["building_count"].max()


def _hex_style(col):
    """Build a folium style callback that draws a feature in colour ``col``."""
    def _style(feature):
        return {
            "fillColor": col,
            "color": col,
            "weight": 0.5,
            "fillOpacity": 0.6,
        }
    return _style


# Add one GeoJson layer per hexagon, shaded by its share of max_count
for cell_id, n_buildings in zip(hex_grouped["h3_index"], hex_grouped["building_count"]):
    red, green, blue = plt.cm.Oranges(n_buildings / max_count)[:3]
    color_hex = "#{:02x}{:02x}{:02x}".format(
        int(red * 255), int(green * 255), int(blue * 255)
    )
    hex_layer = folium.GeoJson(
        h3_to_geojson(cell_id),
        style_function=_hex_style(color_hex),
    )
    hex_layer.add_to(m)

# Show map
m


Output hidden; open in https://colab.research.google.com to view.

In [6]:
import pandas as pd

# Read the fire station table from Drive
fire_path = "/content/drive/MyDrive/Fire_Stations.csv"
fire_df = pd.read_csv(filepath_or_buffer=fire_path)

# Show the first five rows
fire_df.head(5)


Unnamed: 0,X,Y,NAME,ADDRESS,ZIP,PHONE,TYPE,BATTALION,WARD,HISTORY_DESCRIPTION,...,LONGITUDE,XCOORD,YCOORD,MAR_ID,GIS_ID,GLOBALID,OBJECTID,SE_ANNO_CAD_DATA,CREATED_DATE,LAST_EDITED_DATE
0,-8573583.0,4704899.0,Engine Company 13,501 4TH STREET SW,20024,(202) 673-3213,Engine House 13,6,6,"Organized: November 26, 1904.\n\nOriginal appa...",...,-77.017572,398441.4,135024.88,311526,Fire_023,{94E9C818-92E2-4A28-87C5-E5731A298179},1,,,2024/12/04 22:10:00+00
1,-8572524.0,4714351.0,Engine Company 14,4801 NORTH CAPITOL STREET NE,20011,(202) 673-3214,Engine House 14,1,5,"Organized: June 8, 1898.\n\nOriginal apparatus...",...,-77.008294,399281.07,142329.91,288276,Fire_002,{81BA98DD-8623-4CCC-8368-D6376A02D9A7},2,,,
2,-8570051.0,4702334.0,Engine Company 15,2101 14TH STREET SE,20020,(202) 673-3215,Engine House 15,3,8,"Organized: April 15, 1898.\n\nOriginal apparat...",...,-76.986021,401208.69,133005.19,156246,Fire_031,{50E73988-93A6-4937-BF1F-335210C8D57E},3,,,
3,-8574942.0,4707832.0,Engine Company 16,1018 13TH STREET NW,20005,(202) 673-3216,Engine House 16,6,2,"Organized: October 24, 1904. Replaced 1st Chem...",...,-77.030016,397396.45,137273.24,240645,Fire_024,{0AA98376-D7BF-4603-A082-98FE2859510D},4,,,
4,-8570480.0,4711984.0,Engine Company 17,1227 MONROE STREET NE,20017,(202) 673-3217,Engine House 17,1,5,"Organized: April 22, 1905 Replaced Chemical Co...",...,-76.989906,400873.27,140493.83,294510,Fire_008,{FB9D10BE-A87C-4DC2-B34C-E352C8C58C3B},5,,,


In [7]:
import folium

# Overlay every fire station on the existing map as a red marker whose popup
# shows the station name
station_rows = zip(fire_df["LATITUDE"], fire_df["LONGITUDE"], fire_df["NAME"])
for station_lat, station_lon, station_name in station_rows:
    station_marker = folium.Marker(
        location=[station_lat, station_lon],
        popup=station_name,
        icon=folium.Icon(color="red", icon="fire", prefix="fa"),
    )
    station_marker.add_to(m)

# Show updated map
m


Output hidden; open in https://colab.research.google.com to view.

In [8]:
import numpy as np
from geopy.distance import geodesic

# A hex is "underserved" when its centre is farther than this from every station
UNDERSERVED_THRESHOLD_KM = 1.0

# Column layout of the result; passed explicitly so the frame has these columns
# even when no hex qualifies (otherwise sort_values below raises KeyError).
UNDERSERVED_COLUMNS = ["h3_index", "building_count", "total_area", "distance_km"]

# Records (one dict per hex) for the underserved hexes
underserved_hexes = []

# Build a list of fire station coordinates as (lat, lng) pairs
fire_coords = list(zip(fire_df["LATITUDE"], fire_df["LONGITUDE"]))

# For each hex, measure from the cell centre to the nearest station
for hex_row in hex_grouped.itertuples(index=False):
    hex_index = hex_row.h3_index
    hex_center = h3.cell_to_latlng(hex_index)  # (lat, lng)

    # Find minimum geodesic distance to any fire station
    min_dist_km = min(geodesic(hex_center, fs).km for fs in fire_coords)

    # Threshold: underserved if farther than the threshold from all stations
    if min_dist_km > UNDERSERVED_THRESHOLD_KM:
        underserved_hexes.append({
            "h3_index": hex_index,
            "building_count": hex_row.building_count,
            "total_area": hex_row.total_area,
            "distance_km": min_dist_km
        })

# Convert to DataFrame and show the hexes farthest from any station
underserved_df = pd.DataFrame(underserved_hexes, columns=UNDERSERVED_COLUMNS)
underserved_df.sort_values("distance_km", ascending=False).head()


Unnamed: 0,h3_index,building_count,total_area,distance_km
22,892aa840b0fffff,9,22184.283696,3.000718
31,892aa840b77ffff,50,64348.668388,2.990658
0,892aa840823ffff,1,1888.017175,2.914372
319,892aa84c593ffff,28,28820.775147,2.892242
26,892aa840b2bffff,77,112104.071139,2.790206


In [10]:
# Install if needed
!pip install h3 scikit-learn

import pandas as pd
import numpy as np
import h3
from sklearn.cluster import KMeans

# Centre (lat, lng) of every underserved hexagon; these are the points to cluster
underserved_coords = [h3.cell_to_latlng(h) for h in underserved_df["h3_index"]]
if not underserved_coords:
    # Fail with a clear message instead of KMeans' array-shape error.
    raise ValueError("No underserved hexagons to cluster; cannot suggest stations.")
X = np.array(underserved_coords)

# Choose number of suggested stations
n_clusters = 5

# KMeans cannot fit more clusters than there are samples, so cap the count
# actually used at the number of underserved hexagons.
effective_clusters = min(n_clusters, len(X))

# NOTE(review): clustering runs on raw (lat, lng) degrees, so a degree of
# longitude is weighted the same as a degree of latitude even though it covers
# less ground at this latitude -- consider projecting the points first.
kmeans = KMeans(n_clusters=effective_clusters, random_state=42, n_init='auto')
kmeans.fit(X)

# Final coordinates of proposed new stations (one row per cluster centre)
suggested_locations = kmeans.cluster_centers_
suggested_locations




array([[ 38.96755691, -77.0458029 ],
       [ 38.88550539, -76.94573844],
       [ 38.94039712, -76.9933371 ],
       [ 38.93240232, -77.08538967],
       [ 38.85346743, -77.00327788]])

In [11]:
# Mark each proposed station (a KMeans cluster centre) with a blue "plus" icon
for centre in suggested_locations:
    lat, lon = centre
    suggestion_marker = folium.Marker(
        location=[lat, lon],
        popup="Suggested Station",
        icon=folium.Icon(color="blue", icon="plus", prefix="fa"),
    )
    suggestion_marker.add_to(m)

# Display map
m


Output hidden; open in https://colab.research.google.com to view.

In [12]:
# Write the map to Drive as an HTML file
map_output_path = "/content/drive/MyDrive/DC_Emergency_Optimization_Map.html"
m.save(map_output_path)


In [13]:
import joblib

# Persist the fitted KMeans estimator to Drive; the call's return value (the
# list of written file names) is the cell output
kmeans_model_path = "/content/drive/MyDrive/kmeans_firestation_model.pkl"
joblib.dump(kmeans, kmeans_model_path)


['/content/drive/MyDrive/kmeans_firestation_model.pkl']