In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, LabelEncoder
import folium
from folium.plugins import HeatMap
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score
from sklearn.metrics import davies_bouldin_score


In [None]:
# Load the cleaned accident records into the working DataFrame.
csv_path = 'dataset/cleaned_us_accident_data.csv'
df = pd.read_csv(csv_path)

In [None]:
# Clustering features: the two coordinates plus severity. Rows missing any of
# the three are dropped, so X_cluster may have fewer rows than df.
feature_cols = ['start_lat', 'start_lng', 'severity']
X_cluster = df[feature_cols].dropna()

In [None]:
# Standardise the clustering features: learn the scaling statistics from
# X_cluster, then apply them to the same rows.
scaler = StandardScaler().fit(X_cluster)
X_scaled = scaler.transform(X_cluster)

In [None]:
# Elbow search: fit one KMeans model per candidate k (2 through 9) on the
# scaled features and record each model's inertia.
inertia = []
for k in range(2, 10):
    kmeans = KMeans(n_clusters=k, random_state=42).fit(X_scaled)
    inertia.append(kmeans.inertia_)

In [None]:
# Elbow plot: inertia against the candidate cluster counts (2 through 9).
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(range(2, 10), inertia, marker='o', linestyle='--')
ax.set_xlabel("Number of Clusters")
ax.set_ylabel("Inertia")
ax.set_title("Elbow Method for Optimal Clusters")
plt.show()

In [None]:
# Fit the final 4-cluster model on the scaled features.
kmeans = KMeans(n_clusters=4, random_state=42)
# X_scaled has one row per row of X_cluster, which may be shorter than df
# because of the earlier dropna(). Attach the labels through X_cluster's index
# so they land on the right rows; rows that were dropped get NaN instead of the
# assignment failing with a length-mismatch ValueError.
df['cluster'] = pd.Series(kmeans.fit_predict(X_scaled), index=X_cluster.index)
# value_counts() ignores NaN, so only rows that were actually clustered count.
print(df['cluster'].value_counts())

In [None]:
# Davies-Bouldin index of the clustering (lower means better-separated clusters).
# Score only the rows that were clustered: X_scaled corresponds row-for-row to
# X_cluster, so select the labels by X_cluster's index to keep features and
# labels the same length and order even if dropna() removed rows from df.
cluster_labels = df.loc[X_cluster.index, 'cluster'].astype(int)
db_score = davies_bouldin_score(X_scaled, cluster_labels)
print(f"Davies-Bouldin Score: {db_score:.4f}")

In [None]:
# Centre the map on the mean accident coordinate.
map_center = [df["start_lat"].mean(), df["start_lng"].mean()]
m = folium.Map(location=map_center, zoom_start=5)
# Build the [lat, lng] pairs with one vectorised selection instead of
# df.iterrows(), which creates a Series per row and is very slow on large
# frames. Incomplete coordinates are dropped because HeatMap does not accept
# NaN locations.
heat_data = df[["start_lat", "start_lng"]].dropna().to_numpy().tolist()
HeatMap(heat_data, radius=8, blur=6, min_opacity=0.3).add_to(m)
# A bare last expression renders the interactive map in the notebook.
m