In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN

import folium
from folium.plugins import MarkerCluster
from IPython.display import IFrame
import os


In [2]:
# Read the cleaned crime records and discard rows that lack a coordinate,
# since DBSCAN cannot place them.
df = pd.read_csv(
    "../datasets/cleaned_crime_data2.csv",
    parse_dates=['datetime'],
).dropna(subset=['Latitude', 'Longitude'])

# The haversine metric works on [lat, lon] pairs expressed in radians, so the
# neighbourhood radius must also be converted from kilometres to radians.
kms_per_radian = 6371.0088
radius_km = 0.5
epsilon = radius_km / kms_per_radian
coords = df.loc[:, ['Latitude', 'Longitude']].to_numpy()
coords_rad = np.radians(coords)

# Density-based clustering: a point needs at least 20 neighbours within
# `epsilon` to seed a cluster; points that fit nowhere are labelled -1 (noise).
db = DBSCAN(
    eps=epsilon,
    min_samples=20,
    algorithm='ball_tree',
    metric='haversine',
)
df['cluster'] = db.fit_predict(coords_rad)

# Hotspots are the rows assigned to a real cluster, i.e. everything but noise.
is_clustered = df['cluster'].ne(-1)
hotspots = df.loc[is_clustered].copy()

# Show the five most populated clusters.
hotspots['cluster'].value_counts().head()


cluster
2    311426
0    122983
3     11276
4      3405
7       331
Name: count, dtype: int64

In [3]:



# Build an interactive map with one circle marker per clustered incident,
# centred on the mean coordinate of all hotspot rows.
# NOTE(review): the value_counts output of the previous cell shows several
# hundred thousand hotspot rows; one CircleMarker per row produces a very large
# HTML file. If the map is slow to open, consider plotting a sample or using
# folium.plugins.FastMarkerCluster -- confirm what the analysis needs.
center = [hotspots['Latitude'].mean(), hotspots['Longitude'].mean()]
m = folium.Map(location=center, zoom_start=11, tiles='CartoDB positron')

mc = MarkerCluster().add_to(m)

# Iterate over just the four columns that are used. DataFrame.iterrows() would
# materialise a full Series for every row, which is needlessly slow here.
for lat, lon, city, category in zip(
    hotspots['Latitude'],
    hotspots['Longitude'],
    hotspots['city'],
    hotspots['crime_category'],
):
    folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        color='crimson',
        fill=True,
        fill_opacity=0.6,
        popup=f"{city} | {category}",
    ).add_to(mc)

# Save and display
filepath = "crime_hotspots_map.html"
m.save(filepath)

# Embed the saved HTML in the notebook only if the file is actually on disk.
if os.path.exists(filepath):
    display(IFrame(filepath, width=700, height=500))
else:
    print("Map file was not created.")
