In [54]:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree
from datetime import datetime
import folium
from folium.plugins import MarkerCluster
from IPython.display import IFrame
import os


In [59]:
class SafetyScoreModel:
    """
    Computes a safety score (1-10) for given coordinates and time of day,
    based on proximity to recorded crime locations and a time-of-day factor.

    Every crime within ``radius_km`` (default 0.8 km) of the query point
    contributes ``risk_weight / (distance_km + 0.1)`` to a penalty that is
    subtracted from 10; the result is then multiplied by the time-of-day
    factor and clamped to [1, 10].

    NOTE(review): the penalty is an unnormalised sum, so a point with more
    than a handful of crimes in range saturates at the minimum score of 1.0.
    NOTE(review): a point with no crime in range returns 10.0 without the
    time-of-day factor being applied - confirm this is intended.

    Attributes:
        df: hotspot rows used to build the tree (rows with cluster == -1 removed).
        tree: BallTree over (Latitude, Longitude) in radians, haversine metric.
        radius_km: search radius around the query point, in kilometres.
        risk_weight: constant multiplier applied to every inverse distance.
    """

    # Mean Earth radius used to convert between kilometres and radians.
    EARTH_RADIUS_KM = 6371.0088
    # Added to every distance so a crime at the query point cannot divide by zero.
    DISTANCE_OFFSET_KM = 0.1
    # Accepted layouts for string time inputs.
    _TIME_FORMATS = ("%H:%M", "%H:%M:%S")

    # Class-level defaults; __init__ overrides them per instance.
    radius_km = 0.8
    risk_weight = 2.0

    def __init__(self, hotspots_csv: str = "hotspots_with_labels_hdbscan.csv",
                 radius_km: float = 0.8, risk_weight: float = 2.0):
        """
        Load the hotspot CSV and index its coordinates.

        Args:
            hotspots_csv: path to a CSV with 'datetime', 'cluster', 'Latitude'
                and 'Longitude' columns.
            radius_km: search radius in kilometres (must be positive).
            risk_weight: constant weight multiplier for the penalty.

        Raises:
            ValueError: if ``radius_km`` is not positive.
        """
        if radius_km <= 0:
            raise ValueError(f"radius_km must be positive, got {radius_km!r}")

        df = pd.read_csv(hotspots_csv, parse_dates=['datetime'])
        df = df[df['cluster'] != -1]  # exclude noise
        self.df = df

        # The haversine metric expects (lat, lon) pairs expressed in radians.
        coords_rad = np.radians(df[['Latitude', 'Longitude']].values)
        self.tree = BallTree(coords_rad, metric='haversine')
        self.radius_km = radius_km
        self.risk_weight = risk_weight

    def get_time_risk(self, hour: int) -> float:
        """
        Return the multiplier applied to the base score for an hour (0-23).

        22:00-04:59 -> 0.5, 05:00-08:59 and 17:00-21:59 -> 0.8, otherwise 1.0.
        A smaller factor lowers the final safety score.
        """
        if hour >= 22 or hour < 5:
            return 0.5
        if 5 <= hour < 9 or 17 <= hour < 22:
            return 0.8
        return 1.0

    def _parse_hour(self, time_input) -> int:
        """Extract the hour from an 'HH:MM' / 'HH:MM:SS' string or any object with ``.hour``."""
        if isinstance(time_input, str):
            text = time_input.strip()
            for fmt in self._TIME_FORMATS:
                try:
                    return datetime.strptime(text, fmt).hour
                except ValueError:
                    continue
            raise ValueError(
                f"time_input {time_input!r} does not match 'HH:MM' or 'HH:MM:SS'"
            )
        return time_input.hour

    def compute_safety_score(self, latitude: float, longitude: float, time_input) -> float:
        """
        Score a location at a given time of day.

        Args:
            latitude: query latitude in degrees.
            longitude: query longitude in degrees.
            time_input: 'HH:MM' or 'HH:MM:SS' string, or an object exposing
                an ``hour`` attribute (e.g. ``datetime.time``).

        Returns:
            The score rounded to two decimals and clamped to [1.0, 10.0];
            exactly 10.0 when no crime lies within ``radius_km``.

        Raises:
            ValueError: if a string ``time_input`` matches no accepted format.
        """
        hour = self._parse_hour(time_input)

        point_rad = np.radians([[latitude, longitude]])
        # With return_distance=True the tree returns (indices, distances),
        # each holding one array per query point; distances are in radians.
        neighbor_idx, neighbor_dist = self.tree.query_radius(
            point_rad, r=self.radius_km / self.EARTH_RADIUS_KM, return_distance=True
        )

        if len(neighbor_idx[0]) == 0:
            return 10.0

        distances_km = np.asarray(neighbor_dist[0], dtype=float) * self.EARTH_RADIUS_KM

        # The offset prevents division by zero and already bounds each term
        # at 1 / 0.1 = 10; the clip is kept as a safeguard.
        inverse_distances = np.clip(1 / (distances_km + self.DISTANCE_OFFSET_KM), 0, 10)
        score_penalty = np.sum(inverse_distances * self.risk_weight)

        base_score = max(1.0, 10.0 - score_penalty)
        raw_score = base_score * self.get_time_risk(hour)

        return float(min(max(round(raw_score, 2), 1.0), 10.0))

In [60]:
# Named test locations, one record per row: (name, latitude, longitude, time as HH:MM).
test_data = pd.DataFrame(
    [
        ('Downtown Crossing', 42.3601, -71.0589, '00:30'),
        ('Fenway Park', 42.3467, -71.0950, '08:00'),
        ('South Station', 42.3522, -71.0551, '13:00'),
        ('Beacon Hill', 42.3606, -71.0650, '18:30'),
        ('Boston Common', 42.3550, -71.0636, '23:15'),
        ('Harvard Square', 42.3736, -71.1189, '01:45'),
        ('MIT', 42.3601, -71.0942, '09:30'),
        ('North End', 42.3637, -71.0545, '14:15'),
        ('Seaport District', 42.3519, -71.0405, '19:45'),
        ('Charlestown', 42.3782, -71.0602, '00:00'),
    ],
    columns=['Location', 'Latitude', 'Longitude', 'Time'],
)

test_data.head()

Unnamed: 0,Location,Latitude,Longitude,Time
0,Downtown Crossing,42.3601,-71.0589,00:30
1,Fenway Park,42.3467,-71.095,08:00
2,South Station,42.3522,-71.0551,13:00
3,Beacon Hill,42.3606,-71.065,18:30
4,Boston Common,42.355,-71.0636,23:15


In [62]:
# Build the scoring model from the labelled hotspot file.
model = SafetyScoreModel(hotspots_csv="hotspots_with_labels_hdbscan.csv")

def categorize(score):
    """
    Map a numeric safety score to a risk label.

    Scores up to and including 4 are high risk, scores up to and including 7
    are moderate, and everything else is low risk.
    """
    # Inclusive upper bounds, checked in ascending order.
    bands = (
        (4, '⚠️ High Risk'),
        (7, '⚠️ Moderate'),
    )
    for upper_bound, label in bands:
        if score <= upper_bound:
            return label
    return '✅ Low Risk'

# Score every test location at its listed time, then label the result.
test_data['SafetyScore'] = [
    model.compute_safety_score(lat, lon, when)
    for lat, lon, when in zip(
        test_data['Latitude'], test_data['Longitude'], test_data['Time']
    )
]
test_data['RiskLevel'] = test_data['SafetyScore'].apply(categorize)

test_data

Unnamed: 0,Location,Latitude,Longitude,Time,SafetyScore,RiskLevel
0,Downtown Crossing,42.3601,-71.0589,00:30,1.0,⚠️ High Risk
1,Fenway Park,42.3467,-71.095,08:00,1.0,⚠️ High Risk
2,South Station,42.3522,-71.0551,13:00,1.0,⚠️ High Risk
3,Beacon Hill,42.3606,-71.065,18:30,1.0,⚠️ High Risk
4,Boston Common,42.355,-71.0636,23:15,1.0,⚠️ High Risk
5,Harvard Square,42.3736,-71.1189,01:45,10.0,✅ Low Risk
6,MIT,42.3601,-71.0942,09:30,10.0,✅ Low Risk
7,North End,42.3637,-71.0545,14:15,1.0,⚠️ High Risk
8,Seaport District,42.3519,-71.0405,19:45,1.0,⚠️ High Risk
9,Charlestown,42.3782,-71.0602,00:00,1.0,⚠️ High Risk


In [63]:
# Reload the labelled hotspots for plotting and drop rows labelled -1.
hotspots_all = pd.read_csv("hotspots_with_labels_hdbscan.csv")
is_clustered = hotspots_all['cluster'] != -1
hotspots_df = hotspots_all[is_clustered]

# Base map centred on the first test coordinate pair.
center = [42.3601, -71.0589]
map_vis = folium.Map(tiles='CartoDB positron', zoom_start=13, location=center)

# All hotspot markers go into one cluster layer attached to the map.
mc = MarkerCluster()
mc.add_to(map_vis)

for _, hotspot in hotspots_df.iterrows():
    hotspot_marker = folium.CircleMarker(
        location=[hotspot['Latitude'], hotspot['Longitude']],
        popup=f"Cluster {hotspot['cluster']}",
        radius=3,
        color='crimson',
        fill=True,
        fill_opacity=0.6,
    )
    hotspot_marker.add_to(mc)

# For every test location, draw a line to its nearest crime within 0.8 km
# (if any) and drop a marker showing the computed score and risk label.
for _, r in test_data.iterrows():
    test_loc = (r['Latitude'], r['Longitude'])
    point_rad = np.radians([[r['Latitude'], r['Longitude']]])
    # Radius is given in radians: 0.8 km divided by the Earth radius in km.
    neighbor_idx, neighbor_dist = model.tree.query_radius(
        point_rad, r=0.8 / 6371.0088, return_distance=True
    )
    if len(neighbor_idx[0]) > 0:
        # query_radius does not sort its results by default, so the first
        # entry is an arbitrary neighbour; pick the smallest distance instead.
        crime_idx = neighbor_idx[0][np.argmin(neighbor_dist[0])]
        crime_row = model.df.iloc[crime_idx]
        crime_loc = (crime_row['Latitude'], crime_row['Longitude'])
        folium.PolyLine([test_loc, crime_loc], color='blue', weight=2.5, opacity=0.6).add_to(map_vis)

    folium.Marker(
        location=test_loc,
        popup=f"{r['Location']}<br>Score: {r['SafetyScore']} ({r['RiskLevel']})",
        icon=folium.Icon(color='blue', icon='info-sign')
    ).add_to(map_vis)

# Write the map to an HTML file and embed it in the notebook if the file exists.
map_path = 'boston_crime_hotspots_map_with_test.html'
map_vis.save(map_path)

if not os.path.exists(map_path):
    print("Failed to create map.")
else:
    display(IFrame(map_path, width=700, height=500))
