In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree
from datetime import datetime


In [2]:
class SafetyScoreModel:
    """
    Computes a safety score (1-10) for given coordinates and time of day,
    based on proximity to crime hotspots and time-sensitive risk factors.
    """
    def __init__(self, hotspots_csv: str = "data/crime_with_hotspots.csv"):
        # Load hotspots data (crime points with cluster labels)
        df = pd.read_csv(hotspots_csv, parse_dates=['datetime'])
        df = df[df['cluster'] != -1]  # exclude noise

        # Compute centroids and cluster weights
        grouped = df.groupby('cluster').agg(
            Latitude=('Latitude', 'mean'),
            Longitude=('Longitude', 'mean'),
            count=('cluster', 'count')
        ).reset_index()
        self.centroids = grouped[['Latitude', 'Longitude']].values
        self.weights = grouped['count'].values

        # Build BallTree on centroid coordinates (radians, haversine metric)
        coords_rad = np.radians(self.centroids)
        self.tree = BallTree(coords_rad, metric='haversine')

    def get_time_risk(self, hour: int) -> float:
        """
        Time-based risk modifier:
        - Night (22-5): higher risk (0.5 multiplier)
        - Commute (5-9, 17-22): moderate risk (0.8)
        - Day (9-17): baseline (1.0)
        """
        if hour >= 22 or hour < 5:
            return 0.5
        if 5 <= hour < 9 or 17 <= hour < 22:
            return 0.8
        return 1.0

    def compute_safety_score(self, latitude: float, longitude: float, time_input) -> float:
        """
        Compute safety score for a point (latitude, longitude) and a time.
        time_input can be a datetime.time or string 'HH:MM'.
        Returns a score between 1 (least safe) and 10 (most safe).
        """
        # Parse time
        if isinstance(time_input, str):
            time_obj = datetime.strptime(time_input, "%H:%M").time()
        else:
            time_obj = time_input
        hour = time_obj.hour

        # Query nearest hotspot centroid
        point_rad = np.radians([[latitude, longitude]])
        dist, ind = self.tree.query(point_rad, k=1)
        # Convert radians to km
        dist_km = dist[0][0] * 6371.0088

        # Base distance score (inverse relation)
        distance_score = max(1.0, 10.0 - dist_km * 2.0)

        # Apply time risk modifier
        time_factor = self.get_time_risk(hour)
        raw_score = distance_score * time_factor

        # Clamp score to [1, 10]
        score = min(max(round(raw_score, 2), 1.0), 10.0)
        return score


In [5]:
# Initialize model
model = SafetyScoreModel("hotspots_with_labels_hdbscan.csv")

# Test coordinate and time
lat, lon = 34.0375, -118.3506
time = "23:30"

# Compute safety score
score = model.compute_safety_score(lat, lon, time)
print(f"Safety Score for ({lat}, {lon}) at {time}: {score}")


Safety Score for (34.0375, -118.3506) at 23:30: 1.0
