# CityScope — Incident Risk Baseline (Synthetic, Denver)

This notebook creates a synthetic geospatial dataset around Denver, engineers simple features (weather, events, temporal), and trains a baseline classifier to predict incident risk per tile/time slice.

**Purpose:** portfolio demo without exposing proprietary data or logic.

In [None]:
import numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score, average_precision_score
import matplotlib.pyplot as plt
# Seed NumPy's global RNG so every run regenerates the same synthetic data.
np.random.seed(42)

# --- Sample locations: N random points inside a Denver bounding box --------
# NOTE: the draws below consume the global RNG stream in sequence; keep the
# order of the np.random calls unchanged to reproduce the same values.
N = 5000
lat = np.random.uniform(low=39.55, high=39.88, size=N)
lon = np.random.uniform(low=-105.15, high=-104.60, size=N)

# --- Temporal proxies -------------------------------------------------------
hour = np.random.randint(0, 24, size=N)  # integers 0..23
dow = np.random.randint(0, 7, size=N)    # integers 0..6
morning_peak = (hour >= 6) & (hour <= 9)
evening_peak = (hour >= 15) & (hour <= 18)
is_rush = morning_peak | evening_peak    # boolean mask, cast to int later

# --- Weather proxies --------------------------------------------------------
temp_c = np.random.normal(loc=12, scale=10, size=N)
# Shift the exponential draw down by 0.5 and floor at 0 so that a share of
# the rows end up with exactly zero precipitation.
raw_precip = np.random.exponential(scale=1.0, size=N)
precip_mm = np.clip(raw_precip - 0.5, 0, None)
wind = np.random.gamma(shape=2, scale=3, size=N)

# --- Event proxies ----------------------------------------------------------
event_density = np.random.poisson(lam=0.3, size=N)
# Flag points whose L1 (Manhattan) offset in degrees from the reference
# coordinate (39.7439, -105.0201) is below 0.07.
stadium_lat, stadium_lon = 39.7439, -105.0201
l1_offset = np.abs(lat - stadium_lat) + np.abs(lon - stadium_lon)
near_stadium = (l1_offset < 0.07).astype(int)

# Label — an additive risk score built from the feature flags is squashed
# through a logistic function; each row's binary label is then a Bernoulli
# draw at that probability (the random draw is the only source of noise).
rush_term = 0.15 * is_rush
event_term = 0.1 * (event_density > 0)
stadium_term = 0.08 * near_stadium
rain_term = 0.05 * (precip_mm > 1.0)
base = 0.05 + rush_term + event_term + stadium_term + rain_term

# Centre the score at 0.2 and scale by 6 before applying the sigmoid.
logit = (base - 0.2) * 6
prob = 1 / (1 + np.exp(-logit))

# One uniform draw per row; label is 1 where the draw falls below prob.
y = (np.random.rand(N) < prob).astype(int)

# Assemble the feature matrix. Dict insertion order fixes the column order
# that the model is trained on, so keep it stable.
feature_columns = {
    'hour': hour,
    'dow': dow,
    'is_rush': is_rush.astype(int),
    'temp_c': temp_c,
    'precip_mm': precip_mm,
    'wind': wind,
    'event_density': event_density,
    'near_stadium': near_stadium,
    'lat': lat,
    'lon': lon,
}
X = pd.DataFrame(feature_columns)

# Hold out 25% of the rows for evaluation, stratified on the label so both
# splits keep the same class balance; the fixed random_state makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# Baseline model: gradient-boosted trees with library-default
# hyperparameters and a fixed seed. fit() returns the estimator itself, so
# construction and training can be chained.
clf = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Take the second predict_proba column as the risk score
# (presumably the probability of class 1, given the 0/1 labels).
positive_col = 1
p = clf.predict_proba(X_test)[:, positive_col]

# Ranking metrics on the held-out split.
auc = roc_auc_score(y_test, p)
ap = average_precision_score(y_test, p)

# Bare expression so the notebook cell displays both metrics.
(auc, ap)

In [None]:
# Plot a heat-like scatter of top-risk points
from pathlib import Path

# Score every row of X. NOTE: X includes the rows the model was trained on,
# so these scores are partly in-sample and serve only as an illustration,
# not as an evaluation.
scores = clf.predict_proba(X)[:, 1]
df = X.copy()
df['score'] = scores
top = df.nlargest(300, 'score')  # the 300 highest-scoring rows

plt.figure(figsize=(6, 6))
# Faint background layer: all points; foreground layer: the top-scoring ones.
plt.scatter(df['lon'], df['lat'], s=4, alpha=0.05)
plt.scatter(top['lon'], top['lat'], s=10, alpha=0.6)
plt.title('Synthetic Risk Hotspots — Denver Bounding Box')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid(True, alpha=0.2)
plt.tight_layout()

out = '../architecture/synthetic_hotspots.png'
# savefig does not create missing directories and would raise
# FileNotFoundError on a fresh checkout; make sure the target folder exists.
Path(out).parent.mkdir(parents=True, exist_ok=True)
plt.savefig(out, dpi=160)

# Bare expression so the notebook cell displays the output path.
out