# Hotspot Storytelling Exploration

Use this notebook to inspect Chicago crime hotspots before demoing the Streamlit dashboard. It combines deterministic L3 aggregates with contextual data from L2 (crime types, normalized streets, districts). Adjust the parameters below to explore other months or H3 resolutions.

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.express as px
import h3


In [None]:
# Parameters — edit these to explore a different monthly partition or map granularity.
# YEAR and MONTH are interpolated into the L3/L2 parquet paths (MONTH is zero-padded to two digits).
YEAR = 2024
MONTH = 9
# RESOLUTION selects both the L3 partition (`res=...`) and the `h3_r{RESOLUTION}` column used for grouping.
RESOLUTION = 9  # 7 ≈ district, 8 ≈ neighbourhood, 9 ≈ block

In [None]:
# Locate the monthly parquet partitions relative to the directory the kernel was started in.
ROOT = Path.cwd()
L3_PATH = ROOT.joinpath(
    f'data/l3/res={RESOLUTION}/year={YEAR}/month={MONTH:02d}',
    f'l3-aggregates-{RESOLUTION}-{YEAR}-{MONTH:02d}.parquet',
)
L2_PATH = ROOT.joinpath(f'data/l2/year={YEAR}/month={MONTH:02d}/features-{YEAR}-{MONTH:02d}.parquet')

# Report whether each input file is present before the next cell tries to read it.
print('L3 exists?', L3_PATH.exists())
print('L2 exists?', L2_PATH.exists())

In [None]:
# L3: pre-aggregated rows; parse `date` so it can be used as a time axis later.
l3 = pd.read_parquet(L3_PATH)
l3 = l3.assign(date=pd.to_datetime(l3['date']))

# L2: incident-level context. Keep only the wanted columns that the file actually contains.
cols = ['datetime', 'primary_type', 'street_norm', 'community_area_id', 'district_id', 'ward_id', 'h3_r7', 'h3_r8', 'h3_r9']
l2 = pd.read_parquet(L2_PATH)
available_cols = [name for name in cols if name in l2.columns]
l2 = l2[available_cols].copy()
l2 = l2.assign(datetime=pd.to_datetime(l2['datetime']))

# Preview both frames.
print(l3.head())
print(l2.head())

In [None]:
# Collapse the L3 rows into one record per H3 cell at the selected resolution.
h3_col = f'h3_r{RESOLUTION}'
summary_cols = ['n_crimes', 'n_arrests', 'low_conf', 'smoothed_rate', 'pooled_smoothed']

# Counts are summed; the low-confidence flag and the rate columns are averaged over the cell's rows.
agg_spec = {
    'n_crimes': 'sum',
    'n_arrests': 'sum',
    'low_conf': 'mean',
    'smoothed_rate': 'mean',
    'pooled_smoothed': 'mean',
}
l3_flagged = l3.assign(low_conf=l3['low_conf'].astype(bool))
summary = l3_flagged.groupby(h3_col, dropna=True)[summary_cols].agg(agg_spec).reset_index()
summary = summary.rename(columns={h3_col: 'h3_id', 'low_conf': 'low_conf_share'})
summary['low_conf_share'] = summary['low_conf_share'].round(3)

# Look up each cell's centre once, then split the (lat, lng) pair into two columns.
centroids = summary['h3_id'].map(h3.cell_to_latlng)
summary['lat'] = centroids.map(lambda latlng: latlng[0])
summary['lon'] = centroids.map(lambda latlng: latlng[1])
summary.head()

In [None]:
def top_value(df, value_col, alias, key_col=None):
    """Return the most frequent ``value_col`` value for each H3 cell.

    Parameters
    ----------
    df : pandas.DataFrame
        Incident-level rows holding the cell column and ``value_col``.
    value_col : str
        Column whose dominant value is wanted (e.g. ``'primary_type'``).
    alias : str
        Name given to the dominant-value column in the result.
    key_col : str, optional
        Column holding the H3 cell id. Defaults to the notebook-level
        ``h3_col`` so existing three-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        One row per cell with columns ``h3_id``, ``alias`` and
        ``f'{alias}_count'``. Ties on the count are resolved by taking the
        smallest value in ascending sort order. When either column is
        missing from ``df`` or no rows survive the NaN filter, an empty frame
        with those same three columns is returned.
    """
    key_col = h3_col if key_col is None else key_col
    out_cols = ['h3_id', alias, f'{alias}_count']
    # The loader keeps only the L2 columns that exist, so a requested column may be absent.
    if key_col not in df.columns or value_col not in df.columns:
        return pd.DataFrame(columns=out_cols)
    clean = df.dropna(subset=[key_col, value_col])
    if clean.empty:
        return pd.DataFrame(columns=out_cols)
    # Highest count first; equal counts fall back to ascending value for a deterministic pick.
    counts = (clean.groupby([key_col, value_col]).size().reset_index(name='count')
              .sort_values(['count', value_col], ascending=[False, True]))
    # After the sort, the first row seen for each cell is its winner.
    top = counts.drop_duplicates(key_col).rename(
        columns={key_col: 'h3_id', value_col: alias, 'count': f'{alias}_count'}
    )
    return top

# Dominant crime type, street, district and community area per H3 cell, taken from the L2 rows.
context_frames = [
    top_value(l2, 'primary_type', 'common_crime'),
    top_value(l2, 'street_norm', 'signature_street'),
    top_value(l2, 'district_id', 'district_id'),
    top_value(l2, 'community_area_id', 'community_area_id'),
]

# Outer-join the per-attribute frames so a cell missing one attribute still keeps the others.
context = None
for frame in context_frames:
    context = frame if context is None else context.merge(frame, on='h3_id', how='outer')

unknown_labels = {'common_crime': 'UNKNOWN', 'signature_street': 'UNKNOWN'}
context = context.fillna(unknown_labels)

# Drop context columns left over from an earlier run of this cell; otherwise the merge
# would add `_x`/`_y` suffixed duplicates and later cells would not find the plain names.
stale_cols = [name for name in context.columns if name != 'h3_id' and name in summary.columns]
summary = summary.drop(columns=stale_cols).merge(context, on='h3_id', how='left')

# Cells present in L3 but absent from L2 only get their NaNs from the left merge,
# so the UNKNOWN labels must be applied again after it.
summary = summary.fillna(unknown_labels)
summary.head()

In [None]:
import math  # NOTE(review): not used in this cell; kept in case another cell relies on it
selected_types = []  # e.g. ['THEFT', 'BATTERY']

# Incident totals per cell, ignoring L2 rows that have no cell id at this resolution.
base = l2.dropna(subset=[h3_col])
total = base.groupby(h3_col).size().reset_index(name='incident_total')

# With no filter, every incident counts as "focus", so the share is 1.0 for every cell.
if selected_types:
    focus = base[base['primary_type'].isin(selected_types)].groupby(h3_col).size().reset_index(name='focus_count')
else:
    focus = total.rename(columns={'incident_total': 'focus_count'})

# Cells with no matching incidents get a focus count of 0 rather than NaN.
share = total.merge(focus, on=h3_col, how='left').fillna({'focus_count': 0})

# Vectorised share with the same guard against a zero denominator.
share['focus_share'] = (share['focus_count'] / share['incident_total']).where(share['incident_total'] != 0, 0.0)

# Remove the share columns from any previous run so re-executing this cell (for example
# after changing `selected_types`) replaces them instead of adding `_x`/`_y` duplicates.
share_cols = ['incident_total', 'focus_count', 'focus_share']
summary = (
    summary.drop(columns=share_cols, errors='ignore')
           .merge(share.rename(columns={h3_col: 'h3_id'}), on='h3_id', how='left')
)
summary.head()

In [None]:
def _cell_ring(cell):
    """Return the closed GeoJSON ring ([lng, lat] pairs) outlining one H3 cell.

    `h3.cell_to_boundary` is called without the `geo_json` keyword, so this does
    not depend on an h3-py version that accepts it. The (lat, lng) vertices are
    flipped to GeoJSON order and the first vertex is repeated to close the ring.
    """
    ring = [[lng, lat] for lat, lng in h3.cell_to_boundary(cell)]
    if ring and ring[0] != ring[-1]:
        ring.append(ring[0])
    return ring


# One polygon feature per summarised cell; `properties.h3_id` is the join key used by plotly.
geojson = {
    'type': 'FeatureCollection',
    'features': [
        {
            'type': 'Feature',
            'id': cell,
            'properties': {'h3_id': cell},
            'geometry': {'type': 'Polygon', 'coordinates': [_cell_ring(cell)]}
        }
        for cell in summary['h3_id']
    ]
}

# Choropleth coloured by incident count; the hover card shows the context columns added in earlier cells.
fig = px.choropleth_mapbox(
    summary, geojson=geojson, locations='h3_id', featureidkey='properties.h3_id',
    color='n_crimes', color_continuous_scale='YlOrRd',
    hover_data={
        'n_crimes': True, 'n_arrests': True, 'smoothed_rate': ':.2f', 'low_conf_share': ':.2f',
        'focus_share': ':.2f', 'common_crime': True, 'signature_street': True, 'district_id': True
    },
    mapbox_style='open-street-map', center={'lat': 41.881832, 'lon': -87.623177}, zoom=10.5, opacity=0.75,
    title=f'Hotspots r{RESOLUTION} — {YEAR}-{MONTH:02d}'
)
fig.update_layout(margin={'l': 0, 'r': 0, 't': 60, 'b': 0})
fig.show()

In [None]:
# Total incidents per day across all cells, plotted as a line with markers.
daily = l3.groupby('date')['n_crimes'].sum().reset_index()
daily_fig = px.line(daily, x='date', y='n_crimes', markers=True, title='Daily incident trend')
daily_fig.update_layout(yaxis_title='Incidents', xaxis_title='Date')

In [None]:
# Ten most frequent crime categories in the L2 rows.
# `rename_axis` + `reset_index(name=...)` sets both column names explicitly, so the result is
# ['primary_type', 'count'] whichever names `value_counts` gives its index and values
# (these differ between pandas 1.x and 2.x).
crime_mix = (
    l2['primary_type']
    .value_counts()
    .head(10)
    .rename_axis('primary_type')
    .reset_index(name='count')
)
px.bar(crime_mix, x='count', y='primary_type', orientation='h', title='Top crime categories').update_layout(yaxis_title='', xaxis_title='Incidents')

In [None]:
# Ten cells with the most incidents, with the context columns used in the talking points.
top_cols = ['h3_id', 'n_crimes', 'common_crime', 'signature_street', 'focus_share']
summary.loc[:, top_cols].sort_values(by='n_crimes', ascending=False).head(10)

### Talking points
- Highlight the top three hexes, referencing their signature street and dominant crime type.
- Use `focus_share` to explain how the selected crime filter concentrates in certain cells; with no filter selected (`selected_types = []`), `focus_share` is 1.0 for every cell that has incidents.
- Mention the approximate hexagon edge length at each resolution (r7 ≈ 1.2 km, r8 ≈ 0.46 km, r9 ≈ 0.17 km) so stakeholders grasp the map's granularity.