In [None]:
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# dashboard.py — London Crime Risk Dashboard (Streamlit + Folium)
# ----------------------------------------------------------
# Files expected:
#   predictions_test.csv   (LSOA_Code, date, score, y_cls)  -> test-period scores
#   risk_latest.csv        (LSOA_Code, date, score, pred)   -> latest month scores
#   IMD2019.csv            ("LSOA code (2011)", "Local Authority District name (2019)")
#   crime_monthly_wide.csv (monthly counts by major category; used for "Top crime type")
#   LSOA_2011_London_generalised.geojson  OR  a shapefile (.shp / .zip)
#
# Run: streamlit run dashboard.py

import os, json
import numpy as np
import pandas as pd
import streamlit as st

import geopandas as gpd
import folium
from streamlit_folium import st_folium

# ------------------ CONFIG (edit paths if needed) ------------------
# Input files read by the cached loaders below (relative paths).
PRED_TEST_CSV     = "data/predictions_test.csv"
RISK_LATEST_CSV   = "data/risk_latest.csv"
IMD_CSV           = "data/IMD2019.csv"
CRIME_MONTHLY_CSV = "data/crime_monthly_wide.csv"
GEO_PATH          = "data/statistical-gis-boundaries-london/ESRI/LSOA_2011_London_gen_MHW.shp"

# Initial value of the "high-risk threshold" slider in the sidebar.
DEFAULT_THR  = 0.37
# Tolerance handed to GeoSeries.simplify() in load_geometry(), applied after the
# geometry has been reprojected to EPSG:4326. A falsy value skips simplification.
SIMPLIFY_TOL = 0.0005
# -------------------------------------------------------------------

# Page-level settings, title and sub-title shown at the top of the app.
st.set_page_config(page_title="London Crime Risk Dashboard", layout="wide")
st.title("London Crime Risk Dashboard")
st.caption("Predictive risk (next-month) by LSOA — model built from rolling 6‑month lags + static features")

# ---- Pretty labels for crime categories (display only) ----
# Keys are the "cnt_*" count columns of crime_monthly_wide.csv; values are the
# labels shown in tooltips, charts and the CSV export. Columns without an entry
# here are displayed with a cleaned-up version of the column name instead.
CRIME_LABELS = {
    "cnt_ARSON_AND_CRIMINAL_DAMAGE": "Arson & Criminal Damage",
    "cnt_BURGLARY": "Burglary",
    "cnt_DRUG_OFFENCES": "Drugs",
    "cnt_MISCELLANEOUS_CRIMES_AGAINST_SOCIETY": "Misc. Crimes",
    "cnt_POSSESSION_OF_WEAPONS": "Weapons",
    "cnt_PUBLIC_ORDER_OFFENCES": "Public Order",
    "cnt_ROBBERY": "Robbery",
    "cnt_THEFT": "Theft",
    "cnt_VEHICLE_OFFENCES": "Vehicle Crime",
    "cnt_VIOLENCE_AGAINST_THE_PERSON": "Violence",
}

# ---------- load data (cache) ----------
@st.cache_data
def load_predictions():
    """
    Load the test-period prediction table from PRED_TEST_CSV.

    The 'date' column is parsed as datetime. The rest of the app indexes
    'LSOA_Code', 'date' and 'score' directly, so their absence is treated as
    a fatal configuration error here instead of surfacing later as a KeyError.

    Returns:
        pd.DataFrame with at least the columns LSOA_Code, date and score.

    Raises:
        ValueError: if any of the required columns is missing from the file.
    """
    df = pd.read_csv(PRED_TEST_CSV, parse_dates=["date"])
    required = {"LSOA_Code", "date", "score"}
    missing = required - set(df.columns)
    if missing:
        # Fail fast (same style as load_geometry) rather than returning a
        # frame the rest of the script cannot use.
        raise ValueError(f"predictions_test.csv missing columns: {sorted(missing)}")
    return df

@st.cache_data
def load_monthly():
    """
    Load the monthly crime-count table from CRIME_MONTHLY_CSV.

    Returns:
        (table, category_columns) where 'table' has 'date' parsed as datetime
        and 'LSOA_Code' read with the pandas "string" dtype, and
        'category_columns' lists the columns that start with "cnt_" and do
        not end with "_roll6".
    """
    monthly = pd.read_csv(
        CRIME_MONTHLY_CSV,
        parse_dates=["date"],
        dtype={"LSOA_Code": "string"},
    )
    # raw per-category counts only (e.g. cnt_THEFT); "_roll6" columns are excluded
    category_columns = [
        name
        for name in monthly.columns
        if name.startswith("cnt_") and not name.endswith("_roll6")
    ]
    return monthly, category_columns

@st.cache_data
def load_risk_latest():
    """Read RISK_LATEST_CSV into a DataFrame, parsing the 'date' column as datetime."""
    return pd.read_csv(RISK_LATEST_CSV, parse_dates=["date"])

@st.cache_data
def load_borough_lookup():
    """
    Build the LSOA -> borough lookup from IMD_CSV.

    Every column is read as text; the two IMD columns of interest are renamed
    to 'LSOA_Code' and 'Borough' and only those two are returned.
    """
    renamed = {
        "LSOA code (2011)": "LSOA_Code",
        "Local Authority District name (2019)": "Borough",
    }
    table = pd.read_csv(IMD_CSV, dtype=str)
    table = table.rename(columns=renamed)
    return table[["LSOA_Code", "Borough"]]

@st.cache_resource
def load_geometry(path: str):
    """
    Read LSOA boundaries from 'path' and return them ready for Folium.

    The result is a GeoDataFrame with exactly two columns, 'LSOA_Code' and
    'geometry', in EPSG:4326, optionally simplified by SIMPLIFY_TOL.

    Raises:
        ValueError: if no column that looks like an LSOA code can be found.
    """
    # GeoJSON is read directly; anything else gets one retry with the fiona engine
    if path.lower().endswith((".geojson", ".json")):
        gdf = gpd.read_file(path)
    else:
        try:
            gdf = gpd.read_file(path)
        except Exception:
            gdf = gpd.read_file(path, engine="fiona")

    # 1st pass: exact (case-insensitive) match on well-known column names
    known_names = {"LSOA11CD", "LSOA_CODE", "LSOACODE", "LSOA_CODE11"}
    code_col = next(
        (col for col in gdf.columns if str(col).upper() in known_names),
        None,
    )
    # 2nd pass: first column whose name mentions both "LSOA" and "CD"
    if code_col is None:
        code_col = next(
            (
                col
                for col in gdf.columns
                if "LSOA" in str(col).upper() and "CD" in str(col).upper()
            ),
            None,
        )
    if code_col is None:
        raise ValueError(f"Could not detect LSOA code column in geometry: {list(gdf.columns)[:10]}")

    gdf = gdf[[code_col, "geometry"]].rename(columns={code_col: "LSOA_Code"})

    # Folium needs WGS84. With no CRS on file, EPSG:27700 is tried first and,
    # failing that, the coordinates are labelled as EPSG:4326 as-is.
    try:
        if gdf.crs is None:
            try:
                gdf = gdf.set_crs(epsg=27700, allow_override=True).to_crs(epsg=4326)
            except Exception:
                gdf = gdf.set_crs(epsg=4326, allow_override=True)
        elif gdf.crs.to_epsg() != 4326:
            gdf = gdf.to_crs(epsg=4326)
    except Exception:
        # last resort: attempt a plain reprojection
        gdf = gdf.to_crs(epsg=4326)

    if SIMPLIFY_TOL:
        simplified = gdf.geometry.simplify(SIMPLIFY_TOL, preserve_topology=True)
        gdf["geometry"] = simplified
    return gdf

# ---- load everything ----
# Each loader is decorated with st.cache_data / st.cache_resource.
pred_test     = load_predictions()
risk_latest   = load_risk_latest()
lookup        = load_borough_lookup()
gdf           = load_geometry(GEO_PATH)
mon, CAT_COLS = load_monthly()

# ---------- sidebar controls ----------
st.sidebar.header("Controls")

# Available months from predictions_test
# (distinct calendar months, each represented by its first-of-month timestamp)
months = sorted(pred_test["date"].dt.to_period("M").unique())
months_ts = [p.to_timestamp() for p in months]

# The most recent date in risk_latest.csv is offered as one extra option after
# the test months; get_month_risk() recognises it by equality with latest_date.
# NOTE(review): if latest_date equals one of months_ts the option is listed
# twice, and an empty predictions file makes the default index -1 — confirm
# neither can happen with the real inputs.
latest_date = risk_latest["date"].max()
month = st.sidebar.selectbox(
    "Month to display (features at t predicting risk at t+1)",
    options=months_ts + [latest_date],
    index=(len(months_ts) - 1),  # default selection: last test month
    format_func=lambda d: d.strftime("%Y-%m")
)

# Score cut-off used to derive the 0/1 'pred' column in get_month_risk()
thr = st.sidebar.slider("High‑risk threshold (on score)", 0.0, 1.0, value=float(DEFAULT_THR), step=0.01)

# Borough filter
# (an empty selection means "no filter")
all_boroughs = sorted(lookup["Borough"].dropna().unique())
sel_boroughs = st.sidebar.multiselect("Filter borough(s)", options=all_boroughs, default=[])

# ---------- helpers ----------
def attach_top_type(df_points: pd.DataFrame, ref_month: pd.Timestamp) -> pd.DataFrame:
    """
    Attach 'top_type' (friendly label) for each LSOA using crime counts
    in 'crime_monthly_wide.csv' at the reference month 'ref_month'.

    Args:
        df_points: table with an 'LSOA_Code' column; it is not modified.
        ref_month: any timestamp inside the month whose counts should be used.

    Returns:
        A new DataFrame with the rows of 'df_points' plus a 'top_type' column.
        'top_type' is the label of the category with the highest count (the
        first such column on ties) and NaN when the LSOA has no row for that
        month or no category with a positive count.
    """
    in_month = mon["date"].dt.to_period("M") == ref_month.to_period("M")
    sel = mon[in_month]
    if sel.empty or not CAT_COLS:
        # Nothing to rank: return a copy so the caller's frame is left untouched.
        out = df_points.copy()
        out["top_type"] = np.nan
        return out

    # Keep one row per LSOA so the left merge below cannot duplicate df_points rows.
    sel = sel.drop_duplicates(subset="LSOA_Code").reset_index(drop=True)
    counts = sel[CAT_COLS].apply(pd.to_numeric, errors="coerce")

    # Only positive counts can be a "top" category. Without this mask an
    # all-zero row would be labelled with the first column, and an all-NaN
    # row would make idxmax warn or raise depending on the pandas version.
    positive = counts.where(counts > 0)
    has_crime = positive.notna().any(axis=1)

    def _label(col: str) -> str:
        # pretty label if known; otherwise a cleaned-up column name
        if col in CRIME_LABELS:
            return CRIME_LABELS[col]
        bare = col[len("cnt_"):] if col.startswith("cnt_") else col
        return bare.replace("_", " ").title()

    top_type = pd.Series(np.nan, index=sel.index, dtype=object)
    if has_crime.any():
        top_col = positive.loc[has_crime].idxmax(axis=1)
        top_type.loc[has_crime] = top_col.map(_label)

    out = pd.DataFrame({"LSOA_Code": sel["LSOA_Code"], "top_type": top_type})
    return df_points.merge(out, on="LSOA_Code", how="left")

def get_month_risk(selected_month) -> pd.DataFrame:
    """
    Build the risk table shown for 'selected_month'.

    Reads the module-level inputs (risk_latest, pred_test, lookup, thr,
    sel_boroughs, latest_date). The returned frame carries a 0/1 'pred'
    column derived from the current threshold, the borough name, and the
    'top_type' label, and is restricted to the selected boroughs (if any).
    Selected boroughs without rows trigger a toast and a sidebar warning.
    """
    if selected_month == latest_date:
        # latest prediction uses t = (t+1)-1
        table = risk_latest.copy()
        ref_month = latest_date - pd.offsets.MonthBegin(1)
    else:
        wanted = selected_month.to_period("M")
        table = pred_test[pred_test["date"].dt.to_period("M") == wanted].copy()
        ref_month = selected_month

    # high-risk flag follows the sidebar threshold
    table["pred"] = (table["score"] >= thr).astype(int)

    # borough names
    table = table.merge(lookup, on="LSOA_Code", how="left")

    if sel_boroughs:
        # tell the user which of the chosen boroughs have nothing to show
        present = set(table["Borough"].dropna().unique())
        absent = sorted(set(sel_boroughs) - present)
        if absent:
            try:
                st.toast(f"No data this month for: {', '.join(absent)}", icon="⚠️")
            except Exception:
                pass
            st.sidebar.warning("No data for: " + ", ".join(absent))
        table = table[table["Borough"].isin(sel_boroughs)]

    # top crime type (pretty label) for the reference month
    return attach_top_type(table, ref_month)

# compute table for selected month
risk_month = get_month_risk(month)

# If empty after filters, show a message and stop
if risk_month.empty:
    st.warning("No rows available for this month and selected borough filter.")
    st.stop()

# ---------- KPI row ----------
# Three metrics side by side: rows in view, high-risk count, precision/recall.
left, mid, right = st.columns(3)
left.metric("LSOA in view", f"{len(risk_month):,}")

# High-risk share MoM delta
# 'pred' is 0/1, so its mean is the share of rows flagged high-risk.
curr_share = risk_month["pred"].mean()
prev_month = (pd.to_datetime(month) - pd.offsets.MonthBegin(1))
# NOTE(review): this second get_month_risk() call also repeats any
# "No data for ..." borough warning it emits — confirm that is acceptable.
risk_prev  = get_month_risk(prev_month)
prev_share = risk_prev["pred"].mean() if not risk_prev.empty else np.nan
# Signed change in share versus the previous month; None hides the delta.
delta_txt  = None if np.isnan(prev_share) else f"{(curr_share - prev_share):+,.1%}"

# The headline value is a count while the delta is the change in share;
# the caption underneath spells out both shares.
mid.metric(
    "High‑risk count",
    f"{int(risk_month['pred'].sum()):,}",
    delta=delta_txt
)
st.caption(
    f"High‑risk share: {curr_share:.1%}"
    + ("" if np.isnan(prev_share) else f" (last month: {prev_share:.1%})")
)

# Precision/Recall (only if y_cls exists for test months)
if "y_cls" in risk_month.columns and not risk_month["y_cls"].isna().all():
    # confusion-matrix cells for the current threshold
    tp = ((risk_month["pred"] == 1) & (risk_month["y_cls"] == 1)).sum()
    fp = ((risk_month["pred"] == 1) & (risk_month["y_cls"] == 0)).sum()
    fn = ((risk_month["pred"] == 0) & (risk_month["y_cls"] == 1)).sum()
    # report 0.0 instead of dividing by zero when a denominator is empty
    precision = 0.0 if tp + fp == 0 else tp / (tp + fp)
    recall    = 0.0 if tp + fn == 0 else tp / (tp + fn)
    right.metric("Precision / Recall", f"{precision:.2f} / {recall:.2f}")
else:
    right.metric("Precision / Recall", "—")

st.markdown("---")

# ---------- Map (top) ----------
st.subheader(f"Risk map — {month.strftime('%Y-%m')} (features at t, predicting t+1)")

# join with geometry
# (inner join: rows without a matching polygon are dropped from the map only;
#  the caption reports how many matched)
merged = gdf.merge(risk_month, on="LSOA_Code", how="inner")
st.caption(f"Matched geometries: {len(merged):,} / rows in view: {len(risk_month):,}")

# convert non-JSON-serializable columns and ensure strings for tooltip
merged_json = merged.copy()
if "date" in merged_json.columns:
    merged_json["date"] = pd.to_datetime(merged_json["date"], errors="coerce").dt.strftime("%Y-%m")
if "top_type" in merged_json.columns:
    merged_json["top_type"] = merged_json["top_type"].fillna("")

# build GeoJSON
# (one dict shared by the choropleth and the outline/tooltip layer)
geo = json.loads(merged_json.to_json())

# Base map centred on central London
m = folium.Map(location=[51.5074, -0.1278], zoom_start=9, tiles="cartodbpositron")

# Fill layer: polygons coloured by 'score', matched to features via LSOA_Code
folium.Choropleth(
    geo_data=geo,
    data=merged,
    columns=["LSOA_Code","score"],
    key_on="feature.properties.LSOA_Code",
    fill_color="YlOrRd",
    fill_opacity=0.85, line_opacity=0.2,
    nan_fill_opacity=0.0,
    legend_name="Predicted risk (score)",
).add_to(m)

# Outlines layer (clickable)
# Transparent fill so the choropleth stays visible; carries the hover tooltip.
folium.GeoJson(
    geo,
    name="Outlines",
    style_function=lambda x: {"fillOpacity": 0, "weight": 0.2},
    highlight_function=lambda x: {"weight": 1, "color": "#555", "fillOpacity": 0},
    tooltip=folium.GeoJsonTooltip(
        fields=["LSOA_Code", "Borough", "date", "top_type", "score", "pred"],
        aliases=["LSOA", "Borough", "Month", "Top crime type", "Score", "High risk (0/1)"],
        localize=True
    )
).add_to(m)

# Render map and capture click (single call)
# 'evt' is later passed to _extract_clicked_props() to find the clicked LSOA.
evt = st_folium(m, width=800, height=600, key="mainmap")

# ---------- Detail (bottom, directly under map) ----------
def _extract_clicked_props(e: dict) -> dict:
    if not isinstance(e, dict):
        return {}
    for k in ("last_object_clicked", "last_active_drawing", "last_active_drawing_geojson"):
        obj = e.get(k)
        if isinstance(obj, dict):
            props = obj.get("properties") or obj.get("feature", {}).get("properties")
            if isinstance(props, dict):
                return props
    return {}

# LSOA code of the feature clicked on the map in this run (None if no click)
clicked_props = _extract_clicked_props(evt)
clicked_lsoa = clicked_props.get("LSOA_Code")

# remember the last clicked LSOA
if clicked_lsoa:
    st.session_state["clicked_lsoa"] = clicked_lsoa
lsoa_clicked = st.session_state.get("clicked_lsoa")

# sidebar fallback selector
lsoa_options = sorted(risk_month["LSOA_Code"].unique())
# Preselect the remembered LSOA when it is still in view. It can be missing
# (nothing clicked yet, or the month/borough filter changed since the click);
# list.index() raises ValueError in that case, so fall back to the first entry.
try:
    default_idx = lsoa_options.index(lsoa_clicked)
except ValueError:
    default_idx = 0

with st.sidebar.expander("LSOA detail (fallback)"):
    lsoa_manual = st.selectbox(
        "Pick an LSOA if clicking does not work:",
        lsoa_options,
        index=default_idx,
        key="lsoa_manual_select"
    )
    # The manual choice only takes effect once the button is pressed
    if st.button("Show details", key="show_details_btn"):
        st.session_state["clicked_lsoa"] = lsoa_manual
        lsoa_clicked = lsoa_manual

# --- detail panel ---
# Shown only once an LSOA has been clicked on the map or picked in the sidebar.
if lsoa_clicked:
    st.markdown("---")
    st.subheader(f"Details • {lsoa_clicked}")

    # 1) Risk time-series (test period only)
    # Taken from the full predictions table, so it ignores the borough filter.
    ts = pred_test[pred_test["LSOA_Code"] == lsoa_clicked].sort_values("date")
    if not ts.empty:
        st.markdown("**Risk score over time (test period)**")
        st.line_chart(ts.set_index("date")["score"])
    else:
        st.info("No history available for this LSOA in predictions_test.csv.")

    # 2) Category distribution bar (reference month = selected or selected-1)
    # Same reference-month rule as get_month_risk().
    ref_month = (latest_date - pd.offsets.MonthBegin(1)) if month == latest_date else month
    bar_src = mon[(mon["LSOA_Code"] == lsoa_clicked) &
                  (mon["date"].dt.to_period("M") == ref_month.to_period("M"))]
    if not bar_src.empty:
        # first matching row; counts relabelled with friendly names, top 8 kept
        row = bar_src.iloc[0][CAT_COLS].astype(float)
        s = row.rename(index=lambda c: CRIME_LABELS.get(
            c, c.replace("cnt_", "").replace("_", " ").title())
        ).sort_values(ascending=False).head(8)
        st.markdown(f"**Top categories in {ref_month.strftime('%Y-%m')}**")
        st.bar_chart(s)
    else:
        st.info("No category data for this LSOA in the selected month.")

# ---------- Download filtered list ----------
st.markdown("---")
st.subheader("Download filtered high‑risk list")
# Rows currently in view (selected month + borough filter), highest score first.
# NOTE(review): no 'pred == 1' filter is applied, so the export also contains
# rows below the threshold — confirm this matches the "high-risk list" heading.
dl = risk_month.sort_values("score", ascending=False)[
    ["LSOA_Code", "Borough", "date", "top_type", "score", "pred"]
]
st.download_button(
    "Download CSV",
    dl.to_csv(index=False).encode("utf-8"),
    file_name="risk_filtered.csv",
    mime="text/csv"
)

st.caption("Note: scores are probabilities for 'next‑month high risk'. Threshold changes only affect 'pred' in the UI.")
