# In [2]:
# Comparative temporal + single interactive Folium map (per-crime-type toggleable clusters)
# Outputs saved with explicit absolute paths

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import uuid
import folium
from folium.plugins import MarkerCluster, HeatMap
from folium import FeatureGroup


# Explicit absolute paths

# Two already-cleaned, preprocessed inputs: one feeds the TEMPORAL charts, the other the SPATIAL map.
TEMPORAL_CSV_PATH = "C:/Users/rohit/OneDrive/Desktop/Uni/Dissertation/cleaned_dataset/cleaned_city_of_london_crime_for_temporal_analysis.csv"
SPATIAL_CSV_PATH = "C:/Users/rohit/OneDrive/Desktop/Uni/Dissertation/cleaned_dataset/cleaned_city_of_london_crime_for_spatial_analysis.csv"

# Every output lands in this folder; create it (and any missing parents) before anything is saved.
OUT_DIR = "C:/Users/rohit/OneDrive/Desktop/Uni/Dissertation/RQ3_updated"
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

# Output files, all directly inside OUT_DIR.
HEATMAP_PNG = OUT_DIR + "/timeline_heatmap_crime_types_by_month.png"
STACK_ALL_PNG = OUT_DIR + "/stacked_bars_all_crime_types_by_month.png"
STACK_TOP5_PNG = OUT_DIR + "/stacked_bars_Top5_crime_types_vs_Other_by_month.png"
README_TXT = OUT_DIR + "/README.txt"
MAP_HTML = OUT_DIR + "/RQ3_all_crime_types.html"


# Helper that computes a padded geographic bounding box (min/max latitude and longitude) for use when plotting on the map

def compute_dynamic_bbox(df_like, lat_col, lon_col, padding=0.0015):
    """Return a padded latitude/longitude bounding box for *df_like*.

    Args:
        df_like: Object indexable by column name whose columns expose
            ``min()`` and ``max()`` (e.g. a pandas DataFrame).
        lat_col: Name of the latitude column.
        lon_col: Name of the longitude column.
        padding: Margin subtracted from each minimum and added to each
            maximum, in the same units as the coordinate columns.

    Returns:
        A dict with float values under the keys ``lat_min``, ``lat_max``,
        ``lon_min`` and ``lon_max`` (in that order).
    """
    latitudes = df_like[lat_col]
    longitudes = df_like[lon_col]
    return {
        "lat_min": float(latitudes.min()) - padding,
        "lat_max": float(latitudes.max()) + padding,
        "lon_min": float(longitudes.min()) - padding,
        "lon_max": float(longitudes.max()) + padding,
    }

# -----------------------------
# Load the already cleaned, preprocessed dataset used for the temporal visuals
# -----------------------------
df_city = pd.read_csv(TEMPORAL_CSV_PATH)
# Strip stray whitespace from the headers, then build a lowercase -> actual-name
# lookup so columns can be found regardless of capitalisation.
df_city.columns = [c.strip() for c in df_city.columns]
lower_map = {c.lower(): c for c in df_city.columns}

# Infer the needed columns (the crime-type column is accepted under several header spellings)
month_col = lower_map.get("month")
ctype_col = (lower_map.get("crime type") or lower_map.get("crime_type")
             or lower_map.get("offence") or lower_map.get("offense"))

# Fail fast when a required column could not be inferred. Otherwise the name
# stays None and the temporal section would index the frame with None,
# producing an opaque "KeyError: None" far from the real cause.
_missing_temporal = [
    label
    for label, col in (("month", month_col), ("crime type", ctype_col))
    if col is None
]
if _missing_temporal:
    raise KeyError(
        f"Temporal CSV is missing required column(s) {_missing_temporal}; "
        f"available columns: {list(df_city.columns)}"
    )

# Parse the month column to datetimes; values that cannot be parsed become NaT
df_city[month_col] = pd.to_datetime(df_city[month_col], errors="coerce")

# -----------------------------
# TEMPORAL VISUALS
# -----------------------------
min_m = df_city[month_col].min()
max_m = df_city[month_col].max()
if pd.isna(min_m) or pd.isna(max_m):
    # Every value failed to parse (or the file is empty): no timeline can be built.
    raise ValueError(f"No valid dates in column {month_col!r}; cannot build the monthly timeline")

# Continuous month axis (month-start timestamps) from first to last observed month,
# so months without any record still appear as zero-filled rows.
full_months = pd.period_range(min_m.to_period('M'), max_m.to_period('M'), freq='M').to_timestamp()

# Snap every record to the first day of its month before counting. The pivot is
# later reindexed onto month-start timestamps, which would silently discard any
# row whose timestamp is not exactly the first of the month.
month_start = df_city[month_col].dt.to_period('M').dt.to_timestamp()

# Count incidents per (month, crime type); rows with a missing month or type are
# excluded by groupby's default NaN handling.
monthly = (df_city.groupby([month_start, df_city[ctype_col]]).size()
           .reset_index(name="count"))
# Months as rows, crime types as columns, zero where a combination never occurs.
pivot = monthly.pivot_table(index=month_col, columns=ctype_col, values="count", aggfunc="sum", fill_value=0)
pivot = pivot.reindex(full_months, fill_value=0)
crime_types = pivot.columns.tolist()

# Heatmap: one row per crime type, one column per month
heatmat = pivot.to_numpy().T
xlabels = [stamp.strftime("%Y-%m") for stamp in full_months]

# Scale the canvas with the number of months / crime types, with a minimum size.
n_months = len(full_months)
n_types = len(crime_types)
fig, ax = plt.subplots(figsize=(max(10, n_months * 0.4), max(5, 0.35 * n_types)))

image = ax.imshow(heatmat, aspect="auto", interpolation="nearest")
fig.colorbar(image, ax=ax, label="Incidents")

ax.set_yticks(range(n_types))
ax.set_yticklabels(crime_types)
ax.set_xticks(range(n_months))
ax.set_xticklabels(xlabels, rotation=90)
ax.set_xlabel("Month")
ax.set_ylabel("Crime type")
ax.set_title("Timeline heatmap — crimes by type and month (no month aggregation)")

fig.tight_layout()
fig.savefig(HEATMAP_PNG, dpi=200)
plt.close(fig)

# Stacked bar chart of ALL crime types, one bar per month
# Types are stacked in ascending order of their overall total (smallest at the bottom).
totals = pivot.sum(axis=0).sort_values()
ordered_cols = list(totals.index)

x = np.arange(len(full_months))
bottom = np.zeros(len(full_months))  # running height of each month's stack

fig_w = max(12, len(full_months) * 0.45)
fig, ax = plt.subplots(figsize=(fig_w, 6))
for col in ordered_cols:
    vals = pivot[col].to_numpy()
    ax.bar(x, vals, bottom=bottom, label=col)
    bottom = bottom + vals

ax.set_xticks(x)
ax.set_xticklabels(xlabels, rotation=90)
ax.set_xlabel("Month")
ax.set_ylabel("Incidents (stacked across types)")
ax.set_title("Stacked monthly totals by crime type — ALL types")
ax.legend(fontsize=8, ncol=2)

fig.tight_layout()
fig.savefig(STACK_ALL_PNG, dpi=180)
plt.close(fig)

# Stacked bars for the five largest crime types, with every remaining type merged into "Other"
top5_types = totals.sort_values(ascending=False).index[:5].tolist()
pivot_other = pivot.drop(columns=top5_types).sum(axis=1)
pivot_top = pivot[top5_types].copy()
pivot_top["Other"] = pivot_other
ordered_cols_top = [*top5_types, "Other"]

x = np.arange(len(full_months))
bottom = np.zeros(len(full_months))  # running height of each month's stack

fig_w2 = max(12, len(full_months) * 0.45)
fig, ax = plt.subplots(figsize=(fig_w2, 6))
for col in ordered_cols_top:
    vals = pivot_top[col].to_numpy()
    ax.bar(x, vals, bottom=bottom, label=col)
    bottom = bottom + vals

ax.set_xticks(x)
ax.set_xticklabels(xlabels, rotation=90)
ax.set_xlabel("Month")
ax.set_ylabel("Incidents")
ax.set_title("Stacked monthly totals — Top 5 crime types + Other")
ax.legend(fontsize=9, ncol=3)

fig.tight_layout()
fig.savefig(STACK_TOP5_PNG, dpi=180)
plt.close(fig)

# -----------------------------
# Load preprocessed SPATIAL file (for map only)
# -----------------------------
# NOTE: this rebinds df_city, replacing the temporal frame loaded earlier.
df_city = pd.read_csv(SPATIAL_CSV_PATH)
# Strip stray whitespace from the headers, then build a lowercase -> actual-name lookup.
df_city.columns = [c.strip() for c in df_city.columns]
lower_map = {c.lower(): c for c in df_city.columns}

# Any of these is None when the corresponding header is absent from the file.
lon_col = lower_map.get("longitude")
lat_col = lower_map.get("latitude")
loc_col = lower_map.get("location")
month_col = lower_map.get("month")
crime_id_col = lower_map.get("crime id") or lower_map.get("crime_id")
ctype_col = (lower_map.get("crime type") or lower_map.get("crime_type")
             or lower_map.get("offence") or lower_map.get("offense"))

# The map code indexes the frame by latitude, longitude and crime type
# unconditionally, so fail fast with a readable message instead of letting a
# None column name surface later as "KeyError: None". Location and month are
# optional (the popups check for them).
_missing_spatial = [
    label
    for label, col in (("longitude", lon_col), ("latitude", lat_col), ("crime type", ctype_col))
    if col is None
]
if _missing_spatial:
    raise KeyError(
        f"Spatial CSV is missing required column(s) {_missing_spatial}; "
        f"available columns: {list(df_city.columns)}"
    )

# (defensive) parse month to datetimes when the column exists; unparseable values become NaT
if month_col is not None:
    df_city[month_col] = pd.to_datetime(df_city[month_col], errors="coerce")

# -----------------------------
# Build the single interactive Folium map from the spatial dataset
# -----------------------------
# Only rows with both coordinates can be drawn: folium's HeatMap and Marker
# raise on NaN locations, so rows lacking a latitude or longitude are left out.
geo = df_city.dropna(subset=[lat_col, lon_col])

# Centre the map on the median coordinate of the plotted records.
center_lat = float(geo[lat_col].median())
center_lon = float(geo[lon_col].median())
m = folium.Map(location=[center_lat, center_lon], zoom_start=15, tiles="OpenStreetMap")

# Heatmap layer (visible by default)
fg_heat = FeatureGroup(name="Heatmap (all types)", show=True)
HeatMap(geo[[lat_col, lon_col]].values.tolist(), radius=14, blur=10).add_to(fg_heat)
fg_heat.add_to(m)

# One clustered layer per crime type (toggle on/off, hidden by default).
# The same normalised labels are used both to name the layers and to select
# their rows; comparing the raw column against a stripped label would leave a
# layer empty whenever the stored value has padding or is missing.
type_labels = geo[ctype_col].astype(str).str.strip()
crime_types = sorted(type_labels.unique().tolist())

# Loop-invariant: which optional popup fields exist in this dataset.
has_location = loc_col in geo.columns
has_month = month_col in geo.columns

for t in crime_types:
    fg = FeatureGroup(name=f"{t} (clustered)", show=False)
    mc = MarkerCluster().add_to(fg)
    sub = geo[type_labels == t]
    for _, r in sub.iterrows():
        popup = [f"<b>Offence:</b> {t}"]
        if has_location:
            popup.append(f"<b>Location:</b> {r.get(loc_col, '')}")
        if has_month and pd.notna(r.get(month_col)):
            # Keep only the leading YYYY-MM-DD part of the timestamp text.
            popup.append(f"<b>Month:</b> {str(r.get(month_col))[:10]}")
        folium.Marker([r[lat_col], r[lon_col]], popup=folium.Popup("<br>".join(popup), max_width=300)).add_to(mc)
    fg.add_to(m)

# Expanded layer control so every layer toggle is visible, then write the HTML file.
folium.LayerControl(collapsed=False).add_to(m)
m.save(MAP_HTML)
