In [5]:
from networkx import add_path
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import os


#ward_path = "../data/boundaries/ward boundaries 2018/London_Ward_CityMerged.shp" # Now this does not work 
#wards = gpd.read_file(ward_path).to_crs("EPSG:4326")

# Build a monthly burglary count per ward from street-level crime CSVs.
#
# Inputs
#   ward_path : 2024 ward boundary shapefile (columns WD24NM / WD24CD are used).
#   base_dir  : one sub-folder per month (e.g. "2022-04"), each optionally
#               holding "<folder>-city-of-london-street.csv" and
#               "<folder>-metropolitan-street.csv".
# Output
#   output_path : CSV with columns Month, Ward name, Ward code, burglary_count,
#                 exactly one row per (month, ward).

# 2024 ward boundaries, reprojected to EPSG:4326 so they share a CRS with the
# Longitude/Latitude columns of the crime CSVs.
ward_path = "../data/raw/Wards_December_2024_Boundaries_UK_BGC/WD_DEC_24_UK_BGC.shp"
wards = gpd.read_file(ward_path).to_crs("EPSG:4326")

# Only the ward name, ward code and geometry are needed for the join. Slicing
# once here (loop-invariant) keeps unused boundary attributes out of every
# per-file join result. `wards` itself is left untouched.
ward_lookup = wards[["WD24NM", "WD24CD", wards.geometry.name]]

base_dir = "../data/crime 2022-2025"

# Per-file burglary counts by month and ward; merged after the loop.
monthly_ward_counts = []

# Month folders, processed in name order.
folders = sorted(
    f for f in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, f))
)

for folder in folders:
    for file in [f"{folder}-city-of-london-street.csv", f"{folder}-metropolitan-street.csv"]:
        path = os.path.join(base_dir, folder, file)
        if not os.path.exists(path):
            # A month folder may lack one of the two files; skip it.
            continue

        print(f"Processing {path}")
        df = pd.read_csv(path, parse_dates=["Month"])

        # Keep burglaries that have usable coordinates.
        is_burglary = df["Crime type"].str.lower() == "burglary"
        df = df[is_burglary].dropna(subset=["Latitude", "Longitude"])

        # Normalise Month to the first day of the month. `assign` returns a
        # new frame instead of writing into one derived from a slice.
        df = df.assign(
            Month=pd.to_datetime(df["Month"]).dt.to_period("M").dt.to_timestamp()
        )

        # Vectorised point construction (x = longitude, y = latitude).
        gdf = gpd.GeoDataFrame(
            df,
            geometry=gpd.points_from_xy(df["Longitude"], df["Latitude"]),
            crs="EPSG:4326",
        )

        # Spatial join to match burglary points to ward polygons. The inner
        # join drops points that fall outside every ward in the shapefile.
        gdf = gpd.sjoin(gdf, ward_lookup, how="inner", predicate="intersects")

        # Group by month, ward name, and ward code
        counts = (
            gdf.groupby(["Month", "WD24NM", "WD24CD"])
            .size()
            .reset_index(name="burglary_count")
        )

        # Rename for clarity
        counts.rename(columns={"WD24NM": "Ward name", "WD24CD": "Ward code"}, inplace=True)
        monthly_ward_counts.append(counts)


# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with the same exception type but say where we looked.
if not monthly_ward_counts:
    raise ValueError(f"No street-level crime CSVs found under {base_dir!r}")

# Combine. The same ward/month can appear in more than one source file, so the
# per-file counts are summed to give a single row per (month, ward). The
# groupby also leaves the rows sorted by Month, then ward name, then ward code.
df_monthly = (
    pd.concat(monthly_ward_counts, ignore_index=True)
    .groupby(["Month", "Ward name", "Ward code"], as_index=False, sort=True)["burglary_count"]
    .sum()
)

# Save result (create the output directory first so to_csv cannot fail on it).
output_path = "../data/processed/monthly_burglary_per_ward.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df_monthly.to_csv(output_path, index=False)
print(f"\nSaved monthly burglary counts to: {output_path}")

Processing ../data/crime 2022-2025\2022-04\2022-04-city-of-london-street.csv
Processing ../data/crime 2022-2025\2022-04\2022-04-metropolitan-street.csv
Processing ../data/crime 2022-2025\2022-05\2022-05-city-of-london-street.csv
Processing ../data/crime 2022-2025\2022-05\2022-05-metropolitan-street.csv
Processing ../data/crime 2022-2025\2022-06\2022-06-city-of-london-street.csv
Processing ../data/crime 2022-2025\2022-06\2022-06-metropolitan-street.csv
Processing ../data/crime 2022-2025\2022-07\2022-07-city-of-london-street.csv
Processing ../data/crime 2022-2025\2022-07\2022-07-metropolitan-street.csv
Processing ../data/crime 2022-2025\2022-08\2022-08-city-of-london-street.csv
Processing ../data/crime 2022-2025\2022-08\2022-08-metropolitan-street.csv
Processing ../data/crime 2022-2025\2022-09\2022-09-city-of-london-street.csv
Processing ../data/crime 2022-2025\2022-09\2022-09-metropolitan-street.csv
Processing ../data/crime 2022-2025\2022-10\2022-10-city-of-london-street.csv
Processing 