In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Count stops per ward and write the result to CSV.
# Pipeline: load stops -> build point geometries -> spatial join to wards -> count.

# Load stops and drop rows without coordinates (they cannot be located).
# The point geometries are built with the vectorised `gpd.points_from_xy`
# instead of a row-wise `apply`, avoiding one Python-level call per stop.
df_stops = (
    pd.read_parquet("../data/processed/stops_lsoa.parquet")
    .dropna(subset=["Longitude", "Latitude"])
    .assign(geometry=lambda stops: gpd.points_from_xy(stops["Longitude"], stops["Latitude"]))
)
gdf_stops = gpd.GeoDataFrame(df_stops, geometry="geometry", crs="EPSG:4326")

# Ward polygons, reprojected to the same CRS as the stops so the join compares
# like with like.
gdf_wards = gpd.read_file("../data/boundaries/ward boundaries 2024/london_wards_merged.shp").to_crs("EPSG:4326")

# Match stops to wards.
# `how="inner"` keeps only matched pairs: stops that fall in no ward are dropped,
# and wards containing no stops do not appear in the counts at all.
# NOTE(review): with `predicate="intersects"` a stop lying exactly on a shared
# ward boundary would be matched to (and counted in) both wards - confirm this
# is acceptable.
gdf_joined = gpd.sjoin(
    gdf_stops,
    gdf_wards[["WD24CD", "WD24NM", "geometry"]],
    how="inner",
    predicate="intersects",
)

# Count stops per ward and give the key columns readable names.
# The rename is chained rather than done in place so re-running the cell is safe.
df_counts = (
    gdf_joined.groupby(["WD24CD", "WD24NM"])
    .size()
    .reset_index(name="stop_count")
    .rename(columns={"WD24CD": "Ward code", "WD24NM": "Ward name"})
)

df_counts.to_csv("../data/processed/stop_counts_per_ward.csv", index=False)
# The message shows the path without the leading "../" used in the call above.
print("Stop counts saved to: data/processed/stop_counts_per_ward.csv")


Stop counts saved to: data/processed/stop_counts_per_ward.csv
