In [30]:
import os
from pathlib import Path

import geopandas as gpd
import pandas as pd

# ------------------------------------------------------------------
# 1. Load the South-West LSOA layer (British National Grid, EPSG:27700)
# ------------------------------------------------------------------
# Location of the enriched South-West LSOA GeoPackage.
# The default is the original author's local copy; set the LSOA_SW_GPKG
# environment variable to point at the file on any other machine instead of
# editing this cell.
BASE = Path(
    os.environ.get(
        "LSOA_SW_GPKG",
        "/Users/rosstaylor/Downloads/Research Project/Code Folder/"
        "Research Project - Geospatial Health Demand/data/processed/"
        "lsoa_sw_enriched_demographics_enriched.gpkg",
    )
)
lsoa_sw = gpd.read_file(BASE)          # expected: (3475, 21), EPSG:27700

In [31]:

# ------------------------------------------------------------------
# 2. One-shot field clean-up
#    • keep only one human-readable LSOA name
#    • rename everything to canonical snake_case
# ------------------------------------------------------------------
# Mapping from raw (lower-cased) column name -> tidy name.
# Several raw names may share one tidy name; when more than one of them is
# present, the entry listed FIRST in this dict wins and the rest are dropped.
RENAME = {
    "lsoa21cd":  "lsoa",
    "lsoa21nm_x": "lsoa_name",
    "lsoa21nm_y": "lsoa_name",
    "lsoa21nmw":  "lsoa_name",   # Welsh-language name: last-resort fallback
    "msoa21cd":  "msoa",
    "msoa21nm":  "msoa_name",
    "lad22cd":   "lad",
    "ladcd":     "lad",
    "lad22nm":   "lad_name",
    "ladnm":     "lad_name",
    "ruc21nm":   "urban_rural",
    "urban_rural_flag": "urban_rural",
    "imd_rank":  "imd_rank",
    "bng_e":     "bng_e",
    "bng_n":     "bng_n",
    "lat":       "lat",
    "long":      "lon",          # rename to lon for clarity
    "shape__are":"shape_area",
    "shape__len":"shape_len",
    "globalid":  "global_id",
}


def tidy_columns(frame, mapping):
    """Return a copy of *frame* with lower-cased, renamed, de-duplicated columns.

    Parameters
    ----------
    frame : DataFrame or GeoDataFrame
        Table whose column labels are strings.
    mapping : dict
        Raw (lower-case) column name -> tidy column name.  When several raw
        names present in *frame* map to the same tidy name, the one that
        appears first in *mapping* is kept and the others are dropped.

    Returns
    -------
    Same type as *frame*
        A new object; *frame* itself is left unmodified.
    """
    tidy = frame.copy()
    tidy.columns = [label.lower() for label in tidy.columns]

    # Labels that differed only by case are duplicates now; keep the left-most.
    tidy = tidy.loc[:, ~tidy.columns.duplicated()]

    # Pick one source column per tidy name, honouring the order of `mapping`
    # rather than whatever order the columns happen to have in the file.
    chosen = {}
    for raw, target in mapping.items():
        if raw in tidy.columns:
            chosen.setdefault(target, raw)

    # Drop the losing candidates BEFORE renaming so that duplicate labels are
    # never created in the first place.
    redundant = [
        raw
        for raw, target in mapping.items()
        if raw in tidy.columns and chosen[target] != raw
    ]
    tidy = tidy.drop(columns=redundant)
    tidy = tidy.rename(columns={raw: target for target, raw in chosen.items()})

    # Safety net: a tidy name can still clash with an unmapped column that
    # already carried that name; keep the left-most occurrence.
    return tidy.loc[:, ~tidy.columns.duplicated()]


lsoa_sw = tidy_columns(lsoa_sw, RENAME)


In [32]:
# ------------------------------------------------------------------
# 3. Create a WGS 84 (EPSG:4326) copy of the LSOA layer for web mapping
# ------------------------------------------------------------------
# to_crs returns a new object, so lsoa_sw keeps the CRS it was loaded with.
lsoa_sw_wgs = lsoa_sw.to_crs(epsg=4326)

In [33]:
# ------------------------------------------------------------------
# 4. Derive MSOA and LAD polygons by merging the LSOA geometries
# ------------------------------------------------------------------
def _dissolve_sorted(layer, key):
    """Merge the geometries of *layer* per value of *key*, ordered by *key*.

    *key* remains an ordinary column (as_index=False).  Every other
    non-geometry column is filled with aggfunc="first", i.e. with a value
    taken from a single member of each group.
    NOTE: LSOA-level attributes on the result therefore describe one
    arbitrary LSOA, not the merged area as a whole.
    """
    merged = layer.dissolve(by=key, as_index=False, aggfunc="first")
    return merged.sort_values(key)


msoa_sw = _dissolve_sorted(lsoa_sw_wgs, "msoa")
lad_sw = _dissolve_sorted(lsoa_sw_wgs, "lad")


In [34]:
# ------------------------------------------------------------------
# 5. Sanity check: report (rows, columns) for each layer
# ------------------------------------------------------------------
for label, layer in (
    ("LSOA layer :", lsoa_sw_wgs),
    ("MSOA layer :", msoa_sw),
    ("LAD layer  :", lad_sw),
):
    print(label, layer.shape)

LSOA layer : (3475, 16)
MSOA layer : (742, 16)
LAD layer  : (40, 16)


In [35]:
# ------------------------------------------------------------------
# 6. (Optional) write all four layers into a single GeoPackage
# ------------------------------------------------------------------
# The output file sits in the same directory as the input GeoPackage.
OUT = BASE.with_name("sw_ready_layers.gpkg")

# GeoPackage layer name -> layer to write (written in this order).
# lsoa_sw is the layer that was not re-projected; the other three derive
# from the EPSG:4326 copy.
layers_to_save = {
    "lsoa_bng": lsoa_sw,
    "lsoa_wgs84": lsoa_sw_wgs,
    "msoa_wgs84": msoa_sw,
    "lad_wgs84": lad_sw,
}
for layer_name, layer in layers_to_save.items():
    layer.to_file(OUT, layer=layer_name, driver="GPKG")

print("✓ All layers saved to", OUT)

✓ All layers saved to /Users/rosstaylor/Downloads/Research Project/Code Folder/Research Project - Geospatial Health Demand/data/processed/sw_ready_layers.gpkg
