In [None]:
# Cell 1: Imports and path setup

import os
import sys

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

# Make the project's src/ folder importable so the local modules imported below resolve.
# The path is relative, so it is resolved against the notebook's current working directory.
project_src_dir = os.path.abspath("UA-11624497-project/src")
sys.path.append(project_src_dir)

from metrics import calc_gini_moran_clean
from plotting import plot_multiscale, COLOR_PALETTE, set_sci_style


In [None]:
# Cell 2: Read spatial and attribute data, then merge

# TODO: change these paths to your own data locations
lsoa_path = r"data/LSOA_boundary.shp"
mean_path = r"data/LSOA_mean.csv"

# Read LSOA boundary (geometry)
g_lsoa = gpd.read_file(lsoa_path)

# Read mean indicators table (BtE, TPD, NQPDE mean values)
df_mean = pd.read_csv(mean_path)

# Preview both tables to confirm the join key names
display(g_lsoa.head())
display(df_mean.head())

# TODO: change this to the actual LSOA ID field name in both tables
key_lsoa = "LSOA21CD"

# Merge geometry with mean indicators.
# Non-key columns that exist in both tables receive pandas' default _x / _y suffixes.
g = g_lsoa.merge(df_mean, on=key_lsoa, how="inner")

# An inner merge keeps only keys present in both tables, so unmatched rows vanish
# without any error. Report them, because missing areas change the statistics
# computed from `g` later on.
n_boundaries_unmatched = int((~g_lsoa[key_lsoa].isin(df_mean[key_lsoa])).sum())
n_means_unmatched = int((~df_mean[key_lsoa].isin(g_lsoa[key_lsoa])).sum())
if n_boundaries_unmatched or n_means_unmatched:
    print(
        f"Warning: inner merge dropped {n_boundaries_unmatched} boundary row(s) and "
        f"{n_means_unmatched} indicator row(s) with no matching {key_lsoa}."
    )

# Duplicated keys in either input repeat rows in the merged table.
if g[key_lsoa].duplicated().any():
    print(f"Warning: {key_lsoa} is not unique after the merge; some LSOAs appear more than once.")

print("Number of LSOAs after merge:", len(g))
print("Columns:", g.columns.tolist())


In [None]:
# Cell 3: Define the list of mean indicator columns to analyse

# BtE mean fields, one per radius
bte_cols = ["BtE400_mean", "BtE800_mean", "BtE1200_mean", "BtE2000_mean"]

# TPD mean fields (columns carrying the _y suffix from the merge)
tpd_cols = ["TPD400_y", "TPD800_y", "TPD1200_y", "TPD2000_y"]

# NQPDE mean fields (columns carrying the _y suffix from the merge)
nqpde_cols = ["NQPDE400_y", "NQPDE800_y", "NQPDE1200_y", "NQPDE2000_y"]

# Analysis order: all BtE radii, then TPD, then NQPDE
mean_cols = bte_cols + tpd_cols + nqpde_cols

print("Columns to analyse:", mean_cols)


In [None]:
# Cell 4: Compute Gini and Moran's I for all indicators and scales

# Column order of the result table. Passing it to the DataFrame constructor
# guarantees these columns exist even when no indicator was processed, so the
# sort at the end cannot fail with a KeyError on an empty table.
result_columns = ["column", "type", "radius_m", "gini", "moran_I", "z_score", "p_value"]

results = []

for col in mean_cols:
    if col not in g.columns:
        print(f"Warning: {col} not found in GeoDataFrame. Skipping.")
        continue

    # Split the column name into its type (BtE / TPD / NQPDE) and radius
    # (400 / 800 / 1200 / 2000): digits form the radius, the remaining characters
    # (minus the "_mean" / "_y" suffix) form the type. This runs before the
    # statistics call so a malformed name fails immediately with a clear message.
    var_type = ''.join(c for c in col if not c.isdigit()).replace("_mean", "").replace("_y", "")
    radius = ''.join(c for c in col if c.isdigit())
    if not radius:
        raise ValueError(f"Cannot extract a radius from column name {col!r}: it contains no digits.")

    # `res` is read below through the keys "gini", "moran_I", "z_score" and "p_value".
    res = calc_gini_moran_clean(g, col)

    results.append({
        "column": col,
        "type": var_type,
        "radius_m": int(radius),
        "gini": round(res["gini"], 4),
        "moran_I": round(res["moran_I"], 4),
        "z_score": round(res["z_score"], 3),
        # Round the p-value to 3 significant figures. It is stored as a plain float;
        # whether it is shown in scientific notation is decided at display/export time.
        "p_value": float(f"{res['p_value']:.2e}"),
    })

results_df = pd.DataFrame(results, columns=result_columns).sort_values(["type", "radius_m"])
results_df


In [None]:
# Cell 5: Save the result table for use in the paper

# Create the output folder on first run; later runs reuse it.
results_dir = "results"
os.makedirs(results_dir, exist_ok=True)

# Write the table without the DataFrame index column.
out_csv = os.path.join(results_dir, "LSOA_Gini_Moran_results.csv")
results_df.to_csv(out_csv, index=False)

print(f"Saved results to: {out_csv}")


In [None]:
# Cell 6: Plot Gini and Moran figures using the unified style
figures_dir = "figures"
os.makedirs(figures_dir, exist_ok=True)

# Keyword arguments for each figure, collected up front so the two calls below
# differ only in the settings they receive.
gini_plot_kwargs = {
    "results_df": results_df,
    "value_col": "gini",
    "ylabel": "Gini coefficient",
    "title": "Inequality across scales (BtE / TPD / NQPDE)",
    "outfile": os.path.join(figures_dir, "Gini_multiscale_blackline_colorpoints.png"),
}
moran_plot_kwargs = {
    "results_df": results_df,
    "value_col": "moran_I",
    "ylabel": "Moran's I",
    "title": "Spatial autocorrelation across scales (BtE / TPD / NQPDE)",
    "outfile": os.path.join(figures_dir, "Moran_multiscale_blackline_colorpoints.png"),
}

# 1) Gini figure
plot_multiscale(**gini_plot_kwargs)

# 2) Moran's I figure (kept as the cell's final expression)
plot_multiscale(**moran_plot_kwargs)
