## Target Validation

In [8]:
from policyengine_uk import Microsimulation
import pandas as pd
import h5py
import numpy as np
import sys
from itables import init_notebook_mode
from utils import show
import itables.options as opt
from pathlib import Path

# itables display setup: set the itables `maxBytes` option and turn on
# interactive rendering for every DataFrame shown in this notebook.
opt.maxBytes = "1MB"
init_notebook_mode(all_interactive=True)

# Parent of the resolved current working directory; the data files read in
# this notebook live under its "policyengine_uk_local_areas" folder.
REPO = Path(".").resolve().parent

# Read the whole "weight" dataset from weights.h5 into memory.
weights_path = REPO / "policyengine_uk_local_areas" / "weights.h5"
with h5py.File(weights_path, mode="r") as weights_file:
    weights = weights_file["weight"][:]

# "household_weight" values for 2025 from a default Microsimulation.
baseline = Microsimulation()
household_weight_result = baseline.calculate("household_weight", 2025)
household_weights = household_weight_result.values

from policyengine_uk_local_areas.loss import create_constituency_target_matrix, create_national_target_matrix

# Both target builders receive the same three arguments (presumably dataset
# name, year and an optional third setting left as None -- confirm against
# policyengine_uk_local_areas.loss) and each returns a (matrix, actuals) pair.
target_args = ("enhanced_frs_2022_23", 2025, None)
constituency_target_matrix, constituency_actuals = create_constituency_target_matrix(*target_args)
national_target_matrix, national_actuals = create_national_target_matrix(*target_args)

# Lookup table whose `code` and `name` columns are used to label the
# constituency-level results.
constituencies_path = REPO / "policyengine_uk_local_areas" / "constituencies_2024.csv"
constituencies_2024 = pd.read_csv(constituencies_path)

# Apply the calibrated weights to the constituency target matrix, then label
# the rows with the constituency codes and attach the constituency names.
# Codes and names are assigned positionally, so the row order of `weights`
# is assumed to match constituencies_2024 -- TODO confirm.
constituency_wide = weights @ constituency_target_matrix
constituency_wide.index = constituencies_2024["code"].values
constituency_wide["name"] = constituencies_2024["name"].values

# Long format: one row per (constituency, variable) holding the estimate.
constituency_results = constituency_wide.reset_index().melt(
    id_vars=["index", "name"],
    var_name="variable",
    value_name="value",
)

# Label the actual target values the same way as the estimates (codes as the
# index, names as a column); `constituency_actuals` is modified in place.
constituency_actuals.index = constituencies_2024["code"].values
constituency_actuals["name"] = constituencies_2024["name"].values

# Long format: one row per (constituency, variable) holding the target value.
constituency_actuals_long = constituency_actuals.reset_index().melt(
    id_vars=["index", "name"],
    var_name="variable",
    value_name="value",
)

# Pair every estimate with its target on (constituency code, variable).
# The left frame holds the estimates and the right frame the targets, so the
# suffixes say exactly that. The duplicate name column from the right frame
# is dropped and the rest are renamed by label rather than by position, so a
# change in column order cannot silently swap "estimate" and "target".
constituency_target_validation = (
    pd.merge(
        constituency_results,
        constituency_actuals_long,
        on=["index", "variable"],
        suffixes=("_estimate", "_target"),
    )
    .drop(columns="name_target")
    .rename(
        columns={
            "name_estimate": "name",
            "variable": "metric",
            "value_estimate": "estimate",
            "value_target": "target",
        }
    )
    .assign(
        error=lambda df: df["estimate"] - df["target"],
        abs_error=lambda df: df["error"].abs(),
        # Divide by |target| so the relative error is never negative; a
        # negative target would otherwise sort to the top as the "best" match.
        # A zero target still yields inf (or NaN when the error is also zero).
        rel_abs_error=lambda df: df["abs_error"] / df["target"].abs(),
    )
)

### Calibration check:
Sorting the validation results by relative absolute error shows how well our calibrated weights reproduce the actual target statistics across UK parliamentary constituencies under the new 2024 boundaries. The table runs from the closest matches to the largest discrepancies; a lower relative absolute error indicates better calibration performance.

In [9]:
# Closest matches first, largest relative discrepancies last.
constituency_target_validation.sort_values(by="rel_abs_error", ascending=True)

Unnamed: 0,index,name,metric,estimate,target,error,abs_error,rel_abs_error
Loading ITables v2.2.1 from the init_notebook_mode cell... (need help?),,,,,,,,


In [10]:
# Weighted national totals: the baseline household weights applied to the
# national target matrix, one value per metric.
national_performance = household_weights @ national_target_matrix

# One row per metric. Targets are attached positionally, so `national_actuals`
# is assumed to be ordered like the target-matrix columns -- TODO confirm.
national_target_validation = pd.DataFrame(
    {
        "metric": national_performance.index,
        "estimate": national_performance.values,
    }
)
national_target_validation["target"] = national_actuals.values

national_target_validation["error"] = (
    national_target_validation["estimate"] - national_target_validation["target"]
)
national_target_validation["abs_error"] = national_target_validation["error"].abs()
# Divide by |target| so the relative error is never negative; a negative
# target would otherwise sort to the top as the "best" match. A zero target
# still yields inf (or NaN when the error is also zero).
national_target_validation["rel_abs_error"] = (
    national_target_validation["abs_error"] / national_target_validation["target"].abs()
)

The table below shows the relative absolute error for each calibration target at the **national level**, sorted from the closest matches to the largest discrepancies.

In [11]:
# Closest matches first, largest relative discrepancies last.
national_target_validation.sort_values(by="rel_abs_error", ascending=True)

Unnamed: 0,metric,estimate,target,error,abs_error,rel_abs_error
Loading ITables v2.2.1 from the init_notebook_mode cell... (need help?),,,,,,
