In [1]:
from policyengine_uk import Microsimulation
import pandas as pd
import h5py
from pathlib import Path
from policyengine.utils.huggingface import download
from policyengine_uk_data.storage import STORAGE_FOLDER
from typing import List
from policyengine_uk_data.datasets.frs.enhanced_frs import EnhancedFRS_2022_23
from policyengine_uk_data.datasets.frs.local_areas.constituencies.calibrate import calibrate


# Parent directory of the resolved current working directory.
REPO = Path(".").resolve().parent

# Read the dataset stored under the "2025" key of the weights file fully into
# memory, so the HDF5 file can be closed straight away.
weights_file_path = STORAGE_FOLDER / "parliamentary_constituency_weights.h5"
with h5py.File(weights_file_path, "r") as f:
    weights = f["2025"][...]

# Constituency lookup table; its `code` and `name` columns are used to label
# results in get_constituency_performance below.
constituencies = pd.read_csv(STORAGE_FOLDER / "constituencies_2024.csv")

from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import create_constituency_target_matrix, create_national_target_matrix

def get_constituency_performance(
    weights,
    dataset: str = "enhanced_frs_2022_23",
    time_period: int = 2025,
):
    """Compare weighted constituency estimates against their targets.

    Builds the constituency target matrix for ``dataset`` / ``time_period``,
    multiplies it by ``weights`` to get one estimate per constituency and
    metric, and lines each estimate up with the matching target value.

    Args:
        weights: Matrix placed on the left of ``weights @ target_matrix``.
            Presumably shaped (constituencies, households) - TODO confirm.
        dataset: Dataset name passed to ``create_constituency_target_matrix``.
        time_period: Year passed to ``create_constituency_target_matrix``.

    Returns:
        Long-format DataFrame with one row per (constituency, metric) and the
        columns ``index`` (constituency code), ``name``, ``metric``,
        ``estimate``, ``target``, ``error``, ``abs_error`` and
        ``rel_abs_error``.

    Note:
        Relies on the module-level ``constituencies`` table for codes and
        names, whose row order is assumed to match the rows of the weighted
        result and of the targets - TODO confirm.
        ``rel_abs_error`` divides by ``target`` without a zero guard.
    """
    # The third positional argument is forwarded as None; its meaning is not
    # visible from this notebook.
    target_matrix, target_values, _ = create_constituency_target_matrix(
        dataset, time_period, None
    )

    def to_long(wide, value_name):
        # Work on a copy so the frame handed back by
        # create_constituency_target_matrix is never mutated.
        labelled = wide.copy()
        labelled.index = constituencies.code.values
        labelled["name"] = constituencies.name.values
        return pd.melt(
            labelled.reset_index(),
            id_vars=["index", "name"],
            var_name="metric",
            value_name=value_name,
        )

    estimates_long = to_long(weights @ target_matrix, "estimate")
    targets_long = to_long(target_values, "target")

    # Both frames carry the same constituency names, so keep only one copy.
    validation = pd.merge(
        estimates_long,
        targets_long.drop(columns="name"),
        on=["index", "metric"],
    )

    validation["error"] = validation["estimate"] - validation["target"]
    validation["abs_error"] = validation["error"].abs()
    validation["rel_abs_error"] = validation["abs_error"] / validation["target"]

    return validation

def get_weights_with_exclude_list(
    exclude_list: List[str] | None = None,
    should_exclude: bool = True,
):
    """Run the constituency calibration, optionally holding out some metrics.

    Args:
        exclude_list: Metric names treated as the holdout set. ``None`` (the
            default) means no metric is held out.
        should_exclude: When True the holdout metrics are passed to
            ``calibrate`` as ``exclude_targets``; when False an empty list is
            passed instead, but the analytics still report the holdout
            metrics separately.

    Returns:
        Whatever ``calibrate`` returns, unchanged.
        NOTE(review): one cell in this notebook unpacks two values from this
        call and others use it as a single weight matrix - confirm the return
        shape of ``calibrate`` when ``analytics`` is supplied.
    """
    # Series.isin raises TypeError when given None, so normalise the default
    # to an empty list before it reaches the analytics callback or calibrate.
    holdout_metrics = [] if exclude_list is None else exclude_list

    EnhancedFRS_2022_23().generate()

    def analytics(w_):
        # Mean squared error, split into (calibrated metrics, holdout metrics).
        performance = get_constituency_performance(w_)
        is_holdout = performance.metric.isin(holdout_metrics)
        squared_error = performance.abs_error ** 2
        return squared_error[~is_holdout].mean(), squared_error[is_holdout].mean()

    return calibrate(
        exclude_targets=holdout_metrics if should_exclude else [],
        epochs=250,
        analytics=analytics,
    )

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import numpy as np

# Share of target metrics held out of calibration, and the seed that makes the
# holdout draw repeatable across kernel restarts.
HOLDOUT_FRACTION = 0.1
HOLDOUT_SEED = 0

constituency_target_matrix, constituency_actuals, _ = create_constituency_target_matrix("enhanced_frs_2022_23", 2025, None)

# Draw the holdout metrics without replacement from the target matrix columns.
n_holdout = int(HOLDOUT_FRACTION * constituency_target_matrix.shape[1])
holdout_rng = np.random.default_rng(HOLDOUT_SEED)
excluded = holdout_rng.choice(constituency_target_matrix.columns, size=n_holdout, replace=False)

# NOTE(review): this unpacks two values, while other cells in this notebook
# treat the same call as returning the weights alone - confirm against calibrate.
weights, performance = get_weights_with_exclude_list(exclude_list=excluded)

Loss: 8.697927474975586, Epoch: 0, Constituency<10%: 11.1%, National<10%: 3.9%
(1.445586234280882e+17, 24765395.08905384)
Loss: 6.540287971496582, Epoch: 1, Constituency<10%: 13.5%, National<10%: 4.5%
Loss: 4.908971309661865, Epoch: 2, Constituency<10%: 16.9%, National<10%: 7.2%
Loss: 3.7292542457580566, Epoch: 3, Constituency<10%: 20.0%, National<10%: 11.3%
Loss: 2.8773155212402344, Epoch: 4, Constituency<10%: 22.2%, National<10%: 19.4%
Loss: 2.244055986404419, Epoch: 5, Constituency<10%: 24.7%, National<10%: 29.3%
Loss: 1.7912296056747437, Epoch: 6, Constituency<10%: 26.5%, National<10%: 37.0%
Loss: 1.4615260362625122, Epoch: 7, Constituency<10%: 28.2%, National<10%: 35.2%
Loss: 1.2095162868499756, Epoch: 8, Constituency<10%: 29.7%, National<10%: 30.1%
Loss: 1.0173256397247314, Epoch: 9, Constituency<10%: 31.9%, National<10%: 30.1%
Loss: 0.866382360458374, Epoch: 10, Constituency<10%: 34.3%, National<10%: 31.0%
(8.896352824424206e+16, 13157800.896526517)
Loss: 0.7467900514602661, Epo

In [6]:
# Import only the name this cell uses instead of a wildcard, so it is clear
# where add_fonts comes from and no unrelated names enter the namespace.
from policyengine.utils.charts import add_fonts

add_fonts()

In [15]:
import plotly.express as px
from policyengine.utils.charts import *
from policyengine_core.charts import format_fig
import pandas as pd
import numpy as np

# Each entry of `performance` is indexed as (calibrated error, holdout error).
train_error = [checkpoint[0] for checkpoint in performance]
holdout_error = [checkpoint[1] for checkpoint in performance]

# NOTE(review): the factor of 10 assumes one analytics entry per 10 epochs
# (the calibration log prints a tuple at epochs 0 and 10) - confirm.
error_df = pd.DataFrame({
    "Calibrated": train_error,
    "Holdout": holdout_error,
    "Epoch": np.arange(len(train_error)) * 10,
})

# Express each curve as the change relative to its first recorded value.
for series_name in ("Calibrated", "Holdout"):
    error_df[series_name] = error_df[series_name] / error_df[series_name].values[0] - 1

fig = px.line(
    error_df,
    x="Epoch",
    y=["Calibrated", "Holdout"],
    color_discrete_sequence=px.colors.qualitative.T10,
)
fig.update_layout(
    title="Calibrated vs holdout error",
    yaxis_title="Relative error change",
    yaxis_tickformat=".0%",
)
format_fig(fig)

In [3]:
# Display the analytics collected in `performance`; the captured output below
# shows a list of 2-tuples, one per recorded checkpoint.
performance

[(209314924.07636842, 0.488475147361944),
 (104688701.06549102, 0.26861174256506515),
 (84800021.75700788, 0.19636401884845647)]

In [2]:
# Baseline calibration: called with the defaults, so no exclude list is given.
# NOTE(review): this treats the return value as the weights alone, while the
# earlier holdout cell unpacks (weights, performance) from the same function -
# confirm which matches calibrate.
standard_weights = get_weights_with_exclude_list()

# Per-constituency, per-metric estimate-vs-target table for the baseline weights.
standard_df = get_constituency_performance(standard_weights)

Loss: 8.413374900817871, Epoch: 0, Constituency<10%: 11.7%, National<10%: 3.9%
Loss: 6.326820373535156, Epoch: 1, Constituency<10%: 14.5%, National<10%: 4.5%
Loss: 4.765588760375977, Epoch: 2, Constituency<10%: 18.2%, National<10%: 7.2%
Loss: 3.6271231174468994, Epoch: 3, Constituency<10%: 21.6%, National<10%: 10.7%
Loss: 2.7974398136138916, Epoch: 4, Constituency<10%: 23.9%, National<10%: 19.7%
Loss: 2.196089744567871, Epoch: 5, Constituency<10%: 25.3%, National<10%: 29.3%
Loss: 1.7553602457046509, Epoch: 6, Constituency<10%: 26.5%, National<10%: 37.3%
Loss: 1.4333171844482422, Epoch: 7, Constituency<10%: 27.9%, National<10%: 34.3%
Loss: 1.1882569789886475, Epoch: 8, Constituency<10%: 29.9%, National<10%: 30.1%
Loss: 1.0012985467910767, Epoch: 9, Constituency<10%: 33.0%, National<10%: 29.9%
Loss: 0.8539372086524963, Epoch: 10, Constituency<10%: 37.2%, National<10%: 31.3%
Loss: 0.7359781265258789, Epoch: 11, Constituency<10%: 39.4%, National<10%: 34.0%
Loss: 0.6413196325302124, Epoch: 

In [3]:
import numpy as np

# Average the calibrated weights over axis 0, then broadcast that average back
# to the full shape of `standard_weights` so every row carries the same values.
# NOTE(review): assumes axis 0 indexes constituencies - confirm.
mean_weights = standard_weights.mean(axis=0)
national_weights_projected = np.ones_like(standard_weights) * mean_weights

# Score the uniform weights with the same estimate-vs-target comparison.
national_df = get_constituency_performance(national_weights_projected)

In [4]:
import numpy as np

# Share of target metrics held out of calibration, and the seed that makes the
# holdout draw repeatable across kernel restarts.
HOLDOUT_FRACTION = 0.1
HOLDOUT_SEED = 0

constituency_target_matrix, constituency_actuals, _ = create_constituency_target_matrix("enhanced_frs_2022_23", 2025, None)

# Draw the holdout metrics without replacement from the target matrix columns.
n_holdout = int(HOLDOUT_FRACTION * constituency_target_matrix.shape[1])
holdout_rng = np.random.default_rng(HOLDOUT_SEED)
excluded = holdout_rng.choice(constituency_target_matrix.columns, size=n_holdout, replace=False)

# Calibrate without the holdout metrics, then score the resulting weights.
excluded_df = get_constituency_performance(get_weights_with_exclude_list(exclude_list=excluded))

# Flag the holdout metrics in every results table and save each table to the
# current working directory as <label>_df.csv.
for label, results in (
    ("standard", standard_df),
    ("excluded", excluded_df),
    ("national", national_df),
):
    results["metric_excluded"] = results.metric.isin(excluded)
    results.to_csv(f"{label}_df.csv", index=False)

Loss: 8.692922592163086, Epoch: 0, Constituency<10%: 11.1%, National<10%: 3.6%
Loss: 6.525519371032715, Epoch: 1, Constituency<10%: 14.3%, National<10%: 5.1%
Loss: 4.899694919586182, Epoch: 2, Constituency<10%: 18.7%, National<10%: 7.5%
Loss: 3.7199020385742188, Epoch: 3, Constituency<10%: 22.4%, National<10%: 12.5%
Loss: 2.8636200428009033, Epoch: 4, Constituency<10%: 24.8%, National<10%: 22.7%
Loss: 2.241800308227539, Epoch: 5, Constituency<10%: 26.4%, National<10%: 29.9%
Loss: 1.7911784648895264, Epoch: 6, Constituency<10%: 27.7%, National<10%: 37.6%
Loss: 1.4546833038330078, Epoch: 7, Constituency<10%: 28.8%, National<10%: 34.3%
Loss: 1.2061407566070557, Epoch: 8, Constituency<10%: 31.3%, National<10%: 31.6%
Loss: 1.015023946762085, Epoch: 9, Constituency<10%: 34.4%, National<10%: 30.7%
Loss: 0.864335298538208, Epoch: 10, Constituency<10%: 38.3%, National<10%: 33.1%
Loss: 0.7443556785583496, Epoch: 11, Constituency<10%: 41.2%, National<10%: 36.1%
Loss: 0.6479336023330688, Epoch: 12

In [5]:
# Mean relative absolute error, split by whether the metric was in the holdout
# set, for the holdout-calibrated, fully calibrated and national weights.
(
    excluded_df.groupby("metric_excluded").rel_abs_error.mean(),
    standard_df.groupby("metric_excluded").rel_abs_error.mean(),
    national_df.groupby("metric_excluded").rel_abs_error.mean(),
)

(metric_excluded
 False    0.065632
 True     0.341493
 Name: rel_abs_error, dtype: float64,
 metric_excluded
 False    0.067863
 True     0.046826
 Name: rel_abs_error, dtype: float64,
 metric_excluded
 False    0.287826
 True     0.436839
 Name: rel_abs_error, dtype: float64)

In [6]:
import plotly.express as px
# Explicit import instead of a wildcard: format_fig is the only name this
# cell needs from the charts module.
from policyengine_core.charts import format_fig

# Results table for each set of weights, in the order the bars are listed.
results_by_weights = {
    "National": national_df,
    "Calibrated (full)": standard_df,
    "Calibrated (holdout set)": excluded_df,
}

# One row per (holdout flag, weights) pair: the three non-holdout rows first,
# then the three holdout rows.
analysis_df = pd.DataFrame([
    {
        "Weights": label,
        "Metric excluded": "Yes" if held_out else "No",
        "Mean relative error": results[
            results.metric_excluded if held_out else ~results.metric_excluded
        ].rel_abs_error.mean(),
    }
    for held_out in (False, True)
    for label, results in results_by_weights.items()
])

fig = px.bar(
    analysis_df,
    y="Weights",
    x="Mean relative error",
    color="Metric excluded",
    barmode="group",
    title="Relative error in constituency estimates",
    color_discrete_sequence=px.colors.qualitative.T10,
)

format_fig(fig)