In [207]:
import torch
from policyengine_uk import Microsimulation
import pandas as pd
import numpy as np
from tqdm import tqdm

# Per-constituency targets. Columns read elsewhere in this notebook:
# "code", "total_income_amount" and "total_income_count".
incomes = pd.read_csv("constituency_incomes.csv")

# Code prefixes, one per UK nation (presumably ONS/GSS parliamentary
# constituency prefixes — confirm against the CSV's provenance).
ENGLAND_CONSTITUENCY = "E14"
NI_CONSTITUENCY = "N06"
SCOTLAND_CONSTITUENCY = "S14"
WALES_CONSTITUENCY = "W07"

CONSTITUENCY_PREFIXES = [
    ENGLAND_CONSTITUENCY,
    NI_CONSTITUENCY,
    SCOTLAND_CONSTITUENCY,
    WALES_CONSTITUENCY,
]

# Keep rows whose code *starts with* one of the prefixes. An anchored match is
# used instead of `str.contains`, which would accept the prefix anywhere in
# the code and interpret it as a regular expression. `na=False` makes rows
# with a missing code drop out instead of producing NaN in the mask.
is_constituency = np.any(
    [
        incomes["code"].str.startswith(prefix, na=False)
        for prefix in CONSTITUENCY_PREFIXES
    ],
    axis=0,
)
incomes = incomes[is_constituency]

sim = Microsimulation()

def get_simulation_matrix(period=2025):
    """Build the household-level metric table used as calibration inputs.

    Reads from the module-level ``sim`` object.

    Args:
        period: Period passed to ``sim.calculate`` when computing
            ``total_income``. Defaults to 2025, the value previously
            hard-coded here.

    Returns:
        pandas.DataFrame with two columns:
            total_income_amount: person-level ``total_income`` mapped to
                households via ``sim.map_result``.
            total_income_count: the person-level flag ``total_income != 0``
                mapped to households the same way.
    """
    total_income = sim.calculate("total_income", period=period).values
    # NOTE(review): map_result("person" -> "household") presumably sums the
    # person values within each household, making the second column a count
    # of people with non-zero income — confirm against policyengine docs.
    hh_ti = sim.map_result(total_income, "person", "household")
    hh_count_with_ti = sim.map_result(total_income != 0, "person", "household")
    return pd.DataFrame({
        "total_income_amount": hh_ti,
        "total_income_count": hh_count_with_ti
    })

# Targets: one row per constituency in `incomes`, columns = (amount, count).
y = torch.tensor(incomes[['total_income_amount', 'total_income_count']].values, dtype=torch.float32)

# Household metrics: one row per household, same two columns as the targets.
# Computed once; the simulation call is expensive.
metrics = torch.tensor(get_simulation_matrix().values, dtype=torch.float32)

# Take the number of constituencies from the data rather than hard-coding 650,
# so the weight matrix always lines up with `y`.
n_constituencies = y.shape[0]

# Starting point in log space: every household's national weight is split
# evenly across constituencies. The optimiser works on log-weights and the
# training loop exponentiates them before calling the loss.
original_weights = np.log(
    sim.calculate("household_weight", 2025).values / n_constituencies
)

# Weights - (n_constituencies x n_households), one identical row per
# constituency. Built directly in float32 to avoid a float64 intermediate.
weights = (
    torch.tensor(original_weights, dtype=torch.float32)
    .repeat(n_constituencies, 1)
    .requires_grad_()
)

# Diagnostics for the initial weights. `weights` holds log-weights, so they
# must be exponentiated before being multiplied with the metrics; no_grad
# keeps these tensors out of the autograd graph.
with torch.no_grad():
    weighted_metrics = torch.exp(weights).unsqueeze(-1) * metrics.unsqueeze(0)
    totals = weighted_metrics.sum(dim=1)

def loss(w):
    """Mean squared relative error between predicted and target totals.

    Reads the module-level tensors ``metrics`` and ``y``.

    Args:
        w: 2-D weight tensor with one row per row of ``y`` and one column
            per row of ``metrics``.

    Returns:
        Scalar tensor: the mean over all entries of
        ``(pred / (1 + y) - 1) ** 2`` where ``pred = w @ metrics``.
    """
    # The matrix product equals (w[:, :, None] * metrics[None]).sum(dim=1)
    # without materialising the 3-D intermediate on every step.
    pred = w @ metrics
    # The +1 in the denominator keeps the ratio finite when a target is 0.
    mse = torch.mean((pred / (1 + y) - 1) ** 2)
    return mse

# Adam updates the log-weights directly; exponentiating them before the loss
# call means the weights seen by the loss are never negative.
optimizer = torch.optim.Adam(params=[weights], lr=0.5)

progress = tqdm(range(100))

for step in progress:
    optimizer.zero_grad()
    current_loss = loss(weights.exp())
    # Show the loss of the current step in the progress bar.
    progress.set_description(f"Loss: {current_loss.item()}")
    current_loss.backward()
    optimizer.step()

# Convert the fitted log-weights back to ordinary weights as a NumPy array.
final_weights = weights.exp().detach().numpy()

# Total fitted weight per row of `weights` (displayed as the cell output).
final_weights.sum(axis=1)

Loss: 0.000329483620589599: 100%|██████████| 100/100 [00:17<00:00,  5.87it/s] 


array([23051.914, 26474.219, 26693.133, 26928.248, 27563.207, 26482.924,
       24148.105, 26044.832, 24899.234, 25751.709, 24421.324, 24283.676,
       22967.82 , 29656.938, 21332.375, 22753.791, 28619.414, 26476.904,
       29544.984, 27652.705, 23974.111, 23803.36 , 21421.15 , 24909.826,
       30363.041, 27500.355, 33720.082, 26057.33 , 23923.535, 31014.514,
       24459.584, 27257.436, 22450.594, 25585.549, 24329.037, 23656.285,
       22469.27 , 26038.441, 24967.412, 30919.965, 26036.535, 24116.023,
       24795.629, 31474.346, 28790.281, 29005.557, 33749.11 , 28425.062,
       33759.48 , 24172.24 , 35950.473, 26474.451, 29647.816, 28727.822,
       30324.562, 28737.543, 27764.375, 26992.98 , 31360.938, 23617.064,
       29952.39 , 23846.936, 33676.355, 28208.16 , 24386.28 , 23027.328,
       23039.562, 31864.62 , 31259.174, 35934.31 , 23322.207, 28320.049,
       25527.238, 26744.967, 24714.084, 24583.266, 26757.553, 26084.756,
       37444.863, 23974.111, 29157.06 , 38112.977, 

In [215]:
# Transpose so each column holds one row of `final_weights`, labelled with
# the matching entry of incomes["code"].
df = pd.DataFrame(data=final_weights.transpose(), columns=incomes["code"])

In [217]:
# Write the weight table without the row index; only the column labels
# are kept as the header.
df.to_csv(path_or_buf="constituency_weights.csv", index=False)