In [1]:
from policyengine_uk import Microsimulation
from policyengine_core.reforms import Reform
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import h5py
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Baseline simulation with no reform applied; every figure below uses 2024.
baseline = Microsimulation()
baseline_income = baseline.calculate(
    "real_household_net_income",
    period=2024,
)
# Head-count mapped to household level so it can be weighted with the same
# household weights as baseline_income.
baseline_people = baseline.calculate(
    "people",
    map_to="household",
    period=2024,
)

# Reform comparison, currently disabled (needs a `reform` object to be defined):
# reformed = Microsimulation(reform=reform)
# reformed_income = reformed.calculate("real_household_net_income", period=2029)
# reformed_people = baseline.calculate("people", map_to = "household", period=2029)

In [3]:
def get_income_data():
    """Build a per-constituency table of per-capita income with hex-map positions.

    Reads the constituency list, the hex-map layout, the weight matrix and the
    boundary mapping matrix from hard-coded relative paths. It also uses the
    notebook-level ``baseline_income`` and ``baseline_people`` results, so both
    must already be defined when this function is called.

    Returns:
        pandas.DataFrame: one row per code that appears both in the mapping
        matrix columns and in the hex map. Columns are ``code``, ``income``
        (weighted income divided by weighted head-count), ``name`` and the
        columns of the hex-map file.
    """
    area_names = pd.read_csv("../policyengine_uk_local_areas/constituencies_2024.csv")
    hex_layout = pd.read_csv("../policyengine_uk_local_areas/hex_map/hex_map_2024.csv")

    with h5py.File("../constituencies/weights.h5", "r") as weights_file:
        source_weights = weights_file["weight"][:]

    # The first CSV column holds the row labels; the remaining columns are the
    # codes that end up in the output.
    mapping = pd.read_csv("../mapping_2010_to_2024/mapping_matrix.csv")
    mapping = mapping.set_index(mapping.columns[0])
    # Rescale every column so that it sums to 1.
    mapping = mapping / mapping.sum()

    # Re-express the weights on the mapping matrix's column codes
    # (presumably 2010 -> 2024 boundaries, going by the file name -- confirm).
    weights_by_code = mapping.T.dot(source_weights)

    # Weighted totals for 2024, one value per output code.
    weighted_income = np.dot(weights_by_code, baseline_income.values)
    weighted_people = np.dot(weights_by_code, baseline_people.values)

    per_capita = pd.DataFrame({
        'code': mapping.columns,
        'income': weighted_income / weighted_people,
    })
    # Left join keeps codes that have no entry in the constituency list.
    named = per_capita.merge(area_names[['code', 'name']], on='code', how='left')
    # Inner join drops codes that have no position on the hex map.
    return pd.merge(named, hex_layout, on='code', how='inner')

# Per-constituency incomes together with their hex-grid coordinates.
data = get_income_data()

# Shift rows whose y is even by half a cell along x, so that neighbouring
# rows interlock when drawn as hexagons.
data['x'] = data['x'] + 0.5 * (data['y'] % 2 == 0)

# Interactive hex map: one hexagon marker per constituency, coloured by income.

# Horizontal colour bar, centred underneath the plot area.
income_colorbar = {
    'title': {
        'text': 'Per capita household real net income (£)',
        'side': 'bottom',
        'font': {'size': 12},
    },
    'orientation': 'h',
    'x': 0.5,
    'xanchor': 'center',
    'y': -0.15,
    'thickness': 10,
    'len': 0.5,
    'nticks': 7,     # keep the number of ticks small
    'tickangle': 0,  # 0 degrees: tick labels stay horizontal
    # 'tickformat': ',.0f',  # format as thousands with comma
    # 'tickprefix': '£',     # add pound symbol
}

hex_marker = {
    'symbol': 'hexagon',
    'size': 13.8,  # presumably tuned by eye for the 800x800 canvas -- confirm
    'color': data['income'],
    'colorscale': 'OrRd',
    'line': {'color': 'white', 'width': 0.5},
    'colorbar': income_colorbar,
}

# %{text} is filled from the constituency names passed as `text` below;
# the empty <extra> tag removes the secondary hover box.
hover_template = (
    '<b>Constituency:</b> %{text}<br>'
    '<b>Income:</b> £%{marker.color:,.0f}<br>'
    '<extra></extra>'
)

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=data['x'],
        y=data['y'],
        mode='markers',
        marker=hex_marker,
        text=data['name'],
        hovertemplate=hover_template,
    )
)

# Both axes are purely positional, so hide grid lines, tick labels and zero lines.
hidden_axis = {'showgrid': False, 'showticklabels': False, 'zeroline': False}

fig.update_layout(
    title={
        'text': 'Per capita household real net income by constituency (2024)',
        'x': 0.5,
        'y': 0.95,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    width=800,
    height=800,
    showlegend=False,
    paper_bgcolor='white',
    plot_bgcolor='#f0f0f0',
    xaxis=dict(hidden_axis),
    # Lock y to x at a 1:1 ratio so the hex grid is not stretched.
    yaxis=dict(hidden_axis, scaleanchor='x', scaleratio=1),
)

# Render the figure
fig.show()

In [17]:
# Preview the first rows of the per-constituency frame that feeds the hex map.
data.head()

Unnamed: 0,code,income,name,x,y
0,E14001063,23038.553717,Aldershot,56.5,-40
1,E14001064,14378.778035,Aldridge-Brownhills,56.5,-30
2,E14001065,29698.730152,Altrincham and Sale West,52.0,-25
3,E14001066,16366.588627,Amber Valley,58.0,-27
4,E14001067,20748.962962,Arundel and South Downs,61.5,-44


In [38]:
# Constituency -> local authority district lookup (2024 codes and names).
# Only the four code/name columns are loaded. The preview shows the same
# constituency/authority pair on several rows, so the file presumably has one
# row per smaller geography -- confirm against the source file.
constituency_lad = pd.read_csv(
    "Constituency_to_LAD(2024).csv",
    usecols=["PCON24CD", "PCON24NM", "LAD24CD", "LAD24NM"],
)
constituency_lad.head()

Unnamed: 0,PCON24CD,PCON24NM,LAD24CD,LAD24NM
0,E14001272,Hartlepool,E06000001,Hartlepool
1,E14001272,Hartlepool,E06000001,Hartlepool
2,E14001272,Hartlepool,E06000001,Hartlepool
3,E14001272,Hartlepool,E06000001,Hartlepool
4,E14001272,Hartlepool,E06000001,Hartlepool


In [20]:
# Disabled cell: loading local-authority boundary centroids and rescaling them
# into LA_x / LA_y columns. Nothing here runs; the output recorded for this
# cell presumably comes from an earlier run with the code enabled.
# NOTE(review): the renames map LAT -> LA_x and LONG -> LA_y, which looks
# swapped (latitude is normally the vertical axis) -- confirm before re-enabling.
# local_authority_boundaries = pd.read_csv("Local_Authority_Boundaries_2024.csv", 
#                                        usecols=['LAD24CD', 'LONG', 'LAT'])

# local_authority_boundaries = pd.read_csv("Local_Authority_Boundaries_2024.csv")
# local_authority_boundaries = local_authority_boundaries.rename(columns={'LAT': 'LA_x'})
# local_authority_boundaries['LA_x'] = local_authority_boundaries['LA_x'].round(1)
# local_authority_boundaries = local_authority_boundaries.rename(columns={'LONG': 'LA_y'})
# local_authority_boundaries['LA_y'] = (local_authority_boundaries['LA_y'] * 10).round(1)

# local_authority_boundaries.head()

Unnamed: 0,FID,LAD24CD,LAD24NM,LAD24NMW,BNG_E,BNG_N,LONG,LAT,Shape__Area,Shape__Length,GlobalID
0,1,E06000001,Hartlepool,,447161,531473,-1.27017,54.67613,96609660.0,50746.500918,3f58aa35-9ea6-4001-a80f-8aab0e41313f
1,2,E06000002,Middlesbrough,,451141,516887,-1.21099,54.54467,55230930.0,35500.289988,c5bc1c3e-111f-46db-8e41-362fbbc78d30
2,3,E06000003,Redcar and Cleveland,,464330,519596,-1.00656,54.56752,248307600.0,85068.788856,29afa1cb-8719-44c2-9906-38bc7bae2981
3,4,E06000004,Stockton-on-Tees,,444940,518179,-1.30664,54.55687,205176000.0,88841.993471,8ebb86c0-86bb-466e-ae7e-a832eeb755ff
4,5,E06000005,Darlington,,428029,515648,-1.56835,54.53534,198236900.0,90977.686391,bf2173db-02e1-4b58-9617-e775100f58ec


In [40]:
# Merge the dataframes
result = data.merge(constituency_lad, 
                   left_on='code',    # from data dataframe
                   right_on='PCON24CD',  # from constituency_lad
                   how='left')

# Display info to verify the merge
print("Original data shape:", data.shape)
print("constituency_lad shape:", constituency_lad.shape)
print("Result shape:", result.shape)

# Check for any unmatched rows
unmatched = result[result['PCON24CD'].isna()]
if len(unmatched) > 0:
    print("\nNumber of unmatched rows:", len(unmatched))
    print("Sample of unmatched codes:")
    print(unmatched['code'].head())

# Show first few rows and columns
print("\nFirst few rows of merged result:")
result.head()

Original data shape: (650, 5)
constituency_lad shape: (8798, 4)
Result shape: (8798, 9)

First few rows of merged result:


Unnamed: 0,code,income,name,x,y,PCON24CD,PCON24NM,LAD24CD,LAD24NM
0,E14001063,23038.553717,Aldershot,56.5,-40,E14001063,Aldershot,E07000092,Rushmoor
1,E14001063,23038.553717,Aldershot,56.5,-40,E14001063,Aldershot,E07000092,Rushmoor
2,E14001063,23038.553717,Aldershot,56.5,-40,E14001063,Aldershot,E07000092,Rushmoor
3,E14001063,23038.553717,Aldershot,56.5,-40,E14001063,Aldershot,E07000092,Rushmoor
4,E14001063,23038.553717,Aldershot,56.5,-40,E14001063,Aldershot,E07000092,Rushmoor


In [42]:
# Average constituency income within each local authority district.
# NOTE(review): `result` has one row per lookup row, not one per constituency,
# so this mean is implicitly weighted by how many lookup rows each constituency
# contributes to the authority -- confirm that weighting is intended.
lad_income = (
    result
    .groupby(['LAD24CD', 'LAD24NM'], as_index=False)['income']
    .mean()
)
lad_income.head()

Unnamed: 0,LAD24CD,LAD24NM,income
0,E06000001,Hartlepool,14037.783074
1,E06000002,Middlesbrough,14687.359761
2,E06000003,Redcar and Cleveland,15224.669602
3,E06000004,Stockton-on-Tees,15093.184476
4,E06000005,Darlington,14858.934621
