In [1]:
from policyengine_uk import Microsimulation
from policyengine_core.reforms import Reform
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import h5py
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Simulation built with no reform argument; the commented-out lines are a
# scaffold for a reform comparison that is not wired up (`reform` is not
# defined anywhere in the visible cells).
baseline = Microsimulation()
# reformed = Microsimulation(reform=reform)

# "real_household_net_income" for period 2024, as returned by the simulation.
baseline_income = baseline.calculate(
    "real_household_net_income",
    period=2024,
)
# reformed_income = reformed.calculate("real_household_net_income", period=2029)

# "people" mapped to household level for the same period; used later as the
# denominator of the per-capita figure.
baseline_people = baseline.calculate(
    "people",
    period=2024,
    map_to="household",
)
# NOTE(review): the stub below calls `baseline`, not `reformed` - confirm
# before re-enabling it.
# reformed_people = baseline.calculate("people", map_to = "household", period=2029)

In [3]:
def get_income_data(income=None, people=None):
    """Build per-constituency per-capita income joined to hex-map positions.

    Steps performed:

    1. Load the constituency list, the hex-map layout, the weight matrix
       (``weight`` dataset of ``weights.h5``) and the 2010 -> 2024 mapping
       matrix from disk.
    2. Normalise every column of the mapping matrix so that it sums to 1,
       then project the weights through it (``mapping.T @ weights``).
    3. Compute weighted income and weighted head-count per column of the
       mapping matrix and divide one by the other.
    4. Attach constituency names (left join) and hex positions (inner join).

    Parameters
    ----------
    income : array-like, optional
        One value per household, in the same household order as the second
        axis of the weight matrix. Defaults to the notebook-level
        ``baseline_income``.
    people : array-like, optional
        Head-count per household, same ordering as ``income``. Defaults to
        the notebook-level ``baseline_people``.

    Returns
    -------
    pandas.DataFrame
        Columns ``code``, ``income`` (per-capita value), ``name``, followed
        by the columns of ``hex_map_2024.csv``. Only codes present in the hex
        map are kept, because the final join is an inner join.

    Notes
    -----
    The weight matrix must have one row per row of the mapping matrix and one
    column per household; otherwise the dot products raise.
    """
    # Fall back to the notebook globals so existing `get_income_data()` calls
    # behave exactly as before, while making the dependency explicit.
    if income is None:
        income = baseline_income
    if people is None:
        people = baseline_people
    # Accept either pandas-like objects (use their `.values`) or plain arrays.
    income_values = np.asarray(getattr(income, "values", income))
    people_values = np.asarray(getattr(people, "values", people))

    constituencies = pd.read_csv("../policyengine_uk_local_areas/constituencies_2024.csv")
    hex_locations = pd.read_csv("../policyengine_uk_local_areas/hex_map/hex_map_2024.csv")

    with h5py.File("../constituencies/weights.h5", "r") as f:
        weights = f["weight"][:]

    mapping_matrix = pd.read_csv("../mapping_2010_to_2024/mapping_matrix.csv")
    # The first CSV column holds the row labels.
    mapping_matrix = mapping_matrix.set_index(mapping_matrix.columns[0])
    # Column-normalise: each column of the mapping then sums to 1.
    mapping_matrix = mapping_matrix.div(mapping_matrix.sum(), axis=1)

    weights_2024 = mapping_matrix.T.dot(weights)

    # Calculate income and population for 2024 only
    income_2024 = np.dot(weights_2024, income_values)
    population_2024 = np.dot(weights_2024, people_values)

    # Calculate per capita income for 2024
    per_capita_2024 = income_2024 / population_2024

    df = pd.DataFrame({
        'code': mapping_matrix.columns,
        'income': per_capita_2024
    })
    # Left join keeps every code even when no name is found ...
    df = df.merge(constituencies[['code', 'name']], on='code', how='left')
    # ... whereas the inner join drops codes that have no hex position.
    merged_df = pd.merge(df, hex_locations, on='code', how='inner')

    return merged_df

# Assemble the per-constituency frame (income, name and hex-grid position).
data = get_income_data()

# Shift rows whose y is even by half a unit in x (presumably to stagger
# alternate rows of the hexagonal layout).
data['x'] = data['x'] + (data['y'] % 2 == 0) * 0.5

# Interactive figure: one hexagon marker per constituency.
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=data['x'],
    y=data['y'],
    mode='markers',
    text=data['name'],
    hovertemplate=(
        '<b>Constituency:</b> %{text}<br>'
        '<b>Income:</b> £%{marker.color:,.0f}<br>'
        '<extra></extra>'  # removes secondary box
    ),
    marker={
        'size': 13.8,
        'color': data['income'],
        'colorscale': 'OrRd',
        'symbol': 'hexagon',
        'line': {'color': 'white', 'width': 0.5},
        # Horizontal colour bar centred underneath the map.
        'colorbar': {
            'title': {
                'text': 'Per capita household real net income (£)',
                'side': 'bottom',
                'font': {'size': 12},
            },
            'orientation': 'h',
            'y': -0.15,
            'xanchor': 'center',
            'x': 0.5,
            'thickness': 10,
            'len': 0.5,
            'nticks': 7,    # upper bound on the number of ticks
            'tickangle': 0,  # keep tick labels horizontal (no rotation)
            # Disabled options kept for reference:
            # tickformat=',.0f',  # Format as thousands with comma
            # tickprefix='£'  # Add pound symbol
        },
    },
))

# Title, canvas size and axis styling.
fig.update_layout(
    title={
        'text': 'Per capita household real net income by constituency (2024)',
        'x': 0.5,
        'y': 0.95,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    paper_bgcolor='white',
    plot_bgcolor='#f0f0f0',
    showlegend=False,
    width=800,
    height=800,
    # Axes carry no information on a cartogram, so hide grid, ticks and zero line.
    xaxis={
        'showgrid': False,
        'showticklabels': False,
        'zeroline': False,
    },
    # The y axis is locked 1:1 to x so the hexagons are not stretched.
    yaxis={
        'showgrid': False,
        'showticklabels': False,
        'zeroline': False,
        'scaleanchor': 'x',
        'scaleratio': 1,
    },
)

# Render the figure.
fig.show()

In [4]:
# Preview the first rows of the plotted frame. `x` here already includes the
# half-unit shift applied to even-`y` rows before plotting.
data.head()

Unnamed: 0,code,income,name,x,y
0,E14001063,23038.553717,Aldershot,56.5,-40
1,E14001064,14378.778035,Aldridge-Brownhills,56.5,-30
2,E14001065,29698.730152,Altrincham and Sale West,52.0,-25
3,E14001066,16366.588627,Amber Valley,58.0,-27
4,E14001067,20748.962962,Arundel and South Downs,61.5,-44


In [5]:
# Constituency -> local-authority lookup, restricted to the code/name columns.
# NOTE(review): the preview shows repeated identical rows, so the source file
# presumably has one row per smaller geographic unit - confirm whether the
# duplicates are needed downstream before dropping them.
constituency_lad = pd.read_csv(
    "Constituency_to_LAD(2024).csv",
    usecols=['PCON24CD', 'PCON24NM', 'LAD24CD', 'LAD24NM'],
)
constituency_lad.head()

Unnamed: 0,PCON24CD,PCON24NM,LAD24CD,LAD24NM
0,E14001272,Hartlepool,E06000001,Hartlepool
1,E14001272,Hartlepool,E06000001,Hartlepool
2,E14001272,Hartlepool,E06000001,Hartlepool
3,E14001272,Hartlepool,E06000001,Hartlepool
4,E14001272,Hartlepool,E06000001,Hartlepool


In [11]:
# Load one coordinate pair per local authority and derive the LA_x / LA_y
# columns in a single chain:
#   LA_x = LAT rounded to 1 decimal place
#   LA_y = LONG * 10, rounded to 1 decimal place
# NOTE(review): latitude is stored as "x" and longitude as "y", and only the
# longitude is scaled by 10 - confirm this is the intended mapping.
local_authority_boundaries = (
    pd.read_csv(
        "Local_Authority_Boundaries_2024.csv",
        usecols=['LAD24CD', 'LONG', 'LAT'],
    )
    .rename(columns={'LAT': 'LA_x', 'LONG': 'LA_y'})
    .assign(
        LA_x=lambda frame: frame['LA_x'].round(1),
        LA_y=lambda frame: (frame['LA_y'] * 10).round(1),
    )
)

local_authority_boundaries.head()

Unnamed: 0,LAD24CD,LA_y,LA_x
0,E06000001,-12.7,54.7
1,E06000002,-12.1,54.5
2,E06000003,-10.1,54.6
3,E06000004,-13.1,54.6
4,E06000005,-15.7,54.5


In [14]:
# Attach each local authority's LA_x / LA_y to the constituency lookup.
# Inner join: lookup rows whose LAD24CD has no boundary record are dropped.
merged_df_LA = pd.merge(
    constituency_lad,
    local_authority_boundaries,
    how='inner',
    on='LAD24CD',
)
merged_df_LA.head(30)

Unnamed: 0,PCON24CD,PCON24NM,LAD24CD,LAD24NM,LA_y,LA_x
0,E14001272,Hartlepool,E06000001,Hartlepool,-12.7,54.7
1,E14001272,Hartlepool,E06000001,Hartlepool,-12.7,54.7
2,E14001272,Hartlepool,E06000001,Hartlepool,-12.7,54.7
3,E14001272,Hartlepool,E06000001,Hartlepool,-12.7,54.7
4,E14001272,Hartlepool,E06000001,Hartlepool,-12.7,54.7
5,E14001272,Hartlepool,E06000001,Hartlepool,-12.7,54.7
6,E14001272,Hartlepool,E06000001,Hartlepool,-12.7,54.7
7,E14001272,Hartlepool,E06000001,Hartlepool,-12.7,54.7
8,E14001272,Hartlepool,E06000001,Hartlepool,-12.7,54.7
9,E14001272,Hartlepool,E06000001,Hartlepool,-12.7,54.7


In [15]:
# Per constituency: number of distinct local authorities it touches, plus its
# name. Note that the resulting 'LAD24CD' column holds a *count*, not a code.
multiple_mappings = (
    merged_df_LA
    .groupby('PCON24CD')
    .agg(
        LAD24CD=('LAD24CD', 'nunique'),
        PCON24NM=('PCON24NM', 'first'),
    )
    .reset_index()
)

# Keep only constituencies spanning two or more local authorities.
multiple_lads = multiple_mappings.loc[multiple_mappings['LAD24CD'] > 1]

print(f"Number of constituencies mapping to multiple LADs: {len(multiple_lads)}")
print("\nThese constituencies are:")
print(multiple_lads[['PCON24CD', 'PCON24NM', 'LAD24CD']].to_string())

# Optional drill-down: list the raw lookup rows of the first such constituency.
if not multiple_lads.empty:
    print("\nExample of detailed mapping for first constituency:")
    example_pcon = multiple_lads['PCON24CD'].iloc[0]
    print(
        merged_df_LA.loc[
            merged_df_LA['PCON24CD'] == example_pcon,
            ['PCON24CD', 'PCON24NM', 'LAD24CD', 'LAD24NM'],
        ]
    )

Number of constituencies mapping to multiple LADs: 261

These constituencies are:
      PCON24CD                                  PCON24NM  LAD24CD
0    E14001063                                 Aldershot        2
4    E14001067                   Arundel and South Downs        3
6    E14001069                                   Ashford        2
9    E14001072                                   Banbury        2
13   E14001076                        Barrow and Furness        2
25   E14001088                        Bexhill and Battle        2
27   E14001090                    Bicester and Woodstock        2
32   E14001095  Birmingham Hodge Hill and Solihull North        2
40   E14001103              Blackley and Middleton South        2
41   E14001104             Blackpool North and Fleetwood        2
43   E14001106                       Blaydon and Consett        2
46   E14001109                                  Bolsover        3
48   E14001111                  Bolton South and Walkden    