# HBAI poverty rates

This notebook compares official HBAI poverty statistics with PolicyEngine model outputs. We examine four poverty definitions:
- Absolute poverty before housing costs (BHC)
- Absolute poverty after housing costs (AHC)
- Relative poverty before housing costs (BHC)
- Relative poverty after housing costs (AHC)

Official data covers 2002-2023, while model projections cover 2023-2030.

## Generate all data

In [1]:
from IPython.core.display import HTML, display_html


def add_fonts():
    """Emit the HTML <link> tags that load the Roboto Serif web font.

    The markup preconnects to the Google Fonts hosts and pulls in the
    Roboto Serif stylesheet, then hands it to ``display_html`` so it is
    rendered in the notebook output.

    Returns:
        Whatever ``display_html`` returns for the rendered markup.
    """
    font_links = """
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Roboto+Serif:ital,opsz,wght@0,8..144,100..900;1,8..144,100..900&display=swap" rel="stylesheet">
    """
    return display_html(HTML(font_links))

# Load the Roboto Serif font links into this notebook's output.
add_fonts()

In [2]:
from policyengine_uk import Microsimulation
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from policyengine_core.charts import format_fig, BLUE_COLOUR_SCALE, GRAY

# Read the official HBAI outturn file and keep only the 'Baseline' scenario
# rows; the trailing copy() keeps the result independent of the parsed frame.
official_data = (
    pd.read_csv('hbai_outturn.csv')
    .loc[lambda frame: frame['scenario'] == 'Baseline']
    .copy()
)

# Quick sanity check of the year span and the age groups present in the file.
print(
    f"Official data: {official_data['year'].min()}-{official_data['year'].max()}"
)
print(
    f"Age groups: {official_data['group'].unique()}"
)

def get_age_group(age):
    """Return the HBAI age-group label for a single age.

    Ages below 18 are "Children", ages from 18 up to (but excluding) 66
    are "Working age", and everything else is "Pensioners".
    """
    # Guard clauses, checked youngest band first.
    if age < 18:
        return "Children"
    if age < 66:
        return "Working age"
    return "Pensioners"

def calculate_poverty_stats_hbai_format(
    dataset="enhanced_frs",
    scenario="PolicyEngine EFRS",
    start_year=2023,
    end_year=2030,
):
    """Calculate poverty stats in the same format as official HBAI data.

    Args:
        dataset: Dataset stem; the simulation is loaded from
            ``hf://policyengine/policyengine-uk-data-private/{dataset}_2023_24.h5``.
        scenario: Label written to the ``scenario`` column of every row.
        start_year: First simulated year (inclusive). Defaults to 2023.
        end_year: Last simulated year (inclusive). Defaults to 2030.

    Returns:
        A ``pandas.DataFrame`` with columns ``scenario``, ``year``, ``bhc``,
        ``relative``, ``headcount``, ``group`` and ``poverty_rate``. For every
        (year, poverty measure, age group) there are two rows: one with
        ``headcount=False`` holding the poverty rate as a fraction, and one
        with ``headcount=True`` holding the summed weight of people in
        poverty (stored in the same ``poverty_rate`` column).
    """
    sim = Microsimulation(dataset=f"hf://policyengine/policyengine-uk-data-private/{dataset}_2023_24.h5")

    # Model variable name -> (bhc, relative) flags used by the HBAI layout.
    poverty_flags = {
        "in_poverty_bhc": (True, False),  # BHC, absolute
        "in_poverty_ahc": (False, False),  # AHC, absolute
        "in_relative_poverty_bhc": (True, True),  # BHC, relative
        "in_relative_poverty_ahc": (False, True),  # AHC, relative
    }
    groups = ("All", "Children", "Working age", "Pensioners")
    # Derive the requested columns from the mapping so the two cannot drift.
    columns = ["age", *poverty_flags, "person_weight"]

    results = []
    for year in range(start_year, end_year + 1):
        df = sim.calculate_dataframe(columns, period=year)
        df["age_group"] = df["age"].apply(get_age_group)

        # Slice each age group once per year instead of once per measure.
        group_frames = {
            group: df if group == "All" else df[df["age_group"] == group]
            for group in groups
        }
        # NOTE(review): these sums treat person_weight as a plain column. If
        # calculate_dataframe returns an already-weighted frame, confirm the
        # weights are not being applied twice.
        group_totals = {
            group: frame["person_weight"].sum()
            for group, frame in group_frames.items()
        }

        for pov_var, (bhc, relative) in poverty_flags.items():
            for group in groups:
                group_df = group_frames[group]
                total_pop = group_totals[group]
                poor_pop = group_df[group_df[pov_var] == 1]["person_weight"].sum()

                # An empty group reports a rate of 0 rather than dividing by zero.
                poverty_rate = poor_pop / total_pop if total_pop > 0 else 0

                row = {
                    "scenario": scenario,
                    "year": year,
                    "bhc": bhc,
                    "relative": relative,
                }
                # Rate row first, then headcount row, matching the HBAI file
                # where both kinds of value share the poverty_rate column.
                results.append(
                    {**row, "headcount": False, "group": group, "poverty_rate": poverty_rate}
                )
                results.append(
                    {**row, "headcount": True, "group": group, "poverty_rate": poor_pop}
                )

    return pd.DataFrame(results)

# Run the projection once per input dataset; the second argument is the
# scenario label each set of rows carries.
frs_model = calculate_poverty_stats_hbai_format(
    dataset="frs", scenario="PE FRS"
)
efrs_model = calculate_poverty_stats_hbai_format(
    dataset="enhanced_frs", scenario="PE EFRS"
)

# Stack the two model runs, then append them beneath the official outturn
# rows so every scenario lives in one long table.
model_frames = [frs_model, efrs_model]
model_data = pd.concat(model_frames, ignore_index=True)
all_data = pd.concat([official_data, model_data], ignore_index=True)

# Helper functions for visualisation
def get_poverty_description(bhc, relative):
    """Build the display label for a poverty measure.

    Args:
        bhc: Truthy for before housing costs, falsy for after housing costs.
        relative: Truthy for relative poverty, falsy for absolute poverty.

    Returns:
        A string such as "Relative poverty AHC".
    """
    measure = "Absolute" if not relative else "Relative"
    cost_basis = "AHC" if not bhc else "BHC"
    return " ".join((measure, "poverty", cost_basis))

def show_poverty_analysis(data, bhc, relative):
    """Show table and faceted chart for a specific poverty type.

    Args:
        data: Long-format frame with ``scenario``, ``year``, ``bhc``,
            ``relative``, ``headcount``, ``group`` and ``poverty_rate``
            columns.
        bhc: True for before housing costs, False for after housing costs.
        relative: True for relative poverty, False for absolute poverty.

    Displays the 2023 comparison table (groups by scenario, in percent) and
    a line chart of poverty rates over time with one facet per age group.
    Returns nothing.
    """
    # Imported locally so the function carries its own display dependency.
    from IPython.display import display

    poverty_desc = get_poverty_description(bhc, relative)

    # Filter for specific poverty type (rates only, not headcounts)
    subset = data[
        (data['bhc'] == bhc) &
        (data['relative'] == relative) &
        (data['headcount'] == False)
    ].copy()

    # Convert to percentages
    subset['poverty_rate'] = subset['poverty_rate'] * 100

    # Show comparison table for 2023. The table used to be computed and then
    # dropped, so it is now actually displayed.
    comparison_2023 = subset[subset['year'] == 2023].pivot_table(
        index='group',
        columns='scenario',
        values='poverty_rate'
    ).round(1)
    display(comparison_2023)

    # Colour both the long and the abbreviated scenario labels: the model
    # runs in this notebook are labelled 'PE FRS' / 'PE EFRS', which the
    # long-form keys alone would never match.
    frs_colour = BLUE_COLOUR_SCALE[1]
    efrs_colour = BLUE_COLOUR_SCALE[2]
    scenario_colours = {
        'Baseline': GRAY,
        'PolicyEngine FRS': frs_colour,
        'PolicyEngine EFRS': efrs_colour,
        'PE FRS': frs_colour,
        'PE EFRS': efrs_colour,
    }

    # Create faceted line chart ('All' is left out; it appears in the table)
    fig = px.line(
        subset[subset['group'] != 'All'],
        x='year',
        y='poverty_rate',
        color='scenario',
        facet_col='group',
        title=poverty_desc,
        labels={'poverty_rate': 'Poverty rate (%)', 'year': 'Year'},
        markers=True,
        color_discrete_map=scenario_colours,
        height=400
    )

    # Add vertical line at 2023
    fig.add_vline(x=2023, line_dash="dot", line_color="gray", opacity=0.5)

    fig = format_fig(fig)
    fig.show()

Official data: 2002-2023
Age groups: ['All' 'Children' 'Pensioners' 'Working age']


## Absolute poverty before housing costs (BHC)

In [3]:
# Official outturn vs. model runs: absolute poverty, before housing costs.
show_poverty_analysis(all_data, bhc=True, relative=False)

## Absolute poverty after housing costs (AHC)

In [4]:
# Official outturn vs. model runs: absolute poverty, after housing costs.
show_poverty_analysis(all_data, bhc=False, relative=False)

## Relative poverty before housing costs (BHC)

In [5]:
# Official outturn vs. model runs: relative poverty, before housing costs.
show_poverty_analysis(all_data, bhc=True, relative=True)

## Relative poverty after housing costs (AHC)

In [6]:
# Official outturn vs. model runs: relative poverty, after housing costs.
show_poverty_analysis(all_data, bhc=False, relative=True)