In [2]:
from policyengine_us import Microsimulation
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from policyengine_core.charts import format_fig

def calculate_spm_ratio(simulation, year):
    """Return SPM unit net income divided by the SPM unit threshold.

    Args:
        simulation: Object exposing ``calculate(variable_name, period=...)``.
        year: Period forwarded to both ``calculate`` calls.

    Returns:
        The result of ``spm_unit_net_income / spm_unit_spm_threshold`` as
        produced by the ``/`` operator of the values ``calculate`` returns.
        No guard is applied for zero or negative thresholds.
    """
    # Net income is requested first, then the threshold.
    net_income, threshold = (
        simulation.calculate(variable, period=year)
        for variable in ("spm_unit_net_income", "spm_unit_spm_threshold")
    )
    return net_income / threshold

def create_binned_data(data, bins, weights=None):
    """Histogram ``data`` and return the result as a tidy DataFrame.

    Args:
        data: Array-like of values to bin.
        bins: Bin specification passed straight to ``np.histogram`` (an int
            number of bins or a sequence of bin edges).
        weights: Optional array-like with one weight per element of ``data``.
            When given, each bin holds the sum of weights rather than a raw
            count. Defaults to ``None`` (plain unweighted counts).

    Returns:
        DataFrame with one row per bin and two columns: ``bin_center`` (the
        midpoint of each bin's edges) and ``count`` (the bin's count or
        weighted total).
    """
    counts, bin_edges = np.histogram(data, bins=bins, weights=weights)
    # Midpoint of each [left, right] edge pair; one fewer than the edges.
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    return pd.DataFrame({"bin_center": bin_centers, "count": counts})

# Analysis year shared by every calculation below.
year = 2024

# One baseline microsimulation per input dataset.
baseline_cps = Microsimulation(dataset="cps_2024")
baseline_ecps = Microsimulation(dataset="enhanced_cps_2024")

# Net-income-to-threshold ratio for each dataset (CPS first, then enhanced).
ratio_cps, ratio_ecps = (
    calculate_spm_ratio(_sim, year) for _sim in (baseline_cps, baseline_ecps)
)

# 26 evenly spaced edges on [0, 5] give 25 bins; values outside that range
# are not counted by the histogram.
bins = np.linspace(0, 5, 26)
df_cps_binned, df_ecps_binned = (
    create_binned_data(_ratio, bins) for _ratio in (ratio_cps, ratio_ecps)
)

# Side-by-side panels, one per dataset, sharing axes for direct comparison.
fig = make_subplots(
    rows=1,
    cols=2,
    shared_xaxes=True,
    shared_yaxes=True,
    subplot_titles=("CPS 2024", "Enhanced CPS 2024"),
)

# One bar trace per dataset, each placed in its own column.
for _col, (_label, _binned) in enumerate(
    (("CPS", df_cps_binned), ("Enhanced CPS", df_ecps_binned)), start=1
):
    fig.add_trace(
        go.Bar(x=_binned["bin_center"], y=_binned["count"], name=_label),
        row=1,
        col=_col,
    )

# Figure-level title, bar arrangement, and canvas size.
fig.update_layout(
    title_text="Distribution of SPM Unit Net Income / SPM Unit SPM Threshold (2024)",
    barmode="group",
    bargap=0.1,
    height=600,
    width=1200,
)

# Axis titles apply to both panels; the x range matches the bin range.
fig.update_xaxes(title_text="Ratio", range=[0, 5])
fig.update_yaxes(title_text="Frequency")

# Dashed red marker at ratio = 1, where net income equals the threshold.
for _col in (1, 2):
    fig.add_vline(x=1, line_dash="dash", line_color="red", row=1, col=_col)

# Apply the shared chart formatting, then render.
fig = format_fig(fig)
fig.show()

# Unweighted descriptive statistics of the ratio for each dataset.
print("Summary Statistics:")
for _heading, _ratios in (
    ("CPS 2024:", ratio_cps),
    ("\nEnhanced CPS 2024:", ratio_ecps),
):
    print(_heading)
    print(pd.Series(_ratios).describe())

Summary Statistics:
CPS 2024:
count    58711.000000
mean         3.093463
std          3.311985
min        -44.055717
25%          1.316754
50%          2.283899
75%          3.817694
max        104.305397
dtype: float64

Enhanced CPS 2024:
count    117422.000000
mean         75.388573
std         489.630920
min       -2853.169434
25%           0.974510
50%           2.163770
75%           4.607322
max       11401.973633
dtype: float64
