In [13]:
from policyengine_us import Microsimulation
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from policyengine_core.charts import format_fig

In [4]:


def calculate_spm_ratio(simulation, year):
    """Return SPM unit net income divided by the SPM threshold, per person.

    Parameters
    ----------
    simulation : object
        Anything exposing ``calculate(variable, map_to=..., period=...)``.
    year : int
        Period passed through to ``simulation.calculate``.

    Returns
    -------
    The elementwise quotient of the two calculated results (no guard against
    zero or negative thresholds is applied).
    """

    def person_level(variable):
        # Both variables are requested with identical mapping and period.
        return simulation.calculate(variable, map_to="person", period=year)

    return person_level("spm_unit_net_income") / person_level("spm_unit_spm_threshold")


# One baseline microsimulation per dataset, both evaluated for the same year
year = 2024
baseline_cps = Microsimulation(dataset="cps_2024")
baseline_ecps = Microsimulation(dataset="enhanced_cps_2024")

# Income-to-threshold ratios (net income / SPM threshold) for each dataset
ratio_cps = calculate_spm_ratio(baseline_cps, year)
ratio_ecps = calculate_spm_ratio(baseline_ecps, year)

# Dollar gap between net income and the threshold (net income - threshold).
# Every calculation below uses the same person-level mapping and period.
person_level = dict(map_to="person", period=year)

spm_unit_net_income_cps = baseline_cps.calculate("spm_unit_net_income", **person_level)
spm_unit_spm_threshold_cps = baseline_cps.calculate("spm_unit_spm_threshold", **person_level)
diff_cps = spm_unit_net_income_cps - spm_unit_spm_threshold_cps

spm_unit_net_income_ecps = baseline_ecps.calculate("spm_unit_net_income", **person_level)
spm_unit_spm_threshold_ecps = baseline_ecps.calculate("spm_unit_spm_threshold", **person_level)
diff_ecps = spm_unit_net_income_ecps - spm_unit_spm_threshold_ecps

KeyboardInterrupt: 

In [14]:
def create_binned_data(data, bins):
    """Count observations per half-open bin ``[bins[i], bins[i + 1])``.

    Parameters
    ----------
    data : array-like
        Values to bin. It must support elementwise comparison against a
        scalar, and the resulting boolean mask must provide ``.sum()``.
        Whatever ``.sum()`` returns is stored as the bin count, truncated to
        an integer.
    bins : array-like
        Increasing bin edges. The first edge may be ``-inf`` and the last
        edge may be ``inf`` to collect everything below / above the finite
        range. Values equal to the last edge are not counted.

    Returns
    -------
    pd.DataFrame
        One row per bin with columns ``bin_center`` and ``count``.

    Notes
    -----
    The midpoint of an open-ended bin is infinite, which cannot be placed on
    a chart axis. For such a bin the centre is set half a typical (median)
    finite bin width beyond its finite edge, so it sits next to its
    neighbour. If no bin has two finite edges the plain midpoint is kept.
    """
    edges = np.asarray(bins, dtype=float)
    lower, upper = edges[:-1], edges[1:]

    counts = np.zeros(len(lower), dtype=int)
    for i, (lo, hi) in enumerate(zip(lower, upper)):
        # Keep the mask-and-sum form: for a weighted series the sum is
        # delegated to the series itself rather than to numpy.
        counts[i] = ((data >= lo) & (data < hi)).sum()

    # (-inf + inf) would emit an "invalid value" warning; the NaN it yields
    # is left in place because no sensible centre exists for that bin.
    with np.errstate(invalid="ignore"):
        centers = (lower + upper) / 2

    has_finite_edges = np.isfinite(lower) & np.isfinite(upper)
    if has_finite_edges.any():
        half_width = np.median((upper - lower)[has_finite_edges]) / 2
        open_below = np.isneginf(lower) & np.isfinite(upper)
        open_above = np.isfinite(lower) & np.isposinf(upper)
        centers = np.where(open_below, upper - half_width, centers)
        centers = np.where(open_above, lower + half_width, centers)

    return pd.DataFrame({'bin_center': centers, 'count': counts})

# Dollar gap between net income and the SPM threshold (net income - threshold),
# computed at person level for each dataset.
person_level = dict(map_to="person", period=year)

spm_unit_net_income_cps = baseline_cps.calculate("spm_unit_net_income", **person_level)
spm_unit_spm_threshold_cps = baseline_cps.calculate("spm_unit_spm_threshold", **person_level)
diff_cps = spm_unit_net_income_cps - spm_unit_spm_threshold_cps

spm_unit_net_income_ecps = baseline_ecps.calculate("spm_unit_net_income", **person_level)
spm_unit_spm_threshold_ecps = baseline_ecps.calculate("spm_unit_spm_threshold", **person_level)
diff_ecps = spm_unit_net_income_ecps - spm_unit_spm_threshold_ecps

# Ratio bins: 0.1-wide from 0 to 1.5, plus an open-ended bin on each side
ratio_bins = np.concatenate([[-np.inf], np.linspace(0, 1.5, 16), [np.inf]])
df_cps_ratio = create_binned_data(ratio_cps, ratio_bins)
df_ecps_ratio = create_binned_data(ratio_ecps, ratio_bins)

# Dollar bins: $1,000-wide from -$50,000 to $50,000
diff_bins = np.arange(-50000, 50001, 1000)
df_cps_diff = create_binned_data(diff_cps, diff_bins)
df_ecps_diff = create_binned_data(diff_ecps, diff_bins)

# 2x2 grid: ratios on the top row, dollar differences on the bottom row;
# CPS in the left column, Enhanced CPS in the right column.
panel_titles = (
    "CPS 2024 - Ratio",
    "Enhanced CPS 2024 - Ratio",
    "CPS 2024 - Dollar Difference",
    "Enhanced CPS 2024 - Dollar Difference",
)
fig = make_subplots(rows=2, cols=2, subplot_titles=panel_titles, shared_yaxes=True)

# One bar trace per panel, added in row-major order
panels = [
    (1, 1, df_cps_ratio, "CPS - Ratio"),
    (1, 2, df_ecps_ratio, "Enhanced CPS - Ratio"),
    (2, 1, df_cps_diff, "CPS - Difference"),
    (2, 2, df_ecps_diff, "Enhanced CPS - Difference"),
]
for row, col, binned, label in panels:
    fig.add_trace(
        go.Bar(x=binned['bin_center'], y=binned['count'], name=label),
        row=row,
        col=col,
    )

# Figure-level layout
fig.update_layout(
    title_text="Distribution of SPM Unit Net Income / SPM Unit SPM Threshold and Dollar Difference (2024)",
    barmode='group',
    bargap=0.1,
    height=1200,
    width=1200,
)

# Per-row axis settings: (row, x-axis title, visible x range, reference line position)
row_specs = [
    (1, "Ratio", (0, 1.5), 1),
    (2, "Dollar Difference", (-20000, 20000), 0),
]

for row, x_title, x_range, _ in row_specs:
    for col in (1, 2):
        fig.update_xaxes(title_text=x_title, range=list(x_range), row=row, col=col)

# Only the left column gets a y-axis title
for row, _, _, _ in row_specs:
    fig.update_yaxes(title_text="Frequency", row=row, col=1)

# Dashed red reference lines: ratio = 1 on the top row, difference = 0 on the bottom row
for row, _, _, reference_x in row_specs:
    for col in (1, 2):
        fig.add_vline(x=reference_x, line_dash="dash", line_color="red", row=row, col=col)

# Apply the shared chart formatting, then render
fig = format_fig(fig)
fig.show()

# Descriptive statistics of the ratios for each dataset
print("Summary Statistics:")
for header, ratios in (("CPS 2024:", ratio_cps), ("\nEnhanced CPS 2024:", ratio_ecps)):
    print(header)
    print(pd.Series(ratios).describe())

Summary Statistics:
CPS 2024:
count    144265.000000
mean          3.112052
std           3.075388
min         -44.055717
25%           1.420955
50%           2.346481
75%           3.801475
max         104.305397
dtype: float64

Enhanced CPS 2024:
count    288530.000000
mean         74.728607
std         438.380035
min       -3641.827637
25%           1.116068
50%           2.306132
75%           4.879819
max       11173.454102
dtype: float64


# We care about ratios of up to 150%. Also include negative values, and add a dollar perspective: $1,000 bins of how far each person is from the poverty line (keep the 10% ratio bins as well).


In [10]:
# NOTE(review): `df_ecps_binned` is never assigned in the visible part of this
# notebook; this cell presumably relies on a variable left in the kernel by a
# cell that has since been edited or removed. It would raise NameError on a
# fresh "Restart & Run All" unless it is defined elsewhere - confirm.
df_ecps_binned

Unnamed: 0,bin_center,count
0,0.1,793342
1,0.3,1803468
2,0.5,5449166
3,0.7,9763932
4,0.9,23043143
5,1.1,34265985
6,1.3,27311177
7,1.5,40745925
8,1.7,25037491
9,1.9,24195811


In [11]:
# NOTE(review): `df_cps_binned` is never assigned in the visible part of this
# notebook; this cell presumably relies on a variable left in the kernel by a
# cell that has since been edited or removed. It would raise NameError on a
# fresh "Restart & Run All" unless it is defined elsewhere - confirm.
df_cps_binned

Unnamed: 0,bin_center,count
0,0.1,368725
1,0.3,1542170
2,0.5,2713584
3,0.7,6793768
4,0.9,13535442
5,1.1,21175379
6,1.3,28231565
7,1.5,30122345
8,1.7,29415661
9,1.9,31482229
