In [1]:
from policyengine_us import Microsimulation
import pandas as pd
from microdf import MicroDataFrame

# Build one microsimulation per survey-based dataset so the cells below can
# compare them side by side.
cps_sim = Microsimulation(dataset="cps_2023")
enhanced_cps_sim = Microsimulation(dataset="enhanced_cps")

# Read the PUF extract from a local gzipped CSV, then wrap it in a
# MicroDataFrame with "decimal_weight" passed as its weights.
puf_records = pd.read_csv("puf_full_demographics.csv.gz")
puf = MicroDataFrame(puf_records, weights="decimal_weight")

In [2]:
import numpy as np
import plotly.express as px
from policyengine_core.charts import format_fig, BLUE, GRAY, DARK_GRAY

# Compare, per AGI band, how many returns each dataset places in the band
# against the calibration target stored under
# gov.irs.soi.agi.number_of_returns (parameters evaluated at 2023-01-01).
parameters = cps_sim.tax_benefit_system.parameters("2023-01-01").calibration
# +inf is appended so the last listed threshold opens an unbounded top band.
agi_thresholds = list(parameters.gov.irs.soi.agi.number_of_returns.thresholds) + [np.inf]
num_returns = parameters.gov.irs.soi.agi.number_of_returns.amounts

cps_agi = cps_sim.calculate("adjusted_gross_income")
enhanced_cps_agi = enhanced_cps_sim.calculate("adjusted_gross_income")
puf_agi = puf.adjusted_gross_income

# Insertion order determines the order of the rows (and bars) within a band.
agi_by_source = {
    "CPS": cps_agi,
    "Enhanced CPS": enhanced_cps_agi,
    "PUF (2015)": puf_agi,
}

band_names = []
sources = []
values = []

for i, (lower, upper) in enumerate(zip(agi_thresholds[:-1], agi_thresholds[1:])):
    band_name = f"{lower:,.0f}-{upper:,.0f}"
    target = num_returns[i]

    for source, agi in agi_by_source.items():
        # Bands are half-open [lower, upper). Series.between is closed on both
        # ends by default, which would count a record sitting exactly on a
        # threshold in two adjacent bands.
        # NOTE(review): left-closed is meant to match "X under Y" size-of-AGI
        # classes — confirm against the calibration parameter definition.
        in_band = agi.between(lower, upper, inclusive="left")

        band_names.append(band_name)
        sources.append(source)
        # presumably a weighted count, since these are microdf series — confirm
        values.append(agi[in_band].count() - target)

df = pd.DataFrame({
    "Band": band_names,
    "Source": sources,
    "Number of returns error": values,
})

# Express the error in thousands of returns, matching the y-axis title below.
df["Number of returns error"] /= 1e3

fig = px.bar(
    df,
    x="Band",
    y="Number of returns error",
    color="Source",
    barmode="group",
    color_discrete_map={
        "CPS": GRAY,
        "PUF (2015)": DARK_GRAY,
        "Enhanced CPS": BLUE,
    },
)

fig.update_layout(
    title="Number of returns error by dataset and AGI band",
    yaxis_title="Number of returns error (thousands)",
    # "+" makes the tick labels carry an explicit sign.
    yaxis_tickformat="+",
)

format_fig(fig)

In [3]:
import numpy as np
import plotly.express as px
from policyengine_core.charts import format_fig, BLUE, GRAY, DARK_GRAY

# Compare, per AGI band, the total AGI each dataset places in the band against
# the calibration target stored under gov.irs.soi.agi.total_agi
# (parameters evaluated at 2023-01-01).
parameters = cps_sim.tax_benefit_system.parameters("2023-01-01").calibration
# +inf is appended so the last listed threshold opens an unbounded top band.
agi_thresholds = list(parameters.gov.irs.soi.agi.total_agi.thresholds) + [np.inf]
total_agi_targets = parameters.gov.irs.soi.agi.total_agi.amounts
# Legacy alias: this name holds total-AGI targets here, not return counts.
# Kept so any later cell that reads it after this one still sees the same value.
num_returns = total_agi_targets

cps_agi = cps_sim.calculate("adjusted_gross_income")
enhanced_cps_agi = enhanced_cps_sim.calculate("adjusted_gross_income")
puf_agi = puf.adjusted_gross_income

# Insertion order determines the order of the rows (and bars) within a band.
agi_by_source = {
    "CPS": cps_agi,
    "Enhanced CPS": enhanced_cps_agi,
    "PUF (2015)": puf_agi,
}

band_names = []
sources = []
values = []

for i, (lower, upper) in enumerate(zip(agi_thresholds[:-1], agi_thresholds[1:])):
    # Bands are labelled by their lower threshold only.
    band_name = f"{lower:,.0f}"
    target = total_agi_targets[i]

    for source, agi in agi_by_source.items():
        # Bands are half-open [lower, upper). Series.between is closed on both
        # ends by default, which would add a record sitting exactly on a
        # threshold to the totals of two adjacent bands.
        # NOTE(review): left-closed is meant to match "X under Y" size-of-AGI
        # classes — confirm against the calibration parameter definition.
        in_band = agi.between(lower, upper, inclusive="left")

        band_names.append(band_name)
        sources.append(source)
        # presumably a weighted sum, since these are microdf series — confirm
        values.append(agi[in_band].sum() - target)

df = pd.DataFrame({
    "Band": band_names,
    "Source": sources,
    "Total AGI error": values,
})

# Express the error in billions, matching the "($bn)" y-axis title below.
df["Total AGI error"] /= 1e9

fig = px.bar(
    df,
    x="Band",
    y="Total AGI error",
    color="Source",
    barmode="group",
    color_discrete_map={
        "CPS": GRAY,
        "PUF (2015)": DARK_GRAY,
        "Enhanced CPS": BLUE,
    },
)

fig.update_layout(
    title="Total AGI error by dataset and AGI band",
    yaxis_title="Total AGI error ($bn)",
    # "+" makes the tick labels carry an explicit sign.
    yaxis_tickformat="+",
)

format_fig(fig)