In [155]:
import pandas as pd
from microdf import MicroDataFrame

cols = ['SSUID','PNUM','MONTHCODE','ERESIDENCEID','ERELRPE','SPANEL','SWAVE',
      
      'WPFINWGT',
      
      'ESEX','TAGE','TAGE_EHC','ERACE','EORIGIN','EEDUC', 'EDEPCLM', 'EMS', 'EFSTATUS',
      
      'TJB1_TXAMT', 'TJB1_MSUM', 'TJB1_OCC', 'TJB1_IND', 'AJB1_TXAMT', 'EJB1_TYPPAY3',
      'TJB2_TXAMT', 'TJB2_MSUM', 'TJB2_OCC', 'TJB2_IND', 'AJB2_TXAMT', 'EJB2_TYPPAY3',
      'TJB3_TXAMT', 'TJB3_MSUM', 'TJB3_OCC', 'TJB3_IND', 'AJB3_TXAMT', 'EJB3_TYPPAY3',
      'TJB4_TXAMT', 'TJB4_MSUM', 'TJB4_OCC', 'TJB4_IND', 'AJB4_TXAMT', 'EJB4_TYPPAY3',
      
      'TPTOTINC']

for col in cols:
    if "JB1" in col:
        for i in range(2, 8):
            cols.append(col.replace("JB1", f"JB{i}"))

df = pd.read_csv("~/Downloads/pu2022.csv", delimiter="|", usecols=cols)

df["tip_income"] = df[df.columns[df.columns.str.contains("TXAMT")]].fillna(0).sum(axis=1) * 12
df["employment_income"] = df.TPTOTINC * 12
df["is_under_18"] = (df.TAGE < 18) & (df.MONTHCODE == 12)
df["is_under_6"] = (df.TAGE < 6) & (df.MONTHCODE == 12)
df["count_under_18"] = df.groupby("SSUID")["is_under_18"].sum().loc[df.SSUID.values].values
df["count_under_6"] = df.groupby("SSUID")["is_under_6"].sum().loc[df.SSUID.values].values
df["household_weight"] = df.WPFINWGT / 12
df["household_id"] = df.SSUID
df["age"] = df.TAGE

sipp = df[["household_id", "employment_income", "tip_income", "count_under_18", "count_under_6", "age", "household_weight"]]

import numpy as np

sipp = sipp[~sipp.isna().any(axis=1)]

sipp = sipp.loc[np.random.choice(sipp.index, size=100_000, replace=True, p=sipp.household_weight/sipp.household_weight.sum())]

from policyengine_us import Microsimulation

sim = Microsimulation(dataset="hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5")

cps = sim.calculate_dataframe([
    "person_id",
    "household_id",
    "employment_income",
    "age",
    "household_weight",
], 2025)

cps["is_under_18"] = cps.age < 18
cps["is_under_6"] = cps.age < 6
cps["count_under_18"] = cps.groupby("household_id")["is_under_18"].sum().loc[cps.household_id.values].values
cps["count_under_6"] = cps.groupby("household_id")["is_under_6"].sum().loc[cps.household_id.values].values
cps = pd.DataFrame(cps)

In [156]:
from microimpute.comparisons.autoimpute import autoimpute
from microimpute.models import QRF

model = QRF()

model = model.fit(X_train=sipp, predictors=["employment_income", "age", "count_under_18", "count_under_6"], imputed_variables=["tip_income"])

In [163]:
quantiles = np.linspace(0.1, 0.9, 9)

quantile = np.random.choice(quantiles, size=len(cps), replace=True)

results = model.predict(X_test=cps, quantiles=quantiles)

values = np.empty(len(cps), dtype=float)

for i, q in enumerate(quantiles):
    values[quantile == q] = results[q].tip_income.values[quantile == q]

In [166]:
from policyengine_core.reforms import Reform
from policyengine_us.model_api import *

class tip_income(Variable):
    entity = Person
    label = "tip income"
    value_type = float
    definition_period = YEAR

class taxable_income(Variable):
    value_type = float
    entity = TaxUnit
    label = "IRS taxable income"
    unit = USD
    definition_period = YEAR

    def formula(tax_unit, period, parameters):
        agi = tax_unit("adjusted_gross_income", period)
        exemptions = tax_unit("exemptions", period)
        deductions = tax_unit("taxable_income_deductions", period) + add(tax_unit, period, ["tip_income"])
        return max_(0, agi - exemptions - deductions)


class no_tax_on_tips(Reform):
    def apply(self):
        self.update_variable(tip_income)
        self.update_variable(taxable_income)

baseline = Microsimulation(dataset="hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5")
reformed = Microsimulation(dataset="hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5", reform=no_tax_on_tips)
reformed.set_input("tip_income", 2025, values)

In [167]:
reformed.calculate("income_tax", 2025).sum()/1e9 - baseline.calculate("income_tax", 2025).sum()/1e9

-11.500174170757418

In [168]:
reformed.calculate("tip_income", 2025).sum()/1e9

94.61886627605287

In [54]:
from policyengine import Simulation

sim = Simulation(
    country="us",
    scope="macro",
    reform=no_tax_on_tips,
    time_period=2025,
    data="hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5",
)
sim.reform_simulation.set_input("tip_income", 2025, results.tip_income)

Using Hugging Face for download.


In [55]:
result = sim.calculate_economy_comparison()

In [60]:
result.budget.budgetary_impact/1e9

-15.512736900124512

In [57]:
reformed.calculate("tip_income", 2025).sum()/1e9

78.70567237215111