In [3]:
import taxcalc as tc
from policyengine_us import Microsimulation
from policyengine_us.model_api import *
from policyengine_core.model_api import *
import numpy as np
import pandas as pd

In [26]:
class TaxCalcVariableAlias(Variable):
    label = "TaxCalc Variable Alias"
    definition_period = YEAR
    entity = TaxUnit
    value_type = float


class tc_RECID(TaxCalcVariableAlias):
    def formula(tax_unit, period, parameters):
        return tax_unit("tax_unit_id", period)


class tc_MARS(TaxCalcVariableAlias):
    def formula(tax_unit, period, parameters):
        filing_status = tax_unit("filing_status", period).decode_to_str()
        CODE_MAP = {
            "SINGLE": 1,
            "JOINT": 2,
            "SEPARATE": 3,
            "HEAD_OF_HOUSEHOLD": 4,
            "WIDOW": 5,
        }
        return pd.Series(filing_status).map(CODE_MAP)
    
class tc_e00200p(TaxCalcVariableAlias):
    def formula(tax_unit, period, parameters):
        person = tax_unit.members
        employment_income = person("employment_income", period)
        is_tax_unit_head = person("is_tax_unit_head", period)
        return tax_unit.sum(employment_income * is_tax_unit_head)

class tc_e00200s(TaxCalcVariableAlias):
    def formula(tax_unit, period, parameters):
        person = tax_unit.members
        employment_income = person("employment_income", period)
        is_tax_unit_spouse = person("is_tax_unit_spouse", period)
        return tax_unit.sum(employment_income * is_tax_unit_spouse)

class tc_e00200(TaxCalcVariableAlias):
    adds = [
        "tc_e00200p",
        "tc_e00200s",
    ]

class taxcalc_extension(Reform):
    def apply(self):
        self.add_variables(
            tc_RECID,
            tc_MARS,
            tc_e00200p,
            tc_e00200s,
            tc_e00200,
        )


sim = Microsimulation(reform=taxcalc_extension, dataset="enhanced_cps_2023")
df = pd.DataFrame()

for variable in sim.tax_benefit_system.variables:
    if variable.startswith("tc_"):
        df[variable[3:]] = sim.calculate(variable)

df

Unnamed: 0,RECID,MARS,e00200p,e00200s,e00200
0,101.0,1.0,0.000000,0.000000,0.0
1,201.0,1.0,0.000000,0.000000,0.0
2,301.0,5.0,0.000000,0.000000,0.0
3,302.0,5.0,0.000000,0.000000,0.0
4,401.0,2.0,0.000000,42000.000000,42000.0
...,...,...,...,...,...
155307,108897504.0,2.0,93456.140625,118049.859375,211506.0
155308,108897600.0,2.0,79172.000000,0.000000,79172.0
155309,108897600.0,2.0,166321.203125,110880.796875,277202.0
155310,108897704.0,2.0,335500.000000,268400.000000,603900.0


In [29]:
np.allclose(df["e00200"], df["e00200p"] + df["e00200s"])

True

In [30]:
input_data = tc.Records(df, 2023)
policy = tc.Policy()
simulation = tc.Calculator(records=input_data, policy=policy)

simulation.calc_all()

ValueError: expression "e00200 == e00200p + e00200s" is not true for every record

In [5]:
simulation.array("RECID")

array([      101,       201,       301, ..., 108897600, 108897704,
       108897800], dtype=int32)