<a href="https://colab.research.google.com/github/MikhailMatytsin/gross_up_prototype/blob/main/gross_up_prototype.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Here's a Python translation of your Stata code using pandas and numpy. This version attempts to preserve the structure and logic of your original model, with clear comments to help map concepts from Stata to Python. The code assumes you're working with a single observation (as in your set obs 1) and uses iterative adjustments to align statutory and net incomes.



In [55]:
import pandas as pd
import numpy as np

# SIMULATION PARAMETERS
# Suffix convention: *_b = baseline scenario, *_r = reform scenario.

# Baseline: tax rates, then the "pt" multipliers applied to the statutory tax
pit_rate_b, sic_rate_b = 0.13, 0.3
pit_pt_b, sic_pt_b = 1, 1

# Reform: same layout as the baseline
pit_rate_r, sic_rate_r = 0.13, 0.3
pit_pt_r, sic_pt_r = 1, 1

# Column groups used by the loops below
market_income = ["labor_inc", "self_inc", "other_inc"]
direct_taxes = ["pit"]
SSC = ["sic"]

d = 10**-8  # tolerance that the income gaps are compared against
report = 1  # interval for the (currently commented-out) progress printout

# INITIAL DATA: a single row (one household id, one person id, three incomes)
df = pd.DataFrame(
    [{'hh_id': 1, 'p_id': 1, 'labor_inc': 87, 'self_inc': 35, 'other_inc': 50}]
)

In [56]:
def SSC_direct_taxes_statutory(df, *, pit_taxable_list=None, sic_taxable_list=None):
    """
    Compute tax bases, income shares and tax amounts for 'pit' and 'sic'.

    The DataFrame is modified in place; nothing is returned. For each tax the
    following columns are (re)written:
      - '<tax>_base': sum of '<inc>_stat' over the tax's taxable incomes
      - '<tax>_sh_<inc>': '<inc>_stat' / '<tax>_base' for taxable incomes
        (0 where the base is 0), and 0 for every other entry of market_income
      - '<tax>_stat': -1 * rate * base
      - '<tax>': '<tax>_stat' * pt
    Missing values in an existing '<inc>_stat' column are replaced by 0, and a
    taxable income whose '<inc>_stat' column does not exist is created as 0.

    df: pandas DataFrame
    pit_taxable_list, sic_taxable_list: lists of income components for each tax
    Assumes:
      - market_income is defined globally (defaults to an empty list)
      - tax rates and pt params are in globals() with names like 'pit_rate'
        and 'pit_pt' (defaults: rate 0, pt 1)
    """
    market_income = globals().get("market_income", [])
    tax_lists = {
        'pit': pit_taxable_list or [],
        'sic': sic_taxable_list or []
    }

    for tax, inc_list in tax_lists.items():
        base_col = f"{tax}_base"
        df[base_col] = 0

        for inc in inc_list:
            stat_col = f"{inc}_stat"
            if stat_col in df.columns:
                df[stat_col] = df[stat_col].fillna(0)
            else:
                # df.get(stat_col, 0) would hand back a plain int here, which
                # has no .fillna(); create the column explicitly instead.
                df[stat_col] = 0.0
            df[base_col] += df[stat_col]

        for inc in inc_list:
            share_col = f"{tax}_sh_{inc}"
            stat_col = f"{inc}_stat"
            base = df[base_col]
            # A zero base means zero tax, so the share is set to 0 rather than
            # left as NaN (0/0) or inf (x/0); inf * 0 would turn into NaN later.
            share = (df[stat_col] / base).where(base != 0, 0)
            df[share_col] = share.fillna(0)

        # Incomes outside this tax's base carry none of it
        for inc in market_income:
            if inc not in inc_list:
                df[f"{tax}_sh_{inc}"] = 0

        # Soft-coded tax rate and pt from global scope
        tax_rate = globals().get(f"{tax}_rate", 0)
        tax_pt = globals().get(f"{tax}_pt", 1)

        tax_stat_col = f"{tax}_stat"
        df[tax_stat_col] = -1 * tax_rate * df[base_col]
        df[tax] = df[tax_stat_col] * tax_pt

In [57]:
# Step 1: Finding statutory wages
# The observed incomes are kept under '<inc>_orig' and also serve as the
# first guess for the statutory incomes '<inc>_stat'.
for inc in market_income:
    df[f'{inc}_stat'] = df[inc]
    df.rename(columns={f'{inc}': f'{inc}_orig'}, inplace=True)

# Baseline parameters: SSC_direct_taxes_statutory looks up '<tax>_rate' and
# '<tax>_pt' in the module globals, so the scenario is selected by assignment.
sic_rate = sic_rate_b
pit_rate = pit_rate_b

sic_pt = sic_pt_b
pit_pt = pit_pt_b

# Iterative adjustment of statutory income
max_steps = 10_000  # safety cap: the update below is not guaranteed to converge
max_gap = d * 2     # starts above the tolerance d so the loop body runs at least once
min_gap = 0
s = 1

while max_gap > d or min_gap < -d:
    if s > max_steps:
        raise RuntimeError(
            f"Step 1 did not converge within {max_steps} iterations "
            f"(max_gap={max_gap}, min_gap={min_gap})"
        )

    SSC_direct_taxes_statutory(
        df,
        pit_taxable_list=['labor_inc', 'self_inc'],
        sic_taxable_list=['labor_inc'],
    )

    for inc in market_income:
        # Net income implied by the current statutory guess: statutory income
        # plus this income's share of each direct tax ('<tax>_stat' is
        # computed as -rate * base by SSC_direct_taxes_statutory).
        df[f'{inc}_net_it'] = df[f'{inc}_stat']
        for tax in direct_taxes:
            df[f'{inc}_net_it'] += df[f'{tax}_sh_{inc}'] * df[f'{tax}_stat']

        # Distance between the observed income and the implied net income
        df[f'{inc}_gap'] = df[f'{inc}_orig'] - df[f'{inc}_net_it']

    # if s % report == 0:
        # print(f"Step {s}")
        # print(df['labor_inc_net_it'].mean())

    max_gap = max(df[f'{inc}_gap'].max() for inc in market_income)
    min_gap = min(df[f'{inc}_gap'].min() for inc in market_income)

    # Shift every statutory income by its remaining gap
    for inc in market_income:
        df[f'{inc}_stat'] += df[f'{inc}_gap']

    s += 1

# s is incremented after the last pass, so the printed value is passes + 1
print(f"End at step {s}")
# print(df)




End at step 13


In [58]:
# Step 2: Calculate equilibrium income
# Starting from '<inc>_orig', subtract for every SSC and direct tax the
# income's share of that tax times the tax column. The tax columns are
# computed as -rate * base * pt in SSC_direct_taxes_statutory.
for inc in market_income:
    equilibrium = df[f'{inc}_orig']
    for tax in SSC + direct_taxes:
        allocated_tax = df[f'{tax}_sh_{inc}'] * df[tax]
        equilibrium = equilibrium - allocated_tax
    df[inc] = equilibrium

In [59]:
# Store results for prototype: baseline columns get the suffix "_b"

# Copy the current equilibrium incomes into '<inc>_eq_b'
for inc in market_income:
    df[f'{inc}_eq_b'] = df[inc]

# Collect every old -> new column name and rename in one call
baseline_names = {}
for inc in market_income:
    baseline_names[f'{inc}_orig'] = f'{inc}_net_b'
    baseline_names[f'{inc}_stat'] = f'{inc}_stat_b'

for tax in SSC + direct_taxes:
    baseline_names[f'{tax}'] = f'{tax}_b'
    baseline_names[f'{tax}_stat'] = f'{tax}_stat_b'
    baseline_names[f'{tax}_base'] = f'{tax}_base_b'
    baseline_names.update(
        {f'{tax}_sh_{inc}': f'{tax}_sh_{inc}_b' for inc in market_income}
    )

df.rename(columns=baseline_names, inplace=True)

In [60]:
# Step 3: Nowcasting
# Every market income column is multiplied by 1, i.e. the values are carried
# over unchanged.
for inc in market_income:
    df[inc] = df[inc].mul(1)

In [61]:
# step 4. calculating the statutory wage for reform case via loop to make sure that the equilibrium wage matches.

for inc in market_income:
    df[f'{inc}_stat'] = df[inc] # starting point

# Reform parameters: SSC_direct_taxes_statutory looks up '<tax>_rate' and
# '<tax>_pt' in the module globals, so the scenario is selected by assignment.
sic_rate = sic_rate_r
pit_rate = pit_rate_r

sic_pt = sic_pt_r
pit_pt = pit_pt_r

# Iterative adjustment of statutory income
max_steps = 10_000  # safety cap: the update below is not guaranteed to converge
max_gap = d * 2     # starts above the tolerance d so the loop body runs at least once
min_gap = 0
s = 1

while max_gap > d or min_gap < -d:
    if s > max_steps:
        raise RuntimeError(
            f"Step 4 did not converge within {max_steps} iterations "
            f"(max_gap={max_gap}, min_gap={min_gap})"
        )

    SSC_direct_taxes_statutory(
        df,
        pit_taxable_list=['labor_inc', 'self_inc'],
        sic_taxable_list=['labor_inc'],
    )

    for inc in market_income:
        # Net income: statutory income plus its share of each direct tax
        # ('<tax>_stat' is computed as -rate * base).
        df[f'{inc}_net'] = df[f'{inc}_stat']
        for tax in direct_taxes:
            df[f'{inc}_net'] += df[f'{tax}_sh_{inc}'] * df[f'{tax}_stat']

        # Equilibrium income implied by this guess: net income minus the
        # income's share of every SSC and direct tax.
        df[f'{inc}_eq_it'] = df[f'{inc}_net']
        for tax in SSC + direct_taxes:
            df[f'{inc}_eq_it'] -= df[f'{tax}_sh_{inc}'] * df[f'{tax}_stat']

        assert df[f"{inc}_eq_it"].notna().all(), f"{inc}_eq_it contains missing values"

        # Distance between the target equilibrium income and the implied one
        df[f'{inc}_gap'] = df[f'{inc}'] - df[f'{inc}_eq_it']

    # if s % report == 0:
        # print(f"Step {s}")
        # print(df['labor_inc_eq_it'].mean())

    max_gap = max(df[f'{inc}_gap'].max() for inc in market_income)
    min_gap = min(df[f'{inc}_gap'].min() for inc in market_income)

    # Shift every statutory income by its remaining gap
    for inc in market_income:
        df[f'{inc}_stat'] += df[f'{inc}_gap']

    s += 1

# s is incremented after the last pass, so the printed value is passes + 1
print(f"End at step {s}")

End at step 21


In [62]:
# Store results for prototype: reform columns get the suffix "_r"

# Copy the current equilibrium incomes into '<inc>_eq_r'
for inc in market_income:
    df[f'{inc}_eq_r'] = df[inc]

# Collect every old -> new column name and rename in one call
reform_names = {}
for inc in market_income:
    reform_names[f'{inc}_net'] = f'{inc}_net_r'
    reform_names[f'{inc}_stat'] = f'{inc}_stat_r'

for tax in SSC + direct_taxes:
    reform_names[f'{tax}'] = f'{tax}_r'
    reform_names[f'{tax}_stat'] = f'{tax}_stat_r'
    reform_names[f'{tax}_base'] = f'{tax}_base_r'
    reform_names.update(
        {f'{tax}_sh_{inc}': f'{tax}_sh_{inc}_r' for inc in market_income}
    )

df.rename(columns=reform_names, inplace=True)

In [63]:
# Print, for each market income, its baseline/reform net income, baseline
# equilibrium income and baseline/reform statutory income.
report_suffixes = ['net_b', 'net_r', 'eq_b', 'stat_b', 'stat_r']
for inc in market_income:
    report_columns = [f'{inc}_{suffix}' for suffix in report_suffixes]
    print(df[report_columns])

   labor_inc_net_b  labor_inc_net_r  labor_inc_eq_b  labor_inc_stat_b  \
0               87             87.0           130.0             100.0   

   labor_inc_stat_r  
0             100.0  
   self_inc_net_b  self_inc_net_r  self_inc_eq_b  self_inc_stat_b  \
0              35            35.0      40.229885        40.229885   

   self_inc_stat_r  
0        40.229885  
   other_inc_net_b  other_inc_net_r  other_inc_eq_b  other_inc_stat_b  \
0               50             50.0            50.0              50.0   

   other_inc_stat_r  
0              50.0  


**Notes:**


* The translation assumes a single-row DataFrame, as in your Stata example.
* The SSC_direct_taxes_statutory() function mimics the Stata program of the same name.
* Handling of missing values is simplified — in production, ensure NaNs are handled robustly.
* The code structure supports extension to multiple households if needed by expanding the DataFrame rows.

Let me know if you want to modularize this into functions or extend it to support vectorized reform simulations.