In [1]:
import pandas as pd

In [2]:
# Read the Stage II targets (first CSV column becomes the index) and the HT2 file.
puf_targ = pd.read_csv('Stage_II_targets.csv', index_col=0)
ht2 = pd.read_csv('17in54cmcsv.csv')
# Keep only the HT2 rows whose STATE column equals 'US'.
ht2_us = ht2.loc[ht2['STATE'].eq('US')]

In [3]:
# Flip the targets table so that its original columns become the row index.
puf_targ_2 = puf_targ.T

# Derive net columns as income minus loss, for Schedule C and for Schedule E.
puf_targ_2['Net Business Income'] = (
    puf_targ_2['Business Income (Schedule C)']
    .sub(puf_targ_2['Business Loss (Schedule C)'])
)
puf_targ_2['Net Supplemental Income (Schedule E)'] = (
    puf_targ_2['Supplemental Income (Schedule E)']
    .sub(puf_targ_2['Supplemental Loss (Schedule E)'])
)

In [4]:
# Pairs of (PUF target column name, HT2 column name) to be compared.
# The two Social Security pairs are intentionally left out of the list:
#   ('Number of Returns w/ Gross Security Income', 'N02500')
#   ('Gross Social Security Income', 'A02500')
crosswalk = [
    ('Single Returns', 'MARS1'),
    ('Joint Returns', 'MARS2'),
    ('Head of Household Returns', 'MARS4'),
    ('Number of Dependent Exemptions', 'NUMDEP'),
    ('Taxable Interest Income', 'A00300'),
    ('Ordinary Dividends', 'A00600'),
    ('Net Business Income', 'A00900'),
    ('Net Capital Gains in AGI', 'A01000'),
    ('Taxable Pensions and Annuities', 'A01700'),
    ('Net Supplemental Income (Schedule E)', 'A26270'),
    ('Unemployment Compensation', 'A02300'),
]

**NOTES:**

- I omitted the population variables from comparison because a) the concepts don't line up and b) the population variables are used to grow the PUF targets and aren't targets themselves
- The PUF targets relating to Social Security include households with untaxed SS benefits, whereas the HT2 reports taxed SS benefits. So, I omitted both the number of returns receiving SS benefits and the amount of SS benefits from the comparison.
- Business Income and Supplemental Income are targeted separately as income and loss in the PUF. HT2 reports net business/supplemental income.

In [5]:
# Build the national comparison table: one row per crosswalk entry holding the
# HT2 value, the 2017 PUF target, and their relative difference.
# Rows are collected in plain lists and the DataFrame is built once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# growing a frame row by row copies it on every iteration.
compare_rows = []
compare_labels = []
for puf_name, ht2_col in crosswalk:
    # The HT2 cell is parsed as a comma-formatted integer string; row label 0
    # of ht2_us is used for every item. Both values are scaled by 1e6.
    ht2_val = round(int(ht2_us.loc[0, ht2_col].replace(',', '')) / 1e6, 1)
    puf_targ_val = round(puf_targ_2.loc['2017', puf_name] / 1e6, 1)
    # Difference as a fraction of the HT2 value, computed from the rounded
    # figures so it is consistent with the two value columns displayed.
    perc_dif = round((ht2_val - puf_targ_val) / ht2_val, 3)
    compare_rows.append({"HT2_value": ht2_val,
                         "PUF_target_val": puf_targ_val,
                         "perc_dif": perc_dif})
    compare_labels.append(puf_name)
compare_df = pd.DataFrame(compare_rows, index=compare_labels,
                          columns=["HT2_value", "PUF_target_val", "perc_dif"])
compare_df

Unnamed: 0,HT2_value,PUF_target_val,perc_dif
Single Returns,72.9,72.8,0.001
Joint Returns,54.7,57.7,-0.055
Head of Household Returns,21.6,23.1,-0.069
Number of Dependent Exemptions,94.2,97.1,-0.031
Taxable Interest Income,106.6,111.9,-0.05
Ordinary Dividends,279.7,352.9,-0.262
Net Business Income,350.9,370.2,-0.055
Net Capital Gains in AGI,855.7,857.6,-0.002
Taxable Pensions and Annuities,729.2,761.8,-0.045
Net Supplemental Income (Schedule E),677.7,657.5,0.03


In [6]:
# Pairs of (PUF wage target column name, row label used to look up the
# matching row in ht2_us). Row labels 3 and 4 are not listed.
crosswalk_stubs = [
    ('Wages and Salaries: Zero or Less', 1),
    ('Wages and Salaries: $1 Less Than $10,000', 2),
    ('Wages and Salaries: $50,000 Less Than $75,000', 5),
    ('Wages and Salaries: $75,000 Less Than $100,000', 6),
    ('Wages and Salaries: $100,000 Less Than $200,000', 7),
    ('Wages and Salaries: $200,000 Less Than $500,000', 8),
    ('Wages and Salaries: $500,000 Less Than $1 Million', 9),
    ('Wages and Salaries: $1 Million and Over', 10),
]

**NOTES:**

- Most of the AGI stubs in the PUF targets and HT2 line up, with the exception of the two stubs between \$10,000 and \$50,000. For this reason, that AGI range is not in the table below.

In [7]:
# Build the wage comparison table: one row per crosswalk_stubs entry holding the
# HT2 'A00200' value, the 2017 PUF target, and their relative difference.
# Rows are collected in plain lists and the DataFrame is built once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# growing a frame row by row copies it on every iteration.
stub_rows = []
stub_labels = []
for puf_name, ht2_row in crosswalk_stubs:
    # The HT2 cell is parsed as a comma-formatted integer string, read from
    # the ht2_us row with label ht2_row. Both values are scaled by 1e6.
    ht2_val = round(int(ht2_us.loc[ht2_row, 'A00200'].replace(',', '')) / 1e6, 1)
    puf_targ_val = round(puf_targ_2.loc['2017', puf_name] / 1e6, 1)
    # Difference as a fraction of the HT2 value, computed from the rounded
    # figures so it is consistent with the two value columns displayed.
    perc_dif = round((ht2_val - puf_targ_val) / ht2_val, 3)
    stub_rows.append({"HT2_value": ht2_val,
                      "PUF_target_val": puf_targ_val,
                      "perc_dif": perc_dif})
    stub_labels.append(puf_name)
compare_stubs = pd.DataFrame(stub_rows, index=stub_labels,
                             columns=["HT2_value", "PUF_target_val", "perc_dif"])
compare_stubs

Unnamed: 0,HT2_value,PUF_target_val,perc_dif
Wages and Salaries: Zero or Less,21.2,22.0,-0.038
"Wages and Salaries: $1 Less Than $10,000",87.7,107.4,-0.225
"Wages and Salaries: $50,000 Less Than $75,000",995.5,1037.3,-0.042
"Wages and Salaries: $75,000 Less Than $100,000",874.6,951.2,-0.088
"Wages and Salaries: $100,000 Less Than $200,000",2005.7,1990.9,0.007
"Wages and Salaries: $200,000 Less Than $500,000",1209.2,1081.0,0.106
"Wages and Salaries: $500,000 Less Than $1 Million",372.3,351.2,0.057
Wages and Salaries: $1 Million and Over,486.1,462.0,0.05
