In [1]:
import pandas as pd
import numpy as np 
import sys
sys.path.append("../tax-calculator")
from taxcalc.records import Records
from taxcalc import Policy, Records, Calculator, Behavior, behavior
from taxcalc.utils import *
from numpy.testing import assert_array_almost_equal
import matplotlib
import matplotlib.pyplot as plt 
from matplotlib.transforms import BlendedGenericTransform
%matplotlib inline
import copy
import itertools

In [2]:
# Read the PUF extract once; every scenario below wraps this same DataFrame
# in its own Records object.
puf = pd.read_csv("../tax-calculator/true_puf.csv")

# One Policy/Records pair per scenario: baseline, reform, and an extra
# "org" pair that the cells shown here never advance or calculate.
policy_base, records_base = Policy(), Records(puf)
policy_reform, records_reform = Policy(), Records(puf)
policy_org, records_org = Policy(), Records(puf)

# A separate Calculator for each scenario.
calcbase = Calculator(policy=policy_base, records=records_base)
calcreform = Calculator(policy=policy_reform, records=records_reform)
calcori = Calculator(policy=policy_org, records=records_org)

You loaded data for 2009.
Your data have been extrapolated to 2013.
You loaded data for 2009.
Your data have been extrapolated to 2013.
You loaded data for 2009.
Your data have been extrapolated to 2013.


In [3]:
# Reform keyed by start year 2017: a 43.6% '_II_rt8' rate and 24% '_CG_rt4' /
# '_AMT_CG_rt4' rates, each paired with a threshold of 5,000,000 repeated
# across the six entries of the corresponding bracket/threshold vector.
# NOTE(review): the six entries presumably map to filing statuses -- confirm
# against the Policy parameter definitions.
reform_5mm = {
    2017: {
        '_II_rt8': [0.436],
        '_II_brk7': [[5000000] * 6],
        '_CG_rt4': [0.24],
        '_CG_thd3': [[5000000] * 6],
        '_AMT_CG_rt4': [0.24],
        '_AMT_CG_thd3': [[5000000] * 6],
    }
}
policy_reform.implement_reform(reform_5mm)

# Move both calculators that are compared below to the reform year.
calcreform.advance_to_year(2017)
calcbase.advance_to_year(2017)

In [4]:
# Run calc_all() on the baseline and reform calculators (both were advanced
# to 2017 in the previous cell) before their records are read by results().
# NOTE(review): presumably this populates the computed tax variables -- confirm.
calcbase.calc_all()
calcreform.calc_all()

In [5]:
# Small constant added to the denominator of wage_weighted() so that a group
# whose total wage weight is zero does not divide by zero.
EPSILON = 0.001

# Columns pulled out of a calculator by results(): the STATS_COLUMNS list
# (star-imported from taxcalc.utils) plus the 'e00200' column that
# wage_weighted() multiplies into its weights.
RES_COLUMNS = STATS_COLUMNS + ['e00200']
# The results function selects the data frame we'll be using
def results(c, columns=None):
    """
    Collect per-record columns from a calculator into one DataFrame.

    Parameters
    ----------
    c : object exposing ``policy`` and ``records`` attributes
        Each requested column is read from ``c.policy`` when that object has
        an attribute of that name, otherwise from ``c.records``.
    columns : list of str, optional
        Names of the attributes to collect. Defaults to the module-level
        ``RES_COLUMNS`` list, which keeps existing ``results(c)`` calls
        working unchanged.

    Returns
    -------
    pandas.DataFrame
        One column per requested name, in the requested order.

    Raises
    ------
    AttributeError
        If a name is found on neither ``c.policy`` nor ``c.records``.
    """
    if columns is None:
        columns = RES_COLUMNS
    columns = list(columns)

    outputs = [
        getattr(c.policy, col) if hasattr(c.policy, col)
        else getattr(c.records, col)
        for col in columns
    ]
    # Use pd.DataFrame explicitly: the bare name ``DataFrame`` was only
    # available as a side effect of ``from taxcalc.utils import *``.
    return pd.DataFrame(data=np.column_stack(outputs), columns=columns)

def wage_weighted(agg, col_name):
    """
    Average of ``agg[col_name]`` weighted by ``s006 * e00200``.

    ``agg`` must provide the 's006' and 'e00200' columns in addition to
    ``col_name``. EPSILON is added to the denominator, so a group whose
    weights sum to zero yields 0 instead of raising.
    """
    weighted_total = float((agg[col_name] * agg['s006'] * agg['e00200']).sum())
    weight_total = (agg['s006'] * agg['e00200']).sum() + EPSILON
    return weighted_total / weight_total

def add_income_bins2(df, num_bins, tab):
    """
    Sort ``df`` by ``tab`` and label each row with an equal-weight bin.

    The frame is modified in place (sorted, with 'cumsum_weights' and 'bins'
    columns added) and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``tab`` column and an 's006' weight column.
    num_bins : int
        Number of bins; labels run from 1 to ``num_bins``.
    tab : str
        Column used to rank the rows.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``.
    """
    # DataFrame.sort() has been removed from pandas; sort_values() is the
    # replacement and keeps the in-place behaviour callers see today.
    df.sort_values(tab, inplace=True)
    df['cumsum_weights'] = np.cumsum(df['s006'].values)
    max_ = df['cumsum_weights'].values[-1]

    # linspace pins the last edge to exactly max_, so the top record cannot
    # drop out of the final bin through floating-point rounding of
    # num_bins * (max_ / num_bins).
    bin_edges = np.linspace(0, max_, num_bins + 1)
    labels = list(range(1, num_bins + 1))

    # include_lowest keeps leading records whose cumulative weight is exactly
    # 0 (zero-weight rows sorted first) in bin 1 instead of leaving them NaN.
    df['bins'] = pd.cut(df['cumsum_weights'], bins=bin_edges, labels=labels,
                        include_lowest=True)
    return df

def get_mtr_data(calcX, calcY, weights, tab, num_bins=100):
    """
    Weighted marginal tax rate per bin for two calculators, side by side.

    Records are ranked by ``tab`` and split into ``num_bins`` equal-weight
    bins by add_income_bins2(); ``weights`` then reduces the 'mtr_iit' column
    of each bin to one number.

    Parameters
    ----------
    calcX, calcY : calculator objects
        Each must work with results() and provide ``mtr()`` returning three
        values, of which only the middle one is used here.
        NOTE(review): the middle value is presumably the income-tax MTR --
        confirm against Calculator.mtr.
    weights : callable
        Invoked through ``groupby('bins').apply`` as
        ``weights(group, 'mtr_iit')``.
    tab : str
        Ranking column. calcX's values are copied onto calcY's frame so both
        are binned on the same ranking.
    num_bins : int, optional
        Number of bins. Defaults to 100, the value previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns 'base' (from calcX) and 'reform' (from calcY), one row per
        distinct value of 'bins'. Row labels are those of the first record
        of each bin in sorted order.
    """
    df_x = results(calcX)
    df_y = results(calcY)

    # Only the middle element of each mtr() result is needed.
    _, mtr_iit_x, _ = calcX.mtr()
    _, mtr_iit_y, _ = calcY.mtr()

    df_x['mtr_iit'] = mtr_iit_x
    df_y['mtr_iit'] = mtr_iit_y

    # Rank the second frame by the first frame's values.
    df_y[tab] = df_x[tab]

    df_x = add_income_bins2(df_x, num_bins, tab)
    df_y = add_income_bins2(df_y, num_bins, tab)

    df_filtered_x = df_x.copy()
    df_filtered_y = df_y.copy()

    gp_x = df_filtered_x.groupby('bins', as_index=False)
    gp_y = df_filtered_y.groupby('bins', as_index=False)

    wgtpct_x = gp_x.apply(weights, 'mtr_iit')
    wgtpct_y = gp_y.apply(weights, 'mtr_iit')

    # pd.DataFrame is spelled out; the bare name ``DataFrame`` only existed
    # through ``from taxcalc.utils import *``.
    wpct_x = pd.DataFrame(data=wgtpct_x, columns=['w_mtr'])
    wpct_y = pd.DataFrame(data=wgtpct_y, columns=['w_mtr'])

    # NOTE(review): assumes apply() produced exactly num_bins rows in bin
    # order; a different group count raises a length mismatch here.
    wpct_x['bins'] = np.arange(1, num_bins + 1)
    wpct_y['bins'] = np.arange(1, num_bins + 1)

    # Broadcast each bin's value back onto every record of that bin.
    rsltx = pd.merge(df_filtered_x[['bins']], wpct_x, how='left')
    rslty = pd.merge(df_filtered_y[['bins']], wpct_y, how='left')

    df_filtered_x['w_mtr'] = rsltx['w_mtr'].values
    df_filtered_y['w_mtr'] = rslty['w_mtr'].values

    # Keep a single (first) record per bin.
    df_filtered_x.drop_duplicates(subset = 'bins', inplace = True)
    df_filtered_y.drop_duplicates(subset = 'bins', inplace = True)

    df_filtered_x = df_filtered_x['w_mtr']
    df_filtered_y = df_filtered_y['w_mtr']

    merged = pd.concat([df_filtered_x, df_filtered_y], axis=1, ignore_index=True)
    merged.columns = ['base','reform']

    return merged

In [6]:
# Baseline vs. reform wage-weighted MTR, with records ranked by 'c00100'.
get_mtr_data(
    calcbase,
    calcreform,
    weights=wage_weighted,
    tab='c00100'
)

Unnamed: 0,base,reform
108,0.005368,0.005368
7631,-0.018005,-0.018005
216001,0.000000,0.000000
215390,0.000000,0.000000
217397,0.000000,0.000000
219563,0.000000,0.000000
218137,0.000000,0.000000
217071,-0.071064,-0.071064
219583,-0.034308,-0.034308
8177,-0.032364,-0.032364


In [27]:
def add_econ_bins(df, num_bins, tab):
    """
    Sort ``df`` by weighted ``tab`` and label rows with equal-total bins.

    Each bin covers an equal share of the cumulative sum of
    ``df[tab] * df['s006']``. The frame is modified in place (sorted, with
    'AGIs006', 'cumsum' and 'bins' columns added) and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``tab`` column and an 's006' weight column.
    num_bins : int
        Number of bins; labels run from 1 to ``num_bins``.
    tab : str
        Column whose weighted value drives the ranking. Previously this
        argument was ignored and 'c00100' was always used; callers passing
        ``tab='c00100'`` see no change.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``. Rows whose cumulative value is not above
        0 fall outside the first (right-closed) interval and get a NaN bin.
    """
    # The helper column keeps its historical name 'AGIs006' whatever ``tab``
    # is, so anything reading that column keeps working.
    df['AGIs006'] = np.multiply(df[tab].values, df['s006'].values)

    # DataFrame.sort() has been removed from pandas; sort_values() is the
    # replacement and keeps the in-place behaviour.
    df.sort_values('AGIs006', inplace=True)
    df['cumsum'] = np.cumsum(df['AGIs006'].values)
    max_ = df['cumsum'].values[-1]

    # linspace pins the last edge to exactly max_, so the top record cannot
    # fall out of the final bin through floating-point rounding.
    bin_edges = np.linspace(0, max_, num_bins + 1)
    labels = list(range(1, num_bins + 1))
    df['bins'] = pd.cut(df['cumsum'], bins=bin_edges, labels=labels)
    return df

def get_econ_mtr(calcX, calcY, weights, tab, num_bins=100):
    """
    Weighted marginal tax rate per add_econ_bins() bin for two calculators.

    Same pipeline as get_mtr_data(), except that the bins come from
    add_econ_bins() and the first row of the combined table is dropped
    before returning.

    Parameters
    ----------
    calcX, calcY : calculator objects
        Each must work with results() and provide ``mtr()`` returning three
        values, of which only the middle one is used here.
        NOTE(review): the middle value is presumably the income-tax MTR --
        confirm against Calculator.mtr.
    weights : callable
        Invoked through ``groupby('bins').apply`` as
        ``weights(group, 'mtr_iit')``.
    tab : str
        Column copied from calcX's frame onto calcY's frame before binning
        and then passed on to add_econ_bins().
    num_bins : int, optional
        Number of bins. Defaults to 100, the value previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns 'base' (from calcX) and 'reform' (from calcY), one row per
        distinct value of 'bins', minus the first row.
    """
    df_x = results(calcX)
    df_y = results(calcY)

    # Only the middle element of each mtr() result is needed.
    _, mtr_iit_x, _ = calcX.mtr()
    _, mtr_iit_y, _ = calcY.mtr()

    df_x['mtr_iit'] = mtr_iit_x
    df_y['mtr_iit'] = mtr_iit_y

    # Bin the second frame on the first frame's values.
    df_y[tab] = df_x[tab]

    df_x = add_econ_bins(df_x, num_bins, tab)
    df_y = add_econ_bins(df_y, num_bins, tab)

    df_filtered_x = df_x.copy()
    df_filtered_y = df_y.copy()

    gp_x = df_filtered_x.groupby('bins', as_index=False)
    gp_y = df_filtered_y.groupby('bins', as_index=False)

    wgtpct_x = gp_x.apply(weights, 'mtr_iit')
    wgtpct_y = gp_y.apply(weights, 'mtr_iit')

    # pd.DataFrame is spelled out; the bare name ``DataFrame`` only existed
    # through ``from taxcalc.utils import *``.
    wpct_x = pd.DataFrame(data=wgtpct_x, columns=['w_mtr'])
    wpct_y = pd.DataFrame(data=wgtpct_y, columns=['w_mtr'])

    # NOTE(review): assumes apply() produced exactly num_bins rows in bin
    # order; a different group count raises a length mismatch here.
    wpct_x['bins'] = np.arange(1, num_bins + 1)
    wpct_y['bins'] = np.arange(1, num_bins + 1)

    # Broadcast each bin's value back onto every record of that bin.
    rsltx = pd.merge(df_filtered_x[['bins']], wpct_x, how='left')
    rslty = pd.merge(df_filtered_y[['bins']], wpct_y, how='left')

    df_filtered_x['w_mtr'] = rsltx['w_mtr'].values
    df_filtered_y['w_mtr'] = rslty['w_mtr'].values

    # Keep a single (first) record per bin.
    df_filtered_x.drop_duplicates(subset = 'bins', inplace = True)
    df_filtered_y.drop_duplicates(subset = 'bins', inplace = True)

    df_filtered_x = df_filtered_x['w_mtr']
    df_filtered_y = df_filtered_y['w_mtr']

    merged = pd.concat([df_filtered_x, df_filtered_y], axis=1, ignore_index=True)
    merged.columns = ['base','reform']

    # NOTE(review): the first row is discarded unconditionally. Presumably it
    # is the row for records add_econ_bins() leaves without a bin (cumulative
    # value not above 0); if no such records exist this drops a real bin --
    # confirm the intent.
    return merged[1:]

In [28]:
# Baseline vs. reform wage-weighted MTR over the add_econ_bins() bins.
get_econ_mtr(
    calcbase,
    calcreform,
    weights=wage_weighted,
    tab='c00100'
)

Unnamed: 0,base,reform
132709,0.077775,0.077775
62808,0.087011,0.087013
51276,0.127617,0.127617
71582,0.157556,0.157556
73630,0.160722,0.160722
199336,0.177169,0.177169
62408,0.183258,0.183260
101798,0.176341,0.176341
75514,0.171241,0.171242
60882,0.175362,0.175364
