In [1]:
from taxcalc import *
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook, hplot
from bokeh.charts import Scatter
output_notebook()

In [2]:
recs = Records('ubi_puf.csv', weights='puf_weights.csv')
policy = Policy(start_year=2013)
calc = Calculator(policy=policy, records=recs)

You loaded data for 2009.
Calculator instantiation automatically extrapolated your data to 2013.


In [3]:
calc.advance_to_year(2014)
calc.calc_all()

In [4]:
soi = pd.read_csv('soi_data.csv')

In [5]:
def distribution(item, weight, agi):
    agi_1 = (((item[agi < 0] * weight[agi < 0]).sum() / 1000))
    agi_2 = ((item[(agi > 1) & (agi < 5000)] * weight[(agi > 1) & (agi < 5000)]).sum() / 1000)
    agi_3 = ((item[(agi > 5000) & (agi < 10000)] * weight[(agi > 5000) & (agi < 10000)]).sum() / 1000)
    agi_4 = ((item[(agi > 10000) & (agi < 15000)] * weight[(agi > 10000) & (agi < 15000)]).sum() / 1000)
    agi_5 = ((item[(agi > 15000) & (agi < 20000)] * weight[(agi > 15000) & (agi < 20000)]).sum() / 1000)
    agi_6 = ((item[(agi > 20000) & (agi < 25000)] * weight[(agi > 20000) & (agi < 25000)]).sum() / 1000)
    agi_7 = ((item[(agi > 25000) & (agi < 30000)] * weight[(agi > 25000) & (agi < 30000)]).sum() / 1000)
    agi_8 = ((item[(agi > 30000) & (agi < 40000)] * weight[(agi > 30000) & (agi < 40000)]).sum() / 1000)
    agi_9 = ((item[(agi > 40000) & (agi < 50000)] * weight[(agi > 40000) & (agi < 50000)]).sum() / 1000)
    agi_10 = ((item[(agi > 50000) & (agi < 75000)] * weight[(agi > 50000) & (agi < 75000)]).sum() / 1000)
    agi_11 = ((item[(agi > 75000) & (agi < 100000)] * weight[(agi > 75000) & (agi < 100000)]).sum() / 1000)
    agi_12 = ((item[(agi > 100000) & (agi < 200000)] * weight[(agi > 100000) & (agi < 200000)]).sum() / 1000)
    agi_13 = ((item[(agi > 200000) & (agi < 500000)] * weight[(agi > 200000) & (agi < 500000)]).sum() / 1000)
    agi_14 = ((item[(agi > 500000) & (agi < 1000000)] * weight[(agi > 500000) & (agi < 1000000)]).sum() / 1000)
    agi_15 = ((item[(agi > 1000000) & (agi < 1500000)] * weight[(agi > 1000000) & (agi < 1500000)]).sum() / 1000)
    agi_16 = ((item[(agi > 1500000) & (agi < 2000000)] * weight[(agi > 1500000) & ( agi < 2000000)]).sum() / 1000)
    agi_17 = ((item[(agi > 2000000) & (agi < 5000000)] * weight[(agi > 2000000) & ( agi < 5000000)]).sum() / 1000)
    agi_18 = ((item[(agi > 5000000) & (agi < 10000000)] * weight[(agi > 5000000) & ( agi < 10000000)]).sum() / 1000)
    agi_19 = ((item[agi > 10000000] * weight[agi > 10000000]).sum() / 1000)
    total = ((item * weight).sum() / 1000)
    df = [agi_1, agi_2, agi_3, agi_4, agi_5, agi_6, agi_7, agi_8, agi_9, agi_10, agi_11,
                       agi_12, agi_13, agi_14, agi_15, agi_16, agi_17, agi_18, agi_19]
    return df

In [6]:
# Find totals for each item
tc = pd.DataFrame()
tc['AGI'] = distribution(calc.records.c00100, calc.records.s006, calc.records.c00100)
tc['Wages'] = distribution(calc.records.e00200, calc.records.s006, calc.records.c00100)
tc['Interest'] = distribution(calc.records.e00300, calc.records.s006, calc.records.c00100)
tc['Dividends'] = distribution(calc.records.e00650, calc.records.s006, calc.records.c00100)
tc['SchC'] = distribution(calc.records.e00900, calc.records.s006, calc.records.c00100)
tc['IRA'] = distribution(calc.records.e01400, calc.records.s006, calc.records.c00100)
tc['Scorp'] = distribution(calc.records.e26270, calc.records.s006, calc.records.c00100)
tc['SS Benefits'] = distribution(calc.records.e02400, calc.records.s006, calc.records.c00100)
tc['Item Ded'] = distribution(calc.records.c04470, calc.records.s006, calc.records.c00100)
tc['Inc'] = distribution(calc.records.c04800, calc.records.s006, calc.records.c00100)

In [7]:
# Create results DataFrame and file
results = pd.DataFrame()
results['SOI - AGI'] = soi['AGI']
results['TC - AGI'] = tc['AGI']
results['AGI % Error'] = (tc['AGI'] / soi['AGI'] - 1) * 100
results['SOI - Wages'] = soi['Wages']
results['TC - Wages'] = tc['Wages']
results['Wages % Error'] = (tc['Wages'] / soi['Wages'] - 1) * 100
results['SOI - Taxable Interest'] = soi['Interest']
results['TC - Taxable Interest'] = tc['Interest']
results['Taxable Interest % Error'] = (tc['Interest'] / soi['Interest'] - 1) * 100
results['SOI - Qualified Dividends'] = soi.Dividends
results['TC - Qualified Dividends'] = tc['Dividends']
results['Qualified Dividends % Error'] = (tc['Dividends'] / soi['Dividends'] - 1) * 100
results['SOI - Schedule C Net Income/Loss'] = soi['SchC']
results['TC - Schedule C Net Income/Loss'] = tc['SchC']
results['Sch C Net Income/Loss % Error'] = (tc['SchC'] / soi['SchC'] - 1) * 100
results['SOI - IRA Distributions'] = soi['IRA']
results['TC - IRA Distribtuions'] = tc['IRA']
results['IRA Distributions Error'] = (tc['IRA'] / soi['IRA'] - 1) * 100
results['SOI - S Corp Income'] = soi['Scorp']
results['TC - S Corp Income'] = tc['Scorp']
results['S Corp Income % Error'] = (tc['Scorp'] / soi['Scorp'] - 1) * 100
results['SOI - SS Benefits'] = soi['SS Benefits']
results['TC - SS Benefits'] = tc['SS Benefits']
results['SS Benefits % Error'] = (tc['SS Benefits'] / soi['SS Benefits'] - 1) * 100
results['SOI - Itemized Deductions'] = soi['Item Ded']
results['TC - Itemized Deduction'] = tc['Item Ded']
results['Itemized Deductions % Error'] = (tc['Item Ded'] / soi['Item Ded'] - 1) * 100
results['SOI - Taxable Income'] = soi['Inc']
results['TC - Taxable Income'] = tc['Inc']
results['Taxable Income % Error'] = (tc['Inc'] / soi['Inc'] - 1) * 100

In [8]:
index_list = ['Zero or Negative',
              '$1-$5K',
              '$5K-$10K',
              '$10K-$15K',
              '$15K-$20K',
              '$20K-$25K',
              '$25K-$30K',
              '$30K-$40K',
              '$40K-$50K',
              '$50K-$75K',
              '$75K-$100K',
              '$100K-$200K',
              '$200K-$500K',
              '$500K-$1M',
              '$1M-$1.5M',
              '$1.5M-$2M',
              '$2M-$5M',
              '$5M-$10M',
              '$10M and over']
results.index = index_list
results.to_csv('Distribution totals.csv')

In [9]:
results

Unnamed: 0,SOI - AGI,TC - AGI,AGI % Error,SOI - Wages,TC - Wages,Wages % Error,SOI - Taxable Interest,TC - Taxable Interest,Taxable Interest % Error,SOI - Qualified Dividends,...,S Corp Income % Error,SOI - SS Benefits,TC - SS Benefits,SS Benefits % Error,SOI - Itemized Deductions,TC - Itemized Deduction,Itemized Deductions % Error,SOI - Taxable Income,TC - Taxable Income,Taxable Income % Error
Zero or Negative,-197690795,-150438700.0,-23.902021,19421543,16570380.0,-14.680437,4688264,4026400.0,-14.117467,3095521,...,241.401549,16387413,14161850.0,-13.58092,0,6719.763,inf,0,0.0,
$1-$5K,26379097,39243830.0,48.768655,26466383,24462140.0,-7.572771,541622,1304557.0,140.861223,639115,...,525.548618,30462629,107651200.0,253.387657,5288726,34580.74,-99.346142,540176,448522.2,-16.967396
$5K-$10K,89719121,98192870.0,9.444749,68432757,56376830.0,-17.617186,770262,1354465.0,75.844658,859292,...,119.075292,35764379,63369860.0,77.187084,6356107,266340.1,-95.809697,3617249,2608670.0,-27.882481
$10K-$15K,153830822,156233600.0,1.561992,108333169,90660470.0,-16.313287,1002678,1359774.0,35.614247,1137469,...,-125.952947,41628764,57694590.0,38.593086,9183481,4981969.0,-45.750761,18153883,16853580.0,-7.162683
$15K-$20K,197774439,189888600.0,-3.987274,153662477,126502700.0,-17.674955,1147555,1407631.0,22.663459,1137002,...,-116.503921,36611936,42469040.0,15.997794,13507422,8318782.0,-38.41325,44114832,38591280.0,-12.520851
$20K-$25K,226042578,191514000.0,-15.27526,183452869,137265300.0,-25.176831,1125760,1073167.0,-4.671798,1234331,...,0.35226,31287885,34164110.0,9.192762,14020331,11951340.0,-14.757093,71604080,56946380.0,-20.47048
$25K-$30K,241769583,231983000.0,-4.047901,197806750,180585000.0,-8.706364,1340811,1227169.0,-8.475598,1183817,...,145.993561,26673890,29575270.0,10.877229,18200485,14975350.0,-17.720024,98280801,90627030.0,-7.787658
$30K-$40K,507486039,456048800.0,-10.135697,418567676,351486100.0,-16.02646,2096189,1861235.0,-11.208607,2794332,...,-64.556973,43890297,47871850.0,9.071591,41001388,36411400.0,-11.194725,253424537,227598000.0,-10.191028
$40K-$50K,513959724,464513200.0,-9.620693,418379915,356662000.0,-14.751634,1900496,1930341.0,1.570384,2996252,...,3.983191,35702308,37709300.0,5.621455,48792280,45879420.0,-5.969916,299205654,273391400.0,-8.627587
$50K-$75K,1191956661,1154207000.0,-3.167024,916618937,875452300.0,-4.491144,5955852,4960325.0,-16.715102,9906058,...,-14.843329,83556997,80531710.0,-3.620626,137812627,130585600.0,-5.24408,771635427,755847200.0,-2.046076


## Distributions

### AGI

In [10]:
soi_agi = pd.DataFrame()
soi_agi['Percent of Total AGI'] = soi['AGI'] / soi['AGI'].sum()
soi_agi['Measure'] = 'SOI'
soi_agi['AGI Bin'] = index_list
tc_agi = pd.DataFrame()
tc_agi['Percent of Total AGI'] = tc['AGI'] / tc['AGI'].sum()
tc_agi['Measure'] = 'TC'
tc_agi['AGI Bin'] = index_list
data_agi = [soi_agi, tc_agi]
plot_data_agi = pd.concat(data_agi)
plot_agi = Scatter(plot_data_agi, x='AGI Bin', y='Percent of Total AGI', color='Measure', 
                   title='Percent of Total AGI by AGI Bin')
show(plot_agi)

soi_agi_sum = pd.DataFrame()
soi_agi_sum['Total AGI'] = soi['AGI']
soi_agi_sum['Measure'] = 'SOI'
soi_agi_sum['AGI Bin'] = index_list
tc_agi_sum = pd.DataFrame()
tc_agi_sum['Total AGI'] = tc['AGI']
tc_agi_sum['Measure'] = 'TC'
tc_agi_sum['AGI Bin'] = index_list
data_agi_sum = [soi_agi_sum, tc_agi_sum]
plot_data_agi_sum = pd.concat(data_agi_sum)
plot_agi_sum = Scatter(plot_data_agi_sum, x='AGI Bin', y='Total AGI', color='Measure', 
                   title='Total AGI by AGI Bin')
show(plot_agi_sum)

### Wages and Salaries

In [11]:
soi_wage = pd.DataFrame()
soi_wage['Percent of Total Wage and Salaries Income'] = soi['Wages'] / soi['Wages'].sum()
soi_wage['Measure'] = 'SOI'
soi_wage['AGI Bin'] = index_list
tc_wage = pd.DataFrame()
tc_wage['Percent of Total Wage and Salaries Income'] = tc['Wages'] / tc['Wages'].sum()
tc_wage['Measure'] = 'TC'
tc_wage['AGI Bin'] = index_list
data_wage = [soi_wage, tc_wage]
plot_data_wage = pd.concat(data_wage)
plot_wage = Scatter(plot_data_wage, x='AGI Bin', y='Percent of Total Wage and Salaries Income', color='Measure',
                    title = 'Percent of Total Wages and Salaries by AGI Bin')
show(plot_wage)

soi_wage_sum = pd.DataFrame()
soi_wage_sum['Total Wage and Salaries Income'] = soi['Wages']
soi_wage_sum['Measure'] = 'SOI'
soi_wage_sum['AGI Bin'] = index_list
tc_wage_sum = pd.DataFrame()
tc_wage_sum['Total Wage and Salaries Income'] = tc['Wages']
tc_wage_sum['Measure'] = 'TC'
tc_wage_sum['AGI Bin'] = index_list
data_wage_sum = [soi_wage_sum, tc_wage_sum]
plot_data_wage_sum = pd.concat(data_wage_sum)
plot_wage_sum = Scatter(plot_data_wage_sum, x='AGI Bin', y='Total Wage and Salaries Income', color='Measure',
                    title = 'Total Wages and Salaries by AGI Bin')
show(plot_wage_sum)

### Taxable Interest

In [12]:
soi_int = pd.DataFrame()
soi_int['Percent of Total Interest Income'] = soi['Interest'] / soi['Interest'].sum()
soi_int['Measure'] = 'SOI'
soi_int['AGI Bin'] = index_list
tc_int = pd.DataFrame()
tc_int['Percent of Total Interest Income'] = tc['Interest'] / tc['Interest'].sum()
tc_int['Measure'] = 'TC'
tc_int['AGI Bin'] = index_list
data_int = [soi_int, tc_int]
plot_data_int = pd.concat(data_int)
plot_int = Scatter(plot_data_int, x='AGI Bin', y='Percent of Total Interest Income', color='Measure',
                    title = 'Percent of Total Interest Income by AGI Bin')
show(plot_int)

soi_int_sum = pd.DataFrame()
soi_int_sum['Total Interest Income'] = soi['Interest']
soi_int_sum['Measure'] = 'SOI'
soi_int_sum['AGI Bin'] = index_list
tc_int_sum = pd.DataFrame()
tc_int_sum['Total Interest Income'] = tc['Interest']
tc_int_sum['Measure'] = 'TC'
tc_int_sum['AGI Bin'] = index_list
data_int_sum = [soi_int_sum, tc_int_sum]
plot_data_int_sum = pd.concat(data_int_sum)
plot_int_sum = Scatter(plot_data_int_sum, x='AGI Bin', y='Total Interest Income', color='Measure',
                    title = 'Total Interest Income by AGI Bin')
show(plot_int_sum)

### Qualified Dividends

In [13]:
soi_div = pd.DataFrame()
soi_div['Percent of Total Dividend Income'] = soi['Dividends'] / soi['Dividends'].sum()
soi_div['Measure'] = 'SOI'
soi_div['AGI Bin'] = index_list
tc_div = pd.DataFrame()
tc_div['Percent of Total Dividend Income'] = tc['Dividends'] / tc['Dividends'].sum()
tc_div['Measure'] = 'TC'
tc_div['AGI Bin'] = index_list
data_div = [soi_div, tc_div]
plot_data_div = pd.concat(data_div)
plot_div = Scatter(plot_data_div, x='AGI Bin', y='Percent of Total Dividend Income', color='Measure',
                    title = 'Percent of Total Qualified Dividends Distribution by AGI Bin')
show(plot_div)

soi_div_sum = pd.DataFrame()
soi_div_sum['Total Dividend Income'] = soi['Dividends']
soi_div_sum['Measure'] = 'SOI'
soi_div_sum['AGI Bin'] = index_list
tc_div_sum = pd.DataFrame()
tc_div_sum['Total Dividend Income'] = tc['Dividends']
tc_div_sum['Measure'] = 'TC'
tc_div_sum['AGI Bin'] = index_list
data_div_sum = [soi_div_sum, tc_div_sum]
plot_data_div_sum = pd.concat(data_div_sum)
plot_div_sum = Scatter(plot_data_div_sum, x='AGI Bin', y='Total Dividend Income', color='Measure',
                    title = 'Total Qualified Dividends Distribution by AGI Bin')
show(plot_div_sum)

### Schedule C Net Income/Loss

In [14]:
soi_schc = pd.DataFrame()
soi_schc['Percent of Total Schedule C Income'] = soi['SchC'] / soi['SchC'].sum()
soi_schc['Measure'] = 'SOI'
soi_schc['AGI Bin'] = index_list
tc_schc = pd.DataFrame()
tc_schc['Percent of Total Schedule C Income'] = tc['SchC'] / tc['SchC'].sum()
tc_schc['Measure'] = 'TC'
tc_schc['AGI Bin'] = index_list
data_schc = [soi_schc, tc_schc]
plot_data_schc = pd.concat(data_schc)
plot_schc = Scatter(plot_data_schc, x='AGI Bin', y='Percent of Total Schedule C Income', color='Measure',
                    title = 'Percent of Total Schedule C Income by AGI Bin')
show(plot_schc)

soi_schc_sum = pd.DataFrame()
soi_schc_sum['Total Schedule C Income'] = soi['SchC']
soi_schc_sum['Measure'] = 'SOI'
soi_schc_sum['AGI Bin'] = index_list
tc_schc_sum = pd.DataFrame()
tc_schc_sum['Total Schedule C Income'] = tc['SchC']
tc_schc_sum['Measure'] = 'TC'
tc_schc_sum['AGI Bin'] = index_list
data_schc_sum = [soi_schc_sum, tc_schc_sum]
plot_data_schc_sum = pd.concat(data_schc_sum)
plot_schc_sum = Scatter(plot_data_schc_sum, x='AGI Bin', y='Total Schedule C Income', color='Measure',
                    title = 'Total Schedule C Income by AGI Bin')
show(plot_schc_sum)

### Taxable IRA Distributions

In [15]:
soi_ira = pd.DataFrame()
soi_ira['Percent of Taxable IRA Distributions'] = soi['IRA'] / soi['IRA'].sum()
soi_ira['Measure'] = 'SOI'
soi_ira['AGI Bin'] = index_list
tc_ira = pd.DataFrame()
tc_ira['Percent of Taxable IRA Distributions'] = tc['IRA'] / tc['IRA'].sum()
tc_ira['Measure'] = 'TC'
tc_ira['AGI Bin'] = index_list
data_ira = [soi_ira, tc_ira]
plot_data_ira = pd.concat(data_ira)
plot_ira = Scatter(plot_data_ira, x='AGI Bin', y='Percent of Taxable IRA Distributions', color='Measure',
                    title = 'Percent of Total Taxable IRA Distributions by AGI Bin')
show(plot_ira)

soi_ira_sum = pd.DataFrame()
soi_ira_sum['Total Taxable IRA Distributions'] = soi['IRA']
soi_ira_sum['Measure'] = 'SOI'
soi_ira_sum['AGI Bin'] = index_list
tc_ira_sum = pd.DataFrame()
tc_ira_sum['Total Taxable IRA Distributions'] = tc['IRA']
tc_ira_sum['Measure'] = 'TC'
tc_ira_sum['AGI Bin'] = index_list
data_ira_sum = [soi_ira_sum, tc_ira_sum]
plot_data_ira_sum = pd.concat(data_ira_sum)
plot_ira_sum = Scatter(plot_data_ira_sum, x='AGI Bin', y='Total Taxable IRA Distributions', color='Measure',
                    title = 'Total Taxable IRA Distributions by AGI Bin')
show(plot_ira_sum)

### S Corporation Income

In [16]:
soi_scorp = pd.DataFrame()
soi_scorp['Percent of Total S Corporation Income'] = soi['Scorp'] / soi['Scorp'].sum()
soi_scorp['Measure'] = 'SOI'
soi_scorp['AGI Bin'] = index_list
tc_scorp = pd.DataFrame()
tc_scorp['Percent of Total S Corporation Income'] = tc['Scorp'] / tc['Scorp'].sum()
tc_scorp['Measure'] = 'TC'
tc_scorp['AGI Bin'] = index_list
data_scorp = [soi_scorp, tc_scorp]
plot_data_scorp = pd.concat(data_scorp)
plot_scorp = Scatter(plot_data_scorp, x='AGI Bin', y='Percent of Total S Corporation Income', color='Measure',
                    title = 'Percent of Total S Corporation Income by AGI Bin')
show(plot_scorp)

soi_scorp_sum = pd.DataFrame()
soi_scorp_sum['Total S Corporation Income'] = soi['Scorp']
soi_scorp_sum['Measure'] = 'SOI'
soi_scorp_sum['AGI Bin'] = index_list
tc_scorp_sum = pd.DataFrame()
tc_scorp_sum['Total S Corporation Income'] = tc['Scorp']
tc_scorp_sum['Measure'] = 'TC'
tc_scorp_sum['AGI Bin'] = index_list
data_scorp_sum = [soi_scorp_sum, tc_scorp_sum]
plot_data_scorp_sum = pd.concat(data_scorp_sum)
plot_scorp_sum = Scatter(plot_data_scorp_sum, x='AGI Bin', y='Total S Corporation Income', color='Measure',
                    title = 'Total S Corporation Income by AGI Bin')
show(plot_scorp_sum)

### Social Security Benefits

In [17]:
soi_ss = pd.DataFrame()
soi_ss['Percent of Total SS Benefits'] = soi['SS Benefits'] / soi['SS Benefits'].sum()
soi_ss['Measure'] = 'SOI'
soi_ss['AGI Bin'] = index_list
tc_ss = pd.DataFrame()
tc_ss['Percent of Total SS Benefits'] = tc['SS Benefits'] / tc['SS Benefits'].sum()
tc_ss['Measure'] = 'TC'
tc_ss['AGI Bin'] = index_list
data_ss = [soi_ss, tc_ss]
plot_data_ss = pd.concat(data_ss)
plot_ss = Scatter(plot_data_ss, x='AGI Bin', y='Percent of Total SS Benefits', color='Measure',
                    title = 'Percent of Total SS Benefits by AGI Bin')
show(plot_ss)

soi_ss_sum = pd.DataFrame()
soi_ss_sum['Total SS Benefits'] = soi['SS Benefits']
soi_ss_sum['Measure'] = 'SOI'
soi_ss_sum['AGI Bin'] = index_list
tc_ss_sum = pd.DataFrame()
tc_ss_sum['Total SS Benefits'] = tc['SS Benefits']
tc_ss_sum['Measure'] = 'TC'
tc_ss_sum['AGI Bin'] = index_list
data_ss_sum = [soi_ss_sum, tc_ss_sum]
plot_data_ss_sum = pd.concat(data_ss_sum)
plot_ss_sum = Scatter(plot_data_ss_sum, x='AGI Bin', y='Total SS Benefits', color='Measure',
                    title = 'Total SS Benefits by AGI Bin')
show(plot_ss_sum)

### Total Itemized Deductions

In [18]:
soi_item = pd.DataFrame()
soi_item['Percent of Total Itemized Deductions'] = soi['Item Ded'] / soi['Item Ded'].sum()
soi_item['Measure'] = 'SOI'
soi_item['AGI Bin'] = index_list
tc_item = pd.DataFrame()
tc_item['Percent of Total Itemized Deductions'] = tc['Item Ded'] / tc['Item Ded'].sum()
tc_item['Measure'] = 'TC'
tc_item['AGI Bin'] = index_list
data_item = [soi_item, tc_item]
plot_data_item = pd.concat(data_item)
plot_item = Scatter(plot_data_item, x='AGI Bin', y='Percent of Total Itemized Deductions', color='Measure',
                    title = 'Percent of Total Itemized Deductions by AGI Bin')
show(plot_item)

soi_item_sum = pd.DataFrame()
soi_item_sum['Total Itemized Deductions'] = soi['Item Ded']
soi_item_sum['Measure'] = 'SOI'
soi_item_sum['AGI Bin'] = index_list
tc_item_sum = pd.DataFrame()
tc_item_sum['Total Itemized Deductions'] = tc['Item Ded']
tc_item_sum['Measure'] = 'TC'
tc_item_sum['AGI Bin'] = index_list
data_item_sum = [soi_item_sum, tc_item_sum]
plot_data_item_sum = pd.concat(data_item_sum)
plot_item_sum = Scatter(plot_data_item_sum, x='AGI Bin', y='Total Itemized Deductions', color='Measure',
                    title = 'Total Itemized Deductions by AGI Bin')
show(plot_item_sum)

### Taxable Income

In [19]:

soi_inc = pd.DataFrame()
soi_inc['Percent of Total Taxable Income'] = soi['Inc'] / soi['Inc'].sum()
soi_inc['Measure'] = 'SOI'
soi_inc['AGI Bin'] = index_list
tc_inc = pd.DataFrame()
tc_inc['Percent of Total Taxable Income'] = tc['Inc'] / tc['Inc'].sum()
tc_inc['Measure'] = 'TC'
tc_inc['AGI Bin'] = index_list
data_inc = [soi_inc, tc_inc]
plot_data_inc = pd.concat(data_inc)
plot_inc = Scatter(plot_data_inc, x='AGI Bin', y='Percent of Total Taxable Income', color='Measure',
                    title = 'Percent of Total Taxable Income by AGI Bin')
show(plot_inc)

soi_inc_sum = pd.DataFrame()
soi_inc_sum['Total Taxable Income'] = soi['Inc']
soi_inc_sum['Measure'] = 'SOI'
soi_inc_sum['AGI Bin'] = index_list
tc_inc_sum = pd.DataFrame()
tc_inc_sum['Total Taxable Income'] = tc['Inc']
tc_inc_sum['Measure'] = 'TC'
tc_inc_sum['AGI Bin'] = index_list
data_inc_sum = [soi_inc_sum, tc_inc_sum]
plot_data_inc_sum = pd.concat(data_inc_sum)
plot_inc_sum = Scatter(plot_data_inc_sum, x='AGI Bin', y='Total Taxable Income', color='Measure',
                    title = 'Total Taxable Income by AGI Bin')
show(plot_inc_sum)