In [1]:
#importing some libraries
import pandas as pd
from os import path

#importing Bokeh libraries
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import Select
from bkcharts import Line, Bar
from bkcharts.attributes import cat, ColorAttr, color
from bkcharts.operations import blend
from bokeh.layouts import column
from bokeh.io import output_file, show
from bokeh.models import BoxZoomTool, ResetTool, ResetTool, SaveTool
from bokeh.models import FuncTickFormatter
from bokeh.models import CustomJS, Slider

In [2]:
#load data as DataFrame
df = pd.read_csv("resources/dataset_tax_change.csv", decimal=",", encoding='utf-8-sig')

In [3]:
df

Unnamed: 0.1,Unnamed: 0,Tax Units with Tax Cut,Tax Units with Tax Increase,Count,Average Tax Change,Total Tax Difference,Percent with Tax Increase,Percent with Tax Decrease,Share of Overall Change
0,,Thousands,Thousands,Thousands,,Billions,,,
1,Less than 10,1436,477,23157,-8.0,-0.2,2.1,6.2,0.5
2,10-20,7015,111,24467,-81.0,-2.0,0.5,28.7,5.9
3,20-30,9483,106,18943,-184.0,-3.5,0.6,50.1,10.4
4,30-40,11352,307,16372,-297.0,-4.9,1.9,69.3,14.4
5,40-50,9795,374,13283,-370.0,-4.9,2.8,73.7,14.6
6,50-75,17029,1201,22758,-553.0,-12.6,5.3,74.8,37.3
7,75-100,9208,1294,13548,-577.0,-7.8,9.6,68.0,23.2
8,100-200,11100,4332,23024,-512.0,-11.8,18.8,48.2,35.0
9,200-500,691,4421,7922,245.0,1.9,55.8,8.7,-5.8


In [4]:
list(df)

['Unnamed: 0',
 'Tax Units with Tax Cut',
 'Tax Units with Tax Increase',
 'Count',
 'Average Tax Change',
 'Total Tax Difference',
 'Percent with Tax Increase',
 'Percent with Tax Decrease',
 'Share of Overall Change']

In [5]:
df_data = df[['Unnamed: 0','Tax Units with Tax Cut', 'Tax Units with Tax Increase', 'Average Tax Change']]

In [6]:
df_data

Unnamed: 0.1,Unnamed: 0,Tax Units with Tax Cut,Tax Units with Tax Increase,Average Tax Change
0,,Thousands,Thousands,
1,Less than 10,1436,477,-8.0
2,10-20,7015,111,-81.0
3,20-30,9483,106,-184.0
4,30-40,11352,307,-297.0
5,40-50,9795,374,-370.0
6,50-75,17029,1201,-553.0
7,75-100,9208,1294,-577.0
8,100-200,11100,4332,-512.0
9,200-500,691,4421,245.0


In [7]:
df_dropped=df_data.drop(df.index[[0,len(df_data['Tax Units with Tax Cut'])-1]])

In [8]:
len(df_data['Tax Units with Tax Cut'])-1

13

In [9]:
df_dropped

Unnamed: 0.1,Unnamed: 0,Tax Units with Tax Cut,Tax Units with Tax Increase,Average Tax Change
1,Less than 10,1436,477,-8.0
2,10-20,7015,111,-81.0
3,20-30,9483,106,-184.0
4,30-40,11352,307,-297.0
5,40-50,9795,374,-370.0
6,50-75,17029,1201,-553.0
7,75-100,9208,1294,-577.0
8,100-200,11100,4332,-512.0
9,200-500,691,4421,245.0
10,500-1000,57,710,2.461


In [10]:
df_dropped['Tax Units with Tax Cut']= [x.replace(',', '.') for x in df_dropped['Tax Units with Tax Cut']]

In [11]:
df_dropped['Tax Units with Tax Cut']=(-1)*df_dropped['Tax Units with Tax Cut'].astype(float)

In [12]:
df_dropped['Tax Units with Tax Increase']= [x.replace(',', '.') for x in df_dropped['Tax Units with Tax Increase']]

In [13]:
df_dropped['Tax Units with Tax Increase']=df_dropped['Tax Units with Tax Increase'].astype(float)

In [14]:
df_dropped

Unnamed: 0.1,Unnamed: 0,Tax Units with Tax Cut,Tax Units with Tax Increase,Average Tax Change
1,Less than 10,-1.436,477.0,-8.0
2,10-20,-7.015,111.0,-81.0
3,20-30,-9.483,106.0,-184.0
4,30-40,-11.352,307.0,-297.0
5,40-50,-9.795,374.0,-370.0
6,50-75,-17.029,1.201,-553.0
7,75-100,-9.208,1.294,-577.0
8,100-200,-11.1,4.332,-512.0
9,200-500,-691.0,4.421,245.0
10,500-1000,-57.0,710.0,2.461


In [15]:
names = df_dropped.columns.tolist()
names[names.index('Unnamed: 0')] = 'Intervals'
df_dropped.columns = names

In [16]:
#Transform ColumnDataFrame to ColumnDataSource
tax_increase=ColumnDataSource(pd.DataFrame(df_dropped["Tax Units with Tax Increase"]))
tax_cut=ColumnDataSource(pd.DataFrame(df_dropped["Tax Units with Tax Cut"]))
cds_df_dropped = ColumnDataSource(df_dropped)


In [17]:
df_dropped

Unnamed: 0,Intervals,Tax Units with Tax Cut,Tax Units with Tax Increase,Average Tax Change
1,Less than 10,-1.436,477.0,-8.0
2,10-20,-7.015,111.0,-81.0
3,20-30,-9.483,106.0,-184.0
4,30-40,-11.352,307.0,-297.0
5,40-50,-9.795,374.0,-370.0
6,50-75,-17.029,1.201,-553.0
7,75-100,-9.208,1.294,-577.0
8,100-200,-11.1,4.332,-512.0
9,200-500,-691.0,4.421,245.0
10,500-1000,-57.0,710.0,2.461


In [18]:
p = Bar(df_dropped, 
       label=cat(columns='Intervals', sort=False), 
       values=blend('Tax Units with Tax Increase', 'Tax Units with Tax Cut', name='values', labels_name='vars'),
       tooltips=[('Value', '@values')], ylabel="Tax units with tax cut             Tax units with tax increase", stack=cat(columns='values', sort=False), 
       color=color(columns='vars',
                      palette=['silver', 'orange'],
                      sort=True),
       bar_width=0.4, tools="pan,box_zoom, reset, save", plot_width=600, plot_height=450, logo=None,
       toolbar_sticky=False, legend=False)
p.xaxis.axis_label = 'Income [$ thousands]'

In [19]:
values=blend('Tax Units with Tax Increase', 'Tax Units with Tax Cut', name='values', labels_name='vars')

In [20]:
values

Blend(Tax Units with Tax Increase, Tax Units with Tax Cut)

In [21]:
s1 = figure(plot_width=600, plot_height=200, title='Average Tax Change', tools="box_zoom, reset, save")
s1.toolbar.logo = None
s1.toolbar_sticky = False
s1.title.text_color = '#0569CE'
s1.xaxis.axis_label = 'Income [$ thousands]'

In [22]:
avg_tax_change=df_dropped['Average Tax Change']

In [23]:
line_x=[i for i in range(len(df_dropped['Intervals']))]

In [24]:
line_x

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

In [25]:
s1.line(line_x, avg_tax_change, line_color='#0569CE', line_width=2)
s1.circle(line_x, avg_tax_change, fill_color="white", size=8)

In [26]:
label_dict = {}
for i, s in enumerate(df_dropped['Intervals']):
    label_dict[i] = s

s1.xaxis.formatter = FuncTickFormatter(code="""
    var labels = %s;
    return labels[tick];
""" % label_dict)

In [27]:
label_dict

{0: 'Less than 10',
 1: '10-20',
 2: '20-30',
 3: '30-40',
 4: '40-50',
 5: '50-75',
 6: '75-100',
 7: '100-200',
 8: '200-500',
 9: '500-1000',
 10: '1000+',
 11: 'All'}

In [28]:
#l = Line(x = "line_x", y = "avg_tax_change", line_color='#0569CE', line_width=2)

In [29]:
output_file("bar.html")
p_l = column(s1,p)

show(p_l)