In [85]:
import numpy as np
import scipy
import pandas as pd
from bokeh.plotting import figure, save, show, ColumnDataSource
from bokeh.models import (GMapPlot, GMapOptions, ColumnDataSource,
                          Patch, Patches, Range1d, LogColorMapper,
                          HoverTool, PanTool, WheelZoomTool,
                          BoxSelectTool, SaveTool, CustomJS)
from bokeh.models.widgets import Select, Slider
from bokeh.embed import components, autoload_static
from bokeh.resources import CDN
from bokeh.layouts import widgetbox, column, row
from bokeh.palettes import Spectral6
from bokeh.io import output_notebook

In [2]:
# Render Bokeh output inline in the notebook rather than in a separate HTML file.
output_notebook()

### Summary
1. Plot an interactive line graph showing how a congestion fee changes taxi traffic to and from the Loop, by region.

In [3]:
# Load the per-tract ride ratios and index them by census tract.
# The tract id is converted to a string so it can later be matched against the
# string tract ids used for the join with the fare averages.
# Everything happens in a single expression starting from the CSV, so the cell
# can be re-run safely (the step-by-step version failed on re-run once
# `census_tract` had been moved into the index).
df_ratios = (
    pd.read_csv('Data/Ratios.csv')
    .assign(census_tract=lambda ratios: ratios['census_tract'].apply(str))
    .set_index('census_tract')
)

We need average fares for the 36 major census tracts. Ignore the rest for the purpose of this plot. The other tracts make up less than 10% of all rides.

In [6]:
# Per-tract fare statistics; the preview below shows the columns
# `census_tract`, `avg` and `std`.
df_avg = pd.read_csv('GetAvg/stats_3.csv')

In [7]:
# Preview the first rows of the freshly loaded fare statistics.
df_avg.head()

Unnamed: 0,census_tract,avg,std
0,17031071400,11.324124,1.805734
1,17031071500,9.846077,1.586051
2,17031080100,8.630585,1.425377
3,17031080201,8.797396,1.306599
4,17031080202,8.287167,1.309778


Put the major census tracts in 4 distinct groups.

In [8]:
# Census tract ids (as strings) of the 36 major tracts, grouped by region.
# `findDirection` uses these lists to label each row of the fare table.

# North side: 19 tracts.
NORTH = [
    '17031071400', '17031071500', '17031080100',
    '17031080201', '17031080202', '17031080300',
    '17031081000', '17031081100', '17031081201',
    '17031081202', '17031081300', '17031081401',
    '17031081402', '17031081403', '17031081500',
    '17031081600', '17031081700', '17031081800',
    '17031842200',
]

# West side: 9 tracts.
WEST = [
    '17031243500', '17031280100', '17031281900',
    '17031833000', '17031833100', '17031838100',
    '17031841900', '17031842300', '17031980000',
]

# The Loop itself: 5 tracts.
LOOP = [
    '17031320100', '17031320400', '17031320600',
    '17031839000', '17031839100',
]

# South side: 3 tracts.
SOUTH = [
    '17031330100', '17031841000', '17031980100',
]

In [9]:
def findDirection(row):
    """Return the region code for the census tract in `row`.

    `row['census_tract']` is normalised via ``str(int(...))`` and looked up in
    the NORTH, WEST and LOOP lists, in that order, yielding 'N', 'W' or 'L'.
    Any tract found in none of those lists is labelled 'S'; SOUTH itself is
    never consulted.
    """
    tract_id = str(int(row['census_tract']))
    # Checked in the same order as the original if/elif chain.
    for code, members in (('N', NORTH), ('W', WEST), ('L', LOOP)):
        if tract_id in members:
            return code
    return 'S'

In [10]:
# Label every tract with its region, convert the tract id to a string and
# attach the `ratio_from` / `ratio_to` columns from `df_ratios`.
# Previously this was three cells that mutated `df_avg` in place; re-running
# the join cell raised because the ratio columns already existed. Dropping any
# stale ratio columns first makes this single cell safe to re-run.
df_avg = (
    df_avg
    .drop(['ratio_from', 'ratio_to'], axis=1, errors='ignore')
    .assign(
        # `region` is computed first; findDirection accepts int or str tract ids.
        region=lambda fares: fares.apply(findDirection, axis=1),
        census_tract=lambda fares: fares['census_tract'].apply(str),
    )
    .join(df_ratios[['ratio_from', 'ratio_to']], on='census_tract')
)

In [13]:
# Sanity check: print (avg, ratio_from, ratio_to) for every North-region tract.
north_rows = df_avg[df_avg['region'] == 'N'][['avg', 'ratio_from', 'ratio_to']]
for item in north_rows.values:
    print(item)

[  1.13241236e+01   4.99445630e-03   3.52539976e-03]
[  9.84607685e+00   9.62619745e-03   7.98933494e-03]
[  8.63058475e+00   7.05208308e-03   7.47243225e-03]
[  8.79739644e+00   2.67906593e-03   2.61466365e-03]
[  8.28716708e+00   5.16914435e-03   6.02757316e-03]
[  8.81705338e+00   6.84228667e-03   6.10838512e-03]
[ 7.49889426  0.00867315  0.00963972]
[  7.68806003e+00   7.01798849e-03   8.89583944e-03]
[ 7.61109364  0.03401044  0.0356376 ]
[ 8.03909664  0.00821333  0.00890366]
[ 7.67738973  0.0243486   0.02874784]
[ 7.15810992  0.03301161  0.0313799 ]
[ 7.40656487  0.01775213  0.01909883]
[ 6.52983036  0.04409529  0.0453716 ]
[ 6.49566784  0.05346543  0.07418738]
[ 6.19493159  0.02325757  0.0271815 ]
[ 6.12761999  0.05246585  0.05687428]
[ 6.62521538  0.03980929  0.03580569]
[  9.39590597e+00   5.96297723e-03   4.80998669e-03]


In [14]:
def traffic(region, fee=0, elasticity=0.5):
    """Return the (from, to) traffic of `region` under a given fee.

    Reads the module-level `df_avg` table. For region 'L' the plain sums of the
    `ratio_from` and `ratio_to` columns are returned, i.e. the fee has no
    effect. For any other region each tract's ratios are scaled by
    ``1 - elasticity * fee / avg`` (with `avg` taken from that tract's row)
    before being summed.

    Parameters
    ----------
    region : str
        Region code as stored in ``df_avg['region']``.
    fee : number, default 0
        Fee applied to every ride.
    elasticity : number, default 0.5
        Multiplier applied to ``fee / avg`` to obtain the relative decrease.

    Returns
    -------
    tuple
        ``(total_from, total_to)``.
    """
    in_region = df_avg[df_avg['region'] == region]
    if region == 'L':
        # no decrease for this region
        return tuple(in_region[['ratio_from', 'ratio_to']].sum())

    kept_from = 0
    kept_to = 0
    for row in in_region[['avg', 'ratio_from', 'ratio_to']].values:
        # fraction of this tract's traffic that remains after the fee
        remaining = 1 - elasticity * (fee / row[0])
        kept_from += row[1] * remaining
        kept_to += row[2] * remaining
    return kept_from, kept_to

In [15]:
# Baseline "from" traffic over all four regions with traffic()'s default fee of 0.
total_from = sum([pair[0] for pair in map(traffic, ['N', 'W', 'L', 'S'])])

In [16]:
# Baseline "to" traffic over all four regions with traffic()'s default fee of 0.
total_to = sum([pair[1] for pair in map(traffic, ['N', 'W', 'L', 'S'])])

In [17]:
def total_traffic(fee=0, elasticity=0.5):
    """Return overall (from, to) traffic relative to the zero-fee baseline.

    Sums `traffic(region, fee, elasticity)` over the regions 'N', 'W', 'L' and
    'S' and divides each direction by the module-level baselines `total_from`
    and `total_to`.

    Parameters
    ----------
    fee : number, default 0
        Fee passed through to `traffic`.
    elasticity : number, default 0.5
        Elasticity passed through to `traffic`.

    Returns
    -------
    tuple
        ``(value_from / total_from, value_to / total_to)``.
    """
    # Evaluate each region once and reuse both components; previously
    # traffic() was called twice per region (once per direction).
    per_region = [traffic(region, fee, elasticity)
                  for region in ['N', 'W', 'L', 'S']]
    value_from = sum([pair[0] for pair in per_region])
    value_to = sum([pair[1] for pair in per_region])
    return value_from / total_from, value_to / total_to

In [18]:
# Toolbar tools passed to figure(tools=...) when the line plot is created.
TOOLS = "pan, box_zoom, wheel_zoom, reset, save"

In [125]:
#set up initial data
regions = ['N', 'W', 'L', 'S']
# Maps the widget label to the index into the (from, to) tuples returned by
# traffic() and total_traffic().
direction_dict = {'from the Loop': 0,
                  'to the Loop': 1}
direction0 = 'from the Loop'
elasticity0 = 0.5
direction_idx0 = direction_dict[direction0]
# Zero-fee total for the initial direction. total_traffic() already divides by
# it, and the JS callbacks divide every region by the same sum, so the region
# columns are divided by it here as well. Without this the region lines were
# on a different scale than `total` until the first widget interaction.
baseline0 = (total_from, total_to)[direction_idx0]

x = np.linspace(0, 5, 51)
df_line = pd.DataFrame(x, columns=['x'])
df_line['total'] = df_line['x'].apply(
    lambda fee: total_traffic(fee, elasticity0)[direction_idx0])
for region in regions:
    # `region=region` pins the loop variable for this lambda.
    df_line[region] = df_line['x'].apply(
        lambda fee, region=region:
            traffic(region, fee, elasticity0)[direction_idx0] / baseline0)
source = ColumnDataSource(df_line)

In [126]:
#set up plot
colorwheel = Spectral6
# A stray, truncated `x_r` fragment inside this call made the cell a
# SyntaxError; it has been removed, so the x range is left to Bokeh's default.
p_line = figure(tools=TOOLS, title="Taxi traffic reduction by region",
                x_axis_label="fee (dollars)",
                y_axis_label="proportion of taxi rides")

# (column in `source`, legend text), drawn in this order with
# colorwheel[0] .. colorwheel[4].
line_specs = [('total', "Total"),
              ('N', "North"),
              ('W', "West"),
              ('S', "South"),
              ('L', "Loop")]
for line_color, (column_name, legend_text) in zip(colorwheel, line_specs):
    p_line.line('x', column_name, source=source, line_width=2,
                line_color=line_color, legend=legend_text)

# Show the cursor's data-space coordinates on hover.
hover_line = HoverTool(tooltips=[("(x,y)", "($x, $y)")])
p_line.add_tools(hover_line)

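Write separate JavaScript callbacks for the direction selector and the elasticity slider. Both callbacks need the per-tract average fares and both the 'from' and 'to' ratios, so `df_avg` is exposed to the browser as a second ColumnDataSource.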

In [65]:
# Preview df_avg now that it carries the `region`, `ratio_from` and `ratio_to` columns.
df_avg.head()

Unnamed: 0,census_tract,avg,std,region,ratio_from,ratio_to
0,17031071400,11.324124,1.805734,N,0.004994,0.003525
1,17031071500,9.846077,1.586051,N,0.009626,0.007989
2,17031080100,8.630585,1.425377,N,0.007052,0.007472
3,17031080201,8.797396,1.306599,N,0.002679,0.002615
4,17031080202,8.287167,1.309778,N,0.005169,0.006028


In [62]:
# Wrap df_avg in a ColumnDataSource so the CustomJS callbacks can read its
# `region`, `avg`, `ratio_from` and `ratio_to` columns.
avg_source = ColumnDataSource(df_avg)

In [171]:
# NOTE(review): repeats the df_avg preview shown two cells earlier; candidate for removal.
df_avg.head()

Unnamed: 0,census_tract,avg,std,region,ratio_from,ratio_to
0,17031071400,11.324124,1.805734,N,0.004994,0.003525
1,17031071500,9.846077,1.586051,N,0.009626,0.007989
2,17031080100,8.630585,1.425377,N,0.007052,0.007472
3,17031080201,8.797396,1.306599,N,0.002679,0.002615
4,17031080202,8.287167,1.309778,N,0.005169,0.006028


In [170]:
# NOTE(review): this cell re-defines `traffic` with the same logic as the
# definition earlier in the notebook; one of the two should be deleted.
def traffic(region, fee=0, elasticity=0.5):
    """Return the (from, to) traffic of `region` under a given fee.

    Reads the module-level `df_avg` table. For region 'L' the plain sums of the
    `ratio_from` and `ratio_to` columns are returned, i.e. the fee has no
    effect. For any other region each tract's ratios are scaled by
    ``1 - elasticity * fee / avg`` (with `avg` taken from that tract's row)
    before being summed.

    Returns a ``(total_from, total_to)`` tuple.
    """
    in_region = df_avg[df_avg['region'] == region]
    if region == 'L':
        # no decrease for this region
        return tuple(in_region[['ratio_from', 'ratio_to']].sum())

    kept_from = 0
    kept_to = 0
    for row in in_region[['avg', 'ratio_from', 'ratio_to']].values:
        # fraction of this tract's traffic that remains after the fee
        remaining = 1 - elasticity * (fee / row[0])
        kept_from += row[1] * remaining
        kept_to += row[2] * remaining
    return kept_from, kept_to

In [None]:
# NOTE(review): this cell re-defines `total_traffic`, which is already defined
# earlier in the notebook; one of the two should be deleted.
def total_traffic(fee=0, elasticity=0.5):
    """Return overall (from, to) traffic relative to the zero-fee baseline.

    Sums `traffic(region, fee, elasticity)` over the regions 'N', 'W', 'L' and
    'S' and divides each direction by the module-level baselines `total_from`
    and `total_to`.

    Returns ``(value_from / total_from, value_to / total_to)``.
    """
    # Evaluate each region once and reuse both components; previously
    # traffic() was called twice per region (once per direction).
    per_region = [traffic(region, fee, elasticity)
                  for region in ['N', 'W', 'L', 'S']]
    value_from = sum([pair[0] for pair in per_region])
    value_to = sum([pair[1] for pair in per_region])
    return value_from / total_from, value_to / total_to

In [311]:
dir_callback = CustomJS(args=dict(source=source, avg_source=avg_source,
                                 elas_select=elas_select), code="""
    var data = source.data
    var dir = cb_obj.value
    var elas = elas_select.value
    var avg_data = avg_source.data
    
    regions = avg_data['region']
    avgs = avg_data['avg']
    if (dir == 'from the Loop') {
        var ratios = avg_data['ratio_from']
    } else {
        var ratios = avg_data['ratio_to']
    }
    sum = ratios.reduce((a, b) => a + b, 0)
    
    x = data['x']
    total = data['total']
    N = data['N']
    W = data['W']
    S = data['S']
    L = data['L']
    
    for (i = 0; i < x.length; i++) {
        N[i] = 0
        W[i] = 0
        S[i] = 0
        L[i] = 0
        for (j = 0; j < regions.length; j++) {
            if (regions[j] == 'N') {
                N[i] += ratios[j] * (1 - elas * x[i] / avgs[j])
            } else if (regions[j] == 'W') {
                W[i] += ratios[j] * (1 - elas * x[i] / avgs[j])
            } else if (regions[j] == 'S') {
                S[i] += ratios[j] * (1 - elas * x[i] / avgs[j])
            } else {
                L[i] += ratios[j]
            }
        }
        N[i] = N[i] / sum
        W[i] = W[i] / sum
        S[i] = S[i] / sum
        L[i] = L[i] / sum
        total[i] = N[i] + W[i] + S[i] + L[i]
    }
    source.change.emit()
""")

In [318]:
elas_callback = CustomJS(args=dict(source=source, avg_source=avg_source,
                              dir_select=dir_select), code="""
    var data = source.data
    var avg_data = avg_source.data
    var dir = dir_select.value
    var elas = cb_obj.value
    
    regions = avg_data['region']
    avgs = avg_data['avg']
    if (dir == 'from the Loop') {
        var ratios = avg_data['ratio_from']
    } else {
        var ratios = avg_data['ratio_to']
    }
    sum = ratios.reduce((a, b) => a + b, 0)
    
    x = data['x']
    total = data['total']
    N = data['N']
    W = data['W']
    S = data['S']
    L = data['L']
    
    for (i = 0; i < x.length; i++) {
        N[i] = 0
        W[i] = 0
        S[i] = 0
        L[i] = 0
        for (j = 0; j < regions.length; j++) {
            if (regions[j] == 'N') {
                N[i] += ratios[j] * (1 - elas * x[i] / avgs[j])
            } else if (regions[j] == 'W') {
                W[i] += ratios[j] * (1 - elas * x[i] / avgs[j])
            } else if (regions[j] == 'S') {
                S[i] += ratios[j] * (1 - elas * x[i] / avgs[j])
            } else {
                L[i] += ratios[j]
            }
        }
        N[i] = N[i] / sum
        W[i] = W[i] / sum
        S[i] = S[i] / sum
        L[i] = L[i] / sum
        total[i] = N[i] + W[i] + S[i] + L[i]
    }
    source.change.emit()
""")

In [319]:
dir_select = Select(value='from the Loop', title='direction',
                   options=['from the Loop', 'to the Loop'])
elas_select = Slider(value=0.5, start=0., end=1., step=0.1,
                    title='price elasticity')

In [320]:
dir_select.js_on_change('value', dir_callback)
elas_select.js_on_change('value', elas_callback)

In [321]:
# Group the direction selector and the elasticity slider into one widget box.
inputs = widgetbox(dir_select, elas_select)

In [322]:
# Place the widgets to the left of the line plot.
layout = row(inputs, p_line)

In [323]:
# Display the widgets and plot.
show(layout)

In [35]:
from bokeh.layouts import column
from bokeh.models import CustomJS, ColumnDataSource, Slider
from bokeh.plotting import Figure, output_file, show

# Standalone slider demo: plots y = x**power and lets a slider change the power.
# NOTE(review): this cell rebinds the notebook-level names `x`, `source` and
# `layout` that the taxi plot cells above also use; re-run those cells before
# re-using them, or consider moving this demo to its own notebook.
x = [x*0.005 for x in range(0, 200)]
y = x

source = ColumnDataSource(data=dict(x=x, y=y))

plot = Figure(plot_width=400, plot_height=400)
plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

# `x`, `y` and `i` are now declared with `var`; they used to be assigned
# without a declaration, which creates implicit globals and throws under
# strict mode.
callback = CustomJS(args=dict(source=source), code="""
    var data = source.data;
    var f = cb_obj.value;
    var x = data['x'];
    var y = data['y'];
    for (var i = 0; i < x.length; i++) {
        y[i] = Math.pow(x[i], f);
    }
    source.change.emit();
""")

slider = Slider(start=0.1, end=4, value=1, step=.1, title="power")
slider.js_on_change('value', callback)

layout = column(slider, plot)

show(layout)

In [24]:
def update_data(attrname, old, new):
    """Python-side callback: rebuild the line data from the current widget values.

    The signature is the (attr, old, new) form; none of the three arguments is
    used here, since both values are read from the widgets directly.

    NOTE(review): this definition is replaced by the `update_data` in the next
    cell. `source_line` is not defined in any visible cell of this notebook;
    confirm where it is supposed to come from.
    """
    #Get the current values
    elasticity = elas_select.value
    column_idx = direction_dict[dir_select.value]

    fees = np.linspace(0, 5, 51)
    frame = pd.DataFrame(fees, columns=['x'])
    frame['total'] = frame['x'].apply(
        lambda fee: total_traffic(fee, elasticity)[column_idx])
    for name in regions:
        frame[name] = frame['x'].apply(
            lambda fee: traffic(name, fee, elasticity)[column_idx])
    refreshed = ColumnDataSource(frame)
    source_line.data.update(refreshed.data)

In [25]:
def update_data(attrname, old, new):
    """Python-side callback: rebuild the line data using `new` as the elasticity.

    The direction is read from `dir_select`; the elasticity is taken from the
    `new` argument rather than from `elas_select`. `attrname` and `old` are
    unused.

    NOTE(review): `source_line` is not defined in any visible cell of this
    notebook; confirm where it is supposed to come from.
    """
    #Get the current values
    column_idx = direction_dict[dir_select.value]
    elasticity = new

    fees = np.linspace(0, 5, 51)
    frame = pd.DataFrame(fees, columns=['x'])
    frame['total'] = frame['x'].apply(
        lambda fee: total_traffic(fee, elasticity)[column_idx])
    for name in regions:
        frame[name] = frame['x'].apply(
            lambda fee: traffic(name, fee, elasticity)[column_idx])
    refreshed = ColumnDataSource(frame)
    source_line.data.update(refreshed.data)