# Interactive Plotting

In [1]:
import datetime as dt
import math
from ast import literal_eval

import bokeh
import pandas as pd
from bokeh.io import output_notebook
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure, show
from bokeh.tile_providers import CARTODBPOSITRON, get_provider
from ipywidgets import fixed, interact, interact_manual

# Configure Bokeh so that later show() calls render inline in this notebook.
output_notebook()

## Data

Chicago data portal

* List of 'L' Stops:
https://data.cityofchicago.org/Transportation/CTA-System-Information-List-of-L-Stops-Map/zbnc-zirh
* Station entry averages:
https://data.cityofchicago.org/Transportation/CTA-Ridership-L-Station-Entries-Monthly-Day-Type-A/t2rn-p8d7

In [2]:
def get_data_chicago(id):
    '''
    Download one dataset from the Chicago data portal and return it as a DataFrame.

    id: the portal's dataset identifier (e.g. 't2rn-p8d7'); it is substituted
        into the portal's CSV export URL, which pandas then reads directly.
    '''
    csv_export_url = f'https://data.cityofchicago.org/api/views/{id}/rows.csv?accessType=DOWNLOAD'
    return pd.read_csv(csv_export_url)

In [3]:
# CTA ridership data: download, then parse 'month_beginning' into real datetimes
rides = get_data_chicago('t2rn-p8d7').assign(
    month_beginning=lambda frame: pd.to_datetime(frame['month_beginning'])
)
print(rides.shape)
rides.head()

(33331, 7)


Unnamed: 0,station_id,stationame,month_beginning,avg_weekday_rides,avg_saturday_rides,avg_sunday-holiday_rides,monthtotal
0,40900,Howard,2001-01-01,6233.9,3814.5,2408.6,164447
1,41190,Jarvis,2001-01-01,1489.1,1054.0,718.0,40567
2,40100,Morse,2001-01-01,4412.5,3064.5,2087.8,119772
3,41300,Loyola,2001-01-01,4664.5,3156.0,1952.8,125008
4,40760,Granville,2001-01-01,3109.8,2126.0,1453.8,84189


In [4]:
# CTA transit station location data (one row per stop)
stations = get_data_chicago(id='zbnc-zirh')
print(stations.shape)
stations.head()

(300, 15)


Unnamed: 0,STOP_ID,STOP_NAME,STATION_NAME,MAP_ID,ADA,RED,BLUE,G,BRN,P,Pexp,Y,Pnk,O,Location
0,30162,18th (54th/Cermak-bound),18th,40830,True,False,False,False,False,False,False,False,True,False,"(41.857908, -87.669147)"
1,30161,18th (Loop-bound),18th,40830,True,False,False,False,False,False,False,False,True,False,"(41.857908, -87.669147)"
2,30022,35th/Archer (Loop-bound),35th/Archer,40120,True,False,False,False,False,False,False,False,False,True,"(41.829353, -87.680622)"
3,30023,35th/Archer (Midway-bound),35th/Archer,40120,True,False,False,False,False,False,False,False,False,True,"(41.829353, -87.680622)"
4,30214,35-Bronzeville-IIT (63rd-bound),35th-Bronzeville-IIT,41120,True,False,False,True,False,False,False,False,False,False,"(41.831677, -87.625826)"


We have duplicate rows: each station is listed twice, once for each direction the train travels. We keep a single row per station (`MAP_ID`).

In [5]:
# Collapse the per-direction stop rows so that each station (MAP_ID) appears
# once; the last stop listed for a station is the row that is kept.
stations.drop_duplicates(
    subset=['MAP_ID'],
    keep='last',
    inplace=True,
)

## Bokeh building blocks

A Bokeh plot is built as a series of objects. At the very base layer is a Figure, and on top of it you can add Glyphs.

* Figure: grouping of all the elements (i.e. the plot)
* Glyphs: basic visual markers that Bokeh can display

## Plot ridership by month

In [6]:
# Mean of the per-station monthly totals, one value per calendar month
rides_by_month = rides.groupby('month_beginning')[['monthtotal']].mean()
rides_by_month.head()

Unnamed: 0_level_0,monthtotal
month_beginning,Unnamed: 1_level_1
2001-01-01,87141.113475
2001-02-01,80963.070922
2001-03-01,91590.751773
2001-04-01,87147.070922
2001-05-01,93250.092199


In [7]:
# Line chart of the monthly average on a datetime x-axis
plot = figure(
    title='Average rides per month',
    x_axis_label='Date',
    y_axis_label='Month average',
    x_axis_type='datetime',
    plot_height=400,
)
plot.line(
    x=rides_by_month.index,
    y=rides_by_month['monthtotal'],
    line_width=4,
    color='indianred',
)
show(plot)

## Plot popular stations

In [8]:
# The five stations with the highest average monthly total
rides_by_station = (
    rides.groupby('stationame')[['monthtotal']]
    .mean()
    .sort_values(by='monthtotal', ascending=False)
    .head()
)
rides_by_station

Unnamed: 0_level_0,monthtotal
stationame,Unnamed: 1_level_1
Clark/Lake,423038.675214
Lake/State,422762.538462
Chicago/State,387423.316239
Belmont-North Main,313586.380342
95th/Dan Ryan,313288.982833


In [9]:
# Categorical bar chart: station names on the x-axis, bars growing from zero
p = figure(
    x_range=rides_by_station.index.tolist(),
    title="Top Stations",
    plot_height=250,
)
p.vbar(
    x=rides_by_station.index,
    top=rides_by_station['monthtotal'],
    width=0.9,
)

p.y_range.start = 0
p.xgrid.grid_line_color = None

show(p)

In [10]:
# add range slider

def plot_stations(df, num_bars):
    '''
    Build a bar chart of the `num_bars` stations with the highest mean 'monthtotal'.

    df: DataFrame with 'stationame' and 'monthtotal' columns.
    num_bars: how many of the top stations to include.
    Returns the Bokeh figure; it is not shown here.
    '''
    top_stations = (
        df.groupby('stationame')[['monthtotal']]
        .mean()
        .sort_values(by='monthtotal', ascending=False)
        .head(num_bars)
    )

    chart = figure(
        x_range=top_stations.index.tolist(),
        title="Top Stations",
        plot_height=350,
    )
    chart.vbar(x=top_stations.index, top=top_stations['monthtotal'], width=0.9)

    # Bars start at zero, vertical grid lines are hidden, and the station
    # labels are rotated so that many bars remain readable.
    chart.y_range.start = 0
    chart.xgrid.grid_line_color = None
    chart.xaxis.major_label_orientation = "vertical"

    return chart

# Bind a 0-40 slider to the number of bars. The ridership table is passed
# through ipywidgets.fixed so the callback keeps the raw frame that exists when
# this cell runs: a later cell rebinds the global `rides` to a per-station
# aggregate without a 'stationame' column, and a callback that looked up the
# global at call time would then raise a KeyError on every slider move.
@interact(value=(0, 40), df=fixed(rides))
def make_plot_stations(value=5, df=rides):
    '''
    Show the bar chart of the top `value` stations.

    value: number of bars to draw (driven by the slider).
    df: ridership table handed to plot_stations; defaults to the `rides`
        frame as it exists when this cell is executed.
    '''
    show(plot_stations(df, value))

interactive(children=(IntSlider(value=5, description='value', max=40), Output()), _dom_classes=('widget-intera…

## Interactive Maps

What part of the city has the most train ridership?

In [11]:
# Station table after de-duplication (one row per MAP_ID)
stations.head(n=5)

Unnamed: 0,STOP_ID,STOP_NAME,STATION_NAME,MAP_ID,ADA,RED,BLUE,G,BRN,P,Pexp,Y,Pnk,O,Location
1,30161,18th (Loop-bound),18th,40830,True,False,False,False,False,False,False,False,True,False,"(41.857908, -87.669147)"
3,30023,35th/Archer (Midway-bound),35th/Archer,40120,True,False,False,False,False,False,False,False,False,True,"(41.829353, -87.680622)"
5,30213,35-Bronzeville-IIT (Harlem-bound),35th-Bronzeville-IIT,41120,True,False,False,True,False,False,False,False,False,False,"(41.831677, -87.625826)"
7,30245,43rd (Harlem-bound),43rd,41270,True,False,False,True,False,False,False,False,False,False,"(41.816462, -87.619021)"
9,30209,47th (Harlem-bound),47th,41080,True,False,False,True,False,False,False,False,False,False,"(41.809209, -87.618826)"


Adding map data to a plot uses the same methodology as adding other types of Glyphs to a Figure. This time you will be passing in tiles using the `add_tile()` method, along with a tile provider as an argument.

In [12]:
# Empty base map: CartoDB Positron tiles in a fixed Web-Mercator window
tile_provider = get_provider(CARTODBPOSITRON)

p = figure(
    x_axis_type="mercator",
    y_axis_type="mercator",
    x_range=(-9780000, -9745000),
    y_range=(5130000, 5160000),
)
p.add_tile(tile_provider)
show(p)

<b>Mercator Projection:</b> The tiles for the map use the Mercator projection. The spherical coordinates of the earth (latitude and longitude) are projected onto a plane (X and Y coordinates). https://en.wikipedia.org/wiki/Mercator_projection

In [13]:
def merc(Coords):
    '''
    Convert a "(latitude, longitude)" string into Web-Mercator X/Y coordinates.

    Coords: string holding a Python-literal pair of degrees, like the values in
        the dataframe's 'Location' column, e.g. "(41.857908, -87.669147)".
    Returns: tuple (x, y) on a sphere of radius 6378137 (same units as the radius).
    Raises: ValueError or SyntaxError from literal_eval when the string is not
        a valid literal; ValueError (math domain error) for latitude -90.

    y is computed directly as r_major * ln(tan(pi/4 + lat/2)). The previous
    form derived a scale factor as x / lon, which is algebraically the same
    value but divided by zero for any point with longitude 0.
    '''
    coordinates = literal_eval(Coords)
    lat, lon = coordinates[0], coordinates[1]

    r_major = 6378137.000
    x = r_major * math.radians(lon)
    y = r_major * math.log(math.tan(math.pi / 4.0 + math.radians(lat) / 2.0))
    return (x, y)

In [14]:
# Project every station once, then split the (x, y) pairs into two columns.
# Each 'Location' string is parsed and projected a single time instead of
# once per output column.
projected = stations['Location'].apply(merc)
stations['coords_x'] = projected.apply(lambda xy: xy[0])
stations['coords_y'] = projected.apply(lambda xy: xy[1])
stations.head()

Unnamed: 0,STOP_ID,STOP_NAME,STATION_NAME,MAP_ID,ADA,RED,BLUE,G,BRN,P,Pexp,Y,Pnk,O,Location,coords_x,coords_y
1,30161,18th (Loop-bound),18th,40830,True,False,False,False,False,False,False,False,True,False,"(41.857908, -87.669147)",-9759285.0,5139718.0
3,30023,35th/Archer (Midway-bound),35th/Archer,40120,True,False,False,False,False,False,False,False,False,True,"(41.829353, -87.680622)",-9760562.0,5135452.0
5,30213,35-Bronzeville-IIT (Harlem-bound),35th-Bronzeville-IIT,41120,True,False,False,True,False,False,False,False,False,False,"(41.831677, -87.625826)",-9754462.0,5135799.0
7,30245,43rd (Harlem-bound),43rd,41270,True,False,False,True,False,False,False,False,False,False,"(41.816462, -87.619021)",-9753705.0,5133526.0
9,30209,47th (Harlem-bound),47th,41080,True,False,False,True,False,False,False,False,False,False,"(41.809209, -87.618826)",-9753683.0,5132443.0


We now can overlay the stations on the map we created earlier.

In [15]:
# Tile map with one default-styled circle per station
tile_provider = get_provider(CARTODBPOSITRON)

p = figure(
    x_axis_type="mercator",
    y_axis_type="mercator",
    x_range=(-9780000, -9745000),
    y_range=(5130000, 5160000),
)
p.add_tile(tile_provider)
p.circle(x=stations['coords_x'], y=stations['coords_y'])
show(p)

In [16]:
# Average monthly total per station, indexed by station_id.
# NOTE: this rebinds `rides` from the raw table to the aggregate, so the cell
# cannot be re-run without first reloading the raw ridership data.
rides = rides.groupby('station_id')[['monthtotal']].mean()
print(rides.shape)
rides.head()

(147, 1)


Unnamed: 0_level_0,monthtotal
station_id,Unnamed: 1_level_1
40010,46057.606838
40020,92983.179487
40030,42461.508547
40040,169900.461538
40050,98748.286325


In [17]:
# Attach each station's average ridership: stations.MAP_ID is matched against
# the station_id index of `rides`; the inner join drops unmatched rows.
merged = pd.merge(
    stations,
    rides,
    how='inner',
    left_on='MAP_ID',
    right_index=True,
)
print(merged.shape)
merged.head()

(143, 18)


Unnamed: 0,STOP_ID,STOP_NAME,STATION_NAME,MAP_ID,ADA,RED,BLUE,G,BRN,P,Pexp,Y,Pnk,O,Location,coords_x,coords_y,monthtotal
1,30161,18th (Loop-bound),18th,40830,True,False,False,False,False,False,False,False,True,False,"(41.857908, -87.669147)",-9759285.0,5139718.0,38551.0
3,30023,35th/Archer (Midway-bound),35th/Archer,40120,True,False,False,False,False,False,False,False,False,True,"(41.829353, -87.680622)",-9760562.0,5135452.0,68148.282051
5,30213,35-Bronzeville-IIT (Harlem-bound),35th-Bronzeville-IIT,41120,True,False,False,True,False,False,False,False,False,False,"(41.831677, -87.625826)",-9754462.0,5135799.0,54021.367521
7,30245,43rd (Harlem-bound),43rd,41270,True,False,False,True,False,False,False,False,False,False,"(41.816462, -87.619021)",-9753705.0,5133526.0,25041.521368
9,30209,47th (Harlem-bound),47th,41080,True,False,False,True,False,False,False,False,False,False,"(41.809209, -87.618826)",-9753683.0,5132443.0,32944.970085


In [18]:
#scale the ridership data to the appropriate size circle
merged['circle_sizes'] = merged['monthtotal'] / 10000

In [19]:
# Bubble map: one translucent red circle per station, sized by average ridership
tile_provider = get_provider(CARTODBPOSITRON)

p = figure(
    x_axis_type="mercator",
    y_axis_type="mercator",
    x_range=(-9780000, -9745000),
    y_range=(5130000, 5160000),
)
p.add_tile(tile_provider)

p.circle(
    x=merged['coords_x'],
    y=merged['coords_y'],
    size=merged['circle_sizes'],
    fill_color="#FF0000",
    fill_alpha=0.05,
    line_color="#FF0000",
)

show(p)

Bokeh has interactive tools that can be used to report information. We can use the <b>Hover Tool</b> to overlay information about each station when a user hovers over a circle.

In [20]:
def map_ridership(df):
    '''
    Build a tile map with one ridership-sized circle per station and hover tooltips.

    df: DataFrame providing 'coords_x', 'coords_y', 'monthtotal',
        'circle_sizes' and 'STATION_NAME' columns.
    Returns: the Bokeh figure (not shown). No explicit x/y ranges are set on
        the figure.
    '''
    tile_provider = get_provider(CARTODBPOSITRON)

    # Column names here are the field names referenced by the glyph and tooltips.
    source = ColumnDataSource(data=dict(
        x=list(df['coords_x']),
        y=list(df['coords_y']),
        ridership=list(df['monthtotal']),
        sizes=list(df['circle_sizes']),
        stationname=list(df['STATION_NAME'])))

    hover = HoverTool(tooltips=[
        ("station", "@stationname"),
        ("ridership", "@ridership"),
    ])

    p = figure(x_axis_type="mercator",
               y_axis_type="mercator",
               tools=[hover, 'wheel_zoom', 'save'])

    # tile_provider is already a resolved provider, so add it directly (as the
    # other map cells do) rather than passing it through get_provider again.
    p.add_tile(tile_provider)

    p.circle(x='x',
             y='y',
             source=source,
             size='sizes',
             line_color="#FF0000",
             fill_color="#FF0000",
             fill_alpha=0.05)
    return p

plot = map_ridership(merged)
show(plot)