
# Hands on with Bokeh

> Bokeh is a Python library for creating interactive visualizations for modern web browsers. It helps you build beautiful graphics, ranging from simple plots to complex dashboards with streaming datasets. With Bokeh, you can create JavaScript-powered visualizations without writing any JavaScript yourself.


## Blog

- https://www.fouyapen.com/visualization/python/basic/2020/01/31/hands-on-with-bokeh.html


## What you will learn in the notebook 
- Heatmap
- Barchart
- Bar plot 
- Line plot 
- Smoothing 
- Density plot 
- Area plot 

## Keywords 
- visualization
- interactive plots 

## Data source 
- Transport for London (TfL): weekly volume of users 


## Requirements 

- python 3.7

## Reference 

https://docs.bokeh.org/en/latest/index.html#:~:text=Bokeh%20is%20a%20Python%20library,without%20writing%20any%20JavaScript%20yourself.


In [13]:
import pandas as pd 
import numpy as np 
%load_ext watermark
%watermark -u -d -t -z

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Last updated: 2021-03-29 22:06:40CEST



In [14]:
# Some nice palettes
# NOTE: every entry must be followed by a comma. Without commas Python joins
# adjacent string literals, and the list collapses to a single 35-character
# string instead of five colours.
COLOR_PALETTES = [
    '#000001',
    '#092a3a',
    '#395036',
    '#a37c19',
    '#ffe9c7',
]


CONTRASTING_PALETTES = [
    '#007b00',
    '#24e0b8',
    '#ffcc51',
    '#ff8b76',
    '#ff3031',
]

In [15]:
# objective: load data
# Read the two count files (first "En", then "Ex") with the same options;
# the first 6 lines of each file are skipped.
en17week, ext17week = [
    pd.read_csv(csv_path, skiprows=6)
    for csv_path in ('data/counts_tfl/En17week.csv', 'data/counts_tfl/Ex17week.csv')
]

## Data processing 
 269 rows 
 108 columns
 
 
 
Vauxhall <- Stockwell <-Brixton




victoria line :
Victoria	2009	8	986	252	734

In [17]:
#en17week.columns[100:108]

In [19]:
# Display the (rows, columns) dimensions of the loaded entry-count frame.
en17week.shape

(269, 108)

In [20]:
# objective: define columns
# Descriptive and time-band columns. The leading space in some names is part
# of the column name and must be kept.
COLUMNS_META = [
    'nlc', ' Station', ' Date', ' Note', ' Total',
    'Early', 'AM Peak', 'Inter peak', 'PM Peak', 'Evening', 'Late',
]

# The 96 quarter-hour column labels, 'HHMM-HHMM', starting at 02:00 and
# wrapping past midnight to end with '0145-0200'. The end of the last quarter
# before midnight is written '2400' (hence '2345-2400'), which is what
# divmod(1440, 60) == (24, 0) produces.
COLUMNS_HOURS = [
    '{:02d}{:02d}-{:02d}{:02d}'.format(*divmod(start, 60), *divmod(start + 15, 60))
    for start in ((120 + 15 * quarter) % 1440 for quarter in range(96))
]

COLUMNS_ALL = COLUMNS_META + COLUMNS_HOURS

In [7]:
# Names of the time-band columns, in chronological order.
time_categorical = ['Early', 'AM Peak', 'Inter peak', 'PM Peak', 'Evening', 'Late']

In [21]:
# Entrance and exit views restricted to the same columns: station name,
# the quarter-hour counts and the time-band columns.
en17week_station, ext17week_station = (
    frame[[' Station'] + COLUMNS_HOURS + time_categorical]
    for frame in (en17week, ext17week)
)

In [22]:
# ENTRANCE rows for Vauxhall, Brixton and Stockwell (one filtered frame each)
vauxhall_ent, brixton_ent, stockwell_ent = (
    en17week_station[en17week_station[' Station'] == station_name]
    for station_name in ('Vauxhall', 'Brixton', 'Stockwell')
)

In [23]:
# Entrance rows of the three stations of interest, kept in a single frame.
is_main_station = en17week_station[' Station'].isin(['Brixton','Stockwell','Vauxhall'])
main_ent = en17week_station[is_main_station]
del is_main_station  # helper mask only; keep the notebook namespace unchanged

In [24]:
# Display the time-band columns (Early ... Late) of the Vauxhall entrance row.
vauxhall_ent[time_categorical]

Unnamed: 0,Early,AM Peak,Inter peak,PM Peak,Evening,Late
238,1587.0,16279.0,12144.0,14905.0,4277.0,1580.0


In [25]:
# handle the column feature
def break_down(vauxhall_ent):
    '''
    Description:
        Pair every quarter-hour label of ``COLUMNS_HOURS`` with the name of
        the time band it falls in.

    The bands are fixed positional slices of ``COLUMNS_HOURS``:
    Early [0:20], AM Peak [20:32], Inter peak [32:56], PM Peak [56:68],
    Evening [68:80] and Late [80:96].

    Args:
        vauxhall_ent: not read by the function body; the result depends only
            on ``COLUMNS_HOURS``.

    Returns:
        list of ``(band, quarter_hour_label)`` tuples in ``COLUMNS_HOURS``
        order.
    '''
    band_slices = (
        ('Early', slice(0, 20)),
        ('AM Peak', slice(20, 32)),
        ('Inter peak', slice(32, 56)),
        ('PM Peak', slice(56, 68)),
        ('Evening', slice(68, 80)),
        ('Late', slice(80, 96)),
    )

    factors = []
    for band, span in band_slices:
        quarter_labels = COLUMNS_HOURS[span]
        # np.repeat yields one copy of the band name per quarter in the band.
        band_names = np.repeat(band, len(quarter_labels))
        factors.extend(zip(band_names, quarter_labels))
    return factors


In [26]:
# Per-station entrance (_ent) and exit (_ext) rows.
# Each frame is filtered with a mask built from its OWN ' Station' column.
# Reusing the entrance mask on the exit frame only gives the right rows when
# both files list the stations in exactly the same row order.

#vauxhall data
vauxhall_ent = en17week_station[en17week_station[' Station'] == 'Vauxhall']
vauxhall_ext = ext17week_station[ext17week_station[' Station'] == 'Vauxhall']

#brixton data
brixton_ent = en17week_station[en17week_station[' Station'] == 'Brixton']
brixton_ext = ext17week_station[ext17week_station[' Station'] == 'Brixton']


#Stockwell data
stockwell_ent = en17week_station[en17week_station[' Station'] == 'Stockwell']
stockwell_ext = ext17week_station[ext17week_station[' Station'] == 'Stockwell']

In [27]:
# Build the (time band, quarter-hour) factor lists used as the categorical
# x axis of the plots, one for the entrance data and one for the exit data.
factors_ent, factors_ext = (
    break_down(station_frame) for station_frame in (vauxhall_ent, vauxhall_ext)
)

## Visualizing data

Now that the data is ready, we can focus on plotting.

In [43]:
from bokeh.models import NumeralTickFormatter
from bokeh.models import LinearAxis, Range1d
from bokeh.io import show, output_notebook
from bokeh.models import CategoricalColorMapper, ColumnDataSource, FactorRange
from bokeh.plotting import figure
from bokeh.models import LinearAxis, Range1d
from bokeh.models import NumeralTickFormatter
from bokeh.transform import dodge

from selenium import webdriver


# save
from bokeh.io import export_png
from bokeh.io import export_svgs
output_notebook()

In [19]:
# tim_cat_value = []
# for k,l in zip([len(EARLY),len(AM_Peak),len(INTER_Peak),len(PM_Peak),len(EVENING),len(LATE)],time_cat_value):
#     tim_cat_value.append(l/k)
    
# tim_cat_value

In [29]:
# Time-band column names and the matching values of the Vauxhall entrance row.
time_categorical = [
    'Early',
    'AM Peak',
    'Inter peak',
    'PM Peak',
    'Evening',
    'Late',
]
time_cat_value = vauxhall_ent[time_categorical].to_numpy()[0]


In [30]:
# Quarter-hour counts as 1-D arrays: the first (only) row of each station frame.

# Entrance
y_vaux_ent, y_brix_ent, y_stock_ent = (
    station_frame[COLUMNS_HOURS].values[0]
    for station_frame in (vauxhall_ent, brixton_ent, stockwell_ent)
)

# Exit
y_vaux_ext, y_brix_ext, y_stock_ext = (
    station_frame[COLUMNS_HOURS].values[0]
    for station_frame in (vauxhall_ext, brixton_ext, stockwell_ext)
)

### Line plot
Visualize the volume of users with a line plot.

In [46]:
# help(export_png)

In [52]:
# Development aid: print the docstring of export_png (signature and arguments).
help(export_png)

Help on function export_png in module bokeh.io.export:

export_png(obj: Union[bokeh.models.layouts.LayoutDOM, bokeh.document.document.Document], *, filename: Union[str, NoneType] = None, width: Union[int, NoneType] = None, height: Union[int, NoneType] = None, webdriver: 'Optional[WebDriver]' = None, timeout: int = 5) -> str
    Export the ``LayoutDOM`` object or document as a PNG.
    
    If the filename is not given, it is derived from the script name (e.g.
    ``/foo/myplot.py`` will create ``/foo/myplot.png``)
    
    Args:
        obj (LayoutDOM or Document) : a Layout (Row/Column), Plot or Widget
            object or Document to export.
    
        filename (str, optional) : filename to save document under (default: None)
            If None, infer from the filename.
    
        width (int) : the desired width of the exported layout obj only if
            it's a Plot instance. Otherwise the width kwarg is ignored.
    
        height (int) : the desired height of the exporte

In [83]:
## Line plot: quarter-hour EXIT volume for the three stations
x_hours = COLUMNS_HOURS

p_ext = figure(x_range=FactorRange(*factors_ext),
               plot_height=500,
               plot_width=1200,
               toolbar_location=None,
               title='Volume of exit at station',
               )

# One line per station. The exit series (y_*_ext) are plotted so that the data
# agree with the figure title, the x factors and the exported file name.
p_ext.line(x=factors_ext, y=y_vaux_ext, color='#ff8b76', line_width=2, alpha=1, legend_label='Vauxhall')
p_ext.line(x=factors_ext, y=y_brix_ext, color='#24e0b8', line_width=2, alpha=1, legend_label='Brixton')
p_ext.line(x=factors_ext, y=y_stock_ext, color='#ffcc51', line_width=2, alpha=1, legend_label='Stockwell')

# AXES
p_ext.xaxis.axis_label = 'Time 15min'
p_ext.yaxis[0].axis_label = 'Volume'
p_ext.y_range.start = 0
p_ext.x_range.range_padding = 0.1
p_ext.xaxis.major_label_orientation = 1.5
p_ext.xgrid.grid_line_color = None

#LEGEND
p_ext.legend.title = 'Station'
p_ext.legend.label_text_font= 'times'
p_ext.legend.title_text_font_style = "bold"
p_ext.legend.title_text_font_size = "12pt"
p_ext.legend.location = "top_left"
p_ext.legend.orientation = "horizontal"

show(p_ext, notebook_handle=True)

#Save plot
# NOTE(review): machine-specific chromedriver location - adjust it for the
# environment the notebook runs in.
driver = webdriver.Chrome(executable_path='/Users/cyrilnrt/Desktop/Lemili/mlstack/0-hands_on_bokeh/chromedriver-2')
try:
    exported_path = export_png(obj=p_ext,filename= "plots/exit_stations.png", webdriver =driver)
finally:
    # Always shut the browser down, even when the export fails, so no
    # Chrome/chromedriver process is left running.
    driver.quit()

# Last expression of the cell: show where the PNG was written.
exported_path

'/Users/cyrilnrt/Desktop/Lemili/mlstack/0-hands_on_bokeh/plots/exit_stations.png'

In [23]:
# Mean of each time-band column over the three stations of interest.
tmp = en17week_station[[' Station']+time_categorical]
# Average only the numeric time-band columns: the ' Station' column holds
# strings and must not be part of the frame passed to .mean().
avg_time_cat = (
    tmp.loc[tmp[' Station'].isin(['Brixton','Stockwell','Vauxhall']), time_categorical]
    .mean(axis=0)
    .values
)

In [62]:

# profile = webdriver.FirefoxProfile('/Users/cyrilnrt/Downloads/')


WebDriverException: Message: Service /Users/cyrilnrt/Downloads/geckodriver unexpectedly exited. Status code was: -9


In [24]:
# perform some smoothing with scipy since bokeh doesn't support it
from scipy.interpolate import interp1d
from scipy.signal import find_peaks


#f3 = interp1d(x=val,y=avg, kind='next')
#x_new = np.linspace(start=0,stop=95,num=95,endpoint=True)
#f3(x_new)

## Barplot with Bokeh
- Visualise the volume of users for three stations: Brixton, Stockwell and Vauxhall
- Use the visualisation to compare the volume of users across the three stations

In [45]:

#### Stacked bar plot: entrance volume per quarter-hour, one layer per station

stations = ['Brixton','Stockwell','Vauxhall']

source = ColumnDataSource(data=dict(x=factors_ext,
                                    Brixton=brixton_ent[COLUMNS_HOURS].values[0],
                                    Stockwell = stockwell_ent[COLUMNS_HOURS].values[0],
                                    Vauxhall = vauxhall_ent[COLUMNS_HOURS].values[0],
))

# Per-quarter mean across the three stations. Only the station columns are
# averaged: the 'x' column holds (band, quarter) tuples and is not numeric.
avg = pd.DataFrame(source.data)[stations].mean(axis=1).values


p = figure(x_range=FactorRange(*factors_ext),
           plot_height=500,
           plot_width=1500,
           toolbar_location=None,
           title='Entrance',
           tools="")

p.vbar_stack(stations,
             x='x',
             width=0.5,
             alpha=0.5,
             color=['#24e0b8','#ffcc51','#ff8b76'],
             source=source,
             legend_label=stations)

# Locate the local maxima (peaks) of the average curve and translate their
# positions back into (band, quarter) factors.
peaks, _ = find_peaks(avg, height=0)
x_peak = np.array(COLUMNS_HOURS)[peaks]
x_peak = [e for e in factors_ext if e[1] in x_peak ]

# AXES
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None
p.yaxis.minor_tick_line_color = None
p.yaxis.axis_label='Volume of user'

# LEGEND
p.legend.title = 'Station'
p.legend.label_text_font= 'times'
p.legend.title_text_font_style = "bold"
p.legend.title_text_font_size = "12pt"
p.legend.title_text_font='times'
p.legend.location = "top_left"
p.legend.orientation = "vertical"

# save plot
#export_png(obj=p,filename= "plots/entrance_station.png")

# show plot
show(p, notebook_handle=True)

## Save plot with bokeh 

```sh
npm install -g backstopjs

pip install selenium phantomjs pillow
```

### Entrance rate versus Exit rate 
- Is the station safe? 
- How many people are in the station? 


In [26]:
# Five-colour palette. Each entry needs its comma: without them Python joins
# adjacent string literals and the list holds one long string instead.
COLOR_PALETTES = [
    '#000001',
    '#092a3a',
    '#395036',
    '#a37c19',
    '#ffe9c7',
]

In [30]:

# Vauxhall entrance: quarter-hour counts and each quarter's share of the total.
x_hours = COLUMNS_HOURS
y_counts_ent = vauxhall_ent[COLUMNS_HOURS].values[0]
total = vauxhall_ent[COLUMNS_HOURS].values[0].sum()
y_counts_2_ent = vauxhall_ent[COLUMNS_HOURS].values[0] / total

p = figure(title='Entry Vauxhall',
           x_range=FactorRange(*factors_ent),
           plot_width=1200,
           plot_height=500,
           tools="",
           toolbar_location=None)

# Second y axis on the right, bound to the extra range "foo" fixed to [0, 1].
p.extra_y_ranges = {"foo": Range1d(start=0, end=1)}
p.add_layout(LinearAxis(y_range_name="foo"), 'right')

# Bars: one per quarter-hour, drawn against the default (left) volume axis.
p.vbar(x=factors_ent,
       top=y_counts_ent,
       width=0.5,
       line_color= '#395036',
       fill_color= '#395036')

# Horizontal reference line at a constant 986.
# NOTE(review): presumably the 986 figure listed under "victoria line" in the
# "Data processing" notes - confirm what it represents.
cte = [986] * len(y_counts_ent)
p.line(x=factors_ent, y=cte, line_width=3, color="orange", alpha=0.5)

# x axis
p.xaxis.axis_label = 'Time Quarter'
p.xaxis.major_label_orientation = 1.5
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None

# y axes: [0] is the left volume axis, [1] the right-hand axis added above
p.y_range.start = 0
p.yaxis[0].axis_label = 'Volume'
p.yaxis[1].axis_label = 'Percentage'
p.yaxis[1].formatter = NumeralTickFormatter(format="0.0%")

# save plot
# export_png(obj=p,filename= "plots/entrance_vauxhall_station.png")

#show plot
show(p, notebook_handle=True)

In [31]:
# Vauxhall exit: quarter-hour counts and each quarter's share of the total.
x_hours = COLUMNS_HOURS
y_counts_ext = vauxhall_ext[COLUMNS_HOURS].values[0]

p_ext = figure(title='Exit Vauxhall',
               x_range=FactorRange(*factors_ext),
               plot_width=1500,
               plot_height=500,
               tools="",
               toolbar_location=None)

total = np.sum(y_counts_ext)
y_counts_2_ext = y_counts_ext / total

# Second y axis on the right, bound to the extra range "foo" fixed to [0, 1].
p_ext.extra_y_ranges = {"foo": Range1d(start=0, end=1)}
p_ext.add_layout(LinearAxis(y_range_name="foo"), 'right')

# Bars plus a line through the same values, both against the left volume axis.
p_ext.vbar(x=factors_ext,
           top=y_counts_ext,
           width=0.5,
           fill_color='red',
           line_color='black',
           alpha=0.3)
p_ext.line(x=factors_ext, y=y_counts_ext, line_width=2, color="blue", alpha=0.4)

# x axis
p_ext.xaxis.axis_label = 'time Quarter'
p_ext.xaxis.major_label_orientation = 1.5
p_ext.x_range.range_padding = 0.1
p_ext.xgrid.grid_line_color = None

# y axes: [0] is the left volume axis, [1] the right-hand axis added above
p_ext.y_range.start = 0
p_ext.yaxis[0].axis_label = 'Volume'
p_ext.yaxis[1].axis_label = 'Percentage'
p_ext.yaxis[1].formatter = NumeralTickFormatter(format="0.0%")

show(p_ext, notebook_handle=True)

In [32]:

# Grouped (dodged) bars: entrance and exit volume at Vauxhall, side by side
# for every quarter-hour.
dat_ext_int = dict(time=factors_ext, Exit=y_vaux_ext, Entrance=y_vaux_ent)
source_ext_int = ColumnDataSource(data=dat_ext_int)

p_ext_int = figure(title='Exit and Entrance at Vauxhall',
                   x_range=FactorRange(*factors_ent),
                   plot_width=1300,
                   plot_height=500,
                   tools="",
                   toolbar_location=None)

# dodge() shifts each bar away from the centre of its category:
# entrance bars to the left (-0.30), exit bars to the right (+0.30).
p_ext_int.vbar(x=dodge('time', -0.30, range=p_ext_int.x_range),
               top='Entrance',
               source=source_ext_int,
               width=0.4,
               alpha=0.4,
               color="orange",
               legend_label="Entrance")

p_ext_int.vbar(x=dodge('time', +0.30, range=p_ext_int.x_range),
               top='Exit',
               source=source_ext_int,
               width=0.4,
               alpha=0.4,
               color="blue",
               legend_label="Exit")

# Axes
p_ext_int.xaxis.axis_label = 'time Quarter'
p_ext_int.yaxis[0].axis_label = 'Volume'
p_ext_int.y_range.start = 0
p_ext_int.x_range.range_padding = 0.1
p_ext_int.xaxis.major_label_orientation = 1.5
p_ext_int.xgrid.grid_line_color = None

# Legend
p_ext_int.legend.location = "top_left"
p_ext_int.legend.orientation = "horizontal"

# save plot
#export_png(obj=p_ext_int,filename= "plots/entrance_exit_vauxhall_dodge.png")

# show
show(p_ext_int, notebook_handle=True)

In [33]:
# Entrance / exit counts per quarter-hour, with the relative change of each
# series from the previous quarter.
all_data = pd.DataFrame()
all_data['Entrance'] = y_counts_ent
all_data['Exit'] = y_counts_ext
all_data['time'] = COLUMNS_HOURS
all_data['ent_pct_change'] = all_data['Entrance'].pct_change()
all_data['exit_pct_change'] = all_data['Exit'].pct_change()

# Missing changes (e.g. the first row, which has no previous quarter) become 0.
all_data = all_data.fillna(0)

# Average entrance change over the rows without NaN / +-inf values.
# The axis is passed by keyword: the positional form `.any(1)` is deprecated
# and rejected by newer pandas releases.
tmp = all_data[~all_data.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
avg_ent_pct_change = np.mean(tmp['ent_pct_change'])

In [34]:
# Bar colour per quarter: 'red' when the entrance change is <= 0, else 'green'.
all_data['color'] = all_data['ent_pct_change'].le(0).map({True: 'red', False: 'green'})

In [35]:
from bokeh.models import BoxAnnotation, Title
from bokeh.plotting import figure, show, output_file

# Bar chart of the quarter-to-quarter percentage change in entrance volume.
p = figure(x_range=FactorRange(*factors_ent),
           plot_width=1300,
           plot_height=500,
           toolbar_location=None)

# Average change repeated once per row (only needed if the average is drawn
# as a line).
avg_ent = np.repeat(avg_ent_pct_change, len(all_data['exit_pct_change']))

# One bar per quarter-hour, coloured by the precomputed 'color' column.
p.vbar(x=factors_ent,
       top=all_data['ent_pct_change'],
       width=0.5,
       fill_color=all_data['color'],
       line_color='black',
       alpha=0.9)

##=== Styling plot
# Two stacked titles; they are added in this order on purpose.
p.add_layout(Title(text="Percentage Change Entrance", text_font_style='normal',text_font_size='10pt'), 'above')
p.add_layout(Title(text="Vauxhall Station", text_font_size="16pt"), 'above')

p.xaxis.axis_label = 'Time'
p.xaxis.major_label_orientation = 1.5
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color=None

p.yaxis.axis_label = 'Percentage change'
p.yaxis.formatter = NumeralTickFormatter(format="0.0%")

#show
show(p, notebook_handle=True)

#Save plot
#export_png(obj=p,filename= "entrance_vauxhall_percentageChange.png")

## Visualize Using Pie chart 
Use case: how many people are in the station?
- We know how many people enter the station 
- How many trains run per 15-minute interval 
- 70% of the people get on the train for each peak hour 

In [36]:
from bokeh.palettes import Set3
from bokeh.transform import cumsum
from bokeh.models import LabelSet, ColumnDataSource
from bokeh.models import HoverTool

#### Pie chart: share of each time band in the Vauxhall entrance volume

# {band name: value} for the Vauxhall row. 'records' is the full orient name;
# the 'record' abbreviation is deprecated in pandas.
x = vauxhall_ent[time_categorical].to_dict('records')[0]

data = pd.Series(x).reset_index(name='value').rename(columns={'index':'time'})
data['Perc'] = np.round(data['value']*100/data['value'].sum() ,2)

# Wedge size in radians, proportional to the band's share of the total.
data['angle'] = data['value']/data['value'].sum() *2*np.pi
data['angle_s'] = data['angle'].shift(1).fillna(0)
data['color'] = Set3[len(x)]

### Legend text: "<band>:<percentage> %", percentage left-padded to 8 chars
data["Perc_legend"] = data['Perc'].astype(str)
data["Perc_legend"] = data["Perc_legend"].apply(lambda x : x+' %')
data["Perc_legend"] = data["Perc_legend"].str.pad(8, side = "left")
data['legend'] = [ str(e)+':'+str(b) for e,b in  zip(data['time'],data['Perc_legend'])]

##
p = figure(x_range=(-0.5,0.75),
           title="Entrance: Vauxhall station",
           toolbar_location=None,
          )

## One wedge per band; cumsum() turns the per-band angles into start/end angles.
# `legend_field` names the data column used for the legend entries; it
# replaces the deprecated `legend=` glyph keyword.
p.wedge(x=0,
        y=1,
        radius=0.4,
        start_angle=cumsum('angle', include_zero=True),
        end_angle=cumsum('angle'),
        line_color="black",
        fill_alpha=0.8,
        fill_color='color',
        legend_field="legend",
        source=data)

data_label = data
source = ColumnDataSource(data_label)

### A pie chart needs neither axes nor grid.
p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None
p.legend.location = "top_right"
show(p, notebook_handle=True)

#Save
#export_png(obj=p,filename= "plots/entrance_vauxhall_pie.png")



## How to generate  Barplot with Bokeh 


In [None]:
# Display the current contents of `data`.
data

In [None]:
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file

In [37]:
from bokeh.io import show
from bokeh.palettes import Spectral6
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.models import ColumnDataSource, ranges, LabelSet

# Bar chart of the Vauxhall entrance volume per time band, with the
# percentage text drawn on top of each bar.

# Data for the labels: where to put them (time, count) and what to print.
source = ColumnDataSource(data=dict(time=data['time'],
                                    count=data['value'],
                                    Perc_legend=data['Perc_legend']))

pl2 = figure(title="Entrance: Vauxhall station",
             x_range=data['time'],
             plot_width=1000,
             plot_height=500,
             toolbar_location=None)

pl2.vbar(x=data['time'],
         top=data['value'],
         width=0.9,
         line_color='black',
         fill_color=data['color'])

# Text labels anchored at the top of each bar, shifted 13.5 px to the left.
labels = LabelSet(source=source,
                  x='time',
                  y='count',
                  text='Perc_legend',
                  x_offset=-13.5,
                  y_offset=0)
pl2.add_layout(labels)

# Axes; the y range is extended by 1500 above the tallest bar.
pl2.xaxis.axis_label = 'Time'
pl2.yaxis.axis_label = 'Volume of user'
pl2.y_range.start = 0
pl2.y_range.end = np.max(data['value'])+1500
pl2.ygrid.grid_line_alpha = 0.5

show(pl2, notebook_handle=True)
#export_png(obj=pl2,filename= "entrance_vauxhall_barplot.png")

## How to generate a Heatmap with Bokeh 

In [38]:
# Start from the (Period, Hours) factors, attach the Vauxhall exit counts and
# keep only the count and its quarter-hour label.
data_heat_map = (
    pd.DataFrame(factors_ent, columns=['Period', 'Hours'])
    .assign(Exit=y_vaux_ext)
    [['Exit', 'Hours']]
)

# Spread the counts into one column per quarter-hour (missing cells -> 0),
# then sum each row.
data_heat_map = data_heat_map.pivot(columns='Hours', values='Exit').fillna(0)
res = data_heat_map.sum(axis=1)

In [39]:
# Single-row frame: one column per quarter-hour, indexed by the label 'Total'.
data_heat_map = pd.DataFrame(
    [np.array(res)],
    columns=COLUMNS_HOURS,
    index=pd.Index(['Total'], name='Total'),
)

In [40]:
# Preview the single-row heat-map frame (the index is already named 'Total').
data_heat_map.head(3)

Unnamed: 0_level_0,0200-0215,0215-0230,0230-0245,0245-0300,0300-0315,0315-0330,0330-0345,0345-0400,0400-0415,0415-0430,...,2330-2345,2345-2400,0000-0015,0015-0030,0030-0045,0045-0100,0100-0115,0115-0130,0130-0145,0145-0200
Total,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Total,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,309.0,267.0,197.0,137.0,79.0,48.0,23.0,19.0,14.0,6.0


In [41]:
# Long format for Bokeh: one row per (Total, Hours) cell with its Exit value.
df = data_heat_map.stack().reset_index(name='Exit')
df.columns = ['Total','Hours','Exit']
source = ColumnDataSource(df)

In [42]:
from bokeh.io import output_file, show
from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.transform import transform
from bokeh.palettes import YlOrRd9 as palette
# The Bokeh unemployment sample dataset is deliberately NOT imported here:
# nothing in this cell uses it, and importing it as `data` overwrote the
# `data` frame built for the pie / bar charts and required the sample data
# to be downloaded.

# Colour scale: the reversed YlOrRd9 palette mapped linearly between the
# smallest and largest quarter-hour value.
colors = palette[::-1]
mapper = LinearColorMapper(palette=colors,
                           low=np.min(res),
                           high=np.max(res))

#======= One-row heatmap: quarter-hours on x, the single 'Total' row on y.
# The plotted column is the Vauxhall EXIT volume, so the title says so.
p = figure(plot_width=900,
           plot_height=300,
           title="Vauxhall Station Exit",
           x_range=list(COLUMNS_HOURS),
           y_range=list(data_heat_map.index),
           toolbar_location=None,
           tools="",
           x_axis_location="above"
          )

#======= One unit square per quarter-hour, filled according to its Exit value.
p.rect(x="Hours",
       y="Total",
       width=1,
       height=1,
       source=source,
       line_color='black',
       fill_color=transform('Exit', mapper)
      )

#==== Colour bar on the right showing the value scale
color_bar = ColorBar(color_mapper=mapper,
                     location=(0, 0),
                    )

p.add_layout(color_bar, 'right')
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "5pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = 1.0

show(p, notebook_handle=True)

#export_png(obj=p,filename= "entrance_vauxhall_heatmap.png")

In [43]:
%watermark -v 

Python implementation: CPython
Python version       : 3.8.1
IPython version      : 7.13.0



In [44]:
%watermark --iversions


numpy : 1.18.2
bokeh : 2.0.1
pandas: 1.0.3

