# Introduction to the bokeh library




In [38]:
from IPython.display import IFrame

# Embed the Bokeh documentation landing page as an inline frame in the notebook.
documentation = IFrame(
    src='https://bokeh.pydata.org/en/latest/',
    width=1000,
    height=450,
)
display(documentation)

In [40]:
import bokeh
import ipywidgets
from bokeh.io import output_notebook, show, reset_output
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.core.properties import value
from bokeh.transform import dodge

# other imports
import vega_datasets
import numpy as np
import pandas as pd

In [41]:
# Configure Bokeh's default output state so that show() renders plots inline in notebook cells.
# NOTE(review): the original remark says that on Colab output_notebook() has to be called in
# each cell (though not always) — confirm for the environment you run in.
output_notebook()

# Basic plot with Bokeh 

In [42]:
from bokeh.models import ColumnDataSource

# Build a ColumnDataSource from three named columns of five values each.
column_data_source = ColumnDataSource(
    data={
        'A': [1, 2, 3, 4, 5],
        'B': [5, 4, 3, 2, 1],
        'C': [1, 3, 5, 1, 2],
    }
)

# Show the column data held by the source.
column_data_source.data

{'A': [1, 2, 3, 4, 5], 'B': [5, 4, 3, 2, 1], 'C': [1, 3, 5, 1, 2]}

In [44]:
# Inspect the class of the data source created in the previous cell.
type(column_data_source)

bokeh.models.sources.ColumnDataSource

### Line Plot

We can plot simple graphs from NumPy arrays:

In [46]:
# data: ten random y-values plotted against x = 0..9
x_line = np.arange(10)
y_line = np.random.rand(10)


# line plot
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter were deprecated
# in Bokeh 2.4 and removed in Bokeh 3.0, and the linked-plot cells of this notebook already
# size their figures with `width`/`height`.
line_plot = figure(width=500, height=325, title='Line Plot', x_axis_label='x', y_axis_label='y')
line_plot.line(x_line, y_line, legend_label='line', line_width=2)


# another way to set axis labels
# line_plot.xaxis.axis_label = 'x-axis'
# line_plot.yaxis.axis_label = 'y-axis'

show(line_plot)

Plots with multiple lines:

In [48]:
# data: three independent random series sharing the same x values (0..9)
multi_line_x = np.arange(10)
multi_line_y1 = np.random.rand(10)
multi_line_y2 = np.random.rand(10)
multi_line_y3 = np.random.rand(10)

# plot
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter were deprecated
# in Bokeh 2.4 and removed in Bokeh 3.0, and the linked-plot cells of this notebook already
# size their figures with `width`/`height`.
multi_line_plot = figure(width=500, height=300, toolbar_location='below')

# one line glyph per series, each with its own legend entry, colour and thickness
multi_line_plot.line(multi_line_x, multi_line_y1, legend_label='1', color='red', line_width=3)
multi_line_plot.line(multi_line_x, multi_line_y2, legend_label='2', color='blue', line_width=5)
multi_line_plot.line(multi_line_x, multi_line_y3, legend_label='3', color='green', line_width=0.5)
show(multi_line_plot)

### Stacked Bar Charts

It is possible to convert a dataframe into a ColumnDataSource to plot a chart.
(Plotting appears to work with either a plain dataframe or a ColumnDataSource built from one; if passing the dataframe directly does not work, use a ColumnDataSource.)

In [50]:
stacked_bar_df = pd.DataFrame({'y': [1, 2, 3, 4, 5],
                               'x1': [1, 2, 4, 3, 4],
                               'x2': [1, 4, 2, 2, 3]})

# wrap the dataframe in a ColumnDataSource so the glyph can refer to its columns by name
cds_stacked_bar_df = ColumnDataSource(stacked_bar_df)

# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter were deprecated
# in Bokeh 2.4 and removed in Bokeh 3.0, and the linked-plot cells of this notebook already
# size their figures with `width`/`height`.
stacked_bar_chart = figure(width=600, height=300, title='stacked bar chart')

# one horizontal bar per 'y' value, made of an 'x1' segment followed by an 'x2' segment
stacked_bar_chart.hbar_stack(['x1', 'x2'],
                             y='y',
                             height=0.8,
                             color=('grey', 'lightgrey'),
                             source=cds_stacked_bar_df)

show(stacked_bar_chart)

### Grouped bar chart

In [52]:
from bokeh.core.properties import value
from bokeh.transform import dodge

# data: one row per month, one column per index
categories = ['Avril 2020', 'Mai 2020', 'Juin 2020']
grouped_bar_df = pd.DataFrame({'categories' : categories,
                               'SP500': [12.68, 4.53, 1.84],
                               'CAC40': [4, 2.70, 5.12],
                               'SX5E': [5.06, 4.18, 6.03]})


# plot: categorical x axis built from the month labels
# `height` is used instead of `plot_height`, which was deprecated in Bokeh 2.4 and removed
# in Bokeh 3.0; the linked-plot cells of this notebook already use `width`/`height`.
grouped_bar = figure(x_range=categories, y_range=(0, 20), height=250)

# offsets bars / bar locations on axis: shift each series sideways within its category
dodge1 = dodge('categories', -0.25, range=grouped_bar.x_range)
dodge2 = dodge('categories',  0.0,  range=grouped_bar.x_range)
dodge3 = dodge('categories',  0.25, range=grouped_bar.x_range)

grouped_bar.vbar(x=dodge1, top='SP500', width=0.2, source=grouped_bar_df, color='gray', legend_label='SP500')
grouped_bar.vbar(x=dodge2, top='CAC40', width=0.2, source=grouped_bar_df, color='blue', legend_label='CAC40')
grouped_bar.vbar(x=dodge3, top='SX5E', width=0.2, source=grouped_bar_df, color='green', legend_label='SX5E')

# format legend
grouped_bar.legend.location = 'top_left'  # another corner such as 'top_right' also works
grouped_bar.legend.orientation = 'horizontal'

show(grouped_bar)

In [53]:
# grouped bar chart with negative values

from bokeh.core.properties import value
from bokeh.transform import dodge

# data: one row per month, one column per index
categories = ['Févr 2020', 'Mars 2020', 'Avril 2020']
grouped_bar_df = pd.DataFrame({'categories' : categories,
                               'SP500': [-8.41, -12.51, 12.68],
                               'CAC40': [-8.55, -17.21, 5.12],
                               'SX5E': [-8.55, -16.30, 5.06]})


# plot: the y range is symmetric around zero because the data contains negative values
# `height` is used instead of `plot_height`, which was deprecated in Bokeh 2.4 and removed
# in Bokeh 3.0; the linked-plot cells of this notebook already use `width`/`height`.
grouped_bar = figure(x_range=categories, y_range=(-20, 20), height=250)

# offsets bars / bar locations on axis: shift each series sideways within its category
dodge1 = dodge('categories', -0.25, range=grouped_bar.x_range)
dodge2 = dodge('categories',  0.0,  range=grouped_bar.x_range)
dodge3 = dodge('categories',  0.25, range=grouped_bar.x_range)

grouped_bar.vbar(x=dodge1, top='SP500', width=0.2, source=grouped_bar_df, color='gray', legend_label='SP500')
grouped_bar.vbar(x=dodge2, top='CAC40', width=0.2, source=grouped_bar_df, color='blue', legend_label='CAC40')
grouped_bar.vbar(x=dodge3, top='SX5E', width=0.2, source=grouped_bar_df, color='green', legend_label='SX5E')

# format legend
grouped_bar.legend.location = 'top_left'
grouped_bar.legend.orientation = 'horizontal'

show(grouped_bar)

# External data visualization

### Download data

In [54]:
# Fetch the Bokeh sample data sets used by the following cells (e.g. bokeh.sampledata.stocks).
bokeh.sampledata.download()

Using data directory: C:\Users\bapti\.bokeh\data
Skipping 'CGM.csv' (checksum match)
Skipping 'US_Counties.zip' (checksum match)
Skipping 'us_cities.json' (checksum match)
Skipping 'unemployment09.csv' (checksum match)
Skipping 'AAPL.csv' (checksum match)
Skipping 'FB.csv' (checksum match)
Skipping 'GOOG.csv' (checksum match)
Skipping 'IBM.csv' (checksum match)
Skipping 'MSFT.csv' (checksum match)
Skipping 'WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip' (checksum match)
Skipping 'gapminder_fertility.csv' (checksum match)
Skipping 'gapminder_population.csv' (checksum match)
Skipping 'gapminder_life_expectancy.csv' (checksum match)
Skipping 'gapminder_regions.csv' (checksum match)
Skipping 'world_cities.zip' (checksum match)
Skipping 'airports.json' (checksum match)
Skipping 'movies.db.zip' (checksum match)
Skipping 'airports.csv' (checksum match)
Skipping 'routes.csv' (checksum match)
Skipping 'haarcascade_frontalface_default.xml' (checksum match)


In [55]:
from bokeh.sampledata.stocks import AAPL

# Print the container type and its keys, one per line, then display the raw data.
print(type(AAPL), AAPL.keys(), sep='\n')
AAPL

<class 'dict'>
dict_keys(['date', 'open', 'high', 'low', 'close', 'volume', 'adj_close'])


{'date': ['2000-03-01',
  '2000-03-02',
  '2000-03-03',
  '2000-03-06',
  '2000-03-07',
  '2000-03-08',
  '2000-03-09',
  '2000-03-10',
  '2000-03-13',
  '2000-03-14',
  '2000-03-15',
  '2000-03-16',
  '2000-03-17',
  '2000-03-20',
  '2000-03-21',
  '2000-03-22',
  '2000-03-23',
  '2000-03-24',
  '2000-03-27',
  '2000-03-28',
  '2000-03-29',
  '2000-03-30',
  '2000-03-31',
  '2000-04-03',
  '2000-04-04',
  '2000-04-05',
  '2000-04-06',
  '2000-04-07',
  '2000-04-10',
  '2000-04-11',
  '2000-04-12',
  '2000-04-13',
  '2000-04-14',
  '2000-04-17',
  '2000-04-18',
  '2000-04-19',
  '2000-04-20',
  '2000-04-24',
  '2000-04-25',
  '2000-04-26',
  '2000-04-27',
  '2000-04-28',
  '2000-05-01',
  '2000-05-02',
  '2000-05-03',
  '2000-05-04',
  '2000-05-05',
  '2000-05-08',
  '2000-05-09',
  '2000-05-10',
  '2000-05-11',
  '2000-05-12',
  '2000-05-15',
  '2000-05-16',
  '2000-05-17',
  '2000-05-18',
  '2000-05-19',
  '2000-05-22',
  '2000-05-23',
  '2000-05-24',
  '2000-05-25',
  '2000-05-26',


In [56]:
# Convert the sample dict to a dataframe and parse the date strings into datetimes
df = pd.DataFrame(AAPL)
df['date'] = pd.to_datetime(df['date'])

#output_file("datetime.html")

# create a new plot with a datetime axis type
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter were deprecated
# in Bokeh 2.4 and removed in Bokeh 3.0, and the linked-plot cells of this notebook already
# size their figures with `width`/`height`.
p = figure(width=800, height=250, x_axis_type="datetime")
p.line(df['date'], df['close'], color='navy', alpha=0.5)

show(p)

### Plot excel data

In [57]:
# Load the Excel workbook into a dataframe and display it.
# NOTE(review): 'data_1_bis.xlsx' is a relative path, so the file must be reachable from the
# kernel's working directory.
data_eu_stocks = pd.read_excel('data_1_bis.xlsx')
data_eu_stocks

Unnamed: 0,KONINKLIJKE AHOLD DELHAIZE N,ADIDAS AG,AIR LIQUIDE SA,ALLIANZ SE-REG,ASML HOLDING NV,BASF SE,BAYER AG-REG,BANCO BILBAO VIZCAYA ARGENTA,BAYERISCHE MOTOREN WERKE AG,DANONE,...,SAFRAN SA,SANOFI,BANCO SANTANDER SA,SAP SE,SIEMENS AG-REG,SCHNEIDER ELECTRIC SE,TELEFONICA SA,UNILEVER NV,VIVENDI,VOLKSWAGEN AG-PREF
2000-01-04,33.0765,17.500,30.5762,285.934,36.350,23.925,40.560,13.2678,28.300,26.0673,...,31.4286,37.7000,10.1881,39.292,69.297,37.7151,20.8609,17.9000,80.3224,29.819
2000-01-05,33.3564,17.500,30.2922,294.078,32.883,23.375,39.537,12.9700,27.740,25.9493,...,29.0714,36.1100,9.9405,34.550,66.952,36.1313,19.8862,18.3333,78.3869,30.018
2000-01-06,34.6828,18.250,32.4695,297.698,30.583,24.015,40.699,12.9700,27.650,27.8365,...,30.1667,37.5000,9.9405,35.671,66.134,37.9626,19.8862,18.8333,77.4676,30.664
2000-01-07,36.2040,18.000,32.0719,305.977,32.867,25.000,42.395,13.1333,27.600,27.9545,...,30.9524,39.5000,10.3807,43.250,68.794,38.6555,19.9210,19.4833,78.0966,31.440
2000-01-10,35.1696,18.273,30.9549,307.742,35.233,25.110,43.934,13.0250,28.700,27.6242,...,32.0952,38.6500,10.2890,45.017,72.887,38.0121,20.5215,19.0667,85.1127,31.658
2000-01-11,33.5754,18.103,30.6709,304.936,35.667,23.995,42.634,12.7636,28.600,27.3529,...,34.2857,39.1500,9.9405,46.458,73.390,36.5520,20.5389,18.6667,86.7095,31.966
2000-01-12,32.9791,18.000,31.3714,306.294,35.167,24.000,40.791,12.6183,28.190,28.0724,...,34.2857,38.3000,9.8488,47.417,72.613,37.1459,20.0603,18.6833,83.5160,31.728
2000-01-13,32.6140,17.463,31.7501,311.270,35.550,23.750,40.514,12.3084,27.400,28.0842,...,37.1905,38.0500,9.7663,47.917,75.431,39.0267,21.2787,18.8000,88.3063,30.813
2000-01-14,34.0987,16.838,32.4506,336.018,39.750,23.750,41.482,12.5505,27.200,27.8837,...,39.0476,39.6900,10.0230,49.000,79.349,38.8535,21.9314,18.9500,89.4192,30.823
2000-01-17,33.9648,17.000,31.3714,334.254,41.667,23.935,42.634,12.4343,27.280,28.1314,...,40.4762,39.0500,9.7571,48.250,81.109,37.5171,23.1933,18.7333,90.3869,31.857


### The pandas-bokeh library

It allows us to plot multiple graphs easily from a dataframe.

In [58]:
# Plot a dataframe with the pandas-bokeh library in two statements
import pandas_bokeh

# keep columns 1 to 5, i.e. the first five stock columns (column 0 is skipped)
data_eu_stocks_2 = data_eu_stocks[data_eu_stocks.columns[1:6]]
data_eu_stocks_2.plot_bokeh(
    kind='line',
    figsize=(900, 600),
    title='European Stocks between 2000 and 2015',
)

We can add tools such as a range tool:

In [59]:
# Same line plot with a range tool added.
# `rangetool` is a boolean flag; the string 'True' only worked because any non-empty string
# is truthy (the string 'False' would have enabled the tool as well).
data_eu_stocks_2.plot_bokeh(kind = 'line', figsize = (900, 400), title = 'European Stocks between 2000 and 2015', rangetool = True)

# Linked plot

Visualize and manipulate several figures simultaneously

In [60]:
from bokeh.layouts import gridplot
from bokeh.models import BoxSelectTool

# 40 random values against x = 0..39, drawn in all three figures
linked_data_x = np.arange(40)
linked_data_y = np.random.rand(40)

# first figure (scatter plot); its ranges are reused by the other two figures
linked_plot1 = figure(width=250, height=250)
linked_plot1.circle(x=linked_data_x, y=linked_data_y)

# second figure (line plot), sharing both ranges with the first
linked_plot2 = figure(
    width=250,
    height=250,
    x_range=linked_plot1.x_range,
    y_range=linked_plot1.y_range,
)
linked_plot2.line(x=linked_data_x, y=linked_data_y)

# third figure (vertical bars), sharing both ranges with the first
linked_plot3 = figure(
    width=250,
    height=250,
    x_range=linked_plot1.x_range,
    y_range=linked_plot1.y_range,
)
linked_plot3.vbar(x=linked_data_x, top=linked_data_y, width=0.5)

# arrange the three figures on a single row and render the grid
linked_gridplot = gridplot([[linked_plot1, linked_plot2, linked_plot3]])
show(linked_gridplot)

We can also add tools and change features of the figures, such as their size:

In [64]:
# first figure (scatter plot), wider than in the previous cell
linked_plot1 = figure(width=450, height=250)
linked_plot1.circle(x=linked_data_x, y=linked_data_y)

# second figure (vertical bars), sharing both ranges with the first
linked_plot3 = figure(
    width=450,
    height=250,
    x_range=linked_plot1.x_range,
    y_range=linked_plot1.y_range,
)
linked_plot3.vbar(x=linked_data_x, top=linked_data_y, width=0.5)

# give each figure its own box-select tool limited to the "width" dimension
linked_plot1.add_tools(BoxSelectTool(dimensions="width"))
linked_plot3.add_tools(BoxSelectTool(dimensions="width"))

# arrange both figures on a single row and render the grid
linked_gridplot = gridplot([[linked_plot1, linked_plot3]])
show(linked_gridplot)



# Widgets


In [65]:
#regression
import ipywidgets
from sklearn import linear_model
from bokeh.layouts import layout
from bokeh.models import Toggle
import numpy as np

# data: keep the last 100 rows of the workbook and pick two columns by position
# NOTE(review): the variable names suggest column 1 is the SX5E index and column 12 is BNP —
# confirm against the file.
data_stocks = pd.read_excel('data_1.xlsx').tail(100)
sx5e = data_stocks[list(data_stocks)[1]]
bnp = data_stocks[list(data_stocks)[12]]


# reshape each series into a single-column 2-D array before fitting
x = bnp
X = np.array(x).reshape(-1, 1)
y = sx5e
Y = np.array(y).reshape(-1, 1)

# linear regression object
regr = linear_model.LinearRegression()

# fit linear model
regr.fit(X, Y)

# make predictions
pred = regr.predict(X)

# plot with regression line
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter were deprecated
# in Bokeh 2.4 and removed in Bokeh 3.0, and the linked-plot cells of this notebook already
# size their figures with `width`/`height`.
regr_plot = figure(width=500, height=300)
scatter = regr_plot.scatter(x, y, size=10)
# pred is 2-D (one column), so flatten it back to 1-D for the line glyph
regr_line = regr_plot.line(x, pred.flatten(), line_color='red')

# each toggle's 'active' state is linked to the 'visible' property of one renderer
toggle_button = Toggle(label='line of best fit', button_type='success', active=True)
toggle_button.js_link('active', regr_line, 'visible')

toggle_button2 = Toggle(label='scatter plot', button_type='success', active=True)
toggle_button2.js_link('active', scatter, 'visible')

# stack the plot and the two buttons vertically, one per row
show(layout([regr_plot], [toggle_button], [toggle_button2]))



In [66]:
import ipywidgets
from bokeh.io import push_notebook
from bokeh.models import Range1d
from bokeh.sampledata.stocks import AAPL, GOOG, MSFT



# widget: drop-down used to choose which stock's closing prices are drawn
drop_down = ipywidgets.Dropdown(options=[ 'GOOG', 'AAPL'],
                                description='Compagnies:',
                                disabled=False)

# data: the last 2000 rows of each sample data set

df_aapl = pd.DataFrame(AAPL).tail(2000)
df_goog = pd.DataFrame(GOOG).tail(2000)


# create a new plot with a datetime axis type
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter were deprecated
# in Bokeh 2.4 and removed in Bokeh 3.0, and the linked-plot cells of this notebook already
# size their figures with `width`/`height`.
df_aapl['date'] = pd.to_datetime(df_aapl['date'])
plot_stock = figure(width=800, height=350, x_axis_type="datetime", title = 'Apple and Google Stock Prices')
plot_ipw = plot_stock.line(x = df_aapl['date'], y = df_aapl['close'], color='green', alpha=0.5)


# function

def stock_price(stock):
    """Replace the y-values of the plotted line with the selected stock's closing prices.

    Only the 'y' column of the line's data source is replaced; the x-values stay the
    AAPL dates the line was created with. Any value other than 'AAPL' or 'GOOG' leaves
    the data untouched. The notebook output is refreshed in every case.

    Args:
        stock: ticker chosen in the drop-down, 'AAPL' or 'GOOG'.
    """
    if stock == 'AAPL':
        plot_ipw.data_source.data['y'] = df_aapl['close']


    elif stock == 'GOOG':
        plot_ipw.data_source.data['y'] = df_goog['close']


    # push the modified data source to the plot that was shown with notebook_handle=True
    push_notebook()

show(plot_stock, notebook_handle=True)

# interaction: call stock_price with the drop-down's current value
ipywidgets.interact(stock_price, stock=drop_down)

interactive(children=(Dropdown(description='Compagnies:', options=('GOOG', 'AAPL'), value='GOOG'), Output()), …

<function __main__.stock_price(stock)>

# Maps visualization

In [67]:
# convert longitude, latitude to web mercator coordinates
# example - Detroit, Michigan: latitude 42.334197, longitude -83.047752

from pyproj import Proj, transform

def create_coordinates(long_arg,lat_arg):
    """Convert a longitude/latitude pair (EPSG:4326) to Web Mercator (EPSG:3857).

    The projected pair is printed, as before, and is now also returned so that it can
    be passed straight to a glyph call instead of being copied by hand.

    Args:
        long_arg: longitude of the point (first argument — not the latitude).
        lat_arg: latitude of the point.

    Returns:
        Tuple ``(mercator_x, mercator_y)`` as produced by ``pyproj.transform``.
    """
    # NOTE(review): recent pyproj releases flag Proj(init=...) and pyproj.transform as
    # deprecated in favour of Transformer.from_crs — kept unchanged here; confirm against
    # the installed pyproj version before migrating.
    in_wgs = Proj(init='epsg:4326')
    out_mercator = Proj(init='epsg:3857')
    mercator_x, mercator_y = transform(in_wgs, out_mercator, long_arg, lat_arg)
    print(mercator_x, mercator_y)
    return mercator_x, mercator_y
    
# # Detroit
# create_coordinates(-83.047752,42.334197)
# # Cleveland
# create_coordinates(-81.694703,41.499437)
# # Chicago 
# create_coordinates(-87.629849,41.878111)
# #Paris
# create_coordinates(2.3488, 48.8534)
# #London
# create_coordinates(-0.1277583, 51.5073509)

from bokeh.tile_providers import get_provider, Vendors

tile_provider = get_provider(Vendors.CARTODBPOSITRON)
# tile_provider = get_provider(Vendors.STAMEN_TONER_BACKGROUND)

# range bounds supplied in web mercator coordinates
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter were deprecated
# in Bokeh 2.4 and removed in Bokeh 3.0, and the linked-plot cells of this notebook already
# size their figures with `width`/`height`.
m = figure(width=800,
           height=400,
           x_range=(-12000000, 9000000),
           y_range=(-1000000, 7000000),
           x_axis_type='mercator',
           y_axis_type='mercator')

m.add_tile(tile_provider)

# one marker per city, in web mercator coordinates; the labels follow the commented
# examples in this cell (coordinates are approximate)
m.circle(x=-9244833, y=5211172, size=10, color='red')     # Detroit
m.circle(x=-9094212, y=5086289, size=10, color='blue')    # Cleveland
m.circle(x=-9754910, y=5142738, size=10, color='orange')  # Chicago
m.circle(x=261540, y=6249735, size=10, color='green')     # Paris
m.circle(x=-14222, y=6706672, size=10, color='black')     # London

show(m)