# Introduction to the bokeh library




In [6]:
from IPython.display import IFrame
documentation = IFrame(src='https://bokeh.pydata.org/en/latest/', width=1000, height=450)
display(documentation)
%bokeh inline

UsageError: Line magic function `%bokeh` not found.


In [5]:
import bokeh
import ipywidgets
from bokeh.io import output_notebook, show, reset_output
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.core.properties import value
from bokeh.transform import dodge

# other imports
import vega_datasets
import numpy as np
import pandas as pd

In [7]:
# configure the default output state to generate output in notebook cells when show() is called
# in colab, output_notebook() is called in each cell (this is not always the case)
output_notebook()

# Basic plot with Bokeh 

In [8]:
from bokeh.models import ColumnDataSource

column_data_source = ColumnDataSource({'A': [1, 2, 3, 4, 5],
                                       'B': [5, 4, 3, 2, 1],
                                       'C': [1, 3, 5, 1, 2]})

column_data_source.data

{'A': [1, 2, 3, 4, 5], 'B': [5, 4, 3, 2, 1], 'C': [1, 3, 5, 1, 2]}

In [9]:
type(column_data_source)

bokeh.models.sources.ColumnDataSource

### Line Plot

We can plot simple graphs from array using numpy:

In [10]:
# data
x_line = np.arange(10)
y_line = np.random.rand(10)


# line plot
line_plot = figure(plot_width=500, plot_height=325, title='Line Plot', x_axis_label='x', y_axis_label='y')
line_plot.line(x_line, y_line, legend_label='line', line_width=2)


# another way to set axis labels
# line_plot.xaxis.axis_label = 'x-axis'
# line_plot.yaxis.axis_label = 'y-axis'

show(line_plot)

AttributeError: unexpected attribute 'legend_label' to Line, possible attributes are js_event_callbacks, js_property_callbacks, line_alpha, line_cap, line_color, line_dash, line_dash_offset, line_join, line_width, name, subscribed_events, tags, x or y

Multiple lines plots (

In [22]:
# data
multi_line_x = np.arange(10)
multi_line_y1 = np.random.rand(10)
multi_line_y2 = np.random.rand(10)
multi_line_y3 = np.random.rand(10)

# plot 
multi_line_plot = figure(plot_width=500, plot_height=300, toolbar_location='below')
multi_line_plot.line(multi_line_x, multi_line_y1, legend = '1', color='red', line_width=3)
multi_line_plot.line(multi_line_x, multi_line_y2, legend = '2', color='blue', line_width=5)
multi_line_plot.line(multi_line_x, multi_line_y3, legend = '3', color='green', line_width=0.5)
show(multi_line_plot)

### Stacked Bar Charts

It is possible to convert dataframe into ColumnDataSource to plot chart.
(appears to work fine with dataframe or column data source converted dataframe but if dataframe does not work, use column data source)

In [23]:
stacked_bar_df = pd.DataFrame({'y': [1, 2, 3, 4, 5],
                               'x1': [1, 2, 4, 3, 4],
                               'x2': [1, 4, 2, 2, 3]})

cds_stacked_bar_df = ColumnDataSource(stacked_bar_df)

stacked_bar_chart = figure(plot_width=600, plot_height=300, title='stacked bar chart')

stacked_bar_chart.hbar_stack(['x1', 'x2'], 
                             y='y', 
                             height=0.8, 
                             color=('grey', 'lightgrey'), 
                             source=cds_stacked_bar_df)

show(stacked_bar_chart)

### Grouped bar chart

In [24]:
from bokeh.core.properties import value
from bokeh.transform import dodge

# data
categories = ['Avril 2020', 'Mai 2020', 'Juin 2020']
grouped_bar_df = pd.DataFrame({'categories' : categories,
                               'SP500': [12.68, 4.53, 1.84],
                               'CAC40': [4, 2.70, 5.12],
                               'SX5E': [5.06, 4.18, 6.03]})


# plot
grouped_bar = figure(x_range=categories, y_range=(0, 20), plot_height=250)

# offsets bars / bar locations on axis
dodge1 = dodge('categories', -0.25, range=grouped_bar.x_range)
dodge2 = dodge('categories',  0.0,  range=grouped_bar.x_range)
dodge3 = dodge('categories',  0.25, range=grouped_bar.x_range)

grouped_bar.vbar(x=dodge1, top='SP500', width=0.2, source=grouped_bar_df, color='gray', legend='SP500')
grouped_bar.vbar(x=dodge2, top='CAC40', width=0.2, source=grouped_bar_df, color='blue', legend='CAC40')
grouped_bar.vbar(x=dodge3, top='SX5E', width=0.2, source=grouped_bar_df, color='green', legend='SX5E')

# format legend
grouped_bar.legend.location = 'top_left'
grouped_bar.legend.orientation = 'horizontal'

show(grouped_bar)

In [25]:
# grouped bar chart

from bokeh.core.properties import value
from bokeh.transform import dodge

# data
categories = ['Févr 2020', 'Mars 2020', 'Avril 2020']
grouped_bar_df = pd.DataFrame({'categories' : categories,
                               'SP500': [-8.41, -12.51, 12.68],
                               'CAC40': [-8.55, -17.21, 5.12],
                               'SX5E': [-8.55, -16.30, 5.06]})


# plot
grouped_bar = figure(x_range=categories, y_range=(-20, 20), plot_height=250)

# offsets bars / bar locations on axis
dodge1 = dodge('categories', -0.25, range=grouped_bar.x_range)
dodge2 = dodge('categories',  0.0,  range=grouped_bar.x_range)
dodge3 = dodge('categories',  0.25, range=grouped_bar.x_range)

grouped_bar.vbar(x=dodge1, top='SP500', width=0.2, source=grouped_bar_df, color='gray', legend='SP500')
grouped_bar.vbar(x=dodge2, top='CAC40', width=0.2, source=grouped_bar_df, color='blue', legend='CAC40')
grouped_bar.vbar(x=dodge3, top='SX5E', width=0.2, source=grouped_bar_df, color='green', legend='SX5E')

# format legend
grouped_bar.legend.location = 'top_left'
grouped_bar.legend.orientation = 'horizontal'

show(grouped_bar)

# External data visualization

### Download data

In [26]:
bokeh.sampledata.download()

Using data directory: C:\Users\user\.bokeh\data
Downloading: CGM.csv (1589982 bytes)
   1589982 [100.00%]
Downloading: US_Counties.zip (3171836 bytes)
   3171836 [100.00%]
Unpacking: US_Counties.csv
Downloading: us_cities.json (713565 bytes)
    713565 [100.00%]
Downloading: unemployment09.csv (253301 bytes)
    253301 [100.00%]
Downloading: AAPL.csv (166698 bytes)
    166698 [100.00%]
Downloading: FB.csv (9706 bytes)
      9706 [100.00%]
Downloading: GOOG.csv (113894 bytes)
    113894 [100.00%]
Downloading: IBM.csv (165625 bytes)
    165625 [100.00%]
Downloading: MSFT.csv (161614 bytes)
    161614 [100.00%]
Downloading: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip (4816256 bytes)
   4816256 [100.00%]
Unpacking: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.csv
Downloading: gapminder_fertility.csv (64346 bytes)
     64346 [100.00%]
Downloading: gapminder_population.csv (94509 bytes)
     94509 [100.00%]
Downloading: gapminder_life_expectancy.csv (73243 bytes)
     73243 [100.00%]
Downloading:

In [27]:
from bokeh.sampledata.stocks import AAPL

type(AAPL)

dict

In [28]:
#Convert to dataframe
df = pd.DataFrame(AAPL)
df['date'] = pd.to_datetime(df['date'])

#output_file("datetime.html")

# create a new plot with a datetime axis type
p = figure(plot_width=800, plot_height=250, x_axis_type="datetime")
p.line(df['date'], df['close'], color='navy', alpha=0.5)

show(p)

### Import excel data

In [29]:
data_eu_stocks = pd.read_excel('data_1_bis.xlsx')
data_eu_stocks.head()

Unnamed: 0.1,Unnamed: 0,KONINKLIJKE AHOLD DELHAIZE N,ADIDAS AG,AIR LIQUIDE SA,ALLIANZ SE-REG,ASML HOLDING NV,BASF SE,BAYER AG-REG,BANCO BILBAO VIZCAYA ARGENTA,BAYERISCHE MOTOREN WERKE AG,...,SAFRAN SA,SANOFI,BANCO SANTANDER SA,SAP SE,SIEMENS AG-REG,SCHNEIDER ELECTRIC SE,TELEFONICA SA,UNILEVER NV,VIVENDI,VOLKSWAGEN AG-PREF
0,2000-01-04,33.0765,17.5,30.5762,285.934,36.35,23.925,40.56,13.2678,28.3,...,31.4286,37.7,10.1881,39.292,69.297,37.7151,20.8609,17.9,80.3224,29.819
1,2000-01-05,33.3564,17.5,30.2922,294.078,32.883,23.375,39.537,12.97,27.74,...,29.0714,36.11,9.9405,34.55,66.952,36.1313,19.8862,18.3333,78.3869,30.018
2,2000-01-06,34.6828,18.25,32.4695,297.698,30.583,24.015,40.699,12.97,27.65,...,30.1667,37.5,9.9405,35.671,66.134,37.9626,19.8862,18.8333,77.4676,30.664
3,2000-01-07,36.204,18.0,32.0719,305.977,32.867,25.0,42.395,13.1333,27.6,...,30.9524,39.5,10.3807,43.25,68.794,38.6555,19.921,19.4833,78.0966,31.44
4,2000-01-10,35.1696,18.273,30.9549,307.742,35.233,25.11,43.934,13.025,28.7,...,32.0952,38.65,10.289,45.017,72.887,38.0121,20.5215,19.0667,85.1127,31.658


### The pandas-bokeh library

It allows us to plot multiple graph easily from a dataframe

In [35]:
#Plot a dataframe using the pandas-bokeh library in 2 lines
import pandas_bokeh

data_eu_stocks_2 = data_eu_stocks[list(data_eu_stocks)[1:6]] #select the first 5 stocks
data_eu_stocks_2.plot_bokeh(kind = 'line', figsize = (900, 600), title = 'European Stocks between 2000 and 2015')

AttributeError: unexpected attribute 'legend_label' to Line, possible attributes are js_event_callbacks, js_property_callbacks, line_alpha, line_cap, line_color, line_dash, line_dash_offset, line_join, line_width, name, subscribed_events, tags, x or y

We can add tools such as rangetool:

In [34]:
data_eu_stocks_2.plot_bokeh(kind = 'line', figsize = (900, 400), title = 'European Stocks between 2000 and 2015', rangetool = 'True')

AttributeError: unexpected attribute 'legend_label' to Line, possible attributes are js_event_callbacks, js_property_callbacks, line_alpha, line_cap, line_color, line_dash, line_dash_offset, line_join, line_width, name, subscribed_events, tags, x or y

# Linked plot

Visualize and manipulate several figures simultaneaously

In [17]:
from bokeh.layouts import gridplot
from bokeh.models import BoxSelectTool


linked_data_x = np.arange(40)
linked_data_y = np.random.rand(40)

# linked plot 1
linked_plot1 = figure(width=250, height=250)
linked_plot1.circle(linked_data_x, linked_data_y)

# create new plots and share both ranges
linked_plot2 = figure(width=250, height=250, x_range=linked_plot1.x_range, y_range=linked_plot1.y_range)
linked_plot2.line(linked_data_x, linked_data_y)

linked_plot3 = figure(width=250, height=250, x_range=linked_plot1.x_range, y_range=linked_plot1.y_range)
linked_plot3.vbar(linked_data_x, top=linked_data_y, width=0.5)


# the subplots in a gridplot
linked_gridplot = gridplot([[linked_plot1, linked_plot2, linked_plot3]])


# show the results
show(linked_gridplot)

We can also add tools and changes the features of the figures such as the size etc:

In [18]:
# linked plot 1
linked_plot1 = figure(width=450, height=250)
linked_plot1.circle(linked_data_x, linked_data_y)

# create new plots and share both ranges
linked_plot2 = figure(width=450, height=250, x_range=linked_plot1.x_range, y_range=linked_plot1.y_range)
linked_plot2.line(linked_data_x, linked_data_y)


linked_plot1.add_tools(BoxSelectTool(dimensions="width"))

# the subplots in a gridplot
linked_gridplot = gridplot([[linked_plot1, linked_plot2]])

# show the results
show(linked_gridplot)



# Widgets


In [36]:
#regression
import ipywidgets
from sklearn import linear_model
from bokeh.layouts import layout
from bokeh.models import Toggle
import numpy as np

# data

data_stocks = pd.read_excel('data_1.xlsx').tail(100)
sx5e = data_stocks[list(data_stocks)[1]]
bnp = data_stocks[list(data_stocks)[12]]


x = bnp
X = np.array(x).reshape(-1, 1)
y = sx5e
Y = np.array(y).reshape(-1, 1)

# linear regression object
regr = linear_model.LinearRegression()

# fit linear model
regr.fit(X, Y)

# make predictions
pred = regr.predict(X)

# plot with regression line
regr_plot = figure(plot_width=500, plot_height=300)
regr_plot.scatter(x, y, size=10)
regr_line = regr_plot.line(x, pred.flatten(), line_color='red')

toggle_button = Toggle(label='line of best fit', button_type='success', active=True)
toggle_button.js_link('active', regr_line, 'visible')

show(layout([regr_plot], [toggle_button]))



In [37]:
import ipywidgets
from bokeh.io import push_notebook
from bokeh.models import Range1d
from bokeh.sampledata.stocks import AAPL, GOOG, MSFT



# widget
drop_down = ipywidgets.Dropdown(options=[ 'GOOG', 'AAPL'],
                                description='Compagnies:',
                                disabled=False)

# data

df_aapl = pd.DataFrame(AAPL).tail(2000)
df_goog = pd.DataFrame(GOOG).tail(2000)


# create a new plot with a datetime axis type
df_aapl['date'] = pd.to_datetime(df_aapl['date'])
plot_stock = figure(plot_width=800, plot_height=250, x_axis_type="datetime", title = 'Apple and Google Stock Prices')
plot_ipw = plot_stock.line(x = df_aapl['date'], y = df_aapl['close'], color='green', alpha=0.5)


# function

def stock_price(stock):
    if stock == 'AAPL':
        plot_ipw.data_source.data['y'] = df_aapl['close']
    
    
    elif stock == 'GOOG':
        plot_ipw.data_source.data['y'] = df_goog['close']
        
        
    push_notebook()

show(plot_stock, notebook_handle=True)
        
# interaction
ipywidgets.interact(stock_price, stock=drop_down)

interactive(children=(Dropdown(description='Compagnies:', options=('GOOG', 'AAPL'), value='GOOG'), Output()), …

<function __main__.stock_price(stock)>

# Maps visualization

In [38]:
# convert longitude, latitude to mercator coordinates
# example - Detroit Michigan 42.334197, -83.047752

from pyproj import Proj, transform

def create_coordinates(long_arg,lat_arg):
    in_wgs = Proj(init='epsg:4326')
    out_mercator = Proj(init='epsg:3857')
    long, lat = long_arg, lat_arg
    mercator_x, mercator_y = transform(in_wgs, out_mercator, long, lat)
    print(mercator_x, mercator_y)
    
# # Detroit
# create_coordinates(-83.047752,42.334197)
# # Cleveland
# create_coordinates(-81.694703,41.499437)
# # Chicago 
# create_coordinates(-87.629849,41.878111)
# #Paris
# create_coordinates(48.8534, 2.3488)
# #London
# create_coordinates(51.5073509, -0.1277583)

from bokeh.tile_providers import get_provider, Vendors

tile_provider = get_provider(Vendors.CARTODBPOSITRON)
# tile_provider = get_provider(Vendors.STAMEN_TONER_BACKGROUND)

# range bounds supplied in web mercator coordinates
m = figure(plot_width=800, 
           plot_height=400,
           x_range=(-12000000, 9000000), 
           y_range=(-1000000, 7000000), 
           x_axis_type='mercator', 
           y_axis_type='mercator')

m.add_tile(tile_provider)

m.circle(x=-9244833, y=5211172, size=10, color='red')
m.circle(x=-9094212, y=5086289, size=10, color='blue')
m.circle(x=-9754910, y=5142738, size=10, color='orange')
m.circle(x=261540, y=6249735, size=10, color='green')
m.circle(x=-14222, y=6706672, size=10, color='black')

show(m)

ModuleNotFoundError: No module named 'pyproj'