# CCIT4092 Data Visualization
# 06 Bokeh

In [2]:
from bokeh.plotting import figure, show

In [3]:
# Configure Bokeh so that show() renders figures inline in the notebook.
from bokeh.io import output_notebook

output_notebook()

#### Note:
The cell above makes Bokeh render its visualizations inline in the notebook. Without it, `show()` opens each plot in a new browser tab.

In [5]:
from bokeh.io import output_file

In [6]:
# sample data: five (x, y) points
x = [1, 2, 3, 4, 5]
y = [6, 7, 2, 4, 5]

# also write the rendered plot to a static HTML file
output_file("lines.html")

# build the figure with a title and labelled axes
p = figure(
    title="simple line example",
    x_axis_label="x",
    y_axis_label="y",
)

# draw the series as a line of width 2 and register it in the legend
p.line(x, y, line_width=2, legend_label="Temp.")

# render the figure
show(p)

In [7]:
# multi-line plot

# shared x coordinates and three y series
x = [1, 2, 3, 4, 5]
y1 = [6, 7, 2, 4, 5]
y2 = [2, 3, 4, 5, 6]
y3 = [4, 5, 5, 7, 2]

# figure with a title and labelled axes
p = figure(title="Multiple line example", x_axis_label="x", y_axis_label="y")

# one line renderer per series: (values, legend label, line colour)
for series, label, colour in (
    (y1, "Temp.", "blue"),
    (y2, "Rate", "red"),
    (y3, "Objects", "green"),
):
    p.line(x, series, legend_label=label, line_color=colour, line_width=2)

# render the figure
show(p)

## Plotting with basic glyphs

In [52]:
# Scatter markers

from bokeh.plotting import figure, output_file, show

# also write the plot to a static HTML file
output_file("scatter.html")

# NOTE(review): min_width/min_height look like layout minimums rather than
# the plot size itself - confirm whether width/height was intended here.
p = figure(min_width=400, min_height=400)

# marker coordinates
marker_x = [1, 2, 3, 4, 5]
marker_y = [6, 7, 2, 4, 5]

# semi-transparent navy markers of size 20 (no marker type is specified)
p.scatter(marker_x, marker_y, size=20, color="navy", alpha=0.5)

# render the figure
show(p)

In [54]:
# Same scatter as before, but drawn with square markers

from bokeh.plotting import figure, output_file, show

# also write the plot to a static HTML file
output_file("square.html")

p = figure(min_width=400, min_height=400)

# marker coordinates
square_x = [1, 2, 3, 4, 5]
square_y = [6, 7, 2, 4, 5]

# semi-transparent olive squares of size 50
p.scatter(
    square_x,
    square_y,
    marker="square",
    size=50,
    color="olive",
    alpha=0.5,
)

# render the figure
show(p)

In [11]:
# Single lines

# also write the plot to a static HTML file
output_file("line.html")

p = figure(min_width=400, min_height=400)

# one line renderer through five points
line_x = [1, 2, 3, 4, 5]
line_y = [6, 7, 2, 4, 5]
p.line(line_x, line_y, line_width=2)

show(p)

In [12]:
# Step lines

p = figure(min_width=400, min_height=400)

# step renderer; the mode may be "before", "after" or "center"
step_x = [1, 2, 3, 4, 5]
step_y = [6, 7, 2, 4, 5]
step_mode = "before"
p.step(step_x, step_y, line_width=2, mode=step_mode)

show(p)

In [13]:
# Multiple lines

p = figure(min_width=400, min_height=400)

# each inner list is one line; colour and alpha are given per line
line_xs = [[1, 3, 2], [3, 4, 6, 6]]
line_ys = [[2, 1, 4], [4, 7, 8, 5]]
p.multi_line(
    line_xs,
    line_ys,
    color=["firebrick", "navy"],
    alpha=[0.8, 0.3],
    line_width=4,
)

show(p)

In [14]:
# Line with NaN values

p = figure(min_width=400, min_height=400)

# a NaN is placed among the x coordinates of the line
nan = float('nan')
gap_x = [1, 2, 3, nan, 4, 5]
gap_y = [6, 7, 2, 4, 4, 5]
p.line(gap_x, gap_y, line_width=2)

show(p)

In [15]:
# Vertical bar chart

p = figure(min_width=400, min_height=400)

# three bars of width 0.5, each with its own bottom and top value
bar_bottoms = [0, 1, 2]
bar_tops = [1.2, 2.5, 3.7]
p.vbar(x=[1, 2, 3], width=0.5, bottom=bar_bottoms, top=bar_tops,
       color="firebrick")

show(p)

In [16]:
# Horizontal bar chart

p = figure(min_width=400, min_height=400)

# three bars of height 0.5, each with its own left and right edge
bar_lefts = [1, 1, 2]
bar_rights = [1.2, 2.5, 3.7]
p.hbar(y=[1, 2, 3], height=0.5, left=bar_lefts, right=bar_rights,
       color="navy")

show(p)

In [17]:
# Horizontal stacked bar chart

from bokeh.models import ColumnDataSource

# column data: y positions plus the two series to stack
hstack_columns = {
    'y': [1, 2, 3, 4, 5],
    'x1': [1, 2, 4, 3, 4],
    'x2': [1, 4, 2, 2, 3],
}
source = ColumnDataSource(data=hstack_columns)

p = figure(min_width=400, min_height=400)

# stack x1 and x2 for every y position
p.hbar_stack(
    ['x1', 'x2'],
    y='y',
    height=0.8,
    color=("grey", "lightgrey"),
    source=source,
)

show(p)

In [18]:
# Vertical stacked bar chart

from bokeh.models import ColumnDataSource

# column data: x positions plus the two series to stack
vstack_columns = {
    'x': [1, 2, 3, 4, 5],
    'y1': [1, 2, 4, 3, 4],
    'y2': [1, 4, 2, 2, 3],
}
source = ColumnDataSource(data=vstack_columns)

p = figure(min_width=400, min_height=400)

# stack y1 and y2 for every x position
p.vbar_stack(
    ['y1', 'y2'],
    x='x',
    width=0.8,
    color=("blue", "red"),
    source=source,
)

show(p)

In [19]:
# Drawing rectangles

p = figure(min_width=400, min_height=400)

# each rectangle is defined by its four edges
quad_edges = dict(
    top=[2, 3, 4],
    bottom=[1, 2, 3],
    left=[1, 2, 3],
    right=[1.2, 2.5, 3.7],
)
p.quad(color="#B3DE69", **quad_edges)

show(p)

In [20]:
# Drawing arbitrary rectangles

from math import pi

p = figure(min_width=400, min_height=400)

# rectangles given by centre, width and height, rotated by pi/6;
# the height is expressed in screen units
tilt = pi/6
p.rect(
    x=[1, 2, 3],
    y=[1, 2, 3],
    width=0.2,
    height=40,
    height_units="screen",
    angle=tilt,
    color="#CAB2D6",
)

show(p)

In [21]:
# Single Areas
p = figure(min_width=400, min_height=400)

# filled area between the y1 and y2 curves over shared x coordinates
area_x = [1, 2, 3, 4, 5]
area_upper = [2, 6, 4, 3, 5]
area_lower = [1, 4, 2, 2, 3]
p.varea(x=area_x, y1=area_upper, y2=area_lower)

show(p)

In [22]:
# Stacked areas

# column data: x coordinates plus the two series to stack
area_columns = {
    'x': [1, 2, 3, 4, 5],
    'y1': [1, 2, 4, 3, 4],
    'y2': [1, 4, 2, 2, 3],
}
source = ColumnDataSource(data=area_columns)

p = figure(min_width=400, min_height=400)

# stack the y1 and y2 areas along x
p.varea_stack(
    ['y1', 'y2'],
    x='x',
    color=("grey", "lightgrey"),
    source=source,
)

show(p)

In [23]:
# Single patches
p = figure(min_width=400, min_height=400)

# one patch through five vertices, half transparent, outline width 2
patch_x = [1, 2, 3, 4, 5]
patch_y = [6, 1, 8, 1, 3]
p.patch(patch_x, patch_y, alpha=0.5, line_width=2)

show(p)

In [24]:
# Multiple patches

p = figure(min_width=400, min_height=400)

# each inner list is one patch; colour and alpha are given per patch
patch_xs = [[1, 3, 2], [3, 4, 6, 6]]
patch_ys = [[2, 1, 4], [4, 7, 8, 5]]
p.patches(
    patch_xs,
    patch_ys,
    color=["firebrick", "navy"],
    alpha=[0.8, 0.3],
    line_width=2,
)

show(p)

In [56]:
# Combining multiple glyphs

# data shared by both renderers
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 7, 3]

# also write the plot to a static HTML file
output_file("multiple.html")

p = figure(min_width=400, min_height=400)

# draw the line first, then pink square markers on the same points
p.line(x, y, line_width=2)
p.scatter(x, y, marker='square', size=18, fill_color="pink")

show(p)

In [26]:
# Combining stacked bars, a line and markers that all read from one source

source = ColumnDataSource(data=dict(
    x=[1, 2, 3, 4, 5],
    y1=[1, 2, 4, 3, 4],
    y2=[1, 4, 2, 2, 3],
))
p = figure(min_width=400, min_height=400)

# stack the y1 and y2 columns for every x position
p.vbar_stack(['y1', 'y2'], x='x', width=0.8, color=("blue", "red"), source=source)
# overlay the y1 column as a line
p.line('x', 'y1', line_width=3, source=source)
# mark the y1 points; scatter(marker=...) is used as in the other cells
# because the per-marker method circle_cross() is deprecated in Bokeh 3.4+
p.scatter('x', 'y1', size=20, color="olive", marker="circle_cross", source=source)
show(p)



## Data source for bokeh

In [28]:
# Python List

# plain Python lists can be passed directly as glyph coordinates
x_values = [1, 2, 3, 4, 5]
y_values = [6, 7, 2, 3, 6]

p = figure(min_width=400, min_height=400)
p.line(y=y_values, x=x_values)
show(p)

In [29]:
# Numpy Array

import numpy as np
import math

# sample the cosine from 0 up to (not including) 2*pi in steps of 0.05
x_values = np.arange(0, math.pi*2, 0.05)
y_values = np.cos(x_values)

p = figure(
    min_width=400,
    min_height=400,
    title="cosine wave example",
    x_axis_label='x',
    y_axis_label='y',
)
p.line(x=x_values, y=y_values, legend_label="cos")
show(p)

In [58]:
# ColumnDataSource using python Dictionary

# column name -> list of values
data = dict(
    x_values=[1, 2, 3, 4, 5],
    y_values=[6, 7, 2, 3, 6],
)

source = ColumnDataSource(data=data)

p = figure(
    min_width=400,
    min_height=400,
    title="data with ColumnDataSource",
    x_axis_label='x',
    y_axis_label='y',
)
# glyph coordinates are given as column names of the source
p.scatter(x='x_values', y='y_values', source=source,
          marker='circle_dot', size=20)
show(p)

In [60]:
# To modify ColumnDataSource

# initial columns
data = dict(
    x_values=[1, 2, 3, 4, 5],
    y_values=[6, 7, 2, 3, 6],
)

source = ColumnDataSource(data=data)

# replace an existing column, then add a new one
new_y_values = [10,20,3,20,2]
source.data['y_values'] = new_y_values
source.data['y2_values'] = [1,2,3,5,7]

p = figure(
    min_width=400,
    min_height=400,
    title="data with ColumnDataSource",
    x_axis_label='x',
    y_axis_label='y',
)
# markers use the replaced column, the line uses the added column
p.scatter(x='x_values', y='y_values', source=source,
          marker='circle_dot', size=20)
p.line(x='x_values', y='y2_values', source=source, line_width=2)
show(p)

In [62]:
# Pandas DataFrames

import seaborn as sns

# load seaborn's "tips" dataset and wrap it in a ColumnDataSource
tips = sns.load_dataset("tips")
source = ColumnDataSource(data=tips)

p = figure(
    min_width=400,
    min_height=400,
    title="tips",
    x_axis_label='tip',
    y_axis_label='total_bill',
)
# glyph coordinates are given as column names of the source
p.scatter(x='tip', y='total_bill', source=source, marker='cross', size=5)
show(p)

In [33]:
# Mapping markers
# Expects `source` to be the ColumnDataSource built from the tips dataset
# in the preceding cell.

from bokeh.transform import factor_cmap, factor_mark
from bokeh.palettes import Spectral6

# one marker per smoker category, matched by position
MARKERS = ['hex', 'triangle']
SMOKERS = ['Yes', 'No']

p = figure (min_width=400, min_height=400)
# label the axes with the columns actually plotted
p.xaxis.axis_label = "tip"
p.yaxis.axis_label = "total_bill"

# marker shape and colour both depend on the 'smoker' column; the imported
# Spectral6 palette is passed as a colour sequence rather than by name
p.scatter("tip", "total_bill", source=source, legend_field="smoker",
         marker=factor_mark('smoker', MARKERS, SMOKERS),
         color=factor_cmap('smoker', Spectral6, SMOKERS))
show(p)

## Configuring plot tools

In [64]:
# Positioning the toolbar: above, below, left, right, None

# no title, and toolbar_location=None as the chosen toolbar position
p = figure(
    min_width=400,
    min_height=400,
    title=None,
    toolbar_location=None,
)

points_x = [1, 2, 3, 4, 5]
points_y = [2, 5, 8, 2, 7]
p.scatter(points_x, points_y, marker='circle', size=10)

show(p)

In [66]:
# Specifying tools: "pan,wheel_zoom,box_zoom,reset"
# https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#userguide-tools-pandrag

# only the wheel zoom tool is requested here
p = figure(
    min_width=400,
    min_height=400,
    title=None,
    tools="wheel_zoom",
)

points_x = [1, 2, 3, 4, 5]
points_y = [2, 5, 8, 2, 7]
p.scatter(points_x, points_y, size=10)

show(p)

In [68]:
# Hover

# (label, value template) pairs shown in the tooltip
TOOLTIPS = [("x", "$x"), ("y", "$y")]

# the hover tool is the only tool, configured with the tooltips above
p = figure(
    min_width=400,
    min_height=400,
    title=None,
    tools="hover",
    tooltips=TOOLTIPS,
)

points_x = [1, 2, 3, 4, 5]
points_y = [2, 5, 8, 2, 7]
p.scatter(points_x, points_y, size=10)

show(p)

## Example 

In [39]:
# Bar chart over categorical x values

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

# the fruit names form the x range; no toolbar and no tools
p = figure(
    x_range=fruits,
    min_height=250,
    title="Fruit counts",
    toolbar_location=None,
    tools="",
)

# one bar per fruit, as tall as its count
p.vbar(x=fruits, top=counts, width=0.9)

# hide the vertical grid lines and start the y range at zero
p.xgrid.grid_line_color = None
p.y_range.start = 0

show(p)

In [40]:
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6
from bokeh.plotting import figure

# also write the plot to a static HTML file
output_file("colormapped_bars.html")

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

# one row per fruit: name, count and a colour taken from Spectral6
bar_columns = dict(fruits=fruits, counts=counts, color=Spectral6)
source = ColumnDataSource(data=bar_columns)

# categorical x range, fixed y range, no toolbar, tooltip built from columns
p = figure(
    x_range=fruits,
    y_range=(0,9),
    min_height=250,
    title="Fruit counts",
    toolbar_location=None,
    tools="",
    tooltips="@fruits: @counts",
)

# bars coloured by the 'color' column, with one legend entry per fruit
p.vbar(
    x='fruits',
    top='counts',
    width=0.9,
    color='color',
    legend_field="fruits",
    source=source,
)

# hide the vertical grid lines and place a horizontal legend at the top
p.xgrid.grid_line_color = None
p.legend.location = "top_center"
p.legend.orientation = "horizontal"

show(p)

## Example

In [42]:
import pandas as pd

In [70]:
# load the dataset with pandas, using the first CSV column as the index
world_population_csv = './data/world_population.csv'
dataset = pd.read_csv(world_population_csv, index_col=0)

In [72]:
# preview the first rows of the dataset to check its columns and index
dataset.head()

Unnamed: 0_level_0,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aruba,ABW,Population density (people per sq. km of land ...,EN.POP.DNST,,307.972222,312.366667,314.983333,316.827778,318.666667,320.622222,...,562.322222,563.011111,563.422222,564.427778,566.311111,568.85,571.783333,574.672222,577.161111,
Andorra,AND,Population density (people per sq. km of land ...,EN.POP.DNST,,30.587234,32.714894,34.914894,37.170213,39.470213,41.8,...,180.591489,182.161702,181.859574,179.614894,175.161702,168.757447,161.493617,154.86383,149.942553,
Afghanistan,AFG,Population density (people per sq. km of land ...,EN.POP.DNST,,14.038148,14.312061,14.599692,14.901579,15.218206,15.545203,...,39.637202,40.634655,41.674005,42.830327,44.127634,45.533197,46.997059,48.444546,49.821649,
Angola,AGO,Population density (people per sq. km of land ...,EN.POP.DNST,,4.305195,4.384299,4.464433,4.544558,4.624228,4.703271,...,15.387749,15.915819,16.459536,17.020898,17.600302,18.196544,18.808215,19.433323,20.070565,
Albania,ALB,Population density (people per sq. km of land ...,EN.POP.DNST,,60.576642,62.456898,64.329234,66.209307,68.058066,69.874927,...,108.394781,107.566204,106.843759,106.314635,106.013869,105.848431,105.717226,105.60781,105.444051,


In [74]:
# preparing our data for Germany
# the year columns are those whose label does not start with a letter
years = [year for year in dataset.columns if not year[0].isalpha()]
# a single label-based lookup gives a flat list with one value per year,
# rather than a list of one-element Series built from a DataFrame per year
de_vals = dataset.loc['Germany', years].tolist()

In [75]:
# population density of Germany over the available years
plot = figure(
    title='Population Density of Germany',
    x_axis_label='Year',
    y_axis_label='Population Density',
)

# single line with a legend entry
plot.line(years, de_vals, legend_label='Germany', line_width=2)

show(plot)

In [80]:
# preparing the data for the second country
# a single label-based lookup gives a flat list with one value per year,
# rather than a list of one-element Series built from a DataFrame per year
ch_vals = dataset.loc['Switzerland', years].tolist()

In [84]:
# Germany and Switzerland in one figure; the Switzerland series additionally
# gets a marker on every data point
plot = figure(
    title='Population Density of Germany and Switzerland',
    x_axis_label='Year',
    y_axis_label='Population Density',
)

# Germany as a line
plot.line(years, de_vals, legend_label='Germany', line_width=2)

# Switzerland as an orange line plus white-filled, orange-outlined markers
plot.line(years, ch_vals, legend_label='Switzerland', color='orange', line_width=2)
plot.scatter(
    years,
    ch_vals,
    legend_label='Switzerland',
    size=4,
    line_color='orange',
    fill_color='white',
)

show(plot)

In [86]:
# Germany and Switzerland in two separate figures whose view ports are
# linked: the second figure reuses the ranges of the first
from bokeh.layouts import gridplot

# options shared by both figures
common_figure_options = dict(
    x_axis_label='Year',
    y_axis_label='Population Density',
    min_height=300,
)

plot_de = figure(title='Population Density of Germany', **common_figure_options)

plot_ch = figure(
    title='Population Density of Switzerland',
    x_range=plot_de.x_range,
    y_range=plot_de.y_range,
    **common_figure_options,
)

# one line per figure
plot_de.line(years, de_vals, line_width=2)
plot_ch.line(years, ch_vals, line_width=2)

# a single grid row holding both figures side by side
side_by_side = [[plot_de, plot_ch]]
plot = gridplot(side_by_side)

show(plot)

In [88]:
# arrange the figures declared above vertically: one figure per grid row
stacked_rows = [[fig] for fig in (plot_de, plot_ch)]
plot_v = gridplot(stacked_rows)

show(plot_v)