*Note: All output_file() calls have been replaced with output_notebook() so that plots will display inline.*

# The Basics of Bokeh

We first have to make sure everything we're going to use has been installed.

In [1]:
# the ! means that the code below is interpreted as being run on the command line, rather than as python
%pip install bokeh pyproj

Note: you may need to restart the kernel to use updated packages.


# Bokeh and Pandas: Exploring the Ingenium Dataset

## Loading Data in Pandas

In [2]:
import pandas as pd
pd.options.display.max_rows = None
pd.options.display.max_columns = None

df = pd.read_csv('ingenium-data.csv')
print(df)

       artifactNumber              ObjectName      GeneralDescription  \
0       1966.0001.001                   Cover                   PAPER   
1       1966.0002.001          Stamp  postage                   PAPER   
2       1966.0003.001          Stamp  postage                   PAPER   
3       1966.0004.001          Stamp  postage                   PAPER   
4       1966.0005.001          Stamp  postage                   PAPER   
...               ...                     ...                     ...   
108458  2017.0005.002                Joystick     Synthetic and metal   
108459  2017.0005.003            Power supply     Synthetic and metal   
108460  2017.0005.004      Cord  power supply     Synthetic and metal   
108461  2017.0005.005  Case  storage-carrying     Synthetic and metal   
108462  2017.0006.001             Salinometer  Synthetic  metal  wood   

                               model SerialNumber Manufacturer ManuCountry  \
0        WESTERN CANADA AIRWAYS LTD.         

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [3]:
df.columns.tolist()

['artifactNumber',
 'ObjectName',
 'GeneralDescription',
 'model',
 'SerialNumber',
 'Manufacturer',
 'ManuCountry',
 'ManuProvince',
 'ManuCity',
 'BeginDate',
 'EndDate',
 'date_qualifier',
 'patent',
 'NumberOfComponents',
 'ArtifactFinish',
 'ContextCanada',
 'ContextFunction',
 'ContextTechnical',
 'group1',
 'category1',
 'subcategory1',
 'group2',
 'category2',
 'subcategory2',
 'group3',
 'category3',
 'subcategory3',
 'material',
 'Length',
 'Width',
 'Height',
 'Thickness',
 'Weight',
 'Diameter',
 'image',
 'thumbnail',
 'Unnamed: 36']

# Creating `df` for Ottawa

In [16]:
# let's now filter the dataframe to items created in Ottawa and Kingston
options = ['Ottawa']  

# selecting rows based on condition  
ott_df = df[df['ManuCity'].isin(options)]

ott_df

# Now lets make a df that only has the ObjectName, GeneralDescription, Manufacturer, group1, and category1

ott_artifacts = ott_df[['ObjectName', 'GeneralDescription', 'Manufacturer', 'group1', 'category1']]

ott_artifacts

Unnamed: 0,ObjectName,GeneralDescription,Manufacturer,group1,category1
30,Therapy machine cobalt,METAL SYNTHETIC & WOOD COMPONENTS.,Atomic Energy of Canada Ltd.,Medical Technology,Radiology
31,Control unit,METAL SYNTHETIC & WOOD COMPONENTS.,Atomic Energy of Canada Ltd.,Medical Technology,Radiology
87,Propeller,WOOD,National Aviation Museum,Aviation,Aircraft parts
88,Propeller,WOOD,National Aviation Museum,Aviation,Aircraft parts
122,Coverall flying,COTTON CORDUROY/ METAL SNAPS & ZIPPER,WOODS MFG. CO. LTD.,Aviation,Clothing
556,Patent,,Government of Canada Patent Office,Industrial Technology,Archives
560,Pass boarding,,OTTAWA RIVER NAVIGATION CO.,,
860,Ticket,CARDBOARD,NMST,Railway Transportation,Services
892,Clock,METAL INCUDING ALUMINUM & BRASS/ SYNTHETIC I...,NRC Applied Physics Div.,Horology,Clocks
1067,Radiography machine model,WOOD; ALUMINUM; STEEL; RUBBER,Atomic Energy of Canada Ltd.,Physics,Atomic & nuclear


In [28]:
# Success! Let's do some frequency counts for simple visuals

# field
field_count = ott_artifacts.group1.value_counts().rename_axis('field').reset_index(name='count')

field_count

Unnamed: 0,field,count
0,Agriculture,594
1,Aviation,377
2,Physics,220
3,Communications,112
4,Space Technology,93
5,Marine Transportation,83
6,Computing Technology,72
7,Medical Technology,66
8,Astronomy,41
9,Industrial Technology,39


In [29]:
# category
cat_count = ott_artifacts.category1.value_counts().rename_axis('category').reset_index(name='count')

cat_count

Unnamed: 0,category,count
0,Research,583
1,Archives,218
2,Commemorative,148
3,Atomic & nuclear,104
4,Navigation instruments & equipment,76
5,Animal husbandry,48
6,Clothing,36
7,Digital peripheral devices,35
8,Miscellaneous,35
9,Space science,33


In [23]:
# Manufacturer
manu_count = ott_artifacts.Manufacturer.value_counts()

manu_count

# hm not a very effective data measure due to possible change in name over time

National Research Council Canada                                         142
NRC  Radio & Electrical Engineering                                      106
Pelling  Dr. Andrew & Modulevski  Daniel                                  92
Unknown                                                                   78
Dept. of Mines & Resources                                                58
National Historic Parks Branch                                            56
Ketchum Mfg. Co. Ltd.                                                     45
Royal Canadian Mint                                                       37
Pritchard-Andrews Co. of Ottawa Ltd.                                      32
DND DRB DRTE                                                              29
Capital Stamp & Stationery Co.                                            24
NRC  Elmus Lab                                                            23
Instruments Ltd.                                                          17

## Most Common Fields and Categories

In [44]:
import pandas as pd
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource
from bokeh.models.tools import HoverTool

from bokeh.palettes import Viridis11
from bokeh.transform import factor_cmap
output_notebook()

source = field_count
fields = source.field.tolist()
p = figure(x_range=fields)


color_map = factor_cmap(field_name='field',
                    palette=Viridis11, factors=fields)

p.vbar(x='field', top='count', source=source, width=0.70, color=color_map)

p.title.text = 'Number of Artifacts Manufacturer in Ottawa by Field'
p.xaxis.axis_label = 'Field'
p.xaxis.major_label_orientation = 'vertical'
p.yaxis.axis_label = 'Occurence'

hover = HoverTool()
hover.tooltips = [
    ("Totals", "@count")]

hover.mode = 'vline'

p.add_tools(hover)

show(p)



In [54]:
from math import pi

import pandas as pd

from bokeh.palettes import Category20c
from bokeh.plotting import figure, output_notebook, show
from bokeh.transform import cumsum
output_notebook()

data = cat_count
data['angle'] = data['count']/data['count'].sum() * 2*pi
data['color'] = Turbo256[114]

p = figure(title="Pie Chart", toolbar_location=None,
           tools="hover", tooltips="@category: @count", x_range=(-0.5, 1.0))

p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend_field='category', source=data)

p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None

show(p)