# Chapter 2

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Switch on seaborn's default theme (called with no arguments, so library defaults apply)
sns.set()


In [2]:
# Bokeh names needed here and by the plotting cells further down
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show, output_file, output_notebook

# Load the literacy / birth-rate table and give the literacy column an
# identifier-friendly name. skipfooter drops the last 20 lines of the file
# (presumably non-data footer text -- verify against the CSV) and requires
# the python parser engine.
df = (
    pd.read_csv('literacy_birth_rate.csv', engine='python', skipfooter=20)
    .rename(columns={"female literacy": "female_literacy"})
)
print(df.info())

# Single shared data source for the scatter plots built from this frame
source = ColumnDataSource(data=df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162 entries, 0 to 161
Data columns (total 5 columns):
Country            162 non-null object
Continent          162 non-null object
female_literacy    162 non-null float64
fertility          162 non-null float64
population         162 non-null float64
dtypes: float64(3), object(2)
memory usage: 6.4+ KB
None


In [4]:
# Side-by-side layout helper
from bokeh.layouts import row

# Both figures share the same y axis caption
literacy_axis_label = 'female_literacy (% population)'

# p1: female literacy against fertility
p1 = figure(y_axis_label=literacy_axis_label,
            x_axis_label='fertility (children per woman)')
p1.circle(x='fertility', y='female_literacy', source=source)

# p2: female literacy against population
p2 = figure(y_axis_label=literacy_axis_label,
            x_axis_label='population')
p2.circle(x='population', y='female_literacy', source=source)

# Arrange the two figures horizontally
layout = row(p1, p2)

# Register both output targets (standalone HTML file and inline notebook),
# then render the layout
output_file('fert_row.html')
output_notebook()
show(layout)


In [6]:
# Stacked (vertical) layout helper
from bokeh.layouts import column

# (x column, x axis caption) for each figure; y is female literacy in both
x_specs = (
    ('fertility', 'fertility (children per woman)'),
    ('population', 'population'),
)

stacked_figures = []
for x_column, x_caption in x_specs:
    fig = figure(x_axis_label=x_caption,
                 y_axis_label='female_literacy (% population)')
    fig.circle(x_column, 'female_literacy', source=source)
    stacked_figures.append(fig)

# Keep the conventional names the rest of the notebook expects
p1, p2 = stacked_figures

# p1 on top of p2
layout = column(p1, p2)

# Write to a standalone HTML file and display
output_file('fert_column.html')
show(layout)

In [13]:
# data prep: load the auto dataset and build the three figures that the
# nested-layout cell arranges.
df = pd.read_csv('auto.csv')
print(df.info())

from bokeh.models import ColumnDataSource
source = ColumnDataSource(df)

# One row per model year holding that year's mean mpg, so the avg_mpg figure
# actually plots the quantity its name and axis label advertise.
mean_mpg_source = ColumnDataSource(df.groupby('yr', as_index=False)['mpg'].mean())

from bokeh.plotting import figure
mpg_hp = figure(x_axis_label='hp', y_axis_label='mpg')
mpg_weight = figure(x_axis_label='weight', y_axis_label='mpg')
avg_mpg = figure(x_axis_label='year', y_axis_label='mean mpg')

# Glyph methods take the x column first and the y column second; the column
# order below is kept in step with the axis labels declared above.
mpg_hp.circle('hp', 'mpg', source=source)
mpg_weight.circle('weight', 'mpg', source=source)
avg_mpg.circle('yr', 'mpg', source=mean_mpg_source)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 392 entries, 0 to 391
Data columns (total 11 columns):
mpg       392 non-null float64
cyl       392 non-null int64
displ     392 non-null float64
hp        392 non-null int64
weight    392 non-null int64
accel     392 non-null float64
yr        392 non-null int64
origin    392 non-null object
name      392 non-null object
color     392 non-null object
size      392 non-null float64
dtypes: float64(4), int64(4), object(3)
memory usage: 33.8+ KB
None


In [14]:
# Layout helpers for nesting a column inside a row
from bokeh.layouts import column, row

# Every container in this cell scales with the available width
scaling = dict(sizing_mode='scale_width')

# Inner container: the two mpg scatter plots stacked vertically
row2 = column([mpg_hp, mpg_weight], **scaling)

# Outer container: avg_mpg on the left, the stacked pair on the right
layout = row([avg_mpg, row2], **scaling)

# Write to a standalone HTML file and display
output_file('layout_custom.html')
show(layout)

In [81]:
# data prep: reload the literacy table and build one scatter plot per continent
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure

# The CSV header carries a trailing space on the country column; normalise it
df = (
    pd.read_csv('literacy_birth_rate.csv', engine='python', skipfooter=20)
    .rename(columns={'Country ': 'Country'})
)


def _continent_scatter(title, continent_code):
    """Return a fertility vs. female-literacy scatter for one continent.

    title is used as the figure title; continent_code is matched exactly
    against the 'Continent' column of the module-level df.
    """
    fig = figure(title=title,
                 x_axis_label='fertility (children per woman)',
                 y_axis_label='female literacy')
    subset = df[df['Continent'] == continent_code]
    fig.circle('fertility', 'female literacy', source=ColumnDataSource(subset))
    return fig


p1 = _continent_scatter('Latin America', 'LAT')
p2 = _continent_scatter('Africa', 'AF')
p3 = _continent_scatter('Asia', 'ASI')
p4 = _continent_scatter('Europe', 'EUR')



In [71]:
# Grid layout helper
from bokeh.layouts import gridplot

# Top row holds p1 and p2, bottom row holds p3 and p4
row1, row2 = [p1, p2], [p3, p4]

# gridplot takes a list of rows, each row being a list of figures
grid_rows = [row1, row2]
layout = gridplot(grid_rows)

# Write to a standalone HTML file and display
output_file('grid.html')
show(layout)

In [72]:
# A Panel wraps one figure together with the caption shown on its tab
from bokeh.models.widgets import Panel

# Build the four panels in one pass; figure/caption pairs are listed in tab order
tab1, tab2, tab3, tab4 = (
    Panel(child=plot, title=caption)
    for plot, caption in (
        (p1, 'Latin America'),
        (p2, 'Africa'),
        (p3, 'Asia'),
        (p4, 'Europe'),
    )
)


In [73]:
# Container widget that shows one Panel at a time
from bokeh.models.widgets import Tabs

# Tabs appear in the order given here
ordered_panels = [tab1, tab2, tab3, tab4]
layout = Tabs(tabs=ordered_panels)

# Write to a standalone HTML file and display
output_file('tabs.html')
show(layout)

In [74]:
# Linked panning/zooming: figures that share a Range object move together.
from bokeh.layouts import gridplot

# p2 follows p1 on both axes
p2.x_range = p1.x_range
p2.y_range = p1.y_range

# p3 follows p1 horizontally only
p3.x_range = p1.x_range

# p4 follows p1 vertically only
p4.y_range = p1.y_range

# Rebuild the 2x2 grid before showing. Without this, `layout` still refers to
# the Tabs layout created in the previous cell, which displays one figure at a
# time and therefore hides the linked-range behaviour being demonstrated.
layout = gridplot([[p1, p2], [p3, p4]])

# Write to a standalone HTML file and display
output_file('linked_range.html')
show(layout)


In [75]:
# Both figures draw from this one source, so a selection made in one figure
# is reflected in the other
source = ColumnDataSource(data=df)

# Settings common to the two figures
selection_tools = 'box_select,lasso_select'
fertility_caption = 'fertility (children per woman)'

# p1: female literacy against fertility
p1 = figure(tools=selection_tools,
            x_axis_label=fertility_caption,
            y_axis_label='female literacy (% population)')
p1.circle('fertility', 'female literacy', source=source)

# p2: population against fertility
p2 = figure(tools=selection_tools,
            x_axis_label=fertility_caption,
            y_axis_label='population (millions)')
p2.circle('fertility', 'population', source=source)

# Place the two figures side by side
layout = row(p1, p2)

# Write to a standalone HTML file and display
output_file('linked_brush.html')
show(layout)

In [84]:
# data prep: rename the literacy column (in place, on the existing frame)
df.rename(columns={'female literacy': 'female_literacy'}, inplace=True)

# One data source per continent code, built from the matching rows of df
latin_america, africa = (
    ColumnDataSource(df[df['Continent'] == continent_code])
    for continent_code in ('LAT', 'AF')
)

# Empty figure with selection tools; glyphs are added in the next cell
p = figure(
    tools='box_select,lasso_select',
    x_axis_label='fertility (children per woman)',
    y_axis_label='Female Literacy',
)

In [85]:
# One circle glyph per region, in drawing order: (source, colour, legend text)
region_styles = (
    (latin_america, 'red', 'Latin America'),
    (africa, 'blue', 'Africa'),
)
for region_source, region_color, region_label in region_styles:
    p.circle('fertility', 'female_literacy', source=region_source,
             size=10, color=region_color, legend=region_label)

# Write to a standalone HTML file and display
output_file('fert_lit_groups.html')
show(p)


In [87]:
# Grab the figure's legend once and restyle it
legend = p.legend

# Anchor it in the bottom-left corner of the plot area
legend.location = 'bottom_left'

# Light gray backdrop behind the legend entries
legend.background_fill_color = 'lightgray'

# Write to a standalone HTML file and display
output_file('fert_lit_groups.html')
show(p)


In [88]:
# Tool that shows a tooltip for the glyph under the cursor
from bokeh.models import HoverTool

# Each tooltip row is (caption, value); '@Country' reads the 'Country' column
# of the hovered glyph's data source
country_row = ('Country', '@Country')
hover = HoverTool(tooltips=[country_row])

# Attach the tool to the existing figure
p.add_tools(hover)

# Write to a standalone HTML file and display
output_file('hover.html')
show(p)


In [80]:
# Show the country names held in the Latin America source. Column labels are
# whitespace-stripped first so the lookup works whether the source was built
# before or after the 'Country ' -> 'Country' rename applied during data prep
# (indexing 'Country ' directly raises KeyError once the rename has run).
latin_america.to_df().rename(columns=str.strip)['Country']

0                     Brésil
1                    Mexique
2                   Colombie
3                  Argentine
4                      Pérou
5                  Venezuela
6                      Chili
7                  Guatemala
8                   Equateur
9                       Cuba
10    République dominicaine
11                   Bolivie
12                  Honduras
13                  Paraguay
14               El Salvador
15                 Nicaragua
16                Costa Rica
17                    Panama
18                   Uruguay
19                  Jamaïque
20         Trinité-et-Tobago
21                  Suriname
22                   Bahamas
23                     Aruba
Name: Country , dtype: object