In [1]:
import bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure
from bokeh.io import show
from bokeh.models import NumeralTickFormatter
from bokeh.models import ColumnDataSource
from bokeh.models import HoverTool
from bokeh.models import LinearInterpolator
import pandas as pd

In [2]:
bokeh.__version__

'1.1.0'

## Import Data into the data Frame

In [3]:
#import the data making the year the index
data = pd.read_csv('gapminder_tidy.csv', index_col='Year', thousands=',')

In [4]:
data.head()

Unnamed: 0_level_0,Country,fertility,life,population,child_mortality,gdp,region
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1964,Afghanistan,7.671,33.639,10474903.0,339.7,1182.0,South Asia
1965,Afghanistan,7.671,34.152,10697983.0,334.1,1182.0,South Asia
1966,Afghanistan,7.671,34.662,10927724.0,328.7,1168.0,South Asia
1967,Afghanistan,7.671,35.17,11163656.0,323.3,1173.0,South Asia
1968,Afghanistan,7.671,35.674,11411022.0,318.1,1187.0,South Asia


In [5]:
#check the data types of each series

In [6]:
#check the data types of each series
data.dtypes

Country             object
fertility          float64
life               float64
population         float64
child_mortality    float64
gdp                float64
region              object
dtype: object

In [7]:
#load bokehJS into Jupyter notebook
output_notebook()

In [41]:
# get figure
fig = figure(
    #height = 200,
    x_axis_type = 'log',
           x_range = (100, 100000),
           y_range = (0, 100)
            )

In [42]:
#Get the 2010 data only
data_2010 = data.loc[2010]

In [43]:
data_2010.head()

Unnamed: 0_level_0,Country,fertility,life,population,child_mortality,gdp,region
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010,Afghanistan,5.659,59.612,31411743.0,105.0,1637.0,South Asia
2010,Albania,1.741,76.78,3204284.0,16.6,9374.0,Europe & Central Asia
2010,Algeria,2.817,70.615,35468208.0,27.4,12494.0,Middle East & North Africa
2010,Angola,6.218,50.689,19081912.0,182.5,7047.0,Sub-Saharan Africa
2010,Antigua and Barbuda,2.13,75.437,88710.0,9.9,20567.0,America


### Plot Life expectancy against GDP

In [44]:
gdp = data_2010.gdp
life_expectancy = data_2010.life
fig.circle(x = gdp, y = life_expectancy)
show(fig)

In [45]:
# use the formatter to format the x-axis
gdp = data_2010.gdp
life_expectancy = data_2010.life
fig.circle(x = gdp, y = life_expectancy)
fig.xaxis[0].formatter = NumeralTickFormatter(format="$0,")
show(fig)

##  ColumnDataSource

In [46]:
source = ColumnDataSource(dict(
    x = gdp,
    y = life_expectancy,
    country = data_2010.Country
))

In [47]:
#source.data
source.column_names

['x', 'y', 'country']

In [52]:
PLOT_OPTS = dict(
   # height = 200,
    x_axis_type = 'log',
    x_range = (100, 100000),
    y_range = (0, 100)
)

In [53]:
PLOT_OPTS

{'x_axis_type': 'log', 'x_range': (100, 100000), 'y_range': (0, 100)}

In [54]:
fig = figure(**PLOT_OPTS)
fig.circle(x = 'x', y = 'y', source = source)
fig.xaxis[0].formatter = NumeralTickFormatter(format="$0,")
show(fig)

###  Add a hover

In [55]:
hover = HoverTool(tooltips='@country', show_arrow = False)
fig = fig = figure(tools=[hover], **PLOT_OPTS) 
fig.circle(x = 'x', y = 'y', size = 15, alpha = 0.6, source = source)
fig.xaxis[0].formatter = NumeralTickFormatter(format="$0,")
show(fig)

In [56]:
source = ColumnDataSource(dict(
    x = gdp,
    y = life_expectancy,
    country = data_2010.Country,
    population = data_2010.population,
    region = data_2010.region
))
source.column_names

['x', 'y', 'country', 'population', 'region']

In [57]:
# YOu can also do it this way
#source = ColumnDataSource(data.loc[2010])
#source.column_names

### Plot based on the size of the populations of countries

In [58]:


size_mapper = LinearInterpolator(
    x = [data.population.min(), data.population.max()],
    y = [5, 50]
)

hover = HoverTool(tooltips='@country', show_arrow = False)
fig = fig = figure(
    title = 'Life Expectancy Against GDP in 2010',
    toolbar_location = 'above',
    tools=[hover], 
    **PLOT_OPTS
) 
fig.circle(x = 'x', 
           y = 'y', 
           size = {
               'field' : 'population',
               'transform' : size_mapper
           }, 
           alpha = 0.6, 
           source = source
          )
fig.xaxis[0].formatter = NumeralTickFormatter(format="$0,")
show(fig)

###  Add colours to differentiate countries

In [59]:
from bokeh.palettes import Spectral6
Spectral6

['#3288bd', '#99d594', '#e6f598', '#fee08b', '#fc8d59', '#d53e4f']

In [60]:
#Get all the regions in the data; unique
list(data.region.unique())

['South Asia',
 'Europe & Central Asia',
 'Middle East & North Africa',
 'Sub-Saharan Africa',
 'America',
 'East Asia & Pacific']

In [61]:
from bokeh.models import CategoricalColorMapper

In [82]:
from bokeh.io import push_notebook
from ipywidgets import interact

def update(year):
    new_data = dict(
        x = data.loc[year].gdp,
        y = data.loc[year].life,
        country = data.loc[year].Country,
        region = data.loc[year].region,
        population = data.loc[year].population
    )
    
    source.data = new_data
    fig.title.text = str(year)
    push_notebook()
    


size_mapper = LinearInterpolator(
    x = [data.population.min(), data.population.max()],
    y = [5, 50]
)

colour_mapper = CategoricalColorMapper(
    factors = list(data.region.unique()),
    palette = Spectral6,
    
)

hover = HoverTool(tooltips='@country', show_arrow = False)

fig = fig = figure(
    title = 'Life Expectancy Against GDP in 2010',
    toolbar_location = 'above',
    tools=[hover], 
    **PLOT_OPTS
) 
fig.circle(x = 'x', 
           y = 'y', 
           size = {
               'field' : 'population',
               'transform' : size_mapper
           },
           color = {
               'field' : 'region',
               'transform' : colour_mapper
           },
           alpha = 0.6, 
           source = source,
           #legend = 'region'
          )

#set the legend outside the graph
#fig.legend.location = (0, -50)
#fig.right.append(fig.legend[0])

fig.xaxis[0].formatter = NumeralTickFormatter(format="$0,")
show(fig, notebook_handle = True)


In [83]:
# Runs with an error

interact(update, year=(1800, 2014, 1))


interactive(children=(IntSlider(value=1907, description='year', max=2014, min=1800), Output()), _dom_classes=(…

<function __main__.update(year)>