In [1]:
# imports: pandas
import pandas as pd

from bokeh.io import output_notebook, show
from bokeh.plotting import figure

from bokeh.transform import factor_cmap

from bokeh.models import ColumnDataSource, Legend, CategoricalColorMapper


from bokeh.layouts import gridplot

from bokeh.models import HoverTool

from bokeh.palettes import inferno
from bokeh.palettes import Category20
from bokeh.palettes import Turbo256
import itertools

from datetime import date

from bokeh.io import show
from bokeh.models import CustomJS, DateRangeSlider

In [2]:
# Load the Gapminder dataset into a DataFrame.
# header=0: the first row of the file holds the column names;
# index_col=False: no column of the file is used as the index;
# skipinitialspace=True: blanks right after a delimiter are dropped.
df = pd.read_csv('Gapminder.csv', index_col = False, header = 0, skipinitialspace = True)

In [3]:
# distinct region names, in order of first appearance in the data
regions = list(df.Region.unique())

# region name -> array of the distinct countries listed under that region
# (a single .loc selection replaces the chained df[mask]['Country'] lookup)
dict_regions = {
    region: df.loc[df['Region'] == region, 'Country'].unique()
    for region in regions
}
    

In [4]:
# Country-indexed copy of the data, so whole countries can be selected with .loc
df_regions = df.set_index(keys = 'Country')

# Displayed only: reset_index returns a new frame (old index exposed as an
# 'index' column); the result is not assigned, so `df` itself is unchanged.
df.reset_index(drop = False)

Unnamed: 0,index,Country,Year,lifeExp,pop,Fertility,Region,ID
0,0,Afghanistan,1964,33.639,10474903.0,7.671,South Asia,AF
1,1,Afghanistan,1965,34.152,10697983.0,7.671,South Asia,AF
2,2,Afghanistan,1966,34.662,10927724.0,7.671,South Asia,AF
3,3,Afghanistan,1967,35.170,11163656.0,7.671,South Asia,AF
4,4,Afghanistan,1968,35.674,11411022.0,7.671,South Asia,AF
...,...,...,...,...,...,...,...,...
12195,12195,Åland,2009,,,,Europe & Central Asia,AX
12196,12196,Åland,2010,,26923.0,,Europe & Central Asia,AX
12197,12197,Åland,2011,,,,Europe & Central Asia,AX
12198,12198,Åland,2012,,,,Europe & Central Asia,AX


In [5]:
# Tooltips for the regional plots.
# "@column" shows the value stored in the data source for the hovered point.
# The earlier "$x" / "$y" fields show the cursor position in data space instead,
# so the numbers drifted as the mouse moved over a marker.
HOVER_TOOLTIPS = [
    ("life expectancy", "@lifeExp"),
    ("Year", "@Year"),
    ("Fertility", "@Fertility"),
]

# kept under its original name for any cell that still refers to `hover`
hover = HoverTool(tooltips = HOVER_TOOLTIPS)

# colour mapper: assigns every country name (sorted alphabetically) a Turbo256 colour
index_cmap = factor_cmap('Country', palette = Turbo256, factors = sorted(df.Country.unique()))


def make_region_plot(region):
    """Build the Year x life-expectancy circle plot for one region.

    Parameters
    ----------
    region : str
        Key of ``dict_regions``; also used as the figure title.

    Returns
    -------
    tuple
        ``(countries, info, source, plot)``: the region's country names, the
        matching rows of ``df_regions``, the ColumnDataSource built from those
        rows, and the finished figure.
    """
    countries = dict_regions[region]

    # rows of every country that belongs to this region
    info = df_regions.loc[countries]
    source = ColumnDataSource(info)

    # each figure gets its own HoverTool instance instead of all six figures
    # sharing a single tool object
    plot = figure(plot_width = 500, plot_height = 500, title = region,
                  tools = [HoverTool(tooltips = HOVER_TOOLTIPS)])

    # an empty legend placed to the right of the plot area; the glyph call
    # below fills it with one entry per country
    plot.add_layout(Legend(), 'right')

    plot.circle('Year', 'lifeExp', source = source, fill_color = index_cmap, legend_group = "Country")

    return countries, info, source, plot


# one plot per region; every name the copy-pasted version defined is still bound
countries_SA, info_SA, source_SA, p_SA = make_region_plot('South Asia')
countries_ECA, info_ECA, source_ECA, p_ECA = make_region_plot('Europe & Central Asia')
countries_MENA, info_MENA, source_MENA, p_MENA = make_region_plot('Middle East & North Africa')
countries_EAP, info_EAP, source_EAP, p_EAP = make_region_plot('East Asia & Pacific')
countries_SSA, info_SSA, source_SSA, p_SSA = make_region_plot('Sub-Saharan Africa')
countries_America, info_America, source_America, p_America = make_region_plot('America')


In [6]:
# lay the six regional figures out in a grid, three per row
region_plots = [p_SA, p_ECA, p_MENA, p_EAP, p_SSA, p_America]
grid = gridplot(region_plots, ncols = 3)

show(grid)


In [7]:
# colour mapper: assigns every country name (sorted alphabetically) a Turbo256 colour;
# the imported palette object is passed, matching the regional plots
index_cmap = factor_cmap('Country', palette = Turbo256, factors = sorted(df.Country.unique()))

# one scatter figure holding every country
p = figure(plot_width = 1400, plot_height = 1400, title = "Countries: Years x life expectancy")

# legend_field is the explicit replacement for the deprecated `legend=` keyword
# when the value names a data column: one legend entry per distinct Country
p.scatter('Year', 'lifeExp', source = df, fill_alpha = 0.6, fill_color = index_cmap, size = 1,
          legend_field = 'Country')

# axis labels and legend placement
p.xaxis.axis_label = 'Year'
p.yaxis.axis_label = 'Life Expectancies'
p.legend.location = "top_left"

show(p)



In [8]:
# TODO: how can a slider be incorporated to select years?
# For now this is a stand-alone slider that is not linked to any plot.

# browser-side callback: writes the slider value to the JavaScript console
log_slider_value = CustomJS(code="""
    console.log('date_range_slider: value=' + this.value, this.toString())
""")

date_range_slider = DateRangeSlider(
    start = date(2015, 1, 1),
    end = date(2017, 12, 31),
    value = (date(2016, 1, 1), date(2016, 12, 31)),
)

# run the callback whenever the selected range changes
date_range_slider.js_on_change("value", log_slider_value)

show(date_range_slider)