## Interactive visualisation of world development data using Bokeh
This workbook uses data from [the Gapminder Foundation](https://www.gapminder.org/data/) to create an interactive, animated scatter/bubble chart showing how a variety of developmental indicators changed across the world between 1970 and 2010. The late Hans Rosling and his colleagues at Gapminder used this kind of animated bubble chart as a very effective way to communicate the developmental changes that have happened in the world, especially to non-technical (and often very influential) audiences.

This workbook requires the bokeh, pandas and numpy packages to be installed; the math module it also uses is part of the Python standard library.

In [337]:
from bokeh.layouts import column, row, widgetbox
from bokeh.models import ColumnDataSource, Slider, Select, CheckboxGroup, Button, HoverTool, CategoricalColorMapper
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
from bokeh.palettes import Spectral6, Accent6
import pandas as pd
import numpy as np
from math import pi

# Configure Bokeh to display its output inline in this notebook
output_notebook()

In [338]:
def modify_doc(doc):
    """Build the interactive Gapminder bubble chart and attach it to ``doc``.

    Reads ``gapminder_tidy.csv`` (indexed by its ``Year`` column), draws a
    scatter plot of two selectable indicators coloured by region, and wires up
    the widgets that drive it: a year slider, an autoplay button, a legend
    toggle, x/y indicator dropdowns, a marker-scaling dropdown and a group of
    region checkboxes.

    Args:
        doc: The Bokeh document to populate. The finished layout is added with
            ``doc.add_root`` and the autoplay animation is driven through the
            document's periodic callbacks.
    """
    # Years covered by the slider and the autoplay wrap-around
    first_year, last_year = 1970, 2010
    # Marker size used when markers are not scaled by population
    default_marker_size = 7.0

    data = pd.read_csv('gapminder_tidy.csv', index_col='Year')

    def fixed_marker_sizes(count):
        # One constant size per plotted country
        return pd.Series(np.full(count, default_marker_size))

    # Make the ColumnDataSource: source
    source = ColumnDataSource(data={
        'x'       : data.loc[first_year, 'fertility'],
        'y'       : data.loc[first_year, 'life'],
        'country' : data.loc[first_year, 'Country'],
        'region'  : data.loc[first_year, 'region'],
        'c_size'  : fixed_marker_sizes(len(data.loc[first_year]))
    })

    # Axis limits span every year so the axes stay fixed while animating.
    # Series.min()/max() skip missing values, unlike the builtin min()/max(),
    # whose result on a column containing NaN depends on element order.
    xmin, xmax = data['fertility'].min(), data['fertility'].max()
    ymin, ymax = data['life'].min(), data['life'].max()

    # Create the figure: plot
    plot = figure(title='Gapminder Data for %d' % first_year,
                  plot_height=500, plot_width=700,
                  x_range=(xmin, xmax), y_range=(ymin, ymax))

    # Set the initial axis labels
    plot.xaxis.axis_label = 'Fertility (children per woman)'
    plot.yaxis.axis_label = 'Life Expectancy (years)'

    # Make a list of the unique values from the region column: regions_list
    regions_list = data.region.unique().tolist()

    # Single table of (data column, display label) pairs; the lookups and the
    # dropdown options below are all derived from it so they cannot drift apart
    indicators = [
        ('fertility', 'Fertility (children per woman)'),
        ('life', 'Life Expectancy (years)'),
        ('child_mortality', 'Child Mortality (per 1,000 births)'),
        ('gdp', 'GDP per capita ($)'),
    ]
    # Map from column names to axis titles
    axis_lookup = dict(indicators)
    # Map from dropdown titles to column names
    data_lookup = {label: column for column, label in indicators}
    # Dropdown entries, in display order
    axis_options = [label for _, label in indicators]

    # Make a color mapper: color_mapper
    color_mapper = CategoricalColorMapper(
        factors=regions_list,
        palette=['lawngreen', 'dodgerblue', 'orange', 'darkkhaki', 'red', 'mediumseagreen'])

    # Draw the circles, coloured by region through the color mapper
    plot.circle(x='x', y='y', fill_alpha=0.8, size='c_size', source=source,
                color=dict(field='region', transform=color_mapper), legend='region')

    # Compact, semi-transparent legend in the top-right corner
    plot.legend.location = 'top_right'
    plot.legend.background_fill_alpha = 0.6
    plot.legend.label_text_font_size = "9px"
    plot.legend.spacing = 0
    plot.legend.margin = 0
    plot.legend.padding = 5
    plot.legend.label_standoff = 0

    # Define the callback shared by every widget: update_plot
    def update_plot(attr, old, new):
        """Redraw the plot from the current state of all widgets.

        The (attr, old, new) arguments are required by the ``on_change``
        callback signature but are unused: the widget values are read directly.
        """
        # Read the current value off the slider and 2 dropdowns: yr, x, y
        yr = slider.value
        x = data_lookup[x_select.value]
        y = data_lookup[y_select.value]

        # Translate ticked checkbox positions into region names
        active_regions = [regions_list[i] for i in region_checkboxes.active]

        # Label axes of plot
        plot.xaxis.axis_label = axis_lookup[x]
        plot.yaxis.axis_label = axis_lookup[y]

        # Extract data for the currently selected year and regions
        year_data = data.loc[yr]
        live_data = year_data[year_data['region'].isin(active_regions)]

        if scale_select.value == 'Yes':
            # Bokeh circle sizes refer to diameter. Want area as more representative.
            circle_size = (live_data['population'].div(pi) ** 0.5) / 100
        else:
            circle_size = fixed_marker_sizes(len(live_data))

        # Assign the new columns to source.data in one step
        source.data = {
            'x'       : live_data[x],
            'y'       : live_data[y],
            'country' : live_data['Country'],
            'region'  : live_data['region'],
            'c_size'  : circle_size
        }

        # Set the range of all axes from the full data set (NaN-safe)
        plot.x_range.start = data[x].min()
        plot.x_range.end = data[x].max()
        plot.y_range.start = data[y].min()
        plot.y_range.end = data[y].max()

        # The legend is shown only while its single checkbox (index 0) is ticked
        plot.legend.visible = 0 in legend_checkbox.active

        # Add title to plot
        plot.title.text = 'Gapminder data for %d' % yr

    # Create a slider widget for the year: slider
    slider = Slider(start=first_year, end=last_year, step=1, value=first_year, title='Year')

    # Attach the callback to the 'value' property of slider
    slider.on_change('value', update_plot)

    # Define functions for when play button is pressed
    def animate_update():
        # Advance one year, wrapping back to the first year after the last
        next_year = slider.value + 1
        if next_year > last_year:
            next_year = first_year
        slider.value = next_year

    def animate():
        # The button label doubles as the play/pause state
        if button.label == '► Autoplay':
            button.label = '❚❚ Pause'
            doc.add_periodic_callback(animate_update, 100)
        else:
            button.label = '► Autoplay'
            # NOTE(review): some Bokeh releases expect the handle returned by
            # add_periodic_callback here rather than the function itself --
            # confirm against the installed Bokeh version.
            doc.remove_periodic_callback(animate_update)

    button = Button(label='► Autoplay', width=60)
    button.on_click(animate)

    # Create a checkbox to hide/show the legend
    legend_checkbox = CheckboxGroup(
        labels=['Show legend'], active=[0])

    # Attach the update_plot callback to the 'active' property of legend_checkbox
    legend_checkbox.on_change('active', update_plot)

    # Create a dropdown Select widget for the x data: x_select
    x_select = Select(
        options=list(axis_options),
        value='Fertility (children per woman)',
        title='x-axis data'
    )

    # Attach the update_plot callback to the 'value' property of x_select
    x_select.on_change('value', update_plot)

    # Create a group of checkboxes for selecting regions to show;
    # every region present in the data starts ticked
    region_checkboxes = CheckboxGroup(
        labels=regions_list, active=list(range(len(regions_list))))

    # Attach the update_plot callback to the 'active' property of region_checkboxes
    region_checkboxes.on_change('active', update_plot)

    # Create a dropdown Select widget for the y data: y_select
    y_select = Select(
        options=list(axis_options),
        value='Life Expectancy (years)',
        title='y-axis data'
    )

    # Attach the update_plot callback to the 'value' property of y_select
    y_select.on_change('value', update_plot)

    # Create a dropdown Select widget for the marker scaling: scale_select
    scale_select = Select(
        options=['No', 'Yes'],
        value='No',
        title="Scale marker areas by country's population?"
    )

    # Attach the update_plot callback to the 'value' property of scale_select
    scale_select.on_change('value', update_plot)

    # Create a HoverTool showing the country name: hover
    hover = HoverTool(tooltips=[('Country', '@country')])

    # Add the HoverTool to the plot
    plot.add_tools(hover)

    # Create layout (widgets on the left, plot on the right) and add to the document
    layout = row(widgetbox(slider, button, legend_checkbox, x_select, y_select, scale_select, region_checkboxes), plot)
    doc.add_root(layout)

In [339]:
# Display the app: show() receives the modify_doc function defined above
show(modify_doc)

The above tool can be very revealing of long-term trends as well as shorter-term events. The long-term trends are generally very positive, while short-term shock events have left their marks. For example, when scrolling through the years of the fertility vs life expectancy plot, the effects of the Cambodia and Timor-Leste genocides in the 1970s are very clearly seen.