# COMPSCI 690V - hw1 - Interactive Bokeh Visualizations

### Read in three datasets from the Gapminder website.

In [1]:
import pandas as pd

# Workbook file names, kept in one place so they are easy to repoint.
GDP_XLSX = 'indicator gapminder gdp_per_capita_ppp.xlsx'
TRAFFIC_XLSX = 'RTI age adjusted indicator LIVE.xlsx'
POP_GROWTH_XLSX = 'population growth.xlsx'

# index_col=0 turns the first column of each sheet into the DataFrame index,
# which is what the country intersection below is computed on.
gdp_df = pd.read_excel(GDP_XLSX, index_col=0)
traffic_df = pd.read_excel(TRAFFIC_XLSX, index_col=0)
pop_growth_df = pd.read_excel(POP_GROWTH_XLSX, index_col=0)

# Find the countries present in all three datasets.
# The GDP index is walked in its own row order and filtered by membership,
# instead of converting a set to a list: set iteration order follows randomized
# string hashing, so it would change the row order on every kernel restart.
in_other_datasets = set(traffic_df.index).intersection(pop_growth_df.index)
countries = [label for label in gdp_df.index.unique() if label in in_other_datasets]

### Import the Bokeh plotting tools and the ipywidgets `interact` helper.

In [2]:
from bokeh.io import output_notebook, push_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, BoxSelectTool, BoxZoomTool, LassoSelectTool
from bokeh.models import NumeralTickFormatter
from ipywidgets import interact

# Direct Bokeh output into the notebook so that show() renders plots inline.
output_notebook()

### Plot the data using the Bokeh library.

In [3]:
# Initial snapshot for 2003, restricted to the countries shared by all three
# datasets. The GDP and traffic frames are looked up with the integer year,
# the population-growth frame with the year as a string.
initial_growth = pop_growth_df["2003"].loc[countries]

source = ColumnDataSource(
    data=dict(
        x=gdp_df[2003].loc[countries],
        y=traffic_df[2003].loc[countries],
        p=initial_growth,
        c=countries,
        # Marker size: growth shifted so the smallest value maps to 0, times 10.
        # Series.min() skips NaN. The builtin min() does not: a NaN in first
        # position would be returned as the minimum and turn every size into NaN.
        size=(initial_growth - initial_growth.min()) * 10,
    )
)

# (label, value) rows of the hover tooltip. "@name" reads the column called
# `name` from the data source; the "{...}" suffix is that value's number format.
tooltip_rows = [
    ("Country", "@c"),
    ("GDP", "@x{$ 0,0}"),
    ("Traffic mortality", "@y{0.0}"),
    ("Population growth", "@p{0.00}"),
]
hover = HoverTool(tooltips=tooltip_rows)

# Toolbar tools, handed to figure() as a single comma-separated string.
TOOLS = "pan, box_select,box_zoom,lasso_select,reset, crosshair"

fig = figure(
    tools=TOOLS,
    title="Scatter Plot",
    x_axis_label='GDP per capita',
    y_axis_label='Traffic mortality per 100,000, age adjusted',
)

# Tick labels: dollar amounts with thousands separators on x, whole numbers on y.
fig.xaxis[0].formatter = NumeralTickFormatter(format="$0,0")
fig.yaxis[0].formatter = NumeralTickFormatter(format="0")

# One circle per row of `source`; the 'size' column drives the marker size and
# the fill switches to firebrick while the pointer hovers over a circle.
fig.circle(
    'x', 'y',
    source=source,
    size='size',
    fill_alpha=0.5,
    hover_fill_color="firebrick",
)
fig.add_tools(hover)

def update(year):
    """Redraw the scatter plot with the data for ``year``.

    Replaces every column of the shared ``source`` with the values for the
    selected year, sets the figure title to that year, and pushes the change
    to the plot already displayed in the notebook.

    Parameters
    ----------
    year : int
        Year to display. ``gdp_df`` and ``traffic_df`` are looked up with the
        integer itself, ``pop_growth_df`` with its string form.

    Raises
    ------
    KeyError
        If one of the frames has no column for the requested year.
    """
    year_str = str(year)
    growth = pop_growth_df[year_str].loc[countries]
    source.data = dict(
        x=gdp_df[year].loc[countries],
        y=traffic_df[year].loc[countries],
        p=growth,
        c=countries,
        # Marker size: growth shifted so the smallest value maps to 0, times 10.
        # Series.min() skips NaN. The builtin min() does not: a NaN in first
        # position would be returned as the minimum and turn every size into NaN.
        size=(growth - growth.min()) * 10,
    )
    fig.title.text = year_str
    push_notebook()

# Display the figure with notebook_handle=True so the rendered plot can be
# refreshed in place by the push_notebook() call inside update().
show(fig, notebook_handle=True)

# (min, max, step) for the year slider; each slider move calls update(year).
year_slider_spec = (1995, 2004, 1)
interact(update, year=year_slider_spec);