In [1]:
from bokeh.plotting import figure, gridplot
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import inferno
from bokeh.io import output_notebook, show
import pandas as pd
import numpy as np

In [2]:
# Load the published spreadsheet and read its first sheet without treating any
# row as a header, so the header row stays available as row 0 of `df`.
xd = pd.ExcelFile(
    "http://docs.google.com/spreadsheet/pub?key=phAwcNAVuyj0-LE4StzCsEw&output=xlsx"
)
first_sheet = xd.sheet_names[0]
df = xd.parse(first_sheet, header=None)

# Four consecutive entries (indices 1-4) of a six-colour inferno palette,
# used later to colour-code the scatter points.
c1, c2, c3, c4 = inferno(6)[1:5]

In [3]:
# --- Per-country summary ----------------------------------------------------
# Parallel lists: entry i of each list describes the same country.
current_urban_population, percentage_increase = [], []
original_indices, country_names = [], []
colors, labels = [], []

# Row 0 is the header row; everything after its first cell is kept as `years`.
years = np.asarray(df.iloc[0])[1:]

# (exclusive upper bound on the row minimum, point colour, legend text),
# checked in ascending order. Anything matching no band gets `_top_band`.
_start_bands = (
    (25, c1, 'Urban Population started below 25%'),
    (50, c2, 'Urban Population started between 25% and 50%'),
    (75, c3, 'Urban Population started between 50% and 75%'),
)
_top_band = (c4, 'Urban Population started above 75%')

for row_number in range(1, df.shape[0]):
    row = df.iloc[row_number]
    shares = row[1:]  # every column except the leading country-name cell

    # NOTE(review): np.sum on a pandas Series presumably dispatches to
    # Series.sum, which skips NaN by default, so this guard may not exclude
    # rows with missing years -- confirm the intended filtering.
    if not np.isfinite(np.sum(shares)):
        continue

    current_urban_population.append(row[df.shape[1] - 1])  # last column

    # NOTE(review): the increase and the "started ..." band both use the row
    # minimum/maximum rather than the first/last year; the two only coincide
    # for rows that rise monotonically -- confirm this is intended.
    lowest = np.min(shares)
    highest = np.max(shares)
    percentage_increase.append(((highest - lowest) / lowest) * 100)

    country_names.append(row[0])
    original_indices.append(row_number)  # lets later cells look the row up in df

    band_color, band_label = next(
        ((tint, text) for bound, tint, text in _start_bands if lowest < bound),
        _top_band,
    )
    colors.append(band_color)
    labels.append(band_label)

output_notebook()

# NaN/inf entries are replaced by finite numbers before being handed to bokeh.
source = ColumnDataSource(data={
    'x': np.nan_to_num(current_urban_population),
    'y': np.nan_to_num(percentage_increase),
    'color': colors,
    'label': labels,
    'country': country_names,
})

hover = HoverTool(
    tooltips=[
        ("Country", "@country"),
        ("% Increase", "@y"),
        ("Urban Population %", "@x"),
    ]
)

p1 = figure(
    title='Percentage Increase in Urban Population vs Latest Urban Population Percentage',
    x_axis_label='Current Urban Population (As a percentage of total population)',
    y_axis_label='Percentage increase in urban population between 1960 and 2011',
    x_range=(-1, 102),
    y_range=(-10, 1000),
    plot_width=900,
    plot_height=700,
    tools=[hover],
)
# One circle per country; colour and legend entry come from the source columns.
p1.circle(x='x', y='y', size=10, color='color', legend='label', source=source)
# Hover over points in p1 for country information.

In [4]:
# --- Trajectories of the countries with the largest percentage increase -----
# Number of countries to draw in the trajectory plot.
TOP_N = 5

output_notebook()

p2 = figure(
    title='Urban Population Increase Trajectory (Highest Percentage Increases)',
    x_axis_label='Year',
    y_axis_label='Urban Population (As a percentage of total population)',
    plot_width=900,
    plot_height=400,
)

# Rank on the same sanitised values the scatter plot uses: NaN becomes 0 so a
# missing increase can never win the ranking (arg-max style selection would
# otherwise pick a NaN entry first).
ranked_increase = np.nan_to_num(np.asarray(percentage_increase, dtype=float))

# A stable sort of the negated values gives descending order with ties kept
# in list order. Ranking once also guarantees no country is drawn twice and
# copes with fewer than TOP_N (or zero) countries.
top_positions = np.argsort(-ranked_increase, kind="stable")[:TOP_N]

# Build the palette once; palette index 0 is not used.
line_colors = inferno(TOP_N + 2)[1:TOP_N + 1]

for position, line_color in zip(top_positions, line_colors):
    row_index = original_indices[position]
    # A row pulled out of the mixed-type frame is object dtype, on which
    # np.nan_to_num returns its input unchanged; cast to float first so the
    # NaN replacement actually takes effect.
    trajectory = np.nan_to_num(np.asarray(df.iloc[row_index, 1:], dtype=float))
    p2.line(
        years,
        trajectory,
        line_width=2,
        color=line_color,
        legend=country_names[position],
    )

p2.legend.location = "top_left"
# Use the legend in p2 to hide/show a trajectory.
p2.legend.click_policy = "hide"

# Stack the scatter plot above the trajectory plot.
p = gridplot([[p1], [p2]], toolbar_location=None)

show(p)