In [41]:
# import the pandas library
import pandas as pd

In [42]:
# import the dataset
# The CSV location can be overridden with the GAPMINDER_CSV environment variable so the
# notebook is not tied to a single machine; the original local path stays as the default.
import os
from pathlib import Path

GAPMINDER_CSV = Path(os.environ.get(
    'GAPMINDER_CSV',
    r'C:\Users\arshi\OneDrive\Desktop\Data Analysis using Pandas and Matplotlib\Data-Analysis-using-Pandas-and-Matplotlib\CSV_files\gapminder.csv',
))

# thousands=',' treats ',' as a thousands separator while parsing numbers
# index_col='Year' makes the year the row index, so data.loc[year] selects all rows of that year
data = pd.read_csv(GAPMINDER_CSV, thousands=',', index_col='Year')

In [43]:
# first five rows of the dataset
# (head() returns 5 rows when it is called without an argument)
data.head()

Unnamed: 0_level_0,Country,life,population,income,region
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1800,Afghanistan,28.211,3280000.0,603.0,South Asia
1801,Afghanistan,28.200753,,603.0,South Asia
1802,Afghanistan,28.190507,,603.0,South Asia
1803,Afghanistan,28.18026,,603.0,South Asia
1804,Afghanistan,28.170013,,603.0,South Asia


In [44]:
# import 'output_notebook' function from bokeh.io
# 'output_notebook' enables the plots to render in the Jupyter notebook itself
# call it before the first show() so that the plots appear inline below the cell

from bokeh.io import output_notebook
output_notebook()

In [45]:
# import the 'show' and 'figure' functions
# show() renders the plot
# figure() creates a new plot
from bokeh.io import show
from bokeh.plotting import figure

In [46]:
# 'population' values of the first five rows recorded for the year 2010
# (row label and column are selected in a single .loc lookup)
data.loc[2010, "population"].head()

Year
2010    27962207.0
2010     2901883.0
2010    36036159.0
2010       84419.0
2010    21219954.0
Name: population, dtype: float64

In [47]:
# RELATION BETWEEN INCOME AND LIFE EXPECTANCY

# NumeralTickFormatter controls how numeric tick labels are written
from bokeh.models import NumeralTickFormatter

# every row recorded for the year 2010
rows_2010 = data.loc[2010]

# new figure 'p' with a logarithmic x-axis and fixed ranges on both axes
p = figure(
    height=200,
    x_axis_type='log',
    x_range=(100, 100000),
    y_range=(0, 100),
)

# one orange circle per row: income on the x-axis, life expectancy on the y-axis
p.circle(x=rows_2010["income"], y=rows_2010["life"], color="orange")

# show the ticks of the x-axis in a dollar format
p.xaxis[0].formatter = NumeralTickFormatter(format='$0.0')

# render the plot
show(p)

In [48]:
# ColumnDataSource is the fundamental data structure of Bokeh:
# most plots, data tables, etc. are driven by one.
# It maps column names to sequences or arrays and can be built from
# a Python dictionary or a Pandas DataFrame.
from bokeh.models import ColumnDataSource

# Build the source from a dictionary.
# The keys 'x', 'y' and 'country' become the column names of 'source';
# the Pandas Series given for each key supplies the values of that column.
source = ColumnDataSource(data={
    'x': data.loc[2010]["income"],
    'y': data.loc[2010]["life"],
    'country': data.loc[2010]["Country"],
})

In [49]:
# 'source' is an instance of ColumnDataSource
# type() shows the fully qualified class it was created from
type(source)

bokeh.models.sources.ColumnDataSource

In [50]:
# column names of the ColumnDataSource object 'source'
# these are the keys of the dictionary that 'source' was built from
source.column_names

['x', 'y', 'country']

In [51]:
# figure options shared by the plots below; unpack with **PLOT_OPTS when calling figure()
PLOT_OPTS = {
    "height": 300,
    "width": 800,
    "x_axis_type": "log",
    "x_range": (100, 100000),
    "y_range": (0, 100),
}

In [52]:
# import 'HoverTool'
from bokeh.models import HoverTool

# hover tool whose tooltip shows the value of the 'country' column
# a column of the data source is referenced as: tooltips='@column_name'
hover = HoverTool(show_arrow=False, tooltips='@country')

# new figure built from the shared options, with the hover tool as its only tool
p = figure(**PLOT_OPTS, tools=[hover])

# one circle per country; 'x' and 'y' name the columns of 'source' to read from
p.circle('x', 'y', source=source)

# render the figure
show(p)

In [53]:
# increase the size of the circles and decrease the opacity
# A fresh figure is created so that only the large, translucent circles are drawn:
# calling p.circle() again on the previous figure would overlay a second layer on top of
# the small circles and add yet another layer every time this cell is re-run.
p = figure(tools=[HoverTool(tooltips='@country', show_arrow=False)], **PLOT_OPTS)
p.circle(x='x', y='y', source=source, size=20, alpha=0.5)
show(p)

In [54]:
# rebuild 'source' with two extra columns, 'population' and 'region'
source = ColumnDataSource(data={
    'x': data.loc[2010]["income"],
    'y': data.loc[2010]["life"],
    'country': data.loc[2010]["Country"],
    'population': data.loc[2010]["population"],
    'region': data.loc[2010]["region"],
})

# display the names of the columns present in 'source'
source.column_names

['x', 'y', 'country', 'population', 'region']

In [55]:
# the circles are to be coloured by the region each country belongs to,
# so list the distinct values found in the 'region' column
data["region"].unique().tolist()

['South Asia',
 'Europe & Central Asia',
 'Middle East & North Africa',
 'Sub-Saharan Africa',
 'America',
 'East Asia & Pacific']

In [56]:
# import a color palette to color the circles
# import Spectral6 palette for this example
# (the palette is a list of six hex colour strings, shown in the output below)
from  bokeh.palettes import Spectral6
Spectral6

['#3288bd', '#99d594', '#e6f598', '#fee08b', '#fc8d59', '#d53e4f']

In [57]:
# import LinearInterpolator, CategoricalColorMapper and push_notebook
from bokeh.models import LinearInterpolator, CategoricalColorMapper
from bokeh.io import push_notebook

# Create a LinearInterpolator
# It interpolates linearly between the control points given through x and y.
# Here it turns a country's 'population' value into the size of its circle:
# the smallest population in the whole dataset maps to size 5, the largest to size 80,
# and every other population gets a size in between.
size_mapper = LinearInterpolator(
    x = [data["population"].min(), data["population"].max()],
    y = [5,80]
)

# Create a CategoricalColorMapper
# Assigns a color depending upon the category (here, the 'region' to which the country belongs)
# Colors are taken from the specified palette (here, Spectral6: one color per region)
color_mapper = CategoricalColorMapper(
    factors = list(data["region"].unique()),
    palette = Spectral6,
)


# Create a figure
# Give a title to the figure, location of the title, location of the Bokeh toolbar
# Add the HoverTool to the tools
# Pass **kwargs 'PLOT_OPTS'
p = figure(title=str(2010),
           title_location="left",
           toolbar_location="above",
           tools=[HoverTool(tooltips='@country', show_arrow=False)],
           **PLOT_OPTS
        )

# Create a circle for each country
# The size depends upon the 'population' column (through size_mapper)
# and the color depends upon the 'region' column (through color_mapper).
# 'legend_field' builds one legend entry per distinct value of the 'region' column;
# it replaces the old 'legend' keyword, which newer Bokeh releases no longer accept.
p.circle(x= 'x',
         y= 'y',
         source=source,
         size={'field': 'population', 'transform': size_mapper},
         color={'field': 'region', 'transform': color_mapper},
         alpha=0.5,
         legend_field="region"
        )

# format the legend and place it to the right of the plot area
p.legend.border_line_color = None
p.legend.location = (0,-10)
p.add_layout(p.legend[0], 'right')

# The function which updates the plot when a different year is selected
def update(year):
    """Redraw the gapminder chart for ``year`` and push the change to the notebook.

    Replaces the columns of the global ``source`` with the rows of ``data`` for
    ``year``, sets the figure title to that year and pushes the change to the
    output that was rendered by ``show`` below.

    Parameters
    ----------
    year : int
        A label present in the index of ``data``; ``data.loc`` raises ``KeyError`` otherwise.
    """
    # look the year up once instead of once per column
    rows = data.loc[year]
    source.data = dict(
        x = rows["income"],
        y = rows["life"],
        country = rows["Country"],
        region = rows["region"],
        population = rows["population"]
    )
    p.title.text = str(year)
    # target this plot's output explicitly rather than "whichever handle was shown last"
    push_notebook(handle=gapminder_handle)

# render the gapminder and keep the handle that update() pushes its changes to
gapminder_handle = show(p, notebook_handle = True)

In [58]:
from bokeh.io import show
from bokeh.layouts import column, row
from bokeh.models.widgets import Slider

output_notebook()

# The initial value must lie inside [start, end]; 2010 matches the year drawn in 'p'.
# No callback is attached to the slider in this cell, so this only demonstrates the
# layout: moving the slider does not change the plot.
slider = Slider(start=1950, end=2014, value=2010, step=1, title="Year")

# 'column' replaces 'widgetbox', which was removed from newer Bokeh releases
show(row(p, column(slider)))

In [59]:
from bokeh.layouts import column
from bokeh.models import (
    LinearInterpolator,
    CategoricalColorMapper,
    ColumnDataSource,
    HoverTool,
    Slider,
)
from bokeh.palettes import Spectral6
from bokeh.plotting import figure

PLOT_OPTS = dict(
        height=400, x_axis_type='log',
        x_range=(100, 100000), y_range=(0, 100),
)

# year drawn when the app first loads; the slider starts on the same year
INITIAL_YEAR = 2010


def year_columns(year):
    """Return the data-source columns (x, y, country, region, population) for one year.

    ``year`` must be a label of ``data``'s index; ``data.loc`` raises ``KeyError`` otherwise.
    """
    rows = data.loc[year]
    return dict(
        x=rows.income,
        y=rows.life,
        country=rows.Country,
        region=rows.region,
        population=rows.population,
    )


def gapminder_app(doc):
    """Build the interactive gapminder chart and add it to the Bokeh document ``doc``.

    ``slider.on_change`` registers a Python callback. Such callbacks only run when the
    plot is served by a Bokeh server, so the chart is packaged as an application
    function and handed to ``show`` instead of showing the layout as static output.
    All models are created inside this function so that every session gets its own set.
    """
    source = ColumnDataSource(year_columns(INITIAL_YEAR))

    # circle size: smallest population in the dataset -> 5, largest -> 50
    size_mapper = LinearInterpolator(
        x=[data.population.min(), data.population.max()],
        y=[5, 50]
    )
    # circle color: one Spectral6 color per distinct region
    color_mapper = CategoricalColorMapper(
        factors=list(data.region.unique()),
        palette=Spectral6,
    )

    p = figure(
        title=str(INITIAL_YEAR), toolbar_location='above',
        tools=[HoverTool(tooltips='@country', show_arrow=False)],
        **PLOT_OPTS)
    # 'legend_field' replaces the old 'legend' keyword, which newer Bokeh releases reject
    p.circle(
        x='x', y='y',
        size={'field': 'population', 'transform': size_mapper},
        color={'field': 'region', 'transform': color_mapper},
        alpha=0.6,
        source=source,
        legend_field='region'
    )
    p.legend.border_line_color = None
    p.legend.location = (0, -50)
    p.add_layout(p.legend[0], 'right')

    def update(attr, old, new):
        """Slider callback: ``new`` is the year that was just selected."""
        year = int(new)
        source.data = year_columns(year)
        p.title.text = str(year)

    slider = Slider(start=1800, end=2010, step=1, value=INITIAL_YEAR, title='Year')
    slider.on_change('value', update)

    doc.add_root(column(p, slider))


# show() starts an embedded Bokeh server for the app function.
# NOTE(review): if the notebook is not served from localhost:8888, pass
# notebook_url="<host>:<port>" to show() so the browser can reach the server.
show(gapminder_app)

In [60]:
#from ipywidgets import interact
#interact(update, year=(1950, 2014, 1))