**3**. Creating effective visualizations using [best practices](https://rafalab.github.io/dsbook/data-visualization-principles.html)

Create 3 informative visualizations about malaria using Python in a Jupyter notebook, starting with the data sets at https://github.com/rfordatascience/tidytuesday/tree/master/data/2018/2018-11-13. Where appropriate, make the visualizations [interactive](https://jupyterbook.org/interactive/interactive.html).

**Data Visualizations**:

Thank you for reviewing my homework. Please also see my [GitHub page](https://lujun995.github.io/) and [GitHub repository](https://github.com/Lujun995/BIOS823) for a detailed explanation.

In [None]:
#This notebook requires bokeh
#!pip install bokeh

In [2]:
#read in the data and import required modules
import pandas as pd
from bokeh.plotting import figure, show, output_notebook

#21-color categorical palette used to color the per-country series in the plots
tol21rainbow = (
    "#771155", "#AA4488", "#CC99BB",
    "#114477", "#4477AA", "#77AADD",
    "#117777", "#44AAAA", "#77CCCC",
    "#117744", "#44AA77", "#88CCAA",
    "#777711", "#AAAA44", "#DDDD77",
    "#774411", "#AA7744", "#DDAA77",
    "#771122", "#AA4455", "#DD7788",
)

#the three malaria tables from the TidyTuesday 2018-11-13 data set
data_death = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-11-13/malaria_deaths.csv")
data_age = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-11-13/malaria_deaths_age.csv")
data_incidence = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-11-13/malaria_inc.csv")

#country -> continent lookup; only the continent name and the three-letter code
#are kept, and the code column is renamed to "Code" to match the malaria tables
country_continent_code_url = ("https://pkgstore.datahub.io/JohnSnowLabs/country-and-continent-codes-list/"
                              "country-and-continent-codes-list-csv_csv/data/b7876b7f496677669644f3d1069d3121/"
                              "country-and-continent-codes-list-csv_csv.csv")
country_continent_code = (
    pd.read_csv(country_continent_code_url)
    .loc[:, ["Continent_Name", "Three_Letter_Country_Code"]]
    .rename(columns={"Three_Letter_Country_Code": "Code"})
)

In [3]:
#Prepare data_death for plotting: keep the rows that have a country code, attach
#the continent of each country and shorten the very long death-rate column name.
#A Continent_Name column left over from a previous run is dropped before merging,
#so re-running this cell gives the same frame instead of Continent_Name_x/_y.
#NOTE(review): if the lookup lists one code under several continents, the left
#merge returns one row per match -- confirm that this is intended.
data_death = (
    data_death
    #rows without a code are presumably aggregates such as "Global" -- TODO confirm
    .dropna(subset=["Code"])
    .drop(columns=["Continent_Name"], errors="ignore")
    .merge(country_continent_code, how="left", on="Code")
    .rename(columns={"Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate) (per 100,000 people)": "Death_rate"})
)
#show the first several rows of the prepared table
data_death.head()

Unnamed: 0,Entity,Code,Year,Death_rate,Continent_Name
0,Afghanistan,AFG,1990,6.80293,Asia
1,Afghanistan,AFG,1991,6.973494,Asia
2,Afghanistan,AFG,1992,6.989882,Asia
3,Afghanistan,AFG,1993,7.088983,Asia
4,Afghanistan,AFG,1994,7.392472,Asia


In [4]:
#data_death visualization: one line + marker series per Asian country
output_notebook()
#"$name" in the tooltips resolves to the glyph's name (set to the country below),
#so hovering identifies a series even where palette colors repeat
p_death = figure(title="Malaria deaths in Asia by country for all ages", width=800, height=600,
                 x_axis_label="Year",
                 y_axis_label="Age-standardized death rate (per 100,000 people)",
                 tooltips=[("Country", "$name"), ("Year", "$x{0}"), ("Death rate", "$y{0.00}")])

#we need to render each country each time
entities = data_death.Entity.unique()
#restrict to Asia once instead of re-filtering the whole table for every country
death_asia = data_death[data_death["Continent_Name"] == "Asia"]
for i, entity in enumerate(entities):
    death_country = death_asia[death_asia["Entity"] == entity]
    if death_country.empty:
        continue
    #the color index is the country's position among all entities, so colors
    #repeat once the palette is exhausted
    color = tol21rainbow[i % len(tol21rainbow)]
    #scatter() draws circle markers by default and replaces the circle() call
    p_death.scatter(death_country["Year"], death_country["Death_rate"],
                    color=color, legend_label=entity, name=entity)
    p_death.line(death_country["Year"], death_country["Death_rate"],
                 color=color, legend_label=entity, name=entity)

p_death.legend.title = 'Regions'
#clicking a legend entry toggles that country's line and markers
p_death.legend.click_policy = "hide"
show(p_death)


In [5]:
#Prepare data_age for plotting: keep the rows that have a country code and attach
#the continent of each country.
#Columns added by a previous run of this cell (Continent_Name and the lookup's
#Code) are dropped before merging, so re-running the cell gives the same frame
#instead of suffixed _x/_y duplicates.
#NOTE(review): if the lookup lists one code under several continents, the left
#merge returns one row per match -- confirm that this is intended.
data_age = (
    data_age
    #rows without a code are presumably aggregates such as "Global" -- TODO confirm
    .dropna(subset=["code"])
    .drop(columns=["Continent_Name", "Code"], errors="ignore")
    #the key is "code" here but "Code" in the lookup, so both columns are kept
    .merge(country_continent_code, how="left", left_on="code", right_on="Code")
)
#show the first several rows of the prepared table
data_age.head()

Unnamed: 0.1,Unnamed: 0,entity,code,year,age_group,deaths,Continent_Name,Code
0,1,Afghanistan,AFG,1990,Under 5,184.606435,Asia,AFG
1,2,Afghanistan,AFG,1991,Under 5,191.658193,Asia,AFG
2,3,Afghanistan,AFG,1992,Under 5,197.140197,Asia,AFG
3,4,Afghanistan,AFG,1993,Under 5,207.357753,Asia,AFG
4,5,Afghanistan,AFG,1994,Under 5,226.209363,Asia,AFG


In [6]:
#data_age visualization: one line + marker series per Asian country, "Under 5"
#age group only, on a logarithmic y axis
output_notebook()

#"$name" in the tooltips resolves to the glyph's name (set to the country below),
#so hovering identifies a series even where palette colors repeat
p_age = figure(title="Malaria deaths under 5 in Asia by country", y_axis_type="log", width=800, height=600,
               x_axis_label="Year",
               y_axis_label="Deaths, under 5 (log scale)",
               tooltips=[("Country", "$name"), ("Year", "$x{0}"), ("Deaths", "$y{0.0}")])

#we need to render each country each time
entities = data_age.entity.unique()
#restrict to Asia / "Under 5" once instead of re-filtering the whole table for every country
age_asia_under5 = data_age[(data_age["Continent_Name"] == "Asia") &
                           (data_age["age_group"] == "Under 5")]
for i, entity in enumerate(entities):
    age_country = age_asia_under5[age_asia_under5["entity"] == entity]
    if age_country.empty:
        continue
    #the color index is the country's position among all entities, so colors
    #repeat once the palette is exhausted
    color = tol21rainbow[i % len(tol21rainbow)]
    #scatter() draws circle markers by default and replaces the circle() call
    p_age.scatter(age_country["year"], age_country["deaths"],
                  color=color, legend_label=entity, name=entity)
    p_age.line(age_country["year"], age_country["deaths"],
               color=color, legend_label=entity, name=entity)

p_age.legend.title = 'Regions'
#clicking a legend entry toggles that country's line and markers
p_age.legend.click_policy = "hide"
show(p_age)


In [7]:
#Prepare data_incidence for plotting: shorten the very long incidence column
#name, keep the rows that have a country code and attach the continent of each
#country.
#A Continent_Name column left over from a previous run is dropped before merging,
#so re-running this cell gives the same frame instead of Continent_Name_x/_y.
#NOTE(review): if the lookup lists one code under several continents, the left
#merge returns one row per match -- confirm that this is intended.
data_incidence = (
    data_incidence
    .rename(columns={"Incidence of malaria (per 1,000 population at risk) (per 1,000 population at risk)": "Incidence"})
    #rows without a code are presumably aggregates such as "Global" -- TODO confirm
    .dropna(subset=["Code"])
    .drop(columns=["Continent_Name"], errors="ignore")
    .merge(country_continent_code, how="left", on="Code")
)
#show the first several rows of the prepared table
data_incidence.head()

Unnamed: 0,Entity,Code,Year,Incidence,Continent_Name
0,Afghanistan,AFG,2000,107.1,Asia
1,Afghanistan,AFG,2005,46.5,Asia
2,Afghanistan,AFG,2010,23.9,Asia
3,Afghanistan,AFG,2015,23.6,Asia
4,Algeria,DZA,2000,0.037746,Africa


In [8]:
#data_incidence visualization: one line + marker series per Asian country on a
#logarithmic y axis
output_notebook()

#"$name" in the tooltips resolves to the glyph's name (set to the country below),
#so hovering identifies a series even where palette colors repeat
p_incidence = figure(title="Malaria incidence in Asia by country", y_axis_type="log",
                     width=800, height=600,
                     x_axis_label="Year",
                     y_axis_label="Incidence (per 1,000 population at risk, log scale)",
                     tooltips=[("Country", "$name"), ("Year", "$x{0}"), ("Incidence", "$y{0.00}")])

#we need to render each country each time
entities = data_incidence.Entity.unique()
#restrict to Asia once instead of re-filtering the whole table for every country
incidence_asia = data_incidence[data_incidence["Continent_Name"] == "Asia"]
for i, entity in enumerate(entities):
    incidence_country = incidence_asia[incidence_asia["Entity"] == entity]
    if incidence_country.empty:
        continue
    #the color index is the country's position among all entities, so colors
    #repeat once the palette is exhausted
    color = tol21rainbow[i % len(tol21rainbow)]
    #scatter() draws circle markers by default and replaces the circle() call
    p_incidence.scatter(incidence_country["Year"], incidence_country["Incidence"],
                        color=color, legend_label=entity, name=entity)
    p_incidence.line(incidence_country["Year"], incidence_country["Incidence"],
                     color=color, legend_label=entity, name=entity)

p_incidence.legend.title = 'Regions'
#clicking a legend entry toggles that country's line and markers
p_incidence.legend.click_policy = "hide"
show(p_incidence)