# Plotting and Interface
## View live image at: https://plot.ly/~CrispyCrafter/7.embed

In [1]:
import pandas as pd
import numpy as np

import plotly 
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import os
Cdir = os.getcwd() + "/Data/"

from ipywidgets import widgets
from IPython.display import display, clear_output, Image

import itertools

## Import Country Capitals as reference

In [2]:
# Load the capital-city reference table, key it by country name and drop
# the "Unnamed: 6" column.
Coords = (
    pd.read_csv(Cdir + "country-capitals.csv")
    .set_index("CountryName")
    .drop("Unnamed: 6", axis=1)
)
Coords

Unnamed: 0_level_0,CapitalName,CapitalLatitude,CapitalLongitude,CountryCode,ContinentName
CountryName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Somaliland,Hargeisa,9.55,44.050000,,Africa
South Georgia and South Sandwich Islands,King Edward Point,-54.283333,-36.500000,GS,Antarctica
French Southern and Antarctic Lands,Port-aux-Français,-49.35,70.216667,TF,Antarctica
Palestine,Jerusalem,31.7666666667,35.233333,PS,Asia
Aland Islands,Mariehamn,60.116667,19.900000,AX,Europe
Nauru,Yaren,-0.5477,166.920867,NR,Australia
Saint Martin,Marigot,18.0731,-63.082200,MF,North America
Tokelau,Atafu,-9.166667,-171.833333,TK,Australia
Western Sahara,El-Aaiún,27.153611,-13.203333,EH,Africa
Afghanistan,Kabul,34.5166666667,69.183333,AF,Asia


## Import Population Statistics 
### Data source : https://data.worldbank.org/data-catalog/health-nutrition-and-population-statistics 

In [3]:
# Load the health / population statistics extract.
Health = pd.read_csv(Cdir + "PopHealth.csv")

# Relabel the columns: four metadata fields followed by one column per year,
# 1960 through 2016 inclusive.  Appending the integer years to a string array
# turns them into strings, so every column label ends up as text.
linRange = np.arange(1960, 2017, dtype=int)
columns = np.append(
    np.array(['Series_Name', 'Series_Code', 'Country_Name', 'Country_Code']),
    linRange,
)
Health.columns = columns

# Quick function to summarize dataset
def summarise(df):
    """Return the distinct ``Series_Name`` values of *df* in first-seen order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Series_Name`` column (``KeyError`` otherwise).

    Returns
    -------
    list
        Each value of ``df["Series_Name"]`` once, ordered by first appearance.
    """
    # A set gives O(1) membership tests; the list keeps the original order.
    # (The previous ``in list`` test made the whole pass quadratic.)
    seen = set()
    SummaryList = []
    for series in df["Series_Name"]:
        if series not in seen:
            seen.add(series)
            SummaryList.append(series)
    return SummaryList

# Collect the distinct indicator names (Series_Name values) of the Health dataset
SummaryList = summarise(Health) 

In [4]:
# Display the collected indicator names
SummaryList

['Adolescent fertility rate (births per 1,000 women ages 15-19)',
 'Adults (ages 15+) and children (0-14 years) living with HIV',
 'Adults (ages 15+) and children (ages 0-14) newly infected with HIV',
 'Adults (ages 15+) living with HIV',
 'Adults (ages 15+) newly infected with HIV',
 'Age at first marriage, female',
 'Age at first marriage, male',
 'Age dependency ratio (% of working-age population)',
 'Age dependency ratio, old',
 'Age dependency ratio, young',
 'Antiretroviral therapy coverage (% of people living with HIV)',
 'Antiretroviral therapy coverage for PMTCT (% of pregnant women living with HIV)',
 'ARI treatment (% of children under 5 taken to a health provider)',
 'Birth rate, crude (per 1,000 people)',
 'Births attended by skilled health staff (% of total)',
 'Cause of death, by communicable diseases and maternal, prenatal and nutrition conditions (% of total)',
 'Cause of death, by injury (% of total)',
 'Cause of death, by non-communicable diseases (% of total)',
 'Ch

## Cleaning and Preparation prior to plotting

In [5]:
def CleanData(TopicList):
    """Reshape raw per-indicator frames into year-by-country tables.

    Parameters
    ----------
    TopicList : list of pandas.DataFrame
        One frame per indicator.  Each frame carries the metadata columns
        ``Series_Name``, ``Series_Code``, ``Country_Name`` and
        ``Country_Code``; every remaining column label must be convertible
        to ``int`` (a year).

    Returns
    -------
    dict
        Maps the indicator name (first ``Series_Name`` entry of each frame)
        to a float frame with one column per country, one row per year after
        2006, and the summary rows returned by ``get_recent`` appended at
        the bottom.
    """
    DB_dict = {}
    for Topicdf in TopicList:
        # Country names become the column labels (typographic apostrophes removed)
        TopicCountries = [
            name.replace('’', '') for name in Topicdf["Country_Name"].astype(str)
        ]

        # Use first entry of Series_Name to name the dataset
        TopicDescription = Topicdf["Series_Name"].iloc[0]

        # Drop the metadata and transpose: rows are years, columns are countries
        Topicdf = Topicdf.drop(
            ["Series_Name", "Series_Code", "Country_Code", "Country_Name"], axis=1
        ).T
        Topicdf.columns = TopicCountries
        Topicdf.index.name = "Year"
        Topicdf.index = Topicdf.index.astype(int)

        Topicdf = Topicdf[Topicdf.index > 2006]

        # '..' marks a missing entry in the source data; store it as 0 so the
        # frame can be cast to float (get_recent skips zeros).
        Topicdf = Topicdf.replace('..', 0).astype(float)

        # Append the "latest value" summary rows.  pd.concat replaces
        # DataFrame.append, which no longer exists in pandas 2.x.
        Topicdf = pd.concat([Topicdf, get_recent(Topicdf)])

        DB_dict[TopicDescription] = Topicdf
    return DB_dict

def get_recent(frame, years=10):
    """Find each country's most recent non-zero value in the last *years* rows.

    Needed because countries report in different years: the newest row is
    often zero (nothing reported) while an earlier row holds a value.

    Parameters
    ----------
    frame : pandas.DataFrame
        One column per country, rows in chronological order (oldest first).
        Must have at least one row (``IndexError`` otherwise).
    years : int, optional
        Size of the look-back window, counted in rows from the end of
        *frame*.  Frames shorter than the window are scanned in full.

    Returns
    -------
    pandas.DataFrame
        Two rows, ``Latest_Value`` and ``Latest_Year``, with the same columns
        as *frame*.  If every value in the window is zero, the oldest row of
        the window is reported.
    """
    n_rows = len(frame)
    # Position of the oldest row still inside the look-back window.  Anchoring
    # the window to the END of the frame keeps short frames from indexing out
    # of bounds and makes longer frames report their newest rows.
    oldest = max(n_rows - years, 0)

    latest_values = []
    latest_years = []
    for country in frame.columns:
        column = frame[country]
        pos = n_rows - 1
        # Walk backwards past zeros, stopping at the window's oldest row.
        while pos > oldest and column.iloc[pos] == 0:
            pos -= 1
        latest_values.append(column.iloc[pos])
        latest_years.append(column.index[pos])

    df = pd.DataFrame(
        {"Latest_Value": latest_values, "Latest_Year": latest_years},
        index=frame.columns,
        columns=["Latest_Value", "Latest_Year"],
    )
    return df.T

# Cache of previous searches, keyed by (id(Frame), String, searched-index?, column).
# Each entry keeps a reference to its frame so the id cannot be reused while
# the entry exists.  Entries are not invalidated if a frame is mutated in place.
search_cache = {}


def search(Frame, String, index=True, summary=False):
    """Return the rows of *Frame* whose label or column contains *String*.

    Matching ignores letter case.  *String* is interpreted as a regular
    expression, which is the default of ``str.contains``.  Results are cached
    per frame, search string and search target.

    Parameters
    ----------
    Frame : pandas.DataFrame
        Table to search.
    String : str
        Text to look for.
    index : bool or int, optional
        ``True`` (default) matches against the row labels.  Any other value
        is used as a column position, e.g.
        ``search(Frame, String='Lorem Ipsum', index=1)`` searches column 1.
    summary : bool, optional
        If true, return only the distinct ``Series_Name`` values of the
        matching rows (via ``summarise``) instead of the rows themselves.

    Returns
    -------
    pandas.DataFrame or list
        Matching rows in their original order, or the summary list.

    Raises
    ------
    TypeError
        If *String* is not a ``str``.
    """
    if not isinstance(String, str):
        raise TypeError("Input must be a string")

    # ``1 == True`` in Python, so an equality test would send ``index=1`` to
    # the row labels.  Identity keeps column 1 reachable.
    use_index = index is True
    # ``use_index`` is part of the key because True and 1 hash alike.
    cache_key = (id(Frame), String, use_index, index)

    entry = search_cache.get(cache_key)
    if entry is None:
        target = Frame.index if use_index else Frame.iloc[:, index]
        # One case-insensitive pass replaces trying every upper/lower-case
        # spelling of String; each matching row appears exactly once.
        mask = np.asarray(target.str.contains(String, case=False, na=False), dtype=bool)
        entry = {"frame": Frame, "df": Frame[mask]}
        search_cache[cache_key] = entry

    if not summary:
        return entry["df"]

    # The summary is built on demand so that frames without a Series_Name
    # column can still be searched with summary=False.
    if "df_sum" not in entry:
        entry["df_sum"] = summarise(entry["df"])
    return entry["df_sum"]

## Prepare a dictionary of the desired datasets to make available in the drop-down list

### Search for desired dataset

In [6]:
# List the indicator names whose first column contains "ca" in any letter-case combination
search(Health,"cA",index=0, summary=True)

['Cause of death, by communicable diseases and maternal, prenatal and nutrition conditions (% of total)',
 'Cause of death, by injury (% of total)',
 'Cause of death, by non-communicable diseases (% of total)',
 'Completeness of death registration with cause-of-death information (%)',
 'GNI per capita, Atlas method (current US$)',
 'Health expenditure per capita (current US$)',
 'Health expenditure per capita, PPP',
 'Malaria cases reported',
 'Mortality caused by road traffic injury (per 100,000 people)',
 'Mortality from CVD, cancer, diabetes or CRD between exact ages 30 and 70 (%)',
 'Number of surgical procedures (per 100,000 population)',
 'People practicing open defecation (% of population)',
 'People practicing open defecation, rural (% of rural population)',
 'People practicing open defecation, urban (% of urban population)',
 'Postnatal care coverage (% mothers)',
 'Pregnant women receiving prenatal care (%)',
 'Pregnant women receiving prenatal care of at least four visits (%

In [7]:
# Small function to populate global list of desired datasets for plotting
InputList = []


def UpdateList(check):
    """Append the ``Health`` rows of each requested indicator to ``InputList``.

    Parameters
    ----------
    check : str or list/tuple of str
        A single ``Series_Name`` value, or several of them.  One frame is
        appended to the global ``InputList`` per name, in the given order.

    Raises
    ------
    TypeError
        If *check* is neither a string nor a list/tuple.  Such input used to
        be ignored silently, leaving ``InputList`` unchanged without warning.
    """
    if isinstance(check, str):
        names = [check]
    elif isinstance(check, (list, tuple)):
        names = check
    else:
        raise TypeError("check must be a Series_Name string or a list of them")

    for Series_Name in names:
        InputList.append(Health[Health["Series_Name"] == Series_Name])

In [8]:
# Queue every indicator whose name matches "HIV", then list the names of the cleaned datasets
UpdateList(search(Health,"HIV",index=0, summary=True))
list(CleanData(InputList).keys())

['Children orphaned by HIV/AIDS',
 'Adults (ages 15+) living with HIV',
 'Prevalence of HIV, total (% of population ages 15-49)',
 'Comprehensive correct knowledge of HIV/AIDS, ages 15-24, female (2 prevent ways and reject 3 misconceptions)',
 "Women's share of population ages 15+ living with HIV (%)",
 'Prevalence of HIV, female (% ages 15-24)',
 'Antiretroviral therapy coverage for PMTCT (% of pregnant women living with HIV)',
 'Adults (ages 15+) and children (ages 0-14) newly infected with HIV',
 'Antiretroviral therapy coverage (% of people living with HIV)',
 'Adults (ages 15+) newly infected with HIV',
 'Incidence of HIV (% of uninfected population ages 15-49)',
 'Adults (ages 15+) and children (0-14 years) living with HIV',
 'Prevalence of HIV, male (% ages 15-24)',
 'Comprehensive correct knowledge of HIV/AIDS, ages 15-24, male (2 prevent ways and reject 3 misconceptions)']

In [9]:
# Start from an empty selection and queue the indicators to offer in the plot
InputList = []
UpdateList(['Cause of death, by communicable diseases and maternal, prenatal and nutrition conditions (% of total)',
         'Cause of death, by injury (% of total)',
         'Cause of death, by non-communicable diseases (% of total)',
         'Completeness of birth registration (%)',
         'Completeness of death registration with cause-of-death information (%)',
         'Mortality rate, adult, female (per 1,000 female adults)',
         'Mortality rate, adult, male (per 1,000 male adults)',
         'Mortality rate, infant (per 1,000 live births)',
         'Mortality rate, neonatal (per 1,000 live births)',
         'Mortality rate, under-5 (per 1,000)',
         'Number of maternal deaths']
       )
# Bare expression with no effect here: it is not the last statement of the cell
InputList
# Map each indicator name to its cleaned year-by-country table
InputDict = CleanData(InputList)

## Create debugging plot (Choropleth) without dropdown options to validate method

In [10]:
def TestPlot(data, years, scale, continent='africa'):
    """Build a single-trace choropleth figure for validating the plot setup.

    Parameters
    ----------
    data : pandas.DataFrame
        Indexed by country name; must contain the column ``str(years[0])``.
    years : sequence
        Only the first entry is used; it names the column of *data* to plot.
    scale : number
        Divisor applied to the plotted values.
    continent : str, optional
        Passed to the geo layout as its ``scope``.

    Returns
    -------
    plotly.graph_objs.Figure
        Figure holding one choropleth trace and the geo layout.
    """
    inset = [
        go.Choropleth(
            locationmode='country names',
            locations=data.index,
            z=data[str(years[0])] / scale,
            text=data.index,
            colorscale=[[0, 'rgb(230, 238, 255)'], [1, 'rgb(51, 153, 255)']],
            autocolorscale=True,
            showscale=True,
            geo='geo',
        )
    ]

    geo = dict(
        resolution=100,
        scope=continent,
        showframe=False,
        showcoastlines=True,
        showland=True,
        landcolor="rgb(229, 229, 229)",
        countrycolor="rgb(255, 255, 255)",
        coastlinecolor="rgb(255, 255, 255)",
        projection=dict(type='Mercator'),
        domain=dict(x=[0, 1], y=[0, 1]),
    )
    layout = dict(
        geo=geo,
        width=700,
        height=450,
        margin=dict(l=0, r=0, t=0, b=0, autoexpand=True),
        dragmode="orbit",
    )

    return go.Figure(layout=layout, data=inset)

# Plot the 'Latest_Value' row that CleanData appends to every dataset
years = ['Latest_Value']
# Plotted values are divided by this factor
scale = 100

# Use the first dataset in InputDict, transposed so that rows are countries
fig = TestPlot(next (iter (InputDict.values())).T, years, scale)
plotly.offline.iplot(fig, validate=False)

## Create Interactive plot
### For further reference see: https://plot.ly/python/reference/#choropleth

In [11]:
# Build one {'label', 'value'} entry per dataset name
DropOptions = list(InputDict.keys())
DropList = []
for item in DropOptions:
    DropList.append({'label': item, 'value': item})

# Bare expression with no effect here: it is not the last statement of the cell
DropList
# Inspect the latest reported value per country for one dataset
read = 'Completeness of death registration with cause-of-death information (%)'
InputDict[read].T['Latest_Value']
    

Afghanistan                   0.0
Albania                      53.0
Algeria                       0.0
American Samoa                0.0
Andorra                      80.0
Angola                        0.0
Antigua and Barbuda          79.0
Argentina                   100.0
Armenia                      76.0
Aruba                         0.0
Australia                   100.0
Austria                     100.0
Azerbaijan                   93.0
Bahamas, The                 93.0
Bahrain                      88.0
Bangladesh                    0.0
Barbados                    100.0
Belarus                     100.0
Belgium                     100.0
Belize                      100.0
Benin                         0.0
Bermuda                       0.0
Bhutan                        0.0
Bolivia                       0.0
Bosnia and Herzegovina       89.0
Botswana                      0.0
Brazil                       93.0
British Virgin Islands        0.0
Brunei Darussalam            89.0
Bulgaria      

In [12]:
def DropPlot(data,years=['Latest_Value'],scale=1, continent='africa'):
    """Build the interactive choropleth with dataset and region dropdowns.

    Parameters
    ----------
    data : dict
        Maps dataset name to a frame with one column per country and a row
        labelled ``str(years[0])``.
    years : sequence, optional
        Only the first entry is used; it names the row to plot.
    scale : number, optional
        Divisor applied to the plotted values.
    continent : str, optional
        Initial geo ``scope``.

    Returns
    -------
    plotly.graph_objs.Figure
        One choropleth trace per dataset plus the two dropdown menus.
    """
    # Forward years/scale explicitly; previously these arguments were accepted
    # but ignored because CreateInset read module-level globals instead.
    plotdata = CreateInset(data, years=years, scale=scale)

    # All traces share one geo axis, so show only the first dataset initially.
    # This matches the dataset dropdown, whose first button starts as active.
    for position, trace in enumerate(plotdata):
        trace['visible'] = position == 0

    # Create interactive button traces
    # NOTE(review): plotly documents xanchor values as auto/left/center/right;
    # 'bottom' is kept as in the original -- confirm the intended anchor.
    updatemenus = [
        # Dropdown to select the dataset
        dict(
            buttons=CreateDataButtons(data),
            direction='down',
            pad={'b': 10, 't': 10},
            showactive=True,
            x=1,
            xanchor='bottom',
            y=1,
            yanchor='bottom',
            borderwidth=0,
        ),
        # Dropdown to select the continent
        dict(
            buttons=CreateGeoLayout(),
            direction='down',
            pad={'b': 10, 't': 10},
            showactive=True,
            x=1,
            xanchor='bottom',
            y=0.9,
            yanchor='bottom',
            borderwidth=0,
        ),
    ]

    # Choropleth projection and layout specification
    layout = dict(
        geo=dict(
            resolution=100,
            scope=continent,
            showframe=False,
            showcoastlines=True,
            showland=True,
            landcolor="rgb(229, 229, 229)",
            countrycolor="rgb(255, 255, 255)",
            coastlinecolor="rgb(255, 255, 255)",
            projection=dict(type='Mercator'),
            domain=dict(x=[0, 1], y=[0, 1]),
        ),
        updatemenus=updatemenus,
        width=700,
        height=450,
        margin=dict(l=0, r=0, t=0, b=0, autoexpand=True),
        dragmode="orbit",
    )

    return go.Figure(layout=layout, data=plotdata)


# Function to populate dataset selection button
def CreateDataButtons(DatDict):
    """Return one 'update' button per dataset in *DatDict*.

    Button ``i`` carries a visibility list that is ``True`` at position ``i``
    and ``False`` elsewhere, so selecting it shows only the i-th trace.
    """
    total = len(DatDict)
    button = []
    for i, Series in enumerate(DatDict):
        BoolRay = [position == i for position in range(total)]
        button.append(
            dict(
                label=Series,
                method='update',
                execute=True,
                args=[{'visible': BoolRay}],
            )
        )
    return button


# Function to populate continent selection button
def CreateGeoLayout():
    """Return one 'relayout' button per supported geo scope."""
    world = ["africa",
             "world",
             "usa",
             "europe",
             "asia",
             "north america",
             "south america"]

    return [
        dict(
            label=continent,
            method='relayout',
            args=[{'geo.scope': continent}],
            execute=True,
        )
        for continent in world
    ]


# Function to populate Dataset values
def CreateInset(DatDict, selection='all', years=None, scale=None):
    """Build the choropleth traces for the datasets in *DatDict*.

    Parameters
    ----------
    DatDict : dict
        Maps dataset name to a frame with one column per country and a row
        labelled ``str(years[0])``.
    selection : str, optional
        ``'all'`` (default) builds one trace per dataset.  Any other value
        must be a key of *DatDict* and yields a single trace for it.
        NOTE(review): the original comment spoke of a "specific continent",
        but the trace is indexed by dataset name -- confirm the intent.
    years : sequence, optional
        Only the first entry is used.  When omitted, the module-level
        ``years`` is used if defined, else ``['Latest_Value']``.
    scale : number, optional
        Divisor applied to the values.  When omitted, the module-level
        ``scale`` is used if defined, else ``1``.

    Returns
    -------
    list
        Choropleth traces.

    Raises
    ------
    ValueError
        If *selection* is neither ``'all'`` nor a key of *DatDict*.
    """
    # Backward compatibility: callers that do not pass years/scale keep
    # getting the module-level settings the function used to read implicitly.
    if years is None:
        years = globals().get('years', ['Latest_Value'])
    if scale is None:
        scale = globals().get('scale', 1)
    column = str(years[0])

    # One trace per dataset
    if selection == 'all':
        inset = []
        for Series in DatDict:
            table = DatDict[Series].T
            inset.append(
                go.Choropleth(
                    locationmode='country names',
                    locations=table.index,
                    z=table[column] / scale,
                    text=table.index,
                    colorscale=[[0, 'rgb(230, 238, 255)'], [1, 'rgb(51, 153, 255)']],
                    autocolorscale=True,
                    showscale=False,
                    geo='geo',
                    hoverinfo="location+z",
                    colorbar=dict(
                        thicknessmode="fraction",
                        lenmode="fraction",
                        len=0.75,
                        x=1.1,
                        xanchor='right',
                    ),
                )
            )
        return inset

    # Single trace for one named dataset.  This branch used to reference an
    # undefined name and could never run.
    if selection in DatDict:
        table = DatDict[selection].T
        return [
            go.Choropleth(
                locationmode='country names',
                locations=table.index,
                z=table[column] / scale,
                text=table.index,
                colorscale=[[0, 'rgb(230, 238, 255)'], [1, 'rgb(51, 153, 255)']],
                autocolorscale=True,
                geo='geo',
                colorbar=dict(
                    title='Scale',
                    thicknessmode="fraction",
                    lenmode="fraction",
                    len=0.25,
                    x=0.02,
                    xanchor='left',
                ),
                showlegend=True,
                showscale=False,
                hoverinfo="z",
            )
        ]

    raise ValueError(
        "selection must be 'all' or one of the dataset names, got %r" % (selection,)
    )

# `years` names what to plot: a specific year or the latest value. It is a list,
# but only its first entry is currently used when the traces are built.
# Implementation not yet optimal as data was preprocessed prior to input. Not a train smash, just not elegant
years = ['Latest_Value']

# Optionally all datasets can be divided by a common factor. Again, not elegant yet.
scale = 1

# Initialise plot
fig = DropPlot(InputDict, years, scale)
# Plot offline
plotly.offline.iplot(fig, validate=False)
