### Data Sources

- Infant mortality rates (OECD): https://data.oecd.org/healthstat/infant-mortality-rates.htm#indicator-chart
- Country names, 2014 GDP and country codes (Plotly datasets): https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv

### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf

## Importing Data and EDA

In [5]:
# Infant-mortality observations, read from a CSV resolved relative to the
# notebook's working directory (presumably the OECD download listed under
# "Data Sources" -- confirm).
INFANT_MORTALITY_CSV = 'InfantMortalityRates.csv'
data = pd.read_csv(INFANT_MORTALITY_CSV)

In [6]:
# Preview the first rows to check how the CSV was parsed.
data.head()

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1960,20.2,
1,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1961,19.5,
2,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1962,20.4,
3,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1963,19.5,
4,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1964,19.1,


In [7]:
# Summary statistics for the numeric columns (TIME and Value).
data.describe()

Unnamed: 0,TIME,Value
count,2365.0,2365.0
mean,1988.661311,20.369725
std,16.229813,24.748055
min,1960.0,0.7
25%,1975.0,5.6
50%,1989.0,11.9
75%,2003.0,22.8
max,2016.0,165.1


In [8]:
# Column dtypes and non-null counts; used to spot missing values per column.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2365 entries, 0 to 2364
Data columns (total 8 columns):
LOCATION      2365 non-null object
INDICATOR     2365 non-null object
SUBJECT       2365 non-null object
MEASURE       2365 non-null object
FREQUENCY     2365 non-null object
TIME          2365 non-null int64
Value         2365 non-null float64
Flag Codes    7 non-null object
dtypes: float64(1), int64(1), object(6)
memory usage: 147.9+ KB


In [9]:
# Non-null entries per column for each LOCATION code, i.e. how many
# observations each location contributes.
data.groupby('LOCATION').count()

Unnamed: 0_level_0,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
LOCATION,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AUS,57,57,57,57,57,57,0
AUT,57,57,57,57,57,57,0
BEL,57,57,57,57,57,57,0
BRA,56,56,56,56,56,56,0
CAN,55,55,55,55,55,55,0
CHE,57,57,57,57,57,57,0
CHL,56,56,56,56,56,56,0
CHN,47,47,47,47,47,47,0
COL,56,56,56,56,56,56,0
CRI,51,51,51,51,51,51,0


In [10]:
# Show every row whose location code is KOR or TUR.
data[data['LOCATION'].isin(['KOR', 'TUR'])]

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
853,KOR,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1970,45.0,
854,KOR,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1981,17.0,
855,KOR,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1984,16.0,
856,KOR,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1985,13.0,
857,KOR,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1987,13.0,
858,KOR,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1989,12.0,
859,KOR,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1991,10.0,
860,KOR,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1993,9.9,
861,KOR,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1996,7.7,
862,KOR,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1999,6.2,


In [12]:
# Country lookup table, fetched over HTTP from the plotly/datasets repository.
# The URL points at the `master` branch, so the content is not pinned.
COUNTRY_CODES_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv'
country_names = pd.read_csv(COUNTRY_CODES_URL)

In [13]:
# Relabel the three columns by position so the code column is called
# 'LOCATION', the key used by the infant-mortality table.
# NOTE(review): positional assignment assumes the remote file keeps its
# column order (name, GDP, code) -- confirm if the source CSV changes.
country_names.columns = ['COUNTRY', 'GDP (BILLIONS)', 'LOCATION']

In [14]:
# Preview the lookup table after relabelling its columns.
country_names.head()

Unnamed: 0,COUNTRY,GDP (BILLIONS),LOCATION
0,Afghanistan,21.71,AFG
1,Albania,13.4,ALB
2,Algeria,227.8,DZA
3,American Samoa,0.75,ASM
4,Andorra,4.8,AND


In [15]:
# Attach the country name and GDP to every observation via the shared
# LOCATION code. A left join keeps every row of `data`, including codes that
# have no match in `country_names` (those get NaN in the added columns).
data_plus = pd.merge(data, country_names, on='LOCATION', how='left')

# A left join multiplies rows when the right-hand key is not unique; fail
# loudly instead of silently plotting duplicated observations.
assert len(data_plus) == len(data), (
    'merge changed the row count: duplicate LOCATION codes in country_names'
)

In [16]:
# Preview the merged table; COUNTRY and GDP (BILLIONS) are the added columns.
data_plus.head()

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes,COUNTRY,GDP (BILLIONS)
0,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1960,20.2,,Australia,1483.0
1,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1961,19.5,,Australia,1483.0
2,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1962,20.4,,Australia,1483.0
3,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1963,19.5,,Australia,1483.0
4,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1964,19.1,,Australia,1483.0


In [17]:
# Check non-null counts after the merge: a COUNTRY count below the row count
# would mean some LOCATION codes found no match.
data_plus.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2365 entries, 0 to 2364
Data columns (total 10 columns):
LOCATION          2365 non-null object
INDICATOR         2365 non-null object
SUBJECT           2365 non-null object
MEASURE           2365 non-null object
FREQUENCY         2365 non-null object
TIME              2365 non-null int64
Value             2365 non-null float64
Flag Codes        7 non-null object
COUNTRY           2365 non-null object
GDP (BILLIONS)    2365 non-null float64
dtypes: float64(2), int64(1), object(7)
memory usage: 203.2+ KB


## Plotly

In [3]:
# Use Plotly in offline mode: set up notebook rendering for plotly.offline
# and put cufflinks into offline mode as well.
init_notebook_mode(connected=True)
cf.go_offline()

In [34]:
# Hand-built animated scatter: one trace per country, one frame per year,
# with Play/Pause buttons and a year slider. The result is written to
# animation.html by plotly.offline.plot.
firstyear = 1980
years = list(range(firstyear, 2016))  # 1980 through 2015

# Country names in order of first appearance in the merged table.
countries = data_plus['COUNTRY'].unique().tolist()

# Play starts the animation from the current frame; Pause animates to
# `[None]` with zero duration, which halts playback immediately.
play_button = {
    'args': [None, {'frame': {'duration': 500, 'redraw': False},
                    'fromcurrent': True,
                    'transition': {'duration': 300,
                                   'easing': 'quadratic-in-out'}}],
    'label': 'Play',
    'method': 'animate'}
pause_button = {
    'args': [[None], {'frame': {'duration': 0, 'redraw': False},
                      'mode': 'immediate',
                      'transition': {'duration': 0}}],
    'label': 'Pause',
    'method': 'animate'}

figure = {
    'data': [],
    'layout': {
        'autosize': False,
        'width': 1000,
        'height': 1000,
        'xaxis': {'title': 'Infant Mortality per 1000 Live Births',
                  'rangemode': 'tozero',
                  'range': [0, 100]},
        # NOTE(review): per the Plotly axis reference, `ticktext` only takes
        # effect together with `tickvals` / tickmode='array'; kept unchanged.
        'yaxis': {'ticktext': countries},
        'hovermode': 'closest',
        'updatemenus': [{
            'buttons': [play_button, pause_button],
            'direction': 'left',
            'pad': {'r': 10, 't': 87},
            'showactive': False,
            'type': 'buttons',
            'x': 0.1,
            'xanchor': 'right',
            'y': 0,
            'yanchor': 'top'}],
    },
    'frames': [],
}

# Year slider; one step per frame is appended in the loop below.
sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'Year:',
        'visible': True,
        'xanchor': 'right'},
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []}

for year in years:
    frame_name = str(year)

    # Filter the year once, then index its values by country, rather than
    # re-filtering the whole table for every (year, country) pair.
    rows_for_year = data_plus[data_plus['TIME'] == year]
    values_by_country = {
        name: values.tolist()
        for name, values in rows_for_year.groupby('COUNTRY')['Value']
    }

    frame = {'data': [], 'name': frame_name}
    for country in countries:
        values = values_by_country.get(country)
        if values is None:
            # Every frame carries one trace per country, so a country with
            # no observation this year still gets a placeholder marker.
            # NOTE(review): the placeholder sits at x=0, which reads as a
            # mortality rate of zero rather than "no data".
            trace = {
                'x': [0],
                'y': [country],
                'mode': 'markers',
                'text': [0],
                'name': country}
        else:
            trace = {
                'x': values,
                'y': [country] * len(values),
                'mode': 'markers',
                'text': list(values),
                'name': country}
        frame['data'].append(trace)

    # The first year's traces double as the figure's initial state.
    if year == firstyear:
        figure['data'] = list(frame['data'])

    figure['frames'].append(frame)
    # Address the frame by the same string used as its name.
    sliders_dict['steps'].append({
        'args': [[frame_name],
                 {'frame': {'duration': 300, 'redraw': False},
                  'mode': 'immediate',
                  'transition': {'duration': 300}}],
        'label': frame_name,
        'method': 'animate'})

figure['layout']['sliders'] = [sliders_dict]

plot(figure, filename='animation.html')

'file:///Users/samantha/Desktop/Desktop_MacbookPro/FlatironSchool/BlogProjects/animation.html'

## Plotly Express

In [18]:
import plotly_express as px

In [28]:
# Subset used by the Plotly Express charts: observations from 1980 through
# 2015, excluding location codes KOR and TUR (the KOR rows displayed earlier
# in the notebook skip many years).
# Built in a single step from `data_plus` so re-running this cell always
# gives the same frame, instead of re-filtering `data_xp` across four cells.
data_xp = data_plus[
    data_plus['TIME'].between(1980, 2015)
    & ~data_plus['LOCATION'].isin(['KOR', 'TUR'])
].copy()

In [37]:
# Preview the filtered subset; the first rows should start at TIME 1980.
data_xp.head()

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes,COUNTRY,GDP (BILLIONS)
20,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1980,10.7,,Australia,1483.0
21,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1981,10.0,,Australia,1483.0
22,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1982,10.3,,Australia,1483.0
23,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1983,9.6,,Australia,1483.0
24,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1984,9.2,,Australia,1483.0


In [43]:
# Plotly Express version of the animated scatter: infant mortality on x,
# one row and colour per country, one animation frame per TIME value.
fig = px.scatter(
    data_xp,
    x="Value",
    y="COUNTRY",
    color="COUNTRY",
    hover_name="COUNTRY",
    animation_frame="TIME",
    animation_group="COUNTRY",
    range_x=[0, 100],
    labels={"COUNTRY": "Country", "Value": "Infant Mortality Rate"},
)

# Fix the canvas at 1000x1000 instead of letting it autosize.
fig.update(layout=dict(autosize=False, width=1000, height=1000))

In [46]:
# Animated world choropleth: locations are keyed by the LOCATION code and
# shaded by Value, with one animation frame per TIME value.
chorfig = px.choropleth(
    data_xp,
    locations="LOCATION",
    color="Value",
    hover_name="COUNTRY",
    animation_frame="TIME",
    color_continuous_scale=px.colors.sequential.deep,
    scope="world",
)
# Write the map to a standalone HTML file.
plot(chorfig, filename='chorfig.html')

'chorfig.html'

In [45]:
# Write the Plotly Express scatter animation to a standalone HTML file.
plot(fig, filename = 'ani_express.html')

'ani_express.html'