### Links to data 

In [None]:
'https://data.oecd.org/healthstat/infant-mortality-rates.htm#indicator-chart'
'https://data.oecd.org/healthres/health-spending.htm#indicator-chart'
'https://data.oecd.org/healthres/doctors.htm#indicator-chart'

## Data Cleanup

In [11]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [19]:
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf

In [26]:
from IPython.display import display, HTML

In [20]:
init_notebook_mode(connected=True)

In [21]:
cf.go_offline()

In [218]:
data = pd.read_csv('InfantMortalityRates.csv')

In [141]:
doctors = pd.read_csv('Doctors.csv')

In [142]:
spending = pd.read_csv('HealthSpending.csv')

In [143]:
spending = spending[spending['MEASURE'] == 'USD_CAP'].copy()
spending.shape

(5448, 8)

In [219]:
data.head()

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1960,20.2,
1,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1961,19.5,
2,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1962,20.4,
3,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1963,19.5,
4,AUS,INFANTMORTALITY,TOT,DEATH_1000BIRTH,A,1964,19.1,


In [228]:
firstyear = 1980
years = list(range(firstyear, 2017))

# make list of continents
countries = []
for country in data['LOCATION']:
    if country not in countries:
        countries.append(country)
# make figure
figure = {
    'data': [],
    'layout': {},
    'frames': []
}

# fill in most of layout
figure['layout']['title']={'text': 'Infant Mortality per 1000 Live Births'}
figure['layout']['geo'] = {'showframe': False, 'showcoastlines': False, 'projection':{'type':'equirectangular'}}
figure['layout']['annotations'] = {'x':0.55,'y':0.1,'xref':'paper','yref':'paper','text':'Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">\
            CIA World Factbook</a>','showarrow':False}
figure['layout']['hovermode'] = 'closest'
figure['layout']['sliders'] = {
    'args': [
        'transition', {
            'duration': 400,
            'easing': 'cubic-in-out'
        }
    ],
    'initialValue': str(firstyear),
    'plotlycommand': 'animate',
    'values': years,
    'visible': True
}
figure['layout']['updatemenus'] = [
    {
        'buttons': [
            {
                'args': [None, {'frame': {'duration': 500, 'redraw': False},
                         'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}],
                'label': 'Play',
                'method': 'animate'
            },
            {
                'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                'transition': {'duration': 0}}],
                'label': 'Pause',
                'method': 'animate'
            }
        ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }
]

sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'Year:',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

# make data
# make frames
for year in years:
    frame = {'data': [], 'name': str(year)}
    for country in countries:
        dataset_by_year = data[data['TIME'] == int(year)]
        dataset_by_year_and_cont = dataset_by_year[dataset_by_year['LOCATION'] == country]

        if dataset_by_year_and_cont.empty:
            data_dict = {
                'locations': [country],
                'z': [0],
                'text': [country],
                'autocolorscale': True,
                'reversescale': True,
                'marker': {'line': {'color':'rgb(180,180,180)','width':0.5}},
                'colorbar':{'tickprefix':'$','title':'Infant Mortality <br> per 1000 Live Births'}
            }
        else:
            data_dict = {
                'locations': list(dataset_by_year_and_cont['LOCATION']),
                'z': list(dataset_by_year_and_cont['Value']),
                'text': list(dataset_by_year_and_cont['LOCATION']),
                'mode': 'markers',
                'text': [country],
                'autocolorscale': True,
                'reversescale': True,
                'marker': {'line': {'color':'rgb(180,180,180)','width':0.5}},
                'colorbar':{'tickprefix':'$','title':'Infant Mortality <br> per 1000 Live Births'}
            }
            
        if year == firstyear: 
            figure['data'].append(data_dict)
        frame['data'].append(data_dict)

    figure['frames'].append(frame)
    slider_step = {'args': [
        [year],
        {'frame': {'duration': 300, 'redraw': False},
         'mode': 'immediate',
       'transition': {'duration': 300}}
     ],
     'label': year,
     'method': 'animate'}
    sliders_dict['steps'].append(slider_step)

    
figure['layout']['sliders'] = [sliders_dict]
# figure['data']

fig = {
    'data': go.Choropleth({figure['data']}),
    'layout': go.Choropleth({figure['layout']}),
    'frames': figure['frames']}
iplot(fig)


TypeError: unhashable type: 'list'

In [145]:
doctors.head()

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUS,MEDICALDOC,TOT,1000HAB,A,1961,1.13,
1,AUS,MEDICALDOC,TOT,1000HAB,A,1964,1.23,
2,AUS,MEDICALDOC,TOT,1000HAB,A,1965,1.22,
3,AUS,MEDICALDOC,TOT,1000HAB,A,1966,1.23,
4,AUS,MEDICALDOC,TOT,1000HAB,A,1967,1.26,


In [146]:
spending.head()

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
47,AUS,HEALTHEXP,OOPEXP,USD_CAP,A,1971,39.973,
48,AUS,HEALTHEXP,OOPEXP,USD_CAP,A,1972,43.282,
49,AUS,HEALTHEXP,OOPEXP,USD_CAP,A,1973,47.777,
50,AUS,HEALTHEXP,OOPEXP,USD_CAP,A,1974,66.546,
51,AUS,HEALTHEXP,OOPEXP,USD_CAP,A,1975,64.43,


In [118]:
print(data['INDICATOR'].unique())
print(doctors['INDICATOR'].unique())
print(spending['INDICATOR'].unique())

['INFANTMORTALITY']
['MEDICALDOC']
['HEALTHEXP']


In [119]:
print(data['SUBJECT'].unique())
print(doctors['SUBJECT'].unique())
print(spending['SUBJECT'].unique())

['TOT']
['TOT']
['OOPEXP' 'TOT' 'COMPULSORY' 'VOLUNTARY']


In [120]:
print(data['MEASURE'].unique())
print(doctors['MEASURE'].unique())
print(spending['MEASURE'].unique())

['DEATH_1000BIRTH']
['1000HAB']
['USD_CAP']


In [147]:
gov_spending = spending[spending['SUBJECT']== 'COMPULSORY'].copy()
tot_spending = spending[spending['SUBJECT']== 'TOT'].copy()

In [148]:
dataframes = [data, doctors, gov_spending, tot_spending]

In [149]:
def clean_data(df_list):
    for df in df_list:
        value = df['INDICATOR'].iloc[0] + '_' + df['SUBJECT'].iloc[0]
        df.drop(['INDICATOR','SUBJECT','MEASURE','FREQUENCY','Flag Codes'], axis=1, inplace=True)
        df.columns = ['Country','Year', value]
        df['Key'] = df.apply(lambda row: row['Country'] + str(row['Year']), axis=1)
        print(df.head())

In [150]:
clean_data(dataframes)

  Country  Year  INFANTMORTALITY_TOT      Key
0     AUS  1960                 20.2  AUS1960
1     AUS  1961                 19.5  AUS1961
2     AUS  1962                 20.4  AUS1962
3     AUS  1963                 19.5  AUS1963
4     AUS  1964                 19.1  AUS1964
  Country  Year  MEDICALDOC_TOT      Key
0     AUS  1961            1.13  AUS1961
1     AUS  1964            1.23  AUS1964
2     AUS  1965            1.22  AUS1965
3     AUS  1966            1.23  AUS1966
4     AUS  1967            1.26  AUS1967
     Country  Year  HEALTHEXP_COMPULSORY      Key
6624     AUS  1971               137.014  AUS1971
6625     AUS  1972               145.257  AUS1972
6626     AUS  1973               160.193  AUS1973
6627     AUS  1974               196.676  AUS1974
6628     AUS  1975               293.329  AUS1975
     Country  Year  HEALTHEXP_TOT      Key
4320     AUS  1971        223.365  AUS1971
4321     AUS  1972        238.788  AUS1972
4322     AUS  1973        263.500  AUS1973
4323  

In [173]:
data['Country'].unique()

array(['AUS', 'AUT', 'BEL', 'CAN', 'CZE', 'DNK', 'FIN', 'FRA', 'DEU',
       'GRC', 'HUN', 'ISL', 'IRL', 'ITA', 'JPN', 'KOR', 'LUX', 'MEX',
       'NLD', 'NZL', 'NOR', 'POL', 'PRT', 'SVK', 'ESP', 'SWE', 'CHE',
       'TUR', 'GBR', 'USA', 'BRA', 'CHL', 'CHN', 'EST', 'IND', 'IDN',
       'ISR', 'RUS', 'SVN', 'ZAF', 'COL', 'LVA', 'LTU', 'CRI'],
      dtype=object)

In [159]:
all_data = pd.DataFrame([{'Key': 'AUS1960'}])

In [160]:
all_data.head()

Unnamed: 0,Key
0,AUS1960


In [164]:
def merge_data(agg_df, df_list):
    for df in df_list:
        agg_df = pd.merge(agg_df, df, how = 'outer', on = 'Key') 
    return agg_df

In [165]:
all_data = merge_data(all_data, dataframes)

In [170]:
all_data.drop(['Country_x', 'Year_x', 'Country_y', 'Year_y'], axis=1, inplace=True)

In [180]:
all_data['Country'] = all_data.apply(lambda row: row['Key'][:3], axis=1)
all_data['Year'] = all_data.apply(lambda row: row['Key'][3:], axis=1)

In [181]:
all_data.head()

Unnamed: 0,Key,INFANTMORTALITY_TOT,MEDICALDOC_TOT,HEALTHEXP_COMPULSORY,HEALTHEXP_TOT,Country,Year
0,AUS1960,20.2,,,,AUS,1960
1,AUS1961,19.5,1.13,,,AUS,1961
2,AUS1962,20.4,,,,AUS,1962
3,AUS1963,19.5,,,,AUS,1963
4,AUS1964,19.1,1.23,,,AUS,1964


In [185]:
2016 - 1980

36

In [186]:
count = all_data.groupby('Country').count()

In [194]:
clean_count = count[(count['MEDICALDOC_TOT']>20) & (count['HEALTHEXP_COMPULSORY']>20) & (count['HEALTHEXP_TOT']>20) & (count['INFANTMORTALITY_TOT']>20)].copy()
clean_count

Unnamed: 0_level_0,Key,INFANTMORTALITY_TOT,MEDICALDOC_TOT,HEALTHEXP_COMPULSORY,HEALTHEXP_TOT,Year
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AUS,58,57,53,47,47,58
AUT,58,57,57,48,48,58
BEL,58,57,56,26,48,58
CAN,58,55,57,48,48,58
CZE,58,57,44,28,28,58
DEU,58,57,26,47,47,58
DNK,58,57,36,47,47,58
ESP,58,57,24,48,48,58
FRA,58,57,28,32,32,58
GBR,58,57,58,48,48,58


In [196]:
countries = list(clean_count.index)

In [203]:
clean_data = all_data[all_data['Country'].isin(countries)].copy()

In [209]:
# list(range(1980,2017))
list(clean_data['Year'].unique())

['1960',
 '1961',
 '1962',
 '1963',
 '1964',
 '1965',
 '1966',
 '1967',
 '1968',
 '1969',
 '1970',
 '1971',
 '1972',
 '1973',
 '1974',
 '1975',
 '1976',
 '1977',
 '1978',
 '1979',
 '1980',
 '1981',
 '1982',
 '1983',
 '1984',
 '1985',
 '1986',
 '1987',
 '1988',
 '1989',
 '1990',
 '1991',
 '1992',
 '1993',
 '1994',
 '1995',
 '1996',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015',
 '2016',
 '2017']

In [210]:
years = ['1980','1981','1982','1983','1984','1985','1986','1987','1988','1989','1990','1991','1992','1993','1994','1995',
 '1996','1997','1998','1999','2000','2001','2002','2003','2004','2005','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016']


In [211]:
clean_data = clean_data[clean_data['Year'].isin(years)]

In [213]:
clean_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 777 entries, 20 to 2450
Data columns (total 7 columns):
Key                     777 non-null object
INFANTMORTALITY_TOT     743 non-null float64
MEDICALDOC_TOT          668 non-null float64
HEALTHEXP_COMPULSORY    695 non-null float64
HEALTHEXP_TOT           722 non-null float64
Country                 777 non-null object
Year                    777 non-null object
dtypes: float64(4), object(3)
memory usage: 48.6+ KB


In [214]:
clean_data[clean_data['INFANTMORTALITY_TOT'].isnull()]

Unnamed: 0,Key,INFANTMORTALITY_TOT,MEDICALDOC_TOT,HEALTHEXP_COMPULSORY,HEALTHEXP_TOT,Country,Year
2365,CAN2015,,2.66,3267.052,4632.837,CAN,2015
2366,CAN2016,,2.68,3319.113,4721.578,CAN,2016
2372,KOR1982,,0.55,26.873,111.654,KOR,1982
2373,KOR1983,,0.54,34.845,125.619,KOR,1983
2374,KOR1986,,0.65,49.588,162.453,KOR,1986
2375,KOR1988,,0.74,74.114,216.312,KOR,1988
2376,KOR1990,,0.83,122.083,304.54,KOR,1990
2377,KOR1992,,0.97,139.8,365.947,KOR,1992
2378,KOR1994,,1.07,172.099,412.286,KOR,1994
2379,KOR1995,,1.12,192.661,457.45,KOR,1995


In [215]:
clean_data[clean_data['MEDICALDOC_TOT'].isnull()]

Unnamed: 0,Key,INFANTMORTALITY_TOT,MEDICALDOC_TOT,HEALTHEXP_COMPULSORY,HEALTHEXP_TOT,Country,Year
50,AUS2010,4.1,,2478.320,3610.796,AUS,2010
280,CZE2014,2.4,,2041.650,2469.139,CZE,2014
281,CZE2015,2.5,,1996.719,2392.635,CZE,2015
282,CZE2016,2.8,,2034.436,2481.697,CZE,2016
339,DNK2016,3.1,,4268.811,5074.523,DNK,2016
417,FRA1980,10.0,,521.188,654.946,FRA,1980
418,FRA1981,9.7,,,,FRA,1981
419,FRA1982,9.5,,,,FRA,1982
420,FRA1983,9.1,,,,FRA,1983
421,FRA1984,8.3,,,,FRA,1984


In [69]:
firstyear = 1980
years = list(range(firstyear, 2017))

# make list of continents
countries = []
for country in data['LOCATION']:
    if country not in countries:
        countries.append(country)
# make figure
figure = {
    'data': [],
    'layout': {},
    'frames': []
}

# fill in most of layout
figure['layout']={'autosize': False, 'width':1000,'height':1000}
figure['layout']['xaxis'] = {'title': 'Infant Mortality per 1000 Live Births'}
figure['layout']['yaxis'] = {'title': 'Country','ticktext': countries}
figure['layout']['hovermode'] = 'closest'
figure['layout']['sliders'] = {
    'args': [
        'transition', {
            'duration': 400,
            'easing': 'cubic-in-out'
        }
    ],
    'initialValue': str(firstyear),
    'plotlycommand': 'animate',
    'values': years,
    'visible': True
}
figure['layout']['updatemenus'] = [
    {
        'buttons': [
            {
                'args': [None, {'frame': {'duration': 500, 'redraw': False},
                         'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}],
                'label': 'Play',
                'method': 'animate'
            },
            {
                'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                'transition': {'duration': 0}}],
                'label': 'Pause',
                'method': 'animate'
            }
        ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }
]

sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'Year:',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

# make data
# make frames
for year in years:
    frame = {'data': [], 'name': str(year)}
    for country in countries:
        dataset_by_year = data[data['TIME'] == int(year)]
        dataset_by_year_and_cont = dataset_by_year[dataset_by_year['LOCATION'] == country]

        if dataset_by_year_and_cont.empty:
            data_dict = {
                'x': [0],
                'y': [country],
                'mode': 'markers',
                'text': [0],
        #         'marker': {
        #             'sizemode': 'area',
        #             'sizeref': 200000,
        #             'size': list(dataset_by_year_and_cont['pop'])
        #         },
                'name': country
            }
        else:
            data_dict = {
                'x': list(dataset_by_year_and_cont['Value']),
                'y': list(dataset_by_year_and_cont['LOCATION']),
                'mode': 'markers',
                'text': list(dataset_by_year_and_cont['Value']),
        #         'marker': {
        #             'sizemode': 'area',
        #             'sizeref': 200000,
        #             'size': list(dataset_by_year_and_cont['pop'])
        #         },
                'name': country
            }
            
        if year == firstyear: 
            figure['data'].append(data_dict)
        frame['data'].append(data_dict)

    figure['frames'].append(frame)
    slider_step = {'args': [
        [year],
        {'frame': {'duration': 300, 'redraw': False},
         'mode': 'immediate',
       'transition': {'duration': 300}}
     ],
     'label': year,
     'method': 'animate'}
    sliders_dict['steps'].append(slider_step)

    
figure['layout']['sliders'] = [sliders_dict]
# figure['data']

iplot(figure)