# Assignment 3 - Economic Growth and Comparative Development
#             Bogota Summer School in Economics
## By Lisseth Yaya (yaya_la@javeriana.edu.co)

HOMEWORK:

Using Pandas and Statsmodels write a Jupyter Notebook that: 

1. Uses the data from the Maddison Project to plot the evolution of total population across the world. 
2. Plots the evolution of the share of the world population by countries and WB regions.
3. Downloads fertility, mortality and life expectancy data from the WB and plots its evolution in the last 60 years.
4. Downloads mortality and life expectancy data (across regions and cohorts) from the [Human Mortality Database](https://www.mortality.org/) and plots its evolution.
5. Using this data, analyze the convergence of life expectancy, mortality and fertility.

Submit your notebook as a ``pull request`` to the course's github repository.

In [1]:
# Let's import pandas and some other basic packages we will use 
from __future__ import division
%pylab --no-import-all
%matplotlib inline
import pandas as pd
import numpy as np

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


# Working with Pandas

## Data

### Country-level economic data

* [Maddison Historical Data](http://www.ggdc.net/maddison/oriindex.htm) provides the most used historical statistics on population.
* [World Bank](https://data.worldbank.org/) provides all kinds of socio-economic data.
* [Human Mortality Database](https://www.mortality.org/) provides detailed mortality and population data for the world for the last two centuries.

In [2]:
import os

# Output folders: cached datasets go to pathout, exported figures to pathgraphs.
pathout = './data/'
pathgraphs = './graphs/'

# os.makedirs(..., exist_ok=True) is a no-op when the folder already exists, so the
# cell can be re-run safely and there is no gap between "check" and "create".
for folder in (pathout, pathgraphs):
    os.makedirs(folder, exist_ok=True)

### 1. Uses the data from the Maddison Project to plot the evolution of total population across the world. 
### Download New Maddison Project Data

In [3]:
def _read_stata_cached(filename, url):
    """Load a Stata dataset from the local cache, downloading it on first use.

    Parameters
    ----------
    filename : str
        File name of the cached copy inside ``pathout``.
    url : str
        Remote location the dataset is read from when no cached copy exists.

    Returns
    -------
    pandas.DataFrame
        The dataset, read from the cache or freshly downloaded.
    """
    local_path = pathout + filename
    try:
        return pd.read_stata(local_path)
    except FileNotFoundError:
        # Only a missing cache file triggers a download. Any other problem (corrupt file,
        # interrupted kernel, ...) is raised instead of being silently swallowed.
        frame = pd.read_stata(url)
        frame.to_stata(local_path, write_index=False, version=117)
        return frame


# Each file is cached independently, so one missing file no longer re-downloads all three.
_maddison_base_url = 'https://www.rug.nl/ggdc/historicaldevelopment/maddison/data/'
maddison_new = _read_stata_cached('Maddison2018.dta', _maddison_base_url + 'mpd2018.dta')
maddison_new_region = _read_stata_cached('Maddison2018_region.dta', _maddison_base_url + 'mpd2018_region_data.dta')
maddison_new_1990 = _read_stata_cached('Maddison2018_1990.dta', _maddison_base_url + 'mpd2018_1990bm.dta')

In [4]:
# Download the original Maddison workbook once; later runs reuse the local copy.
if not os.path.exists(pathout + 'Maddison_original.xls'):
    # Import the submodule explicitly: a bare `import urllib` does not guarantee that
    # `urllib.request` is loaded, so the call below could fail with AttributeError.
    import urllib.request
    dataurl = "http://www.ggdc.net/maddison/Historical_Statistics/horizontal-file_02-2010.xls"
    urllib.request.urlretrieve(dataurl, pathout + 'Maddison_original.xls')

In [5]:
# Read the "Population" sheet of the original Maddison workbook, skipping its first two rows.
maddison_old_pop = pd.read_excel(pathout + 'Maddison_original.xls', sheet_name="Population", skiprows=2)
maddison_old_pop

Unnamed: 0.1,Unnamed: 0,1,Unnamed: 2,1000,Unnamed: 4,1500,Unnamed: 6,1600,Unnamed: 8,1700,...,2002,2003,2004,2005,2006,2007,2008,2009,Unnamed: 201,2030
0,Western Europe,,,,,,,,,,...,,,,,,,,,,
1,Austria,500.0,,700.0,,2000.0,,2500.0,,2500.0,...,8148.312,8162.656,8174.762,8184.691,8192.880,8199.783,8205.533,8210,,8120.000
2,Belgium,300.0,,400.0,,1400.0,,1600.0,,2000.0,...,10311.970,10330.824,10348.276,10364.388,10379.067,10392.226,10403.951,10414,,10409.000
3,Denmark,180.0,,360.0,,600.0,,650.0,,700.0,...,5374.693,5394.138,5413.392,5432.335,5450.661,5468.120,5484.723,5501,,5730.488
4,Finland,20.0,,40.0,,300.0,,400.0,,400.0,...,5193.039,5204.405,5214.512,5223.442,5231.372,5238.460,5244.749,5250,,5201.445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273,Guadeloupe,,,,,,,,,,...,435.739,440.189,444.515,448.713,452.776,456.698,460.486,n.a.,,523.493
274,Guyana (Fr.),,,,,,,,,,...,182.333,186.917,191.309,195.506,199.509,203.321,206.941,n.a.,,272.781
275,Martinique,,,,,,,,,,...,422.277,425.966,429.510,432.900,436.131,439.202,442.119,n.a.,,486.714
276,Reunion,,,,,,,,,,...,743.981,755.171,766.153,776.948,787.584,798.094,808.506,n.a.,,1025.217


In [6]:
# Give the first, unlabelled column (country / region names) a proper name.
maddison_old_pop = maddison_old_pop.rename(columns={'Unnamed: 0': 'Country'})
maddison_old_pop

Unnamed: 0,Country,1,Unnamed: 2,1000,Unnamed: 4,1500,Unnamed: 6,1600,Unnamed: 8,1700,...,2002,2003,2004,2005,2006,2007,2008,2009,Unnamed: 201,2030
0,Western Europe,,,,,,,,,,...,,,,,,,,,,
1,Austria,500.0,,700.0,,2000.0,,2500.0,,2500.0,...,8148.312,8162.656,8174.762,8184.691,8192.880,8199.783,8205.533,8210,,8120.000
2,Belgium,300.0,,400.0,,1400.0,,1600.0,,2000.0,...,10311.970,10330.824,10348.276,10364.388,10379.067,10392.226,10403.951,10414,,10409.000
3,Denmark,180.0,,360.0,,600.0,,650.0,,700.0,...,5374.693,5394.138,5413.392,5432.335,5450.661,5468.120,5484.723,5501,,5730.488
4,Finland,20.0,,40.0,,300.0,,400.0,,400.0,...,5193.039,5204.405,5214.512,5223.442,5231.372,5238.460,5244.749,5250,,5201.445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273,Guadeloupe,,,,,,,,,,...,435.739,440.189,444.515,448.713,452.776,456.698,460.486,n.a.,,523.493
274,Guyana (Fr.),,,,,,,,,,...,182.333,186.917,191.309,195.506,199.509,203.321,206.941,n.a.,,272.781
275,Martinique,,,,,,,,,,...,422.277,425.966,429.510,432.900,436.131,439.202,442.119,n.a.,,486.714
276,Reunion,,,,,,,,,,...,743.981,755.171,766.153,776.948,787.584,798.094,808.506,n.a.,,1025.217


In [7]:
# Keep only the labelled columns; the spacer columns pandas named "Unnamed: N" are dropped.
named_columns = [col for col in maddison_old_pop.columns if not str(col).startswith('Unnamed')]
maddison_old_pop = maddison_old_pop[named_columns]
maddison_old_pop

Unnamed: 0,Country,1,1000,1500,1600,1700,1820,1821,1822,1823,...,2001,2002,2003,2004,2005,2006,2007,2008,2009,2030
0,Western Europe,,,,,,,,,,...,,,,,,,,,,
1,Austria,500.0,700.0,2000.0,2500.0,2500.0,3369.0,3386.0,3402.0,3419.0,...,8131.690,8148.312,8162.656,8174.762,8184.691,8192.880,8199.783,8205.533,8210,8120.000
2,Belgium,300.0,400.0,1400.0,1600.0,2000.0,3434.0,3464.0,3495.0,3526.0,...,10291.679,10311.970,10330.824,10348.276,10364.388,10379.067,10392.226,10403.951,10414,10409.000
3,Denmark,180.0,360.0,600.0,650.0,700.0,1155.0,1167.0,1179.0,1196.0,...,5355.826,5374.693,5394.138,5413.392,5432.335,5450.661,5468.120,5484.723,5501,5730.488
4,Finland,20.0,40.0,300.0,400.0,400.0,1169.0,1186.0,1202.0,1219.0,...,5180.309,5193.039,5204.405,5214.512,5223.442,5231.372,5238.460,5244.749,5250,5201.445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273,Guadeloupe,,,,,,,,,,...,431.170,435.739,440.189,444.515,448.713,452.776,456.698,460.486,n.a.,523.493
274,Guyana (Fr.),,,,,,,,,,...,177.562,182.333,186.917,191.309,195.506,199.509,203.321,206.941,n.a.,272.781
275,Martinique,,,,,,,,,,...,418.454,422.277,425.966,429.510,432.900,436.131,439.202,442.119,n.a.,486.714
276,Reunion,,,,,,,,,,...,732.570,743.981,755.171,766.153,776.948,787.584,798.094,808.506,n.a.,1025.217


In [8]:
# Prefix every column after 'Country' with "pop_" (e.g. 1820 -> "pop_1820").
year_labels = ['pop_' + str(col) for col in maddison_old_pop.columns[1:]]
maddison_old_pop.columns = ['Country'] + year_labels
maddison_old_pop

Unnamed: 0,Country,pop_1,pop_1000,pop_1500,pop_1600,pop_1700,pop_1820,pop_1821,pop_1822,pop_1823,...,pop_2001,pop_2002,pop_2003,pop_2004,pop_2005,pop_2006,pop_2007,pop_2008,pop_2009,pop_2030
0,Western Europe,,,,,,,,,,...,,,,,,,,,,
1,Austria,500.0,700.0,2000.0,2500.0,2500.0,3369.0,3386.0,3402.0,3419.0,...,8131.690,8148.312,8162.656,8174.762,8184.691,8192.880,8199.783,8205.533,8210,8120.000
2,Belgium,300.0,400.0,1400.0,1600.0,2000.0,3434.0,3464.0,3495.0,3526.0,...,10291.679,10311.970,10330.824,10348.276,10364.388,10379.067,10392.226,10403.951,10414,10409.000
3,Denmark,180.0,360.0,600.0,650.0,700.0,1155.0,1167.0,1179.0,1196.0,...,5355.826,5374.693,5394.138,5413.392,5432.335,5450.661,5468.120,5484.723,5501,5730.488
4,Finland,20.0,40.0,300.0,400.0,400.0,1169.0,1186.0,1202.0,1219.0,...,5180.309,5193.039,5204.405,5214.512,5223.442,5231.372,5238.460,5244.749,5250,5201.445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273,Guadeloupe,,,,,,,,,,...,431.170,435.739,440.189,444.515,448.713,452.776,456.698,460.486,n.a.,523.493
274,Guyana (Fr.),,,,,,,,,,...,177.562,182.333,186.917,191.309,195.506,199.509,203.321,206.941,n.a.,272.781
275,Martinique,,,,,,,,,,...,418.454,422.277,425.966,429.510,432.900,436.131,439.202,442.119,n.a.,486.714
276,Reunion,,,,,,,,,,...,732.570,743.981,755.171,766.153,776.948,787.584,798.094,808.506,n.a.,1025.217


In [9]:
# Keep the aggregate rows, i.e. those whose label contains "total" (case-insensitive).
is_total = maddison_old_pop.Country.astype(str).str.upper().str.contains('TOTAL', regex=False)
population = maddison_old_pop.loc[is_total].reset_index(drop=True)
# Drop aggregates without a year-1 estimate, then keep the rows from index label 2 onwards.
# .copy() yields an independent frame, so the column assignment below cannot raise
# SettingWithCopyWarning or write into a view.
population = population.dropna(subset=['pop_1'])
population = population.loc[2:].copy()
# Normalise the labels. The digit pattern is a raw string with regex=True spelled out:
# pandas >= 2.0 treats str.replace patterns as literal text by default, which would
# silently leave the digits in the region names.
population['Country'] = (
    population.Country
    .str.replace('Total', '', regex=False)
    .str.replace('Countries', '', regex=False)
    .str.replace(r'\d+', '', regex=True)
    .str.replace('European', 'Europe', regex=False)
    .str.strip()
)
# Discard the aggregates labelled USSR, World and West Asian.
is_excluded = population.Country.str.contains('USSR|World|West Asian', regex=True)
population = population.loc[~is_excluded].reset_index(drop=True)
population

Unnamed: 0,Country,pop_1,pop_1000,pop_1500,pop_1600,pop_1700,pop_1820,pop_1821,pop_1822,pop_1823,...,pop_2001,pop_2002,pop_2003,pop_2004,pop_2005,pop_2006,pop_2007,pop_2008,pop_2009,pop_2030
0,Western Europe,25050.0,25560.0,57268.0,73778.0,81460.0,133028.0,134323.0,135615.0,136912.0,...,394361.2,395509.3,396597.8,397650.0,398656.277,399607.2,400512.5,401352.9,402418,405751.702
1,Western Offshoots,1120.0,1870.0,2800.0,2300.0,1750.0,11230.5102,,,,...,339424.0,342385.0,345602.0,348944.0,352249.0,355678.0,358548.0,362621.0,366175,443038.0
2,East Europe,4750.0,6500.0,13500.0,16950.0,18800.0,36457.0,,,,...,120656.9,120639.3,120607.4,120581.9,120531.758,120463.7,120378.0,120274.5,120154,113554.887
3,Latin America,5600.0,11400.0,17500.0,8600.0,12050.0,21591.447,,,,...,528191.5,535515.6,542916.9,549417.0,557941.371,565416.0,572830.0,580217.2,583991,724464.84
4,Asia,168400.0,182600.0,283800.0,378500.0,401800.0,710400.013266,,,,...,3653696.0,3700181.0,3746026.0,3781524.0,3837845.758,3882964.0,3927514.0,3972154.0,4017611,4916929.556
5,Africa,17000.0,32300.0,46610.0,55320.0,61080.0,74236.0,,,,...,830481.5,850284.7,870204.9,890226.2,910631.214,931486.5,952787.1,974527.7,990447,1517429.536


In [10]:
# Keep only the columns (years) that have a value for every remaining region.
population = population.dropna(axis=1, how='any')
population

Unnamed: 0,Country,pop_1,pop_1000,pop_1500,pop_1600,pop_1700,pop_1820,pop_1870,pop_1900,pop_1913,...,pop_2001,pop_2002,pop_2003,pop_2004,pop_2005,pop_2006,pop_2007,pop_2008,pop_2009,pop_2030
0,Western Europe,25050.0,25560.0,57268.0,73778.0,81460.0,133028.0,187499.0,233645.0,260975.0,...,394361.2,395509.3,396597.8,397650.0,398656.277,399607.2,400512.5,401352.9,402418,405751.702
1,Western Offshoots,1120.0,1870.0,2800.0,2300.0,1750.0,11230.5102,46087.6295,86396.0,111401.0,...,339424.0,342385.0,345602.0,348944.0,352249.0,355678.0,358548.0,362621.0,366175,443038.0
2,East Europe,4750.0,6500.0,13500.0,16950.0,18800.0,36457.0,53557.0,70993.0,79530.0,...,120656.9,120639.3,120607.4,120581.9,120531.758,120463.7,120378.0,120274.5,120154,113554.887
3,Latin America,5600.0,11400.0,17500.0,8600.0,12050.0,21591.447,40400.569,64605.986,80830.64,...,528191.5,535515.6,542916.9,549417.0,557941.371,565416.0,572830.0,580217.2,583991,724464.84
4,Asia,168400.0,182600.0,283800.0,378500.0,401800.0,710400.013266,769049.869189,873324.0,979299.062822,...,3653696.0,3700181.0,3746026.0,3781524.0,3837845.758,3882964.0,3927514.0,3972154.0,4017611,4916929.556
5,Africa,17000.0,32300.0,46610.0,55320.0,61080.0,74236.0,90466.0,110000.0,124697.0,...,830481.5,850284.7,870204.9,890226.2,910631.214,931486.5,952787.1,974527.7,990447,1517429.536


In [11]:
# Reshape from wide (one "pop_<year>" column per year) to long (one row per region and year).
# The stub name "pop_" becomes the name of the value column.
population = pd.wide_to_long(population, ['pop_'], i='Country', j='year').reset_index()
population

Unnamed: 0,Country,year,pop_
0,Western Europe,1,25050
1,Western Offshoots,1,1120
2,East Europe,1,4750
3,Latin America,1,5600
4,Asia,1,168400
...,...,...,...
427,Western Offshoots,2030,443038
428,East Europe,2030,113555
429,Latin America,2030,724465
430,Asia,2030,4.91693e+06


In [None]:
import matplotlib as mpl
import seaborn as sns
# Setup seaborn
sns.set()

In [None]:
# Year-by-region table (rows: year, columns: region) used by the pandas plot.
population2 = population.pivot_table(index='year',columns='Country',values='pop_',aggfunc='sum')
population2

In [None]:
# Six-colour palette, wrapped in a matplotlib colormap through seaborn.
mycolors = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
my_cmap = mpl.colors.ListedColormap(sns.color_palette(mycolors).as_hex())

# Large canvas; plot the rows of the year-by-region table labelled 1800 to 2020.
fig, ax = plt.subplots(figsize=(30, 20))
population2.loc[1800:2020].plot(ax=ax, linewidth=8, cmap=my_cmap)

# Tick label sizes for major and minor ticks.
for tick_kind, tick_size in (('major', 32), ('minor', 16)):
    ax.tick_params(axis='both', which=tick_kind, labelsize=tick_size)

# Legend with an enlarged title.
region_legend = ax.legend(prop={'size': 30})
region_legend.set_title("Region", prop={'size': 40})

# Axis labels.
ax.set_xlabel('Year', fontsize=36)
ax.set_ylabel('Population', fontsize=36)

In [None]:
# Region as a categorical hue variable and population as float, both set in one step.
population = population.assign(
    Region=population.Country.astype('category'),
    pop_=population.pop_.astype(float),
)
# One line per region for the years from 1800 onwards.
since_1800 = population.loc[population.year >= 1800].reset_index(drop=True)
fig, ax = plt.subplots(figsize=(20, 15))
sns.lineplot(
    x='year', y='pop_', hue='Region', style='Region', data=since_1800,
    alpha=1, lw=8, palette=sns.color_palette(mycolors),
    dashes=False, markers=False, ax=ax,
)
for tick_kind, tick_size in (('major', 32), ('minor', 16)):
    ax.tick_params(axis='both', which=tick_kind, labelsize=tick_size)
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.legend(title='', prop={'size': 40})
ax.set_xlabel('Year', fontsize=36)
ax.set_ylabel('Population', fontsize=36)

In [None]:
# Region as a categorical hue variable; population as float.
population['Region'] = population['Country'].astype('category')
population['pop_'] = population['pop_'].astype(float)

sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")

# Within 1951-2007 only the decade years (1960, 1970, ..., 2000) are plotted;
# every other year of that window is skipped.
skipped_years = [yr for yr in range(1951, 2008) if yr % 10 != 0]
plot_data = population.loc[
    (population.year >= 1800) & ~population.year.isin(skipped_years)
].reset_index(drop=True)

fig, ax = plt.subplots()
sns.lineplot(
    x='year', y='pop_', hue='Region', style='Region', data=plot_data,
    alpha=1, palette=sns.color_palette(mycolors),
    dashes=False, markers=True, ax=ax,
)
ax.tick_params(axis='both', which='major')
ax.tick_params(axis='both', which='minor')
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_xlabel('Year')
ax.set_ylabel('Population')
plt.savefig(pathgraphs + 'y1800-2030.pdf', dpi=300, bbox_inches='tight')

In [None]:
maddison_new_region

In [None]:
# Expose the region name under the column used for hue and style.
maddison_new_region['Region'] = maddison_new_region['region_name']

mycolors2 = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71", "orange", "b"]
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")

# Rows to plot: the listed benchmark years plus every decade year after 1950.
benchmark_years = [1870, 1890, 1913, 1929, 1950, 2016]
region_years = maddison_new_region.year
selected = region_years.isin(benchmark_years) | ((region_years > 1950) & (region_years % 10 == 0))

fig, ax = plt.subplots()
sns.lineplot(
    x='year', y='pop', hue='Region', style='Region',
    data=maddison_new_region.loc[selected],
    alpha=1, palette=sns.color_palette(mycolors2),
    dashes=False, markers=True, ax=ax,
)
ax.tick_params(axis='both', which='major')
ax.tick_params(axis='both', which='minor')
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_xlabel('Year')
ax.set_ylabel('Population')
plt.savefig(pathgraphs + 'y1870-2016.pdf', dpi=300, bbox_inches='tight')

In [None]:
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")

# Years from 1700 onwards; within 1951-2007 only the decade years are kept.
skipped_years = [yr for yr in range(1951, 2008) if yr % 10 != 0]
plot_data = population.loc[
    (population.year >= 1700) & ~population.year.isin(skipped_years)
].reset_index(drop=True)

fig, ax = plt.subplots()
sns.lineplot(
    x='year', y='pop_', hue='Region', style='Region', data=plot_data,
    alpha=1, palette=sns.color_palette(mycolors),
    dashes=False, markers=True, ax=ax,
)
ax.tick_params(axis='both', which='major')
ax.tick_params(axis='both', which='minor')
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_xlabel('Year')
ax.set_ylabel('Population')
plt.savefig(pathgraphs + 'take-off-1700-2016.pdf', dpi=300, bbox_inches='tight')

In [None]:
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")

# Years from 1000 onwards; within 1951-2007 only the decade years are kept.
skipped_years = [yr for yr in range(1951, 2008) if yr % 10 != 0]
plot_data = population.loc[
    (population.year >= 1000) & ~population.year.isin(skipped_years)
].reset_index(drop=True)

fig, ax = plt.subplots()
sns.lineplot(
    x='year', y='pop_', hue='Region', style='Region', data=plot_data,
    alpha=1, palette=sns.color_palette(mycolors),
    dashes=False, markers=True, ax=ax,
)
ax.tick_params(axis='both', which='major')
ax.tick_params(axis='both', which='minor')
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_xlabel('Year')
ax.set_ylabel('Population')
plt.savefig(pathgraphs + 'y1000-2016.pdf', dpi=300, bbox_inches='tight')

In [None]:
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")

# All years from 0 onwards; within 1951-2007 only the decade years are kept.
skipped_years = [yr for yr in range(1951, 2008) if yr % 10 != 0]
plot_data = population.loc[
    (population.year >= 0) & ~population.year.isin(skipped_years)
].reset_index(drop=True)

fig, ax = plt.subplots()
sns.lineplot(
    x='year', y='pop_', hue='Region', style='Region', data=plot_data,
    alpha=1, palette=sns.color_palette(mycolors),
    dashes=False, markers=True, ax=ax,
)
ax.tick_params(axis='both', which='major')
ax.tick_params(axis='both', which='minor')
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_xlabel('Year')
ax.set_ylabel('Population')
plt.savefig(pathgraphs + 'y1-2016.pdf', dpi=300, bbox_inches='tight')

## 2. Plots the evolution of the share of the world population by countries and WB regions.
### Getting data from the World Bank

In [None]:
from pandas_datareader import data, wb

In [None]:
# Country table from the World Bank API; names are stripped of surrounding whitespace.
wbcountries = wb.get_countries()
wbcountries['name'] = wbcountries.name.str.strip()
wbcountries

In [None]:
popvars = wb.search(string='population')
popvars

In [None]:
# NOTE(review): femalepop and malepop use the exact same 'SP.POP.' filter, so both hold the
# same rows, and neither is used by the visible cells. Confirm whether sex-specific filters
# were intended.
femalepop = popvars.loc[popvars.id.apply(lambda x: x.find('SP.POP.')!=-1)]
malepop = popvars.loc[popvars.id.apply(lambda x: x.find('SP.POP.')!=-1)]
# Indicator codes requested from the World Bank.
popfields = ['SP.POP.TOTL', 'EN.URB.MCTY', 'EN.URB.LCTY']
popfields

In [None]:
# Request the popfields indicators for every ISO2 code in wbcountries, for 1800-2017.
# `popfields+[]` passes a new list equal to popfields.
wdi = wb.download(indicator=popfields+[], country=wbcountries.iso2c.values, start=1800, end=2017)
wdi

In [None]:
wdi.sort_index()

In [None]:
# One row per (country, year), keeping the largest value of each column, with the
# group keys turned back into ordinary columns.
wdi = wdi.groupby(['country', 'year']).max().reset_index()
wdi

In [None]:
wbcountries

In [None]:
# Attach the wbcountries columns by matching its 'name' to the 'country' column of wdi.
# merge() defaults to an inner join, so rows whose names do not match exactly are dropped.
wdi = wbcountries.merge(wdi, left_on='name', right_on='country')
wdi

In [None]:
# Convert year to integers so it can be compared numerically when selecting rows to plot.
wdi['year'] = wdi.year.astype(int)
wdi

In [None]:
mycolors2 = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71", "orange", "b"]
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")

# Rows to plot: the listed benchmark years plus every decade year after 1800.
benchmark_years = [1870, 1890, 1913, 1929, 1950, 2016]
is_plot_year = wdi.year.isin(benchmark_years) | ((wdi.year > 1800) & (wdi.year % 10 == 0))
# Rows whose region is 'Aggregates' are presumably the World Bank's own regional/income
# groupings (verify against wbcountries); they are left out so no country is counted twice.
is_country = wdi.region != 'Aggregates'

# Regional totals. Passing the country-level rows straight to lineplot would draw the
# *mean* country population of each region (with a confidence band), not the population
# of the region, so the countries are summed per region and year first.
region_pop = (
    wdi.loc[is_plot_year & is_country]
    .groupby(['region', 'year'], as_index=False)['SP.POP.TOTL']
    .sum()
)
# Use exactly as many colours as there are regions.
region_palette = sns.color_palette(mycolors2)[:region_pop.region.nunique()]

fig, ax = plt.subplots()
sns.lineplot(
    x='year', y='SP.POP.TOTL', hue='region', style='region', data=region_pop,
    alpha=1, palette=region_palette, dashes=False, markers=True, ax=ax,
)
ax.tick_params(axis='both', which='major')
ax.tick_params(axis='both', which='minor')
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_xlabel('Year')
# The y axis carries the summed SP.POP.TOTL values, so the label is plain 'Population',
# as in the other population figures.
ax.set_ylabel('Population')
plt.savefig(pathgraphs + 'yWB1800-2016.pdf', dpi=300, bbox_inches='tight')

## 3. Fertility, mortality and life expectancy from WB

### Fertility

In [None]:
fertility_rate = ['SP.DYN.TFRT.IN']
fertility_rate 

In [None]:
# Request the fertility_rate indicator for every ISO2 code in wbcountries, for 1957-2017.
wdi_fertility = wb.download(indicator=fertility_rate+[], country=wbcountries.iso2c.values, start=1957, end=2017)
wdi_fertility

In [None]:
wdi_fertility.sort_index()

In [None]:
# Drop every row that has a missing value.
wdi_fertility = wdi_fertility.dropna(axis=0, how='any')
wdi_fertility

In [None]:
# One row per (country, year), keeping the largest value of each column, with the
# group keys turned back into ordinary columns.
wdi_fertility = wdi_fertility.groupby(['country', 'year']).max().reset_index()
wdi_fertility

In [None]:
# Convert year to integers so it can be compared numerically when selecting rows to plot.
wdi_fertility['year'] = wdi_fertility.year.astype(int)
wdi_fertility

In [None]:
# Attach the wbcountries columns by matching its 'name' to 'country'.
# merge() defaults to an inner join, so rows whose names do not match exactly are dropped.
wdi_fertility = wbcountries.merge(wdi_fertility, left_on='name', right_on='country')
wdi_fertility

In [None]:
mycolors2 = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71", "orange", "b"]
sns.set(rc={'figure.figsize':(20.7,8.27)})
sns.set_context("talk")

# Rows to plot: the listed benchmark years plus every decade year after 1800.
benchmark_years = [1870, 1890, 1913, 1929, 1950, 2016]
fertility_years = wdi_fertility.year
selected = fertility_years.isin(benchmark_years) | ((fertility_years > 1800) & (fertility_years % 10 == 0))

fig, ax = plt.subplots()
sns.lineplot(
    x='year', y='SP.DYN.TFRT.IN', hue='region', style='region',
    data=wdi_fertility.loc[selected],
    alpha=1, palette=sns.color_palette(mycolors2),
    dashes=False, markers=True, ax=ax,
)
ax.tick_params(axis='both', which='major')
ax.tick_params(axis='both', which='minor')
# Tick labels are rendered without decimals.
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_xlabel('Year')
ax.set_ylabel('Fertility rate')
plt.savefig(pathgraphs + 'yFR1956-2016.pdf', dpi=300, bbox_inches='tight')

### Mortality

In [None]:
mortality_rate = ['SH.DYN.MORT']
mortality_rate

In [None]:
# Request the mortality_rate indicator for every ISO2 code in wbcountries, for 1957-2017.
wdi_mortality = wb.download(indicator=mortality_rate+[], country=wbcountries.iso2c.values, start=1957, end=2017)
wdi_mortality

In [None]:
wdi_mortality.sort_index()

In [None]:
# Drop every row that has a missing value.
wdi_mortality = wdi_mortality.dropna(axis=0, how='any')
wdi_mortality

In [None]:
# One row per (country, year), keeping the largest value of each column, with the
# group keys turned back into ordinary columns.
wdi_mortality = wdi_mortality.groupby(['country', 'year']).max().reset_index()
wdi_mortality

In [None]:
# Convert year to integers so it can be compared numerically when selecting rows to plot.
wdi_mortality['year'] = wdi_mortality.year.astype(int)
wdi_mortality

In [None]:
# Attach the wbcountries columns by matching its 'name' to 'country'.
# merge() defaults to an inner join, so rows whose names do not match exactly are dropped.
wdi_mortality = wbcountries.merge(wdi_mortality, left_on='name', right_on='country')
wdi_mortality

In [None]:
mycolors2 = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71", "orange", "b"]
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")

# Rows to plot: the listed benchmark years plus every decade year after 1800.
benchmark_years = [1870, 1890, 1913, 1929, 1950, 2016]
mortality_years = wdi_mortality.year
selected = mortality_years.isin(benchmark_years) | ((mortality_years > 1800) & (mortality_years % 10 == 0))

fig, ax = plt.subplots()
sns.lineplot(
    x='year', y='SH.DYN.MORT', hue='region', style='region',
    data=wdi_mortality.loc[selected],
    alpha=1, palette=sns.color_palette(mycolors2),
    dashes=False, markers=True, ax=ax,
)
ax.tick_params(axis='both', which='major')
ax.tick_params(axis='both', which='minor')
# Tick labels are rendered without decimals.
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_xlabel('Year')
ax.set_ylabel('Mortality rate')
plt.savefig(pathgraphs + 'yMR1800-2016.pdf', dpi=300, bbox_inches='tight')

### Life expectancy

In [None]:
life_expectancy_rate = ['SP.DYN.LE00.IN']
life_expectancy_rate 

In [None]:
# Request the life_expectancy_rate indicator for every ISO2 code in wbcountries, for 1957-2017.
wdi_life_expectancy_rate = wb.download(indicator=life_expectancy_rate+[], country=wbcountries.iso2c.values, start=1957, end=2017)
wdi_life_expectancy_rate

In [None]:
wdi_life_expectancy_rate.sort_index()

In [None]:
# Drop every row that has a missing value.
wdi_life_expectancy_rate = wdi_life_expectancy_rate.dropna(axis=0, how='any')
wdi_life_expectancy_rate

In [None]:
# One row per (country, year), keeping the largest value of each column, with the
# group keys turned back into ordinary columns.
wdi_life_expectancy_rate = wdi_life_expectancy_rate.groupby(['country', 'year']).max().reset_index()
wdi_life_expectancy_rate

In [None]:
# Convert year to integers so it can be compared numerically when selecting rows to plot.
wdi_life_expectancy_rate['year'] = wdi_life_expectancy_rate.year.astype(int)
wdi_life_expectancy_rate

In [None]:
# Attach the wbcountries columns by matching its 'name' to 'country'.
# merge() defaults to an inner join, so rows whose names do not match exactly are dropped.
wdi_life_expectancy_rate = wbcountries.merge(wdi_life_expectancy_rate, left_on='name', right_on='country')
wdi_life_expectancy_rate

In [None]:
mycolors2 = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71", "orange", "b"]
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")

# Rows to plot: the listed benchmark years plus every decade year after 1800.
benchmark_years = [1870, 1890, 1913, 1929, 1950, 2016]
life_years = wdi_life_expectancy_rate.year
selected = life_years.isin(benchmark_years) | ((life_years > 1800) & (life_years % 10 == 0))

fig, ax = plt.subplots()
sns.lineplot(
    x='year', y='SP.DYN.LE00.IN', hue='region', style='region',
    data=wdi_life_expectancy_rate.loc[selected],
    alpha=1, palette=sns.color_palette(mycolors2),
    dashes=False, markers=True, ax=ax,
)
ax.tick_params(axis='both', which='major')
ax.tick_params(axis='both', which='minor')
# Tick labels are rendered without decimals.
ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
ax.set_xlabel('Year')
ax.set_ylabel('Life expectancy rate')
plt.savefig(pathgraphs + 'yLER1960-2016.pdf', dpi=300, bbox_inches='tight')

## 4. Downloads mortality and life expectancy data (across regions and cohorts) from the [Human Mortality Database](https://www.mortality.org/) and plots its evolution.
### Getting data from the Human Mortality Database

In [None]:
# Download the Human Mortality Database STMF file once; later runs reuse the local copy.
if not os.path.exists(pathout + 'HumanMortalityDatabase.csv'):
    # Import the submodule explicitly: a bare `import urllib` does not guarantee that
    # `urllib.request` is loaded, so the call below could fail with AttributeError.
    import urllib.request
    dataurl = "https://www.mortality.org/Public/STMF/Outputs/stmf.csv"
    urllib.request.urlretrieve(dataurl, pathout + 'HumanMortalityDatabase.csv')