In [47]:
import pandas as pd
import os 
import numpy as np

In [48]:
fert = pd.read_csv('data/gapminder_total_fertility.csv', index_col=0)
life = pd.read_excel('data/gapminder_lifeexpectancy.xlsx', index_col=0)

In [49]:
fert.columns = fert.columns.astype(int)

In [50]:
fert.index.name = 'country'
fert = fert.reset_index()
fert = fert.melt(id_vars='country', var_name='year', value_name='fertility_rate')
fert.head()

Unnamed: 0,country,year,fertility_rate
0,Abkhazia,1800,
1,Afghanistan,1800,7.0
2,Akrotiri and Dhekelia,1800,
3,Albania,1800,4.6
4,Algeria,1800,6.99


In [51]:
life.index.name = 'country'
life = life.reset_index()
life = life.melt(id_vars='country', var_name='year', value_name='life_expectancy')
life.head()

Unnamed: 0,country,year,life_expectancy
0,Abkhazia,1800.0,
1,Afghanistan,1800.0,28.21
2,Akrotiri and Dhekelia,1800.0,
3,Albania,1800.0,35.4
4,Algeria,1800.0,28.82


In [52]:
population = pd.read_excel('data/gapminder_population.xlsx', index_col=0)
#population.shape
#population.columns
#population.index
population.index.name = 'country'
population = population.reset_index()
population = population.melt(id_vars='country', var_name='year', value_name='population')
population.head()

Unnamed: 0,country,year,population
0,Abkhazia,1800.0,
1,Afghanistan,1800.0,3280000.0
2,Akrotiri and Dhekelia,1800.0,
3,Albania,1800.0,410445.0
4,Algeria,1800.0,2503218.0


In [53]:
#df = life.merge(population)
df = fert.merge(population)
df.head()

Unnamed: 0,country,year,fertility_rate,population
0,Abkhazia,1800,,
1,Afghanistan,1800,7.0,3280000.0
2,Akrotiri and Dhekelia,1800,,
3,Albania,1800,4.6,410445.0
4,Algeria,1800,6.99,2503218.0


In [54]:
df = life.merge(df)
df.head()

Unnamed: 0,country,year,life_expectancy,fertility_rate,population
0,Abkhazia,1800.0,,,
1,Afghanistan,1800.0,28.21,7.0,3280000.0
2,Akrotiri and Dhekelia,1800.0,,,
3,Albania,1800.0,35.4,4.6,410445.0
4,Algeria,1800.0,28.82,6.99,2503218.0


In [55]:
cont = pd.read_csv('data/continents.csv',sep=';')

In [56]:
df = df.merge(cont)
df.head()

Unnamed: 0,country,year,life_expectancy,fertility_rate,population,continent
0,Afghanistan,1800.0,28.21,7.0,3280000.0,Asia
1,Afghanistan,1810.0,28.11,7.0,3280000.0,Asia
2,Afghanistan,1820.0,28.01,7.0,3323519.0,Asia
3,Afghanistan,1830.0,27.9,7.0,3448982.0,Asia
4,Afghanistan,1840.0,27.8,7.0,3625022.0,Asia


In [57]:
import seaborn as sns
import matplotlib.pyplot as plt

In [58]:
## Fix the size range adapted to the minimum and maximum of all the
## years.
min_size = 5
max_size = 800
range_df = df['population'].max()-df['population'].min()
size_range = max_size - min_size
def function(x):
    return min_size +(size_range/range_df)*(x-df['population'].min())

sizes_dict = dict(zip(df['population'],df['population'].apply(function)))


In [59]:
for year in range(1961,2016):
    df_subset = df.loc[df['year'] == year]
    fig = sns.scatterplot(x='life_expectancy',
                y='fertility_rate',
                data=df_subset,
                alpha=0.6,
                size = 'population',
                sizes = sizes_dict,
                hue = df['continent'],
                legend= ''
                )
    #Create lagend
    label_sizes = [10000000,100000000,1000000000]
    for size in label_sizes:  ##Fix sizes
        plt.scatter([], [], c='k', alpha=0.3, s=function(size),
        label=f'{size/1000000:.0f} milions')
    #plt.gca().add_artist(plt.legend(scatterpoints=1, frameon=False, labelspacing=2, title='Population'))
    label_continents = [continent for continent in df['continent'].unique()]
    colors = sns.color_palette()#("hls", 5)#['blue','orange','green','red','purple']
    plt.scatter([], [], alpha=0.6,s=0.0,label=f'   Continents') ###Here I cheet a bit...
    for color,continent in zip(colors,label_continents):  ##Fix sizes
        plt.scatter([], [], c=color, alpha=0.6,s=function(100000000),
        label=f'{continent}')
    plt.gca().add_artist(plt.legend(scatterpoints=1, loc='best', bbox_to_anchor=(1.5, 1.1, 0., 0.), labelspacing=1.1, title='Population '))
    plt.title(f'{year}')
    plt.suptitle('Life expectancy vs fertility evolution from 1961 to 2016')
    plt.axis((20, 100, 0, 10))
    fig.set(xlabel='Life expectancy', ylabel='Fertility rate')
    plt.savefig(f'{year}.png',dpi=200, bbox_inches = "tight")
    plt.close()

*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.
*c* argument looks like a single numeric RGB or R

In [60]:
#!pip install imageio
import imageio

images = []

for i in range(1961,2016):
    filename = '{}.png'.format(i)
    images.append(imageio.imread(filename))

imageio.mimsave('output.gif', images, fps=16)
for year in range(1961,2016):
    os.remove(f'{year}.png')
    
