In [1]:
#Imports all the packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#Read all the data into Dataframes
life = pd.read_excel('Spiced_Projects/gapminder_lifeexpectancy.xlsx', index_col=0)
fert = pd.read_csv('Spiced_Projects/gapminder_total_fertility.csv', index_col=0)
popu = pd.read_excel('Spiced_Projects/gapminder_population.xlsx', index_col = 0)
cont = pd.read_csv('Spiced_Projects/continents.csv', sep=';')

In [3]:
#format all the tables 
#merge all the tables
#find max and min of fert and life to define x,y ranges
#define function to avoid for loop over dataframe

In [4]:
#tidy fert
fert.columns = fert.columns.astype(int)
fert.index.name = 'country'
fert = fert.reset_index()
fert = fert.melt(id_vars='country', var_name='year', value_name='fertility_rate')
fert

Unnamed: 0,country,year,fertility_rate
0,Abkhazia,1800,
1,Afghanistan,1800,7.00
2,Akrotiri and Dhekelia,1800,
3,Albania,1800,4.60
4,Algeria,1800,6.99
...,...,...,...
56155,Yugoslavia,2015,
56156,Zambia,2015,5.59
56157,Zimbabwe,2015,3.35
56158,Åland,2015,


In [5]:
#tidy popu
popu.columns = popu.columns.astype(int)
popu.index.name = 'country'
popu = popu.reset_index()
popu = popu.melt(id_vars='country', var_name='year', value_name='population')
popu

Unnamed: 0,country,year,population
0,Abkhazia,1800,
1,Afghanistan,1800,3280000.0
2,Akrotiri and Dhekelia,1800,
3,Albania,1800,410445.0
4,Algeria,1800,2503218.0
...,...,...,...
22270,Northern Marianas,2015,
22271,South Georgia and the South Sandwich Islands,2015,
22272,US Minor Outlying Islands,2015,
22273,Virgin Islands,2015,


In [6]:
life.index.name = 'country'

In [7]:
life = life.reset_index()

In [8]:
life = life.melt(id_vars='country', var_name='year', value_name='life_expectancy')

In [9]:
df = fert.merge(popu)

In [10]:
df = df.merge(life)

In [11]:
df

Unnamed: 0,country,year,fertility_rate,population,life_expectancy
0,Abkhazia,1800,,,
1,Afghanistan,1800,7.00,3280000.0,28.21
2,Akrotiri and Dhekelia,1800,,,
3,Albania,1800,4.60,410445.0,35.40
4,Algeria,1800,6.99,2503218.0,28.82
...,...,...,...,...,...
20974,Yugoslavia,2015,,,
20975,Zambia,2015,5.59,16211767.0,56.70
20976,Zimbabwe,2015,3.35,15602751.0,59.30
20977,Åland,2015,,,


In [12]:
cont

Unnamed: 0,continent,country
0,Africa,Algeria
1,Africa,Angola
2,Africa,Benin
3,Africa,Botswana
4,Africa,Burkina
...,...,...
189,South America,Paraguay
190,South America,Peru
191,South America,Suriname
192,South America,Uruguay


In [13]:
df = df.merge(cont)

In [14]:
df

Unnamed: 0,country,year,fertility_rate,population,life_expectancy,continent
0,Afghanistan,1800,7.00,3280000.0,28.21,Asia
1,Afghanistan,1810,7.00,3280000.0,28.11,Asia
2,Afghanistan,1820,7.00,3323519.0,28.01,Asia
3,Afghanistan,1830,7.00,3448982.0,27.90,Asia
4,Afghanistan,1840,7.00,3625022.0,27.80,Asia
...,...,...,...,...,...,...
14170,Zimbabwe,2011,3.64,14255592.0,51.60,Africa
14171,Zimbabwe,2012,3.56,14565482.0,54.20,Africa
14172,Zimbabwe,2013,3.49,14898092.0,55.70,Africa
14173,Zimbabwe,2014,3.41,15245855.0,57.00,Africa


In [15]:
df.dropna
df

Unnamed: 0,country,year,fertility_rate,population,life_expectancy,continent
0,Afghanistan,1800,7.00,3280000.0,28.21,Asia
1,Afghanistan,1810,7.00,3280000.0,28.11,Asia
2,Afghanistan,1820,7.00,3323519.0,28.01,Asia
3,Afghanistan,1830,7.00,3448982.0,27.90,Asia
4,Afghanistan,1840,7.00,3625022.0,27.80,Asia
...,...,...,...,...,...,...
14170,Zimbabwe,2011,3.64,14255592.0,51.60,Africa
14171,Zimbabwe,2012,3.56,14565482.0,54.20,Africa
14172,Zimbabwe,2013,3.49,14898092.0,55.70,Africa
14173,Zimbabwe,2014,3.41,15245855.0,57.00,Africa


In [16]:
#grouping data by years

In [17]:
yearly_group = df[df['year']>1960].groupby('year') 

In [18]:
yearly_group.head()

Unnamed: 0,country,year,fertility_rate,population,life_expectancy,continent
26,Afghanistan,1961,7.67,9164945.0,32.47,Asia
27,Afghanistan,1962,7.67,9343772.0,33.01,Asia
28,Afghanistan,1963,7.67,9531555.0,33.53,Asia
29,Afghanistan,1964,7.67,9728645.0,34.07,Asia
30,Afghanistan,1965,7.67,9935358.0,34.60,Asia
...,...,...,...,...,...,...
400,Angola,2011,6.10,21942296.0,58.10,Africa
401,Angola,2012,5.98,22685632.0,58.50,Africa
402,Angola,2013,5.86,23448202.0,58.80,Africa
403,Angola,2014,5.75,24227524.0,59.20,Africa


In [19]:
df.max() 

country                 Zimbabwe
year                        2015
fertility_rate              9.22
population          1376048943.0
life_expectancy             84.8
continent          South America
dtype: object

In [20]:
df.min()

country            Afghanistan
year                      1800
fertility_rate            1.13
population              1834.0
life_expectancy            4.0
continent               Africa
dtype: object

In [21]:
def create_scatterplot(year, dataframe):
    plt.figure(figsize=(20,15)) 
    sns.scatterplot(
        x='life_expectancy',
        y='fertility_rate',
        size='population',
        hue='continent',
        sizes = (200,4000),
        data=dataframe,
        alpha=0.7,
        linestyle='dashed',
        palette = 
    )
    plt.axis([4.0, 85.0, 1.0, 9.5])
    plt.title(year,fontsize=20)
    p = plt.subplot()
    h, l = p.get_legend_handles_labels()
    plt.legend (h[0:7], l[0:7], 
               bbox_to_anchor=(1.02, 1), 
               loc='upper left',
               borderaxespad=0, 
               scatterpoints=1, 
               fontsize=15,
               markerscale=2)
    plt.xlabel('Life Expectancy',fontsize=17)
    plt.ylabel('Fertility Rate',fontsize=17)
    plt.tight_layout()
    plt.savefig(f'plots/life_exp{year}.png')
    plt.close()

In [22]:
for year, sub_df in yearly_group:
    create_scatterplot(year, sub_df)

In [23]:
pip install imageio


Note: you may need to restart the kernel to use updated packages.


In [24]:
import imageio.v2 as imageio

images = []

for i in range(1961,2015):
    filename='plots/life_exp{}.png'.format(i)
    images.append(imageio.imread(filename))

imageio.mimsave('week_01_project.gif', images, fps=8)