In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the two raw tables; in both files the first column becomes the row index.
fert = pd.read_csv(
    'gapminder_total_fertility.csv',
    index_col=0,
)
life = pd.read_excel(
    'gapminder_lifeexpectancy.xlsx',
    index_col=0,
)

In [2]:
# Compare the (rows, columns) dimensions of the two raw tables before reshaping.
fert.shape, life.shape

((260, 216), (260, 217))

In [3]:
# Preview the first two rows of the wide fertility table.
fert.iloc[:2]

Unnamed: 0_level_0,1800,1801,1802,1803,1804,1805,1806,1807,1808,1809,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
Total fertility rate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abkhazia,,,,,,,,,,,...,,,,,,,,,,
Afghanistan,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,...,6.7,6.46,6.2,5.93,5.66,5.4,5.14,4.9,4.68,4.47


In [4]:
# Preview the first two rows of the wide life-expectancy table.
life.iloc[:2]

Unnamed: 0_level_0,1800,1801,1802,1803,1804,1805,1806,1807,1808,1809,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
Life expectancy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abkhazia,,,,,,,,,,,...,,,,,,,,,,
Afghanistan,28.21,28.2,28.19,28.18,28.17,28.16,28.15,28.14,28.13,28.12,...,52.4,52.8,53.3,53.6,54.0,54.4,54.8,54.9,53.8,52.72


In [5]:
# Convert the column labels (years) to integers.
fert = fert.set_axis(fert.columns.astype(int), axis='columns')

In [6]:
# Label the row index 'country' so it becomes a properly named column once reset.
fert = fert.rename_axis(index='country')

In [7]:
# Move the index into a regular column (kept, not dropped).
fert = fert.reset_index(drop=False)

In [8]:
# Reshape wide -> long: one row per (country, year) with the fertility value.
fert = pd.melt(
    fert,
    id_vars=['country'],
    var_name='year',
    value_name='Total fertility rate',
)
fert.iloc[:2]

Unnamed: 0,country,year,Total fertility rate
0,Abkhazia,1800,
1,Afghanistan,1800,7.0


In [9]:
# Show the row labels of `life` together with the index's current name.
life.index

Index(['Abkhazia', 'Afghanistan', 'Akrotiri and Dhekelia', 'Albania',
       'Algeria', 'American Samoa', 'Andorra', 'Angola', 'Anguilla',
       'Antigua and Barbuda',
       ...
       'Vietnam', 'Virgin Islands (U.S.)', 'North Yemen (former)',
       'South Yemen (former)', 'Yemen', 'Yugoslavia', 'Zambia', 'Zimbabwe',
       'Åland', 'South Sudan'],
      dtype='object', name='Life expectancy', length=260)

In [10]:
# Label the row index 'country' so it becomes a properly named column once reset.
life = life.rename_axis(index='country')

In [11]:
# Move the index into a regular column (kept, not dropped).
life = life.reset_index(drop=False)

In [12]:
# Reshape wide -> long: one row per (country, year) with the life-expectancy value.
life = pd.melt(
    life,
    id_vars=['country'],
    var_name='year',
    value_name='Life expectancy',
)
life.iloc[:2]

Unnamed: 0,country,year,Life expectancy
0,Abkhazia,1800,
1,Afghanistan,1800,28.21


In [13]:
# Load the population table and bring it into long form in one chain:
# integer column labels, index named 'country', index moved to a column, then melted.
population = (
    pd.read_excel('gapminder_population.xlsx', index_col=0)
    .pipe(lambda wide: wide.set_axis(wide.columns.astype(int), axis='columns'))
    .rename_axis(index='country')
    .reset_index()
    .melt(id_vars='country', var_name='year', value_name='Total population')
)
population.iloc[:2]

Unnamed: 0,country,year,Total population
0,Abkhazia,1800,
1,Afghanistan,1800,3280000.0


In [14]:
# Combine the three long tables on their shared keys. 'country' and 'year' are
# the only columns the frames have in common, so naming them is the same join
# that merge() performs by default.
join_keys = ['country', 'year']
df1 = pd.merge(fert, population, on=join_keys, how='inner')
df2 = pd.merge(df1, life, on=join_keys, how='inner')
df2.iloc[:2]

Unnamed: 0,country,year,Total fertility rate,Total population,Life expectancy
0,Abkhazia,1800,,,
1,Afghanistan,1800,7.0,3280000.0,28.21


In [15]:
# The continents lookup file is semicolon-delimited.
continents = pd.read_csv(
    'continents.csv',
    delimiter=';',
)

In [16]:
# Attach the continent to every (country, year) row. merge() defaults to an
# inner join, so only countries present in both frames are kept.
column_order = [
    'country',
    'continent',
    'year',
    'Total fertility rate',
    'Total population',
    'Life expectancy',
]
df3 = df2.merge(continents, on='country')
# Reorder by label instead of by position: the layout stays correct even if
# the column order of the merged frame changes.
df3 = df3[column_order]
df3 = df3.sort_values(by=['country', 'year'])
df3.head(2)

Unnamed: 0,country,continent,year,Total fertility rate,Total population,Life expectancy
0,Afghanistan,Asia,1800,7.0,3280000.0,28.21
1,Afghanistan,Asia,1810,7.0,3280000.0,28.11


In [17]:
# Keep only the rows from 1960 onwards.
from_1960 = df3['year'].ge(1960)
data_subset = df3[from_1960]
data_subset.iloc[:2]

Unnamed: 0,country,continent,year,Total fertility rate,Total population,Life expectancy
25,Afghanistan,Asia,1960,7.67,8994793.0,31.94
26,Afghanistan,Asia,1961,7.67,9164945.0,32.47


In [23]:
# Render one scatter plot per year (life expectancy vs. fertility rate, bubble
# size = population, colour = continent) and save each as a PNG frame.

# Apply the theme once, before any figure is created, so every frame is drawn
# with the same style.
sns.set_theme(style="darkgrid")

# Make sure the output folder exists; savefig does not create directories.
frames_dir = Path("./countries")
frames_dir.mkdir(parents=True, exist_ok=True)

for year in data_subset['year'].unique():
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.scatterplot(data=data_subset.loc[data_subset['year'] == year],
                    x='Life expectancy',
                    y='Total fertility rate',
                    alpha=0.8,
                    size='Total population',
                    legend='brief',
                    hue='continent',
                    sizes=(50, 1000),
                    ax=ax)
    handles, labels = ax.get_legend_handles_labels()
    # Keep only the first seven legend entries.
    # NOTE(review): presumably the hue title plus the continent entries, which
    # drops the size entries from the legend; confirm against the rendered plot.
    ax.legend(handles[0:7], labels[0:7], bbox_to_anchor=(1.05, 1), loc=0,
              borderaxespad=0, fontsize=13)
    ax.set_title(f'Life Expectancy and Fertility Rate by Continent during {year}')
    # Fixed axis limits (xmin, xmax, ymin, ymax) keep every frame on the same scale.
    ax.axis((30, 85, 0, 10))
    fig.savefig(frames_dir / f"df_{year}.png")
    # Close this specific figure so figures do not accumulate across iterations.
    plt.close(fig)
    

In [23]:
#!pip install imageio

Note: you may need to restart the kernel to use updated packages.


In [34]:
import imageio.v2 as imageio

# Assemble the per-year PNG frames into an animated GIF.
# Frames are read from the relative "./countries/" folder that the plotting
# loop writes to, instead of an absolute path on one user's machine.
# NOTE(review): range(1960, 2015) stops at 2014; confirm whether a 2015 frame
# should be included as well.
images = [
    imageio.imread(f'./countries/df_{year}.png')
    for year in range(1960, 2015)
]

imageio.mimsave('Chris_Williams_Animated.gif', images, fps=20)
