## Importing libraries

In [1]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import imageio
%matplotlib inline

## read data

In [2]:
ls ./data/

continents.csv                  [0m[01;32mgapminder_total_fertility.csv[0m*
[01;32mgapminder_lifeexpectancy.xlsx[0m*  [01;32mpenguins_clean.csv[0m*
[01;32mgapminder_population.xlsx[0m*      [01;32mpenguins_simple.csv[0m*


In [3]:
fert = pd.read_csv('data/gapminder_total_fertility.csv', index_col=0)
life = pd.read_excel('data/gapminder_lifeexpectancy.xlsx', index_col=0)
popu = pd.read_excel('data/gapminder_population.xlsx', index_col=0)
cont = pd.read_csv('data/continents.csv', sep=';')

In [4]:
print(fert.head(3))
print(fert.info())

                       1800  1801  1802  1803  1804  1805  1806  1807  1808  \
Total fertility rate                                                          
Abkhazia                NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   
Afghanistan             7.0   7.0   7.0   7.0   7.0   7.0   7.0   7.0   7.0   
Akrotiri and Dhekelia   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   

                       1809  ...  2006  2007  2008  2009  2010  2011  2012  \
Total fertility rate         ...                                             
Abkhazia                NaN  ...   NaN   NaN   NaN   NaN   NaN   NaN   NaN   
Afghanistan             7.0  ...   6.7  6.46   6.2  5.93  5.66   5.4  5.14   
Akrotiri and Dhekelia   NaN  ...   NaN   NaN   NaN   NaN   NaN   NaN   NaN   

                       2013  2014  2015  
Total fertility rate                     
Abkhazia                NaN   NaN   NaN  
Afghanistan             4.9  4.68  4.47  
Akrotiri and Dhekelia   NaN   NaN   NaN  

[3

## EDA

In [5]:
# fert.columns is in str : change into int
fert.columns = fert.columns.astype(int)

# popu.columns contain many columns starting with "Unnamed" : drop them (learned from Marija's example code)
popu.drop(popu.filter(like='Unnamed'), axis=1, inplace=True)

In [6]:
print(fert.head(1))
print(life.head(1))
print(popu.head(1))

                      1800  1801  1802  1803  1804  1805  1806  1807  1808  \
Total fertility rate                                                         
Abkhazia               NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   

                      1809  ...  2006  2007  2008  2009  2010  2011  2012  \
Total fertility rate        ...                                             
Abkhazia               NaN  ...   NaN   NaN   NaN   NaN   NaN   NaN   NaN   

                      2013  2014  2015  
Total fertility rate                    
Abkhazia               NaN   NaN   NaN  

[1 rows x 216 columns]
                 1800  1801  1802  1803  1804  1805  1806  1807  1808  1809  \
Life expectancy                                                               
Abkhazia          NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   

                 ...  2007  2008  2009  2010  2011  2012  2013  2014  2015  \
Life expectancy  ...                                                  

In [7]:
# fert, life and popu : change index name to country and move index to column 
fert.index.name = 'country'
life.index.name = 'country'
popu.index.name = 'country'
fert.reset_index(inplace=True)
life.reset_index(inplace=True)
popu.reset_index(inplace=True)

In [8]:
print(fert.head(1))
print(life.head(1))
print(popu.head(1))

    country  1800  1801  1802  1803  1804  1805  1806  1807  1808  ...  2006  \
0  Abkhazia   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...   NaN   

   2007  2008  2009  2010  2011  2012  2013  2014  2015  
0   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  

[1 rows x 217 columns]
    country  1800  1801  1802  1803  1804  1805  1806  1807  1808  ...  2007  \
0  Abkhazia   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...   NaN   

   2008  2009  2010  2011  2012  2013  2014  2015  2016  
0   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  

[1 rows x 218 columns]
    country  1800  1810  1820  1830  1840  1850  1860  1870  1880  ...  2006  \
0  Abkhazia   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...   NaN   

   2007  2008  2009  2010  2011  2012  2013  2014  2015  
0   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  

[1 rows x 82 columns]


## Data wrangling - convertiing into one datframe 

In [9]:
# fert, life, popu : use 'country' as index variable, convert into long format, set value name to fertility_rate, life_expectancy and population
fert = fert.melt(id_vars='country', var_name='year', value_name='fertility_rate')
life = life.melt(id_vars='country', var_name='year', value_name='life_expectancy')
popu = popu.melt(id_vars='country', var_name='year', value_name='population')

In [10]:
print(fert.head(3))
print(life.head(3))
print(popu.head(3))

                 country  year  fertility_rate
0               Abkhazia  1800             NaN
1            Afghanistan  1800             7.0
2  Akrotiri and Dhekelia  1800             NaN
                 country  year  life_expectancy
0               Abkhazia  1800              NaN
1            Afghanistan  1800            28.21
2  Akrotiri and Dhekelia  1800              NaN
                 country  year  population
0               Abkhazia  1800         NaN
1            Afghanistan  1800   3280000.0
2  Akrotiri and Dhekelia  1800         NaN


In [11]:
# if want to use population in milion unit
popu['population_milion'] = popu['population']/1_000_000
popu.drop(columns=['population'], inplace=True)
popu.head()

Unnamed: 0,country,year,population_milion
0,Abkhazia,1800,
1,Afghanistan,1800,3.28
2,Akrotiri and Dhekelia,1800,
3,Albania,1800,0.410445
4,Algeria,1800,2.503218


In [12]:
cont.head()

Unnamed: 0,continent,country
0,Africa,Algeria
1,Africa,Angola
2,Africa,Benin
3,Africa,Botswana
4,Africa,Burkina


In [13]:
# merge into one dataframe
print(fert.shape, life.shape, popu.shape, cont.shape)
complete = fert.merge(life).merge(popu).merge(cont) 
print(complete.shape)
print(complete.head(3))


(56160, 3) (56420, 3) (22275, 3) (194, 2)
(14175, 6)
       country  year  fertility_rate  life_expectancy  population_milion  \
0  Afghanistan  1800             7.0            28.21           3.280000   
1  Afghanistan  1810             7.0            28.11           3.280000   
2  Afghanistan  1820             7.0            28.01           3.323519   

  continent  
0      Asia  
1      Asia  
2      Asia  


## Plotting

In [14]:
# plot and save one frame of given year
def plot_save_year(year):
    
    plt.figure(figsize=(10,10))
    sns.set_theme(style="darkgrid")
    
    year_df = complete.loc[complete['year'] == year]

    # for setting marker size
    total_max = complete['population_milion'].max() # popu data_max
    year_min_norm = year_df['population_milion'].min()/total_max # popu frame_min/data_max
    year_max_norm = year_df['population_milion'].max()/total_max # popu frame_max/data_max

    im = sns.relplot(
        x='fertility_rate', y='life_expectancy', size='population_milion', 
        hue='continent', sizes=(100*year_min_norm, 1000*year_max_norm), 
        alpha=0.7, data=year_df);

    # for adding text
    x = year_df['fertility_rate'].loc[year_df['country'] == 'India']
    y = year_df['life_expectancy'].loc[year_df['country'] == 'India']
    plt.text(x=x, y=y, s='India')
    
    plt.xlabel('Fertility rate', fontsize=14)
    plt.ylabel('Life expectancy', fontsize=14)
    plt.title('{}'.format(year), fontsize=16);
    plt.xlim([complete['fertility_rate'].min()*0.95, complete['fertility_rate'].max()*1.05])
    plt.ylim([complete['life_expectancy'].min()*0.95, complete['life_expectancy'].max()*1.05])

    plt.close()
    im.savefig('./results/fert_life_year_{}.png'.format(year))

In [15]:
# test first and last year
plot_save_year(1960)
plot_save_year(2015)

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

In [16]:
# generate image for all years
images = []
for year in range(1960,2016):
    plot_save_year(year)
    filename = './results/fert_life_year_{}.png'.format(year)
    images.append(imageio.imread(filename))

# save gif 
imageio.mimsave('./results/gapminder.gif', images, fps=10)

  fig, axes = plt.subplots(nrow, ncol, **kwargs)
  plt.figure(figsize=(10,10))


<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>