# Importing libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Preparing DataFrames for Scatterplots

In [2]:
fert = pd.read_csv('gapminder_total_fertility.csv', index_col=0)
life = pd.read_excel('gapminder_lifeexpectancy.xlsx', index_col=0)
pop = pd.read_excel('gapminder_population.xlsx',index_col=0)

In [3]:
fert.columns = fert.columns.astype(int)

In [4]:
life.index.name = 'country'
fert.index.name = 'country'
pop.index.name = 'country'

In [5]:
fert = fert.reset_index()
life = life.reset_index()
pop = pop.reset_index()

In [6]:
fert = pd.melt(fert
               ,id_vars='country'
                ,var_name='year'
                  ,value_name='fertility'
              )
life = pd.melt(life
               ,id_vars='country'
                ,var_name='year'
                  ,value_name='life_expectancy'
              )
pop = pd.melt(pop
               ,id_vars='country'
                ,var_name='year'
                  ,value_name='population'
              )

In [7]:
pop = pop.set_index('country')
life = life.set_index('country')
fert = fert.set_index('country')

# Merging tables

In [8]:
df1 = fert.merge(life,on=['country','year'],how='inner')


In [9]:
df2 = df1.merge(pop,on=['country','year'],how='inner')

In [10]:
continents = pd.read_csv('continents.csv',sep=';')

In [11]:
df3 = df2.merge(continents,on=['country'],how='inner')

# Animated Scatterplot

In [14]:
import seaborn as sns
import matplotlib.pyplot as plt

for i in range(1960,2016):
    df_subset = df3.loc[df3['year'] == i]
    fig, axis = plt.subplots(figsize=(15,10))
    plt.title('Life expectancy vs. Fertility')
    g = sns.scatterplot(x='life_expectancy'
                    ,y='fertility'
                    ,hue='continent'
                    ,s=800
                    ,data=df_subset
                    ,alpha=0.4
                    ,size='population'
                    ,palette='dark'
                    ,legend=True
                    ,sizes=(100,500)
                    ,ax=axis
                   )
    plt.axhline(y=df_subset['fertility'].mean(), color="black", linestyle="--")
    plt.axvline(x=df_subset['life_expectancy'].mean(), color="black", linestyle="--")
    plt.axis((4,90,1,10))     
    h,l = g.get_legend_handles_labels()
    plt.legend(h[0:5],l[0:5],bbox_to_anchor=(1,1),loc='upper right', borderaxespad=0.)
#     plt.legend(title='Continents', loc='upper left', labels=['Asia','Europe','Africa'])
    plt.savefig('scatterplots/'+'lifeexp'+str(i))
    plt.close()

In [15]:
import imageio

images = []

for i in range(1960, 2016):
    filename = 'scatterplots/lifeexp{i}.png'.format(i = str(i))
    images.append(imageio.imread(filename))

imageio.mimsave('output.gif', images, fps=20)


# Little Data Manipulation

In [28]:
pop['year']

country
Abkhazia                                               1800
Afghanistan                                            1800
Akrotiri and Dhekelia                                  1800
Albania                                                1800
Algeria                                                1800
                                                   ...     
Northern Marianas                               Unnamed: 91
South Georgia and the South Sandwich Islands    Unnamed: 91
US Minor Outlying Islands                       Unnamed: 91
Virgin Islands                                  Unnamed: 91
West Bank                                       Unnamed: 91
Name: year, Length: 25025, dtype: object

In [29]:
columns = pop['year']

In [30]:
columns.tail()

country
Northern Marianas                               Unnamed: 91
South Georgia and the South Sandwich Islands    Unnamed: 91
US Minor Outlying Islands                       Unnamed: 91
Virgin Islands                                  Unnamed: 91
West Bank                                       Unnamed: 91
Name: year, dtype: object

In [None]:
columns.str.startswith('U')

In [31]:
j=0

for i in columns:
    if str(i).startswith('U') == True:
        columns[j] = '19'+i[-2:]
    j += 1        
columns.tail()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  columns[j] = '19'+i[-2:]


country
Northern Marianas                               1991
South Georgia and the South Sandwich Islands    1991
US Minor Outlying Islands                       1991
Virgin Islands                                  1991
West Bank                                       1991
Name: year, dtype: object