## 1. Import libraries

In [None]:
import pandas as pd
import pylab as plt
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to the matplotlib figures drawn below.
sns.set()
# IPython magic: render figures inline in the notebook output.
%matplotlib inline

## 2. Read in all data

In [None]:
# Load the three Gapminder tables; in each file the first column becomes the
# row index (index_col=0).
fert = pd.read_csv('gapminder_total_fertility.csv', index_col=0)
# The two remaining tables are Excel workbooks and are read the same way.
life, pop = (
    pd.read_excel(workbook, index_col=0)
    for workbook in ('gapminder_lifeexpectancy.xlsx', 'gapminder_population.xlsx')
)

## 3. Check shape of datasets

In [None]:
# (rows, columns) of the fertility table.
print(fert.shape)

In [None]:
# (rows, columns) of the life-expectancy table.
print(life.shape)

In [None]:
# (rows, columns) of the population table.
print(pop.shape)

## 4. Compare the datatype of the column labels

In [None]:
# Display the column labels (and their dtype) of the fertility table.
fert.columns

In [None]:
# Display the column labels (and their dtype) of the life-expectancy table.
life.columns

In [None]:
# Display the column labels (and their dtype) of the population table.
pop.columns

### 4.1 Convert fertility column labels from object (string) to integer

In [None]:
# Relabel the columns of `fert` with integers so that later integer lookups
# such as `[2015]` work on it (presumably the Excel-based tables already use
# integer year labels -- confirm with the `.columns` output above).
ncol = [int(x) for x in fert.columns]
# Assign the labels directly: `set_axis(..., inplace=True)` was removed in
# pandas 2.0 and raises a TypeError there. Assignment works on every version
# and still modifies `fert` in place.
fert.columns = ncol
# Display the new labels to confirm the integer dtype.
fert.columns

## 5. stack datasets fert, life, pop

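- stack each of fert, life and pop into a Series (one value per country and year)
- add the three stacked Series to a dictionary d
- build a dataframe df from d
- print to check outcome
- stack the combined dataframe so that fertility, lifeexp and population become an index level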

In [None]:
# Turn each wide table into a long Series: the column labels become a second
# index level below the original row index.
sfert, slife, spop = (table.stack() for table in (fert, life, pop))

In [None]:
# Combine the three stacked Series into one frame, one column per indicator;
# pandas aligns them on their shared index.
d = {
    'fertility': sfert,
    'lifeexp': slife,
    'population': spop,
}
df = pd.DataFrame(d)
print(df)

In [None]:
# Stack the indicator columns as well, giving a Series whose innermost index
# level holds the names 'fertility', 'lifeexp' and 'population'.
df2 = df.stack()
print(df2)

### 5.1 unstack country and fert, life and pop from the dataset

- years remain as rows

In [None]:
# Move index levels 0 and 2 (the outermost level and the indicator name) into
# the columns; level 1 stays as the row index.
df3 = df2.unstack((0,2))
print(df3)

## 6. Plotting datasets
### 6.1 line chart

In [None]:
# Line chart of every indicator column for the three selected countries.
df3[['Germany', 'France', 'Sweden']].plot()

### 6.2 Scatter plots

- all years on one chart

In [None]:
# Fertility vs. life expectancy for every row of df, drawn with tiny markers.
df.plot.scatter(x='fertility', y='lifeexp', s=0.1)

- use original df dataset and reformat to years as columns first, to be able to plot single years from now on

In [None]:
# Reshape so the years form the top column level, then keep only the 2015
# slice (one row per entry of the outer index, one column per indicator).
df4 = df.stack().unstack(1).unstack(1)[2015]
df4.plot.scatter(x='fertility', y='lifeexp', s=0.1)

- recolour plot

In [None]:
# Resample the 'tab20b' palette to one colour entry per row of df4 and pass
# the resulting colour list to the scatter plot.
cmap = plt.get_cmap('tab20b', lut=len(df4)).colors
df4.plot.scatter(x='fertility', y='lifeexp', s=0.1, c=cmap)

- scale the marker size by population (via the `s` argument)

In [None]:
# Bubble chart of the single-year slice df4: marker size follows population.
cmap = plt.get_cmap('tab20b', lut=len(df4)).colors
# Take the sizes from df4 itself, not from the full table `df`. `df` has one
# row per index pair, so its population column does not line up with the
# points being plotted.
df4.plot.scatter('fertility', 'lifeexp', s=df4['population'] / 1000000, c=cmap)
# Fix the axes: x (fertility) from 1 to 10, y (life expectancy) from 10 to 90.
plt.axis((1, 10, 10, 90))

In [None]:
# Plot one year as a bubble chart and save it as a PNG.
# A single `year` variable drives the data slice, the title and the file name.
# The original sliced 2015 but labelled and saved the figure as 1950; 1950 is
# used here so the existing title and output file name stay the same.
year = 1950
df4 = df.stack().unstack(1).unstack(1)[year]
cmap = plt.get_cmap('tab20b', lut=len(df4)).colors
# Marker sizes come from df4 so there is exactly one size per plotted point.
df4.plot.scatter('fertility', 'lifeexp', s=df4['population'] / 100000, c=cmap)
# Fix the axes: x (fertility) from 1 to 10, y (life expectancy) from 10 to 90.
plt.axis((1, 10, 10, 90))
plt.title(f'{year}')
plt.savefig(f'lifeexp_{year}.png', bbox_inches='tight')

## 7. loop through years 1960-2015 and save png of each year in folder

In [None]:
import os

# Render one bubble chart per year (1960-2015) into scatter_images/.
gm = df.stack().unstack(1).unstack(1)
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('scatter_images', exist_ok=True)
for i in range(1960, 2016):
    gm1 = gm[i]
    cmap = plt.get_cmap('tab20b', lut=len(gm1)).colors
    # Sizes come from this year's slice so they match the plotted points
    # one-to-one (the full table `df` has a different number of rows).
    gm1.plot.scatter('fertility', 'lifeexp', s=gm1['population'] / 100000, c=cmap)
    # Identical axis limits on every frame keep the animation steady.
    plt.axis((1, 10, 10, 90))
    plt.title(f'{i}')
    plt.savefig(f'scatter_images/lifeexp_{i}.png', bbox_inches='tight')
    # Each plot call opens a new figure; close it rather than just clearing it
    # so 56 figures do not pile up in memory.
    plt.close()


## 8. Combine to GIF

- loop through all pictures in folder and save to gif

In [None]:
import imageio

# Load the yearly PNG frames in chronological order, then write them out as a
# single animated GIF at 10 frames per second.
frame_template = 'scatter_images/lifeexp_{}.png'
images = [
    imageio.imread(frame_template.format(frame_year))
    for frame_year in range(1960, 2016)
]

imageio.mimsave('output_gapminder.gif', images, fps=10)