In [3]:
import pandas as pd

The original data directly from gapminder comes in wide format.

Read the `gapminder_total_fertility.csv` file into a DataFrame.

In [6]:
# Load the wide-format fertility table (a `country` column plus one column per year)
# and preview its first five rows.
gapminder_df = pd.read_csv(filepath_or_buffer='../data/gapminder_total_fertility.csv')
gapminder_df.head(n=5)

Unnamed: 0,country,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Afghanistan,7.45,7.45,7.45,7.45,7.45,7.45,7.45,7.45,7.45,...,6.18,5.98,5.77,5.56,5.36,5.16,4.98,4.8,4.63,4.47
1,Albania,6.49,6.4,6.28,6.13,5.96,5.77,5.58,5.39,5.22,...,1.65,1.66,1.67,1.68,1.69,1.69,1.68,1.66,1.64,1.62
2,Algeria,7.52,7.57,7.61,7.65,7.67,7.67,7.68,7.67,7.67,...,2.8,2.86,2.91,2.95,2.99,3.02,3.04,3.05,3.04,3.02
3,Andorra,,,,,,,,,,...,1.19,1.27,,,,,,,,
4,Angola,6.71,6.79,6.87,6.95,7.04,7.12,7.19,7.27,7.33,...,6.26,6.19,6.12,6.04,5.95,5.86,5.77,5.69,5.6,5.52


In the original Gapminder dataset, each row is a country: there is a `country` column holding the country name, followed by one column per year.

![alt text](<Screenshot 2024-04-15 at 13.09.45.png>)


Melt the table into a form that resembles the data below:

![alt text](<Screenshot 2024-04-15 at 13.10.23.png>)

In [13]:
# Reshape wide -> long: `country` stays as the identifier column, and every
# other column (the years) is stacked into `year` / `fertility_rate` pairs.
# `value_vars` is left out on purpose so that all non-id columns are melted.
gapminder_df_long = gapminder_df.melt(
    id_vars=['country'],
    var_name='year',
    value_name='fertility_rate',
)
gapminder_df_long

Unnamed: 0,country,year,fertility_rate
0,Afghanistan,1960,7.45
1,Albania,1960,6.49
2,Algeria,1960,7.52
3,Andorra,1960,
4,Angola,1960,6.71
...,...,...,...
11264,Venezuela,2018,2.27
11265,Vietnam,2018,2.05
11266,Yemen,2018,3.79
11267,Zambia,2018,4.63


Pivot the table back to its original form.

In [31]:
# Reshape long -> wide again: one row per country, one column per year.
gapminder_df_wide = (
    gapminder_df_long
    .pivot(index=['country'], columns='year', values='fertility_rate')
    .rename_axis(columns=None)  # drop the leftover 'year' label on the column axis
    .reset_index()              # turn `country` back into a regular column
)
gapminder_df_wide

Unnamed: 0,country,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Afghanistan,7.45,7.45,7.45,7.45,7.45,7.45,7.45,7.45,7.45,...,6.18,5.98,5.77,5.56,5.36,5.16,4.98,4.80,4.63,4.47
1,Albania,6.49,6.40,6.28,6.13,5.96,5.77,5.58,5.39,5.22,...,1.65,1.66,1.67,1.68,1.69,1.69,1.68,1.66,1.64,1.62
2,Algeria,7.52,7.57,7.61,7.65,7.67,7.67,7.68,7.67,7.67,...,2.80,2.86,2.91,2.95,2.99,3.02,3.04,3.05,3.04,3.02
3,Andorra,,,,,,,,,,...,1.19,1.27,,,,,,,,
4,Angola,6.71,6.79,6.87,6.95,7.04,7.12,7.19,7.27,7.33,...,6.26,6.19,6.12,6.04,5.95,5.86,5.77,5.69,5.60,5.52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,Venezuela,6.36,6.30,6.23,6.16,6.07,5.98,5.87,5.75,5.61,...,2.50,2.47,2.44,2.42,2.39,2.37,2.34,2.32,2.29,2.27
187,Vietnam,6.35,6.39,6.42,6.45,6.46,6.47,6.49,6.49,6.49,...,1.93,1.94,1.95,1.96,1.98,2.00,2.01,2.03,2.04,2.05
188,Yemen,7.94,7.96,7.99,8.03,8.07,8.11,8.17,8.22,8.28,...,4.80,4.67,4.55,4.44,4.33,4.21,4.10,3.99,3.89,3.79
189,Zambia,7.12,7.17,7.21,7.25,7.27,7.29,7.30,7.32,7.33,...,5.50,5.42,5.33,5.23,5.13,5.03,4.92,4.81,4.72,4.63


Create an aggregate table with the average fertility_rate of each country.

![alt text](<Screenshot 2024-04-15 at 13.18.30.png>)

In [34]:
# Print the dtype and non-null count of every column in the long table.
gapminder_df_long.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11269 entries, 0 to 11268
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   country         11269 non-null  object 
 1   year            11269 non-null  object 
 2   fertility_rate  10807 non-null  float64
dtypes: float64(1), object(2)
memory usage: 264.2+ KB


In [32]:
# Tally duplicated vs. non-duplicated rows in the long table
# (True marks a row that repeats an earlier one).
gapminder_df_long.duplicated().value_counts()

False    11269
Name: count, dtype: int64

In [35]:
# Print the dtype and non-null count of every column in the re-pivoted wide table.
gapminder_df_wide.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191 entries, 0 to 190
Data columns (total 60 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   country  191 non-null    object 
 1   1960     181 non-null    float64
 2   1961     181 non-null    float64
 3   1962     181 non-null    float64
 4   1963     180 non-null    float64
 5   1964     181 non-null    float64
 6   1965     181 non-null    float64
 7   1966     181 non-null    float64
 8   1967     181 non-null    float64
 9   1968     181 non-null    float64
 10  1969     181 non-null    float64
 11  1970     181 non-null    float64
 12  1971     182 non-null    float64
 13  1972     183 non-null    float64
 14  1973     181 non-null    float64
 15  1974     181 non-null    float64
 16  1975     181 non-null    float64
 17  1976     182 non-null    float64
 18  1977     181 non-null    float64
 19  1978     181 non-null    float64
 20  1979     181 non-null    float64
 21  1980     181 non

In [33]:
# Tally duplicated vs. non-duplicated rows in the wide table
# (True marks a row that repeats an earlier one).
gapminder_df_wide.duplicated().value_counts()

False    191
Name: count, dtype: int64

In [38]:
# Average fertility rate per country across all years.
# Only `columns` is given (no `index`), so the result is a single row labelled
# `fertility_rate` with one column per country.
gapminder_df_agg = pd.pivot_table(
    gapminder_df_long,
    columns='country',
    values='fertility_rate',
    aggfunc='mean',
)
gapminder_df_agg

country,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United Kingdom,United States,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,Yemen,Zambia,Zimbabwe
fertility_rate,7.032034,3.289153,5.050847,1.226,6.894576,2.568644,2.841017,2.507119,2.151017,1.737966,...,1.982034,2.128305,2.489153,4.249322,5.184915,3.814746,3.942712,7.184576,6.427797,5.453051


Try to transpose it.

![alt text](<Screenshot 2024-04-15 at 13.19.53.png>)

In [39]:
# Swap rows and columns so that each country becomes a row.
# The result is only displayed; `gapminder_df_agg` is not reassigned.
gapminder_df_agg.transpose()

Unnamed: 0_level_0,fertility_rate
country,Unnamed: 1_level_1
Afghanistan,7.032034
Albania,3.289153
Algeria,5.050847
Andorra,1.226000
Angola,6.894576
...,...
Venezuela,3.814746
Vietnam,3.942712
Yemen,7.184576
Zambia,6.427797
