# Analyzing GDP per capita, share of agricultural employment, and education
Using data from the World Bank's World Development Indicators (WDI) and the ILO's ILOSTAT database.

In [1]:
## Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
## Import data
## Paths are written with forward slashes, which work on every OS. Backslashes are
## Windows-only separators, and '\E' / '\P' inside a normal string literal are invalid
## escape sequences (SyntaxWarning on recent Python versions).
data_edu = pd.read_csv('Data/EAP_TEAP_SEX_EDU_NB_A-filtered-2024-03-03.csv')
data_WB = pd.read_excel('Data/P_Data_Extract_From_World_Development_Indicators.xlsx')

In [3]:
## Preview the education frame loaded from the CSV: one row per country
## ('ref_area.label'), education category ('classif1.label') and year ('time'),
## with the measured value in 'obs_value'.
data_edu

Unnamed: 0,ref_area.label,classif1.label,time,obs_value
0,Afghanistan,Education (ISCED-11): Total,2021,8154.806
1,Afghanistan,Education (ISCED-11): 3. Upper secondary educa...,2021,1077.267
2,Afghanistan,Education (ISCED-11): 4. Post-secondary non-te...,2021,225.569
3,Afghanistan,Education (ISCED-11): 5. Short-cycle tertiary ...,2021,68.476
4,Afghanistan,Education (ISCED-11): 6. Bachelor's or equival...,2021,419.220
...,...,...,...,...
6374,Zimbabwe,Education (ISCED-11): 8. Doctoral or equivalen...,2021,7.747
6375,Zimbabwe,Education (ISCED-11): Total,2019,5330.368
6376,Zimbabwe,Education (ISCED-11): 3. Upper secondary educa...,2019,1971.898
6377,Zimbabwe,Education (ISCED-11): 4. Post-secondary non-te...,2019,373.202


In [4]:
## Preview the World Development Indicators extract: one row per country and series,
## with one column per year. The rendered output shows '..' placeholders in some year
## cells and trailing empty/footer rows, so the frame needs cleaning before use.
data_WB

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,1998 [YR1998],1999 [YR1999],2000 [YR2000],2001 [YR2001],2002 [YR2002],2003 [YR2003],...,2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020],2021 [YR2021],2022 [YR2022]
0,Afghanistan,AFG,Employment in agriculture (% of total employme...,SL.AGR.EMPL.ZS,65.277464,65.354736,65.347961,65.254189,63.931615,63.256091,...,47.697315,44.798594,44.593516,44.337137,43.989031,44.4536,45.01604,45.983408,46.587823,46.58907
1,Afghanistan,AFG,"GDP per capita, PPP (constant 2017 internation...",NY.GDP.PCAP.PP.KD,..,..,..,..,1280.463171,1292.333437,...,2165.340915,2144.449634,2108.714173,2101.422187,2096.093111,2060.698973,2079.921861,1968.341002,1516.273265,..
2,Albania,ALB,Employment in agriculture (% of total employme...,SL.AGR.EMPL.ZS,51.319016,50.692993,49.91959,48.988817,48.143328,47.995341,...,44.198027,42.257063,41.283525,40.040852,38.078346,37.285732,36.416856,36.190744,35.640848,34.926718
3,Albania,ALB,"GDP per capita, PPP (constant 2017 internation...",NY.GDP.PCAP.PP.KD,4819.091331,5474.87661,5892.610924,6441.472108,6753.914123,7154.030284,...,11361.307891,11586.873945,11878.495523,12291.901997,12771.054137,13317.1842,13653.248783,13278.434516,14596.015558,15492.067404
4,Algeria,DZA,Employment in agriculture (% of total employme...,SL.AGR.EMPL.ZS,22.690649,22.475059,22.235421,21.715431,21.707295,21.722026,...,10.753295,9.746314,8.834767,8.535155,10.16129,10.092127,9.798442,10.023791,10.033098,9.74323
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
434,,,,,,,,,,,...,,,,,,,,,,
435,,,,,,,,,,,...,,,,,,,,,,
436,,,,,,,,,,,...,,,,,,,,,,
437,Data from database: World Development Indicators,,,,,,,,,,...,,,,,,,,,,


## Cleaning and merging data

In [17]:
### Education data: share of the labor force with at least an upper secondary education.
## Rows labelled 'Education (ISCED-11): Total' hold the total labor force. Every other row
## is treated as an education level at or above upper secondary.
## NOTE(review): this relies on the input file being pre-filtered to those levels -- confirm.
is_total_row = data_edu["classif1.label"].eq('Education (ISCED-11): Total')

## Total labor force: the 'Total' rows, kept with all of their original columns.
Total_LF = data_edu.loc[is_total_row]

## Labor force in all non-Total categories, summed per country and year.
Upper_Sec_LF = (
    data_edu.loc[~is_total_row]
    .groupby(['ref_area.label', 'time'])['obs_value']
    .sum()
    .reset_index()
)

## Left-merge so every Total row is kept, then add the share as a percentage (0-100 scale).
Final_EDU_DF = Total_LF.merge(
    Upper_Sec_LF,
    on=['ref_area.label', 'time'],
    how='left',
    suffixes=('_Total', '_Upper_Sec'),
).assign(
    Proportion_Upper_Sec=lambda merged: merged['obs_value_Upper_Sec']
    .div(merged['obs_value_Total'])
    .mul(100)
)


In [18]:
## Inspect the merged result: one row per 'Total' record, with the total labor force
## ('obs_value_Total'), the summed non-Total categories ('obs_value_Upper_Sec') and
## their ratio in percent ('Proportion_Upper_Sec').
Final_EDU_DF

Unnamed: 0,ref_area.label,classif1.label,time,obs_value_Total,obs_value_Upper_Sec,Proportion_Upper_Sec
0,Afghanistan,Education (ISCED-11): Total,2021,8154.806,1809.086,22.184292
1,Afghanistan,Education (ISCED-11): Total,2020,6884.703,1464.419,21.270620
2,Afghanistan,Education (ISCED-11): Total,2017,7201.977,1461.605,20.294497
3,Afghanistan,Education (ISCED-11): Total,2014,7604.930,1074.775,14.132609
4,Angola,Education (ISCED-11): Total,2021,13192.842,2573.239,19.504812
...,...,...,...,...,...,...
1133,Zambia,Education (ISCED-11): Total,2017,4103.371,1409.440,34.348344
1134,Zambia,Education (ISCED-11): Total,2015,5068.024,1266.498,24.989976
1135,Zimbabwe,Education (ISCED-11): Total,2022,6005.217,4056.066,67.542372
1136,Zimbabwe,Education (ISCED-11): Total,2021,5869.093,3826.678,65.200500
