# 4.1.1 Worldwide Average

In [1]:
import pandas as pd

# Columns that identify one series: a country / sex / education level / skill combination.
group_keys = ['GeoAreaName', 'Sex', 'Education level', 'Type of skill']
# Columns carried into the base-vs-recent comparison, in the order they appear in the result.
comparison_cols = ['GeoAreaName', 'Sex', 'TimePeriod', 'Value', 'Education level', 'Type of skill']

# Read the 4.1 workbook into a dataframe
df = pd.read_excel('4.1 worldwide.xlsx')

# Earliest (base) and latest (recent) TimePeriod observed for each series
base_recent_years_df = (
    df.groupby(group_keys)
    .agg(BaseYear=('TimePeriod', 'min'), RecentYear=('TimePeriod', 'max'))
    .reset_index()
)

# Attach BaseYear / RecentYear to every original row
merged_df = df.merge(base_recent_years_df, on=group_keys, how='left')

# Keep the rows observed in the base year and in the most recent year
is_base_row = merged_df['TimePeriod'].eq(merged_df['BaseYear'])
is_recent_row = merged_df['TimePeriod'].eq(merged_df['RecentYear'])
base_year_df = merged_df[is_base_row]
recent_year_df = merged_df[is_recent_row]

# Pair each series' base-year row with its recent-year row.
# A series with a single TimePeriod has BaseYear == RecentYear, so it is paired with itself.
percentage_change_df = base_year_df[comparison_cols].merge(
    recent_year_df[comparison_cols],
    on=group_keys,
    suffixes=('_base', '_recent'),
)

# Percentage point change = recent value minus base value
percentage_change_df['PercentagePointChange'] = (
    percentage_change_df['Value_recent'] - percentage_change_df['Value_base']
)

# Preview the first few rows
percentage_change_df.head()

Unnamed: 0,GeoAreaName,Sex,TimePeriod_base,Value_base,Education level,Type of skill,TimePeriod_recent,Value_recent,PercentagePointChange
0,Albania,BOTHSEX,2000,29.7,LOWSEC,SKILL_READ,2022,26.29,-3.41
1,Albania,MALE,2000,19.4,LOWSEC,SKILL_READ,2022,18.39,-1.01
2,Albania,BOTHSEX,2000,36.61129,LOWSEC,SKILL_MATH,2022,26.05,-10.56129
3,Albania,MALE,2000,32.79,LOWSEC,SKILL_MATH,2022,23.03,-9.76
4,Albania,FEMALE,2000,40.27,LOWSEC,SKILL_MATH,2022,29.33,-10.94


In [2]:
# Mean of the most recent value ('Value_recent') for each
# 'Sex' / 'Education level' / 'Type of skill' combination.
# NOTE(review): despite the variable name, this averages 'Value_recent', not
# 'PercentagePointChange' -- confirm which metric is intended.
worldwide_average_change_by_group = (
    percentage_change_df
    .groupby(['Sex', 'Education level', 'Type of skill'])['Value_recent']
    .mean()
    .reset_index()
)

# Preview the per-group averages
worldwide_average_change_by_group.head()


Unnamed: 0,Sex,Education level,Type of skill,Value_recent
0,BOTHSEX,GRAD23,SKILL_MATH,58.788465
1,BOTHSEX,GRAD23,SKILL_READ,54.610125
2,BOTHSEX,LOWSEC,SKILL_MATH,49.246906
3,BOTHSEX,LOWSEC,SKILL_READ,55.795934
4,BOTHSEX,PRIMAR,SKILL_MATH,41.765598


In [3]:
# Write the per-group averages to CSV, leaving out the dataframe index
output_csv = '4.1_worldwideaverage.csv'
worldwide_average_change_by_group.to_csv(output_csv, index=False)

# Printed after to_csv returns without raising
print("CSV file saved successfully.")

CSV file saved successfully.


# 4.2.2 Worldwide Average

In [4]:
# Columns that identify one series: a country / sex combination.
group_keys = ['GeoAreaName', 'Sex']
# Columns carried into the base-vs-recent comparison, in the order they appear in the result.
comparison_cols = ['GeoAreaName', 'Sex', 'TimePeriod', 'Value']

# Read the 4.2 workbook into a dataframe
df = pd.read_excel('4.2 worldwide.xlsx')

# Earliest (base) and latest (recent) TimePeriod observed for each series
base_recent_years_df = (
    df.groupby(group_keys)
    .agg(BaseYear=('TimePeriod', 'min'), RecentYear=('TimePeriod', 'max'))
    .reset_index()
)

# Attach BaseYear / RecentYear to every original row
merged_df = df.merge(base_recent_years_df, on=group_keys, how='left')

# Keep the rows observed in the base year and in the most recent year
is_base_row = merged_df['TimePeriod'].eq(merged_df['BaseYear'])
is_recent_row = merged_df['TimePeriod'].eq(merged_df['RecentYear'])
base_year_df = merged_df[is_base_row]
recent_year_df = merged_df[is_recent_row]

# Pair each series' base-year row with its recent-year row.
# A series with a single TimePeriod has BaseYear == RecentYear, so it is paired with itself.
percentage_change_df = base_year_df[comparison_cols].merge(
    recent_year_df[comparison_cols],
    on=group_keys,
    suffixes=('_base', '_recent'),
)

# Percentage point change = recent value minus base value
percentage_change_df['PercentagePointChange'] = (
    percentage_change_df['Value_recent'] - percentage_change_df['Value_base']
)

# Preview the first few rows
percentage_change_df.head()

Unnamed: 0,GeoAreaName,Sex,TimePeriod_base,Value_base,TimePeriod_recent,Value_recent,PercentagePointChange
0,Albania,BOTHSEX,2000,55.94355,2022,93.72076,37.77721
1,Albania,FEMALE,2001,61.94651,2022,91.47674,29.53023
2,Albania,MALE,2001,56.22775,2022,95.92553,39.69778
3,Algeria,BOTHSEX,2003,8.9441,2023,68.0336,59.0895
4,Algeria,FEMALE,2003,8.95593,2023,69.54814,60.59221


In [5]:
# Mean of the most recent value ('Value_recent') for each 'Sex' group.
# NOTE(review): despite the variable name, this averages 'Value_recent', not
# 'PercentagePointChange' -- confirm which metric is intended.
worldwide_average_change_by_group = (
    percentage_change_df
    .groupby(['Sex'])['Value_recent']
    .mean()
    .reset_index()
)

# Preview the per-group averages
worldwide_average_change_by_group.head()

Unnamed: 0,Sex,Value_recent
0,BOTHSEX,71.683881
1,FEMALE,71.428913
2,MALE,71.022048


In [6]:
# Write the per-group averages to CSV, leaving out the dataframe index
output_csv = '4.2_worldwideaverage.csv'
worldwide_average_change_by_group.to_csv(output_csv, index=False)

# Printed after to_csv returns without raising
print("CSV file saved successfully.")

CSV file saved successfully.


# 4.3.1 Worldwide Average

In [7]:
import pandas as pd

# Read the 4.3 workbook into a dataframe
source_workbook = '4.3 worldwide.xlsx'
df = pd.read_excel(source_workbook)

# Preview the first few rows
df.head()

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,TimePeriod,Value,Time_Detail,...,LowerBound,BasePeriod,Source,GeoInfoUrl,FootNote,Age,Nature,Reporting Type,Sex,Units
0,4,4.3,4.3.1,SE_ADT_EDUCTRN,Participation rate in formal and non-formal ed...,4,Afghanistan,2014,12.83215,2014,...,,,AFG - HIES - Households Living Conditions Surv...,,,15-64,E,G,BOTHSEX,PERCENT
1,4,4.3,4.3.1,SE_ADT_EDUCTRN,Participation rate in formal and non-formal ed...,4,Afghanistan,2014,8.19726,2014,...,,,AFG - HIES - Households Living Conditions Surv...,,,15-64,E,G,FEMALE,PERCENT
2,4,4.3,4.3.1,SE_ADT_EDUCTRN,Participation rate in formal and non-formal ed...,4,Afghanistan,2014,42.96634,2014,...,,,AFG - HIES - Households Living Conditions Surv...,,,15-24,E,G,MALE,PERCENT
3,4,4.3,4.3.1,SE_ADT_EDUCTRN,Participation rate in formal and non-formal ed...,4,Afghanistan,2014,31.94815,2014,...,,,AFG - HIES - Households Living Conditions Surv...,,,15-24,E,G,BOTHSEX,PERCENT
4,4,4.3,4.3.1,SE_ADT_EDUCTRN,Participation rate in formal and non-formal ed...,4,Afghanistan,2014,20.67723,2014,...,,,AFG - HIES - Households Living Conditions Surv...,,,15-24,E,G,FEMALE,PERCENT


In [8]:
# Mean of 'Value' for each 'Sex' / 'TimePeriod' / 'Age' combination.
# NOTE(review): despite the variable name, this is an average of the raw values
# per year, not a change between years -- confirm the name is still appropriate.
worldwide_average_change_by_group = (
    df
    .groupby(['Sex', 'TimePeriod', 'Age'])['Value']
    .mean()
    .reset_index()
)

# Preview the per-group averages
worldwide_average_change_by_group.head()

Unnamed: 0,Sex,TimePeriod,Age,Value
0,BOTHSEX,2010,15-24,54.637289
1,BOTHSEX,2010,15-64,17.062193
2,BOTHSEX,2010,25-54,6.098127
3,BOTHSEX,2010,55-64,3.037775
4,BOTHSEX,2011,15-24,55.486347


In [9]:
# Write the per-group averages to CSV, leaving out the dataframe index
output_csv = '4.3_worldwideaverage.csv'
worldwide_average_change_by_group.to_csv(output_csv, index=False)

# Printed after to_csv returns without raising
print("CSV file saved successfully.")

CSV file saved successfully.
