# Analyzing Trends in Education Within Sub-Saharn Africa

## Importing packages

In [2]:
# import relevant packages here or going forward
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from   sqlalchemy.engine import URL
from   sqlalchemy import text

### Connecting to SQL server

In [17]:
# Write your answer here: 
url_server = URL.create(
    "postgresql",
    host = 'localhost',
    database = 'postgres',
    username = 'postgres',
    port = 5432,
    password  = "postgres") # I installed postgres in a separate class so I have a password

connection = create_engine(url_server)

### Loading the Data

In [None]:
education_data = pd.read_csv('data/wdi_education_data.csv')

### Pivoting
After I (Alix) was unable to safely pivot the table in postgres, I did it in python and reuploaded it back to sql

In [23]:
# Load the data
education_data = pd.read_csv('data/wdi_education_data.csv')

# Step 1: Select only the year columns (starting from the 4th column onward)
year_columns = education_data.columns[3:]

# Step 2: Remove rows with NaN values in the year columns
education_data_cleaned = education_data.dropna(subset=year_columns)

# Step 3: Melt the DataFrame to convert year columns into rows
education_data_melted = education_data_cleaned.melt(
    id_vars=["Country Name", "Series Name"],  # Keep 'Country Name' and 'Series Name' as they are
    value_vars=year_columns,  # The year columns (from column 4 onward)
    var_name="Year",  # New column for year
    value_name="Value"  # New column for values
)

# Step 4: Clean up the 'Year' column by extracting the year (if necessary)
education_data_melted["Year"] = education_data_melted["Year"].str.extract(r"(\d{4})")

# Step 5: Remove rows where 'Year' is NaN before converting to integer
education_data_melted = education_data_melted.dropna(subset=["Year"])

# Step 6: Convert the 'Year' column to integer
education_data_melted["Year"] = education_data_melted["Year"].astype(int)

# Step 7: Remove duplicate rows in the melted data
education_data_melted = education_data_melted.drop_duplicates(subset=["Country Name", "Year", "Series Name"])

# Pivot the data using the `pivot_table` method
education_data_pivoted = education_data_melted.pivot_table(
    index=["Country Name", "Year"],  # Use both 'Country Name' and 'Year' as index
    columns="Series Name",  # Pivot on 'Series Name'
    values="Value",  # Use 'Value' as the values to fill in the pivoted table
    aggfunc="first"  # Aggregation function in case of duplicate rows, here we take the first
)

# Reset the index to move 'Country Name' and 'Year' into regular columns
education_data_pivoted_reset = education_data_pivoted.reset_index()

# Step 8: Ensure unique column names by renaming duplicates
education_data_pivoted_reset.columns = [str(col) + "_" + str(i) if education_data_pivoted_reset.columns.tolist().count(col) > 1 else col for i, col in enumerate(education_data_pivoted_reset.columns)]

# Print out the columns
print("Columns in the pivoted DataFrame:")
print(education_data_pivoted_reset.columns)

# Step 8: Check for uniqueness
duplicate_columns = education_data_pivoted_reset.columns[education_data_pivoted_reset.columns.duplicated()]

if len(duplicate_columns) > 0:
    print("\nDuplicate columns found:")
    print(duplicate_columns)
else:
    print("\nAll column names are unique.")


KeyboardInterrupt: 

In [24]:
# Step 8: Check for uniqueness
duplicate_columns = education_data_pivoted_reset.columns[education_data_pivoted_reset.columns.duplicated()]

if len(duplicate_columns) > 0:
    print("\nDuplicate columns found:")
    print(duplicate_columns)
else:
    print("\nAll column names are unique.")


All column names are unique.


### Uploading the Data into SQL

In [26]:
education_data_pivoted_reset.to_sql('wdi_education_data',
               con = connection,
               if_exists='replace',
               index=False)


ProgrammingError: (psycopg2.errors.DuplicateColumn) column "Educational attainment, Doctoral or equivalent, population 25+," specified more than once

[SQL: 
CREATE TABLE wdi_education_data (
	"Country Name" TEXT, 
	"Year" BIGINT, 
	"Adjusted net enrollment rate, primary (%% of primary school age children)" TEXT, 
	"Adjusted net enrollment rate, primary, female (%% of primary school age children)" TEXT, 
	"Adjusted net enrollment rate, primary, male (%% of primary school age children)" TEXT, 
	"Adolescents out of school (%% of lower secondary school age)" TEXT, 
	"Adolescents out of school, female (%% of female lower secondary school age)" TEXT, 
	"Adolescents out of school, male (%% of male lower secondary school age)" TEXT, 
	"Children out of school (%% of primary school age)" TEXT, 
	"Children out of school, female (%% of female primary school age)" TEXT, 
	"Children out of school, male (%% of male primary school age)" TEXT, 
	"Children out of school, primary" TEXT, 
	"Children out of school, primary, female" TEXT, 
	"Children out of school, primary, male" TEXT, 
	"Compulsory education, duration (years)" TEXT, 
	"Current education expenditure, primary (%% of total expenditure in primary public institutions)" TEXT, 
	"Current education expenditure, secondary (%% of total expenditure in secondary public institutions)" TEXT, 
	"Current education expenditure, tertiary (%% of total expenditure in tertiary public institutions)" TEXT, 
	"Current education expenditure, total (%% of total expenditure in public institutions)" TEXT, 
	"Educational attainment, Doctoral or equivalent, population 25+, female (%%) (cumulative)" TEXT, 
	"Educational attainment, Doctoral or equivalent, population 25+, male (%%) (cumulative)" TEXT, 
	"Educational attainment, Doctoral or equivalent, population 25+, total (%%) (cumulative)" TEXT, 
	"Educational attainment, at least Bachelor's or equivalent, population 25+, female (%%) (cumulative)" TEXT, 
	"Educational attainment, at least Bachelor's or equivalent, population 25+, male (%%) (cumulative)" TEXT, 
	"Educational attainment, at least Bachelor's or equivalent, population 25+, total (%%) (cumulative)" TEXT, 
	"Educational attainment, at least Master's or equivalent, population 25+, female (%%) (cumulative)" TEXT, 
	"Educational attainment, at least Master's or equivalent, population 25+, male (%%) (cumulative)" TEXT, 
	"Educational attainment, at least Master's or equivalent, population 25+, total (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed lower secondary, population 25+, female (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed lower secondary, population 25+, male (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed lower secondary, population 25+, total (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed post-secondary, population 25+, female (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed post-secondary, population 25+, male (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed post-secondary, population 25+, total (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed primary, population 25+ years, female (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed primary, population 25+ years, male (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed primary, population 25+ years, total (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed short-cycle tertiary, population 25+, female (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed short-cycle tertiary, population 25+, male (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed short-cycle tertiary, population 25+, total (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed upper secondary, population 25+, female (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed upper secondary, population 25+, male (%%) (cumulative)" TEXT, 
	"Educational attainment, at least completed upper secondary, population 25+, total (%%) (cumulative)" TEXT, 
	"Expenditure on primary education (%% of government expenditure on education)" TEXT, 
	"Expenditure on secondary education (%% of government expenditure on education)" TEXT, 
	"Expenditure on tertiary education (%% of government expenditure on education)" TEXT, 
	"Female primary school age children out-of-school (%%)" TEXT, 
	"Female pupils below minimum reading proficiency at end of primary (%%). Low GAML threshold" TEXT, 
	"Government expenditure on education, total (%% of GDP)" TEXT, 
	"Government expenditure on education, total (%% of government expenditure)" TEXT, 
	"Government expenditure per student, primary (%% of GDP per capita)" TEXT, 
	"Government expenditure per student, secondary (%% of GDP per capita)" TEXT, 
	"Government expenditure per student, tertiary (%% of GDP per capita)" TEXT, 
	"Gross intake ratio in first grade of primary education, female (%% of relevant age group)" TEXT, 
	"Gross intake ratio in first grade of primary education, male (%% of relevant age group)" TEXT, 
	"Gross intake ratio in first grade of primary education, total (%% of relevant age group)" TEXT, 
	"Learning poverty: Share of Children at the End-of-Primary age below minimum reading proficiency adjusted by Out-of-School Children (%%)" TEXT, 
	"Learning poverty: Share of Female Children at the End-of-Primary age below minimum reading proficiency adjusted by Out-of-School Children (%%)" TEXT, 
	"Learning poverty: Share of Male Children at the End-of-Primary age below minimum reading proficiency adjusted by Out-of-School Children (%%)" TEXT, 
	"Literacy rate, adult female (%% of females ages 15 and above)" TEXT, 
	"Literacy rate, adult male (%% of males ages 15 and above)" TEXT, 
	"Literacy rate, adult total (%% of people ages 15 and above)" TEXT, 
	"Literacy rate, youth (ages 15-24), gender parity index (GPI)" TEXT, 
	"Literacy rate, youth female (%% of females ages 15-24)" TEXT, 
	"Literacy rate, youth male (%% of males ages 15-24)" TEXT, 
	"Literacy rate, youth total (%% of people ages 15-24)" TEXT, 
	"Lower secondary completion rate, female (%% of relevant age group)" TEXT, 
	"Lower secondary completion rate, male (%% of relevant age group)" TEXT, 
	"Lower secondary completion rate, total (%% of relevant age group)" TEXT, 
	"Lower secondary school starting age (years)" TEXT, 
	"Male primary school age children out-of-school (%%)" TEXT, 
	"Male pupils below minimum reading proficiency at end of primary (%%). Low GAML threshold" TEXT, 
	"Net intake rate in grade 1 (%% of official school-age population)" TEXT, 
	"Net intake rate in grade 1, female (%% of official school-age population)" TEXT, 
	"Net intake rate in grade 1, male (%% of official school-age population)" TEXT, 
	"Over-age students, primary (%% of enrollment)" TEXT, 
	"Over-age students, primary, female (%% of female enrollment)" TEXT, 
	"Over-age students, primary, male (%% of male enrollment)" TEXT, 
	"Persistence to grade 5, female (%% of cohort)" TEXT, 
	"Persistence to grade 5, male (%% of cohort)" TEXT, 
	"Persistence to grade 5, total (%% of cohort)" TEXT, 
	"Persistence to last grade of primary, female (%% of cohort)" TEXT, 
	"Persistence to last grade of primary, male (%% of cohort)" TEXT, 
	"Persistence to last grade of primary, total (%% of cohort)" TEXT, 
	"Preprimary education, duration (years)" TEXT, 
	"Primary completion rate, female (%% of relevant age group)" TEXT, 
	"Primary completion rate, male (%% of relevant age group)" TEXT, 
	"Primary completion rate, total (%% of relevant age group)" TEXT, 
	"Primary education, duration (years)" TEXT, 
	"Primary education, pupils" TEXT, 
	"Primary education, pupils (%% female)" TEXT, 
	"Primary education, teachers" TEXT, 
	"Primary education, teachers (%% female)" TEXT, 
	"Primary school age children out-of-school (%%)" TEXT, 
	"Primary school starting age (years)" TEXT, 
	"Progression to secondary school (%%)" TEXT, 
	"Progression to secondary school, female (%%)" TEXT, 
	"Progression to secondary school, male (%%)" TEXT, 
	"Pupil-teacher ratio, lower secondary" TEXT, 
	"Pupil-teacher ratio, preprimary" TEXT, 
	"Pupil-teacher ratio, primary" TEXT, 
	"Pupil-teacher ratio, secondary" TEXT, 
	"Pupil-teacher ratio, tertiary" TEXT, 
	"Pupil-teacher ratio, upper secondary" TEXT, 
	"Pupils below minimum reading proficiency at end of primary (%%). Low GAML threshold" TEXT, 
	"Repeaters, primary, female (%% of female enrollment)" TEXT, 
	"Repeaters, primary, male (%% of male enrollment)" TEXT, 
	"Repeaters, primary, total (%% of total enrollment)" TEXT, 
	"School enrollment, preprimary (%% gross)" TEXT, 
	"School enrollment, preprimary, female (%% gross)" TEXT, 
	"School enrollment, preprimary, male (%% gross)" TEXT, 
	"School enrollment, primary (%% gross)" TEXT, 
	"School enrollment, primary (%% net)" TEXT, 
	"School enrollment, primary (gross), gender parity index (GPI)" TEXT, 
	"School enrollment, primary and secondary (gross), gender parity index (GPI)" TEXT, 
	"School enrollment, primary, female (%% gross)" TEXT, 
	"School enrollment, primary, female (%% net)" TEXT, 
	"School enrollment, primary, male (%% gross)" TEXT, 
	"School enrollment, primary, male (%% net)" TEXT, 
	"School enrollment, primary, private (%% of total primary)" TEXT, 
	"School enrollment, secondary (%% gross)" TEXT, 
	"School enrollment, secondary (%% net)" TEXT, 
	"School enrollment, secondary (gross), gender parity index (GPI)" TEXT, 
	"School enrollment, secondary, female (%% gross)" TEXT, 
	"School enrollment, secondary, female (%% net)" TEXT, 
	"School enrollment, secondary, male (%% gross)" TEXT, 
	"School enrollment, secondary, male (%% net)" TEXT, 
	"School enrollment, secondary, private (%% of total secondary)" TEXT, 
	"School enrollment, tertiary (%% gross)" TEXT, 
	"School enrollment, tertiary (gross), gender parity index (GPI)" TEXT, 
	"School enrollment, tertiary, female (%% gross)" TEXT, 
	"School enrollment, tertiary, male (%% gross)" TEXT, 
	"Secondary education, duration (years)" TEXT, 
	"Secondary education, general pupils" TEXT, 
	"Secondary education, general pupils (%% female)" TEXT, 
	"Secondary education, pupils" TEXT, 
	"Secondary education, pupils (%% female)" TEXT, 
	"Secondary education, teachers" TEXT, 
	"Secondary education, teachers (%% female)" TEXT, 
	"Secondary education, teachers, female" TEXT, 
	"Secondary education, vocational pupils" TEXT, 
	"Secondary education, vocational pupils (%% female)" TEXT, 
	"Tertiary education, academic staff (%% female)" TEXT, 
	"Trained teachers in lower secondary education (%% of total teachers)" TEXT, 
	"Trained teachers in lower secondary education, female (%% of female teachers)" TEXT, 
	"Trained teachers in lower secondary education, male (%% of male teachers)" TEXT, 
	"Trained teachers in preprimary education (%% of total teachers)" TEXT, 
	"Trained teachers in preprimary education, female (%% of female teachers)" TEXT, 
	"Trained teachers in preprimary education, male (%% of male teachers)" TEXT, 
	"Trained teachers in primary education (%% of total teachers)" TEXT, 
	"Trained teachers in primary education, female (%% of female teachers)" TEXT, 
	"Trained teachers in primary education, male (%% of male teachers)" TEXT, 
	"Trained teachers in secondary education (%% of total teachers)" TEXT, 
	"Trained teachers in secondary education, female (%% of female teachers)" TEXT, 
	"Trained teachers in secondary education, male (%% of male teachers)" TEXT, 
	"Trained teachers in upper secondary education (%% of total teachers)" TEXT, 
	"Trained teachers in upper secondary education, female (%% of female teachers)" TEXT, 
	"Trained teachers in upper secondary education, male (%% of male teachers)" TEXT
)

]
(Background on this error at: https://sqlalche.me/e/14/f405)