In [1]:
import wbgapi as wb
import pandas as pd

In [2]:
# Variables of interest: indicator code -> descriptive column name.
# The codes are what gets requested from wbgapi; the names replace them
# as column headers once the data has been reshaped.
INDICATORS = {
    # life expectancy at birth
    "SP.DYN.LE00.IN": "life_expectancy_birth",
    # access to basic drinking water
    "SH.H2O.BASW.ZS": "basic_water_access_pct",
    # access to basic sanitation
    "SH.STA.BASS.ZS": "basic_sanitation_access_pct",
    # primary school enrollment
    "SE.PRM.ENRR": "primary_school_enrollment",
    # GDP per capita
    "NY.GDP.PCAP.KD": "gdp_per_capita_constant_usd",
}

In [3]:
# Regions under study. The two member sets are kept separately because the
# region label for each country is derived from them further down.
ssa = set(wb.region.members("SSF"))  # Sub-Saharan Africa
sa = set(wb.region.members("SAS"))   # South Asia

# Every economy that belongs to either region, as a list for the data request
filtered_countries = list(ssa.union(sa))

In [4]:
# Download every indicator for the selected economies, 2000 through 2020.
indicator_codes = [*INDICATORS]   # iterating the dict yields the indicator codes
study_years = range(2000, 2021)   # stop value is exclusive, so 2020 is the last year
df = wb.data.DataFrame(
    indicator_codes,
    economy=filtered_countries,
    time=study_years,
    labels=False,           # keep raw codes; they are renamed to descriptive names later
    numericTimeKeys=True,   # years are integers
)

In [5]:
# Turn the named index levels into regular columns so that later cells can
# refer to 'economy' and 'series' by column name.
# The guard keeps this cell idempotent: once the index has been reset it is
# unnamed, so running the cell a second time no longer inserts a stray
# "index" column into df.
if any(level is not None for level in df.index.names):
    df.reset_index(inplace=True)

In [6]:
# Reshape from wide (one column per year) to long:
# one row per country-year-indicator.
key_columns = ['economy', 'series']
year_columns = [*range(2000, 2021)]  # 2000..2020 inclusive
df_long = pd.melt(
    df,
    id_vars=key_columns,
    value_vars=year_columns,
    var_name='Year',
    value_name='Value',
)

In [7]:
# Spread the long table back out: one row per (economy, Year) pair and
# one column per indicator code.
indicator_table = pd.pivot_table(
    df_long,
    values='Value',
    index=['economy', 'Year'],
    columns='series',
)

# Bring 'economy' and 'Year' back as ordinary columns
df_final = indicator_table.reset_index()

In [8]:
# Swap the indicator codes for their descriptive names and relabel the
# country column, in a single in-place rename
df_final.rename(columns={**INDICATORS, 'economy': 'Country'}, inplace=True)

# Region label for every economy code. The South Asia entries are merged in
# last, so they would take precedence if a code appeared in both sets.
region_labels = {
    **dict.fromkeys(ssa, "Sub-Saharan Africa"),
    **dict.fromkeys(sa, "South Asia"),
}

# Attach the region to each row of df_final via its country code
df_final["Region"] = df_final["Country"].map(region_labels)

df_final

series,Country,Year,gdp_per_capita_constant_usd,primary_school_enrollment,basic_water_access_pct,basic_sanitation_access_pct,life_expectancy_birth,Region
0,AGO,2000,1932.988479,,41.144310,27.557524,46.501,Sub-Saharan Africa
1,AGO,2001,1947.856730,,42.254676,28.989223,47.032,Sub-Saharan Africa
2,AGO,2002,2139.872597,,43.376800,30.422469,47.874,Sub-Saharan Africa
3,AGO,2003,2128.195793,,44.363873,31.856061,50.218,Sub-Saharan Africa
4,AGO,2004,2277.866965,,45.351335,33.290391,51.123,Sub-Saharan Africa
...,...,...,...,...,...,...,...,...
1129,ZWE,2016,1377.639277,95.734095,64.468961,37.529191,59.760,Sub-Saharan Africa
1130,ZWE,2017,1422.193460,94.334173,63.996627,36.941674,60.263,Sub-Saharan Africa
1131,ZWE,2018,1471.394890,93.755202,63.538773,36.357160,60.906,Sub-Saharan Africa
1132,ZWE,2019,1356.838211,93.427097,63.094954,35.774336,61.060,Sub-Saharan Africa


In [9]:
# Export the final table to CSV, leaving out the DataFrame's row index
csv_path = "qtm350_final.csv"
df_final.to_csv(csv_path, index=False)

In [10]:
# Save as a SQLite database
import sqlite3

# Open a connection to the database file (created if it does not exist yet)
conn = sqlite3.connect("qtm350_final.db")
try:
    # Write the DataFrame to a table called "indicators", replacing any
    # table of that name left over from a previous run
    df_final.to_sql("indicators", conn, if_exists="replace", index=False)
finally:
    # Close the connection even when the write above raises, so the
    # database file handle is never left open in the kernel
    conn.close()