In [None]:
# Loading packages and initializing economic indicators
- I curated a basket of macro indicators (growth, inflation, savings, labor) that plausibly drive unemployment. 
- Also mapped cryptic World Bank codes to readable column names to keep downstream code legible and plots self-explanatory.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pltb
import wbgapi as wb

In [3]:
# Show up to 16 columns when a DataFrame is displayed, so wide indicator frames stay readable.
pd.set_option("display.max_columns", 16)

# World Bank series codes requested from the API. The dict is indexed with the
# lowercase key "series" when it is passed to wb.data.DataFrame.
Indicators = {
    "series": [
        "SL.UEM.TOTL.ZS",
        "NY.GDP.MKTP.KD.ZG",
        "NE.CON.PRVT.KD.ZG",
        "NE.CON.GOVT.KD.ZG",
        "NE.GDI.TOTL.KD.ZG",
        "NE.EXP.GNFS.KD.ZG",
        "NE.IMP.GNFS.KD.ZG",
        "NV.IND.TOTL.KD.ZG",
        "NV.SRV.TOTL.KD.ZG",
        "NV.AGR.TOTL.KD.ZG",
        "FP.CPI.TOTL.ZG",
        "NY.GDP.DEFL.KD.ZG",
        "FR.INR.DPST",
        "FR.INR.RINR",
        "FM.LBL.MQMY.GD.ZS",
        "SL.TLF.CACT.ZS",
        "SL.EMP.TOTL.SP.ZS",
        "SP.POP.GROW",
        "NY.GNS.ICTR.ZS",
    ]
}

# Readable column name for every requested series code (keys sorted by code).
# Every code in Indicators["series"] must have an entry here, otherwise the raw
# World Bank code survives as a column name after the rename step.
column_names = {
    # NOTE(review): this code was requested but previously had no readable name.
    # Presumably "money and quasi money (M2), % of GDP" - confirm against the
    # World Bank catalogue, and check whether the API still serves this code.
    "FM.LBL.MQMY.GD.ZS": "money_and_quasi_money_gdp_pct",
    "FP.CPI.TOTL.ZG": "cpi_inflation_pct",
    "FR.INR.DPST": "deposit_rate_pct",
    "FR.INR.RINR": "real_interest_rate_pct",
    "NE.CON.GOVT.KD.ZG": "govt_consumption_growth_pct",
    "NE.CON.PRVT.KD.ZG": "private_consumption_growth_pct",
    "NE.EXP.GNFS.KD.ZG": "exports_growth_pct",
    "NE.GDI.TOTL.KD.ZG": "gross_capital_form_growth_pct",
    "NE.IMP.GNFS.KD.ZG": "imports_growth_pct",
    "NV.AGR.TOTL.KD.ZG": "agriculture_growth_pct",
    "NV.IND.TOTL.KD.ZG": "industry_growth_pct",
    "NV.SRV.TOTL.KD.ZG": "services_growth_pct",
    "NY.GDP.DEFL.KD.ZG": "gdp_deflator_inflation_pct",
    "NY.GDP.MKTP.KD.ZG": "gdp_growth_pct",
    "NY.GNS.ICTR.ZS": "gross_savings_gdp_pct",
    "SL.EMP.TOTL.SP.ZS": "employment_ratio_pct",
    "SL.TLF.CACT.ZS": "labor_force_participation_pct",
    "SL.UEM.TOTL.ZS": "unemployment_rate_pct",
    "SP.POP.GROW": "population_growth_pct",
}

- Code to show how the API works

In [6]:
# Small demo request: two economies, two years, every series in the basket.
# The dict key is lowercase "series"; indexing with "Series" raises KeyError.
df_test = wb.data.DataFrame(Indicators['series'], economy=['DEU', 'AFG'], time=[1980, 1981])
print(df_test)

# The raw frame is indexed by (economy, series) with one "YR<year>" column per
# year. Remember those year columns before the index is turned into columns.
values = df_test.columns

# Wide -> long: one row per (economy, series, year).
df_test = df_test.reset_index().melt(
    id_vars=['economy', 'series'],
    value_vars=values,
    var_name='year',
    value_name='values',
)
# Strip the "YR" prefix and parse the remaining four-digit year as a timestamp.
df_test['year'] = pd.to_datetime(df_test['year'].str.replace("YR", ""), format='%Y')
print(df_test)

# Long -> panel: one row per (year, economy), one readable column per series.
df_test = (
    df_test
    .pivot_table(values='values', columns='series', index=['year', 'economy'])
    .rename(columns=column_names)
    .reset_index()
)
print(df_test)


                              YR1980     YR1981
economy series                                 
AFG     FP.CPI.TOTL.ZG           NaN        NaN
        FR.INR.DPST              NaN        NaN
        FR.INR.RINR              NaN        NaN
        NE.CON.GOVT.KD.ZG        NaN        NaN
        NE.CON.PRVT.KD.ZG        NaN        NaN
        NE.EXP.GNFS.KD.ZG        NaN        NaN
        NE.GDI.TOTL.KD.ZG        NaN        NaN
        NE.IMP.GNFS.KD.ZG        NaN        NaN
        NV.AGR.TOTL.KD.ZG        NaN        NaN
        NV.IND.TOTL.KD.ZG        NaN        NaN
        NV.SRV.TOTL.KD.ZG        NaN        NaN
        NY.GDP.DEFL.KD.ZG        NaN        NaN
        NY.GDP.MKTP.KD.ZG        NaN        NaN
        NY.GNS.ICTR.ZS           NaN        NaN
        SL.EMP.TOTL.SP.ZS        NaN        NaN
        SL.TLF.CACT.ZS           NaN        NaN
        SL.UEM.TOTL.ZS           NaN        NaN
        SP.POP.GROW        -3.625808  -9.819771
DEU     FP.CPI.TOTL.ZG      5.441058   6

# Pulling data from the World Bank API
- Using wbgapi, I fetched country–year panels in small time slices to avoid oversized API pulls and concatenated them into one tidy DataFrame.
- Additionally, I reshaped the data so that I can work with this dataset directly in a machine-learning context.

In [17]:
def get_clean_data(series, start_year=1980, end_year=2020):
    """Download World Bank series and return one tidy (year, economy) panel.

    The data is requested in two-year windows. Each window is reshaped from
    the API layout (one row per economy/series, one ``YR<year>`` column per
    year) into one row per (year, economy) with one column per series.

    Parameters
    ----------
    series : list of str
        World Bank series codes to download.
    start_year : int, default 1980
        First year requested.
    end_year : int, default 2020
        Last year requested (inclusive).

    Returns
    -------
    pandas.DataFrame
        Columns ``years`` (timestamp parsed from the four-digit year),
        ``economy`` and one column per series, renamed through the
        module-level ``column_names`` mapping. Rows are numbered 0..n-1
        across all windows. An empty frame is returned when the year span
        contains no window.
    """
    chunks = []
    for first_year in range(start_year, end_year + 1, 2):
        # The last window can collapse to [end_year, end_year]. The two-element
        # list is kept on purpose so every request has the same shape.
        # NOTE(review): wbgapi appears to drop a dimension that has a single
        # requested value, which would remove the YR columns - confirm before
        # simplifying this to a one-element list.
        years = [first_year, min(first_year + 1, end_year)]
        raw = wb.data.DataFrame(series, economy='all', time=years, skipAggs=True)

        # Remember the year columns before the (economy, series) index becomes columns.
        year_columns = raw.columns
        long = raw.reset_index().melt(
            id_vars=['economy', 'series'],
            value_vars=year_columns,
            var_name='years',
            value_name='values',
        )
        long['years'] = pd.to_datetime(long['years'].str.replace("YR", ""), format='%Y')

        wide = (
            long
            .pivot_table(values='values', columns='series', index=['years', 'economy'])
            .rename(columns=column_names)
            .reset_index()
        )
        wide.columns.name = None
        chunks.append(wide)

    if not chunks:
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop, and renumber
    # the rows so index labels are not repeated for every window.
    return pd.concat(chunks, ignore_index=True)
# Build the full panel for every series code in the indicator basket.
# This runs the whole API download loop, so the cell can take a while.
dataset = get_clean_data(Indicators['series'])

In [22]:
# Persist the downloaded panel as CSV.
# NOTE(review): the target is a hard-coded absolute Windows path, so this cell only
# works on a machine that has this directory. The file name is spelled
# "unemplyoment" - keep it in sync with whatever reads the file before renaming it.
# The DataFrame index is written as the first, unnamed column (pandas default).
dataset.to_csv(r'D:\Python\PycharmProjects\Bundesbank\unemplyoment_data.csv')