# World Bank Data Fetching Script <a id="top"></a>

This code downloads and combines World Bank data:

1. **Defines 38 development indicators** (GDP growth, poverty, education, health, governance, etc.) with their World Bank API codes
2. **Fetches data** from World Bank API for each indicator (2014-2024) for all countries
3. **Processes responses** into structured DataFrames with Country, Year, and indicator values
4. **Merges all indicators** into one comprehensive dataset using country and year as keys
5. **Saves** the combined dataset as a CSV file

The code builds a complete multi-year, multi-country dataset by pulling dozens of development indicators from the World Bank's public API and combining them into one analysis-ready CSV file.

In [1]:
import requests
import pandas as pd
import time
from functools import reduce

In [2]:
# Maps each World Bank indicator code (the key, sent to the API) to the
# column name (the value) that the indicator receives in the combined dataset.
# The section comments below are only a reading aid; the grouping has no
# effect on how the data is fetched or merged.
indicators = {
    # Economic and demographic
    'NY.GDP.MKTP.KD.ZG': 'GDP_growth',
    'GC.TAX.TOTL.GD.ZS': 'Tax_revenue_percent_GDP',
    'SI.POV.DDAY': 'Poverty_rate',
    'SL.UEM.TOTL.ZS': 'Unemployment_rate',
    'FP.CPI.TOTL.ZG': 'Inflation_rate',
    'BX.KLT.DINV.WD.GD.ZS': 'FDI_percent_GDP',
    'BN.CAB.XOKA.GD.ZS': 'Current_account_balance_percent_GDP',
    'SP.URB.TOTL.IN.ZS': 'Urbanization_rate',
    'SP.POP.TOTL': 'Population_total',
    'SP.POP.GROW': 'Population_growth_rate',
    'NV.AGR.TOTL.ZS': 'Agriculture_value_added_percent_GDP',
    'NV.IND.TOTL.ZS': 'Industry_value_added_percent_GDP',
    'NV.SRV.TOTL.ZS': 'Services_value_added_percent_GDP',
    'SI.POV.GINI': 'Gini_index',
    'NE.GDI.TOTL.ZS': 'Gross_capital_formation_percent_GDP',
    'NE.TRD.GNFS.ZS': 'Trade_percent_GDP',
    'GB.XPD.RSDV.GD.ZS': 'R&D_expenditure_percent_GDP',
    # Education
    'SE.SEC.ENRR': 'School_enrollment_secondary',
    'SE.TER.ENRR': 'School_enrollment_tertiary',
    'SE.ADT.LITR.ZS': 'Adult_literacy_rate',
    'SE.XPD.TOTL.GD.ZS': 'Education_expenditure_percent_GDP',
    'SE.PRM.ENRL.TC.ZS': 'Pupil_teacher_ratio_primary',
    # Infrastructure
    'EG.ELC.ACCS.ZS': 'Access_to_electricity_percent',
    'IT.NET.USER.ZS': 'Internet_users_per_100',
    'IT.CEL.SETS.P2': 'Mobile_subscriptions_per_100',
    'LP.LPI.OVRL.XQ': 'Logistics_Performance_Index',
    # Health
    'SP.DYN.LE00.IN': 'Life_expectancy',
    'SH.XPD.CHEX.GD.ZS': 'Health_expenditure_percent_GDP',
    'SH.DYN.MORT': 'Under5_mortality_rate',
    # Governance (Worldwide Governance Indicators)
    'GE.EST': 'Gov_Effectiveness',
    'CC.EST': 'Control_Corruption',
    'RQ.EST': 'Reg_Quality',
    'RL.EST': 'Rule_of_Law',
    'PV.EST': 'Political_Stability',
    'VA.EST': 'Voice_Accountability',
    # Trade and Innovation
    'NE.EXP.GNFS.ZS': 'Exports_percent_GDP',
    'NE.IMP.GNFS.ZS': 'Imports_percent_GDP',
    'IP.PAT.RESD': 'Patent_applications_residents'
}


In [3]:
def fetch_indicator_data(indicator_code, start_year=2014, end_year=2024, *, session=None, timeout=30):
    """Download all observations of one World Bank indicator for all countries.

    The World Bank API answers with a two-element JSON list: paging metadata
    followed by the list of observations. Every page of the result is
    requested so that wide year ranges are not cut off at the first page.

    Args:
        indicator_code: World Bank indicator code, e.g. ``'NY.GDP.MKTP.KD.ZG'``.
        start_year: First year of the ``date`` range sent to the API.
        end_year: Last year of the ``date`` range sent to the API.
        session: Optional object with a ``requests``-compatible ``get`` method
            (for example a ``requests.Session``). When omitted, the
            ``requests`` module itself is used.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        A list of raw observation dicts exactly as delivered by the API. An
        empty list is returned when any page answers with a non-200 status,
        when the body is not the expected JSON envelope, or when the API has
        no observations for the indicator. The result is all-or-nothing: a
        failure on a later page discards the earlier pages rather than
        returning a silently truncated series.

    Raises:
        Network-level errors raised by the HTTP client (connection failures,
        timeouts) are not caught and propagate to the caller.
    """
    url = f"https://api.worldbank.org/v2/country/all/indicator/{indicator_code}"
    # Only touch the module-level ``requests`` when no client was injected.
    http = session if session is not None else requests

    rows = []
    page = 1
    while True:
        params = {
            'date': f"{start_year}:{end_year}",
            'format': 'json',
            'per_page': 10000,
            'page': page,
        }
        response = http.get(url, params=params, timeout=timeout)
        if response.status_code != 200:
            return []

        try:
            data = response.json()
        except ValueError:
            # Body was not valid JSON (e.g. an HTML/XML error page).
            return []

        # Error answers are a one-element list holding only a message object.
        if not isinstance(data, list) or len(data) < 2:
            return []

        # The observations slot is ``null`` when the indicator has no data.
        rows.extend(data[1] or [])

        meta = data[0] if isinstance(data[0], dict) else {}
        try:
            total_pages = int(meta.get('pages') or 1)
        except (TypeError, ValueError):
            total_pages = 1
        if page >= total_pages:
            return rows
        page += 1


In [4]:
def process_data(raw_data, indicator_name):
    """Convert raw World Bank observations into a DataFrame for one indicator.

    Args:
        raw_data: Iterable of observation dicts, each expected to carry a
            ``'country'`` dict (with ``'value'`` and ``'id'``), a ``'date'``
            and a ``'value'``. ``None`` is treated as "no observations".
        indicator_name: Column name under which the observation values are
            stored.

    Returns:
        A DataFrame with the columns ``Country``, ``CountryCode``, ``Year``
        and ``indicator_name``. Observations without a value, country or date
        are left out. The columns are present even when no row qualifies, so
        callers always see the same schema.
    """
    columns = ['Country', 'CountryCode', 'Year', indicator_name]
    records = [
        {
            'Country': entry['country']['value'],
            # NOTE(review): presumably the API's 2-letter/aggregate code - confirm.
            'CountryCode': entry['country']['id'],
            'Year': int(entry['date']),
            indicator_name: entry['value'],
        }
        for entry in raw_data or []
        if entry.get('value') is not None and entry.get('country') and entry.get('date')
    ]
    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(records, columns=columns)


In [5]:
# Download every indicator in turn and collect one DataFrame per indicator.
data_frames = []
for code, name in indicators.items():
    raw = fetch_indicator_data(code)
    df = process_data(raw, name)
    if not df.empty and all(col in df.columns for col in ['Country', 'CountryCode', 'Year']):
        data_frames.append(df)
    else:
        # Report the gap; otherwise the column just vanishes from the final dataset.
        print(f"Skipped {name} ({code}): no usable data returned")
    # One-second pause between requests (presumably to go easy on the public API).
    time.sleep(1)

In [6]:
if data_frames:
    # Fold the per-indicator frames together from left to right. The outer
    # join keeps every country/year pair that appears in at least one frame.
    merged_df = data_frames[0]
    for next_df in data_frames[1:]:
        merged_df = merged_df.merge(
            next_df,
            how='outer',
            on=['Country', 'CountryCode', 'Year'],
        )
    # Write the combined table without the DataFrame index column.
    merged_df.to_csv("world_bank_data_2014_2024.csv", index=False)

[<a href="#top">Go To Beginning</a>]

___
**Declaration:** I acknowledge the use of Microsoft Copilot to assist in checking and refining the code provided in this research. Copilot was utilised specifically for debugging, improving readability, and ensuring alignment with best coding practices. All final decisions and implementations were made by the author.
___