In [1]:
import requests
import pandas as pd

In [2]:
# World Bank indicator codes to download (one API request series per code).
indicators_to_keep = [
    'NY.GDP.MKTP.CD',
    'NY.GDP.PCAP.CD',
    'FP.CPI.TOTL.ZG',
    'SL.UEM.TOTL.ZS',
    'SP.POP.TOTL',
    'NE.EXP.GNFS.ZS'
]

# Collect the raw observation records for every indicator into one flat list.
data = []
for indicator in indicators_to_keep:
    url = f"https://api.worldbank.org/v2/country/all/indicator/{indicator}"
    params = {
        "format": "json",
        "per_page": 20000,
        "page": 1
    }
    # Walk through every result page so that rows beyond `per_page` are not
    # silently dropped if the series ever grows past a single page.
    while True:
        # A timeout keeps the notebook from hanging forever on a stalled
        # connection; raise_for_status surfaces HTTP errors immediately.
        response = requests.get(url, params=params, timeout=60)
        response.raise_for_status()
        payload = response.json()

        # A successful reply is a two-element list: paging metadata followed
        # by the records. Anything else (e.g. an API error message) is
        # reported explicitly instead of failing with an IndexError.
        if not isinstance(payload, list) or len(payload) < 2:
            raise ValueError(
                f"Unexpected World Bank API response for {indicator}: {payload}"
            )
        metadata, records = payload[0], payload[1]

        # `records` can be null when there is nothing to return; extend in
        # place rather than rebuilding the list on every iteration.
        data.extend(records or [])

        if params["page"] >= int(metadata.get("pages", 1)):
            break
        params["page"] += 1

# Flatten the nested `indicator` / `country` objects into dotted column names.
df = pd.json_normalize(data)

In [3]:
# Preview the first rows of the flattened API records.
df.head()

Unnamed: 0,countryiso3code,date,value,unit,obs_status,decimal,indicator.id,indicator.value,country.id,country.value
0,AFE,2024,1242694000000.0,,,0,NY.GDP.MKTP.CD,GDP (current US$),ZH,Africa Eastern and Southern
1,AFE,2023,1179359000000.0,,,0,NY.GDP.MKTP.CD,GDP (current US$),ZH,Africa Eastern and Southern
2,AFE,2022,1228968000000.0,,,0,NY.GDP.MKTP.CD,GDP (current US$),ZH,Africa Eastern and Southern
3,AFE,2021,1114145000000.0,,,0,NY.GDP.MKTP.CD,GDP (current US$),ZH,Africa Eastern and Southern
4,AFE,2020,938607600000.0,,,0,NY.GDP.MKTP.CD,GDP (current US$),ZH,Africa Eastern and Southern


In [4]:
# Map each raw API field that is kept to its tidy column name; the key order
# defines the resulting column order.
column_names = {
    'country.value': "country_name",
    'countryiso3code': "country_code",
    'indicator.value': "indicator_name",
    'indicator.id': "indicator_code",
    'date': "year",
    'value': "value",
}

# Keep only the mapped fields, then relabel them.
df = df.loc[:, list(column_names.keys())]
df.columns = list(column_names.values())
df.head()

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,year,value
0,Africa Eastern and Southern,AFE,GDP (current US$),NY.GDP.MKTP.CD,2024,1242694000000.0
1,Africa Eastern and Southern,AFE,GDP (current US$),NY.GDP.MKTP.CD,2023,1179359000000.0
2,Africa Eastern and Southern,AFE,GDP (current US$),NY.GDP.MKTP.CD,2022,1228968000000.0
3,Africa Eastern and Southern,AFE,GDP (current US$),NY.GDP.MKTP.CD,2021,1114145000000.0
4,Africa Eastern and Southern,AFE,GDP (current US$),NY.GDP.MKTP.CD,2020,938607600000.0


In [5]:
# Year labels for the analysis window, as strings so they can be matched
# against the string-typed `year` column.
first_year, last_year = 2020, 2024
years = list(map(str, range(first_year, last_year + 1)))
years

['2020', '2021', '2022', '2023', '2024']

In [6]:
# Keep only the observations whose year falls inside the analysis window.
in_window = df['year'].isin(years)
df_filtered = df.loc[in_window]

# Sanity check: list the years that survived the filter.
df_filtered['year'].unique()

<StringArray>
['2024', '2023', '2022', '2021', '2020']
Length: 5, dtype: str

In [7]:
# Countries retained for the analysis, matched on the `country_name` column.
countries_to_keep = [
    'Afghanistan',
    'Bangladesh',
    'Bhutan',
    'India',
    'Maldives',
    'Nepal',
    'Pakistan',
    'Sri Lanka',
]

# Drop every row that belongs to any other country or aggregate region.
is_selected_country = df_filtered['country_name'].isin(countries_to_keep)
df_filtered = df_filtered.loc[is_selected_country]

# Sanity check: list the countries that survived the filter.
df_filtered['country_name'].unique()

<StringArray>
['Afghanistan',  'Bangladesh',      'Bhutan',       'India',    'Maldives',
       'Nepal',    'Pakistan',   'Sri Lanka']
Length: 8, dtype: str

In [8]:
# Convert the year labels to integers on the working subset.
# `df_filtered` was already derived from `df`, so casting `df` here would not
# reach the rows that are sorted and exported later; it would also break a
# re-run of the `isin(years)` filter, which compares against string labels.
# `assign` returns a new frame, so no filtered slice is mutated in place.
df_filtered = df_filtered.assign(year=df_filtered['year'].astype(int))

In [9]:
# Remove observations that carry no measurement in the `value` column.
required_columns = ['value']
df_filtered = df_filtered.dropna(axis='index', how='any', subset=required_columns)

# Show the remaining (rows, columns) count.
df_filtered.shape

(234, 6)

In [10]:
# Order the rows by country, then indicator, then year (all ascending) and
# replace the leftover index with a fresh 0..n-1 counter.
sort_keys = ['country_code', 'indicator_code', 'year']
df_final = df_filtered.sort_values(by=sort_keys, ascending=True).reset_index(drop=True)

df_final

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,year,value
0,Afghanistan,AFG,"Inflation, consumer prices (annual %)",FP.CPI.TOTL.ZG,2020,5.601888e+00
1,Afghanistan,AFG,"Inflation, consumer prices (annual %)",FP.CPI.TOTL.ZG,2021,5.133203e+00
2,Afghanistan,AFG,"Inflation, consumer prices (annual %)",FP.CPI.TOTL.ZG,2022,1.371210e+01
3,Afghanistan,AFG,"Inflation, consumer prices (annual %)",FP.CPI.TOTL.ZG,2023,-4.644709e+00
4,Afghanistan,AFG,"Inflation, consumer prices (annual %)",FP.CPI.TOTL.ZG,2024,-6.601186e+00
...,...,...,...,...,...,...
229,Pakistan,PAK,"Population, total",SP.POP.TOTL,2020,2.350017e+08
230,Pakistan,PAK,"Population, total",SP.POP.TOTL,2021,2.394778e+08
231,Pakistan,PAK,"Population, total",SP.POP.TOTL,2022,2.437007e+08
232,Pakistan,PAK,"Population, total",SP.POP.TOTL,2023,2.475045e+08


In [11]:
# Persist the cleaned table. The index is only the positional counter created
# by reset_index, so it is left out; otherwise it would be written as an
# unnamed first column and reappear as `Unnamed: 0` when the file is read back.
df_final.to_csv('../data/processed/south_asia_economic_metric_2020_to_2024.csv', index=False)
print('Clean WDI South Asia')

Clean WDI South Asia
