In [1]:
import requests
import pandas as pd

# World Bank indicator NY.GDP.PCAP.PP.KD (GDP per capita, PPP), requested for
# every country/aggregate the API exposes.
url = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD"

params = {
    "format": "json",
    "per_page": 20000,
}

# The code below relies on the payload being a two-element list:
# [paging metadata, list of observations]. Anything else (for instance an API
# error message) is rejected explicitly instead of surfacing later as a
# confusing IndexError/TypeError on data[1].
records = []
page = 1
while True:
    # timeout keeps a stalled connection from hanging the kernel forever;
    # raise_for_status turns HTTP 4xx/5xx into an immediate, readable error.
    response = requests.get(url, params={**params, "page": page}, timeout=60)
    response.raise_for_status()
    data = response.json()
    if (
        not isinstance(data, list)
        or len(data) < 2
        or not isinstance(data[0], dict)
        or not isinstance(data[1], list)
    ):
        raise ValueError(f"Unexpected World Bank API response: {data!r}")
    records.extend(data[1])
    # Keep requesting pages until the reported page count is exhausted, so a
    # result set larger than per_page is never silently truncated.
    if page >= int(data[0].get("pages", 1)):
        break
    page += 1

df_gdp = pd.DataFrame(records)
df_gdp.head()


Unnamed: 0,indicator,country,countryiso3code,date,value,unit,obs_status,decimal
0,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2023,3967.860937,,,0
1,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2022,3974.803045,,,0
2,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2021,3934.287267,,,0
3,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2020,3861.111238,,,0
4,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2019,4073.880522,,,0


In [9]:
# The API returns "indicator" and "country" as {'id': ..., 'value': ...} dicts;
# keep only the human-readable 'value'. Non-dict cells pass through untouched,
# so re-running this cell is harmless.
df_gdp["indicator"] = df_gdp["indicator"].apply(
    lambda cell: cell['value'] if isinstance(cell, dict) else cell
)
df_gdp["country"] = df_gdp["country"].apply(
    lambda cell: cell['value'] if isinstance(cell, dict) else cell
)

# Keep the four columns of interest, give them presentation names, and order
# the rows by country (A-Z) with the most recent year first within each country.
df_gdp_clean = (
    df_gdp[["countryiso3code", "country", "date", "value"]]
    .rename(
        columns={
            "countryiso3code": "Country Code",
            "country": "Country",
            "date": "Year",
            "value": "GDP per Capita (PPP)",
        }
    )
    .sort_values(by=["Country", "Year"], ascending=[True, False])
)

df_gdp_clean.head()



Unnamed: 0,Country Code,Country,Year,GDP per Capita (PPP)
3136,AFG,Afghanistan,2023,1992.424394
3137,AFG,Afghanistan,2022,1981.710168
3138,AFG,Afghanistan,2021,2144.16657
3139,AFG,Afghanistan,2020,2769.685745
3140,AFG,Afghanistan,2019,2927.245144


In [21]:
# Write the cleaned GDP table to disk; the row index is left out of the file.
df_gdp_clean.to_csv(path_or_buf="gdp_per_capita.csv", index=False)

In [11]:
# World Bank indicator SP.DYN.LE00.IN (life expectancy), requested for every
# country/aggregate the API exposes.
url = "https://api.worldbank.org/v2/country/all/indicator/SP.DYN.LE00.IN"
params = {
    "format": "json",
    "per_page": 20000,
}

# The code below relies on the payload being a two-element list:
# [paging metadata, list of observations]. Anything else (for instance an API
# error message) is rejected explicitly instead of surfacing later as a
# confusing IndexError/TypeError on data[1].
records = []
page = 1
while True:
    # timeout keeps a stalled connection from hanging the kernel forever;
    # raise_for_status turns HTTP 4xx/5xx into an immediate, readable error.
    response = requests.get(url, params={**params, "page": page}, timeout=60)
    response.raise_for_status()
    data = response.json()
    if (
        not isinstance(data, list)
        or len(data) < 2
        or not isinstance(data[0], dict)
        or not isinstance(data[1], list)
    ):
        raise ValueError(f"Unexpected World Bank API response: {data!r}")
    records.extend(data[1])
    # Keep requesting pages until the reported page count is exhausted, so a
    # result set larger than per_page is never silently truncated.
    if page >= int(data[0].get("pages", 1)):
        break
    page += 1

df_life = pd.DataFrame(records)

# Clean data: "indicator" and "country" arrive as {'id': ..., 'value': ...}
# dicts; keep only the readable 'value' and leave non-dict cells as they are.
df_life["indicator"] = df_life["indicator"].apply(lambda x: x['value'] if isinstance(x, dict) else x)
df_life["country"] = df_life["country"].apply(lambda x: x['value'] if isinstance(x, dict) else x)

# Select the columns of interest, then rename them for presentation.
df_life_clean = df_life[["countryiso3code", "country", "date", "value"]].copy()
df_life_clean.columns = ["Country Code", "Country", "Year", "Life Expectancy"]

# Sort by country (A-Z), most recent year first within each country.
df_life_clean.sort_values(by=["Country", "Year"], ascending=[True, False], inplace=True)

df_life_clean.head()


Unnamed: 0,Country Code,Country,Year,Life Expectancy
3136,AFG,Afghanistan,2023,
3137,AFG,Afghanistan,2022,62.879
3138,AFG,Afghanistan,2021,61.982
3139,AFG,Afghanistan,2020,62.575
3140,AFG,Afghanistan,2019,63.565


In [19]:
# Write the cleaned life-expectancy table to disk; the row index is left out.
df_life_clean.to_csv(path_or_buf="life_expectancy.csv", index=False)

In [13]:
# Inner-join the two indicator tables on country code and year. Both tables
# carry a "Country" column, which the suffixes split into Country_GDP and
# Country_Life; the life-expectancy copy is discarded and the GDP-side copy
# gets its plain "Country" label back.
df_merged = (
    df_gdp_clean
    .merge(
        df_life_clean,
        how="inner",
        on=["Country Code", "Year"],
        suffixes=("_GDP", "_Life"),
    )
    .drop(columns=["Country_Life"])
    .rename(columns={"Country_GDP": "Country"})
)

df_merged.head()


Unnamed: 0,Country Code,Country,Year,GDP per Capita (PPP),Life Expectancy
0,AFG,Afghanistan,2023,1992.424394,
1,AFG,Afghanistan,2022,1981.710168,62.879
2,AFG,Afghanistan,2021,2144.16657,61.982
3,AFG,Afghanistan,2020,2769.685745,62.575
4,AFG,Afghanistan,2019,2927.245144,63.565


In [15]:
# Country metadata (region, income level, capital city) from the World Bank
# country endpoint.
country_url = "https://api.worldbank.org/v2/country"
params = {"format": "json", "per_page": 500}

# The code below relies on the payload being a two-element list:
# [paging metadata, list of country entries]. Anything else (for instance an
# API error message) is rejected explicitly instead of surfacing later as a
# confusing IndexError/TypeError on country_data[1].
country_records = []
page = 1
while True:
    # timeout keeps a stalled connection from hanging the kernel forever;
    # raise_for_status turns HTTP 4xx/5xx into an immediate, readable error.
    response = requests.get(country_url, params={**params, "page": page}, timeout=60)
    response.raise_for_status()
    country_data = response.json()
    if (
        not isinstance(country_data, list)
        or len(country_data) < 2
        or not isinstance(country_data[0], dict)
        or not isinstance(country_data[1], list)
    ):
        raise ValueError(f"Unexpected World Bank API response: {country_data!r}")
    country_records.extend(country_data[1])
    # Keep requesting pages until the reported page count is exhausted, so a
    # list longer than per_page is never silently truncated.
    if page >= int(country_data[0].get("pages", 1)):
        break
    page += 1

df_countries = pd.DataFrame(country_records)

# "region" and "incomeLevel" are nested dicts; pull out their 'value' label and
# fall back to None when a cell is not a dict.
df_countries_meta = pd.DataFrame({
    "Country Code": df_countries["id"],
    "Region": df_countries["region"].apply(lambda x: x["value"] if isinstance(x, dict) else None),
    "Income Level": df_countries["incomeLevel"].apply(lambda x: x["value"] if isinstance(x, dict) else None),
    "Capital City": df_countries["capitalCity"]
})

# Left join keeps every indicator row; codes with no metadata get NaN columns.
df_final = pd.merge(df_merged, df_countries_meta, on="Country Code", how="left")

# Remove non-countries: rows whose region is labelled "Aggregates", and rows
# that found no metadata match at all (Region is NaN after the left join).
df_final = df_final[df_final["Region"] != "Aggregates"]
df_final = df_final[df_final["Region"].notna()]

df_final.head()


Unnamed: 0,Country Code,Country,Year,GDP per Capita (PPP),Life Expectancy,Region,Income Level,Capital City
0,AFG,Afghanistan,2023,1992.424394,,South Asia,Low income,Kabul
1,AFG,Afghanistan,2022,1981.710168,62.879,South Asia,Low income,Kabul
2,AFG,Afghanistan,2021,2144.16657,61.982,South Asia,Low income,Kabul
3,AFG,Afghanistan,2020,2769.685745,62.575,South Asia,Low income,Kabul
4,AFG,Afghanistan,2019,2927.245144,63.565,South Asia,Low income,Kabul


In [23]:
# Write the merged, countries-only table to disk; the row index is left out.
df_final.to_csv(path_or_buf="final_country_data.csv", index=False)