In [3]:
# Install the World Bank API client into the environment of the running kernel.
# `%pip` (rather than a bare `pip`) does not depend on IPython's automagic being
# enabled, and the version is pinned to the one this notebook was run with so
# that a fresh run resolves the same dependency.
%pip install wbgapi==1.0.12

Collecting wbgapi
  Downloading wbgapi-1.0.12-py3-none-any.whl.metadata (13 kB)
Downloading wbgapi-1.0.12-py3-none-any.whl (36 kB)
Installing collected packages: wbgapi
Successfully installed wbgapi-1.0.12


In [4]:
import wbgapi as wb
import pandas as pd

# Advanced chart example: GDP, population, and government expenditure

Let's use the World Bank's API package (`wbgapi`) to get data on:

- GDP per capita (PPP)
- Government expense
- Population

## GDP per capita
First, we start with GDP per capita (PPP, constant 2021 international dollars).

In [10]:
# Download the raw GDP per capita (PPP) table from the World Bank.
# `labels=True` is what provides the 'Country' name column that the
# reshaping cell relies on, next to the 'economy' code in the index.
GDP_PC_PPP_INDICATOR = 'NY.GDP.PCAP.PP.KD'
full_gdp_pc_df = wb.data.DataFrame(GDP_PC_PPP_INDICATOR, labels=True)

In [11]:
# Reshape the raw wide GDP frame into a tidy long table with one row per
# (country, year) that has a GDP per capita value.
# Every step returns a new frame, so `full_gdp_pc_df` is never modified and
# this cell can be re-run while experimenting without downloading the data again.
gdp_pc_df = (
    full_gdp_pc_df
    # The country code (iso3) is in the index, so move it to a column
    .reset_index()
    # Wide -> long: one row per country and year column
    .melt(id_vars=['Country', 'economy'], var_name='date', value_name='gdp_pc')
    # Rename by name; 'date' and 'gdp_pc' already have their final names
    .rename(columns={'Country': 'country', 'economy': 'iso3'})
    # Strip the 'YR' prefix from the year labels and parse them as dates
    .assign(date=lambda long_df: pd.to_datetime(long_df['date'].str.replace('YR', ''), format='%Y'))
    # Drop rows with missing values
    .dropna()
)

gdp_pc_df

Unnamed: 0,country,iso3,date,gdp_pc
7980,Zimbabwe,ZWE,1990-01-01,4189.552481
7981,Zambia,ZMB,1990-01-01,2456.588763
7985,Viet Nam,VNM,1990-01-01,2416.030525
7987,Vanuatu,VUT,1990-01-01,3044.869413
7988,Uzbekistan,UZB,1990-01-01,3807.604591
...,...,...,...,...
17019,Central Europe and the Baltics,CEB,2023-01-01,42391.250132
17020,Caribbean small states,CSS,2023-01-01,28099.087171
17021,Arab World,ARB,2023-01-01,16288.054296
17022,Africa Western and Central,AFW,2023-01-01,4857.987533


In [None]:
# Download the raw table for the indicator stored downstream in the `co2` column.
# NOTE(review): presumably CO2 emissions per capita, judging by the indicator code -- confirm.
# `labels=False`: only the 'economy' code identifies each row (no country-name column).
CO2_INDICATOR = 'EN.GHG.CO2.PC.CE.AR5'
full_co2_df = wb.data.DataFrame(CO2_INDICATOR, labels=False)

In [None]:
# Reshape the raw wide CO2 frame into a tidy long table with one row per
# (economy, year) that has a value.
# Every step returns a new frame, so `full_co2_df` is never modified and
# this cell can be re-run while experimenting without downloading the data again.
co2_df = (
    full_co2_df
    # The country code (iso3) is in the index, so move it to a column
    .reset_index()
    # Wide -> long: one row per economy and year column
    .melt(id_vars=['economy'], var_name='date', value_name='co2')
    # Rename by name; 'date' and 'co2' already have their final names
    .rename(columns={'economy': 'iso3'})
    # Strip the 'YR' prefix from the year labels and parse them as dates
    .assign(date=lambda long_df: pd.to_datetime(long_df['date'].str.replace('YR', ''), format='%Y'))
    # Drop rows with missing values
    .dropna()
)

co2_df

Unnamed: 0,iso3,date,co2
2660,ABW,1970-01-01,0.426353
2661,AFE,1970-01-01,1.350890
2662,AFG,1970-01-01,0.161267
2663,AFW,1970-01-01,0.349508
2664,AGO,1970-01-01,1.484021
...,...,...,...
16752,WSM,2022-01-01,1.598151
16754,YEM,2022-01-01,0.363930
16755,ZAF,2022-01-01,6.761533
16756,ZMB,2022-01-01,0.463126


In [5]:
# Download the raw government expense table from the World Bank.
# `labels=False`: only the 'economy' code identifies each row (no country-name column).
EXPENSE_INDICATOR = 'GC.XPN.TOTL.GD.ZS'
full_ex_df = wb.data.DataFrame(EXPENSE_INDICATOR, labels=False)

In [7]:
# Reshape the raw wide expense frame into a tidy long table with one row per
# (economy, year) that has a value.
# Every step returns a new frame, so `full_ex_df` is never modified and
# this cell can be re-run while experimenting without downloading the data again.
ex_df = (
    full_ex_df
    # The country code (iso3) is in the index, so move it to a column
    .reset_index()
    # Wide -> long: one row per economy and year column
    .melt(id_vars=['economy'], var_name='date', value_name='expense')
    # Rename by name; 'date' and 'expense' already have their final names
    .rename(columns={'economy': 'iso3'})
    # Strip the 'YR' prefix from the year labels and parse them as dates
    .assign(date=lambda long_df: pd.to_datetime(long_df['date'].str.replace('YR', ''), format='%Y'))
    # Drop rows with missing values
    .dropna()
)

ex_df

Unnamed: 0,iso3,date,expense
3205,AUS,1972-01-01,18.059578
3231,CHL,1972-01-01,32.446542
3273,GBR,1972-01-01,29.511858
3304,IRN,1972-01-01,20.763831
3311,JPN,1972-01-01,10.775446
...,...,...,...
16739,UGA,2022-01-01,16.942420
16740,UKR,2022-01-01,50.326462
16743,USA,2022-01-01,15.513263
16751,WLD,2022-01-01,22.049515


## Population

In [13]:
# Download the raw population table from the World Bank.
# `labels=False`: only the 'economy' code identifies each row (no country-name column).
POPULATION_INDICATOR = 'SP.POP.TOTL'
full_pop_df = wb.data.DataFrame(POPULATION_INDICATOR, labels=False)

In [14]:
# Reshape the raw wide population frame into a tidy long table with one row per
# (economy, year) that has a value.
# Every step returns a new frame, so `full_pop_df` is never modified and
# this cell can be re-run while experimenting without downloading the data again.
pop_df = (
    full_pop_df
    # The country code (iso3) is in the index, so move it to a column
    .reset_index()
    # Wide -> long: one row per economy and year column
    .melt(id_vars=['economy'], var_name='date', value_name='pop')
    # Rename by name; 'date' and 'pop' already have their final names
    .rename(columns={'economy': 'iso3'})
    # Strip the 'YR' prefix from the year labels and parse them as dates
    .assign(date=lambda long_df: pd.to_datetime(long_df['date'].str.replace('YR', ''), format='%Y'))
    # Drop rows with missing values
    .dropna()
)

pop_df

Unnamed: 0,iso3,date,pop
0,ABW,1960-01-01,54608.0
1,AFE,1960-01-01,130692579.0
2,AFG,1960-01-01,8622466.0
3,AFW,1960-01-01,97256290.0
4,AGO,1960-01-01,5357195.0
...,...,...,...
17019,XKX,2023-01-01,1756374.0
17020,YEM,2023-01-01,34449825.0
17021,ZAF,2023-01-01,60414495.0
17022,ZMB,2023-01-01,20569737.0


## Adding Region Data

In [8]:
# Build a lookup table from three-letter country code to region, read from a
# public CSV of ISO 3166 countries with regional codes.
REGION_CSV_URL = "https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/refs/heads/master/all/all.csv"
region_df = (
    pd.read_csv(REGION_CSV_URL)[['alpha-3', 'region']]  # keep only the code and its region
    .rename(columns={'alpha-3': 'iso3'})  # match the key name used by the other frames
)


## Merging our data

In [17]:
# Combine the indicators into a single country-year table.
# Inner joins keep only (iso3, date) pairs present in GDP, expense and
# population, and then only the codes that have a row in `region_df`.
df = (
    gdp_pc_df
    .merge(ex_df, on=['iso3', 'date'], how='inner')
    .merge(pop_df, on=['iso3', 'date'], how='inner')
    .merge(region_df, on='iso3', how='inner')
)

# Persist the merged table without the positional index.
# NOTE(review): the file name mentions "co2", but no CO2 column is merged above
# (the frame holds gdp_pc, expense, pop and region) -- consider renaming the file
# once anything that reads it has been checked.
df.to_csv('gdp_co2_pop_region.csv', index=False)

In [18]:
# Display the merged DataFrame `df` as the cell output
df

Unnamed: 0,country,iso3,date,gdp_pc,expense,pop,region
0,Vanuatu,VUT,1990-01-01,3044.869413,19.692448,150882.0,Oceania
1,United States,USA,1990-01-01,44395.131454,15.276841,249623000.0,Americas
2,United Kingdom,GBR,1990-01-01,35551.827147,32.583043,57247586.0,Europe
3,Tunisia,TUN,1990-01-01,6591.287374,26.132382,8440023.0,Africa
4,Switzerland,CHE,1990-01-01,64588.954095,8.827295,6715519.0,Europe
...,...,...,...,...,...,...,...
3262,"Bahamas, The",BHS,2022-01-01,31424.871447,23.617039,409984.0,Americas
3263,Austria,AUT,2022-01-01,65836.889366,35.004865,9041851.0,Europe
3264,Australia,AUS,2022-01-01,59099.847939,27.626830,26014399.0,Oceania
3265,Armenia,ARM,2022-01-01,19100.245631,21.162789,2780469.0,Asia


In [19]:
# Inspect the column names of the long-format GDP frame
gdp_pc_df.columns

Index(['country', 'iso3', 'date', 'gdp_pc'], dtype='object')