In [1]:
import requests
import numpy as np  # for using pandas
import pandas as pd  # for using dataframes

In [2]:
# Download the complete SP.POP.TOTL series for every country/aggregate from
# the World Bank API, one 1000-row page at a time.
endpoint = "https://api.worldbank.org/v2/country/all/indicator/SP.POP.TOTL"

records = []  # raw observation dicts gathered from every page
pages = 1     # placeholder; the real page count comes from the first response
p = 1
while p <= pages:
    res = requests.get(
        endpoint,
        params={"format": "json", "page": p, "per_page": 1000},
        timeout=30,  # never hang the kernel on a stalled connection
    )
    res.raise_for_status()  # surface HTTP errors instead of parsing an error body
    data = res.json()

    pages = data[0]['pages']         # data[0] carries the paging metadata
    records.extend(data[1] or [])    # data[1] carries the observations of this page
    p += 1

# Build the frame once from all records: this avoids re-copying the growing
# frame on every page (pd.concat in a loop) and yields a clean 0..n-1 index.
df = pd.DataFrame(records, columns=['country', 'date', 'indicator'])

In [3]:
# Fetch the World Bank indicator catalogue and confirm that the population
# indicator is listed in it.
# NOTE(review): assumes the whole catalogue fits in one page of 25000 entries
# — confirm against data[0]['pages'].
endpoint = "https://api.worldbank.org/v2/indicator?format=json&per_page=25000"
res = requests.get(endpoint)
data = res.json()

# data[1] is a list of dicts (one per indicator), so the code has to be compared
# against each entry's 'id' field; testing the string against the list itself
# compares a str with dicts and is never true.
indicator_ids = {entry['id'] for entry in data[1]}

if 'SP.POP.TOTL' in indicator_ids:
    print("Population data is available")

In [4]:
# Display the raw frame: the `country` and `indicator` columns still hold the
# API's {'id': ..., 'value': ...} dicts at this point.
df

Unnamed: 0,country,date,indicator
0,"{'id': 'ZH', 'value': 'Africa Eastern and Sout...",2023,"{'id': 'SP.POP.TOTL', 'value': 'Population, to..."
1,"{'id': 'ZH', 'value': 'Africa Eastern and Sout...",2022,"{'id': 'SP.POP.TOTL', 'value': 'Population, to..."
2,"{'id': 'ZH', 'value': 'Africa Eastern and Sout...",2021,"{'id': 'SP.POP.TOTL', 'value': 'Population, to..."
3,"{'id': 'ZH', 'value': 'Africa Eastern and Sout...",2020,"{'id': 'SP.POP.TOTL', 'value': 'Population, to..."
4,"{'id': 'ZH', 'value': 'Africa Eastern and Sout...",2019,"{'id': 'SP.POP.TOTL', 'value': 'Population, to..."
...,...,...,...
17019,"{'id': 'ZW', 'value': 'Zimbabwe'}",1964,"{'id': 'SP.POP.TOTL', 'value': 'Population, to..."
17020,"{'id': 'ZW', 'value': 'Zimbabwe'}",1963,"{'id': 'SP.POP.TOTL', 'value': 'Population, to..."
17021,"{'id': 'ZW', 'value': 'Zimbabwe'}",1962,"{'id': 'SP.POP.TOTL', 'value': 'Population, to..."
17022,"{'id': 'ZW', 'value': 'Zimbabwe'}",1961,"{'id': 'SP.POP.TOTL', 'value': 'Population, to..."


In [5]:
# `country` and `indicator` each hold a dict with an id and a value; spread
# every dict over two flat columns, then discard the original dict columns.
for dict_col, new_cols in (
    ('country', ['country_id', 'country_name']),
    ('indicator', ['indicator_id', 'indicator_value']),
):
    expanded = pd.DataFrame(df[dict_col].to_list(), index=df.index)
    df[new_cols] = expanded

df.drop(columns=['country', 'indicator'], inplace=True)
df

Unnamed: 0,date,country_id,country_name,indicator_id,indicator_value
0,2023,ZH,Africa Eastern and Southern,SP.POP.TOTL,"Population, total"
1,2022,ZH,Africa Eastern and Southern,SP.POP.TOTL,"Population, total"
2,2021,ZH,Africa Eastern and Southern,SP.POP.TOTL,"Population, total"
3,2020,ZH,Africa Eastern and Southern,SP.POP.TOTL,"Population, total"
4,2019,ZH,Africa Eastern and Southern,SP.POP.TOTL,"Population, total"
...,...,...,...,...,...
17019,1964,ZW,Zimbabwe,SP.POP.TOTL,"Population, total"
17020,1963,ZW,Zimbabwe,SP.POP.TOTL,"Population, total"
17021,1962,ZW,Zimbabwe,SP.POP.TOTL,"Population, total"
17022,1961,ZW,Zimbabwe,SP.POP.TOTL,"Population, total"


In [6]:
def getIndicators(df):
    """Turn World Bank observations into one value column per indicator.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``indicator`` (dicts with at least an
        ``'id'`` key), ``value`` and ``countryiso3code``. Any other column
        (e.g. ``date``) is carried through unchanged. The frame passed in is
        left untouched.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``countryiso3code``. The ``indicator`` and ``value``
        columns are replaced by one column per distinct indicator id holding
        that indicator's values.

        * One distinct indicator: one output row per input row.
        * Several indicators: rows are grouped by ``countryiso3code`` and the
          first non-null entry of every column is kept, i.e. one row per
          country code. If the input spans several dates, only the first
          non-null value per column survives.

    Raises
    ------
    AssertionError
        If ``df`` is not a pandas DataFrame.
    KeyError
        If a required column is missing.
    """
    assert isinstance(df, pd.DataFrame), "df must be a pandas DataFrame"

    # Indicator code of every row, taken from the {'id': ..., 'value': ...} dicts.
    indicator_ids = pd.Series(
        [entry['id'] for entry in df['indicator']], index=df.index, dtype=object
    )

    # Work on a derived frame so the caller's DataFrame is never modified.
    result = df.drop(columns=['indicator', 'value'])

    # One column per indicator: its value on matching rows, NaN elsewhere.
    for _id in indicator_ids.unique().tolist():
        result[_id] = df['value'].where(indicator_ids == _id)

    if indicator_ids.nunique() > 1:
        # Each country code is spread over several rows (one per indicator);
        # collapse them by keeping the first non-null entry of every column.
        return result.groupby('countryiso3code').first()

    return result.set_index('countryiso3code')

In [7]:
# Query SP.POP.TOTL for DE and FR over 2015-2020 and reshape the answer
# with getIndicators.
endpoint = "https://api.worldbank.org/v2/country/DE;FR/indicator/SP.POP.TOTL?date=2015:2020&format=json&per_page=1000"
res = requests.get(endpoint)
data = res.json()

# Only these fields of each observation are needed downstream.
wanted_columns = ['indicator', 'date', 'value','countryiso3code']
df_1a = getIndicators(pd.DataFrame(data[1], columns=wanted_columns))
df_1a

Unnamed: 0_level_0,date,SP.POP.TOTL
countryiso3code,Unnamed: 1_level_1,Unnamed: 2_level_1
DEU,2020,83160871
DEU,2019,83092962
DEU,2018,82905782
DEU,2017,82657002
DEU,2016,82348669
DEU,2015,81686611
FRA,2020,67571107
FRA,2019,67388001
FRA,2018,67158348
FRA,2017,66918020


In [30]:
# Query three indicators (SP.POP.TOTL, NY.GDP.MKTP.CD, SP.DYN.LE00.IN) for
# every entry of the 'all' country list, for the year 2012.
# NOTE(review): a single request is made, so this assumes all rows fit in one
# page of 1000 — confirm against data[0]['pages'].
endpoint = "https://api.worldbank.org/v2/country/all/indicator/SP.POP.TOTL;NY.GDP.MKTP.CD;SP.DYN.LE00.IN?date=2012&format=json&per_page=1000&source=2"
res = requests.get(endpoint)
data = res.json()
df_1b = pd.DataFrame(data[1], columns=['indicator', 'countryiso3code', 'date', 'value'])

df_1b = getIndicators(df_1b)

# Some entries are not countries (high income, low income, not classified,
# etc.) and carry an empty countryiso3code; remove that label.
# errors='ignore' keeps the cell from raising KeyError when no such label exists.
df_1b = df_1b.drop(index='', errors='ignore')
df_1b

Unnamed: 0_level_0,date,SP.POP.TOTL,NY.GDP.MKTP.CD,SP.DYN.LE00.IN
countryiso3code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABW,2012,102112.0,2.615208e+09,75.531000
AFE,2012,552530654.0,9.526756e+11,60.050780
AFG,2012,30466479.0,1.990733e+10,61.923000
AFW,2012,376797999.0,7.377996e+11,55.340561
AGO,2012,25188292.0,1.280529e+11,58.623000
...,...,...,...,...
XKX,2012,1807106.0,6.163484e+09,78.280000
YEM,2012,26223391.0,3.540133e+10,67.343000
ZAF,2012,53145033.0,4.344005e+11,61.846000
ZMB,2012,14744658.0,2.550306e+10,58.867000


In [10]:
# Load the medal table from the CSV file that sits next to the notebook.
medal_csv = "medal_table.csv"
df_medal = pd.read_csv(medal_csv)
df_medal

Unnamed: 0,year,country,country_code,gold,silver,bronze
0,2012,United States,USA,46,28,30
1,2012,People's Republic of China,CHN,38,31,22
2,2012,Great Britain,GBR,29,17,19
3,2012,Russian Federation,RUS,20,20,27
4,2012,Republic of Korea,KOR,13,9,8
...,...,...,...,...,...,...
81,2012,Kingdom of Saudi Arabia,KSA,0,0,1
82,2012,Kuwait,KUW,0,0,1
83,2012,Morocco,MAR,0,0,1
84,2012,Tajikistan,TJK,0,0,1


In [None]:
# Print a dict-literal stub line for every medal-table country code that is
# not a label of df_1b's index, ready to be pasted into a mapping.
unmatched_codes = [code for code in df_medal.country_code if code not in df_1b.index]
for cm in unmatched_codes:
    print(f"'{cm}':'',")

In [28]:
# Medal-table country codes that do not appear in df_1b's index, mapped to the
# ISO 3166-1 alpha-3 code of the same country. Bound to a name so later cells
# can use it (e.g. with Series.replace) instead of being a throw-away literal.
# NOTE(review): 'TWN' is the ISO code for TPE, but it may not be present in the
# World Bank data — verify before relying on it.
IOC_TO_ISO3 = {
    'GER': 'DEU',
    'IRI': 'IRN',
    'NED': 'NLD',
    'RSA': 'ZAF',
    'CRO': 'HRV',
    'DEN': 'DNK',
    'SUI': 'CHE',
    'SLO': 'SVN',
    'TPE': 'TWN',
    'LAT': 'LVA',
    'ALG': 'DZA',
    'GRN': 'GRD',
    'BAH': 'BHS',
    'MGL': 'MNG',
    'BUL': 'BGR',
    'INA': 'IDN',
    'MAS': 'MYS',
    'PUR': 'PRI',
    'BOT': 'BWA',
    'GUA': 'GTM',
    'POR': 'PRT',
    'GRE': 'GRC',
    'KSA': 'SAU',
    'KUW': 'KWT',
    'VIE': 'VNM',
}
IOC_TO_ISO3

'ZI'

In [35]:
# Snapshot of df_1b's index labels as a plain Python list.
l = df_1b.index.tolist()

# Print each label whose first character is 'C', one per line.
for code in (label for label in l if label[0] == 'C'):
    print(code)

CAF
CAN
CEB
CHE
CHI
CHL
CHN
CIV
CMR
COD
COG
COL
COM
CPV
CRI
CSS
CUB
CUW
CYM
CYP
CZE
