In [1]:
# Querying the World Bank indicators
# https://pypi.org/project/world-bank-data/


In [2]:
# pip install world_bank_data --upgrade

In [3]:
import pandas as pd
import world_bank_data as wb
# Lift pandas' row-display limit so frames are rendered without row truncation
pd.options.display.max_rows = None

In [4]:
# List the World Bank topics; the displayed table is indexed by topic id
# and carries each topic's name (value) and description (sourceNote).
wb.get_topics()

Unnamed: 0_level_0,value,sourceNote
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Agriculture & Rural Development,For the 70 percent of the world's poor who liv...
2,Aid Effectiveness,Aid effectiveness is the impact that aid has i...
3,Economy & Growth,Economic growth is central to economic develop...
4,Education,Education is one of the most powerful instrume...
5,Energy & Mining,The world economy needs ever-increasing amount...
6,Environment,Natural and man-made environmental resources –...
7,Financial Sector,An economy's financial markets are critical to...
8,Health,Improving health is central to the Millennium ...
9,Infrastructure,Infrastructure helps determine the success of ...
10,Social Protection & Labor,The supply of labor available in an economy in...


In [11]:
# List the World Bank data sources (e.g. WDI = World Development Indicators),
# indexed by source id.
wb.get_sources()

Unnamed: 0_level_0,lastupdated,name,code,description,url,dataavailability,metadataavailability,concepts
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,2021-08-18,Doing Business,DBS,,,Y,Y,3
2,2023-09-19,World Development Indicators,WDI,,,Y,Y,3
3,2022-09-23,Worldwide Governance Indicators,WGI,,,Y,Y,3
5,2016-03-21,Subnational Malnutrition Database,SNM,,,Y,Y,3
6,2022-12-06,International Debt Statistics,IDS,,,Y,Y,4
11,2013-02-22,Africa Development Indicators,ADI,,,Y,Y,3
12,2020-12-20,Education Statistics,EDS,,,Y,Y,3
13,2022-03-25,Enterprise Surveys,ESY,,,Y,Y,3
14,2023-07-24,Gender Statistics,GDS,,,Y,Y,3
15,2023-07-21,Global Economic Monitor,GEM,,,Y,Y,3


In [5]:
# Download the full country table, then keep only the rows whose
# incomeLevel column equals 'High income'.
countries_all = wb.get_countries()
high_inc_countries = countries_all.loc[lambda table: table['incomeLevel'] == 'High income']

In [18]:
# Indicators to download (World Bank indicator codes).
# Every entry, including the commented-out ones, ends with a comma so that
# enabling another code can never silently merge two adjacent string literals
# into a single bogus indicator code.
indicators = [
    'SH.STA.MMRT',  # Maternal mortality ratio (modeled estimate, per 100,000 live births)
    'SP.ADO.TFRT',  # Adolescent fertility rate (births per 1,000 women ages 15-19)
    # 'SP.DYN.CONU',  # Contraceptive prevalence, any method (% of married women ages 15-49)
    # 'SL.FAM.WORK.MA.ZS',  # Contributing family workers, male (% of male employment) (modeled ILO estimate)
]

# Alternatively, take every indicator that belongs to a given topic
# instead of listing the codes by hand:

# indicators = wb.get_indicators(topic=17).index # gender topic indicators

In [19]:
def indicator_to_dict(indicators, countries):
    """Download World Bank series and arrange them as nested dictionaries.

    Parameters
    ----------
    indicators : iterable of str
        Indicator codes such as ``['SH.STA.MMRT', '...']``.
    countries : iterable
        Relevant country codes.

    Returns
    -------
    dict
        Dictionary of dictionaries structured as follows::

            {
                "Indicator1": {
                    "Country1": {2021: 100, 2020: 90},
                    "Country2": {2021: 150, 2020: 140},
                },
                "Indicator2": {
                    "Country1": {2021: 200, 2020: 180},
                    "Country2": {2021: 250, 2020: 240},
                },
            }

        A country that cannot be looked up in an indicator's series is left
        out of that indicator's inner dictionary.
    """
    world_bank_dict = {}

    for indicator in indicators:
        # Pull the whole series for this indicator once, then slice it per country.
        indicator_series = wb.get_series(indicator, id_or_value='id', simplify_index=True)

        indicator_dict = {}
        for country in countries:
            try:
                country_series = indicator_series[country]
            except KeyError:
                # The series has no entry for this country: skip it rather
                # than fail, so it is simply absent from the result.
                continue
            indicator_dict[country] = country_series.to_dict()

        # Store all data for this indicator under the indicator code.
        world_bank_dict[indicator] = indicator_dict

    return world_bank_dict
    

In [20]:
 # Build the nested indicator -> country -> year mapping, passing the index of
 # high_inc_countries as the list of country codes.
 # NOTE(review): this cell is indented by one space; IPython appears to tolerate
 # that, but a plain .py export would raise IndentationError - consider dedenting.
 data_dict = indicator_to_dict(indicators=indicators, countries=high_inc_countries.index)

In [21]:
# Flatten the nested dictionary into a DataFrame with one row per
# (indicator, country) pair; the inner dictionaries' keys become the columns.
data_df = (
    pd.DataFrame.from_dict(
        {
            (indicator, country): yearly_values
            for indicator, per_country in data_dict.items()
            for country, yearly_values in per_country.items()
        },
        orient='index',
    )
    # Turn the two index levels into ordinary columns and give them readable names.
    .reset_index()
    .rename(columns={'level_0': 'Indicator', 'level_1': 'Country'})
)

In [22]:
# Display the assembled frame: one row per (Indicator, Country) pair,
# one column per year.
data_df

Unnamed: 0,Indicator,Country,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,SH.STA.MMRT,ABW,,,,,,,,,...,,,,,,,,,,
1,SH.STA.MMRT,AND,,,,,,,,,...,,,,,,,,,,
2,SH.STA.MMRT,ARE,,,,,,,,,...,9.0,9.0,9.0,8.0,9.0,9.0,9.0,9.0,,
3,SH.STA.MMRT,ASM,,,,,,,,,...,,,,,,,,,,
4,SH.STA.MMRT,ATG,,,,,,,,,...,29.0,23.0,27.0,21.0,21.0,19.0,19.0,21.0,,
5,SH.STA.MMRT,AUS,,,,,,,,,...,6.0,5.0,5.0,5.0,5.0,4.0,5.0,3.0,,
6,SH.STA.MMRT,AUT,,,,,,,,,...,6.0,6.0,6.0,5.0,5.0,6.0,5.0,5.0,,
7,SH.STA.MMRT,BEL,,,,,,,,,...,6.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,,
8,SH.STA.MMRT,BHR,,,,,,,,,...,16.0,15.0,16.0,15.0,15.0,14.0,15.0,16.0,,
9,SH.STA.MMRT,BHS,,,,,,,,,...,82.0,97.0,84.0,79.0,77.0,77.0,82.0,77.0,,
