In [1]:
!pip install wbdata
!pip install iso3166

import pandas as pd
import wbdata
import iso3166 #iso3166.countries.get('country details')



In [2]:
# wbdata was already imported in the first cell; this import is repeated so
# the cell can be read on its own.
import wbdata

# Return list of all country/region codes:
# (not rendered: a notebook cell only displays its final expression)
wbdata.get_country()

# Return list matching a query term:
# (also not rendered, for the same reason)
wbdata.search_countries("World")

## Try your own search!
# This is the last expression in the cell, so its result is the table shown
# below the cell.
wbdata.search_countries("")

id    name
----  --------------------------------------------------------------------------------
ABW   Aruba
AFE   Africa Eastern and Southern
AFG   Afghanistan
AFR   Africa
AFW   Africa Western and Central
AGO   Angola
ALB   Albania
AND   Andorra
ARB   Arab World
ARE   United Arab Emirates
ARG   Argentina
ARM   Armenia
ASM   American Samoa
ATG   Antigua and Barbuda
AUS   Australia
AUT   Austria
AZE   Azerbaijan
BDI   Burundi
BEA   East Asia & Pacific (IBRD-only countries)
BEC   Europe & Central Asia (IBRD-only countries)
BEL   Belgium
BEN   Benin
BFA   Burkina Faso
BGD   Bangladesh
BGR   Bulgaria
BHI   IBRD countries classified as high income
BHR   Bahrain
BHS   Bahamas, The
BIH   Bosnia and Herzegovina
BLA   Latin America & the Caribbean (IBRD-only countries)
BLR   Belarus
BLZ   Belize
BMN   Middle East & North Africa (IBRD-only countries)
BMU   Bermuda
BOL   Bolivia
BRA   Brazil
BRB   Barbados
BRN   Brunei Darussalam
BSS   Sub-Saharan Africa (IBRD-only countries)
BTN   Bhutan
BWA  

In [3]:
# List the available data sources; the id column of the resulting table is
# the value passed as `source=` in the cells that follow.
wbdata.get_source()

  id  name
----  --------------------------------------------------------------------
   1  Doing Business
   2  World Development Indicators
   3  Worldwide Governance Indicators
   5  Subnational Malnutrition Database
   6  International Debt Statistics
  11  Africa Development Indicators
  12  Education Statistics
  13  Enterprise Surveys
  14  Gender Statistics
  15  Global Economic Monitor
  16  Health Nutrition and Population Statistics
  18  IDA Results Measurement System
  19  Millennium Development Goals
  20  Quarterly Public Sector Debt
  22  Quarterly External Debt Statistics SDDS
  23  Quarterly External Debt Statistics GDDS
  25  Jobs
  27  Global Economic Prospects
  28  Global Financial Inclusion
  29  The Atlas of Social Protection: Indicators of Resilience and Equity
  30  Exporter Dynamics Database – Indicators at Country-Year Level
  31  Country Policy and Institutional Assessment
  32  Global Financial Development
  33  G20 Financial Inclusion Indicators
  34  Glob

In [4]:
# NOTE: source_id is a notebook-level global. pop_df_helper reads it when it
# queries the API, so this cell must be run before any population lookup.
source_id = 40 # "Population estimates and projections"

# List every indicator published under that source (id and name).
indicators = wbdata.get_indicator(source=source_id)
indicators

id                 name
-----------------  -------------------------------------------------------------------
SH.DTH.0509        Number of deaths ages 5-9 years
SH.DTH.1014        Number of deaths ages 10-14 years
SH.DTH.1019        Number of deaths ages 10-19 years
SH.DTH.1519        Number of deaths ages 15-19 years
SH.DTH.2024        Number of deaths ages 20-24 years
SH.DTH.IMRT        Number of infant deaths
SH.DTH.IMRT.FE     Number of infant deaths, female
SH.DTH.IMRT.MA     Number of infant deaths, male
SH.DTH.MORT        Number of under-five deaths
SH.DTH.MORT.FE     Number of under-five deaths, female
SH.DTH.MORT.MA     Number of under-five deaths, male
SH.DTH.NMRT        Number of neonatal deaths
SH.DYN.0509        Probability of dying among children ages 5-9 years (per 1,000)
SH.DYN.1014        Probability of dying among adolescents ages 10-14 years (per 1,000)
SH.DYN.1019        Probability of dying among adolescents ages 10-19 years (per 1,000)
SH.DYN.1519        Probabil

In [5]:
def pop_df(year='2021', group='all', age_lower=0, age_upper=100, location='world'):
    """Build a population table by age band for one location and year.

    Parameters
    ----------
    year : str or int
        Year to report; forwarded unchanged to ``pop_df_helper``.
    group : str
        ``'Male'`` or ``'Female'`` (matched case-insensitively) keeps only
        that sex's column. Any other value keeps both columns and appends a
        ``Total`` column holding their row-wise sum.
    age_lower, age_upper : int
        Age range of interest; forwarded unchanged to ``pop_df_helper``.
    location : str
        Location identifier; forwarded unchanged to ``pop_df_helper``,
        which resolves it.

    Returns
    -------
    pandas.DataFrame
        One row per age band with columns ``Country``, ``Year``, ``Age`` and
        the population column(s) selected by ``group``.
    """
    df = pop_df_helper(year, age_lower, age_upper, location)

    # Compare case-insensitively so 'male' / 'FEMALE' are not silently
    # treated as a request for the combined table.
    selector = str(group).lower()
    if selector == 'male':
        return df.drop(columns=['Female'])
    if selector == 'female':
        return df.drop(columns=['Male'])

    # assign() returns a new frame with Total appended as the last column,
    # leaving the helper's frame untouched.
    return df.assign(Total=df['Female'] + df['Male'])

# Fetches female and male population counts per age band for one location/year
def pop_df_helper(year, age_lower, age_upper, location):
    """Query the World Bank API for population counts split by sex and age band.

    Parameters
    ----------
    year : str or int
        Year to select; it is formatted into a string and matched against
        the ``date`` index of the frames returned by ``wbdata.get_dataframe``.
    age_lower, age_upper : int
        Age range, expanded to age-band codes by ``list_of_age_inputs``.
    location : str
        ``'WLD'`` or ``'world'`` (any letter case) selects the world
        aggregate. Anything else is looked up with ``iso3166.countries.get``,
        which raises ``KeyError`` for unknown values.

    Returns
    -------
    pandas.DataFrame
        Columns ``Country``, ``Year``, ``Age``, ``Female``, ``Male`` with one
        row per age band. ``Age`` labels look like ``'00-04'`` / ``'80-UP'``.

    Notes
    -----
    Reads the notebook-level global ``source_id`` (set to 40 in an earlier
    cell; refer to ``wbdata.get_source()``), so that cell must run first.
    """
    # The world aggregate is not an ISO 3166 entry, so handle it before the
    # ISO lookup. 'world' is accepted too because pop_df defaults to it.
    if str(location).upper() in ('WLD', 'WORLD'):
        country_alpha3 = 'WLD'
        country_name = 'World'
    else:
        country = iso3166.countries.get(location)
        country_alpha3 = country.alpha3
        country_name = country.apolitical_name

    inputs = list_of_age_inputs(age_lower, age_upper)
    # '0004' -> '00-04', '80UP' -> '80-UP'
    age_labels = ["{}-{}".format(code[:2], code[2:]) for code in inputs]

    def fetch_counts(sex_suffix):
        """Return one count per age label for the sex suffix ('MA' or 'FE')."""
        # Indicator dictionary required by the wbdata API:
        # indicator id -> column label.
        indicator_dict = {
            "SP.POP.{}.{}".format(code, sex_suffix): label
            for code, label in zip(inputs, age_labels)
        }
        wbdf = wbdata.get_dataframe(indicator_dict, country=country_alpha3, source=source_id)
        counts = wbdf.query("date=='{}'".format(year)).sum(axis=0)
        # Align by column label instead of trusting positional order.
        return counts.reindex(age_labels).tolist()

    # Male and female are fetched separately because both use the same
    # age labels as column names.
    datas_m = fetch_counts('MA')
    datas_f = fetch_counts('FE')

    return pd.DataFrame({
        'Country': country_name,
        'Year': year,
        'Age': age_labels,
        'Female': datas_f,
        'Male': datas_m
    })

# Builds the age-band codes that cover a requested age range
def list_of_age_inputs(age_lower, age_upper):
    """Return the age-band codes spanning ``age_lower``..``age_upper``.

    Codes are four characters: a zero-padded start and end age for each
    five-year band (``'0004'``, ``'0509'``, ..., ``'7579'``), plus ``'80UP'``
    when ``age_upper`` is 80 or more. The band containing each bound is
    included. An empty list is returned when no band qualifies.
    """
    # Snap the lower bound down to a multiple of 5, never below 0.
    band_start = max(0, age_lower - age_lower % 5)
    # Exclusive stop: the multiple of 5 just above age_upper, capped so the
    # last fixed-width band produced is 75-79.
    band_stop = min(79, age_upper - age_upper % 5 + 5)

    codes = []
    while band_start < band_stop:
        codes.append(f"{band_start:02d}{band_start + 4:02d}")
        band_start += 5

    # Ages from 80 upward are covered by a single open-ended band.
    return (codes + ['80UP']) if age_upper >= 80 else codes

In [12]:
# World population for 2018 across the full age range. group='Total' is
# neither 'Male' nor 'Female', so pop_df keeps both sex columns and appends
# a Total column.
df = pop_df(year=2018,group='Total',age_lower = 0, age_upper = 100,location='WLD')
df

Unnamed: 0,Country,Year,Age,Female,Male,Total
0,World,2018,00-04,333098585.0,354088174.0,687186759.0
1,World,2018,05-09,325470037.0,347389724.0,672859761.0
2,World,2018,10-14,308035159.0,329029796.0,637064955.0
3,World,2018,15-19,295004074.0,315050425.0,610054499.0
4,World,2018,20-24,288393154.0,306524173.0,594917327.0
5,World,2018,25-29,295497148.0,310713277.0,606210425.0
6,World,2018,30-34,285117787.0,296708276.0,581826063.0
7,World,2018,35-39,260070368.0,268841828.0,528912196.0
8,World,2018,40-44,238289828.0,244013351.0,482303179.0
9,World,2018,45-49,235273671.0,237876277.0,473149948.0


In [161]:
def population(year='', sex='', age_range=(0, 100), place=''):
    """Print a one-sentence summary of a population count.

    Parameters
    ----------
    year : str or int
        Year to report.
    sex : str
        ``'male'`` or ``'female'`` (any letter case) reports that sex only.
        Any other value, e.g. ``'People'``, reports the combined total and is
        echoed in the sentence (``'people'`` is printed in lower case).
    age_range : tuple of (int, int)
        ``(age_lower, age_upper)``. Bounds are widened to whole age bands by
        ``list_of_age_inputs``, so ``(0, 50)`` is reported as ages 00 to 54.
    place : str
        ``'WLD'`` or ``'world'`` (any letter case) for the world; otherwise
        a value understood by ``iso3166.countries.get``.

    Returns
    -------
    None
        The sentence is printed, not returned.

    Raises
    ------
    ValueError
        If ``age_range`` covers no age band (e.g. lower bound above upper).
    """
    age_lower, age_upper = age_range

    # Validate before pop_df is called, so an empty range is rejected
    # before any data is fetched.
    inputs = list_of_age_inputs(age_lower, age_upper)
    if not inputs:
        raise ValueError(
            "age_range {!r} does not cover any age band".format(age_range)
        )

    df = pop_df(year, sex, age_lower, age_upper, place)

    sex_key = sex.lower()
    # Noun used in the sentence; unrecognised values are echoed as given.
    g = {'people': 'people', 'male': 'males', 'female': 'females'}.get(sex_key, sex)
    # Pick the figure by column name. Positional indexing into df.sum()
    # returned the Female total for any value other than 'Male'/'People'.
    column = {'male': 'Male', 'female': 'Female'}.get(sex_key, 'Total')
    # int() gives an arbitrary-precision integer, so world totals in the
    # billions are safe regardless of the platform's default integer width.
    count = int(df[column].sum())

    age_l = inputs[0][0:2]
    age_h = '80 or over' if age_upper >= 80 else inputs[-1][2:4]

    if str(place).upper() in ('WLD', 'WORLD'):
        loc = 'the world'
    else:
        loc = iso3166.countries.get(place).name

    print("In {}, there are {} {} aged {} to aged {} living in {}.".format(
        year, count, g, age_l, age_h, loc))


In [162]:
# Example: everyone aged 0-50 worldwide in 2010. The upper bound is widened
# to the end of its age band, which is why the sentence reports ages up to 54.
population(year=2010,sex='People',age_range=(0,50),place='WLD')

In 2010, there are 5889158289 people aged 00 to aged 54 living in the world.
