Project Deliverables

[#A] Data Imports

In [2]:
!pip install wbdata
!pip install cufflinks
!pip install iso3166

import iso3166 #iso3166.countries.get('country details')
import wbdata
import cufflinks as cf
import pandas as pd
import numpy as np
import plotly
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.offline as py
import plotly.graph_objs as go
cf.go_offline()

Collecting wbdata
  Using cached wbdata-0.3.0-py3-none-any.whl (14 kB)
Installing collected packages: wbdata
Successfully installed wbdata-0.3.0
Collecting iso3166
  Using cached iso3166-2.1.1-py3-none-any.whl (9.8 kB)
Installing collected packages: iso3166
Successfully installed iso3166-2.1.1



The Shapely GEOS version (3.10.3-CAPI-1.16.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow.



In [3]:
#wbdata.get_source() to get all sources
#wbdata.get_topic()
#wbdata.get_indicator()

source_id = 40 #Population Estimates and Projections
indicators = wbdata.get_indicator(source=source_id)

[#A] Population Pyramids

In [4]:
# Data from WDI on age-sex comes in the forms of variables
# which take the form "SP.POP.LLHH.MA" for males
# and "SP.POP.LLHH.FE" for females, where LL is the *low* end of
# age range, like "05" for 5-yo, and HH is the *high* end.

# We construct a list of age-ranges.

# Start with an empty list of age-rages
age_ranges = []

# Ranges top out at 80, and go in five year increments
for i in range(0,80,5):
    age_ranges.append(f"{i:02d}"+f"{i+4:02d}")

age_ranges.append("80UP")

male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}

variables = male_variables
variables.update(female_variables)

# WLD is the World; substitute your own code or list of codes.
# Remember you can search for the appropriate codes using
# wbdata.search_countries("")

df = wbdata.get_dataframe(variables,country="KOR")

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay',
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

year = 2020

bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='pink'),
               hoverinfo='skip'
               ),

        go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='blue'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))

In [5]:
df = wbdata.get_dataframe(variables,country="MMR")

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay',
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

year = 2020

bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='pink'),
               hoverinfo='skip'
               ),

        go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='blue'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))

[C#] Animated Population Pyramids

In [40]:
age_ranges = []

# Ranges top out at 80, and go in five year increments
for i in range(0,80,5):
    age_ranges.append(f"{i:02d}"+f"{i+4:02d}")

age_ranges.append("80UP")

print(age_ranges)

male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}

variables = male_variables
variables.update(female_variables)

print(variables)

# MMR
df = wbdata.get_dataframe(variables,country="MMR")

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay',
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

year = 2021

bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))

# Count down by increments of 20 years
years = range(2021,1961,-20)

# This makes a list of graphs, year by year
bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men {:d}'.format(year),
               hoverinfo='skip'
              )
        for year in years]
          
bins += [go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
                y=[int(s[:2])+1 for s in age_ranges],
                orientation='h',
                name='Women {:d}'.format(year),
                hoverinfo='skip',
               )
         for year in years]

py.iplot(dict(data=bins, layout=layout))


['0004', '0509', '1014', '1519', '2024', '2529', '3034', '3539', '4044', '4549', '5054', '5559', '6064', '6569', '7074', '7579', '80UP']
{'SP.POP.0004.MA': 'Males 0004', 'SP.POP.0509.MA': 'Males 0509', 'SP.POP.1014.MA': 'Males 1014', 'SP.POP.1519.MA': 'Males 1519', 'SP.POP.2024.MA': 'Males 2024', 'SP.POP.2529.MA': 'Males 2529', 'SP.POP.3034.MA': 'Males 3034', 'SP.POP.3539.MA': 'Males 3539', 'SP.POP.4044.MA': 'Males 4044', 'SP.POP.4549.MA': 'Males 4549', 'SP.POP.5054.MA': 'Males 5054', 'SP.POP.5559.MA': 'Males 5559', 'SP.POP.6064.MA': 'Males 6064', 'SP.POP.6569.MA': 'Males 6569', 'SP.POP.7074.MA': 'Males 7074', 'SP.POP.7579.MA': 'Males 7579', 'SP.POP.80UP.MA': 'Males 80UP', 'SP.POP.0004.FE': 'Females 0004', 'SP.POP.0509.FE': 'Females 0509', 'SP.POP.1014.FE': 'Females 1014', 'SP.POP.1519.FE': 'Females 1519', 'SP.POP.2024.FE': 'Females 2024', 'SP.POP.2529.FE': 'Females 2529', 'SP.POP.3034.FE': 'Females 3034', 'SP.POP.3539.FE': 'Females 3539', 'SP.POP.4044.FE': 'Females 4044', 'SP.POP.4549

In [27]:
# KOR

df = wbdata.get_dataframe(variables,country="KOR")

py.init_notebook_mode(connected=True)

layout = go.Layout(barmode='overlay',
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(title='Number'))

year = 2021

bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'
               ),

        go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
               y=[int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip',
               )
        ]
py.iplot(dict(data=bins, layout=layout))

# Count down by increments of 20 years
years = range(2021,1961,-20)

# This makes a list of graphs, year by year
bins = [go.Bar(x = df.loc[str(year),:].filter(regex="Male").values,
               y = [int(s[:2])+1 for s in age_ranges],
               orientation='h',
               name='Men {:d}'.format(year),
               hoverinfo='skip'
              )
        for year in years]
          
bins += [go.Bar(x = -df.loc[str(year),:].filter(regex="Female").values,
                y=[int(s[:2])+1 for s in age_ranges],
                orientation='h',
                name='Women {:d}'.format(year),
                hoverinfo='skip',
               )
         for year in years]

py.iplot(dict(data=bins, layout=layout))


[#A] Population Dataframe

In [43]:
def dataframefunction(agelowerbound=0, ageupperbound=80, givencountry='world'):
    age_ranges = []
    editedcountry = givencountry
    if len(givencountry) != 3:
        editedcountry = wbdata.search_countries(givencountry)[0]['id']
        if editedcountry == 'ARB' and givencountry =='World':
            editedcountry = 'WLD'
    # Ranges top out at 80, and go in five year increments
    for i in range(agelowerbound,ageupperbound,5):
        age_ranges.append(f"{i:02d}"+f"{i+4:02d}")

    if ageupperbound == 80:
        age_ranges.append("80UP")

    male_variables = {"SP.POP."+age_range+".MA":"Males "+age_range for age_range in age_ranges}
    female_variables = {"SP.POP."+age_range+".FE":"Females "+age_range for age_range in age_ranges}

    variables = male_variables
    variables.update(female_variables)

    country = editedcountry
    df = wbdata.get_dataframe(variables, country)
    if len(country) == 3:
        country = wbdata.get_country(country)[0]['name']
    df['Country'] = [country for i in range(0, len(df))]
    df.insert(0, "Country", df.pop("Country"))
    return df
df

Unnamed: 0_level_0,Males 0004,Males 0509,Males 1014,Males 1519,Males 2024,Males 2529,Males 3034,Males 3539,Males 4044,Males 4549,...,Females 3539,Females 4044,Females 4549,Females 5054,Females 5559,Females 6064,Females 6569,Females 7074,Females 7579,Females 80UP
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021,2310513.0,2271404.0,2287917.0,2351085.0,2280229.0,2234744.0,2139374.0,2099590.0,1893296.0,1699389.0,...,2073224.0,1895750.0,1731185.0,1563398.0,1349404.0,1115453.0,867252.0,576126.0,335041.0,276239.0
2020,2318768.0,2268374.0,2300425.0,2360501.0,2278275.0,2232782.0,2138928.0,2078186.0,1866538.0,1677226.0,...,2052893.0,1869607.0,1709572.0,1537268.0,1316917.0,1089436.0,839680.0,544629.0,326585.0,274011.0
2019,2320731.0,2272722.0,2315383.0,2362041.0,2284447.0,2229585.0,2147964.0,2048967.0,1839680.0,1654654.0,...,2025198.0,1843339.0,1687972.0,1509382.0,1284612.0,1061486.0,810525.0,512123.0,318683.0,269853.0
2018,2318211.0,2282127.0,2335591.0,2358455.0,2295470.0,2221996.0,2163090.0,2016469.0,1815137.0,1631310.0,...,1994137.0,1819268.0,1665460.0,1479138.0,1254048.0,1032064.0,775241.0,486284.0,312733.0,265549.0
2017,2313547.0,2294378.0,2360918.0,2350164.0,2305666.0,2213232.0,2171965.0,1985639.0,1792966.0,1606626.0,...,1964709.0,1797561.0,1641722.0,1447066.0,1224864.0,1001402.0,733651.0,469619.0,308673.0,260970.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1964,1940328.0,1624079.0,1378332.0,1062397.0,915000.0,864320.0,820503.0,720349.0,618131.0,536819.0,...,721353.0,613565.0,542941.0,442358.0,372893.0,298392.0,212654.0,139769.0,77578.0,45136.0
1963,1905143.0,1584722.0,1319238.0,1024788.0,903413.0,864333.0,809531.0,704501.0,611037.0,521891.0,...,703279.0,606083.0,528105.0,433735.0,367092.0,289902.0,206746.0,135677.0,74951.0,43332.0
1962,1872075.0,1542314.0,1251804.0,1000488.0,896565.0,864135.0,796036.0,688656.0,605449.0,505329.0,...,683943.0,600948.0,511596.0,426896.0,361291.0,280987.0,201382.0,131782.0,72361.0,41637.0
1961,1839764.0,1497163.0,1190921.0,976197.0,893178.0,862143.0,781703.0,674505.0,598370.0,489794.0,...,666607.0,594470.0,496160.0,420669.0,354902.0,272419.0,196191.0,128005.0,69823.0,40003.0


[A#] Population Statistics

In [54]:
def population(year='', sex='', age_range=(0), place=''):
    newplace = place
    if len(place) != 3:
        newplace = wbdata.search_countries(place)[0]['id']
        if newplace == 'ARB' and place =='World':
            newplace = 'WLD'
    upperage = age_range[1]
    lowerage = age_range[0]
    if upperage > 80:
        upperage = 80
    theage = (upperage - lowerage)//5 + 1
    theyear = 2021 - int(year)
    if sex == 'Male'or sex == 'male':
        value = dataframefunction(lowerage, upperage, newplace).iloc[theyear,1:(theage+1)].sum()
        return 'In ' + str(year) + ', there are ' + str(value) + ' ' + str(sex.lower()) + "s aged " + str(age_range[0]) + " to "+ str(age_range[1]) + " in the " + wbdata.get_country(newplace)[0]['name']
    if sex == 'Female' or sex == 'female':
        value = dataframefunction(lowerage, upperage, newplace).iloc[theyear,18:18+(theage+1)].sum()
        return 'In ' + str(year) + ', there are ' + str(value) + ' ' + str(sex.lower()) + "s aged " + str(age_range[0]) + " to "+ str(age_range[1]) + " in the " + wbdata.get_country(newplace)[0]['name']
    if sex == 'People' or sex == 'people':
        value = dataframefunction(lowerage, upperage, newplace).iloc[theyear,1:(theage+1)].sum() + dataframefunction(lowerage, upperage, newplace).iloc[theyear,18:18+(theage+1)].sum()
        return 'In ' + str(year) + ', there are ' + str(value) + ' ' + str(sex.lower()) + " aged " + str(age_range[0]) + " to "+ str(age_range[1]) + " in the " + wbdata.get_country(newplace)[0]['name']
    
population(year='2011', sex='female', age_range=(40, 80), place='MMR') 

'In 2011, there are 227136.0 females aged 40 to 80 in the Myanmar'

[#C] Other Visualization Tools

#1: GDP Visualizations showing the growth rate of GDP over time in South Korea vs Myanmar and how GDP has changed over time in each country

In [13]:
# Give variable for clarity
variable_labels = {"NY.GDP.PCAP.CD":"GDP per capita"}

myanmar = wbdata.get_dataframe(variable_labels, country="MMR")

# Date index is of type string; change to integers
myanmar.index = myanmar.index.astype(int)

# Print a few years' data
myanmar.head()

myanmar.iplot(title="Myanmar GDP Over Time",xTitle='Year',yTitle='GDP per Capita (current US$)')

In [12]:
variable_labels = {"NY.GDP.PCAP.CD":"GDP per capita"}
south_korea = wbdata.get_dataframe(variable_labels, country="KOR")
south_korea.index = south_korea.index.astype(int)
south_korea.head()
south_korea.iplot(title="South Korea GDP Over Time",xTitle='Year',yTitle='GDP per Capita (current US$)')

In [14]:
gdp_indicator = {"NY.GDP.PCAP.CD":"GDP per capita"} 
countries = {"MMR":"Mynmar",
             "KOR":"South Korea"}

gdp = wbdata.get_dataframe(gdp_indicator, country = countries).squeeze().unstack('country')
gdp.iplot(title="GDP per Capita of Myanmar and South Korea Over Time",xTitle='Year',yTitle='GDP per Capita (current US$)')

In [15]:
variable_labels = {"NY.GDP.PCAP.CD":"GDP per capita"}

# Three letter codes come from wbdata.get_country()
countries = {"MMR":"Mynmar",
             "KOR":"South Korea"}

df = wbdata.get_dataframe(variable_labels, country = countries).squeeze()

df = df.unstack('country')
# Date index is of type string; change to integers
df.index = df.index.astype(int)

# Differences (over time) in logs give us growth rates
np.log(df).diff().iplot(title="GDP Growth Rate of Mynmar vs South Korea",
                        yTitle="Growth Rate",xTitle='Year')

2. Education Visualizations of South Korea and Myanmar showing the number of children out of primary school, the number of children in secondary education, and the percent of children enrolled in primary school

In [16]:
#variable_labels = {"SE.PRM.UNER":  "Children out of primary school"}

# Three letter codes come from wbdata.get_country()
countries = {"MMR":"Myanmar",
             "KOR":"South Korea"}
variable_labels1 = {"SE.PRM.UNER":  "Children out of primary school"}

df = wbdata.get_dataframe(variable_labels1, country = countries).squeeze()

df = df.unstack('country')
# Date index is of type string; change to integers
df.index = df.index.astype(int)

# Differences (over time) in logs give us growth rates
df.iplot(title="Number of Children out of Primary School",
                       yTitle="Number of Children",xTitle='Year')

In [17]:
variable_labels2 = {"SE.SEC.ENRL.GC": "Secondary Education"}
countries = {"MMR":"Myanmar",
             "KOR":"South Korea"}

df = wbdata.get_dataframe(variable_labels2, country = countries).squeeze()

df = df.unstack('country')
# Date index is of type string; change to integers
df.index = df.index.astype(int)

# Differences (over time) in logs give us growth rates
df.iplot(title="Number of Children In Secondary Education",
                       yTitle="Number of Children",xTitle='Year')

In [18]:
variable_labels3 = {"SE.PRM.NENR": "Percent Net Primary School Enrollment"}
countries = {"MMR":"Myanmar",
             "KOR":"South Korea"}

df = wbdata.get_dataframe(variable_labels3, country = countries).squeeze()

df = df.unstack('country')
# Date index is of type string; change to integers
df.index = df.index.astype(int)

# Differences (over time) in logs give us growth rates
df.iplot(title="Percent of Children Enrolled in Primary School",
                       yTitle="Number of Children",xTitle='Year')

3. Tax Visualizations showing the tax revenue over time for Myanmar and South Korea

In [19]:
variable_labels4 = {"GC.TAX.TOTL.GD.ZS": "Tax Revenue"}

Myanmar4 = wbdata.get_dataframe(indicators=variable_labels2,country="MMR")
Korea4 = wbdata.get_dataframe(variable_labels4, country="KOR")

countries = {"MMR":"Myanmar",
             "KOR":"South Korea"}

df = wbdata.get_dataframe(variable_labels4, country = countries).squeeze()

df = df.unstack('country')
# Date index is of type string; change to integers
df.index = df.index.astype(int)

# Differences (over time) in logs give us growth rates
df.iplot(title="Tax Revenue as a % of GDP",
                       yTitle="% of GDP",xTitle='Year')


4. Foreign Direct Investment Visualizations showing how FDI changes between Myanmar and South Korea

In [20]:
variable_labels4 = {"BX.KLT.DINV.CD.WD":"Foreign Direct Investment, net inflows(in current USD)"}
                   
korea_fdi = wbdata.get_dataframe(variable_labels4, country="KOR")
myanmar_fdi = wbdata.get_dataframe(variable_labels4, country = "MMR")

korea_fdi.iplot(title="Foreign Direct Investment Inflows",xTitle='Year',yTitle='Investment in USD')
myanmar_fdi.iplot(title="Foreign Direct Investment Inflows",xTitle='Year',yTitle='Investment in USD')

foreign_investment= wbdata.get_dataframe(variable_labels4, country= countries).squeeze().unstack('country')

foreign_investment.iplot(title= "Foreign Investment of Myanmar and South Korea Over Time", xTitle= 'Year', yTitle= 'Investment in USD')


5. Export of goods and services visualizations showing how FDI changes between Myanmar and South Korea

In [25]:
variable_labels5 = {"BX.GSR.TOTL.CD":"Exports of Goods and Services in USD"}
                    
korea_exports = wbdata.get_dataframe(variable_labels5, country="KOR")
myanmar_exports = wbdata.get_dataframe(variable_labels5, country = "MMR")

exports= wbdata.get_dataframe(variable_labels5, country= countries).squeeze().unstack('country')

korea_exports.iplot(title= "Exports of South Korea Over Time", xTitle= 'Year', yTitle= 'Investment in USD')
myanmar_exports.iplot(title= "Exports of Myanmar Over Time", xTitle= 'Year', yTitle= 'Investment in USD')
exports.iplot(title= "Exports of Myanmar and South Korea Over Time", xTitle= 'Year', yTitle= 'Investment in USD')

6. Population Visualizations of South Korea and Myanmar over Time

In [42]:
variable_labels6 = {"SP.POP.TOTL":"Total Population"}
                    
korea_exports = wbdata.get_dataframe(variable_labels6, country="KOR")
myanmar_exports = wbdata.get_dataframe(variable_labels6, country = "MMR")

exports= wbdata.get_dataframe(variable_labels6, country= countries).squeeze().unstack('country')

#korea_exports.iplot(title= "Total Population of South Korea Over Time", xTitle= 'Year', yTitle= 'Number of People')
#myanmar_exports.iplot(title= "Total Population of Myanmar Over Time", xTitle= 'Year', yTitle= 'Number of People')
exports.iplot(title= "Total Population of Myanmar and South Korea Over Time", xTitle= 'Year', yTitle= 'Number of People')
