In [1]:
%matplotlib inline

from pathlib import Path
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

DATA_EXT = (Path(os.getcwd()) / os.pardir / 'data' / 'external').resolve()

In [2]:
import sys
sys.path.append(os.path.join(os.getcwd(), os.pardir, 'src'))

%load_ext autoreload

%autoreload 2
from visualization.visualize import *

### This notebook loads the available world bank indicators for population, birth rate, and infant mortality rate per country and then saves that to `data/interim`

In [26]:
dfs = []
for indicator in (DATA_EXT/'world-bank-indicators').glob('*.csv'):
    if 'CBRT' in indicator.name:
        header = 'crude_birth_rate'
    elif 'IMRT' in indicator.name:
        header = 'infant_mortality_rate'
    elif 'TOTL' in indicator.name:
        header = 'population'
    
    df = pd.read_csv(indicator, skiprows=4, header=0)
    df = df.fillna(method='ffill', axis=1)
    df.rename(columns={'2016': f"{header}_2016"}, inplace=True)
    
    dfs.append(df[['Country Code', f"{header}_2016"]].set_index('Country Code'))

In [37]:
wb_data = pd.concat(dfs, axis=1).apply(pd.to_numeric, errors='coerce', axis=0)

wb_data

Unnamed: 0_level_0,crude_birth_rate_2016,population_2016,infant_mortality_rate_2016
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABW,10.963000,1.048220e+05,
AFG,33.980000,3.465603e+07,53.200000
AGO,42.321000,2.881346e+07,54.600000
ALB,11.879000,2.876101e+06,12.000000
AND,9.500000,7.728100e+04,2.400000
ARB,26.431200,4.064527e+08,28.604027
ARE,9.885000,9.269612e+06,6.600000
ARG,17.364000,4.384743e+07,9.900000
ARM,13.808000,2.924816e+06,11.900000
ASM,17.500000,5.559900e+04,


In [38]:
wb_data.to_csv(Path("../data/interim/external-processed/country_world_bank_indicators.csv"))