In [1]:
import pandas as pd
import numpy as np
import wbdata

In [2]:
def wb_clear(df, x):
    '''
    Normalise a two-level indexed frame so it can be joined on (country, year).

    Parameters
    ------------------
    df: pandas.DataFrame
        Frame with a two-level MultiIndex. Level 0 must hold strings (it is
        lower-cased) and level 1 must be castable to int (it is treated as
        the year).
    x: str
        Name of the value column; rows where it is missing are dropped.

    Returns
    ------------------
    pandas.DataFrame
        New frame with lower-cased level 0, integer level 1 renamed to 'year',
        restricted to years >= 2006 and to rows where `x` is not missing.
        The frame passed in is left unchanged.
    '''
    # Work on a copy: assigning to `df.index` directly would rewrite the
    # caller's frame in place before any filtering creates a new object.
    out = df.copy()

    idx = out.index
    idx = idx.set_levels(idx.levels[1].astype(int), level=1)
    idx = idx.set_levels(idx.levels[0].str.lower(), level=0)
    out.index = idx.set_names('year', level=1)

    # Plain boolean arrays avoid any index alignment when combining the masks.
    recent = out.index.get_level_values(1) >= 2006
    has_value = out.loc[:, x].notna().values
    return out[recent & has_value]

In [3]:
def add_wb_var(code, var_name, join_path='raw_data/master.csv',
               years=(2008, 2018), max_missing=.15):
    '''
    Get one additional variable from the WB API and add it to the data in
    master.csv.

    The joined data is restricted to `years`, then rows and columns with too
    many missing values are dropped, and finally only countries that still
    have a complete observation for every requested year are kept. A short
    summary (missing counts per column, shape, status messages) is printed.

    Parameters
    ------------------
    code: str
        Identifier for the data in the WB API, formatted as XX.XX.XX
    var_name: str
        Variable name that will be added to the returned data. Can be anything
        that is not already a column of the base file.
    join_path: str, optional
        Change base file for joining. The file must contain 'country' and
        'year' columns.
    years: iterable of int, optional
        Years to keep. Defaults to (2008, 2018).
    max_missing: float, optional
        Largest tolerated share of missing values in a row or a column.
        Defaults to 0.15.

    Returns
    ------------------
    pandas.DataFrame
        Dataframe containing data from join_path and the new variable from WB
        (the new variable is absent if it exceeded `max_missing`).

    Raises
    ------------------
    ValueError
        If `var_name` is already a column of the base file.
    '''
    years = sorted(set(years))

    base = pd.read_csv(join_path)
    if var_name in base.columns:
        # DataFrame.join would raise the same error type, but only after the
        # API round trip; fail early with a clearer message instead.
        raise ValueError(
            f"'{var_name}' is already a column in {join_path}; "
            "choose another var_name or a different base file"
        )

    x = wb_clear(wbdata.get_dataframe({code: var_name}), var_name)
    all_df = base.set_index(['country', 'year']).join(x).reset_index()

    all_df = all_df[all_df.year.isin(years)]

    # Drop observations whose share of missing values exceeds the threshold.
    sparse_rows = all_df.isna().mean(axis=1) > max_missing
    all_df = all_df[~sparse_rows.values]

    # Then drop variables that are still too sparse on the remaining rows.
    sparse_cols = all_df.isna().mean(axis=0) > max_missing
    all_df = all_df.loc[:, ~sparse_cols.values]

    # Keep only countries with one non-missing value per requested year in
    # every remaining column.
    per_country = all_df.groupby('country').count().min(axis=1)
    no_data_country = per_country.index[per_country != len(years)]
    all_df = all_df[~all_df.country.isin(no_data_country)]

    # Check for the new column by name: comparing column counts is wrong when
    # some other column was dropped by the sparsity filter.
    var_n = 'Jippikayjei you just successfully added one new variable!' if var_name in all_df.columns else 'Damn it too many empty values, just keep going!'
    # Compare distinct countries: row counts of the year-filtered result and
    # the unfiltered base are not comparable.
    country_n = 'You were able to keep all of the countries from the base data!' if all_df.country.nunique() == base.country.nunique() else 'Unfortunately you lost some sample countries with:('

    missing_per_column = list(all_df.isna().sum().items())
    print(f"{missing_per_column}\n\n{all_df.shape}\n\n{var_n}\n\n{country_n}")

    return all_df

### Life expectancy
- ('SP.DYN.LE00.IN', 'life_expectancy')
### Broadband
- ('IT.NET.BBND', 'broadband')

In [None]:
# Fetch the life expectancy indicator and join it onto the data stored in
# raw_data/small_master_c.csv.
df = add_wb_var(
    code='SP.DYN.LE00.IN',
    var_name='life_expectancy',
    join_path='raw_data/small_master_c.csv',
)

In [None]:
# Alternative output targets used for other runs:
#df.to_csv('raw_data/small_master.csv', index=False)
#df.to_csv('raw_data/small_master_base.csv', index=False)

# index=False: the integer index left over from filtering carries no
# information, and add_wb_var reads the file back with plain pd.read_csv, so
# writing it would add a new 'Unnamed: 0' column on every round trip.
df.to_csv('raw_data/small_master_c.csv', index=False)