In [9]:
import warnings
# NOTE(review): with no category/module arguments this filter silences every
# warning raised from here on, including any emitted while the libraries below
# are imported — consider narrowing it to the specific warning being avoided.
warnings.filterwarnings('ignore')
from pathlib import Path
import pandas as pd
import numpy as np
import scipy.stats as stats


In [10]:
# Load the three source CSVs from Resources/.
# Each path is kept in its own variable and read into a DataFrame right away,
# so a dataset's location and its frame sit side by side.

# TB rates
TB_Rates = Path('Resources/TB_Rates.csv')
tb_rates_df = pd.read_csv(TB_Rates)

# HDI
HDI = Path('Resources/HDI.csv')
hdi_df = pd.read_csv(HDI)

# Drug-resistant rates
Drug_Resistant_Rates = Path('Resources/Drug_Resistant_Rates.csv')
drug_resistant_df = pd.read_csv(Drug_Resistant_Rates)

In [11]:
# Preview the first five rows of the raw drug-resistance frame.
drug_resistant_df.head(n=5)

Unnamed: 0,country,iso2,iso3,iso_numeric,g_whoregion,year,source_rr_new,e_rr_pct_new,e_rr_pct_new_lo,e_rr_pct_new_hi,source_rr_ret,e_rr_pct_ret,e_rr_pct_ret_lo,e_rr_pct_ret_hi,e_inc_rr_num,e_inc_rr_num_lo,e_inc_rr_num_hi
0,Afghanistan,AF,AFG,4,EMR,2015,Model,5.1,0.45,19.0,Surveillance,8.6,6.5,11.0,3600,0,10000
1,Afghanistan,AF,AFG,4,EMR,2016,Model,5.1,0.5,20.0,Surveillance,11.0,9.4,14.0,3800,0,11000
2,Afghanistan,AF,AFG,4,EMR,2017,Model,5.2,0.53,19.0,Surveillance,15.0,13.0,16.0,3900,0,11000
3,Afghanistan,AF,AFG,4,EMR,2018,Model,5.2,0.54,20.0,Surveillance,19.0,17.0,20.0,4100,0,12000
4,Afghanistan,AF,AFG,4,EMR,2019,Model,5.3,0.55,20.0,Surveillance,24.0,21.0,26.0,4300,0,12000


In [14]:
# Remove unused columns: the ISO / WHO-region identifiers and the two
# `source_rr_*` columns, none of which are needed by the cells below.
dr_drop = [
    'iso2',
    'iso3',
    'iso_numeric',
    'g_whoregion',
    'source_rr_new',
    'source_rr_ret',
]

# errors='ignore' makes this cell safe to re-run: once the columns are gone a
# plain in-place drop would raise KeyError on the second execution.
# The frame is still modified in place, so every reference to it sees the change.
drug_resistant_df.drop(columns=dr_drop, errors='ignore', inplace=True)
drug_resistant_df

Unnamed: 0,country,year,e_rr_pct_new,e_rr_pct_new_lo,e_rr_pct_new_hi,e_rr_pct_ret,e_rr_pct_ret_lo,e_rr_pct_ret_hi,e_inc_rr_num,e_inc_rr_num_lo,e_inc_rr_num_hi
0,Afghanistan,2015,5.1,0.45,19.0,8.6,6.5,11.0,3600,0,10000
1,Afghanistan,2016,5.1,0.50,20.0,11.0,9.4,14.0,3800,0,11000
2,Afghanistan,2017,5.2,0.53,19.0,15.0,13.0,16.0,3900,0,11000
3,Afghanistan,2018,5.2,0.54,20.0,19.0,17.0,20.0,4100,0,12000
4,Afghanistan,2019,5.3,0.55,20.0,24.0,21.0,26.0,4300,0,12000
...,...,...,...,...,...,...,...,...,...,...,...
1486,Zimbabwe,2017,3.5,3.20,3.8,20.0,16.0,24.0,1500,1000,2000
1487,Zimbabwe,2018,2.9,2.70,3.1,21.0,18.0,25.0,1200,850,1600
1488,Zimbabwe,2019,2.4,2.20,2.7,23.0,20.0,27.0,1000,710,1400
1489,Zimbabwe,2020,2.0,1.70,2.4,25.0,20.0,30.0,830,540,1100


In [17]:
# Give the estimate columns human-readable labels.
# Columns not listed here (e.g. `country`, `year`) keep their original names.
dr_rename = {
    # Percentage estimates for new cases (point, low, high).
    'e_rr_pct_new': 'Est. New Cases %',
    'e_rr_pct_new_lo': 'Low Est. New Cases %',
    'e_rr_pct_new_hi': 'High Est. New Cases %',

    # Percentage estimates for the `_ret` group (point, low, high).
    # NOTE(review): `ret` presumably means previously treated (retreatment)
    # cases rather than reinfection — confirm against the data dictionary
    # before relying on these labels.
    'e_rr_pct_ret':'Est. Reinfection %',
    'e_rr_pct_ret_lo':'Low Est. Reinfection %',
    'e_rr_pct_ret_hi':'High Est. Reinfection %',

    # Absolute case-number estimates (point, low, high).
    'e_inc_rr_num':'Estimated New Case Number',
    'e_inc_rr_num_lo':'Low Estimated New Case Number',
    'e_inc_rr_num_hi':'High Estimated New Case Number'
}

# Apply the mapping to the column axis, modifying the frame in place.
drug_resistant_df.rename(dr_rename, axis='columns', inplace=True)
drug_resistant_df

Unnamed: 0,country,year,Est. New Cases %,Low Est. New Cases %,High Est. New Cases %,Est. Reinfection %,Low Est. Reinfection %,High Est. Reinfection %,Estimated New Case Number,Low Estimated New Case Number,High Estimated New Case Number
0,Afghanistan,2015,5.1,0.45,19.0,8.6,6.5,11.0,3600,0,10000
1,Afghanistan,2016,5.1,0.50,20.0,11.0,9.4,14.0,3800,0,11000
2,Afghanistan,2017,5.2,0.53,19.0,15.0,13.0,16.0,3900,0,11000
3,Afghanistan,2018,5.2,0.54,20.0,19.0,17.0,20.0,4100,0,12000
4,Afghanistan,2019,5.3,0.55,20.0,24.0,21.0,26.0,4300,0,12000
...,...,...,...,...,...,...,...,...,...,...,...
1486,Zimbabwe,2017,3.5,3.20,3.8,20.0,16.0,24.0,1500,1000,2000
1487,Zimbabwe,2018,2.9,2.70,3.1,21.0,18.0,25.0,1200,850,1600
1488,Zimbabwe,2019,2.4,2.20,2.7,23.0,20.0,27.0,1000,710,1400
1489,Zimbabwe,2020,2.0,1.70,2.4,25.0,20.0,30.0,830,540,1100
