In [40]:
import warnings
warnings.filterwarnings('ignore')
from pathlib import Path
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

In [41]:
# Import Clean CSV
TB_Rates = Path('Resources/TB_Rates_Cleaned.csv')

In [42]:
# Create Data Frame
tb_rates_df = pd.read_csv(TB_Rates)

In [43]:
tb_rates_df


Unnamed: 0,country,iso2,iso3,iso_numeric,g_whoregion,year,e_pop_num,e_inc_100k,e_inc_100k_lo,e_inc_100k_hi,...,cfr,cfr_lo,cfr_hi,cfr_pct,cfr_pct_lo,cfr_pct_hi,c_newinc_100k,c_cdr,c_cdr_lo,c_cdr_hi
0,Afghanistan,AF,AFG,4,EMR,2000,19542982,190.0,122.0,271.0,...,,,,,,,36.0,19.0,13.0,30.0
1,Afghanistan,AF,AFG,4,EMR,2001,19688632,189.0,122.0,271.0,...,,,,,,,51.0,27.0,19.0,42.0
2,Afghanistan,AF,AFG,4,EMR,2002,21000256,189.0,122.0,270.0,...,,,,,,,66.0,35.0,24.0,54.0
3,Afghanistan,AF,AFG,4,EMR,2003,22645130,189.0,122.0,270.0,...,,,,,,,61.0,32.0,23.0,50.0
4,Afghanistan,AF,AFG,4,EMR,2004,23553551,189.0,122.0,270.0,...,,,,,,,78.0,41.0,29.0,64.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4697,Zimbabwe,ZW,ZWE,716,AFR,2017,14751101,221.0,158.0,295.0,...,,,,,,,175.0,79.0,59.0,110.0
4698,Zimbabwe,ZW,ZWE,716,AFR,2018,15052184,210.0,153.0,276.0,...,,,,,,,167.0,80.0,61.0,110.0
4699,Zimbabwe,ZW,ZWE,716,AFR,2019,15354608,199.0,147.0,258.0,...,,,,,,,137.0,69.0,53.0,93.0
4700,Zimbabwe,ZW,ZWE,716,AFR,2020,15669666,181.0,130.0,240.0,...,,,,,,,100.0,55.0,42.0,77.0


In [44]:
# Drop Columns
tb_drop =   ['iso2',
            'iso3','iso_numeric',
            'g_whoregion',
            'e_inc_100k_lo',
            'e_inc_100k_hi',
            'cfr_lo',
            'cfr_hi',
            'cfr_pct_lo',
            'cfr_pct_hi',
            'c_cdr',
            'c_cdr_lo',
            'c_cdr_hi',
            'e_inc_num_lo',
            'e_inc_num_hi',
            'e_tbhiv_prct_lo',
            'e_tbhiv_prct_hi',
            'e_inc_tbhiv_100k_lo',
            'e_inc_tbhiv_100k_hi',
            'e_inc_tbhiv_num_lo',
            'e_inc_tbhiv_num_hi',
            'e_mort_exc_tbhiv_100k_lo',
            'e_mort_exc_tbhiv_100k_hi',
            'e_mort_tbhiv_100k_lo',
            'e_mort_tbhiv_100k_hi',
            'e_mort_tbhiv_num_lo',
            'e_mort_tbhiv_num_hi',
            'e_mort_100k_lo',
            'e_mort_100k_hi',
            'e_mort_num_lo',
            'e_mort_num_hi',
            'e_mort_exc_tbhiv_num_lo',
            'e_mort_exc_tbhiv_num_hi',
            ]
tb_rates_df.drop(tb_drop, inplace=True, axis=1)
tb_rates_df.columns


Index(['country', 'year', 'e_pop_num', 'e_inc_100k', 'e_inc_num',
       'e_tbhiv_prct', 'e_inc_tbhiv_100k', 'e_inc_tbhiv_num',
       'e_mort_exc_tbhiv_100k', 'e_mort_exc_tbhiv_num', 'e_mort_tbhiv_100k',
       'e_mort_tbhiv_num', 'e_mort_100k', 'e_mort_num', 'cfr', 'cfr_pct',
       'c_newinc_100k'],
      dtype='object')

In [45]:
tb_rename = {'country':'Country', 
            'year':'Country', 
            'e_pop_num':'Population', 
            'e_inc_100k': 'Cases per Cap.(All)', 
            'e_inc_num': 'Cases',
            'e_tbhiv_prct': 'Cases - HIV & TB %', 
            'e_inc_tbhiv_100k': 'Cases -HIV & TB per Cap.', 
            'e_inc_tbhiv_num': 'Cases - HIV & TB',
            'e_mort_exc_tbhiv_100k': 'Mortality - TB (exclude HIV)per Cap', 
            'e_mort_exc_tbhiv_num': 'Mortality - TB (exclude HIV)', 
            'e_mort_tbhiv_100k': 'Mortality - HIV & TB incident per Cap.',
            'e_mort_tbhiv_num':'Martality - HIV & TB', 
            'e_mort_100k':'Mortality - Per Cap', 
            'e_mort_num': 'Mortality', 
            'cfr':'Case Fatality Ratio', 
            'cfr_pct': 'Case Fatality Ratio %',
            'c_newinc_100k': 'Total New and Relapse Cases'}
tb_rates_df.rename(columns=tb_rename, inplace=True)
tb_rates_df.columns

Index(['Country', 'Country', 'Population', 'Cases per Cap.(All)', 'Cases',
       'Cases - HIV & TB %', 'Cases -HIV & TB per Cap.', 'Cases - HIV & TB',
       'Mortality - TB (exclude HIV)per Cap', 'Mortality - TB (exclude HIV)',
       'Mortality - HIV & TB incident per Cap.', 'Martality - HIV & TB',
       'Mortality - Per Cap', 'Mortality', 'Case Fatality Ratio',
       'Case Fatality Ratio %', 'Total New and Relapse Cases'],
      dtype='object')

In [46]:
tb_rates_df

Unnamed: 0,Country,Country.1,Population,Cases per Cap.(All),Cases,Cases - HIV & TB %,Cases -HIV & TB per Cap.,Cases - HIV & TB,Mortality - TB (exclude HIV)per Cap,Mortality - TB (exclude HIV),Mortality - HIV & TB incident per Cap.,Martality - HIV & TB,Mortality - Per Cap,Mortality,Case Fatality Ratio,Case Fatality Ratio %,Total New and Relapse Cases
0,Afghanistan,2000,19542982,190.0,37000,0.02,0.03,6.0,68.0,13000.0,0.17,33.0,68.0,13000.0,,,36.0
1,Afghanistan,2001,19688632,189.0,37000,0.01,0.03,6.0,63.0,12000.0,0.30,60.0,63.0,12000.0,,,51.0
2,Afghanistan,2002,21000256,189.0,40000,0.01,0.03,6.0,57.0,12000.0,0.27,57.0,57.0,12000.0,,,66.0
3,Afghanistan,2003,22645130,189.0,43000,0.01,0.03,6.0,58.0,13000.0,0.29,66.0,58.0,13000.0,,,61.0
4,Afghanistan,2004,23553551,189.0,44000,0.01,0.03,6.0,51.0,12000.0,0.29,67.0,52.0,12000.0,,,78.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4697,Zimbabwe,2017,14751101,221.0,33000,63.00,140.00,21000.0,8.3,1200.0,27.00,4000.0,35.0,5200.0,,,175.0
4698,Zimbabwe,2018,15052184,210.0,32000,62.00,130.00,20000.0,7.7,1200.0,24.00,3600.0,32.0,4800.0,,,167.0
4699,Zimbabwe,2019,15354608,199.0,31000,60.00,119.00,18000.0,11.0,1700.0,31.00,4800.0,43.0,6600.0,,,137.0
4700,Zimbabwe,2020,15669666,181.0,28000,61.00,108.00,17000.0,12.0,1800.0,30.00,4700.0,42.0,6600.0,,,100.0


In [1]:
# handle nas
