In [1]:
%matplotlib notebook

import pandas as pd
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt 
import numpy as np
import scipy.stats as stats

In [2]:
# analysis of health indicators in afghanistan 2015

# import files

whole_data = Path("C:\\Users\\sohai\\OneDrive\\Desktop\\Project1_group4\\all_data.csv")

df = pd.read_csv(whole_data)

df.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1990,1991,1992,1993,1994,1995,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Afghanistan,AFG,Total fertility rate (TFR) (births per woman)...,SP.DYN.TFRT.Q1,,,,,,,...,,,,5.3,,,,,,
1,Afghanistan,AFG,Total fertility rate (TFR) (births per woman)...,SP.DYN.TFRT.Q2,,,,,,,...,,,,5.4,,,,,,
2,Afghanistan,AFG,Total fertility rate (TFR) (births per woman)...,SP.DYN.TFRT.Q3,,,,,,,...,,,,5.8,,,,,,
3,Afghanistan,AFG,Total fertility rate (TFR) (births per woman)...,SP.DYN.TFRT.Q4,,,,,,,...,,,,5.3,,,,,,
4,Afghanistan,AFG,Total fertility rate (TFR) (births per woman)...,SP.DYN.TFRT.Q5,,,,,,,...,,,,4.6,,,,,,


In [3]:
#isolate Afghanistan's data only

afg_whole_data = df.loc[(df["Country Name"] == "Afghanistan")]


afg_whole_data

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1990,1991,1992,1993,1994,1995,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Afghanistan,AFG,Total fertility rate (TFR) (births per woman)...,SP.DYN.TFRT.Q1,,,,,,,...,,,,5.3,,,,,,
1,Afghanistan,AFG,Total fertility rate (TFR) (births per woman)...,SP.DYN.TFRT.Q2,,,,,,,...,,,,5.4,,,,,,
2,Afghanistan,AFG,Total fertility rate (TFR) (births per woman)...,SP.DYN.TFRT.Q3,,,,,,,...,,,,5.8,,,,,,
3,Afghanistan,AFG,Total fertility rate (TFR) (births per woman)...,SP.DYN.TFRT.Q4,,,,,,,...,,,,5.3,,,,,,
4,Afghanistan,AFG,Total fertility rate (TFR) (births per woman)...,SP.DYN.TFRT.Q5,,,,,,,...,,,,4.6,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,Afghanistan,AFG,Vitamin A supplements for postpartum women (% ...,SN.ITK.VAPP.Q1.ZS,,,,,,,...,,,,18.3,,,,,,
416,Afghanistan,AFG,Vitamin A supplements for postpartum women (% ...,SN.ITK.VAPP.Q2.ZS,,,,,,,...,,,,18.5,,,,,,
417,Afghanistan,AFG,Vitamin A supplements for postpartum women (% ...,SN.ITK.VAPP.Q3.ZS,,,,,,,...,,,,20.9,,,,,,
418,Afghanistan,AFG,Vitamin A supplements for postpartum women (% ...,SN.ITK.VAPP.Q4.ZS,,,,,,,...,,,,27.0,,,,,,


In [4]:
#further clean data to eliminate un-needed data

afg_year_data = afg_whole_data[["Country Name", "Indicator Name", "2015"]]


afg_year_data

Unnamed: 0,Country Name,Indicator Name,2015
0,Afghanistan,Total fertility rate (TFR) (births per woman)...,5.3
1,Afghanistan,Total fertility rate (TFR) (births per woman)...,5.4
2,Afghanistan,Total fertility rate (TFR) (births per woman)...,5.8
3,Afghanistan,Total fertility rate (TFR) (births per woman)...,5.3
4,Afghanistan,Total fertility rate (TFR) (births per woman)...,4.6
...,...,...,...
415,Afghanistan,Vitamin A supplements for postpartum women (% ...,18.3
416,Afghanistan,Vitamin A supplements for postpartum women (% ...,18.5
417,Afghanistan,Vitamin A supplements for postpartum women (% ...,20.9
418,Afghanistan,Vitamin A supplements for postpartum women (% ...,27.0


In [5]:

unique_indicators = afg_whole_data["Indicator Name"].unique()

unique_indicators

array([' Total fertility rate (TFR) (births per woman): Q1 (lowest)',
       ' Total fertility rate (TFR) (births per woman): Q2',
       ' Total fertility rate (TFR) (births per woman): Q3',
       ' Total fertility rate (TFR) (births per woman): Q4',
       ' Total fertility rate (TFR) (births per woman): Q5 (highest)',
       'Acceptability of media messages on family planning (% of women): Q1 (lowest)',
       'Acceptability of media messages on family planning (% of women): Q2',
       'Acceptability of media messages on family planning (% of women): Q3',
       'Acceptability of media messages on family planning (% of women): Q4',
       'Acceptability of media messages on family planning (% of women): Q5 (highest)',
       'Antenatal care (any skilled personnel) (% of women with a birth): Q1 (lowest)',
       'Antenatal care (any skilled personnel) (% of women with a birth): Q2',
       'Antenatal care (any skilled personnel) (% of women with a birth): Q3',
       'Antenatal car

In [6]:
# Isolate the chosen Indicators only by using isin

indicators_defecation = ['People practicing open defecation (% of population): Q1 (lowest)',
              'People practicing open defecation (% of population): Q2',
              'People practicing open defecation (% of population): Q3',
              'People practicing open defecation (% of population): Q4',
              'People practicing open defecation (% of population): Q5 (highest)']

indicators_defecation_rural = ['People practicing open defecation, rural (% of rural population): Q1 (lowest)',
                               'People practicing open defecation, rural (% of rural population): Q2',
                               'People practicing open defecation, rural (% of rural population): Q3',
                               'People practicing open defecation, rural (% of rural population): Q4',
                               'People practicing open defecation, rural (% of rural population): Q5 (highest)']

indicators_defecation_urban = ['People practicing open defecation, urban (% of urban population): Q1 (lowest)',
                               'People practicing open defecation, urban (% of urban population): Q2',
                               'People practicing open defecation, urban (% of urban population): Q3',
                               'People practicing open defecation, urban (% of urban population): Q4',
                               'People practicing open defecation, urban (% of urban population): Q5 (highest)']

indicators_water = ['People using at least basic drinking water services (% of  population): Q1 (lowest)',
                    'People using at least basic drinking water services (% of  population): Q2',
                    'People using at least basic drinking water services (% of  population): Q3',
                    'People using at least basic drinking water services (% of  population): Q4',
                    'People using at least basic drinking water services (% of  population): Q5 (highest)']

indicators_water_rural = ['People using at least basic drinking water services, rural (% of rural population): Q1 (lowest)',
                          'People using at least basic drinking water services, rural (% of rural population): Q2',
                          'People using at least basic drinking water services, rural (% of rural population): Q3',
                          'People using at least basic drinking water services, rural (% of rural population): Q4',
                          'People using at least basic drinking water services, rural (% of rural population): Q5 (highest)']

indicators_water_urban = ['People using at least basic drinking water services, urban (% of urban population): Q1 (lowest)',
                          'People using at least basic drinking water services, urban (% of urban population): Q2',
                          'People using at least basic drinking water services, urban (% of urban population): Q3',
                          'People using at least basic drinking water services, urban (% of urban population): Q4',
                          'People using at least basic drinking water services, urban (% of urban population): Q5 (highest)']

indicators_sanitary_services = ['People using at least basic sanitation services (% of  population): Q1 (lowest)',
              'People using at least basic sanitation services (% of  population): Q2',
              'People using at least basic sanitation services (% of  population): Q3',
              'People using at least basic sanitation services (% of  population): Q4',
              'People using at least basic sanitation services (% of  population): Q5 (highest)']


indicators_sanitary_services_rural = ['People using at least basic sanitation services, rural (% of rural population): Q1 (lowest)',
                                      'People using at least basic sanitation services, rural (% of rural population): Q2', 
                                      'People using at least basic sanitation services, rural (% of rural population): Q3',
                                      'People using at least basic sanitation services, rural (% of rural population): Q4',
                                      'People using at least basic sanitation services, rural (% of rural population): Q5 (highest)']


indicators_sanitary_services_urban = ['People using at least basic sanitation services, urban (% of urban population): Q1 (lowest)',
                                      'People using at least basic sanitation services, urban (% of urban population): Q2',
                                      'People using at least basic sanitation services, urban (% of urban population): Q3',
                                      'People using at least basic sanitation services, urban (% of urban population): Q4',
                                      'People using at least basic sanitation services, urban (% of urban population): Q5 (highest)',]



indicators_ARI_prevalance = ['Prevalence of acute respiratory infection (ARI) (% of children under 5): Q1 (lowest)',
              'Prevalence of acute respiratory infection (ARI) (% of children under 5): Q2',
              'Prevalence of acute respiratory infection (ARI) (% of children under 5): Q3',
              'Prevalence of acute respiratory infection (ARI) (% of children under 5): Q4',
              'Prevalence of acute respiratory infection (ARI) (% of children under 5): Q5 (highest)']

indicators_diarrhea_prevalance  =['Prevalence of diarrhea (% of children under 5): Q1 (lowest)',
              'Prevalence of diarrhea (% of children under 5): Q2',
              'Prevalence of diarrhea (% of children under 5): Q3',
              'Prevalence of diarrhea (% of children under 5): Q4',
              'Prevalence of diarrhea (% of children under 5): Q5 (highest)']

indicators_ARI_treatment = ['Treatment of acute respiratory infection (ARI) (% of children under 5 taken to a health provider): Q1 (lowest)',
              'Treatment of acute respiratory infection (ARI) (% of children under 5 taken to a health provider): Q2',
              'Treatment of acute respiratory infection (ARI) (% of children under 5 taken to a health provider): Q3',
              'Treatment of acute respiratory infection (ARI) (% of children under 5 taken to a health provider): Q4',
              'Treatment of acute respiratory infection (ARI) (% of children under 5 taken to a health provider): Q5 (highest)']

indicators_diarrhea_treatment = ['Treatment of diarrhea (ORS, RHS or increased fluids) (% of children under 5): Q1 (lowest)',
              'Treatment of diarrhea (ORS, RHS or increased fluids) (% of children under 5): Q2',
              'Treatment of diarrhea (ORS, RHS or increased fluids) (% of children under 5): Q3',
              'Treatment of diarrhea (ORS, RHS or increased fluids) (% of children under 5): Q4',
              'Treatment of diarrhea (ORS, RHS or increased fluids) (% of children under 5): Q5 (highest)']

mortality = ['Under-5 mortality rate (per 1,000 live births): Q1 (lowest)',
              'Under-5 mortality rate (per 1,000 live births): Q2',
              'Under-5 mortality rate (per 1,000 live births): Q3',
              'Under-5 mortality rate (per 1,000 live births): Q4',
              'Under-5 mortality rate (per 1,000 live births): Q5 (highest)']


In [13]:
Quintiles = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']

In [14]:
#People practicing open defecation data  (% of population)

def_indicator = afg_year_data[afg_year_data["Indicator Name"].isin(indicators_defecation)]
def_indicator['Quintiles'] = Quintiles
def_indicator= def_indicator.set_index("Quintiles").drop('Country Name', axis=1)

def_indicator


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  def_indicator['Quintiles'] = Quintiles


Unnamed: 0_level_0,Indicator Name,2015
Quintiles,Unnamed: 1_level_1,Unnamed: 2_level_1
Q1,People practicing open defecation (% of popula...,23.15329
Q2,People practicing open defecation (% of popula...,17.63285
Q3,People practicing open defecation (% of popula...,11.75851
Q4,People practicing open defecation (% of popula...,17.91257
Q5,People practicing open defecation (% of popula...,1.08756


In [9]:
#People practicing open defecation data  (% of population)
def_indicator_rural = afg_year_data[afg_year_data["Indicator Name"].isin(indicators_defecation_rural)]

def_indicator_rural['Quintiles'] = Quintiles
def_indicator_rural= def_indicator_rural.set_index("Quintiles").drop('Country Name', axis=1)

def_indicator_rural 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  def_indicator_rural['Quintiles'] = Quintiles


Unnamed: 0_level_0,Indicator Name,2015
Quintiles,Unnamed: 1_level_1,Unnamed: 2_level_1
Q1,"People practicing open defecation, rural (% of...",24.11084
Q2,"People practicing open defecation, rural (% of...",18.9242
Q3,"People practicing open defecation, rural (% of...",16.04451
Q4,"People practicing open defecation, rural (% of...",12.35115
Q5,"People practicing open defecation, rural (% of...",9.77398


In [10]:
def_indicator_urban = afg_year_data[afg_year_data["Indicator Name"].isin(indicators_defecation_urban)]
def_indicator_urban['Quintiles'] = Quintiles
def_indicator_urban= def_indicator_urban.set_index("Quintiles").drop('Country Name', axis=1)

def_indicator_urban


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  def_indicator_urban['Quintiles'] = Quintiles


Unnamed: 0_level_0,Indicator Name,2015
Quintiles,Unnamed: 1_level_1,Unnamed: 2_level_1
Q1,"People practicing open defecation, urban (% of...",3.87469
Q2,"People practicing open defecation, urban (% of...",0.25908
Q3,"People practicing open defecation, urban (% of...",0.12752
Q4,"People practicing open defecation, urban (% of...",0.25798
Q5,"People practicing open defecation, urban (% of...",0.0


In [22]:
merged_def_data = pd.merge(pd.merge(def_indicator, def_indicator_rural,on='Quintiles'),def_indicator_urban,on='Quintiles')
merged_def_data


Unnamed: 0_level_0,Indicator Name_x,2015_x,Indicator Name_y,2015_y,Indicator Name,2015
Quintiles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Q1,People practicing open defecation (% of popula...,23.15329,"People practicing open defecation, rural (% of...",24.11084,"People practicing open defecation, urban (% of...",3.87469
Q2,People practicing open defecation (% of popula...,17.63285,"People practicing open defecation, rural (% of...",18.9242,"People practicing open defecation, urban (% of...",0.25908
Q3,People practicing open defecation (% of popula...,11.75851,"People practicing open defecation, rural (% of...",16.04451,"People practicing open defecation, urban (% of...",0.12752
Q4,People practicing open defecation (% of popula...,17.91257,"People practicing open defecation, rural (% of...",12.35115,"People practicing open defecation, urban (% of...",0.25798
Q5,People practicing open defecation (% of popula...,1.08756,"People practicing open defecation, rural (% of...",9.77398,"People practicing open defecation, urban (% of...",0.0


In [34]:
merged_def = merged_def_data.rename(columns={'Indicator Name_x': 'Open defecation practice (% of Population)', 
                                '2015_x': '2015_(% of Population)',
                                'Indicator Name_y': 'Open defecation practice Rural(% of Population)',
                                '2015_y': '2015_(% of rural population)',
                                'Indicator Name': 'Open defecation practice Urban(% of Population)',
                                '2015': '2015_(% of urban population)'})

merged_def

Unnamed: 0_level_0,Open defecation practice (% of Population),2015_(% of Population),Open defecation practice Rural(% of Population),2015_(% of rural population),Open defecation practice Urban(% of Population),2015_(% of urban population)
Quintiles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Q1,People practicing open defecation (% of popula...,23.15329,"People practicing open defecation, rural (% of...",24.11084,"People practicing open defecation, urban (% of...",3.87469
Q2,People practicing open defecation (% of popula...,17.63285,"People practicing open defecation, rural (% of...",18.9242,"People practicing open defecation, urban (% of...",0.25908
Q3,People practicing open defecation (% of popula...,11.75851,"People practicing open defecation, rural (% of...",16.04451,"People practicing open defecation, urban (% of...",0.12752
Q4,People practicing open defecation (% of popula...,17.91257,"People practicing open defecation, rural (% of...",12.35115,"People practicing open defecation, urban (% of...",0.25798
Q5,People practicing open defecation (% of popula...,1.08756,"People practicing open defecation, rural (% of...",9.77398,"People practicing open defecation, urban (% of...",0.0


In [35]:
merged_def_plot = merged_def.plot(kind ='bar', figsize = (8,5), rot=45, width = 0.8) 


merged_def_plot.set_title(label = "Open defecation practice (% of Afghanistan population in 2015)")
merged_def_plot.set_xlabel("Data per Quintiles")
merged_def_plot.set_ylabel("% of population")


plt.grid()
plt.tight_layout()

#plt.savefig("images/def_indicator bar chart.png")
plt.show
#plt.close()
                     
                


<IPython.core.display.Javascript object>

<function matplotlib.pyplot.show(*, block=None)>

In [None]:
fig, axis = plt.subplots(1, 3, figsize = (10,5))

def_indicator["2015"].plot(kind = 'bar', ax = axis[0], label = "Open defecation practice (% of population)")
def_indicator_rural ["2015"].plot(kind = 'bar', ax = axis[1], label = "Open defecation practice (% of Rural population)")
def_indicator_urban["2015"].plot(kind = 'bar', ax = axis[2], label =  "Open defecation practice (% of Urban population)")
plt.tight_layout()
plt.grid()
plt.legend()
plt.show()
#plt.close()

In [None]:
#bar chart with panda
panda_plot = def_indicator.plot(kind ='bar', figsize = (8,5), rot=45, width = 0.8) 


panda_plot.set_title(label = "Open defecation practice (% of population)")
panda_plot.set_xlabel("Data per Quintiles")
panda_plot.set_ylabel("% of population")


plt.grid()
plt.tight_layout()

#plt.savefig("images/Panda's bar chart.png")
plt.show
#plt.close()


In [None]:
# Plot box plot of def_indicators
def_data = def_indicator['2015'].plot(kind = 'bar', title = "People practicing open defecation data (% of population)")
plt.ylabel('% of Population')
plt.show()
plt.grid()
#plt.savefig("images/Afg 2015 def_indicator.png")
#plt.close()

In [None]:
#People using at least basic drinking water service (% of  population):
water_indicator = afg_year_data[afg_year_data["Indicator Name"].isin(indicators_water)]
water_indicator

In [None]:
# Plot box plot of water_indicator

water_indicator['2015'].plot(kind = 'box', title = "People using at least basic drinking water service (% of  population)")
plt.xlabel('Year')
plt.ylabel('% of Population')
plt.show()
plt.grid()
plt.savefig("images/Afg 2015 water indicator.png")
plt.close()

#plt.close

In [None]:
#People using at least basic sanitation service (% of  population):
sanitary_indicator = afg_year_data[afg_year_data["Indicator Name"].isin(indicators_sanitary_services)]
sanitary_indicator

In [None]:
# Plot box plot of sanitary_indicator

sanitary_indicator['2015'].plot(kind = 'box', title = "People using at least basic sanitation service (% of  population)")
plt.xlabel('Year')
plt.ylabel('% of Population')
plt.show()
plt.grid()
plt.savefig("images/Afg 2015 sanitary indicator.png")
plt.close()



In [None]:
#Prevalence of acute respiratory infection (ARI) (% of children under 5)
ari_prevalance_indicator = afg_year_data[afg_year_data["Indicator Name"].isin(indicators_ARI_prevalance)]
ari_prevalance_indicator

In [None]:
# Plot box plot of ari_prevalance_indicator

ari_prevalance_indicator['2015'].plot(kind = 'box', title = "Prevalence of acute respiratory infection (ARI) (% of children under 5)")
plt.xlabel('Year')
plt.ylabel('% of children under 5')
plt.show()
plt.grid()
plt.savefig("images/Afg 2015 ARI prevalance indicator.png")
plt.close()


In [None]:
#Prevalence of diarrhea (% of children under 5)
diarrhea_prevalance_indicator = afg_year_data[afg_year_data["Indicator Name"].isin(indicators_diarrhea_prevalance)]
diarrhea_prevalance_indicator

In [None]:
# Plot box plot of diarrhea_prevalance_indicator

diarrhea_prevalance_indicator['2015'].plot(kind = 'box', title = "Prevalence of diarrhea (% of children under 5)")
plt.xlabel('Year')
plt.ylabel('% of children under 5')
plt.show()
plt.grid()
plt.savefig("images/Afg 2015 diarrhea prevalance indicator.png")
plt.close()

In [None]:
#Treatment of acute respiratory infection (% of children under 5)
ari_treatment_indicator = afg_year_data[afg_year_data["Indicator Name"].isin(indicators_ARI_treatment)]
ari_treatment_indicator

In [None]:
# Plot box plot of acute respiratory infection (% of children under 5)

ari_treatment_indicator['2015'].plot(kind = 'box', title = "Treatment of acute respiratory infection (ARI) (% of children under 5 taken to a health provider)")
plt.xlabel('Year')
plt.ylabel('% of children under 5 taken to a health provider')
plt.show()
plt.grid()
plt.savefig("images/Afg 2015 ARI treatment indicator.png")
plt.close()

In [None]:
# Treatment of diarrhea (% of children under 5)
diarrhea_treatment_indicator = afg_year_data[afg_year_data["Indicator Name"].isin(indicators_diarrhea_treatment)]
diarrhea_treatment_indicator

In [None]:
# Plot box plot of diarrhea (% of children under 5)

diarrhea_treatment_indicator['2015'].plot(kind = 'box', title = "Treatment of diarrhea (ORS, RHS or increased fluids) (% of children under 5)")
plt.xlabel('Year')
plt.ylabel('% of children under 5')
plt.show()
plt.grid()
plt.savefig("images/Afg 2015 diarrhea treatment indicator.png")
plt.close()

In [None]:
# Under-5 mortality rate (per 1,000 live births)
mortality_indicator = afg_year_data[afg_year_data["Indicator Name"].isin(mortality)]
mortality_indicator

In [None]:
# Plot box plot of mortality

mortality_indicator['2015'].plot(kind = 'box', title = "Under-5 mortality rate (per 1,000 live births)")
plt.xlabel('Year')
plt.ylabel('Mortality of under 5 (per 1,000 live births)')
plt.show()
plt.grid()
plt.savefig("images/Afg 2015 under 5 mortality rate.png")
plt.close()

In [None]:
mortality_indicator

In [None]:
cleaned_mortality_data = mortality_indicator.drop(['Country Name', 'Indicator Name'], axis = 1)
cleaned_mortality_data.rename( columns = {'2015' : 'Mortality data 2015'})

In [None]:
cleaned_def_indicator = def_indicator.drop(['Country Name', 'Indicator Name'], axis = 1)
cleaned_def_indicator.rename( columns = {'2015' : 'Def data 2015'})


In [None]:
def_indicator

In [None]:

water_indicator

In [None]:
sanitary_indicator


In [None]:
fig, axis = plt.subplots(1, 3, figsize = (10,5))
def_indicator["2015"].plot(kind = 'box', ax = axis[0], label = "Open Defecation Practice")
water_indicator["2015"].plot(kind = 'box', ax = axis[1], label = "Basic drinking water use")
sanitary_indicator["2015"].plot(kind = 'box', ax = axis[2], label = )
plt.tight_layout()
plt.show()
plt.close()