#### Mortality from infectious diseases has decreased dramatically as the quality of life has improved and as antibiotics and anti-parasitic drugs have become more widely available than ever. However, infectious diseases are still life-threatening in some parts of the world. This project answers the following research questions.

### Research Questions
#### 1. What is the incidence of death due to infection in the Netherlands compared to the total death rate since 1995?
#### 2. Which infectious disease is the most deadly?
#### 3. Which age group is most affected?
#### 4. Which gender is most affected?

## Importing Libraries

In [46]:
#  importing necessary libraries
import pandas as pd
import numpy as np
import yaml

from bokeh.io import output_notebook, show
output_notebook()
import panel as pn
pn.extension()
from panel.interact import interact
from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource,HoverTool, FactorRange
from math import pi
from bokeh.palettes import Category10, Dark2
from bokeh.transform import cumsum
import holoviews as hv
from bokeh.plotting import figure, show
from bokeh.transform import cumsum

## Data Loading

In [47]:
# Load the total-death data set; its file path is read from config.yaml.
with open('config.yaml', mode='r') as stream:
    config = yaml.safe_load(stream)

total_death = config['Total_death']
df_total = pd.read_csv(total_death, sep=';')
df_total.head()

Unnamed: 0,ID,Gender,Periods,TotalDeaths_1
0,72,3000,1950JJ00,39121
1,73,3000,1951JJ00,40294
2,74,3000,1952JJ00,39467
3,75,3000,1953JJ00,42128
4,76,3000,1954JJ00,41970


In [48]:
# Load the cause-of-death data set; its path is the 'cause_of_death' entry
# of the configuration read from config.yaml.
death_cause = config['cause_of_death']
df_cause = pd.read_csv(death_cause, sep=';')
df_cause.head()



Unnamed: 0,ID,Sex,Age,Periods,k_11Tuberculosis_3,k_12CerebrospinalInfection_4,k_13ViralHepatitis_5,k_14AIDS_6,k_15OtherInfectAndParasiticDiseases_7,k_61Meningitis_40,TotalDiseasesOfTheRespiratorySystem_50,k_81Influenza_51,k_82Pneumonia_52,TotalCongenitalAnomalies_72,k_151OfTheNervousSystem_73,k_152OfTheCirculatorySystem_74,k_181ConfirmedCOVID19_94
0,1656,3000,10010,1950JJ00,10,12,0,,89,36,216,14,175,555,170,209,
1,1657,3000,10010,1951JJ00,9,11,1,,99,30,238,18,191,590,176,245,
2,1658,3000,10010,1952JJ00,9,20,2,,64,27,185,17,137,568,180,213,
3,1659,3000,10010,1953JJ00,3,12,2,,56,26,168,20,123,585,171,246,
4,1660,3000,10010,1954JJ00,5,10,0,,31,41,135,4,111,581,206,220,


## Data Exploration

In [49]:
# Helper to check that a data set was loaded properly.

def information(df):
    """Print a summary of ``df`` using ``df.info()`` and return ``df``.

    Parameters
    ----------
    df : object
        The loaded data set; expected to provide an ``info()`` method
        (e.g. a pandas DataFrame).

    Returns
    -------
    object
        The same ``df`` that was passed in, unchanged.

    Notes
    -----
    If ``df.info()`` raises an ordinary exception, a message is printed and
    ``df`` is still returned. Only ``Exception`` subclasses are caught, so
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed (the
    previous bare ``except:`` caught those as well).
    """
    try:
        df.info()
    except Exception:
        print('data is not properlly loaded')
    return df

In [50]:
# Look over the column names of both data sets.
print(f"df_cause columns= {df_cause.columns!s}")
print(f"df_total columns= {df_total.columns!s}")


df_cause columns= Index(['ID', 'Sex', 'Age', 'Periods', 'k_11Tuberculosis_3',
       'k_12CerebrospinalInfection_4', 'k_13ViralHepatitis_5', 'k_14AIDS_6',
       'k_15OtherInfectAndParasiticDiseases_7', 'k_61Meningitis_40',
       'TotalDiseasesOfTheRespiratorySystem_50', 'k_81Influenza_51',
       'k_82Pneumonia_52', 'TotalCongenitalAnomalies_72',
       'k_151OfTheNervousSystem_73', 'k_152OfTheCirculatorySystem_74',
       'k_181ConfirmedCOVID19_94'],
      dtype='object')
df_total columns= Index(['ID', 'Gender', 'Periods', 'TotalDeaths_1'], dtype='object')


In [51]:
# Check the shape (rows x columns) of both data sets.
print(f'Cause-of-death data set has {df_cause.shape[0]} rows and {df_cause.shape[1]} columns.')
print(f'Total-death data set has {df_total.shape[0]} rows and {df_total.shape[1]} columns.')

Cause-of-death data set has 2880 rows and 17 columns.
Total-death data set has 144 rows and 4 columns.


In [52]:
# Check the data type of every column in both data sets.
print(f"df_cause= {df_cause.dtypes!s}")
print(f"df_total= {df_total.dtypes!s}")

df_cause= ID                                          int64
Sex                                         int64
Age                                         int64
Periods                                    object
k_11Tuberculosis_3                          int64
k_12CerebrospinalInfection_4                int64
k_13ViralHepatitis_5                        int64
k_14AIDS_6                                float64
k_15OtherInfectAndParasiticDiseases_7       int64
k_61Meningitis_40                           int64
TotalDiseasesOfTheRespiratorySystem_50      int64
k_81Influenza_51                            int64
k_82Pneumonia_52                            int64
TotalCongenitalAnomalies_72                 int64
k_151OfTheNervousSystem_73                  int64
k_152OfTheCirculatorySystem_74              int64
k_181ConfirmedCOVID19_94                  float64
dtype: object
df_total= ID                int64
Gender            int64
Periods          object
TotalDeaths_1     int64
dtype: object


In [53]:
# Count the missing values in each data set (summed over all columns).
print(f'Cause of death data set has {df_cause.isnull().sum().sum()} missing valus.')
print(f'Total number of death data set has {df_total.isnull().sum().sum()} missing valus.')

Cause of death data set has 3960 missing valus.
Total number of death data set has 0 missing valus.


## Data Preparation

In [54]:
# Drop the columns that are not infectious diseases (congenital anomalies).
# The columns are named explicitly instead of being sliced by position
# (previously iloc[:, 13:16]): a positional slice silently removes different
# columns if this cell is run a second time or if the column order changes.
df_cause = df_cause.drop(
    columns=[
        'TotalCongenitalAnomalies_72',
        'k_151OfTheNervousSystem_73',
        'k_152OfTheCirculatorySystem_74',
    ],
    errors='ignore',  # columns already removed on a re-run -> no-op
)

# Show only the first rows instead of dumping the whole frame.
df_cause.head()

Unnamed: 0,ID,Sex,Age,Periods,k_11Tuberculosis_3,k_12CerebrospinalInfection_4,k_13ViralHepatitis_5,k_14AIDS_6,k_15OtherInfectAndParasiticDiseases_7,k_61Meningitis_40,TotalDiseasesOfTheRespiratorySystem_50,k_81Influenza_51,k_82Pneumonia_52,k_181ConfirmedCOVID19_94
0,1656,3000,10010,1950JJ00,10,12,0,,89,36,216,14,175,
1,1657,3000,10010,1951JJ00,9,11,1,,99,30,238,18,191,
2,1658,3000,10010,1952JJ00,9,20,2,,64,27,185,17,137,
3,1659,3000,10010,1953JJ00,3,12,2,,56,26,168,20,123,
4,1660,3000,10010,1954JJ00,5,10,0,,31,41,135,4,111,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2875,4675,4000,71900,2017JJ00,1,0,1,0.0,346,4,1264,80,596,
2876,4676,4000,71900,2018JJ00,2,0,0,1.0,401,2,1308,138,620,
2877,4677,4000,71900,2019JJ00,2,0,0,0.0,376,2,1137,62,535,
2878,4678,4000,71900,2020JJ00,4,0,1,0.0,319,0,847,33,406,1675.0


In [55]:
# Drop two further columns that are not used as infectious-disease categories.
# Named explicitly instead of iloc[:, 9:11]: that positional slice only hits
# these two columns when the previous drop cell has run exactly once, and it
# removes other columns when this cell itself is re-run.
df_cause = df_cause.drop(
    columns=[
        'k_61Meningitis_40',
        'TotalDiseasesOfTheRespiratorySystem_50',
    ],
    errors='ignore',  # columns already removed on a re-run -> no-op
)
df_cause.head()

Unnamed: 0,ID,Sex,Age,Periods,k_11Tuberculosis_3,k_12CerebrospinalInfection_4,k_13ViralHepatitis_5,k_14AIDS_6,k_15OtherInfectAndParasiticDiseases_7,k_81Influenza_51,k_82Pneumonia_52,k_181ConfirmedCOVID19_94
0,1656,3000,10010,1950JJ00,10,12,0,,89,14,175,
1,1657,3000,10010,1951JJ00,9,11,1,,99,18,191,
2,1658,3000,10010,1952JJ00,9,20,2,,64,17,137,
3,1659,3000,10010,1953JJ00,3,12,2,,56,20,123,
4,1660,3000,10010,1954JJ00,5,10,0,,31,4,111,


In [56]:
# Rename the columns to make them more readable.
# Dictionary layout: old column name -> new column name.
new_columns = {
    'Periods': 'year',
    'Sex': 'Gender',
    'k_11Tuberculosis_3': 'Tuberculosis',
    'k_12CerebrospinalInfection_4': 'CerebrospinalInfection',
    'k_13ViralHepatitis_5': 'ViralHepatitis',
    'k_14AIDS_6': 'AIDS',
    'k_15OtherInfectAndParasiticDiseases_7': 'OtherInfectAndParasiticDiseases',
    'k_81Influenza_51': 'Influenza',
    'k_82Pneumonia_52': 'Pneumonia',
    'k_181ConfirmedCOVID19_94': 'ConfirmedCOVID19',
}
df_cause = df_cause.rename(columns=new_columns)

df_cause.head()

Unnamed: 0,ID,Gender,Age,year,Tuberculosis,CerebrospinalInfection,ViralHepatitis,AIDS,OtherInfectAndParasiticDiseases,Influenza,Pneumonia,ConfirmedCOVID19
0,1656,3000,10010,1950JJ00,10,12,0,,89,14,175,
1,1657,3000,10010,1951JJ00,9,11,1,,99,18,191,
2,1658,3000,10010,1952JJ00,9,20,2,,64,17,137,
3,1659,3000,10010,1953JJ00,3,12,2,,56,20,123,
4,1660,3000,10010,1954JJ00,5,10,0,,31,4,111,


In [57]:
# Cause-of-death data: strip the 'JJ00' suffix from the period code and store
# the year as an integer (not as a datetime; only the year is needed).
#
# astype(str) keeps the cell re-runnable: once 'year' is already an integer
# column, the `.str` accessor would otherwise raise. regex=False makes the
# literal replacement explicit.
df_cause['year'] = (
    df_cause['year']
    .astype(str)
    .str.replace('JJ00', '', regex=False)
    .astype(int)
)
df_cause.head()


Unnamed: 0,ID,Gender,Age,year,Tuberculosis,CerebrospinalInfection,ViralHepatitis,AIDS,OtherInfectAndParasiticDiseases,Influenza,Pneumonia,ConfirmedCOVID19
0,1656,3000,10010,1950,10,12,0,,89,14,175,
1,1657,3000,10010,1951,9,11,1,,99,18,191,
2,1658,3000,10010,1952,9,20,2,,64,17,137,
3,1659,3000,10010,1953,3,12,2,,56,20,123,
4,1660,3000,10010,1954,5,10,0,,31,4,111,


In [58]:
# Total-death data: rename 'Periods' to 'year', strip the 'JJ00' suffix and
# store the year as an integer. Only the year is needed, so an integer is used
# instead of a datetime.
df_total = df_total.rename(columns={'Periods': 'year'})

# astype(str) keeps the cell re-runnable: on a second run 'year' is already an
# integer column and the `.str` accessor would otherwise raise.
df_total['year'] = (
    df_total['year']
    .astype(str)
    .str.replace('JJ00', '', regex=False)
    .astype(int)
)

df_total.dtypes

ID               int64
Gender           int64
year             int32
TotalDeaths_1    int64
dtype: object

In [59]:
# Drop the 'ID' column from both frames before they are merged.
df_total = df_total.drop(columns=['ID'])
df_cause = df_cause.drop(columns=['ID'])

In [60]:
# Merge the two DataFrames on the two shared columns ('year' and 'Gender').
# Each (year, Gender) pair in df_total should carry exactly one total, while
# df_cause has one row per age group for that pair. validate='many_to_one'
# makes pandas raise a MergeError if df_total ever contains duplicate keys,
# instead of silently multiplying the cause-of-death rows.
df_merged = pd.merge(
    df_cause,
    df_total,
    on=['year', 'Gender'],
    how='inner',
    validate='many_to_one',
)
df_merged.head()


Unnamed: 0,Gender,Age,year,Tuberculosis,CerebrospinalInfection,ViralHepatitis,AIDS,OtherInfectAndParasiticDiseases,Influenza,Pneumonia,ConfirmedCOVID19,TotalDeaths_1
0,3000,10010,1950,10,12,0,,89,14,175,,39121
1,3000,51300,1950,40,17,0,,153,8,54,,39121
2,3000,70200,1950,16,1,0,,38,1,6,,39121
3,3000,70300,1950,12,1,0,,5,1,9,,39121
4,3000,70400,1950,32,1,0,,7,3,6,,39121


In [61]:
# Check the shape of the merged data.
print(f'Merged data set has {df_merged.shape[0]} rows and {df_merged.shape[1]} columns.')

Merged data set has 2880 rows and 12 columns.


## Inspecting the Merged Data

In [62]:
# Column names, dtypes and shape of the merged frame, one per output block.
print(df_merged.columns, df_merged.dtypes, df_merged.shape, sep='\n')

Index(['Gender', 'Age', 'year', 'Tuberculosis', 'CerebrospinalInfection',
       'ViralHepatitis', 'AIDS', 'OtherInfectAndParasiticDiseases',
       'Influenza', 'Pneumonia', 'ConfirmedCOVID19', 'TotalDeaths_1'],
      dtype='object')
Gender                               int64
Age                                  int64
year                                 int32
Tuberculosis                         int64
CerebrospinalInfection               int64
ViralHepatitis                       int64
AIDS                               float64
OtherInfectAndParasiticDiseases      int64
Influenza                            int64
Pneumonia                            int64
ConfirmedCOVID19                   float64
TotalDeaths_1                        int64
dtype: object
(2880, 12)


In [63]:
# Number of distinct gender codes and age codes in the merged data.
print(f"There are {df_merged['Gender'].nunique()} gender group in merged data set")
print(f"There are {df_merged['Age'].nunique()} age group in merged data set")

There are 2 gender group in merged data set
There are 20 age group in merged data set


In [64]:
# List the raw codes used for the age and gender categories.
print(f"{df_merged['Age'].unique()!s} are given code for age catagory ")
print(f"{df_merged['Gender'].unique()!s} are given code for gender catagory")

[10010 51300 70200 70300 70400 70500 70600 70700 70800 70900 71000 71100
 71200 71300 71400 71500 71600 71700 71800 71900] are given code for age catagory 
[3000 4000] are given code for gender catagory


In [65]:
def change_gender(code, name):
    """Replace a gender code in ``df_merged['Gender']`` with a readable label.

    Parameters
    ----------
    code : int
        Code currently stored in the 'Gender' column (the data uses 3000 and 4000).
    name : str
        Label to store instead of ``code``.

    Returns
    -------
    pandas.DataFrame
        The module-level ``df_merged`` frame, whose 'Gender' column has been
        updated in place.

    Notes
    -----
    A single vectorised ``replace`` already covers every row. The previous
    version looped over all rows and repeated the same whole-column replace
    for each matching row, which produced the same result at quadratic cost.
    When ``code`` does not occur, ``replace`` leaves the column unchanged.
    """
    df_merged['Gender'] = df_merged['Gender'].replace(code, name)
    return df_merged

# calling the function
change_gender(3000, 'male')
change_gender(4000, 'female')

Unnamed: 0,Gender,Age,year,Tuberculosis,CerebrospinalInfection,ViralHepatitis,AIDS,OtherInfectAndParasiticDiseases,Influenza,Pneumonia,ConfirmedCOVID19,TotalDeaths_1
0,male,10010,1950,10,12,0,,89,14,175,,39121
1,male,51300,1950,40,17,0,,153,8,54,,39121
2,male,70200,1950,16,1,0,,38,1,6,,39121
3,male,70300,1950,12,1,0,,5,1,9,,39121
4,male,70400,1950,32,1,0,,7,3,6,,39121
...,...,...,...,...,...,...,...,...,...,...,...,...
2875,female,71500,2021,0,0,2,1.0,113,2,67,734.0,85203
2876,female,71600,2021,1,0,2,0.0,176,0,99,1045.0,85203
2877,female,71700,2021,2,0,3,0.0,257,3,179,1527.0,85203
2878,female,71800,2021,0,0,0,0.0,356,1,299,1938.0,85203


In [66]:
def change_age(code, name):
    """Replace an age-group code in ``df_merged['Age']`` with a group label.

    The data set has 20 age-group codes; calling this function once per code
    regroups them into a smaller set of named age groups (the grouping is
    based on the WHO age grouping, according to the original author).

    Parameters
    ----------
    code : int
        Age-group code currently stored in the 'Age' column.
    name : str
        Label to store instead of ``code``. Several codes may share one label.

    Returns
    -------
    pandas.DataFrame
        The module-level ``df_merged`` frame, whose 'Age' column has been
        updated in place.

    Notes
    -----
    A single vectorised ``replace`` covers every row. The previous version
    looped over all rows and repeated the same whole-column replace for each
    matching row, giving the same result at quadratic cost.
    """
    df_merged['Age'] = df_merged['Age'].replace(code, name)
    return df_merged


In [67]:
# Regroup the 20 age codes into named age groups: age code -> group label.
# NOTE(review): 'Childern' looks like a typo of 'Children'; it is kept as-is
# because the label is shown in the widgets and in the saved outputs.
age_labels = {
    10010: 'Infant',
    51300: 'Toddler',
    70200: 'Childern',
    70300: 'Childern',
    70400: 'Younger adult',
    70500: 'Adult_20_30',
    70600: 'Adult_20_30',
    70700: 'Adult_30_50',
    70800: 'Adult_30_50',
    70900: 'Adult_30_50',
    71000: 'Adult_30_50',
    71100: 'Adult_50_70',
    71200: 'Adult_50_70',
    71300: 'Adult_50_70',
    71400: 'Adult_50_70',
    71500: 'Adult_above_70',
    71600: 'Adult_above_70',
    71700: 'Adult_above_70',
    71800: 'Adult_above_70',
    71900: 'Adult_above_70',
}
for age_code, age_label in age_labels.items():
    change_age(age_code, age_label)

# Display the relabelled frame (same object the last change_age call returns).
df_merged


Unnamed: 0,Gender,Age,year,Tuberculosis,CerebrospinalInfection,ViralHepatitis,AIDS,OtherInfectAndParasiticDiseases,Influenza,Pneumonia,ConfirmedCOVID19,TotalDeaths_1
0,male,Infant,1950,10,12,0,,89,14,175,,39121
1,male,Toddler,1950,40,17,0,,153,8,54,,39121
2,male,Childern,1950,16,1,0,,38,1,6,,39121
3,male,Childern,1950,12,1,0,,5,1,9,,39121
4,male,Younger adult,1950,32,1,0,,7,3,6,,39121
...,...,...,...,...,...,...,...,...,...,...,...,...
2875,female,Adult_above_70,2021,0,0,2,1.0,113,2,67,734.0,85203
2876,female,Adult_above_70,2021,1,0,2,0.0,176,0,99,1045.0,85203
2877,female,Adult_above_70,2021,2,0,3,0.0,257,3,179,1527.0,85203
2878,female,Adult_above_70,2021,0,0,0,0.0,356,1,299,1938.0,85203


## Data Distribution 

##### For the years before COVID-19 existed, its death count is null. Interpolating these values could produce false data, so I decided to fill them with zero.

In [68]:
# Replace every missing value with zero and keep the result in a new frame,
# leaving df_merged itself untouched.
df_merged_new = df_merged.fillna(value=0)
df_merged_new.head(n=20)

Unnamed: 0,Gender,Age,year,Tuberculosis,CerebrospinalInfection,ViralHepatitis,AIDS,OtherInfectAndParasiticDiseases,Influenza,Pneumonia,ConfirmedCOVID19,TotalDeaths_1
0,male,Infant,1950,10,12,0,0.0,89,14,175,0.0,39121
1,male,Toddler,1950,40,17,0,0.0,153,8,54,0.0,39121
2,male,Childern,1950,16,1,0,0.0,38,1,6,0.0,39121
3,male,Childern,1950,12,1,0,0.0,5,1,9,0.0,39121
4,male,Younger adult,1950,32,1,0,0.0,7,3,6,0.0,39121
5,male,Adult_20_30,1950,39,2,0,0.0,9,3,5,0.0,39121
6,male,Adult_20_30,1950,78,0,0,0.0,11,7,3,0.0,39121
7,male,Adult_30_50,1950,68,0,1,0.0,7,1,3,0.0,39121
8,male,Adult_30_50,1950,67,1,0,0.0,11,3,4,0.0,39121
9,male,Adult_30_50,1950,93,2,0,0.0,15,12,15,0.0,39121


##### Since I am only trying to calculate the total number of deaths from infection, I have not done a statistical test. I only want to see how my data is distributed, and the data is not normally distributed.

In [69]:
def histogram(infection, year):
    """Build a histogram of the death counts of one infection in one year.

    The rows of ``df_merged_new`` for ``year`` are selected and the values of
    the ``infection`` column are binned into 19 equal-width bins (20 edges)
    between the smallest and the largest value.

    Parameters
    ----------
    infection : str
        Name of the disease column in ``df_merged_new`` to plot.
    year : int
        Year to select from the 'year' column.

    Returns
    -------
    bokeh.plotting.figure
        Figure holding one quad glyph per bin.
    """
    yearly = df_merged_new[df_merged_new['year'] == year]
    counts = yearly[infection]

    # Defining the plot
    p = figure(width=800, height=500, toolbar_location=None,
               title="Number death of infection per year:")
    p.xaxis.axis_label = "Number of Death"
    p.yaxis.axis_label = "count"

    # Setting the bin range
    start = counts.min()
    end = counts.max()
    if start == end:
        # All values are identical (e.g. all zero for a disease with no
        # records in that year). Without widening the range all 20 edges
        # coincide, so every bar would have zero width and nothing is drawn.
        end = start + 1

    bins = np.linspace(start, end, 20)

    # Plotting the histogram
    hist, edges = np.histogram(counts, density=False, bins=bins)
    p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
           fill_color="navy", fill_alpha=0.8, line_color="white")

    return p
# Plotting interactive histogram: one selector for the disease column and one
# for the year.
# NOTE(review): the name `histogram` is rebound from the plotting function to
# the interactive panel (the dashboard cell uses the panel under this name),
# so re-running this cell passes the panel, not the function, to pn.interact.
years = list(df_merged_new['year'].unique())
infections = list(df_merged_new.columns[3:11])
histogram = pn.interact(histogram, infection=infections, year=years)

# Displaying panel output
pn.extension()
pn.Row(histogram)

# The bars on zero are no/NaN death records

## Visualization of the Data

In [70]:
def barplot(year):
    """Build a horizontal bar chart of the deaths per infection in one year.

    The rows of ``df_merged_new`` for ``year`` are selected and every disease
    column (columns 3 to 10 of the frame) is summed over those rows.

    Parameters
    ----------
    year : int
        Year to select from the 'year' column.

    Returns
    -------
    bokeh.plotting.figure
        Figure with one horizontal bar per disease column.
    """
    disease_columns = list(df_merged_new.columns[3:11])
    selected = df_merged_new[df_merged_new['year'] == year]

    # One total per disease column, in column order
    totals = [selected[column].sum() for column in disease_columns]

    plot = figure(
        title='Graph_1: Total number of death per disease per year',
        toolbar_location=None,
        tools="",
        y_range=FactorRange(factors=disease_columns),
        width=600,
        height=300,
    )
    plot.xaxis.axis_label = 'Number of death'
    plot.hbar(
        y=disease_columns,
        right=totals,
        height=0.5,
        color='purple',
        fill_alpha=0.8,
        line_color='white',
    )
    return plot


In [71]:
# Interactive bar plot of the total number of deaths per disease; the year is
# chosen from the distinct years present in the data.
# NOTE(review): `barplot` is rebound from the function to the interactive panel.
years = list(df_merged_new['year'].unique())
barplot = pn.interact(barplot, year=years)
pn.Row(barplot)

##### Graph_1: This interactive graph shows the number of deaths per infection for the selected year. Until 2014, pneumonia was the deadliest infection; from 2014 to 2016 and in 2019, OtherInfectAndParasiticDiseases was the highest. From 2020 onwards, COVID-19 is the deadliest infection.

In [72]:
def stackedbar(year):
    """Build a stacked horizontal bar chart of deaths per infection and gender.

    The rows of ``df_merged_new`` for ``year`` are split by the 'Gender'
    values 'male' and 'female', and every disease column (columns 3 to 10 of
    the frame) is summed separately for each gender.

    Parameters
    ----------
    year : int
        Year to select from the 'year' column.

    Returns
    -------
    bokeh.plotting.figure
        Figure with one stacked bar (male, female) per disease column.
    """
    disease_columns = list(df_merged_new.columns[3:11])
    genders = ['male', 'female']

    # Rows of the requested year, split once per gender
    selected = df_merged_new[df_merged_new['year'] == year]
    rows_by_gender = {
        gender: selected[selected['Gender'] == gender] for gender in genders
    }

    data = {
        'infection': disease_columns,
        'male': [rows_by_gender['male'][column].sum() for column in disease_columns],
        'female': [rows_by_gender['female'][column].sum() for column in disease_columns],
    }

    plot = figure(
        y_range=disease_columns,
        height=400,
        title='Graph_2: Number of death per disease per year per gender',
        toolbar_location=None,
        tools="hover",
        tooltips="$name: @$name",
    )
    plot.hbar_stack(
        genders,
        y='infection',
        height=0.5,
        color=['blue', 'pink'],
        source=data,
        legend_label=genders,
    )
    plot.xaxis.axis_label = 'Number of death'

    # Axes, grid and outline styling
    plot.y_range.range_padding = 0.1
    plot.ygrid.grid_line_color = None
    plot.axis.minor_tick_line_color = None
    plot.outline_line_color = None

    # Legend placement and appearance; clicking an entry hides that gender
    legend = plot.legend
    legend.location = "bottom_right"
    legend.orientation = "vertical"
    legend.label_text_font_size = "8pt"
    legend.spacing = 1
    legend.margin = 0
    legend.padding = 10
    legend.background_fill_alpha = 0.5
    legend.click_policy = "hide"

    return plot


In [73]:
# Interactive stacked bar plot of the number of deaths per gender per year.
# .unique() is required here: without it `years` holds one entry per data row
# (2880 values with many duplicates), which floods the year selector and also
# overwrites the de-duplicated global `years` used by the other cells.
years = list(df_merged_new.year.unique())
stackedbar = pn.interact(stackedbar, year=years)
pn.Row(stackedbar)

##### Graph_2: This interactive graph shows the number of deaths per gender for the selected year. Until 2014, pneumonia was the deadliest infection; from 2014 to 2016 and in 2019, OtherInfectAndParasiticDiseases was the highest. From 2020 onwards, COVID-19 is the deadliest infection.

In [74]:
def vbarplot(Age):
    """Build a vertical bar chart of the deaths per infection for one age group.

    The rows of ``df_merged_new`` whose 'Age' equals ``Age`` are selected and
    every disease column (columns 3 to 10 of the frame) is summed over those
    rows. No year filter is applied, so the totals cover all years.

    Parameters
    ----------
    Age : str
        Age-group label to select from the 'Age' column.

    Returns
    -------
    bokeh.plotting.figure
        Figure with one vertical bar per disease column and a hover tooltip
        showing the disease name and its total.
    """
    df = df_merged_new
    df = df[df['Age'] == Age]

    infection = list(df_merged_new.columns[3:11])
    deaths = [df[x].sum() for x in infection]

    # The hover tool can only display named data columns. The previous tooltip
    # (" @$infection") did not refer to any column, so the bars are now drawn
    # from a ColumnDataSource whose columns the tooltip can reference.
    source = ColumnDataSource(data={'infection': infection, 'deaths': deaths})

    p = figure(title='Graph_3: Number of death per age group per year',
               toolbar_location=None, tools="hover", tooltips="@infection: @deaths",
               x_range=FactorRange(factors=infection), width=600, height=500)
    p.yaxis.axis_label = 'Number of death'
    p.vbar(x='infection', top='deaths', width=0.5, color='green',
           fill_alpha=0.8, line_color='white', source=source)
    p.xaxis.major_label_orientation = 1
    return p


          

In [75]:
# Interactive vertical bar plot of the total number of deaths per age group;
# the age group is chosen from the distinct labels in the 'Age' column.
# NOTE(review): `vbarplot` is rebound from the function to the interactive panel.
Ages = list(df_merged_new['Age'].unique())
vbarplot = pn.interact(vbarplot, Age=Ages)
pn.Row(vbarplot)

##### Graph_3: This graph shows the number of deaths per age group. In infants, the highest mortality was caused by pneumonia, followed by other infectious and parasitic diseases. In toddlers and children, other infectious and parasitic diseases are the deadliest. In younger adults (ages 15 to 20), tuberculosis is the infection that causes the most deaths. In adults aged 30 to 50, tuberculosis and AIDS cause the most deaths. For ages 50 and above, pneumonia causes the most deaths.

In [76]:
def pichart(year):
    """Build a pie chart of each infection's share of the deaths in one year.

    The rows of ``df_merged_new`` for ``year`` are selected, every disease
    column (columns 3 to 10 of the frame) is summed, and each total becomes
    one wedge whose angle is proportional to its share of the overall sum.

    Parameters
    ----------
    year : int
        Year to select from the 'year' column.

    Returns
    -------
    bokeh.plotting.figure
        Figure with one wedge per disease column and a legend entry for each.
    """
    df = df_merged_new
    df = df[df['year'] == year]

    infection = list(df.columns[3:11])

    # Disease name -> total deaths. Built directly instead of pairing two lists
    # with a remove-while-iterating loop, and without shadowing the builtin
    # `dict` as the previous local variable did.
    totals = {name: df[name].sum() for name in infection}

    data = pd.Series(totals).reset_index(name='value').rename(columns={'index': 'infection'})
    data['angle'] = data['value'] / data['value'].sum() * 2 * pi
    # One palette colour per disease; the palette is keyed by its size.
    data['color'] = Dark2[len(totals)]

    p = figure(height=350, title="Graph_4: Pie chart for proportion of deases per year", toolbar_location=None,
               tools='hover', tooltips='@infection:@value', x_range=(-0.5, 1.0))

    # cumsum turns the per-wedge angles into start/end angles around the circle.
    p.wedge(x=0, y=1, radius=0.3,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", fill_color='color', legend_field='infection', source=data)

    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None

    return p

In [77]:
# Interactive pie chart; the year is chosen from the distinct years in the data.
# NOTE(review): `pichart` is rebound from the function to the interactive panel.
years = list(df_merged_new['year'].unique())
pichart = pn.interact(pichart, year=years)

# Displaying panel output in notebook
pn.extension()
pn.Row(pichart)

##### Graph_4: This interactive graph shows the percentage of deaths per year

In [78]:
def donought_chart(Age):
    """Build a donut chart of each infection's share of deaths in one age group.

    The rows of ``df_merged_new`` whose 'Age' equals ``Age`` are selected,
    every disease column (columns 3 to 10 of the frame) is summed, and each
    total becomes one annular wedge whose angle is proportional to its share
    of the overall sum.

    Parameters
    ----------
    Age : str
        Age-group label to select from the 'Age' column.

    Returns
    -------
    bokeh.plotting.figure
        Figure with one annular wedge per disease column.
    """
    df = df_merged_new
    df = df[df['Age'] == Age]

    infection = list(df_merged_new.columns[3:11])

    # Disease name -> total deaths, built directly and without shadowing the
    # builtin `dict` as the previous local variable did.
    totals = {name: df[name].sum() for name in infection}

    data = pd.Series(totals).reset_index(name='value').rename(columns={'index': 'infection'})
    data['angle'] = data['value'] / data['value'].sum() * 2 * pi
    # One palette colour per disease; the palette is keyed by its size.
    data['color'] = Category10[len(totals)]

    # `height=` and `legend_field=` match the other plots in this notebook;
    # the older `plot_height=` and `legend=` keywords are no longer accepted
    # by Bokeh 3.
    p = figure(height=450, width=900, title="Graph_5: Donought chart for proportion of infections per age group", toolbar_location=None,
               tools='hover', tooltips='@infection:@value')

    # cumsum turns the per-wedge angles into start/end angles around the circle.
    p.annular_wedge(x=0, y=1, inner_radius=0.2, outer_radius=0.4,
                    start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                    line_color="white", fill_color='color', legend_field='infection', source=data)

    return p

In [79]:
# Interactive donut chart of the share of deaths per infection; the age group
# is chosen from the distinct labels in the 'Age' column.
# NOTE(review): `donought_chart` is rebound from the function to the panel.
Ages = list(df_merged_new['Age'].unique())
donought_chart = pn.interact(donought_chart, Age=Ages)

# Displaying panel output
pn.extension()
pn.Row(donought_chart)



#### Graph_5: This graph shows the percentage of deaths per age group. Death due to infection varies based on age group.

# Dashboard

In [80]:
# creating panel widgets
# NOTE(review): year_slider is created but not placed in the template below.
year_slider = pn.widgets.IntSlider(name='Year', start=1950, end=2021)

# Assemble the dashboard from the interactive panels built in the cells above.
# The keyword is `title`; it was previously misspelled `titel`, so the title
# was never applied to the template.
# NOTE(review): the pn.pane.PNG URL looks like a web page rather than a direct
# link to a .png file - confirm that the image actually renders.
template = pn.template.FastListTemplate(
    title='Incidence of death due to infection in the Netherlands.',
    sidebar=[
        pn.pane.Markdown('# Death due to infection'),
        pn.pane.Markdown('Although mortality from infectious diseases has decreased dramatically as the quality of life has improved and the availability of antibiotics and anti-parasitic diseases is more than ever. However, infectious diseases are still life-threatening in some parts of the world. This data analysis tries to answer the research questions.'),
        pn.pane.PNG('https://www.pngegg.com/en/png-mwkkb', sizing_mode='scale_both'),
    ],
    main=[
        pn.Row(
            pn.Column(
                histogram,
                pichart,
                stackedbar,
                pn.Row(pn.Column(vbarplot, donought_chart)),
            )
        )
    ],
    accent_base_color='#bd9400',
    header_background='#bd9400',
)
# Starts a local server and opens the dashboard.
template.show()
                                                     



Launching server at http://localhost:52193


<panel.io.server.Server at 0x22f10910400>