![](CRISP_DM.png)

In [None]:
# required packages
import pandas as pd
import numpy as np
from datetime import datetime
import pandas as pd 
from scipy import optimize
from scipy import integrate
import random
#for plotting
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.graph_objects as go
# for dashboard
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output,State
# notebook-wide defaults: 16x9 matplotlib figures, up to 200 rows in pandas output
mpl.rcParams.update({'figure.figsize': (16, 9)})
pd.options.display.max_rows = 200

In [None]:
# read the processed flat table from the local data folder and preview it ordered by date
df_analyse = pd.read_csv(
    '../data/processed/COVID_small_flat_table.csv',
    sep=';',
)
df_analyse.sort_values(by='date').head()

## 80.i Calculation of SIR curves through a simulative approach
* For background on the SIR model, see [SIR Model for Spread of Disease](https://www.maa.org/press/periodicals/loci/joma/the-sir-model-for-spread-of-disease-the-differential-equation-model).

In [None]:
# basic parameters:
#   N0    - max susceptible population
#   beta  - change in infection spread dynamics
#   gamma - recovery rate
N0, beta, gamma = 1000000, 0.4, 0.1

# initial condition: I0 + S0 + R0 = N0
# (if the number of cases is zero the calculation is not possible)
I0 = df_analyse['Germany'][35]
R0 = 0
S0 = N0 - I0

In [None]:
# SIR_chauhan function for calculating changes in respective cases
def cal_SIR_chauhan(SIR, beta, gamma, N=None):
    ''' Simple SIR model: rate of change of each compartment for one state

        Parameters
        ----------
        SIR   : sequence of three numbers (S, I, R)
                S: susceptible population
                I: infected people
                R: recovered people
        beta  : infection rate, scales the S*I/N term
        gamma : recovery rate, scales the I term
        N     : population size used to normalise S*I; when omitted the
                notebook-level N0 is used (the previous hard-wired behaviour)

        Returns
        -------
        list [dS_dt, dI_dt, dR_dt]

        overall condition is that the sum of changes (differences) sum up to 0
        dS+dI+dR=0
        S+I+R= N (constant size of population)

    '''
    if N is None:
        # looked up at call time so a later re-definition of N0 is picked up
        N = N0

    S, I, R = SIR
    dS_dt = -beta*S*I/N
    dI_dt = beta*S*I/N - gamma*I
    dR_dt = gamma*I
    return [dS_dt, dI_dt, dR_dt]

In [None]:
# initiate with assumed parameter
SIR = np.array([S0, I0, R0])

# Collect one record per simulated day and build the frame once at the end.
# DataFrame.append was removed in pandas 2.0 and growing a frame row by row is
# quadratic; the old column template also contained a misspelled 'recoverd'
# key, which left an extra, always-empty column in the result.
simulated_days = []
for each_t in np.arange(100):
    new_delta_vec = cal_SIR_chauhan(SIR, beta, gamma)
    SIR = SIR + new_delta_vec  # one step: add the computed changes to the current state
    simulated_days.append({'susceptible': SIR[0],
                           'infected': SIR[1],
                           'recovered': SIR[2]})

propagation_rates = pd.DataFrame(simulated_days,
                                 columns=['susceptible', 'infected', 'recovered'])

In [None]:
# plot the three simulated compartments against the simulation step (row index)
fig, ax1 = plt.subplots(1, 1)
for column, line_color in (('infected', 'b'), ('recovered', 'r'), ('susceptible', 'g')):
    ax1.plot(propagation_rates.index, propagation_rates[column],
             label=column, color=line_color)

ax1.set_ylim(10, 1000000)
ax1.set_yscale('linear')
ax1.set_title('Scenario SIR simulations (Only for visualization)', size=16)
ax1.set_xlabel('Timeline in days', size=16)  # was misspelled 'Timelinee'
ax1.set_ylabel('Total infected cases', size=16)
ax1.legend(loc='best', prop={'size': 16});

## 80.i.a. Parameters of SIR model

In [None]:
# Germany column from row 35 onwards as a plain array, plus a matching 0..n-1 step index
ydata = np.array(df_analyse['Germany'][35:])
t = np.arange(ydata.shape[0])

In [None]:
# re-initialise the start state from the first value of ydata
I0 = ydata[0]
S0, R0 = N0 - I0, 0

In [None]:
# define SIR_chauhan_t, where t (in days).
def cal_SIR_chauhan_t(SIR, t, beta, gamma, N=None):
    ''' SIR right-hand side in the (state, t, *args) form expected by integrate.odeint

        Parameters
        ----------
        SIR   : sequence of three numbers (S, I, R)
        t     : time point; not used in the equations, it is only part of the
                call signature
        beta  : infection rate, scales the S*I/N term
        gamma : recovery rate, scales the I term
        N     : population size used to normalise S*I; when omitted the
                notebook-level N0 is used (the previous hard-wired behaviour)

        Returns
        -------
        tuple (dS_dt, dI_dt, dR_dt); the three values sum up to 0
    '''
    if N is None:
        # looked up at call time so a later re-definition of N0 is picked up
        N = N0

    S, I, R = SIR
    dS_dt = -beta*S*I/N
    dI_dt = beta*S*I/N - gamma*I
    dR_dt = gamma*I
    return dS_dt, dI_dt, dR_dt
#  function for the integration calculation
def fit_odeint_func(x, beta, gamma):
    ''' Integrate the SIR equations and return the infected curve

        Written in the f(x, *params) form that optimize.curve_fit expects.

        Parameters
        ----------
        x     : sequence of time points at which the solution is evaluated.
                Previously this argument was ignored and the notebook-level
                `t` was used instead, so the result silently depended on a
                global; every call in this notebook passes `t`, so results
                are unchanged.
        beta  : infection rate passed on to cal_SIR_chauhan_t
        gamma : recovery rate passed on to cal_SIR_chauhan_t

        Returns
        -------
        1-D array: column 1 (infected) of the odeint solution, one value per
        entry of x

        The initial state (S0, I0, R0) is read from the notebook namespace.
    '''
    return integrate.odeint(cal_SIR_chauhan_t, (S0, I0, R0), x, args=(beta, gamma))[:, 1]

In [None]:
# solve the differential equations once for a hand-set beta / gamma pair
popt = [0.4, 0.1]
fit_odeint_func(t, popt[0], popt[1])

In [None]:
# fit the ODE solution to the data; the free parameters are beta and gamma
popt, pcov = optimize.curve_fit(f=fit_odeint_func, xdata=t, ydata=ydata)
# square root of the covariance diagonal
perr = np.sqrt(pcov.diagonal())

print('standard deviation errors : ', str(perr), ' start infect:', ydata[0])
print("Optimal parameters: beta =", popt[0], " and gamma = ", popt[1])

In [None]:
# evaluate the model once more with the optimised beta and gamma
fitted_curve = fit_odeint_func(t, popt[0], popt[1])

In [None]:
# original data as star markers and the fitted curve as a line, log-scaled y-axis
plt.semilogy(t, ydata, '*')
plt.semilogy(t, fitted_curve)
plt.title('Fitted SIR model [Only for Germany]')
plt.xlabel('Timeline in Days')
plt.ylabel('Infected Population in numbers')
plt.show()

# report the fitted parameters and their ratio
print('Calculated optimal parameters: beta =', popt[0], ' and gamma = ', popt[1])
print('Basic Reproduction Number R0 = ', popt[0]/popt[1])
print('R0 is a mathematical term that indicates how contagious an infectious disease is.',
      'This ratio is derived as the expected number of new infections',
      sep='\n')

## 80.ii  SIR with lockdown period through dynamic beta

In [None]:
# length of each phase, in entries of the beta schedule
t_initial = 28
t_intro_measures = 14
t_hold = 21
t_relax = 21

# beta moves between these two values; gamma stays fixed
beta_max = 0.4
beta_min = 0.11
gamma = 0.1

# one beta value per step: constant max, ramp down, constant min, ramp back up
pd_beta = np.concatenate([
    np.full(t_initial, beta_max),
    np.linspace(beta_max, beta_min, t_intro_measures),
    np.full(t_hold, beta_min),
    np.linspace(beta_min, beta_max, t_relax),
])

In [None]:
# display the beta schedule built above (one value per simulated step)
pd_beta

In [None]:
# initialize the SIR state
SIR = np.array([S0, I0, R0])

# calculate propagated rates, one step per entry of the beta schedule.
# Records are collected in a list and turned into a frame once:
# DataFrame.append was removed in pandas 2.0, and the old column template
# contained a misspelled 'recoverd' key that produced an extra empty column.
simulated_days = []
for each_beta in pd_beta:
    new_delta_vec = cal_SIR_chauhan(SIR, each_beta, gamma)
    SIR = SIR + new_delta_vec  # one step: add the computed changes to the current state
    simulated_days.append({'susceptible': SIR[0],
                           'infected': SIR[1],
                           'recovered': SIR[2]})

propagation_rates = pd.DataFrame(simulated_days,
                                 columns=['susceptible', 'infected', 'recovered'])

In [None]:
# dynamic beta in SIR
fig, ax1 = plt.subplots(1, 1)

ax1.plot(propagation_rates.index, propagation_rates.infected, label='infected', linewidth=3)

# cumulative end position of each phase on the x-axis
t_phases = np.array([t_initial, t_intro_measures, t_hold, t_relax]).cumsum()
ax1.bar(np.arange(len(ydata)), ydata, width=0.8, label=' current infected Germany', color='w')

# shade one span per phase. `color` already sets the face colour, so the
# additional facecolor='b' that used to be passed alongside it was overridden
# and only made matplotlib emit a warning; the rendered colours are unchanged.
ax1.axvspan(0, t_phases[0], alpha=0.2, label='no measures', color='b')
ax1.axvspan(t_phases[0], t_phases[1], alpha=0.3, label='hard measures introduced', color='r')
ax1.axvspan(t_phases[1], t_phases[2], alpha=0.4, label='hold measures', color='y')
ax1.axvspan(t_phases[2], t_phases[3], alpha=0.5, label='relax measures', color='g')
ax1.axvspan(t_phases[3], len(propagation_rates.infected), facecolor='b', alpha=0.6,
            label='repeat hard measures')

ax1.set_ylim(10, 1.5*max(propagation_rates.infected))
ax1.set_yscale('log')
ax1.set_title('Dynamic beta in SIR simulations[Only for Germany]', size=16)
ax1.set_xlabel('Timeline in Days', size=16)
ax1.legend(loc='best', prop={'size': 16});

## 80.iii  SIR model: Dash App

### 80.iii.a Data preparation

In [None]:
# data frame
data_raw = pd.read_csv('../data/raw/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
country_list = data_raw['Country/Region'].unique()  # making country_list
date = data_raw.columns[4:]  # every column from position 4 on is used as one date

# data_raw DataFrame into format for SIR algorithm: one row per date, one
# column per country. A single groupby sums all rows that share a
# 'Country/Region' value, instead of filtering the whole frame once per
# country and inserting ~200 columns one by one (which fragments the frame).
# sort=False keeps the countries in order of first appearance, i.e. the same
# order as country_list.
country_totals = data_raw.groupby('Country/Region', sort=False)[list(date)].sum()
df_chauhan = (
    country_totals.T
    .rename_axis(index='Date', columns=None)
    .reset_index()
)
df_chauhan.to_csv("../data/raw/COVID-19/csse_covid_19_data/SIR.csv", sep=';', index=False)

# read the table back from disk and preview it
df_analyse = pd.read_csv('../data/raw/COVID-19/csse_covid_19_data/SIR.csv', sep=';')
df_analyse.sort_values('Date', ascending=True).head()

### 80.iii.b. SIR model and fitted curve parameter 

In [None]:
# Initialize parameters
N0 = 1000000
beta = 0.4
gamma = 0.1
I0 = df_analyse.Germany[35]
S0 = N0 - I0
R0 = 0

# Work on an independent copy: the fitted columns must not be written into a
# slice of df_analyse (that triggers pandas' SettingWithCopyWarning).
df_data = df_analyse[35:].copy()
t = np.arange(df_data.shape[0])
n_rows = df_data.shape[0]

# optimize parameters for each country; the fitted curves are collected in a
# dict and attached in one go instead of inserting one column per iteration
fitted_columns = {}
for country in df_data.columns[1:]:
    # keep only the rows with at least one case
    ydata = np.array(df_data[df_data[country] > 0][country])

    if len(ydata) < 2:
        # curve_fit cannot estimate two parameters from fewer than two
        # points (and ydata[0] would fail on an empty array): store zeros
        fitted_columns[country + '_fitted'] = np.zeros(n_rows)
        continue

    # fit_odeint_func takes its initial state (S0, I0, R0) from the notebook
    # namespace, so the state is reset for every country before fitting
    t = np.arange(len(ydata))
    I0 = ydata[0]
    S0 = N0 - I0
    R0 = 0

    popt, pcov = optimize.curve_fit(fit_odeint_func, t, ydata, maxfev=5000)
    fitted = fit_odeint_func(t, *popt)

    # left-pad with zeros for the rows that were dropped by the > 0 filter
    fitted_columns[country + '_fitted'] = np.concatenate((np.zeros(n_rows - len(fitted)), fitted))

df_data = pd.concat([df_data, pd.DataFrame(fitted_columns, index=df_data.index)], axis=1)
df_data = df_data.reset_index(drop=True)
df_data.to_csv('../data/processed/SIR_fitted.csv', sep=';')

In [None]:
# source and fitted series for Germany side by side
x = df_data.loc[:, ['Germany', 'Germany_fitted']]

In [None]:
# creating plot for germany: fitted curve first, then the source data
fig = go.Figure()
for column, trace_name in (('Germany_fitted', 'fitted_germany'),
                           ('Germany', 'source_germany')):
    fig.add_trace(
        go.Scatter(
            x=df_data['Date'],
            y=df_data[column],
            name=trace_name,
            mode='markers+lines',
            line_width=1,
            marker_size=3,
        )
    )

title_spec = {
    'text': 'SIR fitted curve with confirmed cases [Only for Germany]',
    'y': 0.9,
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
fig.update_layout(
    title=title_spec,
    xaxis_title='Timeline in Days',
    yaxis_title='Total cases of infected people',
    width=800,
    height=600,
)
fig.update_yaxes(type='log')
fig.update_layout(xaxis_rangeslider_visible=True)

In [None]:
# 200 random '#rrggbb' colours for the dashboard traces.
# A private generator with a fixed seed makes the colours identical on every
# run (Restart & Run All reproduces the same dashboard) and leaves the state
# of the global `random` module untouched.
color_rng = random.Random(42)
color_list = [
    '#%02x%02x%02x' % (color_rng.randint(0, 255),
                       color_rng.randint(0, 255),
                       color_rng.randint(0, 255))
    for _ in range(200)
]

In [None]:
# dashboard application of source and fitted SIR curve data for dataset
fig = go.Figure()  # empty placeholder figure; the callback fills the graph
app = dash.Dash()

# Offer every source column after 'Date' and skip the generated '*_fitted'
# columns. The previous fixed slice columns[1:200] either cut off countries or
# listed '*_fitted' columns, and selecting one of those makes the callback
# look up a non-existent '<name>_fitted_fitted' column.
country_options = [column for column in df_data.columns[1:]
                   if not column.endswith('_fitted')]

app.layout = html.Div([

    dcc.Markdown('''
    #  Data Science @ TU_KL on COVID-19_Task 2
    ## Real and simulated number of infected people

    * The default layout contains the confirmed infected cases in the log-scale format on the Y-axis
    and Timeline in Days on the X-axis.
    ### The dropdown menu enables selection of one or multiple countries for visualization.

    * This dashboard plots two curves for each country:
    
    1. The first curve represents the confirmed infected cases along the timeline.
    2. The second curve represents the simulated infected cases after applying the SIR model along the timeline. 
    
    '''),

    dcc.Markdown('''
    ## Multi-Select Country for visualization
    '''),
    dcc.Dropdown(
        id='country_drop_down',
        options=[{'label': each, 'value': each} for each in country_options],
        value=['Germany', 'Brazil', 'US'],  # which are pre-selected
        multi=True),
    dcc.Graph(figure=fig, id='main_window_slope')])

@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value')])
def update_figure(country_list):
    ''' Build the figure for the countries selected in the dropdown

        Parameters
        ----------
        country_list : list of column names of df_data chosen in
                       'country_drop_down'; None or an empty list yields a
                       figure without traces

        Returns
        -------
        dict with 'data' (two traces per country: the source column and its
        '<country>_fitted' column, both in the same colour) and 'layout'
    '''
    my_yaxis = {'type': "log", 'title': 'Confirmed infected people (From johns hopkins csse, log-scale)'}
    traces = []
    # `or []` guards against a None selection; enumerate replaces the manual counter
    for v, each in enumerate(country_list or []):
        # wrap around instead of raising IndexError when more countries are
        # selected than colours were generated
        trace_color = color_list[v % len(color_list)]
        # 'lines' is the valid scatter mode; the former 'line' is not one of
        # the accepted flags
        traces.append(dict(x=df_data['Date'], y=df_data[each],
                           mode='lines', line=dict(color=trace_color), opacity=1.0, name=each))
        traces.append(dict(x=df_data['Date'],
                           y=df_data[each + '_fitted'],
                           mode='markers+lines', line=dict(color=trace_color), opacity=1.0,
                           name=each + '_simulated'))

    return {
            'data': traces,
            'layout': dict(
                width=1280, height=720,
                xaxis={'title': 'Timeline', 'tickangle': -45, 'nticks': 20,
                       'tickfont': dict(size=14, color="#0c6887"), },
                yaxis=my_yaxis)}

# start the Dash server only when this code runs as the main program
if __name__ == '__main__':
    # debug=True turns on Dash's debug mode; use_reloader=False switches the
    # automatic reloader off (presumably because it does not work from inside
    # a notebook kernel — TODO confirm)
    app.run_server(debug=True, use_reloader=False)
