# SIR Dynamic
# Data-Import

In [105]:
import pandas as pd
import numpy as np
from datetime import datetime
pd.set_option('display.max_rows',500)

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import optimize
from scipy import integrate
import plotly

import plotly.express as px
import plotly.graph_objects as go

sns.set(style="darkgrid")

mpl.rcParams['figure.figsize'] = (16, 9)
pd.set_option('display.max_rows', 500)

In [106]:
data_path='../data/raw/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
pd_raw=pd.read_csv(data_path)

time_idx=pd_raw.columns[4:]
df_new=pd.DataFrame({'date':time_idx})

country=['Germany','India','US']

for each in country:
    df_new[each]=np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4:].sum(axis=0))

df_new['date']=df_new.date.astype('datetime64[ns]')

df_new.to_csv('../data/processed/small_flat_table.csv',sep=';',index=False)

In [107]:
df_new

Unnamed: 0,date,Germany,India,US
0,2020-01-22,0,0,1
1,2020-01-23,0,0,1
2,2020-01-24,0,0,2
3,2020-01-25,0,0,2
4,2020-01-26,0,0,5
...,...,...,...,...
881,2022-06-21,27454225,43331645,86452232
882,2022-06-22,27573585,43344958,86636306
883,2022-06-23,27681775,43362294,86757621
884,2022-06-24,27771111,43378234,86909716


# General Functions

In [108]:
def SIR_model(SIR,beta,gamma):
    ''' Simple SIR model
        S: susceptible population
        I: infected people
        R: recovered people
        beta: 
        
        overall condition is that the sum of changes (differnces) sum up to 0
        dS+dI+dR=0
        S+I+R= N (constant size of population)
    
    '''
    
    S,I,R=SIR
    dS_dt=-beta*S*I/N0          #S*I is the 
    dI_dt=beta*S*I/N0-gamma*I
    dR_dt=gamma*I
    return([dS_dt,dI_dt,dR_dt])

In [109]:
def SIR_model_t(SIR,t,beta,gamma):
    ''' Simple SIR model
        S: susceptible population
        t: time step, mandatory for integral.odeint
        I: infected people
        R: recovered people
        beta: 
        
        overall condition is that the sum of changes (differnces) sum up to 0
        dS+dI+dR=0
        S+I+R= N (constant size of population)
    
    '''
    
    S,I,R=SIR
    dS_dt=-beta*S*I/N0          #S*I is the 
    dI_dt=beta*S*I/N0-gamma*I
    dR_dt=gamma*I
    return dS_dt,dI_dt,dR_dt

In [110]:
def fit_odeint(x, beta, gamma):
    '''
    helper function for the integration
    '''
    return integrate.odeint(SIR_model_t, (S0, I0, R0), t, args=(beta, gamma))[:,1] # we only would like to get dI

# Loop for all country

In [111]:
ydata = np.array(df_new['Germany'][35:100])
t=np.arange(len(ydata))
x=np.linspace(1,len(ydata),len(ydata))
df_plot=pd.DataFrame({'day':np.array(x)})

for each in country: 
    
    if each=='Germany':
        N0=80000000 #max susceptible population
    elif each=='India':
        N0=1380000000
    else:
        N0=329000000
        
    beta=0.4   # infection spread dynamics
    gamma=0.1
    
    I0=df_new[each][35]
    S0=N0-I0
    R0=0
    
    ydata = np.array(df_new[each][35:100])
    t=np.arange(len(ydata))
    
    # example curve of our differential equationa
    popt=[beta,gamma]
    fit_odeint(t, *popt);
    
    popt, pcov = optimize.curve_fit(fit_odeint, t, ydata)
    perr = np.sqrt(np.diag(pcov))
    
    print('standard deviation errors : ',str(perr), ' start infect:',ydata[0])
    print("Optimal parameters: beta =", popt[0], " and gamma = ", popt[1])
    
    # get the final fitted curve
    fitted=fit_odeint(t, *popt)
    len(fitted)
    x=np.linspace(1,len(fitted),len(fitted))
    #df_plot=pd.DataFrame({'day':np.array(x)})
    new={each+'_fit':np.array(fitted),
         each+'_case':np.array(df_new[each][35:100])
        }
    pd_new=pd.DataFrame(new)
    df_plot=pd.concat([df_plot,pd_new],axis=1)
    print(each+":Optimal parameters: beta =", popt[0], " and gamma = ", popt[1])
    print(each+"Basic Reproduction Number R0 " , popt[0]/ popt[1])


Excess work done on this call (perhaps wrong Dfun type). Run with full_output = 1 to get quantitative information.



standard deviation errors :  [0.06447662 0.0641861 ]  start infect: 21
Optimal parameters: beta = 2.895606710294739  and gamma =  2.704066134641813
Germany:Optimal parameters: beta = 2.895606710294739  and gamma =  2.704066134641813
GermanyBasic Reproduction Number R0  1.070834279235666



Excess work done on this call (perhaps wrong Dfun type). Run with full_output = 1 to get quantitative information.



standard deviation errors :  [0.31555625 0.31506699]  start infect: 3
Optimal parameters: beta = 25.242855071571434  and gamma =  25.072977103608693
India:Optimal parameters: beta = 25.242855071571434  and gamma =  25.072977103608693
IndiaBasic Reproduction Number R0  1.0067753409282334



Excess work done on this call (perhaps wrong Dfun type). Run with full_output = 1 to get quantitative information.



standard deviation errors :  [0.05097488 0.05085589]  start infect: 16
Optimal parameters: beta = 2.72039232025338  and gamma =  2.5024951499979142
US:Optimal parameters: beta = 2.72039232025338  and gamma =  2.5024951499979142
USBasic Reproduction Number R0  1.0870719650568144



overflow encountered in double_scalars


overflow encountered in double_scalars


overflow encountered in double_scalars


Illegal input detected (internal error). Run with full_output = 1 to get quantitative information.



In [112]:
df_plot

Unnamed: 0,day,Germany_fit,Germany_case,India_fit,India_case,US_fit,US_case
0,1.0,21.0,21,3.0,3,16.0,16
1,2.0,25.433344,26,3.555478,3,19.89534,17
2,3.0,30.802537,53,4.213801,3,24.73903,17
3,4.0,37.30509,66,4.994007,3,30.76193,25
4,5.0,45.180184,117,5.918659,3,38.25112,32
5,6.0,54.717449,150,7.014493,5,47.56357,55
6,7.0,66.267591,188,8.313192,5,59.14313,74
7,8.0,80.25526,240,9.852302,28,73.54168,107
8,9.0,97.194605,349,11.67631,30,91.44544,184
9,10.0,117.708107,534,13.837931,31,113.7076,237


# Visualization

In [113]:
fig=go.Figure()
for each in country:
    fig.add_trace(go.Scatter(x=df_plot.day,
                             y=df_plot[each+'_case'],
                             mode='markers',
                             marker_size=8,
                             name=each+'_case'
                         ))
for each in country:
    fig.add_trace(go.Scatter(x=df_plot.day,
                             y=df_plot[each+'_fit'],
                             mode='lines',
                             name=each+'_fit'
                         ))

fig.update_layout(
        width=1000,
        height=800,
        xaxis_title="Days",
        yaxis_title="Covid infections",
)
fig.update_yaxes(type="log")
fig.show()

# Dash

In [114]:
import dash
dash.__version__
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output,State

import os
print(os.getcwd())

fig = go.Figure()

app = dash.Dash()
app.layout = html.Div([

    dcc.Markdown('''
    #  Applied Data Science on COVID-19 data

    Goal of the project is to demonstrate the SIR prediction model for three countries and the reproduction rate,
    infection rate and recovery rate for the first few days of the pandemic

    '''),

    dcc.Markdown('''
    ## Multi-Select Country for visualization
    '''),
    
    dcc.Dropdown(
        id='country_drop_down',
        options=[ {'label': each,'value':each} for each in country],
        value=['US', 'Germany','India'], # which are pre-selected
        multi=True
    ),

 

    dcc.Graph(figure=fig, id='main_window_slope')
])



@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value')])


def update_figure(country):


    traces = []
    for each in country:
        traces.append(dict(x=df_plot.day,
                                y=df_plot[each+'_case'],
                                mode='markers',
                                marker_size=8,
                                name=each+'_case'
                       )
                )
    
    for each in country:
        traces.append(dict(x=df_plot.day,
                                y=df_plot[each+'_fit'],
                                mode='lines',
                                name=each+'_fit'
                       )
                )

    return {
            'data': traces,
            'layout': dict (
                width=1280,
                height=720,

                xaxis={
                        'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      },

                yaxis={'type':"log",
                       'range':'[1.1,5.5]'
                      }
        )
    }

if __name__ == '__main__':

    app.run_server(debug=True, use_reloader=False)

C:\Users\Mohit\EDS_COVID_22\notebooks
Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
