## Figure 3: Covid-19 Dashboard Prototype

### Importing Libraries

In [1]:
import dash
import numpy as np
import pandas as pd
from dash import dcc
from dash import html
import seaborn as sns
from scipy import signal
import matplotlib as mpl
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn import linear_model
import plotly.graph_objects as go
from dash.dependencies import Input, Output,State

In [2]:
# Notebook-wide display settings: seaborn dark-grid theme, 16x9 matplotlib
# figures, and up to 500 rows shown when pandas renders a frame.
sns.set(style="darkgrid")
mpl.rcParams['figure.figsize'] = (16, 9)
pd.set_option('display.max_rows', 500)

### Importing the Dataset from GitHub

In [3]:
# URL of the CSSEGISandData/COVID-19 global confirmed-cases time series (raw CSV).
# NOTE(review): the load cell below rebinds this same name to the DataFrame, so
# this cell has to be re-run before the load cell can be executed a second time.
data_of_world = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'


In [4]:
# Read the CSV from the URL held in `data_of_world`; afterwards the name refers
# to the loaded DataFrame (wide format: one column per date), not the URL string.
data_of_world = pd.read_csv(data_of_world)
data_of_world.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/19/22,7/20/22,7/21/22,7/22/22,7/23/22,7/24/22,7/25/22,7/26/22,7/27/22,7/28/22
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,183908,184038,184224,184360,184473,184587,184819,185086,185272,185393
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,298578,300058,301394,302767,303925,304890,305123,306789,308050,309278
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,266542,266591,266654,266700,266772,266839,266916,267010,267096,267194
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,45061,45061,45326,45326,45326,45326,45326,45326,45326,45508
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,102209,102209,102209,102301,102301,102301,102301,102301,102301,102301


### Data Pre-processing 

In [5]:
# Count the missing values in every column of the raw frame.
data_of_world.isna().sum()

Province/State    196
Country/Region      0
Lat                 2
Long                2
1/22/20             0
                 ... 
7/24/22             0
7/25/22             0
7/26/22             0
7/27/22             0
7/28/22             0
Length: 923, dtype: int64

In [6]:
# Give the two identifier columns shorter names used by every later cell.
data_of_world = data_of_world.rename(
    columns={
        'Province/State': 'Territory',
        'Country/Region': 'Nation',
    }
)

In [7]:
# Rows without a province get the placeholder 'no'; the coordinate columns are
# dropped because nothing downstream reads them.
data_of_world = (
    data_of_world
    .assign(Territory=lambda frame: frame['Territory'].fillna('no'))
    .drop(columns=['Lat', 'Long'])
)
data_of_world.head()

Unnamed: 0,Territory,Nation,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,...,7/19/22,7/20/22,7/21/22,7/22/22,7/23/22,7/24/22,7/25/22,7/26/22,7/27/22,7/28/22
0,no,Afghanistan,0,0,0,0,0,0,0,0,...,183908,184038,184224,184360,184473,184587,184819,185086,185272,185393
1,no,Albania,0,0,0,0,0,0,0,0,...,298578,300058,301394,302767,303925,304890,305123,306789,308050,309278
2,no,Algeria,0,0,0,0,0,0,0,0,...,266542,266591,266654,266700,266772,266839,266916,267010,267096,267194
3,no,Andorra,0,0,0,0,0,0,0,0,...,45061,45061,45326,45326,45326,45326,45326,45326,45326,45508
4,no,Angola,0,0,0,0,0,0,0,0,...,102209,102209,102209,102301,102301,102301,102301,102301,102301,102301


In [8]:
# Reshape from wide (one column per date) to long (one row per date/territory/nation):
#   1. index the rows by (Territory, Nation),
#   2. transpose so the date labels become the row index,
#   3. stack both column levels into the row index,
#   4. reset the index, which yields the auto-named columns
#      'level_0' (date label), 'Territory', 'Nation' and 0 (the count).
# The auto-generated names 'level_0' and 0 are renamed in the following cell.
data_of_world = data_of_world.set_index(['Territory','Nation']).T.stack(level=[0,1]).reset_index()
data_of_world.head()

Unnamed: 0,level_0,Territory,Nation,0
0,1/22/20,Alberta,Canada,0.0
1,1/22/20,Anguilla,United Kingdom,0.0
2,1/22/20,Anhui,China,1.0
3,1/22/20,Aruba,Netherlands,0.0
4,1/22/20,Australian Capital Territory,Australia,0.0


In [9]:
# Replace the auto-generated column names left by the reshape with meaningful ones.
data_of_world = data_of_world.rename(columns={0: 'confirmed', 'level_0': 'Date'})
data_of_world.head()

Unnamed: 0,Date,Territory,Nation,confirmed
0,1/22/20,Alberta,Canada,0.0
1,1/22/20,Anguilla,United Kingdom,0.0
2,1/22/20,Anhui,China,1.0
3,1/22/20,Aruba,Netherlands,0.0
4,1/22/20,Australian Capital Territory,Australia,0.0


In [10]:
# Turn the date labels (strings such as '1/22/20') into pandas timestamps.
data_of_world['Date'] = data_of_world['Date'].astype('datetime64[ns]')

In [11]:
def get_doubling_time_via_regression(in_array):
    """Estimate a doubling time from three consecutive observations.

    A straight line is fitted through the three values, placed at the
    abscissae -1, 0 and 1, and the ratio ``intercept / slope`` is returned.
    With that placement the intercept is the fitted value at the middle
    point and the slope is the fitted change per step.

    Parameters
    ----------
    in_array : array-like of length 3
        The three observations, oldest first.

    Returns
    -------
    numpy.ndarray
        One-element array holding ``intercept / slope`` (the slope comes
        back from scikit-learn as an array, so the ratio is one as well).

    Raises
    ------
    AssertionError
        If ``in_array`` does not contain exactly three values.
    """
    observations = np.array(in_array)
    # Column vector [-1, 0, 1]: the shape scikit-learn expects for one feature.
    steps = np.arange(-1,2).reshape(-1, 1)

    assert len(in_array)==3

    model = linear_model.LinearRegression(fit_intercept=True)
    model.fit(steps, observations)

    return model.intercept_/model.coef_

In [12]:
def savgol_filter(df_input,column='confirmed',window=5):
    """Return a copy of ``df_input`` with a Savitzky-Golay smoothed column added.

    Missing values in ``column`` are replaced by 0 before filtering. The
    smoothed series is stored under ``column + '_filtered'``.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Frame containing ``column``. It is not modified.
    column : str, default 'confirmed'
        Name of the column to smooth.
    window : int, default 5
        Window length handed to ``scipy.signal.savgol_filter``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows and index as ``df_input`` plus the
        ``<column>_filtered`` column.
    """
    # Polynomial order of the local fit (1 = straight line per window).
    degree = 1

    # Work on a copy so the caller's frame (e.g. a groupby chunk) is never
    # changed as a side effect of smoothing.
    df_result = df_input.copy()

    filter_in = df_input[column].fillna(0)
    result = signal.savgol_filter(np.array(filter_in), window, degree)

    df_result[str(column+'_filtered')] = result
    return df_result


In [13]:
def rolling_reg(df_input,col='confirmed'):
    """Apply the three-point doubling-time regression over a sliding window.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Frame containing the column ``col``.
    col : str, default 'confirmed'
        Column whose values are fed to the regression.

    Returns
    -------
    pandas.Series
        Rolling result aligned with ``df_input``; positions that do not yet
        have a full window of three values are NaN.
    """
    # Window size is fixed at 3 because the regression helper asserts
    # exactly three input values.
    days_back = 3
    windows = df_input[col].rolling(window=days_back, min_periods=days_back)
    return windows.apply(get_doubling_time_via_regression, raw=False)

In [14]:
def calc_filtered_data(df_input,filter_on='confirmed'):
    """Add a per-(Territory, Nation) smoothed version of ``filter_on``.

    Each (Territory, Nation) group is smoothed independently with
    ``savgol_filter`` and the resulting ``<filter_on>_filtered`` column is
    joined back onto a copy of the input by row index.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Must contain the columns 'Territory', 'Nation' and ``filter_on``.
    filter_on : str, default 'confirmed'
        Column to smooth.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df_input`` with the additional ``<filter_on>_filtered`` column.

    Raises
    ------
    AssertionError
        If one of the required columns is missing.
    """
    must_contain = {'Territory', 'Nation', filter_on}
    assert must_contain.issubset(set(df_input.columns))

    df_output = df_input.copy()
    filtered_col = str(filter_on+'_filtered')

    pd_filtered_result = (
        df_output[['Territory', 'Nation', filter_on]]
        # group_keys=False keeps the original row index on the result; without
        # it, recent pandas versions prepend the group keys and the index-based
        # merge below no longer lines up.
        .groupby(['Territory', 'Nation'], group_keys=False)
        # Forward the column name: the filter would otherwise always fall back
        # to its own default column, regardless of `filter_on`.
        .apply(savgol_filter, filter_on)
    )

    df_output = pd.merge(
        df_output,
        pd_filtered_result[[filtered_col]],
        left_index=True,
        right_index=True,
        how='left',
    )
    return df_output.copy()

In [15]:
def calc_doubling_rate(df_input,filter_on='confirmed'):
    """Add a rolling doubling-rate column for ``filter_on``.

    ``rolling_reg`` is applied to every (Territory, Nation) group and the
    result is joined back onto the input as ``<filter_on>_DR``.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Must contain the columns 'Territory', 'Nation' and ``filter_on``.
    filter_on : str, default 'confirmed'
        Column the doubling rate is computed from.

    Returns
    -------
    pandas.DataFrame
        ``df_input`` merged with the new ``<filter_on>_DR`` column.

    Raises
    ------
    AssertionError
        If one of the required columns is missing.
    """
    required = {'Territory', 'Nation', filter_on}
    assert required.issubset(set(df_input.columns))

    rate_column = str(filter_on+'_DR')

    per_group = df_input.groupby(['Territory','Nation']).apply(rolling_reg,filter_on)
    # After reset_index the original row labels sit in a column named
    # 'level_2'; rename it to 'index' so it can serve as the join key.
    rates = per_group.reset_index().rename(
        columns={filter_on: rate_column, 'level_2': 'index'}
    )

    merged = pd.merge(
        df_input,
        rates[['index', rate_column]],
        left_index=True,
        right_on=['index'],
        how='left',
    )
    # The join key was only needed for the merge.
    return merged.drop(columns=['index'])

In [16]:
if __name__ == '__main__':
    # Sanity check of the regression helper on a perfectly linear series.
    test_data_reg=np.array([2,4,6])
    result=get_doubling_time_via_regression(test_data_reg)
    print('the test slope is: '+str(result))

    # Rolling windows and smoothing are order dependent, so sort by date first.
    pd_JH_data=data_of_world.sort_values('Date',ascending=True).copy()

    # Smooth the confirmed counts, then compute doubling rates from both the
    # raw and the smoothed series.
    pd_result_larg=calc_filtered_data(pd_JH_data)
    pd_result_larg=calc_doubling_rate(pd_result_larg)
    pd_result_larg=calc_doubling_rate(pd_result_larg,'confirmed_filtered')

    # Blank out the filtered doubling rate wherever there are 100 or fewer
    # confirmed cases. `np.nan` is used because the `np.NaN` alias no longer
    # exists in NumPy 2.x.
    mask=pd_result_larg['confirmed']>100
    pd_result_larg['confirmed_filtered_DR']=pd_result_larg['confirmed_filtered_DR'].where(mask, other=np.nan)
    dataset = pd_result_larg.copy()

the test slope is: [2.]


### Creating a Dynamic Dashboard Using Plotly Dash

In [None]:
# Private copy of the processed data; the dashboard callback reads from this frame.
df_input_large = dataset.copy()

# Empty placeholder figure shown in the graph until the callback supplies one.
fig = go.Figure()

app = dash.Dash()
# Page layout, top to bottom: title, country selector, metric selector, graph.
app.layout = html.Div([

    dcc.Markdown('''
    #  Covid - 19 World Dashboard '''),

    dcc.Markdown('''
    ## Please select any country for Visualization
    '''),


    # Multi-select of nations; one option per distinct value of the 'Nation' column.
    dcc.Dropdown(
        id='country_drop_down',
        options=[ {'label': each,'value':each} for each in df_input_large['Nation'].unique()],
        value=['Poland', 'France','Singapore'], # countries selected when the page first loads
        multi=True
    ),

    dcc.Markdown('''
        ## Select Timeline of confirmed COVID-19 cases or the approximated doubling time
        '''),


    # Single-select of the metric to plot; each option's value is the name of
    # the data-frame column the callback plots on the y axis.
    dcc.Dropdown(
    id='doubling_time',
    options=[
        {'label': 'Timeline Confirmed ', 'value': 'confirmed'},
        {'label': 'Timeline Confirmed Filtered', 'value': 'confirmed_filtered'},
        {'label': 'Timeline Doubling Rate', 'value': 'confirmed_DR'},
        {'label': 'Timeline Doubling Rate Filtered', 'value': 'confirmed_filtered_DR'},
    ],
    value='confirmed',
    multi=False
    ),

    # Target of the callback output ('main_window_slope', 'figure').
    dcc.Graph(figure=fig, id='main_window_slope')
])



@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value'),
    Input('doubling_time', 'value')])
def update_figure(country_list,show_doubling):
    """Build the figure for the selected nations and metric.

    Parameters
    ----------
    country_list : list of str or None
        Nations chosen in the 'country_drop_down' selector. ``None`` or an
        empty list produces a figure without traces.
    show_doubling : str or None
        Column of ``df_input_large`` to plot, as sent by the 'doubling_time'
        selector ('confirmed', 'confirmed_filtered', 'confirmed_DR' or
        'confirmed_filtered_DR'). ``None`` falls back to 'confirmed'.

    Returns
    -------
    dict
        Figure description with 'data' (one trace per nation) and 'layout'.
    """
    # A cleared dropdown delivers None; fall back to something plottable
    # instead of raising inside the callback.
    country_list = country_list or []
    show_doubling = show_doubling or 'confirmed'

    # The doubling-rate columns are the ones carrying the '_DR' suffix; these
    # are the values the metric selector actually sends.
    is_doubling_rate = show_doubling.endswith('_DR')

    if is_doubling_rate:
        my_yaxis={'type':"log",
               'title':'Approximated doubling rate over 3 days (larger numbers are better #stayathome)'
              }
    else:
        my_yaxis={'type':"log",
                  'title':'Confirmed infected people (source johns hopkins csse, log-scale)'
              }

    # Case counts of a nation's territories add up; doubling rates do not,
    # so those are averaged across territories instead.
    aggregation = 'mean' if is_doubling_rate else 'sum'

    traces = []
    for each in country_list:

        # Only the plotted column is aggregated; this keeps the text column
        # 'Territory' out of the numeric reduction.
        df_plot = df_input_large.loc[df_input_large['Nation']==each, ['Date', show_doubling]]
        df_plot = df_plot.groupby('Date')[show_doubling].agg(aggregation).reset_index()

        traces.append(dict(x=df_plot.Date,
                                y=df_plot[show_doubling],
                                mode='markers+lines',
                                opacity=0.9,
                                name=each
                        )
                )

    return {
            'data': traces,
            'layout': dict (
                width=1280,
                height=720,

                xaxis={'title':'Timeline',
                        'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      },

                yaxis=my_yaxis
        )
    }

if __name__ == '__main__':

    # Newer Dash releases start the server through `app.run` and have retired
    # `run_server`; older releases only offer `run_server`. Use whichever exists.
    # The reloader stays off because it does not work from inside a notebook kernel.
    start_server = app.run if hasattr(app, 'run') else app.run_server
    start_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
