# COVID-19 DASHBOARD

In [1]:
import dash
from dash import dcc, html
from dash.dependencies import Output, Input
import plotly.express as px
import dash_bootstrap_components as dbc
import pandas as pd
import numpy as np
import datetime

import plotly.graph_objects as go

## Importing & Processing Data

In [2]:
data_path='C:\\Users\\ASUS\\enterprise_datascience_covid\\data\\raw\\COVID-19\\csse_covid_19_data\\csse_covid_19_time_series\\time_series_covid19_confirmed_global.csv'
pd_raw=pd.read_csv(data_path)

pd_data_base=pd_raw.rename(columns={'Country/Region':'country',
                                  'Province/State':'state'})

pd_data_base['state']=pd_data_base['state'].fillna('no')

pd_data_base=pd_data_base.drop(['Lat','Long'],axis=1)

In [4]:
pd_relational_model=pd_data_base.set_index(['state','country']).T.stack(level=[0,1]).reset_index().rename(columns={'level_0':'date',0:'confirmed'})   
pd_relational_model['date']=pd_relational_model.date.astype('datetime64[ns]')
pd_relational_model.to_csv('C:\\Users\\ASUS\\enterprise_datascience_covid\\data\\processed\\COVID_relational_confirmed.csv',index=False)
pd_relational_model.head()

Unnamed: 0,date,state,country,confirmed
0,2020-01-22,Alberta,Canada,0.0
1,2020-01-22,Anguilla,United Kingdom,0.0
2,2020-01-22,Anhui,China,1.0
3,2020-01-22,Aruba,Netherlands,0.0
4,2020-01-22,Australian Capital Territory,Australia,0.0


In [6]:
pd_JH_data= pd.read_csv('C:\\Users\\ASUS\\enterprise_datascience_covid\\data\\processed\\COVID_relational_confirmed.csv',parse_dates=[0])
pd_JH_data=pd_JH_data.sort_values('date',ascending=True).reset_index(drop=True).copy()
pd_JH_data.head()

Unnamed: 0,date,state,country,confirmed
0,2020-01-22,Alberta,Canada,0.0
1,2020-01-22,no,Kosovo,0.0
2,2020-01-22,no,Kuwait,0.0
3,2020-01-22,no,Kyrgyzstan,0.0
4,2020-01-22,no,Laos,0.0


In [7]:
pd_confirmed=pd_JH_data.groupby(['country']).agg(np.max)

In [8]:
pd_confirmed.reset_index().reset_index()

Unnamed: 0,index,country,date,state,confirmed
0,0,Afghanistan,2022-07-16,no,183445.0
1,1,Albania,2022-07-16,no,293917.0
2,2,Algeria,2022-07-16,no,266424.0
3,3,Andorra,2022-07-16,no,44671.0
4,4,Angola,2022-07-16,no,101901.0
...,...,...,...,...,...
194,194,West Bank and Gaza,2022-07-16,no,662588.0
195,195,Winter Olympics 2022,2022-07-16,no,535.0
196,196,Yemen,2022-07-16,no,11848.0
197,197,Zambia,2022-07-16,no,327994.0


## Filter and Doubling Rate Calculation

In [9]:
from sklearn import linear_model
reg = linear_model.LinearRegression(fit_intercept=True)

def get_doubling_time_via_regression(in_array):
    ''' Use a linear regression to approximate the doubling rate'''

    y = np.array(in_array)
    X = np.arange(-1,2).reshape(-1, 1)

    assert len(in_array)==3
    reg.fit(X,y)
    intercept=reg.intercept_
    slope=reg.coef_

    return intercept/slope

In [10]:
def rolling_reg(df_input,col='confirmed'):
    ''' input has to be a data frame'''
    ''' return is single series (mandatory for group by apply)'''
    days_back=3
    result=df_input[col].rolling(
                window=days_back,
                min_periods=days_back).apply(get_doubling_time_via_regression,raw=False)
    return result

In [11]:
pd_DR_result= pd_JH_data[['state','country','confirmed']].groupby(['state','country']).apply(rolling_reg,'confirmed').reset_index()

In [12]:
pd_DR_result


Unnamed: 0,state,country,level_2,confirmed
0,Alberta,Canada,0,
1,Alberta,Canada,499,
2,Alberta,Canada,762,
3,Alberta,Canada,1048,
4,Alberta,Canada,1354,
...,...,...,...,...
258490,no,Zimbabwe,257161,12188.460317
258491,no,Zimbabwe,257403,18283.690476
258492,no,Zimbabwe,257731,7757.666667
258493,no,Zimbabwe,257973,5020.333333


In [13]:
pd_DR_result=pd_DR_result.rename(columns={'confirmed':'confirmed_DR',
                             'level_2':'index'})
#pd_Doubling_rate=pd_DR_result.sort_values('index_1').replace(np.nan, 0)


In [14]:
pd_Doubling_rate=pd_DR_result.sort_values('index').replace(np.nan, 0).set_index('index')

In [15]:
pd_Doubling_rate

Unnamed: 0_level_0,state,country,confirmed_DR
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Alberta,Canada,0.000000e+00
1,no,Kosovo,0.000000e+00
2,no,Kuwait,0.000000e+00
3,no,Kyrgyzstan,0.000000e+00
4,no,Laos,0.000000e+00
...,...,...,...
258490,no,Andorra,inf
258491,no,Algeria,7.835020e+03
258492,no,Albania,1.977248e+02
258493,no,Argentina,inf


In [16]:
pd_Doubling_rate["confirmed"]=pd_Doubling_rate.index.map(pd_JH_data["confirmed"])

In [17]:
pd_Doubling_rate

Unnamed: 0_level_0,state,country,confirmed_DR,confirmed
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Alberta,Canada,0.000000e+00,0.0
1,no,Kosovo,0.000000e+00,0.0
2,no,Kuwait,0.000000e+00,0.0
3,no,Kyrgyzstan,0.000000e+00,0.0
4,no,Laos,0.000000e+00,0.0
...,...,...,...,...
258490,no,Andorra,inf,44671.0
258491,no,Algeria,7.835020e+03,266424.0
258492,no,Albania,1.977248e+02,293917.0
258493,no,Argentina,inf,9426171.0


In [18]:
pd_Doubling_rate["date"]=pd_Doubling_rate.index.map(pd_JH_data["date"])

In [19]:
pd_Doubling_rate.head(287)

Unnamed: 0_level_0,state,country,confirmed_DR,confirmed,date
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,Alberta,Canada,0.0,0.0,2020-01-22
1,no,Kosovo,0.0,0.0,2020-01-22
2,no,Kuwait,0.0,0.0,2020-01-22
3,no,Kyrgyzstan,0.0,0.0,2020-01-22
4,no,Laos,0.0,0.0,2020-01-22
...,...,...,...,...,...
282,no,Algeria,0.0,0.0,2020-01-22
283,no,Albania,0.0,0.0,2020-01-22
284,no,Argentina,0.0,0.0,2020-01-22
285,no,Lithuania,0.0,0.0,2020-01-23


In [20]:
pd_Doubling_rate.rename(columns={'confirmed_DR': 'Doubling rate'})

Unnamed: 0_level_0,state,country,Doubling rate,confirmed,date
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,Alberta,Canada,0.000000e+00,0.0,2020-01-22
1,no,Kosovo,0.000000e+00,0.0,2020-01-22
2,no,Kuwait,0.000000e+00,0.0,2020-01-22
3,no,Kyrgyzstan,0.000000e+00,0.0,2020-01-22
4,no,Laos,0.000000e+00,0.0,2020-01-22
...,...,...,...,...,...
258490,no,Andorra,inf,44671.0,2022-07-16
258491,no,Algeria,7.835020e+03,266424.0,2022-07-16
258492,no,Albania,1.977248e+02,293917.0,2022-07-16
258493,no,Argentina,inf,9426171.0,2022-07-16


In [21]:
from scipy import signal

def savgol_filter(df_input,column='confirmed',window=5):
    ''' Savgol Filter which can be used in groupby apply function 
        it ensures that the data structure is kept'''
    window=5, 
    degree=1
    df_result=df_input
    
    filter_in=df_input[column].fillna(0) # attention with the neutral element here
    
    result=signal.savgol_filter(np.array(filter_in),
                           5, # window size used for filtering
                           1)
    df_result[column+'_filtered']=result
    return df_result
        

In [22]:
pd_filtered_result= pd_Doubling_rate[['state','country','confirmed']].groupby(['state','country']).apply(savgol_filter).reset_index()


In [23]:
pd_filtered_result

Unnamed: 0,index,state,country,confirmed,confirmed_filtered
0,0,Alberta,Canada,0.0,0.0
1,1,no,Kosovo,0.0,0.0
2,2,no,Kuwait,0.0,0.0
3,3,no,Kyrgyzstan,0.0,0.0
4,4,no,Laos,0.0,0.0
...,...,...,...,...,...
258490,258490,no,Andorra,44671.0,44671.0
258491,258491,no,Algeria,266424.0,266421.8
258492,258492,no,Albania,293917.0,294229.6
258493,258493,no,Argentina,9426171.0,9426171.0


In [24]:
pd_filtered_merged = pd.merge(pd_Doubling_rate,pd_filtered_result[['index','confirmed_filtered']],on=['index'],how='left')
pd_filtered_merged

Unnamed: 0,index,state,country,confirmed_DR,confirmed,date,confirmed_filtered
0,0,Alberta,Canada,0.000000e+00,0.0,2020-01-22,0.0
1,1,no,Kosovo,0.000000e+00,0.0,2020-01-22,0.0
2,2,no,Kuwait,0.000000e+00,0.0,2020-01-22,0.0
3,3,no,Kyrgyzstan,0.000000e+00,0.0,2020-01-22,0.0
4,4,no,Laos,0.000000e+00,0.0,2020-01-22,0.0
...,...,...,...,...,...,...,...
258490,258490,no,Andorra,inf,44671.0,2022-07-16,44671.0
258491,258491,no,Algeria,7.835020e+03,266424.0,2022-07-16,266421.8
258492,258492,no,Albania,1.977248e+02,293917.0,2022-07-16,294229.6
258493,258493,no,Argentina,inf,9426171.0,2022-07-16,9426171.0


In [25]:
pd_filtered_doubling= pd_filtered_merged[['state','country','confirmed_filtered']].groupby(['state','country']).apply(rolling_reg,'confirmed_filtered').reset_index()
pd_filtered_doubling

Unnamed: 0,state,country,level_2,confirmed_filtered
0,Alberta,Canada,0,
1,Alberta,Canada,499,
2,Alberta,Canada,762,
3,Alberta,Canada,1048,
4,Alberta,Canada,1354,
...,...,...,...,...
258490,no,Zimbabwe,257161,12016.798122
258491,no,Zimbabwe,257403,9587.325843
258492,no,Zimbabwe,257731,9343.360097
258493,no,Zimbabwe,257973,9047.227326


In [26]:
pd_filtered_doubling=pd_filtered_doubling.rename(columns={'confirmed_filtered':'confirmed_filtered_DR',
                             'level_2':'index'})

pd_filtered_doubling.tail()

Unnamed: 0,state,country,index,confirmed_filtered_DR
258490,no,Zimbabwe,257161,12016.798122
258491,no,Zimbabwe,257403,9587.325843
258492,no,Zimbabwe,257731,9343.360097
258493,no,Zimbabwe,257973,9047.227326
258494,no,Zimbabwe,258494,8368.156863


In [27]:
pd_result_final = pd.merge(pd_filtered_merged,pd_filtered_doubling[['index','confirmed_filtered_DR']],on=['index'],how='left')
pd_result_final.tail()

Unnamed: 0,index,state,country,confirmed_DR,confirmed,date,confirmed_filtered,confirmed_filtered_DR
258490,258490,no,Andorra,inf,44671.0,2022-07-16,44671.0,-3069768000000000.0
258491,258491,no,Algeria,7835.02,266424.0,2022-07-16,266421.8,8705.595
258492,258492,no,Albania,197.7248,293917.0,2022-07-16,294229.6,243.8852
258493,258493,no,Argentina,inf,9426171.0,2022-07-16,9426171.0,-5060637000000000.0
258494,258494,no,Zimbabwe,14226.17,256083.0,2022-07-16,256096.2,8368.157


In [28]:
mask=pd_result_final['confirmed']>100
pd_result_final['confirmed_filtered_DR']=pd_result_final['confirmed_filtered_DR'].where(mask, other=np.NaN) 

In [29]:
pd_result_final.to_csv('C:\\Users\\ASUS\\enterprise_datascience_covid\\data\\processed\\COVID_df_final.csv',index=False)

## Data Visualization

In [30]:
# %load C:\\Users\\ASUS\\enterprise_datascience_covid\\src\\visualization\\Covid_DB.py
import dash
from dash import dcc, html
from dash.dependencies import Output, Input
import plotly.express as px
import plotly.graph_objects as go
import dash_bootstrap_components as dbc
import pandas as pd
import numpy as np
import datetime
import os

df_final=pd.read_csv('C:\\Users\\ASUS\\enterprise_datascience_covid\\data\\processed\\COVID_df_final.csv')

fig = go.Figure()

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]
                    #meta_tags=[{'name': 'viewport',
                        #        'content': 'width=device-width, initial-scale=1.0'}]
                )
app.title = 'Covid-19 Dashboard'

colors = {
    'background': '#112D32',
    'bodyColor':'#D1E8E2',
    'text': '#D1E8E2'
}
def get_page_heading_style():
    return {'backgroundColor': colors['background']}


def get_page_heading_title():
    return html.H1(children='COVID-19 Dashboard',
                                        style={
                                        'textAlign': 'center',
                                        'color': colors['text']
                                    })

def get_page_heading_subtitle():
    return html.Div(children= "Goal of this project is to learn Data Science by applying a cross Industry standard process, \
                                it covers the full walkthrough of: automated data gathering, data transformations,\
                                filtering and machine learning to approximating the doubling time, and \
                                (static) deployment of responsive dashboard. ",
                                         style={
                                             'textAlign':'center',
                                             'color':colors['text']
                                         })

def generate_page_header():
    main_header =  dbc.Row(
                            [
                                dbc.Col(get_page_heading_title(),md=12)
                            ],
                            align="center",
                            style=get_page_heading_style()
                        )
    subtitle_header = dbc.Row(
                            [
                                dbc.Col(get_page_heading_subtitle(),md=12)
                            ],
                            align="center",
                            style=get_page_heading_style()
                        )
    header = (main_header,subtitle_header)
    return header


page_header = generate_page_header()
app.layout = dbc.Container([

            page_header[0],
            page_header[1],
            html.Hr(),



            dbc.Row([
                dbc.Col([
                    html.P("Select the Country(s) from the list for Visualization"),
                    dcc.Dropdown(
                        id= 'country_drop_down',
                        options=[ {'label': each, 'value': each} for each in df_final['country'].unique()],
                        value=['India','US','Germany'],
                        multi= True
                    )
                ]),

                dbc.Col([
                    html.P("Select the option"),
                    dcc.Dropdown(
                        id='doubling_time',
                        options=[
                            {'label': 'Timeline Confirmed ', 'value': 'confirmed'},
                            {'label': 'Timeline Confirmed Filtered', 'value': 'confirmed_filtered'},
                            {'label': 'Timeline Doubling Rate', 'value': 'confirmed_DR'},
                            {'label': 'Timeline Doubling Rate Filtered', 'value': 'confirmed_filtered_DR'},
                            ],
                        value='confirmed',
                        multi=False
                    )

                    ])

            ]),

            html.Hr(),

            dbc.Row([
                dbc.Col([

                        dcc.Graph(id='main_window_slope',figure= fig)


                        ], width= {'size':5, 'offset': 1})

            ], align ='start'),

            html.Hr(),




        ], fluid=True,style={'backgroundColor': colors['bodyColor']})

@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value'),
    Input('doubling_time', 'value')])


def update_figure(country_list,show_doubling):


    if 'confirmed_DR' in show_doubling:
        my_yaxis={'type':"log",
               'title':'Approximated doubling rate over 3 days '
              }
    else:
        my_yaxis={'type':"log",
                  'title':'Confirmed infected people (source johns hopkins csse, log-scale)'
              }


    traces = []
    for each in country_list:

        df_plot=df_final[df_final['country']==each]

        if show_doubling=='confirmed_filtered_DR':
            df_plot=df_plot[['state','country','confirmed','confirmed_filtered','confirmed_DR','confirmed_filtered_DR','date']].groupby(['country','date']).agg(np.mean).reset_index()
        else:
            df_plot=df_plot[['state','country','confirmed','confirmed_filtered','confirmed_DR','confirmed_filtered_DR','date']].groupby(['country','date']).agg(np.sum).reset_index()
       #print(show_doubling)


        traces.append(dict(x=df_plot.date,
                                y=df_plot[show_doubling],
                                mode='markers+lines',
                                opacity=0.9,
                                name=each,
                                title_font_family="Times New Roman",
                                title_font_color="black",
                                title_font_size= 24,
                                font_family="Times New Roman",
                                legend_bordercolor = 'black',
                                legend_borderwidth = 2
                        )
                )
    return {
            'data': traces,
            'layout': dict (
                            width=1200,
                            height=700,

                            xaxis={'title':'Date',
                                    'tickangle':-45,
                                    'nticks':20,
                                    'tickfont':dict(size=14,color="#7f7f7f"),
                                    },

                            yaxis=my_yaxis,
                            plot_bgcolor= '#ECECEC',
                            paper_bgcolor= '#ECECEC',
                            title={
                                    'text': "Confirmed Covid Cases & their Doubling Rate",
                                    'y':0.9,
                                    'x':0.5,
                                    'xanchor': 'center',
                                    'yanchor': 'top'}


                            )
        }



if __name__ == '__main__':
    app.run_server(debug= True)


Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


SystemExit: 1


To exit: use 'exit', 'quit', or Ctrl-D.

