In [1]:
# note: all individual plotly plots are not run locally but on the server


import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.io as pio
pio.renderers.default="browser"
import plotly.graph_objects as go


In [2]:
import dash
from dash.dependencies import Input, Output
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc

In [3]:
df=pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv")

In [4]:
df.head(2)

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3
0,Afghanistan,2021-01-14 11:21:58,33.93911,67.709953,53584.0,2301.0,45105.0,6178.0,137.647787,,,4.294192,4,AFG
1,Albania,2021-01-14 11:21:58,41.1533,20.1683,65334.0,1256.0,38860.0,25218.0,2270.275905,,,1.922429,8,ALB


In [5]:
df_deaths=pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")
df_deaths.head(2)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,1/4/21,1/5/21,1/6/21,1/7/21,1/8/21,1/9/21,1/10/21,1/11/21,1/12/21,1/13/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,2237,2244,2244,2253,2257,2264,2277,2288,2301,2301
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,1199,1210,1217,1223,1230,1233,1241,1247,1252,1256


In [6]:
df_deaths.isnull().sum()

Province/State    188
Country/Region      0
Lat                 1
Long                1
1/22/20             0
                 ... 
1/9/21              0
1/10/21             0
1/11/21             0
1/12/21             0
1/13/21             0
Length: 362, dtype: int64

In [7]:
df_confirmed=pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
df_confirmed.head(2)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,1/4/21,1/5/21,1/6/21,1/7/21,1/8/21,1/9/21,1/10/21,1/11/21,1/12/21,1/13/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,53011,53105,53105,53207,53332,53400,53489,53538,53584,53584
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,59623,60283,61008,61705,62378,63033,63595,63971,64627,65334


In [8]:
df_confirmed.isnull().sum()

Province/State    188
Country/Region      0
Lat                 1
Long                1
1/22/20             0
                 ... 
1/9/21              0
1/10/21             0
1/11/21             0
1/12/21             0
1/13/21             0
Length: 362, dtype: int64

In [9]:
df_recovered=pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv")
df_recovered.head(2)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,1/4/21,1/5/21,1/6/21,1/7/21,1/8/21,1/9/21,1/10/21,1/11/21,1/12/21,1/13/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,42530,42666,42666,43291,43440,43740,43948,44137,44608,44850
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,34996,35551,36102,36535,36971,37327,37648,37981,38421,38860


In [10]:
df_recovered.isnull().sum()

Province/State    189
Country/Region      0
Lat                 0
Long                0
1/22/20             0
                 ... 
1/9/21              0
1/10/21             0
1/11/21             0
1/12/21             0
1/13/21             0
Length: 362, dtype: int64

### Data Preprocessing
A few null values are present in all three dataframes and need to be dealt with.

In [11]:
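# Remove the Province/State column from all 3 datasets: most of its entries are empty (e.g. 188 of the 272 rows in the confirmed data) and the analysis below works per country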

In [12]:
# Rebind instead of dropping in place, and ignore an already-missing column,
# so this cell can be re-run without raising KeyError.
df_confirmed=df_confirmed.drop(columns="Province/State",errors="ignore")
df_deaths=df_deaths.drop(columns="Province/State",errors="ignore")
df_recovered=df_recovered.drop(columns="Province/State",errors="ignore")

In [13]:
# get the row that contains null values  for all 3 dataframes

In [14]:
print(df_deaths[df_deaths["Lat"].isnull()])

   Country/Region  Lat  Long  1/22/20  1/23/20  1/24/20  1/25/20  1/26/20  \
52         Canada  NaN   NaN        0        0        0        0        0   

    1/27/20  1/28/20  ...  1/4/21  1/5/21  1/6/21  1/7/21  1/8/21  1/9/21  \
52        0        0  ...       0       0       0       0       0       0   

    1/10/21  1/11/21  1/12/21  1/13/21  
52        0        0        0        0  

[1 rows x 361 columns]


In [15]:
print(df_confirmed[df_confirmed["Lat"].isnull()])

   Country/Region  Lat  Long  1/22/20  1/23/20  1/24/20  1/25/20  1/26/20  \
52         Canada  NaN   NaN        0        0        0        0        0   

    1/27/20  1/28/20  ...  1/4/21  1/5/21  1/6/21  1/7/21  1/8/21  1/9/21  \
52        0        0  ...      13      13      13      13      13      13   

    1/10/21  1/11/21  1/12/21  1/13/21  
52       13       13       13       13  

[1 rows x 361 columns]


In [16]:
print(df_recovered[df_recovered["Country/Region"]=="Canada"])

   Country/Region      Lat      Long  1/22/20  1/23/20  1/24/20  1/25/20  \
39         Canada  56.1304 -106.3468        0        0        0        0   

    1/26/20  1/27/20  1/28/20  ...  1/4/21  1/5/21  1/6/21  1/7/21  1/8/21  \
39        0        0        0  ...  523448  529580  537024  544047  551983   

    1/9/21  1/10/21  1/11/21  1/12/21  1/13/21  
39  558594   565049   575152   582822   591131  

[1 rows x 361 columns]


In [17]:
# Replace the null latitude and longitude for Canada with the actual values present in the recovered dataframe

In [18]:
# Fill only the Canada rows whose coordinates are missing; assigning to every
# Canada row would overwrite the coordinates of rows that already have them.
canada_rows=df_deaths["Country/Region"]=="Canada"
df_deaths.loc[canada_rows & df_deaths["Lat"].isnull(),"Lat"]=56.1304
df_deaths.loc[canada_rows & df_deaths["Long"].isnull(),"Long"]=-106.3468

In [19]:
# Fill only the Canada rows whose coordinates are missing; assigning to every
# Canada row would overwrite the coordinates of rows that already have them.
canada_rows=df_confirmed["Country/Region"]=="Canada"
df_confirmed.loc[canada_rows & df_confirmed["Lat"].isnull(),"Lat"]=56.1304
df_confirmed.loc[canada_rows & df_confirmed["Long"].isnull(),"Long"]=-106.3468

In [20]:
# The Lat and Long values for Canada were replaced successfully

In [21]:
print(df_confirmed[df_confirmed["Lat"].isnull()])

Empty DataFrame
Columns: [Country/Region, Lat, Long, 1/22/20, 1/23/20, 1/24/20, 1/25/20, 1/26/20, 1/27/20, 1/28/20, 1/29/20, 1/30/20, 1/31/20, 2/1/20, 2/2/20, 2/3/20, 2/4/20, 2/5/20, 2/6/20, 2/7/20, 2/8/20, 2/9/20, 2/10/20, 2/11/20, 2/12/20, 2/13/20, 2/14/20, 2/15/20, 2/16/20, 2/17/20, 2/18/20, 2/19/20, 2/20/20, 2/21/20, 2/22/20, 2/23/20, 2/24/20, 2/25/20, 2/26/20, 2/27/20, 2/28/20, 2/29/20, 3/1/20, 3/2/20, 3/3/20, 3/4/20, 3/5/20, 3/6/20, 3/7/20, 3/8/20, 3/9/20, 3/10/20, 3/11/20, 3/12/20, 3/13/20, 3/14/20, 3/15/20, 3/16/20, 3/17/20, 3/18/20, 3/19/20, 3/20/20, 3/21/20, 3/22/20, 3/23/20, 3/24/20, 3/25/20, 3/26/20, 3/27/20, 3/28/20, 3/29/20, 3/30/20, 3/31/20, 4/1/20, 4/2/20, 4/3/20, 4/4/20, 4/5/20, 4/6/20, 4/7/20, 4/8/20, 4/9/20, 4/10/20, 4/11/20, 4/12/20, 4/13/20, 4/14/20, 4/15/20, 4/16/20, 4/17/20, 4/18/20, 4/19/20, 4/20/20, 4/21/20, 4/22/20, 4/23/20, 4/24/20, 4/25/20, 4/26/20, 4/27/20, ...]
Index: []

[0 rows x 361 columns]


In [22]:
df_deaths.isnull().sum()

Country/Region    0
Lat               0
Long              0
1/22/20           0
1/23/20           0
                 ..
1/9/21            0
1/10/21           0
1/11/21           0
1/12/21           0
1/13/21           0
Length: 361, dtype: int64

In [23]:
# Recovery rate = recovered cases as a percentage of confirmed cases, to 2 decimals.
recovered_share=df["Recovered"]/df["Confirmed"]*100
df["Recovery_Rate"]=recovered_share.round(2)

In [24]:
df.head()

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3,Recovery_Rate
0,Afghanistan,2021-01-14 11:21:58,33.93911,67.709953,53584.0,2301.0,45105.0,6178.0,137.647787,,,4.294192,4,AFG,84.18
1,Albania,2021-01-14 11:21:58,41.1533,20.1683,65334.0,1256.0,38860.0,25218.0,2270.275905,,,1.922429,8,ALB,59.48
2,Algeria,2021-01-14 11:21:58,28.0339,1.6596,102860.0,2819.0,69791.0,30250.0,234.566827,,,2.740618,12,DZA,67.85
3,Andorra,2021-01-14 11:21:58,42.5063,1.5218,8818.0,87.0,8070.0,661.0,11412.670679,,,0.986618,20,AND,91.52
4,Angola,2021-01-14 11:21:58,-11.2027,17.8739,18425.0,424.0,15631.0,2370.0,56.060518,,,2.301221,24,AGO,84.84


In [25]:
# Rebind instead of dropping in place, and ignore already-missing columns,
# so this cell can be re-run without raising KeyError.
df=df.drop(columns=["People_Tested","People_Hospitalized"],errors="ignore")

In [26]:
# Keep only the rows of df that have no missing value in any remaining column.
# NOTE(review): a country is dropped entirely if any single field is NaN, and
# new_df feeds the pie and bar charts below - confirm no needed country is lost.
new_df=df.dropna() 

In [27]:
new_df.head()

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,Mortality_Rate,UID,ISO3,Recovery_Rate
0,Afghanistan,2021-01-14 11:21:58,33.93911,67.709953,53584.0,2301.0,45105.0,6178.0,137.647787,4.294192,4,AFG,84.18
1,Albania,2021-01-14 11:21:58,41.1533,20.1683,65334.0,1256.0,38860.0,25218.0,2270.275905,1.922429,8,ALB,59.48
2,Algeria,2021-01-14 11:21:58,28.0339,1.6596,102860.0,2819.0,69791.0,30250.0,234.566827,2.740618,12,DZA,67.85
3,Andorra,2021-01-14 11:21:58,42.5063,1.5218,8818.0,87.0,8070.0,661.0,11412.670679,0.986618,20,AND,91.52
4,Angola,2021-01-14 11:21:58,-11.2027,17.8739,18425.0,424.0,15631.0,2370.0,56.060518,2.301221,24,AGO,84.84


In [28]:
# no missing values are present

### Convert the data into a cleaned time-series format for a single country: not all columns are required for the plot, and the leading values of each time series are 0, so they can be truncated.



In [29]:
df_confirmed.T[4:]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,262,263,264,265,266,267,268,269,270,271
1/23/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/24/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/25/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/26/20,0,0,0,0,0,0,0,0,0,3,...,0,0,0,0,0,2,0,0,0,0
1/27/20,0,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1/9/21,53400,63033,101913,8586,18156,169,1714409,161794,118,5007,...,3017409,24974,77530,1,116172,1513,146701,2104,26567,20499
1/10/21,53489,63595,102144,8586,18193,176,1722217,162131,118,5018,...,3072349,26186,77572,1,116610,1514,147400,2104,27728,21477
1/11/21,53538,63971,102369,8586,18254,176,1730921,162288,118,5034,...,3118518,26901,77611,1,116983,1515,148171,2105,28596,22297
1/12/21,53584,64627,102641,8682,18343,176,1744704,162643,118,5041,...,3164051,27846,77663,1,117299,1520,148968,2107,29757,23239


### The records are cumulative (each day includes all previous days), so `diff()` gives the delta between two consecutive days. The `rolling()` function smooths the graph: `window` is the value chosen with the slider, the mean is calculated over that window, and the data is then truncated.

In [30]:
def time_data(data,country="US",window=3,skip_days=40):
    """Build a smoothed daily-count time series for one country.

    The date columns of ``data`` hold cumulative counts. All rows of the
    requested country are summed per date, differenced to get the daily
    change, and averaged over a rolling window.

    Date columns are selected by name (everything that is not a known
    metadata column) rather than by position, so the result is the same
    whether or not "Province/State" has already been dropped, and the first
    date column is never lost.

    Parameters
    ----------
    data : pandas.DataFrame
        Wide time-series frame with a "Country/Region" column and one
        column per date.
    country : str
        Value of "Country/Region" to select.
    window : int
        Rolling-mean window, in days.
    skip_days : int
        Number of leading dates to drop from the result (default 40).

    Returns
    -------
    pandas.DataFrame
        Single column "Total", indexed by the date column labels.
    """
    meta_cols=("Province/State","Country/Region","Lat","Long")
    country_rows=data[data["Country/Region"]==country]
    date_cols=[col for col in country_rows.columns if col not in meta_cols]
    # Sum the numeric date columns directly instead of transposing the whole
    # mixed-dtype frame (which would turn every value into an object).
    cumulative=country_rows[date_cols].sum(axis=0)
    daily=cumulative.diff().rolling(window=window).mean()
    return daily.iloc[skip_days:].to_frame(name="Total")
    

### Find the overall totals for confirmed, deaths and recovered

In [31]:
def find_tot(df):
    """Return the sum of the values in the last column of ``df``."""
    last_column=df.iloc[:,-1]
    total=last_column.sum()
    return total


In [32]:
# Overall totals, taken from the last column of each time-series frame.
confirm_total,deaths_total,recovered_total=[
    find_tot(frame) for frame in (df_confirmed,df_deaths,df_recovered)
]

In [33]:
print("Total Confirmed cases",confirm_total)
print("Total Deaths",deaths_total)
print("Total Recovered",recovered_total)

Total Confirmed cases 92348199
Total Deaths 1978614
Total Recovered 50994811


### Get the total confirmed, deaths and recovered for a country

In [34]:
def country_total(df,country="US"):
    """Return the sum of the last column over the rows of one country.

    Rows are selected where "Country/Region" equals ``country``.
    """
    is_country=df["Country/Region"]==country
    latest_values=df.loc[is_country].iloc[:,-1]
    return latest_values.sum()


In [35]:
# Latest totals for the US from each of the three time-series frames.
confirm_country,deaths_country,recovered_country=[
    country_total(frame,"US") for frame in (df_confirmed,df_deaths,df_recovered)
]

In [36]:
print("US confirmed",confirm_country)
print("US deaths",deaths_country)
print("US recovered",recovered_country)


US confirmed 23071895
US deaths 384653
US recovered 0


### Line Graph using Plotly for confirmed cases

In [37]:
# world trend

In [38]:
# Confirmed plot 1

In [39]:
# Smoothed daily confirmed-case series for the US, using a 3-day window.
country="US"
df=time_data(data=df_confirmed,country=country,window=3)

In [40]:
#fig=px.line(df,y='Total',x=df.index,title="Daily Confirmed Cases trend for {}".format(country),height=600,color_discrete_sequence=["red"])
#fig.update_layout(title_x=0.5,plot_bgcolor="#aa6ae6",paper_bgcolor='#aa6ae6',xaxis_title="Date",yaxis_title="window")


In [41]:
def figure_trend(country="US",window=3):
    """Build the Plotly line chart of daily confirmed cases for one country.

    The series comes from ``time_data`` applied to ``df_confirmed``. The
    y-axis title names the moving-average window unless ``window`` is 1.
    """
    trend=time_data(data=df_confirmed,country=country,window=window)
    yaxis_title=(
        "Daily Cases"
        if window==1
        else "Daily Cases ({} Day Moving Average)".format(window)
    )
    fig=px.line(
        trend,
        x=trend.index,
        y='Total',
        title="Daily Confirmed Cases trend for {}".format(country),
        height=600,
        color_discrete_sequence=["red"],
    )
    fig.update_layout(
        title_x=0.5,
        plot_bgcolor="#aa6ae6",
        paper_bgcolor='#aa6ae6',
        xaxis_title="Date",
        yaxis_title=yaxis_title,
    )
    return fig

In [42]:
# Line graph using plotly for deaths

In [43]:
def deaths_trend(country="US",window=3):
    """Build the Plotly line chart of daily deaths for one country.

    The series comes from ``time_data`` applied to ``df_deaths``. The
    y-axis title names the moving-average window unless ``window`` is 1.
    """
    df=time_data(data=df_deaths,country=country,window=window)
    if window==1:
        # Plural and without a trailing space, consistent with the label below.
        yaxis_title="Daily Deaths"
    else:
        yaxis_title="Daily Deaths ({} Day Moving Average)".format(window)
    fig1=px.line(df,y='Total',x=df.index,title="Daily Deaths trend for {}".format(country),height=600,color_discrete_sequence=["red"])
    fig1.update_layout(title_x=0.5,plot_bgcolor="#aa6ae6",paper_bgcolor='#aa6ae6',xaxis_title="Date",yaxis_title=yaxis_title)
    return fig1

In [44]:
def getpie():
    """Build a donut chart of confirmed cases per country from ``new_df``."""
    confirmed_pie=go.Pie(
        labels=new_df['Country_Region'],
        values=new_df['Confirmed'],
        hole=.35,
        textinfo='label+percent',
    )
    fig=go.Figure(data=[confirmed_pie])

    # The annotation supplies the text shown for the donut.
    center_label=dict(text='Confirmed<br>Cases', showarrow=False)
    fig.update_layout(
        title_text="Confirmed Cases Percentage by Countries",
        annotations=[center_label],
    )

    fig.update_traces(textposition='inside')
    fig.update_layout(margin={"r":0,"l":0,"b":0})
    return fig

In [45]:
#getpie()

In [46]:
def getpiedeath():
    """Build a donut chart of deaths per country from ``new_df``."""
    deaths_pie=go.Pie(
        labels=new_df['Country_Region'],
        values=new_df['Deaths'],
        hole=.35,
        textinfo='label+percent',
    )
    fig=go.Figure(data=[deaths_pie])

    # The annotation supplies the text shown for the donut.
    center_label=dict(text='Deaths<br>Cases', showarrow=False)
    fig.update_layout(
        title_text="Deaths Cases Percentage by Countries",
        annotations=[center_label],
    )
    fig.update_traces(textposition='inside')
    fig.update_layout(margin={"r":0,"l":0,"b":0})
    return fig

In [47]:
# The ten rows of new_df with the highest Confirmed counts, largest first.
by_confirmed_desc=new_df.sort_values(by="Confirmed",ascending=False)
res=by_confirmed_desc.head(10)

In [48]:
def getbar(df):
    """Return a vertical bar chart of ``Confirmed`` per ``Country_Region`` for ``df``."""
    return px.bar(
        df,
        x='Country_Region',
        y='Confirmed',
        orientation='v',
        title="Top 10 confirmed Cases",
    )

In [49]:
#getbar(res)

In [50]:
#figure_trend()

In [51]:
external_stylesheets = [dbc.themes.BOOTSTRAP] # Bootstrap theme from dash_bootstrap_components, used to style the dashboard

In [52]:
# Create the Dash application with the stylesheet list defined above and set
# its title attribute.
app=dash.Dash(__name__,external_stylesheets=external_stylesheets)
app.title="Dashboard"

In [53]:
# Colour palette shared by the layout helpers below.
colors = {
    'background': '#4f0aa8',  # background of the page-heading row
    'bodyColor':'#c5e5f0',    # background of the main container
    'text': '#0689bd'         # colour of the page-heading title
}


def get_page_heading_style():
    """Return the style dict for the page-heading row (background colour only)."""
    heading_style=dict(backgroundColor=colors['background'])
    return heading_style


def get_page_heading_title():
    """Return the centred H1 element holding the dashboard title."""
    title_style={'textAlign': 'center','color': colors['text']}
    return html.H1(children='COVID-19 Dashboard',style=title_style)


def generate_page_header():
    """Return the header row: one full-width column containing the page title."""
    title_column=dbc.Col(get_page_heading_title(),md=12)
    return dbc.Row(
        [title_column],
        align="center",
        style=get_page_heading_style(),
    )

In [54]:
def get_country_list():
    """Return the distinct "Country/Region" values found in ``df_confirmed``."""
    country_column=df_confirmed['Country/Region']
    return country_column.unique()

def create_dropdown_list(cntry_list):
    """Turn country names into a sorted list of dropdown option dicts.

    Each option uses the country name as both its 'label' and its 'value'.
    """
    return [{'label':name,'value':name} for name in sorted(cntry_list)]

def get_country_dropdown(id):
    """Return a labelled country dropdown wrapped in a Div.

    ``id`` is appended to 'my-id' for the dropdown's component id and to
    'my-div' for the empty Div placed after it. The initial selection is 'US'.
    """
    suffix=str(id)
    country_options=create_dropdown_list(get_country_list())
    selector=dcc.Dropdown(
        id='my-id'+suffix,
        options=country_options,
        value='US',
    )
    return html.Div([
        html.Label('Select Country'),
        selector,
        html.Div(id='my-div'+suffix),
    ])

In [55]:
def graph1():
    """Return the 'graph1' component, initially showing the confirmed-cases trend for the US."""
    initial_figure=figure_trend('US')
    return dcc.Graph(id='graph1',figure=initial_figure)

In [56]:
def generate_card_content(card_header,card_value,overall_value):
    """Return the [header, body] children of one summary card.

    Parameters
    ----------
    card_header : str
        Text shown in the card header.
    card_value : number
        Country-level value; converted with ``int`` and shown with
        thousands separators.
    overall_value : number
        Worldwide value shown underneath, with thousands separators.
    """
    card_head_style = {'textAlign':'center','fontSize':'150%'}
    card_body_style = {'textAlign':'center','fontSize':'200%'}
    card_header = dbc.CardHeader(card_header,style=card_head_style)
    card_body = dbc.CardBody(
        [
            html.H5(f"{int(card_value):,}", className="card-title",style=card_body_style),
            html.P(
                # Fixed the misspelled user-facing label ("Worlwide").
                "Worldwide: {:,}".format(overall_value),
                className="card-text",style={'textAlign':'center'}
            ),
        ]
    )
    card = [card_header,card_body]
    return card

In [57]:
def generate_cards(cntry='US'):
    """Return the 'card1' Div holding the Recovered, Confirmed and Dead cards.

    Each card shows the total for ``cntry`` together with the matching
    overall total computed at module level.
    """
    confirmed_here=country_total(df_confirmed,cntry)
    deaths_here=country_total(df_deaths,cntry)
    recovered_here=country_total(df_recovered,cntry)

    recovered_card=dbc.Card(
        generate_card_content("Recovered",recovered_here,recovered_total),
        color="success", inverse=True,
    )
    confirmed_card=dbc.Card(
        generate_card_content("Confirmed",confirmed_here,confirm_total),
        color="warning", inverse=True,
    )
    dead_card=dbc.Card(
        generate_card_content("Dead",deaths_here,deaths_total),
        color="dark", inverse=True,
    )

    # Only the first column carries an offset; all three are two units wide.
    card_row=dbc.Row(
        [
            dbc.Col(recovered_card,md={'size':2,'offset':3}),
            dbc.Col(confirmed_card,md={'size':2}),
            dbc.Col(dead_card,md={'size':2}),
        ],
        className="mb-4",
    )
    return html.Div([card_row],id='card1')

In [58]:
def get_slider():
    """Return the moving-average window slider together with its caption.

    The slider has the component id 'my-slider', starts at 3 and carries
    marks at 1, 3, 5, 7 and 14.
    """
    return html.Div([  
                        dcc.Slider(
                            id='my-slider',
                            min=1,
                            max=15,
                            step=None,
                            marks={
                                1: '1',
                                3: '3',
                                5: '5',
                                7: '1-Week',
                                14: 'Fortnight'
                            },
                            value=3,
                        ),
                        # This function has no ``id`` parameter, so the previous
                        # 'my-div'+str(id) stringified the builtin ``id`` function.
                        # Use an explicit, stable component id instead.
                        html.Div([html.Label('Select Moving Average Window')],id='my-slider-label',style={'textAlign':'center'})
                    ])

In [59]:
def graph2():
    """Return the 'graph2' component, initially showing the deaths trend for the US."""
    initial_figure=deaths_trend('US')
    return dcc.Graph(id='graph2',figure=initial_figure)

In [60]:
def graph3():
    """Return the 'graph3' component showing the confirmed-cases donut chart."""
    pie_figure=getpie()
    return dcc.Graph(id='graph3',figure=pie_figure)

In [61]:
def graph4():
    """Return the 'graph4' component showing the deaths donut chart."""
    pie_figure=getpiedeath()
    return dcc.Graph(id="graph4",figure=pie_figure)

In [62]:
def graph5():
    """Return the 'graph5' component showing the bar chart built from ``res``."""
    bar_figure=getbar(res)
    return dcc.Graph(id="graph5",figure=bar_figure)

In [63]:
def generate_layout():
    """Assemble the whole dashboard page as a fluid Bootstrap container.

    Order from top to bottom: header, summary cards, country dropdown,
    confirmed-cases chart, window slider, deaths chart, the two donut
    charts and the top-10 bar chart.
    """
    def offset_col(content,size,offset):
        # One column of the given width, pushed right by `offset` grid units.
        return dbc.Col(content,md=dict(size=size,offset=offset))

    page_children=[
        generate_page_header(),
        html.Hr(),
        generate_cards(),
        html.Hr(),
        dbc.Row([offset_col(get_country_dropdown(id=1),4,4)]),
        dbc.Row([offset_col(graph1(),6,3)],align="center"),
        dbc.Row([offset_col(get_slider(),4,4)]),
        dbc.Row([offset_col(graph2(),6,3)]),
        dbc.Row([offset_col(graph3(),7,3)]),
        dbc.Row([offset_col(graph4(),7,3)]),
        dbc.Row([offset_col(graph5(),7,3)]),
    ]
    return dbc.Container(
        page_children,
        fluid=True,
        style={'backgroundColor': colors['bodyColor']},
    )

In [64]:
# Build the component tree once and install it as the app's layout.
app.layout = generate_layout()

In [65]:
@app.callback(
    [Output(component_id='graph1',component_property='figure'), #line chart
    Output(component_id='card1',component_property='children')
    ],#overall card numbers
    
    
    [Input(component_id='my-id1',component_property='value'), #dropdown
     Input(component_id='my-slider',component_property='value')
    ]
    
 
)
def update_output_div(input_value1,input_value2):
    """Refresh the confirmed-cases chart and the summary cards.

    Parameters
    ----------
    input_value1 : str
        Country selected in the dropdown ('my-id1').
    input_value2 : int
        Moving-average window selected with the slider ('my-slider').

    Returns
    -------
    tuple
        The new figure for 'graph1' and the new children for 'card1'.
    """
    # generate_cards() returns a Div whose own id is 'card1'. Returning that
    # Div as the children of 'card1' would nest a second component with the
    # same id, so only its children are passed on.
    return figure_trend(input_value1,input_value2),generate_cards(input_value1).children

# Register the deaths chart as a callback as well; without a decorator this
# function was never called and 'graph2' stayed on its initial US figure.
@app.callback(
    Output(component_id='graph2',component_property='figure'), #deaths line chart
    [Input(component_id='my-id1',component_property='value'), #dropdown
     Input(component_id='my-slider',component_property='value')
    ]
)
def update_output_div2(input_value3,input_value4=3):
    """Refresh the deaths chart for the selected country and window.

    Parameters
    ----------
    input_value3 : str
        Country selected in the dropdown ('my-id1').
    input_value4 : int
        Moving-average window selected with the slider ('my-slider');
        defaults to 3 so existing one-argument calls keep working.
    """
    return deaths_trend(input_value3,input_value4)

  

In [66]:
# Start the Dash server with debug mode off; the recorded output below shows
# it serving on http://127.0.0.1:8050/.
app.run_server(debug=False)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [14/Jan/2021 17:24:08] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [14/Jan/2021 17:24:09] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [14/Jan/2021 17:24:09] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [14/Jan/2021 17:24:09] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
