In [1]:
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import datetime

Load the deaths, confirmed and recovered time series into separate DataFrames from the following URL:

https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series

In [2]:
# The three global time series sit in the same repository directory; only the
# metric part of the file name differs.
CSSE_TIME_SERIES_URL = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_{metric}_global.csv'
)

confirmed_df = pd.read_csv(CSSE_TIME_SERIES_URL.format(metric='confirmed'))
recovered_df = pd.read_csv(CSSE_TIME_SERIES_URL.format(metric='recovered'))
death_df = pd.read_csv(CSSE_TIME_SERIES_URL.format(metric='deaths'))


Get the number of rows and columns in each DataFrame.

In [3]:
confirmed_df.shape

(280, 710)

In [4]:
recovered_df.shape

(265, 710)

In [5]:
death_df.shape

(280, 710)

Rename column names in all dataframes

In [6]:
# Short, lower-case names for the two location columns, applied in place to
# each of the three raw frames.
location_names = {'Province/State': 'state', 'Country/Region': 'country'}
for raw_frame in (confirmed_df, recovered_df, death_df):
    raw_frame.rename(columns=location_names, inplace=True)

Drop unwanted columns

In [7]:
# Keep only the country and the per-date counts. The columns are removed in
# place, so without errors='ignore' a second execution of this cell would
# raise KeyError because they no longer exist.
unwanted_cols = ['state', 'Lat', 'Long']
confirmed_df.drop(columns=unwanted_cols, inplace=True, errors='ignore')
recovered_df.drop(columns=unwanted_cols, inplace=True, errors='ignore')
death_df.drop(columns=unwanted_cols, inplace=True, errors='ignore')

Get the date columns.

In [8]:
# Everything after the first column is treated as a date column; this relies
# on 'country' being the only non-date column left in confirmed_df.
date_cols = confirmed_df.columns[1:]

Reshape the case counts, which are stored as one column per date, into rows (one row per original record and date). This long format is easier to use for data visualization.

In [9]:
def wide_to_long(wide_df, date_columns):
    """Reshape a wide per-date frame into one row per original record and date.

    Parameters
    ----------
    wide_df : pandas.DataFrame
        Frame with a 'country' column and one count column per date.
    date_columns : iterable of str
        Labels of the date columns to unpivot, formatted as "%m/%d/%y".

    Returns
    -------
    pandas.DataFrame
        Columns 'country', 'count' and 'dates' (dates as "%Y/%m/%d" strings),
        stacked date by date with a fresh 0..n-1 index.
    """
    # Parse each column label once and reuse the result for every row.
    date_labels = {
        label: datetime.datetime.strptime(label, "%m/%d/%y").strftime("%Y/%m/%d")
        for label in date_columns
    }
    # melt stacks the date columns in the given order, which yields the same
    # row order as appending one per-date slice after another. It avoids
    # DataFrame.append (removed in pandas 2.0) and the repeated copying of a
    # frame that grows on every loop iteration.
    long_df = wide_df.melt(
        id_vars='country',
        value_vars=list(date_columns),
        var_name='dates',
        value_name='count',
    )
    long_df['dates'] = long_df['dates'].map(date_labels)
    return long_df[['country', 'count', 'dates']]


# All three frames are unpivoted over the date columns taken from confirmed_df.
confirmed_df_new = wide_to_long(confirmed_df, date_cols)
recovered_df_new = wide_to_long(recovered_df, date_cols)
death_df_new = wide_to_long(death_df, date_cols)

In [10]:
confirmed_df_new.head()

Unnamed: 0,country,count,dates
0,Afghanistan,0,2020/01/22
1,Albania,0,2020/01/22
2,Algeria,0,2020/01/22
3,Andorra,0,2020/01/22
4,Angola,0,2020/01/22


Aggregate the data at the country-date level.

In [11]:
# Rows that share the same country and date are summed into a single row.
group_keys = ['country', 'dates']
confirmed_df_new = confirmed_df_new.groupby(by=group_keys).sum()
recovered_df_new = recovered_df_new.groupby(by=group_keys).sum()
death_df_new = death_df_new.groupby(by=group_keys).sum()

In [12]:
confirmed_df_new.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,count
country,dates,Unnamed: 2_level_1
Afghanistan,2020/01/22,0
Afghanistan,2020/01/23,0
Afghanistan,2020/01/24,0
Afghanistan,2020/01/25,0
Afghanistan,2020/01/26,0


Reset the index so that `country` and `dates` become regular columns again.

In [13]:
# Move the 'country' and 'dates' index levels back into ordinary columns,
# modifying each aggregated frame in place.
for grouped_frame in (confirmed_df_new, recovered_df_new, death_df_new):
    grouped_frame.reset_index(inplace=True)

In [14]:
confirmed_df_new.head()

Unnamed: 0,country,dates,count
0,Afghanistan,2020/01/22,0
1,Afghanistan,2020/01/23,0
2,Afghanistan,2020/01/24,0
3,Afghanistan,2020/01/25,0
4,Afghanistan,2020/01/26,0


Find the number of rows in each dataframe

In [15]:
# Print (rows, columns) for each aggregated frame, one per line.
for grouped_frame in (confirmed_df_new, recovered_df_new, death_df_new):
    print(grouped_frame.shape)

(138376, 3)
(138376, 3)
(138376, 3)


Merge the 3 dataframes into a single dataframe

In [16]:
# Join the three long frames on (country, date) with pandas' default join
# type. The clashing 'count' columns receive default suffixes, so the columns
# are renamed by position right after the joins.
join_keys = ['country', 'dates']
covid_df = (
    confirmed_df_new
    .merge(recovered_df_new, on=join_keys)
    .merge(death_df_new, on=join_keys)
)
covid_df.columns = ['country', 'dates', 'confirmed', 'recovered', 'death']
covid_df.shape

(138376, 5)

Find the active covid cases

In [17]:
# Active = confirmed cases minus those counted as recovered or dead.
resolved_cases = covid_df['recovered'] + covid_df['death']
covid_df['active'] = covid_df['confirmed'] - resolved_cases

Calculate the case fatality ratio (number of deaths per 100 confirmed COVID-19 cases).

In [18]:
# Deaths per 100 confirmed cases, kept to two decimals: rounding to whole
# numbers collapses most ratios onto a handful of values. Rows without any
# confirmed case have no defined ratio and are stored as NaN (never inf).
has_cases = covid_df['confirmed'] > 0
covid_df['case_fatality_ratio'] = (
    (covid_df['death'] * 100 / covid_df['confirmed']).where(has_cases).round(2)
)

In [19]:
covid_df.head()

Unnamed: 0,country,dates,confirmed,recovered,death,active,case_fatality_ratio
0,Afghanistan,2020/01/22,0,0,0,0,
1,Afghanistan,2020/01/23,0,0,0,0,
2,Afghanistan,2020/01/24,0,0,0,0,
3,Afghanistan,2020/01/25,0,0,0,0,
4,Afghanistan,2020/01/26,0,0,0,0,


In [22]:
import dash
from dash.dependencies import Output, Input
from dash import dcc
from dash import html
import plotly.express as px
import pandas as pd
from jupyter_dash import JupyterDash

# Create the notebook-hosted Dash application and expose its `server`
# attribute under a module-level name.
app = JupyterDash(__name__)
server = app.server

# Colour palette; the layout reads the 'background' entry.
colors = dict(background='#FFFFFF', text='#7FDBFF')


# Defining App Layout

# Date selector: one option per distinct date in covid_df, preselecting the
# largest date string.
date_selector = html.Div([
    html.Label('Date'),
    dcc.Dropdown(
        id='dates_dropdown',
        options=[{'label': day, 'value': day} for day in covid_df.dates.unique()],
        value=covid_df.dates.max(),
    ),
])

# Metric selector: which covid_df column the two figures should display.
variable_selector = html.Div([
    html.Label('Interest Variable'),
    dcc.Dropdown(
        id='interest-variable',
        options=[
            {'label': 'Total Active', 'value': 'active'},
            {'label': 'Total Deaths', 'value': 'death'},
            {'label': 'Total Recovered', 'value': 'recovered'},
        ],
        value='active',
    ),
])

# Page structure: title, the two selectors, then the scatter plot followed by
# the per-country bar chart. The figures themselves are filled in by callbacks.
app.layout = html.Div(
    style={'backgroundColor': colors['background']},
    children=[
        html.H1('COVID - 19 ', style={'textAlign': 'center'}),
        html.Div(
            [date_selector, variable_selector],
            style={'width': '50%', 'margin': 'auto'},
        ),
        html.Div(
            [
                dcc.Graph(id='confirmed_Vs_others'),
                html.Div(
                    dcc.Graph(id='cases_per_country'),
                    style={'width': '90%', 'display': 'inline-block'},
                ),
            ],
            style={'width': '90%', 'margin': 'auto'},
        ),
    ],
)

@app.callback(Output('confirmed_Vs_others', 'figure'),
            [Input('dates_dropdown', 'value'),
            Input('interest-variable', 'value')])
def update_scatter(selected_pop, interest_var):
    """Redraw the confirmed-vs-metric scatter plot for the selected date.

    Parameters
    ----------
    selected_pop : str or None
        Value of the date dropdown; None when the dropdown has been cleared.
    interest_var : str or None
        Value of the metric dropdown, used as a covid_df column name; None
        when the dropdown has been cleared.

    Returns
    -------
    Plotly figure with one marker per country, sized by confirmed cases.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When either dropdown is empty, so the existing figure is left as is.
    """
    # A cleared dropdown delivers None; indexing the frame with it would raise
    # inside the callback, so skip the update instead.
    if selected_pop is None or interest_var is None:
        raise dash.exceptions.PreventUpdate

    # Named day_df rather than `sorted` so the builtin is not shadowed.
    day_df = covid_df[covid_df.dates == selected_pop]
    fig = px.scatter(day_df,
        x=day_df.confirmed,
        y=day_df[interest_var],
        size='confirmed',
        color='country',
        hover_name='country',
        template='plotly_white',
        labels={'y':interest_var,
                'x': 'confirmed'},
        title='confirmed Vs ' + interest_var)
    fig.update_layout(transition_duration=500)
    return fig

@app.callback(Output('cases_per_country', 'figure'),
              [Input('dates_dropdown', 'value'),
               Input('interest-variable', 'value')])
def update_country_bar(selected_pop, interest_var):
    """Redraw the per-country bar chart for the selected date and metric.

    Parameters
    ----------
    selected_pop : str or None
        Value of the date dropdown; None when the dropdown has been cleared.
    interest_var : str or None
        Value of the metric dropdown, used as a covid_df column name; None
        when the dropdown has been cleared.

    Returns
    -------
    Plotly figure with one bar per country, coloured by case_fatality_ratio.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When either dropdown is empty, so the existing figure is left as is.
    """
    # A cleared dropdown delivers None, which is not a valid column or date.
    if selected_pop is None or interest_var is None:
        raise dash.exceptions.PreventUpdate

    # Named day_df rather than `sorted` so the builtin is not shadowed.
    day_df = covid_df[covid_df.dates == selected_pop]
    fig = px.bar(day_df,
        x='country',
        y=interest_var,
        color='case_fatality_ratio',
        template='plotly_white',
        # Keys must match covid_df column names; the column is 'death', so the
        # former 'deaths' key never applied its label.
        labels={'country':'Country',
            'confirmed':'Total Confirmed',
            'active':'Total Active',
            'death':'Total death',
            'recovered':'Total Recovered'},
        title='Total Cases per Country')
    return fig


# Start the app only when this code runs as the main module; mode='inline' is
# forwarded to the JupyterDash app's run_server.
if __name__ == '__main__':
    app.run_server(mode='inline')
