In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

In [2]:
confirmed_cases_file_link = "datasets/confirmed.csv"
death_cases_file_link = "datasets/deaths.csv"
recovered_cases_file_link = "datasets/recovered.csv"
country_cases_file_link = "datasets/covid-19-master/data/countries-aggregated.csv"

In [3]:
confirmed_df = pd.read_csv(confirmed_cases_file_link)
print(confirmed_df.shape)
death_df = pd.read_csv(death_cases_file_link)
print(death_df.shape)
recovered_df = pd.read_csv(recovered_cases_file_link)
print(recovered_df.shape)
cases_country_df = pd.read_csv(country_cases_file_link)
print(cases_country_df.shape)

(264, 97)
(264, 97)
(250, 97)
(17205, 5)


In [4]:
confirmed_df.columns

Index(['Province/State', 'Country/Region', 'Lat', 'Long', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20', '1/29/20',
       '1/30/20', '1/31/20', '2/1/20', '2/2/20', '2/3/20', '2/4/20', '2/5/20',
       '2/6/20', '2/7/20', '2/8/20', '2/9/20', '2/10/20', '2/11/20', '2/12/20',
       '2/13/20', '2/14/20', '2/15/20', '2/16/20', '2/17/20', '2/18/20',
       '2/19/20', '2/20/20', '2/21/20', '2/22/20', '2/23/20', '2/24/20',
       '2/25/20', '2/26/20', '2/27/20', '2/28/20', '2/29/20', '3/1/20',
       '3/2/20', '3/3/20', '3/4/20', '3/5/20', '3/6/20', '3/7/20', '3/8/20',
       '3/9/20', '3/10/20', '3/11/20', '3/12/20', '3/13/20', '3/14/20',
       '3/15/20', '3/16/20', '3/17/20', '3/18/20', '3/19/20', '3/20/20',
       '3/21/20', '3/22/20', '3/23/20', '3/24/20', '3/25/20', '3/26/20',
       '3/27/20', '3/28/20', '3/29/20', '3/30/20', '3/31/20', '4/1/20',
       '4/2/20', '4/3/20', '4/4/20', '4/5/20', '4/6/20', '4/7/20', '4/8/20',
       '4/9/20', '4/10/20'

In [5]:
confirmed_df[confirmed_df["Country/Region"] == "Australia"]

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20,4/20/20,4/21/20,4/22/20,4/23/20
8,Australian Capital Territory,Australia,-35.4735,149.0124,0,0,0,0,0,0,...,103,103,103,103,103,103,103,103,103,104
9,New South Wales,Australia,-33.8688,151.2093,0,0,0,0,3,4,...,2870,2886,2897,2926,2926,2926,2926,2926,2926,2976
10,Northern Territory,Australia,-12.4634,130.8456,0,0,0,0,0,0,...,28,28,28,28,28,28,28,28,28,27
11,Queensland,Australia,-28.0167,153.4,0,0,0,0,0,0,...,998,999,1001,1007,1015,1015,1015,1015,1015,1026
12,South Australia,Australia,-34.9285,138.6007,0,0,0,0,0,0,...,433,433,433,435,435,435,435,435,435,438
13,Tasmania,Australia,-41.4545,145.9707,0,0,0,0,0,0,...,165,165,169,180,180,180,180,180,180,207
14,Victoria,Australia,-37.8136,144.9631,0,0,0,0,1,1,...,1291,1299,1299,1302,1319,1319,1319,1319,1319,1337
15,Western Australia,Australia,-31.9505,115.8605,0,0,0,0,0,0,...,527,527,532,541,541,541,541,541,541,546


In [6]:
confirmed_df["Country/Region"].nunique()

185

###### Inputing data

In [7]:
confirmed_df = confirmed_df.replace(np.nan, '', regex = True)
death_df = death_df.replace(np.nan, '', regex = True)
recovered_df = recovered_df.replace(np.nan, '', regex = True)
cases_country_df = cases_country_df.replace(np.nan, '', regex = True)

###### Viewing the columns in the dataframes - Timeseries

In [8]:
confirmed_df.columns

Index(['Province/State', 'Country/Region', 'Lat', 'Long', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20', '1/29/20',
       '1/30/20', '1/31/20', '2/1/20', '2/2/20', '2/3/20', '2/4/20', '2/5/20',
       '2/6/20', '2/7/20', '2/8/20', '2/9/20', '2/10/20', '2/11/20', '2/12/20',
       '2/13/20', '2/14/20', '2/15/20', '2/16/20', '2/17/20', '2/18/20',
       '2/19/20', '2/20/20', '2/21/20', '2/22/20', '2/23/20', '2/24/20',
       '2/25/20', '2/26/20', '2/27/20', '2/28/20', '2/29/20', '3/1/20',
       '3/2/20', '3/3/20', '3/4/20', '3/5/20', '3/6/20', '3/7/20', '3/8/20',
       '3/9/20', '3/10/20', '3/11/20', '3/12/20', '3/13/20', '3/14/20',
       '3/15/20', '3/16/20', '3/17/20', '3/18/20', '3/19/20', '3/20/20',
       '3/21/20', '3/22/20', '3/23/20', '3/24/20', '3/25/20', '3/26/20',
       '3/27/20', '3/28/20', '3/29/20', '3/30/20', '3/31/20', '4/1/20',
       '4/2/20', '4/3/20', '4/4/20', '4/5/20', '4/6/20', '4/7/20', '4/8/20',
       '4/9/20', '4/10/20'

In [9]:
cases_country_df.columns

Index(['Date', 'Country', 'Confirmed', 'Recovered', 'Deaths'], dtype='object')

#### Exploratory Analysis

###### For case count - Global Summary

In [17]:
#global_data = cases_country_df.copy().drop(['Lat','Long','Country/Region'], axis = 1)
#global_summary = pd.DataFrame(global_data.sum()).transpose()
#global_summary.style.format("{:,.0f}")

## For Chart 1 : Total COVID_19 cases (Globally)

In [25]:
confirmed_ts = confirmed_df.copy().drop(['Lat','Long','Country/Region','Province/State'],axis =1)
confirmed_ts_summary = confirmed_df.sum()

In [26]:
confirmed_ts_summary

Province/State    Australian Capital TerritoryNew South WalesNor...
Country/Region    AfghanistanAlbaniaAlgeriaAndorraAngolaAntigua ...
Lat                                                         5627.77
Long                                                        5852.44
1/22/20                                                         555
                                        ...                        
4/19/20                                                     2401101
4/20/20                                                     2472258
4/21/20                                                     2549122
4/22/20                                                     2624089
4/23/20                                                     2708884
Length: 97, dtype: object

In [29]:
fig = go.Figure(data=go.Scatter(x=confirmed_ts_summary.index,
                                y=confirmed_ts_summary.values,
                                mode='lines+markers')) # hover text goes here

fig.update_layout(title='Total COVID_19 Confirmed Cases (Globally)',
                  yaxis_title = 'Confirmed Cases',
                  
                  xaxis_tickangle = 300)
fig.show()