# Covid-19 Exploratory Data Analysis

## Covid-19 Dataset Understanding

### Preprocessed Dataset Link: https://github.com/laxmimerit/Covid-19-Preprocessed-Dataset

In [29]:
# We use folium for map Plotting
!pip install folium
# and plotly for charts plotting
!pip install plotly



In [30]:
# import Necessary packages
import  plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

import folium

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

import math
import random
from datetime import timedelta

# Suppress every Python warning from here on: no category or module filter is
# given, so deprecation and runtime warnings are hidden as well.
import warnings
warnings.filterwarnings('ignore')

# Hex color constants (nothing is imported here; these are plain assignments).
# NOTE(review): the names presumably abbreviate the case categories
# confirmed / deaths / recovered / active — confirm against the plotting cells.
cnf = '#393e46'
dth = '#ff2e63'
rec = '#21bf73'
act = '#fe9801'

## Dataset Preparation 

In [31]:
# Initialize plotly's offline notebook mode.
# Note that `py` is an alias for the top-level plotly package.
# NOTE(review): per plotly's offline documentation, connected=True loads
# plotly.js from a CDN rather than embedding it in the notebook, so viewing
# the figures needs internet access — confirm this is intended.
import plotly as py
py.offline.init_notebook_mode(connected=True)

In [32]:
# Load the pre-cleaned datasets.
# All four CSV files sit in the same folder, so the location is defined once;
# change DATA_DIR if the dataset is stored somewhere else.
# NOTE(review): the relative path presumably expects the
# Covid-19-Preprocessed-Dataset repository to be cloned next to this notebook — confirm.
DATA_DIR = './Covid-19-Preprocessed-Dataset/preprocessed'

# The files that are read with parse_dates get their 'Date' column converted
# to datetime; countrywise.csv is read without date parsing.
df = pd.read_csv(DATA_DIR + '/covid_19_data_cleaned.csv', parse_dates=['Date'])
country_daywise = pd.read_csv(DATA_DIR + '/country_daywise.csv', parse_dates=['Date'])
countrywise = pd.read_csv(DATA_DIR + '/countrywise.csv')
daywise = pd.read_csv(DATA_DIR + '/daywise.csv', parse_dates=['Date'])

In [33]:
# Replace missing Province/State values with an empty string
province_filled = df['Province/State'].fillna("")
df['Province/State'] = province_filled

In [34]:
# Preview the first rows of the cleaned dataset (one row per date and location)
df.head()

Unnamed: 0,Date,Province/State,Country,Lat,Long,Confirmed,Recovered,Deaths,Active
0,2020-01-22,,Afghanistan,33.0,65.0,0,0,0,0
1,2020-01-23,,Afghanistan,33.0,65.0,0,0,0,0
2,2020-01-24,,Afghanistan,33.0,65.0,0,0,0,0
3,2020-01-25,,Afghanistan,33.0,65.0,0,0,0,0
4,2020-01-26,,Afghanistan,33.0,65.0,0,0,0,0


In [35]:
# Preview the first rows of the per-country, per-date dataset
country_daywise.head()

Unnamed: 0,Date,Country,Confirmed,Deaths,Recovered,Active,New Cases,New Deaths,New Recovered
0,2020-01-22,Afghanistan,0,0,0,0,0,0,0
1,2020-01-22,Albania,0,0,0,0,0,0,0
2,2020-01-22,Algeria,0,0,0,0,0,0,0
3,2020-01-22,Andorra,0,0,0,0,0,0,0
4,2020-01-22,Angola,0,0,0,0,0,0,0


In [36]:
# Preview the first rows of the per-country summary (totals, ratios, population)
countrywise.head()

Unnamed: 0,Country,Confirmed,Deaths,Recovered,Active,New Cases,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,Population,Cases / Million People,Confirmed last week,1 week change,1 week % increase
0,Afghanistan,20917,369,2171,18377,575,1.76,10.38,17.0,38928341,537.0,15750,5167,32.81
1,Albania,1263,34,945,284,17,2.69,74.82,3.6,2877800,439.0,1143,120,10.5
2,Algeria,10265,715,6799,2751,111,6.97,66.23,10.52,43851043,234.0,9513,752,7.9
3,Andorra,852,51,751,50,0,5.99,88.15,6.79,77265,11027.0,765,87,11.37
4,Angola,92,4,38,50,1,4.35,41.3,10.53,32866268,3.0,86,6,6.98


In [37]:
# Preview the first rows of the per-date summary dataset
daywise.head()

Unnamed: 0,Date,Confirmed,Deaths,Recovered,Active,New Cases,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,No. of Countries
0,2020-01-22,555,17,28,510,0,3.06,5.05,60.71,6
1,2020-01-23,654,18,30,606,99,2.75,4.59,60.0,8
2,2020-01-24,941,26,36,879,287,2.76,3.83,72.22,9
3,2020-01-25,1434,42,39,1353,493,2.93,2.72,107.69,11
4,2020-01-26,2118,56,52,2010,684,2.64,2.46,107.69,13


In [38]:
# Aggregate the per-location rows of df into one global total per date.
# The needed columns are selected BEFORE summing, and the grouping is done once:
# this avoids summing unrelated columns (Lat, Long, and the text columns, which
# newer pandas versions would concatenate) three separate times.
global_by_date = df.groupby('Date')[['Confirmed', 'Recovered', 'Deaths']].sum()

# Each result is a two-column frame: 'Date' plus the respective total.
# Get global confirmed cases
confirmed = global_by_date['Confirmed'].reset_index()
# Get recovered cases
recovered = global_by_date['Recovered'].reset_index()
# Get the death cases
deaths = global_by_date['Deaths'].reset_index()

In [39]:
# Display the global confirmed totals per date
confirmed

Unnamed: 0,Date,Confirmed
0,2020-01-22,555
1,2020-01-23,654
2,2020-01-24,941
3,2020-01-25,1434
4,2020-01-26,2118
...,...,...
134,2020-06-04,6632985
135,2020-06-05,6764918
136,2020-06-06,6891213
137,2020-06-07,7010349


In [40]:
# Display the global recovered totals per date
recovered

Unnamed: 0,Date,Recovered
0,2020-01-22,28
1,2020-01-23,30
2,2020-01-24,36
3,2020-01-25,39
4,2020-01-26,52
...,...,...
134,2020-06-04,2944289
135,2020-06-05,3013132
136,2020-06-06,3085833
137,2020-06-07,3140920


In [41]:
# Display the global death totals per date
deaths

Unnamed: 0,Date,Deaths
0,2020-01-22,17
1,2020-01-23,18
2,2020-01-24,26
3,2020-01-25,42
4,2020-01-26,56
...,...,...
134,2020-06-04,391122
135,2020-06-05,395866
136,2020-06-06,399703
137,2020-06-07,402724


In [42]:
# Count the null values in each column of df
df.isnull().sum()

Date              0
Province/State    0
Country           0
Lat               0
Long              0
Confirmed         0
Recovered         0
Deaths            0
Active            0
dtype: int64

In [43]:
# Summarize df: number of rows, column dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39337 entries, 0 to 39336
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            39337 non-null  datetime64[ns]
 1   Province/State  39337 non-null  object        
 2   Country         39337 non-null  object        
 3   Lat             39337 non-null  float64       
 4   Long            39337 non-null  float64       
 5   Confirmed       39337 non-null  int64         
 6   Recovered       39337 non-null  int64         
 7   Deaths          39337 non-null  int64         
 8   Active          39337 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(2)
memory usage: 2.7+ MB
