## Covid-19 Data Analysis

In [9]:
## Importing important package
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import folium
import math
import random
from datetime import timedelta
import plotly as py
py.offline.init_notebook_mode(connected=True)
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

import warnings
warnings.filterwarnings('ignore')

# Hex colour codes for plotting. The short names presumably stand for
# confirmed, deaths, recovered and active -- TODO confirm against usage.
cnf, dth, rec, act = "#0FFFFF", "#FF0000", "#00FF00", "#0000FF"

## Data Preparation

In [10]:
# Load the dataset from df.csv (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="df.csv")

In [12]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(49068, 10)

In [6]:
## Replace missing Province/State values with an empty string
df["Province/State"] = df["Province/State"].fillna(value="")

In [15]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
0,,Afghanistan,33.93911,67.709953,2020-01-22,0,0,0,0,Eastern Mediterranean
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0,Europe
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0,Africa
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0,Europe
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0,Africa


In [7]:
## Total number of confirmed, death, recovered and active cases per date.
# Each metric column is selected *before* aggregating, so only that column is
# summed. Summing the whole frame first (and indexing afterwards) also
# aggregates Lat/Long and, depending on the pandas version, the text columns,
# only for that work to be thrown away four times over.
# Each result is a two-column frame: Date plus the summed metric.
confirmed = df.groupby(["Date"])["Confirmed"].sum().reset_index()
deaths = df.groupby(["Date"])["Deaths"].sum().reset_index()
recovered = df.groupby(["Date"])["Recovered"].sum().reset_index()
active = df.groupby(["Date"])["Active"].sum().reset_index()

In [8]:
# Show the per-date confirmed totals.
confirmed

Unnamed: 0,Date,Confirmed
0,2020-01-22,555
1,2020-01-23,654
2,2020-01-24,941
3,2020-01-25,1434
4,2020-01-26,2118
...,...,...
183,2020-07-23,15510481
184,2020-07-24,15791645
185,2020-07-25,16047190
186,2020-07-26,16251796


In [9]:
# Show the per-date death totals.
deaths

Unnamed: 0,Date,Deaths
0,2020-01-22,17
1,2020-01-23,18
2,2020-01-24,26
3,2020-01-25,42
4,2020-01-26,56
...,...,...
183,2020-07-23,633506
184,2020-07-24,639650
185,2020-07-25,644517
186,2020-07-26,648621


In [10]:
# Show the per-date recovered totals.
recovered

Unnamed: 0,Date,Recovered
0,2020-01-22,28
1,2020-01-23,30
2,2020-01-24,36
3,2020-01-25,39
4,2020-01-26,52
...,...,...
183,2020-07-23,8710969
184,2020-07-24,8939705
185,2020-07-25,9158743
186,2020-07-26,9293464


In [11]:
# Show the per-date active totals.
active

Unnamed: 0,Date,Active
0,2020-01-22,510
1,2020-01-23,606
2,2020-01-24,879
3,2020-01-25,1353
4,2020-01-26,2010
...,...,...
183,2020-07-23,6166006
184,2020-07-24,6212290
185,2020-07-25,6243930
186,2020-07-26,6309711


In [12]:
## Worldwide confirmed, recovered and death totals on a single time-series chart
# One lines+markers trace per metric, added in the order confirmed, recovered, deaths.
fig = go.Figure(
    data=[
        go.Scatter(
            x=totals["Date"],
            y=totals[metric],
            mode="lines+markers",
            name=metric,
            line={"color": colour, "width": 5},
        )
        for totals, metric, colour in (
            (confirmed, "Confirmed", "Red"),
            (recovered, "Recovered", "aqua"),
            (deaths, "Deaths", "yellow"),
        )
    ]
)
# update_layout returns the figure, so ending the cell with it renders the chart.
fig.update_layout(
    title="Worldwide Covid-19 cases",
    xaxis_tickfont_size=14,
    yaxis={"title": "number of cases"},
)

In [22]:
## Time-series chart of worldwide deaths per date
fig = go.Figure()
fig.add_trace(go.Scatter(x=deaths["Date"], y=deaths["Deaths"], mode="lines+markers", name="Deaths", line={'color': "yellow", 'width': 5}))
# A single-trace figure shows no legend, so give it a title and a y-axis label;
# otherwise nothing on the chart says what is plotted.
# update_layout returns the figure, so the cell still renders it.
fig.update_layout(title="Worldwide Covid-19 deaths", xaxis_tickfont_size=14, yaxis=dict(title="number of deaths"))

In [13]:
## Time-series chart of worldwide active cases per date
fig = go.Figure()
fig.add_trace(go.Scatter(x=active["Date"], y=active["Active"], mode="lines+markers", name="Active", line={'color': 'green', 'width': 5}))
# A single-trace figure shows no legend, so give it a title and a y-axis label;
# otherwise nothing on the chart says what is plotted.
# update_layout returns the figure, so the cell still renders it.
fig.update_layout(title="Worldwide Covid-19 active cases", xaxis_tickfont_size=14, yaxis=dict(title="number of active cases"))

In [23]:
## Time-series chart of worldwide recovered cases per date
fig = go.Figure()
fig.add_trace(go.Scatter(x=recovered["Date"], y=recovered["Recovered"], mode="lines+markers", name="Recovered", line={'color': 'aqua', 'width': 5}))
# A single-trace figure shows no legend, so give it a title and a y-axis label;
# otherwise nothing on the chart says what is plotted.
# update_layout returns the figure, so the cell still renders it.
fig.update_layout(title="Worldwide Covid-19 recovered cases", xaxis_tickfont_size=14, yaxis=dict(title="number of recovered cases"))

In [14]:
## Animated density map of confirmed cases on a world map, one frame per date
# Animation frames are keyed by the Date column, so it is cast to plain strings.
df["Date"] = df["Date"].astype(str)
fig = px.density_mapbox(
    df,
    lat="Lat",
    lon="Long",
    # Weight each location by its confirmed count. Without `z` every point
    # contributes equally, so the heat map would look the same in every frame
    # regardless of how many cases were reported.
    z="Confirmed",
    hover_name="Country/Region",
    hover_data=["Confirmed", "Recovered", "Deaths"],
    animation_frame="Date",
    color_continuous_scale="Inferno",
    radius=7,
    zoom=0,
    height=700,
)
fig.update_layout(
    title="Worldwide Covid-19 cases with time lapse",
    mapbox_style="open-street-map",
    mapbox_center_lon=180,
)
fig.show()

In [24]:
# Convert the Date column to datetime values, then print the frame's
# column dtypes and non-null counts.
df["Date"] = pd.to_datetime(arg=df["Date"])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49068 entries, 0 to 49067
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Province/State  49068 non-null  object        
 1   Country/Region  49068 non-null  object        
 2   Lat             49068 non-null  float64       
 3   Long            49068 non-null  float64       
 4   Date            49068 non-null  datetime64[ns]
 5   Confirmed       49068 non-null  int64         
 6   Deaths          49068 non-null  int64         
 7   Recovered       49068 non-null  int64         
 8   Active          49068 non-null  int64         
 9   WHO Region      49068 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(4), object(3)
memory usage: 3.7+ MB


In [15]:
## Per-date totals of the four case metrics (original note: tree map plot)
# The columns are selected with a list. Tuple-style selection
# (["Confirmed","Active",...] without the inner brackets) is deprecated on a
# groupby and is rejected by newer pandas releases.
temp = df.groupby(["Date"])[["Confirmed", "Active", "Recovered", "Deaths"]].sum().reset_index()
temp


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0,Date,Confirmed,Active,Recovered,Deaths
0,2020-01-22,555,510,28,17
1,2020-01-23,654,606,30,18
2,2020-01-24,941,879,36,26
3,2020-01-25,1434,1353,39,42
4,2020-01-26,2118,2010,52,56
...,...,...,...,...,...
183,2020-07-23,15510481,6166006,8710969,633506
184,2020-07-24,15791645,6212290,8939705,639650
185,2020-07-25,16047190,6243930,9158743,644517
186,2020-07-26,16251796,6309711,9293464,648621
