In [56]:
import numpy as np 
import pandas as pd 
import plotly as py
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [29]:
df = pd.read_csv('novel-corona-virus-2019-dataset/covid_19_data.csv')

In [30]:
df.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [31]:
df = df.rename(columns={'Country/Region':'Country'})
df = df.rename(columns={'ObservationDate':'Date'})

In [32]:
df = df.groupby(['Country', 'Date']).sum().reset_index().sort_values('Date', ascending=False)
df = df.drop_duplicates(subset = ['Country'])
df = df[df['Confirmed']>0]

In [33]:
df.head()

Unnamed: 0,Country,Date,SNo,Confirmed,Deaths,Recovered
13905,Taiwan,05/24/2020,28679,441.0,7.0,414.0
12727,Slovakia,05/24/2020,28667,1509.0,28.0,1301.0
869,Austria,05/24/2020,28531,16503.0,640.0,15063.0
3954,Dominican Republic,05/24/2020,28568,14801.0,458.0,8133.0
14351,Trinidad and Tobago,05/24/2020,28685,116.0,8.0,108.0


In [35]:
fig = go.Figure(data=go.Choropleth(
            locations = df['Country'],
            locationmode = 'country names',
            z = df['Confirmed'],
            colorscale = 'Reds',
            marker_line_color = 'black',
            marker_line_width = 0.5))

In [43]:
fig.update_layout(
    title_text = 'Confirmed Cases By April 26th, 2020',
    geo = dict(
            showframe = False,
            showcoastlines = False,
            projection_type = 'equirectangular'
    ))

In [44]:
covid_bydate = df[df['Confirmed']>0]
covid_bydate = covid_bydate.groupby(['Date','Country']).sum().reset_index()
covid_bydate.head()

Unnamed: 0,Date,Country,SNo,Confirmed,Deaths,Recovered
0,02/28/2020,Azerbaijan,2664,1.0,0.0,0.0
1,02/28/2020,North Ireland,2685,1.0,0.0,0.0
2,03/08/2020,Republic of Ireland,4067,21.0,0.0,0.0
3,03/09/2020,Palestine,4322,22.0,0.0,0.0
4,03/09/2020,St. Martin,4412,2.0,0.0,0.0


In [45]:
fig = px.choropleth(covid_bydate,
                   locations='Country',
                   locationmode='country names',
                   color = 'Confirmed',
                   hover_name='Country',
                   animation_frame='Date')

In [46]:
fig.update_layout(title_text='Global Spread of COVID-19',
                 geo = dict(
                     showframe=False,
                     showcoastlines=False))

In [47]:
covid_ddf = df[df['Deaths']>=0]
covid_ddf = covid_ddf.groupby(['Date','Country']).sum().reset_index()

In [52]:
covid_ddf['Deaths'].sort_values(ascending=False)

201    97720.0
200    36875.0
108    32785.0
184    28752.0
84     28370.0
        ...   
169        0.0
175        0.0
195        0.0
202        0.0
0          0.0
Name: Deaths, Length: 214, dtype: float64

In [53]:
fig_ddf = px.choropleth(covid_ddf,locations='Country',
                        locationmode='country names',
                        color = 'Deaths',
                        hover_name='Country',
                        animation_frame='Date'
                       )

In [54]:
fig_ddf.update_layout(title_text='Number of Deaths As Of May 26nd',
                     geo=dict(
                     showframe=False,
                     showcoastlines=False))

In [55]:
df.corr()

Unnamed: 0,SNo,Confirmed,Deaths,Recovered
SNo,1.0,0.79259,0.817102,0.819312
Confirmed,0.79259,1.0,0.934011,0.890918
Deaths,0.817102,0.934011,1.0,0.837136
Recovered,0.819312,0.890918,0.837136,1.0
