In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import plotly.express as px
import seaborn as sns
from datetime import date
import plotly.graph_objects as go
import datetime as dt

  import pandas.util.testing as tm


In [2]:
confirmed   = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
deaths = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

In [3]:
confirmed.head();

In [4]:
deaths.head();

In [5]:
recovered.head();

In [6]:
confirmed.shape;

In [7]:
deaths.shape;

In [8]:
recovered.shape;

In [9]:
#Rename Col names
confirmed= confirmed.rename(columns={'Country/Region': 'Country', 'Province/State': 'State'})
deaths= deaths.rename(columns={'Country/Region': 'Country', 'Province/State': 'State'})
recovered= recovered.rename(columns={'Country/Region': 'Country', 'Province/State': 'State'})

In [10]:
#Rename to consistent values
confirmed['Country'].replace('Mainland China', 'China', inplace=True)
deaths['Country'].replace('Mainland China', 'China', inplace=True)
recovered['Country'].replace('Mainland China', 'China', inplace=True)

In [11]:
#Handle empty data
confirmed[['State']] = confirmed[['State']].fillna('')
confirmed.fillna(0, inplace=True)
deaths[['State']] = deaths[['State']].fillna('')
deaths.fillna(0, inplace=True)
recovered[['State']] = recovered[['State']].fillna('')
recovered.fillna(0, inplace=True)


In [12]:
deaths.head();

In [13]:
recovered.head();

In [14]:
#Start by aggregating all the cases to see a snapshot for the world.
confirmed_count = confirmed.iloc[:, 4:].sum().max()
deaths_count = deaths.iloc[:, 4:].sum().max()
recovered_count = recovered.iloc[:, 4:].sum().max()


In [15]:
print('Total confirmed, dead, and recovered numbers in the world, respectively: ', 
     confirmed_count, deaths_count, recovered_count)

Total confirmed, dead, and recovered numbers in the world, respectively:  11449707 534267 6179006


In [16]:
#For easy plotting let us store all these numbers in a dataframe. 
#Let us also calculate active cases.
#Active=Confirmed−Deaths−Recovered

world_count = pd.DataFrame({
    'confirmed': [confirmed_count],
    'deaths': [deaths_count],
    'recovered': [recovered_count],
    'active': [confirmed_count - deaths_count - recovered_count]
})

In [17]:
print(world_count)

   confirmed  deaths  recovered   active
0   11449707  534267    6179006  4736434


In [18]:
#Evolution of virus cases over time

#Remove first 4 columns about state, country, etc. and sum numbers for all dates
world_confirmed = confirmed.iloc[:, 4:].sum(axis=0)
world_deaths = deaths.iloc[:, 4:].sum(axis=0)
world_recovered = recovered.iloc[:, 4:].sum(axis=0)
world_active = world_confirmed - world_deaths - world_recovered

In [19]:
us_confirmed = confirmed[confirmed.Country=='US'].iloc[:,4:].sum(axis=0)
us_deaths = deaths[deaths.Country=='US'].iloc[:,4:].sum(axis=0)
us_recovered = recovered[recovered.Country=='US'].iloc[:,4:].sum(axis=0)
us_active = us_confirmed - us_deaths-us_recovered

In [20]:
world_confirmed.tail();

In [21]:
us_confirmed.tail();

In [22]:
us_confirmed_count = us_confirmed.max()
us_deaths_count = us_deaths.max()
us_recovered_count = us_recovered.max()
us_active_count = us_active.max()
us_active_count;

In [23]:
print('Total confirmed, dead, and recovered numbers in the USA, respectively: ', 
     us_confirmed_count, us_deaths_count, us_recovered_count)

Total confirmed, dead, and recovered numbers in the USA, respectively:  2888635 129947 906763


In [24]:
us_count = pd.DataFrame({
    'confirmed': [us_confirmed_count],
    'deaths': [us_deaths_count],
    'recovered': [us_recovered_count],
    'active': [us_active_count]
})


In [25]:
print(us_count)

   confirmed  deaths  recovered   active
0    2888635  129947     906763  1874315


In [41]:
#Evolution of Mortality rate 
#Create a new data frame to capture all the numbers for confirmed, recovered, active, death.
world_df = pd.DataFrame({
#    'date': us_confirmed.index,
    'confirmed': world_confirmed,
    'deaths': world_deaths,
    'recovered': world_recovered,
    'active': world_active
})
world_df['Date'] = world_df.index
world_df['Date'] = pd.to_datetime(world_df['Date'])
#us_df['Date'] = [dt.datetime.strptime(x,'%m/%d/%Y') for x in us_df['Date']] 
world_df['MAvgConf3'] = world_df.confirmed.rolling(3).mean()
world_df['MAvgConf7'] = world_df.confirmed.rolling(7).mean()
world_df['recoveryrate'] = world_df['recovered'] / world_df['confirmed'] * 100
world_df['mortalityrate'] = world_df['deaths'] / world_df['confirmed'] * 100
world_df.tail()

Unnamed: 0,confirmed,deaths,recovered,active,Date,MAvgConf3,MAvgConf7,recoveryrate,mortalityrate
7/1/20,10664168,516253,5469134,4678781,2020-07-01,10461850.0,10116260.0,51.285145,4.841006
7/2/20,10871926,521361,5754006,4596559,2020-07-02,10661320.0,10300310.0,52.92536,4.79548
7/3/20,11074878,526411,5863847,4684620,2020-07-03,10870320.0,10486060.0,52.947283,4.753199
7/4/20,11267309,530754,6059565,4676990,2020-07-04,11071370.0,10673900.0,53.780055,4.710566
7/5/20,11449707,534267,6179006,4736434,2020-07-05,11263960.0,10864200.0,53.966499,4.666207


In [36]:
#Evolution of Mortality rate 
#Create a new data frame to capture all the numbers for confirmed, recovered, active, death.
us_df = pd.DataFrame({
#    'date': us_confirmed.index,
    'confirmed': us_confirmed,
    'deaths': us_deaths,
    'recovered': us_recovered,
    'active': us_active
})
us_df['Date'] = us_df.index
us_df['Date'] = pd.to_datetime(us_df['Date'])
#us_df['Date'] = [dt.datetime.strptime(x,'%m/%d/%Y') for x in us_df['Date']] 
us_df['MAvgConf3'] = us_df.confirmed.rolling(3).mean()
us_df['MAvgConf7'] = us_df.confirmed.rolling(7).mean()
us_df['recoveryrate'] = us_df['recovered'] / us_df['confirmed'] * 100
us_df['mortalityrate'] = us_df['deaths'] / us_df['confirmed'] * 100
us_df.tail()

Unnamed: 0,confirmed,deaths,recovered,active,Date,MAvgConf3,MAvgConf7,recoveryrate,mortalityrate
7/1/20,2687588,128105,729994,1829489,2020-07-01,2638223.0,2552011.0,27.161678,4.766542
7/2/20,2742049,128803,781970,1831276,2020-07-02,2688684.0,2597689.0,28.517725,4.697327
7/3/20,2794153,129434,790404,1874315,2020-07-03,2741263.0,2644346.0,28.287785,4.632316
7/4/20,2839436,129676,894325,1815435,2020-07-04,2791879.0,2691372.0,31.496572,4.566963
7/5/20,2888635,129947,906763,1851925,2020-07-05,2840741.0,2739849.0,31.390709,4.498561


In [37]:
today = date.today()
today

datetime.date(2020, 7, 6)

In [38]:
world = go.Figure(data=[
  
    go.Bar(name='Confirmed Cases', 
           x=world_df['Date'],
           y=world_df['confirmed'],
           marker_color='lightblue')


])

world.add_trace(go.Scatter(x=world_df.Date, y=world_df.MAvgConf3, 
                          mode='lines',
                          name='3 days moving average',
           line=dict(color='lime', width=3)))

world.add_trace(go.Scatter(x=world_df.Date, y=world_df.MAvgConf7, 
                          mode='lines',
                          name='7 days moving average',
           line=dict(color='gold', width=3)))

world.update_xaxes(rangeslider_visible=True)

world.update_layout(title='COVID-19 Confirmed Cases in the World',
                   xaxis_title='Date',
                 yaxis_title='Number')

# Change the bar mode
##fig5.update_layout(barmode='group')
world.show()


In [39]:
us = go.Figure(data=[
  
    go.Bar(name='Confirmed Cases',
           x=us_df['Date'], 
           y=us_df['confirmed'],
           marker_color = 'green')


])

us.add_trace(go.Scatter(x=us_df.Date, y=us_df.MAvgConf3, 
                          mode='lines',
                          name='3 days moving average',
           line=dict(color='turquoise', width=3)))

us.add_trace(go.Scatter(x=us_df.Date, y=us_df.MAvgConf7, 
                          mode='lines',
                          name='7 days moving average',
           line=dict(color='orchid', width=3)))

us.update_xaxes(rangeslider_visible=True)

us.update_layout(title='COVID-19 Confirmed Cases in the US',
                   xaxis_title='Date',
                 yaxis_title='Number')



# Change the bar mode
##fig5.update_layout(barmode='group')
us.show()



In [31]:
# plotly 
#us1 = px.line(us_df,x=us_df['Date'], y=us_df['confirmed'],
          #   title='COVID-19 United States Confirmed Cases')
#us1.update_xaxes(rangeslider_visible=True)
# Show plot 
#us1.show()

In [40]:
usa=go.Figure()
usa.add_trace(go.Scatter(x=us_df.Date, y=us_df.confirmed, 
                          mode='lines',
                          name='Confirmed Cases',
           line=dict(color='slateblue', width=2)))
usa.add_trace(go.Scatter(x=us_df.Date, y=us_df.recovered, 
                          mode='lines',
                          name='Recovered Cases',
           line=dict(color='lawngreen', width=2)))

usa.add_trace(go.Scatter(x=us_df.Date, y=us_df.active, 
                          mode='lines',
                          name='Active Cases',
           line=dict(color='royalblue', width=2)))

usa.add_trace(go.Scatter(x=us_df.Date, y=us_df.deaths, 
                          mode='lines',
                          name='Deaths',
           line=dict(color='firebrick', width=2)))

usa.update_xaxes(rangeslider_visible=True)
usa.update_layout(title='COVID-19 Cases in the US',
                   xaxis_title='Date',
                 yaxis_title='Number')
usa.show()

In [42]:
world1=go.Figure()
world1.add_trace(go.Scatter(x=world_df.Date, y=world_df.confirmed, 
                          mode='lines',
                          name='Confirmed Cases',
           line=dict(color='slateblue', width=2)))
world1.add_trace(go.Scatter(x=world_df.Date, y=world_df.recovered, 
                          mode='lines',
                          name='Recovered Cases',
           line=dict(color='lawngreen', width=2)))

world1.add_trace(go.Scatter(x=world_df.Date, y=world_df.active, 
                          mode='lines',
                          name='Active Cases',
           line=dict(color='royalblue', width=2)))

world1.add_trace(go.Scatter(x=world_df.Date, y=world_df.deaths, 
                          mode='lines',
                          name='Deaths',
           line=dict(color='firebrick', width=2)))

world1.update_xaxes(rangeslider_visible=True)
world1.update_layout(title='COVID-19 Cases in the World',
                   xaxis_title='Date',
                 yaxis_title='Number')
world1.show()

In [44]:
usworld=go.Figure()
usworld.add_trace(go.Scatter(x=us_df.Date, y=us_df.recoveryrate, 
                          mode='lines',
                          name='US Recovery Rate',
                          line=dict(color='cyan', width=3)))
usworld.add_trace(go.Scatter(x=us_df['Date'], y=us_df['mortalityrate'], 
                          mode='lines',
                          name='US Mortality Rate',
                          line=dict(color='firebrick', width=3)))

usworld.add_trace(go.Scatter(x=world_df.Date, y=world_df.recoveryrate, 
                          mode='lines',
                          name='World Recovery Rate',
                          line=dict(color='darkcyan', width=3)))
usworld.add_trace(go.Scatter(x=world_df['Date'], y=world_df['mortalityrate'], 
                          mode='lines',
                          name='World Mortality Rate',
                          line=dict(color='tomato', width=3)))
#usworld.update_xaxes(rangeslider_visible=True)
usworld.update_layout(yaxis_type="log")
usworld.update_layout(title='Recovery and Mortality rate comparision in the US and the World',
                   xaxis_title='Date',
                 yaxis_title='Percentage',
                     template='plotly_white')
usworld.show()