# Libraries

In [196]:
# Import
# ======
from IPython.core.display import HTML

# essential libraries
import math
import random
from datetime import date, datetime, timedelta

# storage and analysis
import numpy as np
from numpy import inf
import pandas as pd
# import geopandas
import geopy

# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import calmap
import folium

# converter
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()   

# hide warnings
import warnings
warnings.filterwarnings('ignore')

# for offline plotting
# ===================
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

# colour palette shared by the charts in this notebook
cnf = '#393e46'  # presumably "confirmed" -- not referenced by any visible cell
dth = '#ff2e63'  # used for death series
rec = '#21bf73'  # used for recovery series
act = '#fe9801'  # used for active (and some confirmed) series

# Dataset

In [199]:
def convert_df(df):
    """Reshape a wide time-series table into one row per location and date.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with the id columns 'Province/State', 'Country/Region',
        'Lat', 'Long' and one further column per date holding a count.
        The caller's frame is left untouched.

    Returns
    -------
    pandas.DataFrame
        Columns ['prov_state', 'country', 'lat', 'long', 'date', 'total'].
        'lat'/'long' are strings, 'date' holds ``datetime.date`` objects and
        'total' is the count summed per (location, date).
    """
    id_cols = ['Province/State', 'Country/Region', 'Lat', 'Long']
    key_cols = ['prov_state', 'country', 'lat', 'long', 'date']

    # astype returns a new frame, so the input is never modified.
    # Coordinates become strings -- presumably so they can act as exact
    # group/merge keys (missing values turn into the string 'nan'); confirm.
    wide = df.astype({'Lat': str, 'Long': str})

    tidy = wide.melt(id_vars=id_cols, var_name='date', value_name='total')
    tidy.columns = key_cols + ['total']

    # missing province means the row covers the whole country
    tidy['prov_state'] = tidy['prov_state'].fillna('all')
    tidy['total'] = tidy['total'].fillna(0)

    # keep calendar dates only (no time component)
    tidy['date'] = pd.to_datetime(tidy['date']).dt.date

    # sum only the count column; the keys come back as regular columns
    return tidy.groupby(key_cols, as_index=False)['total'].sum()

# JHU CSSE time-series CSVs (US tables are declared but not loaded below)
us_case = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
us_death = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"

glob_cases = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
glob_death = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
glob_recov = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"

# download the three global tables (confirmed, deaths, recovered)
cases = pd.read_csv(glob_cases)
deaths = pd.read_csv(glob_death)
recov = pd.read_csv(glob_recov)

# reshape each one so that every location/day pair is its own row
cases = convert_df(cases)
deaths = convert_df(deaths)
recov = convert_df(recov)

# same key columns everywhere; only the value column is named per table
for _frame, _value_name in ((cases, 'cases'), (deaths, 'deaths'), (recov, 'recovery')):
    _frame.columns = ['prov_state', 'country_region', 'lat', 'long', 'date', _value_name]

In [200]:
# left-join deaths and recoveries onto the confirmed-case rows
merge_keys = ['prov_state', 'country_region', 'date', 'lat', 'long']
df = pd.merge(left=cases, right=deaths, how='left', on=merge_keys)
df = pd.merge(left=df, right=recov, how='left', on=merge_keys)

# dates become 'YYYY-MM-DD' strings; unmatched recovery rows count as 0
df['date'] = df['date'].map(lambda day: day.strftime('%Y-%m-%d'))
df['recovery'] = df['recovery'].fillna(0).astype(int)

# Active Case = confirmed - deaths - recovered, floored at zero
active_raw = df['cases'] - df['deaths'] - df['recovery']
df['active'] = active_raw.astype(int).clip(lower=0)

In [210]:
# print(cases.shape, deaths.shape, recov.shape)
# deaths[deaths.country_region == "US"]
# df[df.iloc[:,1] == "US"]
# df.shape
# df.info()
# df[df.recovery == inf]
# df[df.country_region == "Taiwan"]

# Preprocessing

In [None]:
# # add daily change in deaths and cases
# death_change = []
# case_change = []
# # newDeath = df[['country', 'case_total', 'death_total']]

# countries = df.country_region.unique()

# # calculate death change for each country
# for country in countries:
#     d_change = list(df[df['country_region'] == country].deaths.diff(periods=1))
#     c_change = list(df[df['country_region'] == country].cases.diff(periods=1))
    
#     death_change += d_change
#     case_change += c_change
    
# df['death_change'] = death_change
# df['case_change'] = case_change
# df['death_change'].fillna(0, inplace=True)
# df['death_change'] = df['death_change'].astype(int)
# df['case_change'].fillna(0, inplace=True)
# df['case_change'] = df['case_change'].astype(int)

# df['case_change'] = df['case_change'].apply(lambda x: 0 if x<0 else x)

# df.sample(6)

In [201]:
# Statistics by date
# ==================

# Sum only the count columns. A bare groupby('date').sum() also tries to
# "sum" the string columns (province, country, lat, long) on current pandas.
value_cols = ['cases', 'deaths', 'recovery', 'active']
day_wise = df.groupby('date')[value_cols].sum().reset_index()

# deaths and recoveries per 100 confirmed cases
day_wise['death_rate'] = (day_wise['deaths'] / day_wise['cases'] * 100).round(2)
day_wise['recovery_rate'] = (day_wise['recovery'] / day_wise['cases'] * 100).round(2)

# number of countries with at least one case, matched on date rather than by
# position so a day without any cases yields 0 instead of a length mismatch
countries_per_day = df[df['cases'] != 0].groupby('date')['country_region'].nunique()
day_wise['num_countries'] = day_wise['date'].map(countries_per_day).fillna(0).astype(int)

# 0/0 on days without cases gives NaN; report those rates as 0
cols = ['death_rate', 'recovery_rate']
day_wise[cols] = day_wise[cols].fillna(0)

day_wise.tail()

Unnamed: 0,date,cases,deaths,recovery,active,death_rate,recovery_rate,num_countries
75,2020-04-06,1345101,74565,273256,997281,5.54,20.31,184
76,2020-04-07,1426096,81865,296259,1047973,5.74,20.77,184
77,2020-04-08,1511104,88338,324502,1098265,5.85,21.47,184
78,2020-04-09,1595350,95455,348808,1151088,5.98,21.86,184
79,2020-04-10,1691719,102525,370234,1218961,6.06,21.89,185


In [202]:
# Statistics by country
# ============

# rows for the most recent date only ('date' holds 'YYYY-MM-DD' strings,
# so the lexical maximum is also the latest day)
latest_rows = df[df['date'] == df['date'].max()]

# Country totals over the count columns only. A bare groupby().sum() would
# also try to "sum" the province / lat / long strings on current pandas.
value_cols = ['cases', 'deaths', 'recovery', 'active']
country_wise = latest_rows.groupby('country_region')[value_cols].sum().reset_index()

# per 100 cases
country_wise['death_rate'] = (country_wise['deaths'] / country_wise['cases'] * 100).round(2)
country_wise['recovery_rate'] = (country_wise['recovery'] / country_wise['cases'] * 100).round(2)

# 0/0 for countries without cases gives NaN; report those rates as 0
cols = ['death_rate', 'recovery_rate']
country_wise[cols] = country_wise[cols].fillna(0)

country_wise.head()

Unnamed: 0,country_region,cases,deaths,recovery,active,death_rate,recovery_rate
0,Afghanistan,521,15,32,474,2.88,6.14
1,Albania,416,23,182,211,5.53,43.75
2,Algeria,1761,256,405,1100,14.54,23.0
3,Andorra,601,26,71,504,4.33,11.81
4,Angola,19,2,2,15,10.53,10.53


In [203]:
# Daily world totals per status. Columns are selected with a list: selecting
# several columns from a groupby with a bare tuple is no longer supported.
status_cols = ['recovery', 'deaths', 'active']
temp = df.groupby('date')[status_cols].sum().reset_index()

# long format: one row per (date, status) so plotly can stack by 'Case'
temp = temp.melt(id_vars="date", value_vars=status_cols, var_name='Case', value_name='Count')

fig = px.area(temp, x="date", y="Count", color='Case', height=600, title='Cases over time',
              color_discrete_sequence=[rec, dth, act])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

# Maps

### Across the world

In [213]:
# World wide: one circle per location for the latest date in df

temp = df[df['date'] == df['date'].max()]

m = folium.Map(location=[0, 0], tiles='cartodbpositron', min_zoom=1, max_zoom=4, zoom_start=1)

for _, place in temp.iterrows():
    # tooltip lists country, province and the two headline counts
    tooltip_html = (
        '<li><bold>Country : ' + str(place['country_region']) +
        '<li><bold>Province : ' + str(place['prov_state']) +
        '<li><bold>Confirmed : ' + str(place['cases']) +
        '<li><bold>Deaths : ' + str(place['deaths'])
    )
    marker = folium.Circle(
        location=[place['lat'], place['long']],
        color='Red', fill='Red',
        tooltip=tooltip_html,
        # radius grows slightly faster than linearly with the case count
        radius=int(place['cases'])**1.1)
    marker.add_to(m)

m

In [220]:
# Over the time
# https://plotly.com/python/builtin-colorscales/

# df has one row per province, so a country with several provinces would be
# passed to the map several times per frame. Aggregate to one total per
# (date, country) first, as country_wise does for the latest day.
country_daily = df.groupby(['date', 'country_region'], as_index=False)['cases'].sum()

fig = px.choropleth(country_daily, locations="country_region", locationmode="country names", color="cases",
                    hover_name="country_region", animation_frame="date", title='Cases over time',
                    color_continuous_scale=px.colors.sequential.Oryel)

fig.update(layout_coloraxis_showscale=False)
fig.show()

In [226]:
# Natural log of the 'deaths' column of whatever frame `temp` currently holds.
# NOTE(review): `temp` is reassigned by several cells; the recorded output
# (150 rows with a gapped index) looks like a frame already filtered to
# deaths > 0 -- confirm which `temp` is intended. Zero counts give -inf.
np.log(temp["deaths"])

0      2.708050
1      3.135494
2      5.545177
3      3.258097
4      0.693147
         ...   
177    1.098612
178    2.197225
180    0.693147
183    0.693147
184    1.098612
Name: deaths, Length: 150, dtype: float64

In [228]:
# Two world maps side by side: confirmed cases (left) and deaths (right)
map_kwargs = dict(locations="country_region", locationmode='country names', hover_name="country_region")

# Confirmed cases
fig_c = px.choropleth(country_wise, color="cases", hover_data=['cases'], **map_kwargs)

# Deaths (only countries that reported at least one)
temp = country_wise.loc[country_wise['deaths'] > 0]
fig_d = px.choropleth(temp, color="deaths", hover_data=['deaths'], **map_kwargs)

# Plot
fig = make_subplots(rows=1, cols=2, subplot_titles=['cases', 'deaths'],
                    specs=[[{"type": "choropleth"} for _ in range(2)]])

# NOTE(review): both traces come from plotly express and presumably point at
# the same layout coloraxis, i.e. the two maps may share one colour scale -- confirm.
for position, source_fig in enumerate((fig_c, fig_d), start=1):
    fig.add_trace(source_fig.data[0], row=1, col=position)

fig.update_layout(coloraxis_showscale=True)

fig.show()

# Cases over the time

In [None]:
# day_wise (built in the "Statistics by date" cell) has the columns
# date / cases / deaths / recovery / active / death_rate / recovery_rate /
# num_countries, so the charts use those names. The two metrics it lacks are
# derived on a local copy so day_wise itself is not modified.
daily = day_wise.assign(
    # day-over-day increase of the cumulative case count (first day -> 0)
    new_cases=day_wise['cases'].diff().fillna(0),
    # deaths per 100 recovered; days without recoveries become gaps, not inf
    deaths_per_100_recovered=(day_wise['deaths'] / day_wise['recovery'] * 100)
    .replace([np.inf, -np.inf], np.nan)
    .round(2),
)


def _panel_row(panels, titles, **subplot_kwargs):
    """Put the first trace of each figure in `panels` side by side in one row."""
    combined = make_subplots(rows=1, cols=len(panels), shared_xaxes=False,
                             subplot_titles=titles, **subplot_kwargs)
    for position, panel in enumerate(panels, start=1):
        combined.add_trace(panel['data'][0], row=1, col=position)
    combined.update_layout(height=480)
    return combined


# confirmed cases and deaths
fig_c = px.bar(daily, x="date", y="cases", color_discrete_sequence=[act])
fig_d = px.bar(daily, x="date", y="deaths", color_discrete_sequence=[dth])

fig = _panel_row([fig_c, fig_d], ('Confirmed cases', 'Deaths reported'), horizontal_spacing=0.1)
fig.show()

# ===============================

# rates
fig_1 = px.line(daily, x="date", y="death_rate", color_discrete_sequence=[dth])
fig_2 = px.line(daily, x="date", y="recovery_rate", color_discrete_sequence=[rec])
fig_3 = px.line(daily, x="date", y="deaths_per_100_recovered", color_discrete_sequence=['#333333'])

fig = _panel_row([fig_1, fig_2, fig_3],
                 ('Deaths / 100 Cases', 'Recovered / 100 Cases', 'Deaths / 100 Recovered'))
fig.show()

# ===================================

# new cases and number of affected countries
fig_c = px.bar(daily, x="date", y="new_cases", color_discrete_sequence=[act])
fig_d = px.bar(daily, x="date", y="num_countries", color_discrete_sequence=[dth])

fig = _panel_row([fig_c, fig_d], ('No. of new cases everyday', 'No. of countries'), horizontal_spacing=0.1)
fig.show()

# Top 15 Countries

In [None]:
# NOTE(review): this cell reads 'Confirmed', 'Country/Region', 'Population',
# '1 week change', ... from country_wise; the country_wise shown earlier in the
# notebook has lower-case columns and no population/weekly data -- confirm
# which frame is meant before running.

def _top15_bar(frame, metric, colour, **px_kwargs):
    """Horizontal bar chart of the 15 rows of `frame` with the largest `metric`."""
    return px.bar(frame.sort_values(metric).tail(15), x=metric, y="Country/Region",
                  text=metric, orientation='h', color_discrete_sequence=[colour], **px_kwargs)


# confirmed - deaths
fig_c = _top15_bar(country_wise, 'Confirmed', act)
fig_d = _top15_bar(country_wise, 'Deaths', dth)

# recovered - active
fig_r = _top15_bar(country_wise, 'Recovered', rec)
fig_a = _top15_bar(country_wise, 'Active', '#333333')

# death - recovered / 100 cases
fig_dc = _top15_bar(country_wise, 'Deaths / 100 Cases', '#f38181')
fig_rc = _top15_bar(country_wise, 'Recovered / 100 Cases', '#a3de83')

# new cases - cases per million people (countries above one million people only)
fig_nc = _top15_bar(country_wise, 'New cases', '#c61951')
temp = country_wise[country_wise['Population']>1000000]
fig_p = _top15_bar(temp, 'Cases / Million People', '#741938')

# week change, percent increase (countries above 100 confirmed cases only)
fig_wc = _top15_bar(country_wise, '1 week change', '#004a7c')
temp = country_wise[country_wise['Confirmed']>100]
fig_pi = _top15_bar(temp, '1 week % increase', '#005691',
                    hover_data=['Confirmed last week', 'Confirmed'])

# plot: 5 x 2 grid, filled row by row in the order of the subplot titles
fig = make_subplots(rows=5, cols=2, shared_xaxes=False, horizontal_spacing=0.14, vertical_spacing=0.08,
                    subplot_titles=('Confirmed cases', 'Deaths reported', 'Recovered', 'Active cases',
                                    'Deaths / 100 cases', 'Recovered / 100 cases', 'New cases',
                                    'Cases / Million People', '1 week increase', '1 week % increase'))

panels = [fig_c, fig_d, fig_r, fig_a, fig_dc, fig_rc, fig_nc, fig_p, fig_wc, fig_pi]
for position, panel in enumerate(panels):
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(panel['data'][0], row=grid_row + 1, col=grid_col + 1)

fig.update_layout(height=3000)

In [None]:
# Deaths against confirmed cases for the 15 countries with the most deaths.
# Column names follow country_wise as built in the "Statistics by country"
# cell (country_region / cases / deaths).
top_by_deaths = country_wise.sort_values('deaths', ascending=False).head(15)

fig = px.scatter(top_by_deaths,
                 x='cases', y='deaths', color='country_region', size='cases', height=700,
                 text='country_region', log_x=True, log_y=True, title='Deaths vs Confirmed (Scale is in log10)')
fig.update_traces(textposition='top center')
fig.update_layout(showlegend=False)
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

# Date vs

In [None]:
# One stacked bar chart per metric, coloured by country.
# NOTE(review): `full_grouped` is not assigned in any visible cell -- confirm
# where it is built before running.
for metric in ("Confirmed", "Deaths", "New cases"):
    fig = px.bar(full_grouped, x="Date", y=metric, color='Country/Region', height=600,
                 title=metric, color_discrete_sequence=px.colors.cyclical.mygbm)
    fig.show()

In [None]:
# One line chart per metric, one line per country.
# NOTE(review): `full_grouped` is not assigned in any visible cell -- confirm
# where it is built before running.
for metric in ("Confirmed", "Deaths", "New cases"):
    fig = px.line(full_grouped, x="Date", y=metric, color='Country/Region', height=600,
                  title=metric, color_discrete_sequence=px.colors.cyclical.mygbm)
    fig.show()

In [None]:
# NOTE(review): `full_grouped` and `full_table` are not assigned in any visible
# cell; 'Date' is assumed to be datetime-like because `.dt.days` is taken of
# date differences -- confirm.

def _days_since_threshold(threshold):
    """Confirmed counts per country, indexed by days since passing `threshold`.

    Returns (countries, rows_above, first_day, merged):
    countries  -- countries whose confirmed count ever exceeds `threshold`
    rows_above -- per country/date confirmed totals above `threshold`
    first_day  -- first such date per country ('Min Date')
    merged     -- rows_above plus 'Min Date' and the day offset 'N days'
    """
    over = full_grouped['Confirmed'] > threshold
    countries = full_grouped[over]['Country/Region'].unique()

    rows_above = full_table[full_table['Country/Region'].isin(countries)]
    rows_above = rows_above.groupby(['Country/Region', 'Date'])['Confirmed'].sum().reset_index()
    rows_above = rows_above[rows_above['Confirmed'] > threshold]

    first_day = rows_above.groupby('Country/Region')['Date'].min().reset_index()
    first_day.columns = ['Country/Region', 'Min Date']

    merged = pd.merge(rows_above, first_day, on='Country/Region')
    merged['N days'] = (merged['Date'] - merged['Min Date']).dt.days
    return countries, rows_above, first_day, merged


gt_100, temp, min_date, from_100th_case = _days_since_threshold(100)

fig = px.line(from_100th_case, x='N days', y='Confirmed', color='Country/Region',
              title='N days from 100 case', height=600)
fig.show()

# ===========================================================================

gt_1000, temp, min_date, from_1000th_case = _days_since_threshold(1000)

fig = px.line(from_1000th_case, x='N days', y='Confirmed', color='Country/Region',
              title='N days from 1000 case', height=600)
fig.show()

# ===========================================================================

# gt_10000 is reused by the "Country Wise" cell further down
gt_10000, temp, min_date, from_10000th_case = _days_since_threshold(10000)

fig = px.line(from_10000th_case, x='N days', y='Confirmed', color='Country/Region',
              title='N days from 10000 case', height=600)
fig.show()

# Composition of Cases

In [None]:
# Rows of the most recent date; full_latest is reused by the epidemics table.
# NOTE(review): `full_table` is not assigned in any visible cell -- confirm.
latest_day = max(full_table['Date'])
full_latest = full_table[full_table['Date'] == latest_day]

# one treemap per metric: country -> province, sized by the metric
treemap_specs = (
    ('Confirmed', 'Number of Confirmed Cases'),
    ('Deaths', 'Number of Deaths reported'),
)
for metric, heading in treemap_specs:
    ranked = full_latest.sort_values(by=metric, ascending=False).reset_index(drop=True)
    fig = px.treemap(ranked,
                     path=["Country/Region", "Province/State"], values=metric, height=700,
                     title=heading,
                     color_discrete_sequence=px.colors.qualitative.Dark2)
    fig.data[0].textinfo = 'label+text+value'
    fig.show()

# New cases

In [None]:
# Bubble per (date, country) with new cases; bubble size and colour = new cases.
# NOTE(review): `full_grouped` is not assigned in any visible cell -- confirm.
has_new_cases = full_grouped['New cases'] > 0
temp = full_grouped[has_new_cases].sort_values('Country/Region', ascending=False)

fig = px.scatter(temp, x='Date', y='Country/Region', size='New cases', color='New cases', height=3000,
                 color_continuous_scale=px.colors.sequential.Viridis)
# label every country on the y axis and hide the colour bar
fig.update_layout(yaxis={'dtick': 1}, coloraxis_showscale=False)
fig.show()

# Active cases

In [None]:
# Heatmap of active cases: dates on x, countries on y.
# NOTE(review): `full_grouped` is not assigned in any visible cell -- confirm.
active_counts = full_grouped['Active']
active_trace = go.Heatmap(
    z=active_counts,
    x=full_grouped['Date'],
    y=full_grouped['Country/Region'],
    colorscale='Reds',
    showlegend=False,
    text=active_counts)

fig = go.Figure(data=active_trace)
# label every country on the y axis; tall figure so the rows stay readable
fig.update_layout(yaxis={'dtick': 1}, height=3000)
fig.show()

# Epidemic Span

Note: In the graph, the last day is shown as one day after the last date on which a new confirmed case was reported in the Country / Region.

In [None]:
# NOTE(review): `full_table` is not assigned in any visible cell; 'Date' is
# assumed to be datetime-like because a timedelta is added to it -- confirm.

# first date: earliest day with a confirmed case, per country
# ==========
first_date = full_table[full_table['Confirmed'] > 0]
first_date = first_date.groupby('Country/Region')['Date'].agg(['min']).reset_index()

# last date: latest day on which the confirmed count went up, per country
# =========
# Columns are selected with a list (tuple selection on a groupby is no longer
# supported). The diff runs inside each country, so a country's first row is
# NaN instead of a difference against the previous country.
daily_totals = full_table.groupby(['Country/Region', 'Date'])[['Confirmed', 'Deaths', 'Recovered']].sum()
last_date = daily_totals.groupby(level='Country/Region').diff().reset_index()

last_date = last_date[last_date['Confirmed'] > 0]
last_date = last_date.groupby('Country/Region')['Date'].agg(['max']).reset_index()

# first_last
# ==========
# Join on the country name. Pasting the two frames together by row position
# pairs the wrong countries when one has no day with a positive increase.
first_last = first_date.merge(last_date, on='Country/Region', how='left')

# a country without any later increase: its span ends on its first day
first_last['max'] = first_last['max'].fillna(first_last['min'])

# added 1 more day, which will show the next day as the day on which last case appeared
first_last['max'] = first_last['max'] + timedelta(days=1)

# no. of days
first_last['Days'] = first_last['max'] - first_last['min']

# task column as country
first_last['Task'] = first_last['Country/Region']

# rename columns
first_last.columns = ['Country/Region', 'Start', 'Finish', 'Days', 'Task']

# sort by no. of days
first_last = first_last.sort_values('Days')

# visualization
# =============

# one random colour per country (hex digits limited to 0-C)
clr = ["#" + ''.join(random.choice('0123456789ABC') for _ in range(6)) for _ in range(len(first_last))]

# plot
fig = ff.create_gantt(first_last, index_col='Country/Region', colors=clr, show_colorbar=False,
                      bar_width=0.2, showgrid_x=True, showgrid_y=True, height=2500)
fig.show()

https://app.flourish.studio/visualisation/1571387/edit



In [None]:
# Render the Flourish bar-chart-race embed (visualisation 1571387) inline.
# The <script> tag pulls embed.js from public.flourish.studio when the output is displayed.
HTML('''<div class="flourish-embed flourish-bar-chart-race" data-src="visualisation/1571387"><script src="https://public.flourish.studio/resources/embed.js"></script></div>''')

# Country Wise

In [None]:
# Daily new cases for every country in `gt_10000`, one subplot per country.
# NOTE(review): relies on `gt_10000` from the "N days from ... case" cell and on
# `full_table`, which is not assigned in any visible cell -- confirm.

# Columns are selected with a list (tuple selection on a groupby is no longer
# supported). The diff runs inside each country, so a country's first row is
# NaN instead of a difference against the previous country.
daily_totals = full_table.groupby(['Country/Region', 'Date'])[['Confirmed', 'Deaths']].sum()
temp = daily_totals.groupby(level='Country/Region').diff().reset_index()

temp = temp[temp['Country/Region'].isin(gt_10000)]

# countries = ['China', 'Iran', 'South Korea', 'Italy', 'France', 'Germany', 'Italy', 'Spain', 'US']
countries = temp['Country/Region'].unique()

n_cols = 4
# at least one row so the grid is still valid when no country qualifies
n_rows = max(1, math.ceil(len(countries)/n_cols))

fig = make_subplots(rows=n_rows, cols=n_cols, shared_xaxes=False, subplot_titles=list(countries))

for ind, country in enumerate(countries):
    row = ind // n_cols + 1
    col = ind % n_cols + 1
    # take x and y from the same country's rows so dates and counts line up
    country_rows = temp[temp['Country/Region'] == country]
    fig.add_trace(go.Bar(x=country_rows['Date'], y=country_rows['Confirmed'], name=country), row=row, col=col)

fig.update_layout(height=2000, title_text="No. of new cases in each Country")
fig.show()

# Calendar map

### Number of new cases every day

In [None]:
# Day-over-day change of the world-wide confirmed total, drawn as a calendar.
# NOTE(review): `full_table` is not assigned in any visible cell -- confirm.
world_confirmed = full_table.groupby('Date')['Confirmed'].sum()
temp = world_confirmed.diff()

plt.figure(figsize=(20, 5))
ax = calmap.yearplot(temp, fillcolor='white', cmap='Reds', linewidth=0.5)

### Number of new countries every day

In [None]:
# Day-over-day change in the number of countries reporting cases.
# NOTE(review): `full_table` is not assigned in any visible cell -- confirm.
with_cases = full_table[full_table['Confirmed'] != 0]
countries_by_day = with_cases.groupby('Date')['Country/Region'].unique()
spread = countries_by_day.apply(len).diff()

plt.figure(figsize=(20, 5))
ax = calmap.yearplot(spread, fillcolor='white', cmap='Greens', linewidth=0.5)

# Comparison with similar epidemics

https://www.kaggle.com/imdevskp/covid19-vs-sars-vs-mers-vs-ebola-vs-h1n1



In [None]:
# COVID-19 totals come from the latest snapshot; the other rows are fixed figures.
# NOTE(review): `full_latest` is assigned in the "Composition of Cases" cell -- run that first.
covid_confirmed = full_latest['Confirmed'].sum()
covid_deaths = full_latest['Deaths'].sum()

epidemics = pd.DataFrame({
    'epidemic': ['COVID-19', 'SARS', 'EBOLA', 'MERS', 'H1N1'],
    'start_year': [2019, 2003, 2014, 2012, 2009],
    'end_year': [2020, 2004, 2016, 2017, 2010],
    'confirmed': [covid_confirmed, 8096, 28646, 2494, 6724149],
    'deaths': [covid_deaths, 774, 11323, 858, 19654],
})

# deaths per 100 confirmed cases
death_share = epidemics['deaths'] / epidemics['confirmed']
epidemics['mortality'] = round(death_share * 100, 2)

epidemics.head()

In [None]:
# long format: one row per (epidemic, measure)
measures = ['confirmed', 'deaths', 'mortality']
temp = epidemics.melt(id_vars='epidemic', value_vars=measures,
                      var_name='Case', value_name='Value')

# one facet per measure, bars labelled with their values
fig = px.bar(temp, x="epidemic", y="Value", color='epidemic', text='Value', facet_col="Case",
             color_discrete_sequence=px.colors.qualitative.Bold)
fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_yaxes(showticklabels=False)

# let the 2nd and 3rd facet scale independently of the first
for facet_axis in (fig.layout.yaxis2, fig.layout.yaxis3):
    facet_axis.update(matches=None)
fig.show()

# Analysis on similar epidemics

https://www.kaggle.com/imdevskp/mers-outbreak-analysis  
https://www.kaggle.com/imdevskp/sars-2003-outbreak-analysis  
https://www.kaggle.com/imdevskp/western-africa-ebola-outbreak-analysis
