# Import all required Modules

In [0]:
import pandas as pd
import numpy as np
import seaborn as sn
from matplotlib import pyplot as plt
import plotly.graph_objects as go
from fbprophet import Prophet
import plotly.express as px

# Data Imports and Renaming Columns

In [0]:
df = pd.read_csv('covid_19_data.csv', parse_dates = ['Last Update'])
df.rename(columns = {'ObservationDate':'Date', 'Country/Region':'Country'}, inplace = True)

df_confirmed = pd.read_csv('time_series_covid_19_confirmed.csv')
df_confirmed.rename(columns = {'Country/Region':'Country'}, inplace = True)

df_deaths = pd.read_csv('time_series_covid_19_deaths.csv')
df_deaths.rename(columns = {'Country/Region':'Country'}, inplace = True)

df_recovered = pd.read_csv('time_series_covid_19_recovered.csv')
df_recovered.rename(columns = {'Country/Region':'Country'}, inplace = True)

### Exploring Data

In [18]:
print("COVID-19 DATA\n")
df.head()


COVID-19 DATA



Unnamed: 0,SNo,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,2020-01-22 17:00:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,2020-01-22 17:00:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,2020-01-22 17:00:00,0.0,0.0,0.0


In [19]:
print("COVID-19 Confirmed DATA\n")
df_confirmed.head()



COVID-19 Confirmed DATA



Unnamed: 0,Province/State,Country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
0,,Thailand,15.0,101.0,2,3,5,7,8,8,14,14,14,19,19,19,19,25,25,25,25,32,32,32,33,33,33,33,33,34,35,35,35,35,35,35,35,35,37,40,40,41,42,42,43,43,43,47,48,50,50,50,53,59,70,75,82
1,,Japan,36.0,138.0,2,1,2,2,4,4,7,7,11,15,20,20,20,22,22,45,25,25,26,26,26,28,28,29,43,59,66,74,84,94,105,122,147,159,170,189,214,228,241,256,274,293,331,360,420,461,502,511,581,639,639,701,773
2,,Singapore,1.2833,103.8333,0,1,3,3,4,5,7,7,10,13,16,18,18,24,28,28,30,33,40,45,47,50,58,67,72,75,77,81,84,84,85,85,89,89,91,93,93,93,102,106,108,110,110,117,130,138,150,150,160,178,178,200,212
3,,Nepal,28.1667,84.25,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
4,,Malaysia,2.5,112.5,0,0,0,3,4,4,4,7,8,8,8,8,8,10,12,12,12,16,16,18,18,18,19,19,22,22,22,22,22,22,22,22,22,22,22,22,23,23,25,29,29,36,50,50,83,93,99,117,129,149,149,197,238


In [20]:
print("COVID-19 Deaths DATA\n")
df_deaths.head()



COVID-19 Deaths DATA



Unnamed: 0,Province/State,Country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
0,,Thailand,15.0,101.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1,,Japan,36.0,138.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,2,4,4,5,6,6,6,6,6,6,6,6,10,10,15,16,19,22
2,,Singapore,1.2833,103.8333,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,,Nepal,28.1667,84.25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,,Malaysia,2.5,112.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [21]:
print("COVID-19 Recovered DATA\n")
df_recovered.head()

COVID-19 Recovered DATA



Unnamed: 0,Province/State,Country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
0,,Thailand,15.0,101.0,0,0,0,0,2,2,5,5,5,5,5,5,5,5,5,5,5,10,10,10,10,10,12,12,12,14,15,15,15,15,17,17,21,21,22,22,22,28,28,28,31,31,31,31,31,31,31,31,33,34,34,35,35
1,,Japan,36.0,138.0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,4,9,9,9,9,12,12,12,13,18,18,22,22,22,22,22,22,22,22,32,32,32,43,43,43,46,76,76,76,101,118,118,118,118
2,,Singapore,1.2833,103.8333,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,9,15,15,17,18,18,24,29,34,34,37,37,51,51,53,62,62,62,72,72,78,78,78,78,78,78,78,78,78,96,96,97,105
3,,Nepal,28.1667,84.25,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
4,,Malaysia,2.5,112.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,3,3,3,3,7,7,7,13,15,15,15,15,15,18,18,18,18,18,18,18,18,22,22,22,22,23,24,24,24,26,26,26,35


In [22]:
df.describe()


Unnamed: 0,SNo,Confirmed,Deaths,Recovered
count,5632.0,5632.0,5632.0,5632.0
mean,2816.5,582.415305,18.33576,213.634766
std,1625.962689,4935.578984,195.698066,2363.112194
min,1.0,0.0,0.0,0.0
25%,1408.75,2.0,0.0,0.0
50%,2816.5,10.0,0.0,0.0
75%,4224.25,94.0,1.0,12.0
max,5632.0,67790.0,3075.0,52960.0


In [23]:
df_confirmed.describe()

Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
count,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0
mean,32.472246,-38.240308,1.255656,1.477376,2.128959,3.244344,4.791855,6.622172,12.61991,13.950226,18.628959,22.459276,27.235294,37.979638,44.979638,54.054299,62.522624,69.721719,77.807692,83.9819,90.837104,96.746606,101.361991,102.309955,136.579186,151.323529,156.176471,161.140271,165.742081,169.99095,171.128959,172.391403,173.807692,177.780543,178.653846,180.0181,181.929864,184.151584,187.226244,190.316742,194.595023,199.929864,204.312217,210.045249,215.20362,221.452489,230.280543,239.414027,248.404977,256.925339,268.307692,284.762443,290.368778,328.49095,353.171946
std,18.24316,80.488626,21.171051,21.257,26.477289,36.865436,51.533686,69.443788,170.220759,171.323903,236.064017,279.893986,344.404641,535.022527,646.729836,797.005818,939.162823,1055.717087,1190.911934,1293.134598,1413.472284,1513.195519,1591.119424,1591.422643,2295.713341,2590.268823,2677.915922,2769.81663,2855.715209,2936.19311,2952.814379,2972.348328,2982.894601,3050.475448,3050.538457,3060.269613,3084.049895,3103.305431,3123.267274,3139.242316,3160.977087,3190.002944,3201.691249,3211.5035,3220.566252,3231.736307,3245.454795,3259.270219,3272.37563,3287.815469,3300.66287,3327.040627,3334.956867,3408.758787,3472.648781
min,-41.4545,-157.8584,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,29.517325,-94.614125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,38.1949,-75.2479,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
75%,42.3289,18.981425,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,2.0,3.0,4.0,6.0,7.0,8.75,11.75,17.0,25.75,30.75
max,64.9631,174.886,444.0,444.0,549.0,761.0,1058.0,1423.0,3554.0,3554.0,4903.0,5806.0,7153.0,11177.0,13522.0,16678.0,19665.0,22112.0,24953.0,27100.0,29631.0,31728.0,33366.0,33366.0,48206.0,54406.0,56249.0,58182.0,59989.0,61682.0,62031.0,62442.0,62662.0,64084.0,64084.0,64287.0,64786.0,65187.0,65596.0,65914.0,66337.0,66907.0,67103.0,67217.0,67332.0,67466.0,67592.0,67666.0,67707.0,67743.0,67760.0,67773.0,67781.0,67786.0,67790.0


In [25]:
df_deaths.describe()

Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
count,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0
mean,32.472246,-38.240308,0.038462,0.040724,0.058824,0.095023,0.126697,0.18552,0.29638,0.300905,0.386878,0.4819,0.585973,0.819005,0.963801,1.113122,1.276018,1.434389,1.626697,1.823529,2.049774,2.291855,2.5181,2.529412,3.10181,3.445701,3.769231,4.004525,4.226244,4.540724,4.800905,5.08371,5.09276,5.561086,5.585973,5.947964,6.126697,6.266968,6.366516,6.497738,6.653846,6.778281,6.979638,7.149321,7.361991,7.574661,7.828054,8.049774,8.60181,9.022624,9.642534,10.441176,10.678733,12.226244,13.165158
std,18.24316,80.488626,0.808608,0.809898,1.143324,1.903576,2.474773,3.61616,5.946125,5.94666,7.706469,9.703817,11.843962,16.647712,19.691695,22.783231,26.112539,29.394517,33.247079,37.099921,41.428377,46.327371,50.798619,50.798703,62.308981,69.301294,75.912715,80.668606,85.092605,91.371507,96.507827,101.976676,101.976401,111.5836,111.583381,118.670315,121.90531,124.379207,125.618855,127.572846,129.720999,131.348045,133.369424,134.928655,136.700859,138.276384,139.816814,141.307973,143.330153,145.125872,147.491508,150.963459,151.851434,159.423717,164.161312
min,-41.4545,-157.8584,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,29.517325,-94.614125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,38.1949,-75.2479,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,42.3289,18.981425,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,64.9631,174.886,17.0,17.0,24.0,40.0,52.0,76.0,125.0,125.0,162.0,204.0,249.0,350.0,414.0,479.0,549.0,618.0,699.0,780.0,871.0,974.0,1068.0,1068.0,1310.0,1457.0,1596.0,1696.0,1789.0,1921.0,2029.0,2144.0,2144.0,2346.0,2346.0,2495.0,2563.0,2615.0,2641.0,2682.0,2727.0,2761.0,2803.0,2835.0,2871.0,2902.0,2931.0,2959.0,2986.0,3008.0,3024.0,3046.0,3056.0,3062.0,3075.0


In [26]:
df.recovered.describe()

AttributeError: ignored

### Earliest and Latest Cases

In [27]:
df.head()

Unnamed: 0,SNo,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,2020-01-22 17:00:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,2020-01-22 17:00:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,2020-01-22 17:00:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,2020-01-22 17:00:00,0.0,0.0,0.0


In [28]:
df.tail()

Unnamed: 0,SNo,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
5627,5628,03/14/2020,"Virgin Islands, U.S.",US,2020-03-14 16:15:18,1.0,0.0,0.0
5628,5629,03/14/2020,Gibraltar,UK,2020-03-14 16:33:03,1.0,0.0,1.0
5629,5630,03/14/2020,Diamond Princess cruise ship,Australia,2020-03-14 02:33:04,0.0,0.0,0.0
5630,5631,03/14/2020,West Virginia,US,2020-03-10 02:33:04,0.0,0.0,0.0
5631,5632,03/14/2020,,occupied Palestinian territory,2020-03-11 20:53:02,0.0,0.0,0.0


## By country View

In [0]:
df2 = df.groupby(['Date','Country'])[['Date', 'Country', 'Confirmed', 'Deaths', 'Recovered']].sum().reset_index()

In [31]:
df2.tail()

Unnamed: 0,Date,Country,Confirmed,Deaths,Recovered
2582,03/14/2020,United Arab Emirates,85.0,0.0,17.0
2583,03/14/2020,Uruguay,4.0,0.0,0.0
2584,03/14/2020,Venezuela,2.0,0.0,0.0
2585,03/14/2020,Vietnam,53.0,0.0,16.0
2586,03/14/2020,occupied Palestinian territory,0.0,0.0,0.0


## By country View(Inida and China)

In [32]:
df.query('Country == "India"').groupby('Date')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()

INFO:numexpr.utils:NumExpr defaulting to 2 threads.


Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,01/30/2020,1.0,0.0,0.0
1,01/31/2020,1.0,0.0,0.0
2,02/01/2020,1.0,0.0,0.0
3,02/02/2020,2.0,0.0,0.0
4,02/03/2020,3.0,0.0,0.0
5,02/04/2020,3.0,0.0,0.0
6,02/05/2020,3.0,0.0,0.0
7,02/06/2020,3.0,0.0,0.0
8,02/07/2020,3.0,0.0,0.0
9,02/08/2020,3.0,0.0,0.0


In [33]:
df.query('Country == "Mainland China"').groupby('Date')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()


Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,01/22/2020,547.0,17.0,28.0
1,01/23/2020,639.0,18.0,30.0
2,01/24/2020,916.0,26.0,36.0
3,01/25/2020,1399.0,42.0,39.0
4,01/26/2020,2062.0,56.0,49.0
5,01/27/2020,2863.0,82.0,58.0
6,01/28/2020,5494.0,131.0,101.0
7,01/29/2020,6070.0,133.0,120.0
8,01/30/2020,8124.0,171.0,135.0
9,01/31/2020,9783.0,213.0,214.0


In [41]:
df.groupby('Country')[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()


Unnamed: 0,Country,Confirmed,Deaths,Recovered
0,Azerbaijan,1.0,0.0,0.0
1,"('St. Martin',)",2.0,0.0,0.0
2,Afghanistan,58.0,0.0,0.0
3,Albania,118.0,4.0,0.0
4,Algeria,238.0,6.0,28.0
...,...,...,...,...
151,Uruguay,4.0,0.0,0.0
152,Vatican City,4.0,0.0,0.0
153,Venezuela,2.0,0.0,0.0
154,Vietnam,798.0,0.0,438.0


# Visualizations

In [42]:
df.groupby('Date').sum()

Unnamed: 0_level_0,SNo,Confirmed,Deaths,Recovered
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
01/22/2020,741,555.0,17.0,28.0
01/23/2020,2829,653.0,18.0,30.0
01/24/2020,4305,941.0,26.0,36.0
01/25/2020,6490,1438.0,42.0,39.0
01/26/2020,9071,2118.0,56.0,52.0
01/27/2020,12342,2927.0,82.0,61.0
01/28/2020,15262,5578.0,131.0,107.0
01/29/2020,18711,6165.0,133.0,126.0
01/30/2020,23345,8235.0,171.0,143.0
01/31/2020,28675,9925.0,213.0,222.0


### Summary Plot of Worldwide Cases - Confirmed, Deaths & Recovered

In [0]:
confirmed = df.groupby("Date").sum()['Confirmed'].reset_index()
deaths = df.groupby("Date").sum()['Deaths'].reset_index()
recovered = df.groupby("Date").sum()['Recovered'].reset_index()


In [45]:
fig = go.Figure()

fig.add_trace(go.Bar(x = confirmed['Date'], y = confirmed['Confirmed'], name = 'Confirmed', marker_color = 'blue'))

fig.add_trace(go.Bar(x = deaths['Date'], y = deaths['Deaths'], name = 'Deaths', marker_color = 'Red'))

fig.add_trace(go.Bar(x = recovered['Date'], y = recovered['Recovered'], name = 'Recovered', marker_color = 'Green'))

fig.update_layout(
    title='Worldwide Corona Virus Cases - Confirmed, Deaths, Recovered (Bar Chart)',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='Number of Cases',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

In [46]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=confirmed['Date'], 
                         y=confirmed['Confirmed'],
                         mode='lines+markers',
                         name='Confirmed',
                         line=dict(color='blue', width=2)
                        ))
fig.add_trace(go.Scatter(x=deaths['Date'], 
                         y=deaths['Deaths'],
                         mode='lines+markers',
                         name='Deaths',
                         line=dict(color='Red', width=2)
                        ))
fig.add_trace(go.Scatter(x=recovered['Date'], 
                         y=recovered['Recovered'],
                         mode='lines+markers',
                         name='Recovered',
                         line=dict(color='Green', width=2)
                        ))
fig.update_layout(
    title='Worldwide Corona Virus Cases - Confirmed, Deaths, Recovered (Line Chart)',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='Number of Cases',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    )
)
fig.show()

In [47]:
df_confirmed = df_confirmed[['Province/State', 'Lat', 'Long', 'Country']]
df_latlong = pd.merge(df, df_confirmed, on = ['Province/State', 'Country'])

fig = px.density_mapbox(df_latlong, lat='Lat', lon='Long', hover_name='Province/State', hover_data = ['Confirmed', 'Deaths', 'Recovered'], animation_frame="Date", color_continuous_scale='Rainbow', radius=7, zoom=0, height=700)

fig.update_layout(title='Worldwide Corona Virus Cases Time Lapse - Confirmed, Deaths, Recovered',
                  font=dict(family="Courier New, monospace",
                            size=18,
                            color="#7f7f7f")
                 )
fig.update_layout(mapbox_style="open-street-map", mapbox_center_lon=0)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})


fig.show()

# Analysis By country

### Latest Date in Data

In [0]:
confirmed = df2.groupby(['Date', 'Country']).sum()[['Confirmed']].reset_index()
deaths = df2.groupby(['Date', 'Country']).sum()[['Deaths']].reset_index()
recovered = df2.groupby(['Date', 'Country']).sum()[['Recovered']].reset_index()

In [50]:
latest_date = confirmed['Date'].max()
latest_date

'03/14/2020'

In [0]:
confirmed = confirmed[(confirmed["Date"]==latest_date)][['Country', 'Confirmed']]
deaths = deaths[(deaths["Date"]==latest_date)][['Country', 'Deaths']]
recovered = recovered[(recovered["Date"]==latest_date)][['Country', 'Recovered']]


In [54]:
fig = go.Figure()

fig.add_trace(go.Bar(x = confirmed['Country'], y = confirmed['Confirmed'], name = 'Confirmed', marker_color = 'blue'))

fig.add_trace(go.Bar(x = deaths['Country'], y = deaths['Deaths'], name = 'Deaths', marker_color = 'Red'))

fig.add_trace(go.Bar(x = recovered['Country'], y = recovered['Recovered'], name = 'Recovered', marker_color = 'Green'))

fig.update_layout(
    title='Worldwide Corona Virus Cases - Confirmed, Deaths, Recovered (Bar Chart)',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='Number of Cases',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

## Countries Effected

In [57]:
all_country = confirmed['Country'].unique()

print("Number of countries with cases: ", str(len(all_country)))

print("Countries with cases: ")
for i in range(len(all_country)):
  print(str(i) + ' - - ' + all_country[i])

Number of countries with cases:  143
Countries with cases: 
0 - - Afghanistan
1 - - Albania
2 - - Algeria
3 - - Andorra
4 - - Antigua and Barbuda
5 - - Argentina
6 - - Armenia
7 - - Aruba
8 - - Australia
9 - - Austria
10 - - Azerbaijan
11 - - Bahrain
12 - - Bangladesh
13 - - Belarus
14 - - Belgium
15 - - Bhutan
16 - - Bolivia
17 - - Bosnia and Herzegovina
18 - - Brazil
19 - - Brunei
20 - - Bulgaria
21 - - Burkina Faso
22 - - Cambodia
23 - - Cameroon
24 - - Canada
25 - - Cayman Islands
26 - - Chile
27 - - Colombia
28 - - Congo (Kinshasa)
29 - - Costa Rica
30 - - Croatia
31 - - Cuba
32 - - Curacao
33 - - Cyprus
34 - - Czech Republic
35 - - Denmark
36 - - Dominican Republic
37 - - Ecuador
38 - - Egypt
39 - - Estonia
40 - - Eswatini
41 - - Ethiopia
42 - - Finland
43 - - France
44 - - French Guiana
45 - - Gabon
46 - - Georgia
47 - - Germany
48 - - Ghana
49 - - Greece
50 - - Guadeloupe
51 - - Guatemala
52 - - Guernsey
53 - - Guinea
54 - - Guyana
55 - - Holy See
56 - - Honduras
57 - - Hungary

In [60]:
!pip install pycountry

Collecting pycountry
[?25l  Downloading https://files.pythonhosted.org/packages/16/b6/154fe93072051d8ce7bf197690957b6d0ac9a21d51c9a1d05bd7c6fdb16f/pycountry-19.8.18.tar.gz (10.0MB)
[K     |████████████████████████████████| 10.0MB 1.4MB/s 
[?25hBuilding wheels for collected packages: pycountry
  Building wheel for pycountry (setup.py) ... [?25l[?25hdone
  Created wheel for pycountry: filename=pycountry-19.8.18-py2.py3-none-any.whl size=10627361 sha256=1b1ce00464ab294513c9e26873d62d921500054a174fec5da4eb0144062c932b
  Stored in directory: /root/.cache/pip/wheels/a2/98/bf/f0fa1c6bf8cf2cbdb750d583f84be51c2cd8272460b8b36bd3
Successfully built pycountry
Installing collected packages: pycountry
Successfully installed pycountry-19.8.18


In [61]:
import pycountry

countries = {}
for country in pycountry.countries:
    countries[country.name] = country.alpha_3
    
confirmed["iso_alpha"] = confirmed["Country"].map(countries.get)
deaths["iso_alpha"] = deaths["Country"].map(countries.get)
recovered["iso_alpha"] = recovered["Country"].map(countries.get)

plot_data_confirmed = confirmed[["iso_alpha","Confirmed", "Country"]]
plot_data_deaths = deaths[["iso_alpha","Deaths"]]
plot_data_recovered = recovered[["iso_alpha","Recovered"]]

fig = px.scatter_geo(plot_data_confirmed, locations="iso_alpha", color="Country",
                     hover_name="iso_alpha", size="Confirmed",
                     projection="natural earth", title = 'Worldwide Confirmed Cases')
fig.show()

In [62]:
fig = px.scatter_geo(plot_data_deaths, locations="iso_alpha", color="Deaths",
                     hover_name="iso_alpha", size="Deaths",
                     projection="natural earth", title="Worldwide Death Cases")
fig.show()

In [63]:
fig = px.scatter_geo(plot_data_recovered, locations="iso_alpha", color="Recovered",
                     hover_name="iso_alpha", size="Recovered",
                     projection="natural earth", title="Worldwide Recovered Cases")
fig.show()

# Transforming Data for Forecasting

In [66]:
confirmed = df.groupby('Date').sum()['Confirmed'].reset_index()
deaths = df.groupby('Date').sum()['Deaths'].reset_index()
recovered = df.groupby('Date').sum()['Recovered'].reset_index()

confirmed.columns = ['ds','y']
confirmed['ds'] = pd.to_datetime(confirmed['ds'])
confirmed.head()

Unnamed: 0,ds,y
0,2020-01-22,555.0
1,2020-01-23,653.0
2,2020-01-24,941.0
3,2020-01-25,1438.0
4,2020-01-26,2118.0


# Forcasting Total Number of Cases Worldwide

## Forecasting Confirmed Cases Worldwide with Prophet (Baseline)
We perform a week's ahead forecast with Prophet, with 95% prediction intervals. Here, no tweaking of seasonality-related parameters and additional regressors are performed.

In [67]:
m = Prophet(interval_width=0.95)
m.fit(confirmed)
future = m.make_future_dataframe(periods=7)
future.tail()

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


Unnamed: 0,ds
55,2020-03-17
56,2020-03-18
57,2020-03-19
58,2020-03-20
59,2020-03-21
