# Coronavirus Analysis and Forecasting Using Data Science

Data Source: Johns Hopkins CSSE Data Repository - aggregated from multiple sources.  
https://github.com/CSSEGISandData/COVID-19

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

## Get the Datasets

In [2]:
# Live data is stored in files named MM-DD-YYYY.csv, so build today's file name.
#
# The clock is read exactly once so that year, month and day all come from the
# same instant (separate now() calls could straddle midnight), and strftime
# handles the zero-padding. pd.Timestamp.now() is used because the pd.datetime
# alias was deprecated in pandas 1.0 and removed in pandas 2.0.
today = pd.Timestamp.now()

# Kept as zero-padded strings, matching what this cell has always exposed.
year = today.strftime("%Y")
month = today.strftime("%m")
day = today.strftime("%d")

filename = month + "-" + day + "-" + year + ".csv"
filename

'03-14-2020.csv'

In [3]:
# Today's daily report lives under the repository's daily-reports folder;
# append the date-stamped file name to get the raw-CSV URL, then load it.
fileURL = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/"
    + filename
)
livedata = pd.read_csv(filepath_or_buffer=fileURL)
livedata.head()

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
0,Hubei,China,2020-03-14T10:13:09,67790,3075,52960,30.9756,112.2707
1,,Italy,2020-03-14T20:13:16,21157,1441,1966,41.8719,12.5674
2,,Iran,2020-03-14T11:33:06,12729,611,2959,32.4279,53.688
3,,"Korea, South",2020-03-14T01:33:02,8086,72,510,35.9078,127.7669
4,,Spain,2020-03-14T22:13:11,6391,195,517,40.4637,-3.7492


**Also pull the historical time series data.  Note that it won't include today's live data.**

In [4]:
# Historical confirmed-case counts: one row per location, one column per date.
confirmed_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
confirmed = pd.read_csv(filepath_or_buffer=confirmed_url)
confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
0,,Thailand,15.0,101.0,2,3,5,7,8,8,...,47,48,50,50,50,53,59,70,75,
1,,Japan,36.0,138.0,2,1,2,2,4,4,...,360,420,461,502,511,581,639,639,701,
2,,Singapore,1.2833,103.8333,0,1,3,3,4,5,...,117,130,138,150,150,160,178,178,200,
3,,Nepal,28.1667,84.25,0,0,0,1,1,1,...,1,1,1,1,1,1,1,1,1,
4,,Malaysia,2.5,112.5,0,0,0,3,4,4,...,50,83,93,99,117,129,149,149,197,


In [5]:
# Historical death counts: one row per location, one column per date.
deaths_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"
deaths = pd.read_csv(filepath_or_buffer=deaths_url)
deaths.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
0,,Thailand,15.0,101.0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,
1,,Japan,36.0,138.0,0,0,0,0,0,0,...,6,6,6,6,10,10,15,16,19,
2,,Singapore,1.2833,103.8333,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
3,,Nepal,28.1667,84.25,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
4,,Malaysia,2.5,112.5,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,


In [6]:
# Historical recovered counts: one row per location, one column per date.
recovered_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"
recovered = pd.read_csv(filepath_or_buffer=recovered_url)
recovered.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
0,,Thailand,15.0,101.0,0,0,0,0,2,2,...,31,31,31,31,31,33,34,34,35,
1,,Japan,36.0,138.0,0,0,0,0,1,1,...,43,46,76,76,76,101,118,118,118,
2,,Singapore,1.2833,103.8333,0,0,0,0,0,0,...,78,78,78,78,78,78,96,96,97,
3,,Nepal,28.1667,84.25,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,
4,,Malaysia,2.5,112.5,0,0,0,0,0,0,...,22,22,23,24,24,24,26,26,26,


## Start examining the live data

In [7]:
# US rows of the live report, highest confirmed count first.
is_us_row = livedata["Country/Region"] == 'US'
livedata.loc[is_us_row].sort_values(by='Confirmed', ascending=False)

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
26,Washington,US,2020-03-14T22:13:19,572,37,1,47.4009,-121.4905
28,New York,US,2020-03-14T22:13:32,525,2,0,42.1657,-74.9481
32,California,US,2020-03-14T22:13:32,340,5,6,36.1162,-119.6816
53,Massachusetts,US,2020-03-14T22:13:19,138,0,1,42.2302,-71.5301
69,Colorado,US,2020-03-14T22:13:32,101,1,0,39.0598,-105.3111
77,Louisiana,US,2020-03-14T22:13:32,77,1,0,31.1695,-91.8678
79,Florida,US,2020-03-14T18:53:03,76,3,0,27.7663,-81.6868
82,New Jersey,US,2020-03-14T22:13:32,69,1,0,40.2989,-74.521
83,Georgia,US,2020-03-14T17:53:03,66,1,0,33.0406,-83.6431
85,Illinois,US,2020-03-14T22:33:04,64,0,2,40.3495,-88.9861


In [8]:
# Total confirmed cases across all US rows of the live report.
is_us_row = livedata["Country/Region"] == 'US'
livedata.loc[is_us_row, 'Confirmed'].sum()

2726

In [9]:
# Live-report row(s) whose Province/State is California.
is_california = livedata["Province/State"] == 'California'
livedata.loc[is_california]

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
32,California,US,2020-03-14T22:13:32,340,5,6,36.1162,-119.6816


In [10]:
# Live-report row(s) whose Province/State is Massachusetts.
is_massachusetts = livedata["Province/State"] == 'Massachusetts'
livedata.loc[is_massachusetts]

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
53,Massachusetts,US,2020-03-14T22:13:19,138,0,1,42.2302,-71.5301


In [11]:
# Confirmed-case time series row(s) whose Province/State is California.
is_california_series = confirmed["Province/State"] == 'California'
confirmed.loc[is_california_series]

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
102,California,US,36.1162,-119.6816,0,0,0,0,0,0,...,0,0,0,0,0,144,177,221,282,
