In [84]:
import pandas as pd

In [85]:
# Get the data

def getData():

	import requests

	files = [
	"time_series_covid19_confirmed_global.csv",
	"time_series_19-covid-Deaths.csv",
	"time_series_19-covid-Recovered.csv"
	]

	headers = {'Accept': 'application/vnd.github.v3.raw'}

	for path in files:
		url = "https://api.github.com/repos/CSSEGISandData/COVID-19/contents/csse_covid_19_data/csse_covid_19_time_series/{path}".format(path=path)
		print("Getting", path)
		r = requests.get(url, headers=headers)
		with open(path, 'w') as f:
			f.write(r.text)
			
	print("Files saved")

getData()

Getting time_series_covid19_confirmed_global.csv
Getting time_series_19-covid-Deaths.csv
Getting time_series_19-covid-Recovered.csv
Files saved


In [86]:
# Read in the data and have a look
confirmed = pd.read_csv("time_series_covid19_confirmed_global.csv")
colnames = list(confirmed.columns.values)
dates = colnames[4:]
#dates
confirmed[confirmed['Country/Region'] == 'Singapore']

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20
196,,Singapore,1.2833,103.8333,0,1,3,3,4,5,...,243,266,313,345,385,432,455,509,558,631


In [87]:
deaths = pd.read_csv("time_series_19-covid-Deaths.csv")
deaths.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20
0,,Thailand,15.0,101.0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1.0
1,,Japan,36.0,138.0,0,0,0,0,0,0,...,22,22,27,29,29,29,33,35,40,40.0
2,,Singapore,1.2833,103.8333,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2,2,2.0
3,,Nepal,28.1667,84.25,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.0
4,,Malaysia,2.5,112.5,0,0,0,0,0,0,...,0,0,0,2,2,2,3,4,10,10.0


In [88]:
# Just get Australia
australia = confirmed[confirmed['Country/Region'] == 'Australia']
# Clear out regions and lat/lon / general data cleanup
australia_confirmed = australia.melt(id_vars='Province/State', var_name='Date', value_name='ConfirmedCases')
australia_confirmed = australia_confirmed.groupby(['Date','Province/State'])['ConfirmedCases'].sum().reset_index()
australia_confirmed = australia_confirmed = australia_confirmed[australia_confirmed['Date'].isin(dates)]
australia_confirmed = australia_confirmed.sort_values(by=['ConfirmedCases'])
australia_confirmed['Date'] = pd.to_datetime(australia_confirmed['Date'], infer_datetime_format=True)  
australia_confirmed['ConfirmedCases'] = australia_confirmed['ConfirmedCases'].astype(int)
australia_confirmed = australia_confirmed[australia_confirmed['Date'] > '2020-03-10']
australia_confirmed.head()

Unnamed: 0,Date,Province/State,ConfirmedCases
336,2020-03-12,Australian Capital Territory,0
328,2020-03-11,Australian Capital Territory,0
338,2020-03-12,Northern Territory,1
344,2020-03-13,Australian Capital Territory,1
346,2020-03-13,Northern Territory,1


In [89]:
# Read in population data (Australia only), and only include countries with >100 cases
australia_confirmed.rename(columns = {'Country/Region':'Country'}, inplace = True) 
australia_confirmed['day_since_100'] = australia_confirmed.groupby(['Province/State']).cumcount() + 1
australia_confirmed['dod_growth'] = australia_confirmed['ConfirmedCases'].pct_change()
australia_confirmed = australia_confirmed.dropna(axis="rows")
australia_confirmed.to_csv('australia.csv')

In [90]:
# Clear out regions and lat/lon / general data cleanup
australia_melted = australia_confirmed.melt(id_vars='Province/State', var_name='Date', value_name='ConfirmedCases')
#australia_melted = australia_melted.groupby(['Date','Province/State'])['ConfirmedCases'].sum().reset_index()
#australia_melted = australia_melted = australia_melted[australia_melted['Date'].isin(dates)]
#australia_melted = australia_melted.sort_values(by=['ConfirmedCases'])
#australia_melted['Date'] = pd.to_datetime(australia_melted['Date'], infer_datetime_format=True)  
#australia_melted['ConfirmedCases'] = australia_melted['ConfirmedCases'].astype(int)
australia_melted.head()

Unnamed: 0,Province/State,Date,ConfirmedCases
0,Northern Territory,Date,2020-03-12 00:00:00
1,Australian Capital Territory,Date,2020-03-13 00:00:00
2,Northern Territory,Date,2020-03-13 00:00:00
3,Australian Capital Territory,Date,2020-03-14 00:00:00
4,Northern Territory,Date,2020-03-14 00:00:00


In [91]:
# Clear out regions and lat/lon / general data cleanup
melted_confirmed = confirmed.melt(id_vars='Country/Region', var_name='Date', value_name='ConfirmedCases')
melted_confirmed = melted_confirmed.groupby(['Date','Country/Region'])['ConfirmedCases'].sum().reset_index()
melted_confirmed = melted_confirmed = melted_confirmed[melted_confirmed['Date'].isin(dates)]
melted_confirmed = melted_confirmed.sort_values(by=['ConfirmedCases'])
melted_confirmed['Date'] = pd.to_datetime(melted_confirmed['Date'], infer_datetime_format=True)  
melted_confirmed['ConfirmedCases'] = melted_confirmed['ConfirmedCases'].astype(int)
melted_confirmed.head()

TypeError: can only concatenate str (not "int") to str

In [None]:
# Read in population data, and only include countries with >100 cases
country_pop = pd.read_csv('C:/Users/Keegan/Dev/country_population.csv')
melted_confirmed_wpop = pd.merge(melted_confirmed, country_pop, on = 'Country/Region', how = 'left').fillna(0)
melted_confirmed_wpop.rename(columns = {'Country/Region':'Country'}, inplace = True) 
melted_confirmed_wpop['Population'] = melted_confirmed_wpop['Population'].astype(int)
melted_confirmed_wpop['population_proportion'] = (melted_confirmed_wpop['ConfirmedCases'] / melted_confirmed_wpop['Population'])*100
melted_confirmed_wpop = melted_confirmed_wpop[melted_confirmed_wpop['ConfirmedCases'] > 100]
melted_confirmed_wpop['day_since_100'] = melted_confirmed_wpop.groupby('Country').cumcount() + 1
melted_confirmed_wpop['dod_growth'] = melted_confirmed_wpop['population_proportion'].pct_change()
melted_confirmed_wpop = melted_confirmed_wpop[melted_confirmed_wpop['Country'] != 'Cruise Ship']

In [None]:
# Look at the first week
first_week = melted_confirmed_wpop[melted_confirmed_wpop['day_since_100'] < 30] #change this to adjust timeframe
first_week = pd.DataFrame(first_week.pivot(index='Country', columns='day_since_100', values=['ConfirmedCases']).fillna("").reset_index())
first_week

In [None]:
#Look at a few countries
melted_confirmed_wpop = melted_confirmed_wpop[(melted_confirmed_wpop['Country'] == 'Italy') | (melted_confirmed_wpop['Country'] == 'Australia') | (melted_confirmed_wpop['Country'] == 'New Zealand') | (melted_confirmed_wpop['Country'] == 'Japan')]
melted_confirmed_wpop.groupby(['Continent','Country']).count() 

In [None]:
#Plot it

import plotly.express as px
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.animation as animation
from IPython.display import HTML
fig = px.line(australia_confirmed, x="Date", y="dod_growth", color='Province/State', line_group="Province/State")
fig.show() 

In [None]:
import seaborn as sns
%matplotlib inline

heatmap1_data = days_since_dataframe #[days_since_dataframe['Country'] != 'China']
heatmap1_data = pd.pivot_table(heatmap1_data, values='ConfirmedCases', 
                     index=['Country'], 
                     columns='day_since_100')

plt.subplots(figsize=(20,15))
sns.heatmap(heatmap1_data, cmap="BrBG")