In [None]:
import pandas as pd

In [529]:
# Get the data

def getData():

	import requests

	files = [
	"time_series_19-covid-Confirmed.csv",
	"time_series_19-covid-Deaths.csv",
	"time_series_19-covid-Recovered.csv"
	]

	headers = {'Accept': 'application/vnd.github.v3.raw'}

	for path in files:
		url = "https://api.github.com/repos/CSSEGISandData/COVID-19/contents/csse_covid_19_data/csse_covid_19_time_series/{path}".format(path=path)
		print("Getting", path)
		r = requests.get(url, headers=headers)
		with open(path, 'w') as f:
			f.write(r.text)
			
	print("Files saved")

getData()

In [530]:
# Read in the data and have a look
confirmed = pd.read_csv("time_series_19-covid-Confirmed.csv")
confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
157,,Iran,32.0,53.0,0,0,0,0,0,0,...,3513,4747,5823,6566,7161,8042,9000,10075,11364,12729


In [531]:
# Clear out regions and lat/lon / general data cleanup
colnames = list(confirmed.columns.values)
dates = colnames[4:]
melted_confirmed = confirmed.melt(id_vars='Country/Region', var_name='Date', value_name='ConfirmedCases')
melted_confirmed = melted_confirmed.groupby(['Date','Country/Region'])['ConfirmedCases'].sum().reset_index()
melted_confirmed = melted_confirmed = melted_confirmed[melted_confirmed['Date'].isin(dates)]
melted_confirmed = melted_confirmed.sort_values(by=['ConfirmedCases'])
melted_confirmed['Date'] = pd.to_datetime(melted_confirmed['Date'], infer_datetime_format=True)  
melted_confirmed['ConfirmedCases'] = melted_confirmed['ConfirmedCases'].astype(int)
melted_confirmed.info()

In [598]:
# Read in population data, and only include countries with >100 cases
country_pop = pd.read_csv('Dev/country_population.csv')
melted_confirmed_wpop = pd.merge(melted_confirmed, country_pop, on = 'Country/Region', how = 'left').fillna(0)
melted_confirmed_wpop.rename(columns = {'Country/Region':'Country'}, inplace = True) 
melted_confirmed_wpop['Population'] = melted_confirmed_wpop['Population'].astype(int)
melted_confirmed_wpop['population_proportion'] = (melted_confirmed_wpop['ConfirmedCases'] / melted_confirmed_wpop['Population'])*100
melted_confirmed_wpop = melted_confirmed_wpop[melted_confirmed_wpop['ConfirmedCases'] > 100]
melted_confirmed_wpop['day_since_100'] = melted_confirmed_wpop.groupby('Country').cumcount() + 1
melted_confirmed_wpop['dod_growth'] = melted_confirmed_wpop['population_proportion'].pct_change()
melted_confirmed_wpop = melted_confirmed_wpop[melted_confirmed_wpop['Country'] != 'Cruise Ship']

In [603]:
# Look at the first week
first_week = days_since_dataframe[days_since_dataframe['day_since_100'] < 14] #change this to adjust timeframe
first_week = pd.DataFrame(first_week.pivot(index='Country', columns='day_since_100', values=['ConfirmedCases']).fillna(0).reset_index())
first_week

Unnamed: 0_level_0,Country,ConfirmedCases,ConfirmedCases,ConfirmedCases,ConfirmedCases,ConfirmedCases,ConfirmedCases,ConfirmedCases,ConfirmedCases,ConfirmedCases,ConfirmedCases,ConfirmedCases,ConfirmedCases,ConfirmedCases
day_since_100,Unnamed: 1_level_1,1,2,3,4,5,6,7,8,9,10,11,12,13
0,Australia,107.0,128.0,128.0,200.0,250.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Austria,104.0,131.0,182.0,246.0,302.0,504.0,655.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bahrain,110.0,189.0,195.0,195.0,210.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Belgium,109.0,169.0,200.0,239.0,267.0,314.0,314.0,559.0,689.0,0.0,0.0,0.0,0.0
4,Brazil,151.0,151.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Canada,108.0,117.0,193.0,198.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,China,548.0,643.0,920.0,1406.0,2075.0,2877.0,5509.0,6087.0,8141.0,9802.0,11891.0,16630.0,19716.0
7,Czechia,141.0,189.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Denmark,264.0,444.0,617.0,804.0,836.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Egypt,109.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [599]:
#Look at a few countries
melted_confirmed_wpop = melted_confirmed_wpop[(melted_confirmed_wpop['Country'] == 'Austria') | (melted_confirmed_wpop['Country'] == 'Australia') | (melted_confirmed_wpop['Country'] == 'Belgium') | (melted_confirmed_wpop['Country'] == 'Japan')]
melted_confirmed_wpop.groupby(['Continent','Country']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,ConfirmedCases,Population,population_proportion,day_since_100,dod_growth
Continent,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Asia,Japan,23,23,23,23,23,23
Europe,Austria,7,7,7,7,7,7
Europe,Belgium,9,9,9,9,9,9
Oceania,Australia,5,5,5,5,5,5


In [600]:
#Plot it

import plotly.express as px
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.animation as animation
from IPython.display import HTML
fig = px.line(melted_confirmed_wpop[melted_confirmed_wpop['day_since_100'] > 1], x="day_since_100", y="ConfirmedCases", color='Country', line_group="Country")
fig.show() 