In [1]:
# Dependencies (storing and analysis)
import numpy as np
import pandas as pd

# Dependencies (visualization)
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

In [3]:
# COVID-19 Data
#
# Builds two frames:
#   covid       - confirmed cases and deaths summed per (date, country)
#   covid_cases - the rows of `covid` with at least one confirmed case

# Load the raw file, keep only the columns we need and give them
# disease-specific names (this also removes the slash in 'Country/Region').
covid = (
    pd.read_csv('Resources/covid_2020.csv', parse_dates=['Date'])
    .loc[:, ['Date', 'Country/Region', 'Confirmed', 'Deaths']]
    .rename(columns={
        'Date': 'Date_covid',
        'Country/Region': 'Country',
        'Confirmed': 'Cases_covid',
        'Deaths': 'Deaths_covid',
    })
    # read_csv leaves the column as plain strings when it cannot parse it,
    # so convert explicitly to guarantee a datetime64 column.
    .assign(Date_covid=lambda df: pd.to_datetime(df['Date_covid']))
)

# Sum cases and deaths per date and country.
# The value columns are selected with a list: selecting several columns of
# a GroupBy with a bare tuple (['a', 'b'] without the inner brackets) is
# deprecated and is rejected by recent pandas versions.
covid = (
    covid
    .groupby(['Date_covid', 'Country'])[['Cases_covid', 'Deaths_covid']]
    .sum()
    .reset_index()
)

# Only (date, country) rows that have at least one confirmed case.
covid_cases = covid[covid['Cases_covid'] > 0]

covid_cases.head()

Unnamed: 0,Date_covid,Country,Cases_covid,Deaths_covid
44,2020-01-22,Japan,2.0,0.0
52,2020-01-22,Macau,1.0,0.0
53,2020-01-22,Mainland China,547.0,17.0
84,2020-01-22,South Korea,1.0,0.0
89,2020-01-22,Taiwan,1.0,0.0


In [4]:
# h1n1 Data
#
# Builds two frames:
#   h1n1       - cases and deaths summed per (date, country)
#   h1n1_cases - the rows of `h1n1` with at least one case

# Load the raw file (it is not UTF-8, hence the explicit encoding), keep
# only the columns we need and give them disease-specific names.
h1n1 = (
    pd.read_csv('Resources/h1n1_2009.csv', encoding="ISO-8859-1", parse_dates=['Date'])
    .loc[:, ['Date', 'Country', 'Cases', 'Deaths']]
    .rename(columns={
        'Date': 'Date_hn',
        'Cases': 'Cases_hn',
        'Deaths': 'Deaths_hn',
    })
    # Keep plain `datetime.date` values, as before. The conversion is done
    # directly instead of casting the parsed column to str and re-parsing it
    # with format='%Y/%m/%d': the stringified dates use '-' separators, so
    # that round-trip only works when the parser is lenient about separators.
    .assign(Date_hn=lambda df: pd.to_datetime(df['Date_hn']).dt.date)
)

# Sum cases and deaths per date and country.
# The value columns are selected with a list: selecting several columns of
# a GroupBy with a bare tuple is deprecated and is rejected by recent
# pandas versions.
h1n1 = (
    h1n1
    .groupby(['Date_hn', 'Country'])[['Cases_hn', 'Deaths_hn']]
    .sum()
    .reset_index()
)

# Only (date, country) rows that have at least one case.
h1n1_cases = h1n1[h1n1['Cases_hn'] > 0]
h1n1_cases.head()

Unnamed: 0,Date_hn,Country,Cases_hn,Deaths_hn
0,2009-05-23,Argentina,1,0.0
1,2009-05-23,Australia,12,0.0
2,2009-05-23,Austria,1,0.0
3,2009-05-23,Belgium,7,0.0
4,2009-05-23,Brazil,8,0.0


In [5]:
# sars Data
#
# Builds two frames:
#   sars       - cases and deaths summed per (date, country)
#   sars_cases - the rows of `sars` with at least one case

# Load the raw file, keep only the columns we need and give them
# disease-specific names.
sars = (
    pd.read_csv('Resources/sars_2003.csv', parse_dates=['Date'])
    .loc[:, ['Date', 'Country', 'Cumulative number of case(s)', 'Number of deaths']]
    .rename(columns={
        'Date': 'Date_sars',
        'Cumulative number of case(s)': 'Cases_sars',
        'Number of deaths': 'Deaths_sars',
    })
)

# Sum cases and deaths per date and country.
# The value columns are selected with a list: selecting several columns of
# a GroupBy with a bare tuple is deprecated and is rejected by recent
# pandas versions.
sars = (
    sars
    .groupby(['Date_sars', 'Country'])[['Cases_sars', 'Deaths_sars']]
    .sum()
    .reset_index()
)

# Only (date, country) rows that have at least one case.
sars_cases = sars[sars['Cases_sars'] > 0]
sars_cases.head()

Unnamed: 0,Date_sars,Country,Cases_sars,Deaths_sars
0,2003-03-17,Canada,8,2
1,2003-03-17,Germany,1,0
2,2003-03-17,"Hong Kong SAR, China",95,1
3,2003-03-17,Singapore,20,0
4,2003-03-17,Switzerland,2,0


In [6]:
# Combine the COVID-19 and H1N1 frames on country name (outer join, so
# countries present in only one of the two frames are kept).
# NOTE(review): 'Country' is the only join key and each frame holds one row
# per (date, country), so every COVID-19 row of a country is paired with
# every H1N1 row of that country - see the repeated Japan rows in the
# preview. Confirm this row multiplication is intended.
covid_h1n1_df = pd.merge(covid_cases, h1n1_cases, how='outer', on='Country')
covid_h1n1_df.head()

Unnamed: 0,Date_covid,Country,Cases_covid,Deaths_covid,Date_hn,Cases_hn,Deaths_hn
0,2020-01-22,Japan,2.0,0.0,2009-05-23,321.0,0.0
1,2020-01-22,Japan,2.0,0.0,2009-05-25,345.0,0.0
2,2020-01-22,Japan,2.0,0.0,2009-05-26,350.0,0.0
3,2020-01-22,Japan,2.0,0.0,2009-05-27,360.0,0.0
4,2020-01-22,Japan,2.0,0.0,2009-05-29,364.0,0.0


In [7]:
# Add the SARS frame to the combined COVID-19/H1N1 frame, again joining on
# country name only (outer join keeps countries missing from either side).
# NOTE(review): as 'Country' is the only key, each existing row is paired
# with every SARS row of the same country - see the repeated Japan rows in
# the preview. Confirm this row multiplication is intended.
all_df = pd.merge(covid_h1n1_df, sars_cases, how='outer', on='Country')
all_df.head()

Unnamed: 0,Date_covid,Country,Cases_covid,Deaths_covid,Date_hn,Cases_hn,Deaths_hn,Date_sars,Cases_sars,Deaths_sars
0,2020-01-22,Japan,2.0,0.0,2009-05-23,321.0,0.0,2003-04-11,4.0,0.0
1,2020-01-22,Japan,2.0,0.0,2009-05-23,321.0,0.0,2003-04-12,4.0,0.0
2,2020-01-22,Japan,2.0,0.0,2009-05-23,321.0,0.0,2003-04-15,1.0,0.0
3,2020-01-22,Japan,2.0,0.0,2009-05-23,321.0,0.0,2003-04-16,1.0,0.0
4,2020-01-22,Japan,2.0,0.0,2009-05-23,321.0,0.0,2003-04-17,2.0,0.0
