In [1]:
import bokeh
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import matplotlib.gridspec as gridspec
from datetime import datetime
import seaborn as sns
from bokeh.plotting import figure, show, output_notebook, output_file, save
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.layouts import gridplot,layout
from bokeh.models import FactorRange
import pandas as pd 
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral6
from bokeh.models import Range1d

# Render Bokeh figures inline in the notebook output cells.
output_notebook()

import warnings

# NOTE(review): this hides *every* warning for the rest of the session,
# including pandas/Bokeh deprecation notices.
warnings.filterwarnings('ignore')

# Disable pandas' row/column truncation so displayed frames are shown in full.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [2]:
# Display the installed Bokeh version as the cell output.
import bokeh
bokeh.__version__

'2.0.1'

In [3]:
# Read the codebook and the four data tables from the local
# 'Covid19Canada-master' folder (paths are relative to the working directory).
csv_files = (
    'Covid19Canada-master/codebook.csv',
    'Covid19Canada-master/cases.csv',
    'Covid19Canada-master/mortality.csv',
    'Covid19Canada-master/recovered_cumulative.csv',
    'Covid19Canada-master/testing_cumulative.csv',
)
description, n_cases, n_death, n_recovered, n_testing = (
    pd.read_csv(csv_file) for csv_file in csv_files
)

In [4]:
# Show the codebook table (one row per variable with its description and label).
display(description)

Unnamed: 0,Variable,Description,Label
0,case_id,National Case ID Number,
1,provincial_case_id,Provincial Case ID Number,
2,age,"Age, if specific age not given then range prov...",
3,sex,Sex,
4,health_region,"Health region, if reported",
5,province,Province,
6,country,Country,
7,date_report,Reported date (i.e. public announcement date) ...,
8,report_week,Week of Report (Sundays are 1st day of week),
9,travel_yn,Travel history (yes/no),"0=no, 1=yes, Not Reported"


In [5]:
# Preview the first row and the (rows, columns) shape of each loaded table.
for table in (n_cases, n_death, n_recovered, n_testing):
    display(table.head(1))
    print(table.shape)

Unnamed: 0,case_id,provincial_case_id,age,sex,health_region,province,country,date_report,report_week,travel_yn,travel_history_country,locally_acquired,case_source,additional_info,additional_source
0,1,1,50-59,Male,Toronto,Ontario,Canada,25-01-2020,19-01-2020,1,China,,(1) https://news.ontario.ca/mohltc/en/2020/01/...,,


(6320, 15)


Unnamed: 0,death_id,province_death_id,case_id,age,sex,health_region,province,country,date_death_report,death_source,additional_info,additional_source
0,1,1,60.0,80-89,Male,Vancouver Coastal,BC,Canada,08-03-2020,https://news.gov.bc.ca/releases/2020HLTH0068-0...,Lynn Valley Resident,


(66, 12)


Unnamed: 0,date_recovered,province,cumulative_recovered
0,29-03-2020,Alberta,73.0


(611, 3)


Unnamed: 0,date_testing,province,cumulative_testing
0,29-03-2020,Alberta,44999


(195, 3)


# Number of Cases in Canada

In [6]:
# Show the first row of the cases table before it is trimmed and re-typed.
display(n_cases.head(1))

Unnamed: 0,case_id,provincial_case_id,age,sex,health_region,province,country,date_report,report_week,travel_yn,travel_history_country,locally_acquired,case_source,additional_info,additional_source
0,1,1,50-59,Male,Toronto,Ontario,Canada,25-01-2020,19-01-2020,1,China,,(1) https://news.ontario.ca/mohltc/en/2020/01/...,,


In [7]:
# Keep the first 11 columns (case_id ... travel_history_country) and parse the
# two day-first date columns (e.g. '25-01-2020') into datetimes.
# `.assign` builds a new frame instead of writing into the slice returned by
# `.iloc`, which is what triggers pandas' SettingWithCopyWarning.
n_cases = n_cases.iloc[:, :11].assign(
    date_report=lambda df: pd.to_datetime(df['date_report'], dayfirst=True),
    report_week=lambda df: pd.to_datetime(df['report_week'], dayfirst=True),
)
n_cases.head(1)

Unnamed: 0,case_id,provincial_case_id,age,sex,health_region,province,country,date_report,report_week,travel_yn,travel_history_country
0,1,1,50-59,Male,Toronto,Ontario,Canada,2020-01-25,2020-01-19,1,China


In [8]:
# Constant 1 per row, so summing 'people' within a group counts its cases.
n_cases['people'] = 1

In [9]:
def _people_per(frame, column):
    """Total `people` per distinct value of `column`, largest total first."""
    totals = frame.groupby([column])['people'].sum()
    return totals.sort_values(ascending=False).reset_index()


# Every case, including rows whose sex / age is 'Not Reported'.
all_gender = _people_per(n_cases, 'sex')
all_gender_factors = all_gender['sex'].unique()
all_gender_data = ColumnDataSource(all_gender)

all_age = _people_per(n_cases, 'age')
all_age_factors = all_age['age'].unique()
all_age_data = ColumnDataSource(all_age)

# Same totals restricted to rows where the field was actually reported.
sex_case = _people_per(n_cases[n_cases['sex'].ne('Not Reported')], 'sex')
sex_case_factors = sex_case['sex'].unique()
sex_case_data = ColumnDataSource(sex_case)

age_case = _people_per(n_cases[n_cases['age'].ne('Not Reported')], 'age')
age_case_factors = age_case['age'].unique()
age_case_data = ColumnDataSource(age_case)

# Cross-tabulation by (age, sex) for rows where both fields are reported.
# 'label' holds the (age, sex) tuples used as nested categorical factors.
both_reported = n_cases['age'].ne('Not Reported') & n_cases['sex'].ne('Not Reported')
sex_age = n_cases[both_reported].groupby(['age', 'sex'])['people'].sum().reset_index()
sex_age['label'] = tuple(zip(sex_age['age'], sex_age['sex']))
sex_age_data = ColumnDataSource(sex_age)

In [10]:
# Five bar charts of case counts: by sex and by age (all rows, then reported
# rows only), plus one chart with nested (age, sex) factors.
plot1 = figure(plot_width=495, plot_height=300, title='Cases by sex',
               x_range=FactorRange(factors=list(all_gender_factors)))
plot2 = figure(plot_width=495, plot_height=300, title='Cases by age group',
               x_range=FactorRange(factors=list(all_age_factors)))
plot3 = figure(plot_width=495, plot_height=300, title='Cases by sex (reported only)',
               x_range=FactorRange(factors=list(sex_case_factors)))
plot4 = figure(plot_width=495, plot_height=300, title='Cases by age group (reported only)',
               x_range=FactorRange(factors=list(age_case_factors)))
plot5 = figure(plot_width=990, plot_height=300, title='Cases by age group and sex (reported only)',
               x_range=FactorRange(factors=list(sex_age['label'])))

plot1.vbar(x='sex', top='people', width=0.8, source=all_gender_data)
plot2.vbar(x='age', top='people', width=0.8, source=all_age_data)
plot3.vbar(x='sex', top='people', width=0.8, source=sex_case_data)
plot4.vbar(x='age', top='people', width=0.8, source=age_case_data)

# Colour plot5's bars by the second element of each (age, sex) label
# (start=1, end=2). The colour mapper needs each sex exactly once; pd.unique
# keeps first-occurrence order, so the colours match the un-deduplicated list.
sex_factors = list(pd.unique(sex_age['sex']))
plot5.vbar(x='label', top='people', width=0.8, source=sex_age_data,
           fill_color=factor_cmap('label', palette=Spectral6, factors=sex_factors,
                                  start=1, end=2))

# The bar height is a number of cases (the old 'Sales' label was a leftover).
for plot in (plot1, plot2, plot3, plot4, plot5):
    plot.add_tools(HoverTool(tooltips=[('Cases', '@people')]))

# Rotate the long age-range labels (angle in radians) so they do not overlap.
for plot in (plot2, plot4, plot5):
    plot.xaxis.major_label_orientation = 1

grid = layout([[plot1, plot2], [plot3, plot4], [plot5]])

show(grid)

In [11]:
# Number of cases per report date / report week, sorted chronologically.
# `rename_axis` + `reset_index(name=...)` pin the column names to 'index'
# (the date) and the original column name (the count). A bare
# `value_counts().reset_index()` names these columns differently depending on
# the pandas version, which breaks the 'index' lookups used for plotting.
data_report = (
    n_cases['date_report']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='date_report')
    .sort_values(by='index')
)
data_report_data = ColumnDataSource(data_report)

report_week = (
    n_cases['report_week']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='report_week')
    .sort_values(by='index')
)
report_week_data = ColumnDataSource(report_week)

In [12]:
# Inspect the per-date case counts ('index' is the date, 'date_report' the count).
data_report

Unnamed: 0,index,date_report
32,2020-01-25,1
39,2020-01-27,1
37,2020-01-28,1
34,2020-01-31,1
33,2020-02-04,1
28,2020-02-06,2
40,2020-02-14,1
35,2020-02-20,1
41,2020-02-23,1
38,2020-02-24,1


In [13]:
# Cases over time: by report date, by report week, and both overlaid.
plot1 = figure(plot_width=900, plot_height=300, x_axis_type="datetime",
               title='Cases by report date')
plot2 = figure(plot_width=900, plot_height=300, x_axis_type="datetime",
               title='Cases by report week')
plot3 = figure(plot_width=900, plot_height=300, x_axis_type="datetime",
               title='Cases by report date and by report week')

# Line glyphs take `line_width`; `width` is not a property of Line.
plot1.line(x='index', y='date_report', line_width=0.8, source=data_report_data)
plot2.line(x='index', y='report_week', line_width=0.8, source=report_week_data)
line1 = plot3.line(x='index', y='date_report', line_width=0.8,
                   source=data_report_data, legend_label='By report date')
line2 = plot3.line(x='index', y='report_week', line_width=0.8,
                   source=report_week_data, color='orange',
                   legend_label='By report week')
plot3.legend.location = 'top_left'

# The plotted value is a case count (the old 'Sales' label was a leftover).
plot1.add_tools(HoverTool(tooltips=[('Cases', '@date_report')]))
plot2.add_tools(HoverTool(tooltips=[('Cases', '@report_week')]))
# One hover tool per renderer, because the two lines read different columns.
plot3.add_tools(HoverTool(renderers=[line1], tooltips=[('Cases', '@date_report')]))
plot3.add_tools(HoverTool(renderers=[line2], tooltips=[('Cases', '@report_week')]))

grid = layout([[plot1], [plot2], [plot3]])

show(grid)

In [14]:
# Case counts per province and per health region, largest first.
# `rename_axis` + `reset_index(name=...)` pin the column names to 'index'
# (the category) and the original column name (the count). A bare
# `value_counts().reset_index()` names these columns differently depending on
# the pandas version.
province = (
    n_cases['province']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='province')
    .sort_values(by='province', ascending=False)
)
province_factors = pd.unique(province['index'])
province_data = ColumnDataSource(province)

health_region = (
    n_cases['health_region']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='health_region')
    .sort_values(by='health_region', ascending=False)
)
health_region_factors = pd.unique(health_region['index'])
health_region_data = ColumnDataSource(health_region)

In [15]:
# Bar charts of case counts per province and per health region.
plot1 = figure(plot_width=900, plot_height=300, title='Cases by province',
               x_range=FactorRange(factors=list(province_factors)))
plot2 = figure(plot_width=990, plot_height=300, title='Cases by health region',
               x_range=FactorRange(factors=list(health_region_factors)))

# In both sources 'index' holds the category name and the column named after
# the field holds the count.
plot1.vbar(x='index', top='province', width=0.8, source=province_data)
plot2.vbar(x='index', top='health_region', width=0.8, source=health_region_data)

# The bar height is a case count (the old 'Sales' label was a leftover).
plot1.add_tools(HoverTool(tooltips=[('Province', '@index'), ('Cases', '@province')]))
plot2.add_tools(HoverTool(tooltips=[('Health region', '@index'), ('Cases', '@health_region')]))

# Rotate the category labels (angle in radians) so long names do not overlap.
plot1.xaxis.major_label_orientation = 1
plot2.xaxis.major_label_orientation = 1
grid = layout([[plot1], [plot2]])

show(grid)

In [16]:
# Case counts by travel flag (rows with 'Not Reported' removed) and by
# travel-history country, largest first.
# `rename_axis` + `reset_index(name=...)` pin the column names to 'index'
# (the category) and the original column name (the count). A bare
# `value_counts().reset_index()` names these columns differently depending on
# the pandas version.
n_travel = n_cases.loc[n_cases['travel_yn'] != 'Not Reported']
n_travel = (
    n_travel['travel_yn']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='travel_yn')
    .sort_values(by='travel_yn', ascending=False)
)
n_travel_factors = pd.unique(n_travel['index'])
n_travel_data = ColumnDataSource(n_travel)

travel_history_country = (
    n_cases['travel_history_country']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='travel_history_country')
    .sort_values(by='travel_history_country', ascending=False)
)
travel_history_country_factors = pd.unique(travel_history_country['index'])
travel_history_country_data = ColumnDataSource(travel_history_country)

In [17]:
# Bar charts of case counts by travel flag and by travel-history country.
plot1 = figure(plot_width=900, plot_height=300, title='Cases by travel history flag',
               x_range=FactorRange(factors=list(n_travel_factors)))
plot2 = figure(plot_width=950, plot_height=300, title='Cases by travel-history country',
               x_range=FactorRange(factors=list(travel_history_country_factors)))

# In both sources 'index' holds the category and the column named after the
# field holds the count.
plot1.vbar(x='index', top='travel_yn', width=0.8, source=n_travel_data)
plot2.vbar(x='index', top='travel_history_country', width=0.8,
           source=travel_history_country_data)

# The bar height is a case count (the old 'Sales' label was a leftover).
plot1.add_tools(HoverTool(tooltips=[('Travel', '@index'), ('Cases', '@travel_yn')]))
plot2.add_tools(HoverTool(tooltips=[('Country', '@index'),
                                    ('Cases', '@travel_history_country')]))

# Rotate the country labels (angle in radians) so they do not overlap.
plot2.xaxis.major_label_orientation = 1
grid = layout([[plot1], [plot2]])

show(grid)

# Number of Deaths in Canada

In [18]:
# Show the first row of the mortality table.
display(n_death.head(1))

Unnamed: 0,death_id,province_death_id,case_id,age,sex,health_region,province,country,date_death_report,death_source,additional_info,additional_source
0,1,1,60.0,80-89,Male,Vancouver Coastal,BC,Canada,08-03-2020,https://news.gov.bc.ca/releases/2020HLTH0068-0...,Lynn Valley Resident,


In [19]:
# Display the mortality table as the cell output.
# NOTE(review): pandas' max_rows option is set to None earlier in this
# notebook, so this prints every row; consider n_death.head() if it grows.
n_death

Unnamed: 0,death_id,province_death_id,case_id,age,sex,health_region,province,country,date_death_report,death_source,additional_info,additional_source
0,1,1,60.0,80-89,Male,Vancouver Coastal,BC,Canada,08-03-2020,https://news.gov.bc.ca/releases/2020HLTH0068-0...,Lynn Valley Resident,
1,2,1,477.0,70-79,Male,Simcoe Muskoka,Ontario,Canada,11-03-2020,https://www.nationalobserver.com/2020/03/17/ne...,Was being treated at Royal Victoria Regional H...,
2,3,2,,Not Reported,Not Reported,Vancouver Coastal,BC,Canada,16-03-2020,https://news.gov.bc.ca/releases/2020HLTH0086-0...,Lynn Valley Resident,
3,4,3,,Not Reported,Not Reported,Vancouver Coastal,BC,Canada,16-03-2020,https://news.gov.bc.ca/releases/2020HLTH0086-0...,Lynn Valley Resident,
4,5,4,,Not Reported,Not Reported,Vancouver Coastal,BC,Canada,16-03-2020,https://news.gov.bc.ca/releases/2020HLTH0086-0...,Lynn Valley Resident,
5,6,5,,Not Reported,Not Reported,Vancouver Coastal,BC,Canada,17-03-2020,https://vancouverisland.ctvnews.ca/b-c-declare...,Lynn Valley Resident,
6,7,6,,Not Reported,Not Reported,Vancouver Coastal,BC,Canada,17-03-2020,https://vancouverisland.ctvnews.ca/b-c-declare...,Lynn Valley Resident,
7,8,7,,80-89,Male,Fraser,BC,Canada,17-03-2020,https://vancouverisland.ctvnews.ca/b-c-declare...,The other death is a man in his 80s in the Fra...,
8,9,1,,80-89,Female,Lanaudière,Quebec,Canada,18-03-2020,https://montreal.ctvnews.ca/covid-19-quebec-ha...,Lived in senior's residence,https://globalnews.ca/news/6705211/granddaught...
9,10,2,806.0,50-59,Male,Halton,Ontario,Canada,19-03-2020,https://globalnews.ca/news/6701911/coronavirus...,,


# Number of Tests Performed

In [20]:
# Strip trailing '*' characters from the counts and convert them to numbers.
# Without the numeric conversion the column stays as strings (plus the int 0
# used for missing values), which is not usable as a numeric y-axis.
# `astype(str)` makes the cell safe to re-run once the column is numeric;
# values that cannot be parsed, including missing ones, become 0.
n_testing['cumulative_testing'] = pd.to_numeric(
    n_testing['cumulative_testing'].astype(str).str.rstrip('*'),
    errors='coerce',
).fillna(0)

In [21]:
# Parse the day-first date strings (e.g. '29-03-2020') into datetimes.
n_testing['date_testing'] = pd.to_datetime(n_testing['date_testing'], dayfirst = True)

In [22]:
# Order rows by province, then chronologically within each province.
n_testing = n_testing.sort_values(['province', 'date_testing'])

# Distinct provinces/territories present in the testing table.
n_testing['province'].unique()

array(['Alberta', 'BC', 'Manitoba', 'NL', 'NWT', 'New Brunswick',
       'Nova Scotia', 'Nunavut', 'Ontario', 'PEI', 'Quebec',
       'Saskatchewan', 'Yukon'], dtype=object)

In [23]:
# Split the testing table into one DataFrame per province ...
test = {
    prov: pd.DataFrame(n_testing.loc[n_testing['province'] == prov])
    for prov in n_testing['province'].unique()
}
# ... and wrap each one in a ColumnDataSource for plotting.
test_data = {prov: ColumnDataSource(frame) for prov, frame in test.items()}

In [24]:
# Cumulative tests over time, one line per province.
# NOTE(review): y_range is hard-coded; values above 50000 fall outside the
# initial view.
plot3 = figure(plot_width=900, plot_height=900, x_axis_type="datetime",
               y_range=(0, 50000), title='Cumulative tests by province')

# One distinct colour per province. Previously eight provinces shared 'orange'
# and Nova Scotia was drawn twice, so the lines could not be told apart.
province_colors = ['#1f77b4', 'red', 'brown', 'green', 'indigo', 'violet', 'orange',
                   'black', 'teal', 'gold', 'magenta', 'gray', 'olive']

# Iterate over whatever provinces are in `test_data` instead of hard-coding
# their names. Line glyphs take `line_width`; `width` is not a Line property.
line = [
    plot3.line(x='date_testing', y='cumulative_testing', line_width=2,
               source=source, legend_label=name,
               color=province_colors[position % len(province_colors)])
    for position, (name, source) in enumerate(test_data.items())
]

# A single hover tool covers all lines; '@province' identifies the one under
# the cursor.
plot3.add_tools(HoverTool(renderers=line,
                          tooltips=[('Province', '@province'),
                                    ('Tests', '@cumulative_testing')]))

plot3.legend.location = 'top_left'
plot3.legend.click_policy = 'hide'  # click a legend entry to toggle its line

grid = layout([[plot3]])

show(grid)

# Number of Recovered Cases

In [27]:
# Show the first row of the cumulative-recovered table.
display(n_recovered.head(1))

Unnamed: 0,date_recovered,province,cumulative_recovered
0,29-03-2020,Alberta,73.0


In [33]:
# Treat missing recovery counts as 0 and parse the day-first date strings
# (e.g. '29-03-2020') into datetimes.
n_recovered = n_recovered.assign(
    cumulative_recovered=n_recovered['cumulative_recovered'].fillna(0),
    date_recovered=pd.to_datetime(n_recovered['date_recovered'], dayfirst=True),
)

In [34]:
# Order rows by province, then chronologically within each province.
n_recovered = n_recovered.sort_values(['province', 'date_recovered'])

# Distinct provinces/territories present in the recovered table.
n_recovered['province'].unique()

array(['Alberta', 'BC', 'Manitoba', 'NL', 'NWT', 'New Brunswick',
       'Nova Scotia', 'Nunavut', 'Ontario', 'PEI', 'Quebec',
       'Saskatchewan', 'Yukon'], dtype=object)

In [42]:
# Split the recovered table into one DataFrame per province ...
recovered = {
    prov: pd.DataFrame(n_recovered.loc[n_recovered['province'] == prov])
    for prov in n_recovered['province'].unique()
}
# ... and wrap each one in a ColumnDataSource for plotting.
recovered_data = {prov: ColumnDataSource(frame) for prov, frame in recovered.items()}

In [51]:
# Cumulative recovered cases over time, one line per province.
# NOTE(review): y_range is hard-coded; values above 400 fall outside the
# initial view.
plot3 = figure(plot_width=900, plot_height=200, x_axis_type="datetime",
               y_range=(1, 400), title='Cumulative recovered cases by province')

# One distinct colour per province. Previously eight provinces shared 'orange'
# and Nova Scotia was drawn twice, so the lines could not be told apart.
province_colors = ['#1f77b4', 'red', 'brown', 'green', 'indigo', 'violet', 'orange',
                   'black', 'teal', 'gold', 'magenta', 'gray', 'olive']

# Iterate over whatever provinces are in `recovered_data` instead of
# hard-coding their names. Line glyphs take `line_width`; `width` is not a
# Line property.
line = [
    plot3.line(x='date_recovered', y='cumulative_recovered', line_width=2,
               source=source,
               color=province_colors[position % len(province_colors)])
    for position, (name, source) in enumerate(recovered_data.items())
]

# No legend: one entry per province would not fit in a 200 px tall plot, so
# the hover tooltip names the province under the cursor instead.
plot3.add_tools(HoverTool(renderers=line,
                          tooltips=[('Province', '@province'),
                                    ('Recovered', '@cumulative_recovered')]))

grid = layout([[plot3]])

show(grid)