In [13]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the compiled case-level COVID-19 records for Canada (one row per case).
data_ = pd.read_csv('./data/Canada COVID Data/Compiled_COVID-19_Case_Details__Canada_.csv')

# Keep only the first whitespace-separated token of each timestamp string
# (the date part) and parse it. `pd.to_datetime` is used rather than
# `.astype(np.datetime64)` because pandas >= 2.0 refuses to cast to the
# unit-less numpy dtype; the `.str` accessor also tolerates missing values
# where the previous `apply(lambda x: x.split()[0])` would raise.
data_['date_reported'] = pd.to_datetime(data_['date_reported'].str.split().str[0])

# drop rows relating to Repatriated people
data_ = data_.drop(data_[data_['province_abbr'] == 'RC'].index)

# drop the unnecessary columns
data_ = data_.drop(columns=[
    'ObjectId',
    'row_id',
    'hr_uid',
    'province',
    'longitude',
    'latitude',
    'health_region',
])

In [14]:
# Restrict to British Columbia, then discard the province column together with
# the two fields BC doesn't report (exposure and case status).
data_ = (
    data_.loc[data_['province_abbr'] == 'BC']
    .drop(columns=['province_abbr', 'exposure', 'case_status'])
)

# remove rows lacking information: both gender and age band must be reported
has_demographics = (data_['gender'] != 'Not Reported') & (data_['age_group'] != 'Not Reported')
data_ = data_[has_demographics]

# Count cases per (day, age band, gender). A helper column `n` of ones is
# added first so that the count lands in a column with that name.
data = (
    data_.assign(n=1)
    .groupby(['date_reported', 'age_group', 'gender'])
    .count()
    .reset_index()
)

# convert age strings to numerical categories (1 = youngest band, 8 = oldest)
age_bands = ['<20', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80+']
conv = {band: rank for rank, band in enumerate(age_bands, start=1)}
data['grouping'] = data['age_group'].map(conv)

In [15]:
# Split the aggregated counts by gender; both frames are reused by the
# side-by-side subplot figure further down.
data_m = data[data['gender'] == 'Male']
data_f = data[data['gender'] == 'Female']

# Single chart with one marker series per gender; marker size encodes the
# numeric age category, so larger dots are older age bands.
fig = go.Figure()
for label, subset in (('Male', data_m), ('Female', data_f)):
    fig.add_trace(
        go.Scatter(
            x=subset['date_reported'],
            y=subset['n'],
            mode='markers',
            name=label,
            marker_size=subset['grouping'],
        )
    )
fig.update_layout(title='Daily COVID-19 cases in BC', yaxis_title='No. of people', xaxis_title='Time')
fig.show()

In [25]:
from plotly.subplots import make_subplots

# Side-by-side panels, one per gender; marker size encodes the age category.
fig = make_subplots(rows=1, cols=2, subplot_titles=('Male', 'Female'))

panels = (('Male', data_m), ('Female', data_f))
for position, (label, subset) in enumerate(panels, start=1):
    scatter = go.Scatter(
        x=subset['date_reported'],
        y=subset['n'],
        mode='markers',
        name=label,
        marker_size=subset['grouping'],
    )
    fig.add_trace(scatter, row=1, col=position)

fig.update_layout(title='Daily COVID-19 cases in BC', yaxis_title='No. of people', showlegend=False)
fig.update_xaxes(title='Time')
# Give both panels the same y-range (10% head-room above the largest count)
# so the two genders can be compared directly.
fig.update_yaxes(range=[-10, max(data['n'] * 1.1)])
fig.show()
fig.write_image('./img/Pyplot_BC_covid_daily_cases_age_gender_split.png', width=875, height=435, scale=3)

In [5]:
# Gender is no longer needed from here on. `errors='ignore'` keeps the cell
# re-runnable: a plain `del data_['gender']` raises KeyError the second time.
data_ = data_.drop(columns='gender', errors='ignore')

# Count cases per (day, age band). A helper column `n` of ones is added first
# so that the count lands in a column with that name.
data = (
    data_.assign(n=1)
    .groupby(['date_reported', 'age_group'])
    .count()
    .reset_index()
)

# Numeric rank of each ten-year age band (1 = youngest, 8 = oldest).
conv = {'<20':1,'20-29':2,'30-39':3,'40-49':4,'50-59':5,'60-69':6,'70-79':7,'80+':8}
data['grouping'] = data['age_group'].map(conv)

In [6]:
# Daily counts for every age band in a single series; the marker size is the
# numeric age category scaled by 1.5, so older bands draw as larger dots.
points = go.Scatter(
    x=data['date_reported'],
    y=data['n'],
    mode='markers',
    marker_size=data['grouping'] * 1.5,
)

fig = go.Figure()
fig.add_trace(points)
fig.update_layout(title='Daily COVID-19 cases in BC', yaxis_title='No. of people', xaxis_title='Time')
fig.show()

fig.write_image('./img/Pyplot_BC_covid_daily_cases_age_size.png', width=875, height=435, scale=3)

In [7]:
# Invert `conv` (age label -> rank) so each numeric category can be turned
# back into its label for the legend.
label_of = {rank: band for band, rank in conv.items()}

# One trace per age band (ascending), so plotly assigns each its own colour.
fig = go.Figure()
for age in sorted(data['grouping'].unique()):
    subset = data[data['grouping'] == age]
    fig.add_trace(
        go.Scatter(
            x=subset['date_reported'],
            y=subset['n'],
            mode='markers',
            marker=dict(size=3),
            name=label_of[age],
        )
    )

fig.update_layout(
    title='Daily COVID-19 cases in BC',
    yaxis_title='No. of people',
    xaxis_title='Time',
    # 'constant' item sizing keeps legend markers readable despite size=3 dots.
    legend={'title_text': 'Age', 'itemsizing': 'constant'},
)
fig.show()

fig.write_image('./img/Pyplot_BC_covid_daily_cases_age_colour.png', width=875, height=435, scale=3)

In [8]:
data = data_.copy()

# Collapse the eight ten-year bands into four broader ones. The replacement is
# applied to the `age_group` column only; a frame-wide `data.replace(...)`
# would scan (and could rewrite) every other column as well. '<20' is already
# in its final form, so it needs no entry.
band_merge = {
    '20-29': '20-39', '30-39': '20-39',
    '40-49': '40-59', '50-59': '40-59',
    '60-69': '60+', '70-79': '60+', '80+': '60+',
}
data['age_group'] = data['age_group'].replace(band_merge)

# Count cases per (day, merged age band); `n` holds the count.
data['n'] = 1
data = data.groupby(['date_reported', 'age_group']).count().reset_index()

# Numeric rank of each merged band and the reverse lookup used for legends.
grp_names = {1:'<20', 2:'20-39', 3:'40-59', 4:'60+'}
conv = {'<20':1,'20-39':2,'40-59':3,'60+':4}
data['grouping'] = data['age_group'].map(conv)

# Fixed colour per band so every later figure uses the same palette.
colour = {1:'#000000', 2:'#EF553B', 3:'#AB63FA', 4:'#109618'}
data['colour'] = data['grouping'].map(colour)

In [11]:
# One trace per merged age band, coloured with the fixed palette.
fig = go.Figure()
for age in sorted(data['grouping'].unique()):
    subset = data[data['grouping'] == age]
    fig.add_trace(
        go.Scatter(
            x=subset['date_reported'],
            y=subset['n'],
            mode='markers',
            marker=dict(size=3, color=subset['colour']),
            name=grp_names[age],
        )
    )

# (date, y position, label) of the two wave markers, drawn with an arrow head.
waves = [
    ('2020-03-30', 50, 'First wave'),
    ('2020-07-06', 20, 'Start of second wave'),
]
for day, height, text in waves:
    fig.add_annotation(x=np.datetime64(day), y=height, text=text, showarrow=True, arrowhead=1)

# (date, y position, label) of dated events; each gets a label shifted left
# (ax=-40) plus a thin vertical line from the x-axis up to the label height.
events = [
    ('2020-10-12', 100, 'Thanksgiving'),
    ('2020-10-31', 200, 'Halloween'),
    ('2020-11-07', 300, 'Lockdown initiated'),
    ('2020-11-24', 420, 'Fines for not wearing masks'),
]
for day, height, text in events:
    fig.add_annotation(x=np.datetime64(day), y=height, text=text, ax=-40)

lines = [
    {
        'x': [np.datetime64(day), np.datetime64(day)],
        'y': [0, height],
        'mode': 'lines',
        'line': {'color': 'black', 'width': 1},
        'showlegend': False,
        'xaxis': 'x',
        'yaxis': 'y',
    }
    for day, height, _ in events
]

for line in lines:
    fig.add_trace(line)

fig.update_layout(
    title='Daily COVID-19 cases in BC',
    yaxis_title='No. of people',
    xaxis_title='Time',
    legend={'title_text': 'Age', 'itemsizing': 'constant'},
)
fig.show()

fig.write_image('./img/Pyplot_BC_covid_daily_cases_age_colour_grouped_annotated.png', width=875, height=435, scale=3)

In [10]:
# 2x2 grid with one panel per merged age band, filled left-to-right and then
# top-to-bottom in ascending band order.
panel_titles = ('Children (<20)', 'Millennials (20-39)', 'Gen X (40-59)', 'Boomers (60+)')
fig = make_subplots(rows=2, cols=2, subplot_titles=panel_titles)

for slot, age in enumerate(sorted(data['grouping'].unique())):
    # slot 0..3 -> (row, col) in a two-column grid, both 1-based for plotly
    row_offset, col_offset = divmod(slot, 2)
    subset = data[data['grouping'] == age]
    fig.add_trace(
        go.Scatter(
            x=subset['date_reported'],
            y=subset['n'],
            mode='markers',
            marker=dict(size=2, color=subset['colour']),
            name=grp_names[age],
        ),
        row=row_offset + 1,
        col=col_offset + 1,
    )

fig.update_layout(title='Daily COVID-19 cases in BC', showlegend=False)
# Identical axis ranges on every panel so the bands can be compared by eye.
fig.update_xaxes(nticks=6, range=[np.datetime64('2020-01-15'), np.datetime64('2020-12-15')])
fig.update_xaxes(title='Time', row=2)
fig.update_yaxes(title='No. of people', col=1)
fig.update_yaxes(range=[-10, max(data['n']) * 1.1])
fig.show()
fig.write_image('./img/Pyplot_BC_covid_daily_cases_age_colour_split.png', width=875, height=435, scale=3)

In [11]:
import gif

# One scatter trace per merged age band, in ascending band order.
traces = []
for age in sorted(data['grouping'].unique()):
    subset = data[data['grouping'] == age]
    traces.append(
        go.Scatter(
            x=subset['date_reported'],
            y=subset['n'],
            mode='markers',
            marker=dict(size=3, color=subset['colour']),
            name=grp_names[age],
        )
    )

# The y-range is the same for every frame, so it is computed once here rather
# than inside `plot` (10% head-room above the largest daily count).
y_range = [-10, float(data['n'].max()) * 1.1]


@gif.frame
def plot(trace, grp_name):
    """Build one animation frame showing a single age band.

    Parameters
    ----------
    trace : plotly trace
        The scatter trace to draw in this frame.
    grp_name : str
        Caption placed above the plot area.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure for this frame, with fixed axis ranges and size so that
        consecutive frames line up.
    """
    fig = go.Figure()
    fig.add_trace(trace)
    fig.update_layout(title='Daily COVID-19 cases in BC', width=875, height=435)
    fig.update_xaxes(title='Time', nticks=6, range=[np.datetime64('2020-01-15'), np.datetime64('2020-12-15')])
    fig.update_yaxes(title='No. of people', range=y_range)
    # Paper coordinates: horizontally centred, just above the plotting area.
    fig.add_annotation(text=f'{grp_name}',
                       xref="paper", yref="paper",
                       x=0.5, y=1.1, showarrow=False)
    return fig


# Captions pair positionally with `traces` (both in ascending band order).
grps = ['Children (<20)', 'Millennials (20-39)', 'Gen X (40-59)', 'Boomers (60+)']
f = [plot(trace, grp) for trace, grp in zip(traces, grps)]

gif.save(f, './img/Pyplot_BC_covid_daily_cases_age_colour_split.gif', duration=1500)

In [12]:
# Total cases per day across all age bands (Series indexed by date).
dat = data.groupby('date_reported')['n'].sum()

# Percentage weight applied to each age band (the four values sum to 100).
# Presumably each band's share of the BC population -- TODO confirm source.
split = {1:20.45, 2:25.77, 3:28.54, 4:25.24}

# Vectorised replacement for the former row-by-row loop, which mixed label
# (`.loc[i]`) and positional (`.iloc[i]`) indexing and wrote floats into a
# column initialised with the integer 1.
# n_tot:       the day's total, repeated on every row of that day
# n_corrected: the count the band would have if the day's cases were spread
#              according to `split`
data['n_tot'] = data['date_reported'].map(dat).astype(int)
data['n_corrected'] = data['grouping'].map(split) / 100 * data['n_tot']

In [13]:
# Same layout as the observed-cases chart, but plotting `n_corrected`: the
# count each band gets when the day's total is spread by the `split` weights.
fig = go.Figure()
for age in sorted(data['grouping'].unique()):
    subset = data[data['grouping'] == age]
    fig.add_trace(
        go.Scatter(
            x=subset['date_reported'],
            y=subset['n_corrected'],
            mode='markers',
            marker=dict(size=3, color=subset['colour']),
            name=grp_names[age],
        )
    )
fig.update_layout(
    title='Expected COVID-19 cases in BC (age indiscriminate)',
    yaxis_title='No. of people',
    xaxis_title='Time',
    legend={'title_text': 'Age', 'itemsizing': 'constant'},
)
fig.show()
fig.write_image('./img/Pyplot_BC_covid_daily_cases_age_colour_corrected.png', width=875, height=435, scale=3)

In [10]:
# Plotly Express version of the grouped chart: one colour per merged age band,
# with the legend order and palette pinned to match the other figures.
band_order = ['<20', '20-39', '40-59', '60+']
band_colours = {'<20': '#000000', '20-39': '#EF553B', '40-59': '#AB63FA', '60+': '#109618'}
axis_labels = {'age_group': 'Age', 'n': 'No. of people', 'date_reported': 'Time'}

fig = px.scatter(
    data,
    x='date_reported',
    y='n',
    color='age_group',
    title='Daily COVID-19 cases in BC',
    category_orders={'age_group': band_order},
    labels=axis_labels,
    color_discrete_map=band_colours,
)
fig.update_traces(marker={'size': 3})
# Keep legend markers at a readable size despite the small data markers.
fig.update_layout(legend={'itemsizing': 'constant'})

# This figure is intentionally left un-annotated. The event annotations and
# vertical marker lines are drawn by the cell that writes
# 'Pyplot_BC_covid_daily_cases_age_colour_grouped_annotated.png'.

fig.show()

fig.write_image('./img/Pyplot_BC_covid_daily_cases_age_colour_grouped.png', width=875, height=435, scale=3)

In [15]:
import gif

# Build, per merged age band (ascending), the observed series (`traces`) and
# the faint black expected series (`traces_`) that is overlaid on it.
traces = []
traces_ = []
for age in sorted(data['grouping'].unique()):
    subset = data[data['grouping'] == age]
    traces.append(
        go.Scatter(
            x=subset['date_reported'],
            y=subset['n'],
            mode='markers',
            marker=dict(size=3, color=subset['colour']),
            name='Actual',
        )
    )
    traces_.append(
        go.Scatter(
            x=subset['date_reported'],
            y=subset['n_corrected'],
            mode='markers',
            opacity=0.2,
            marker=dict(size=3, color='black'),
            name='Expected',
        )
    )

# The y-range is the same for every frame, so it is computed once here rather
# than inside `plot`: 10% head-room above the largest observed or expected value.
y_range = [-10, float(max(data['n_corrected'].max(), data['n'].max())) * 1.1]


@gif.frame
def plot(trace, trace_, grp_name):
    """Build one animation frame comparing observed and expected counts.

    Parameters
    ----------
    trace : plotly trace
        Observed daily counts for one age band.
    trace_ : plotly trace
        Expected daily counts for the same band.
    grp_name : str
        Caption placed above the plot area.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure for this frame, with fixed axis ranges and size so that
        consecutive frames line up.
    """
    fig = go.Figure()
    fig.add_trace(trace)
    fig.add_trace(trace_)
    fig.update_layout(
        title='Daily COVID-19 cases in BC',
        legend={'itemsizing': 'constant'},
        width=875,
        height=435,
    )
    fig.update_xaxes(title='Time', nticks=6, range=[np.datetime64('2020-01-15'), np.datetime64('2020-12-15')])
    fig.update_yaxes(title='No. of people', range=y_range)
    # Paper coordinates: horizontally centred, just above the plotting area.
    fig.add_annotation(text=f'{grp_name}',
                       xref="paper", yref="paper",
                       x=0.5, y=1.1, showarrow=False)
    return fig


# Captions pair positionally with the trace lists (ascending band order).
grps = ['Children (<20)', 'Millennials (20-39)', 'Gen X (40-59)', 'Boomers (60+)']
f = [plot(trace, trace_, grp) for trace, trace_, grp in zip(traces, traces_, grps)]

gif.save(f, './img/Pyplot_BC_covid_daily_cases_age_colour_split_diff.gif', duration=1500)

In [70]:
# Snapshot of the sparse per-(day, age band) frame so it can be restored after
# `data` is rebound below. `.copy()` makes this an independent frame; a plain
# `data__ = data` is only a second name for the same object, so any in-place
# edit of `data` would silently change the "backup" too.
data__ = data.copy()

Unnamed: 0,date_reported,age_group,n,grouping,colour,n_tot,n_corrected
0,2020-01-26,40-59,1,3,#AB63FA,1,0.2854
1,2020-02-02,40-59,1,3,#AB63FA,1,0.2854
2,2020-02-05,20-39,2,2,#EF553B,2,0.5154
3,2020-02-11,20-39,1,2,#EF553B,1,0.2577
4,2020-02-20,20-39,1,2,#EF553B,1,0.2577
...,...,...,...,...,...,...,...
1014,2020-12-03,60+,121,4,#109618,650,164.0600
1015,2020-12-03,<20,82,1,#000000,650,132.9250
1016,2020-12-04,20-39,3,2,#EF553B,10,2.5770
1017,2020-12-04,60+,5,4,#109618,10,2.5240


In [86]:
# Build the complete (date x age band) grid: every reported date paired with
# each of the four bands, with n = 0 where no case was recorded for that
# combination. Dates keep their order of first appearance in `data` and bands
# follow the order of `k`.
#
# This replaces a pair of nested Python loops followed by a per-row lookup
# (quadratic in the number of rows) that also left `n` as an object column.
k = ['<20','20-39','40-59','60+']
full_grid = pd.MultiIndex.from_product(
    [data['date_reported'].unique(), k],
    names=['date_reported', 'age_group'],
)
dat = (
    data.groupby(['date_reported', 'age_group'])['n']
    .sum()
    .reindex(full_grid, fill_value=0)  # missing combinations become 0 cases
    .reset_index()
)
dat

Unnamed: 0,date_reported,age_group,n
0,2020-01-26,<20,0
1,2020-01-26,20-39,0
2,2020-01-26,40-59,1
3,2020-01-26,60+,0
4,2020-02-02,<20,0
...,...,...,...
1135,2020-12-03,60+,121
1136,2020-12-04,<20,2
1137,2020-12-04,20-39,3
1138,2020-12-04,40-59,0


In [85]:
# Point `data` back at the frame saved earlier as `data__`, then display it.
data=data__
data

Unnamed: 0,date_reported,age_group,n,grouping,colour,n_tot,n_corrected
0,2020-01-26,40-59,1,3,#AB63FA,1,0.2854
1,2020-02-02,40-59,1,3,#AB63FA,1,0.2854
2,2020-02-05,20-39,2,2,#EF553B,2,0.5154
3,2020-02-11,20-39,1,2,#EF553B,1,0.2577
4,2020-02-20,20-39,1,2,#EF553B,1,0.2577
...,...,...,...,...,...,...,...
1014,2020-12-03,60+,121,4,#109618,650,164.0600
1015,2020-12-03,<20,82,1,#000000,650,132.9250
1016,2020-12-04,20-39,3,2,#EF553B,10,2.5770
1017,2020-12-04,60+,5,4,#109618,10,2.5240


In [87]:
# From here on `data` is the dense (date x age band) grid built above.
data = dat

# Numeric rank of each merged age band.
conv = {'<20':1,'20-39':2,'40-59':3,'60+':4}
data['grouping'] = data['age_group'].map(conv)

# Total cases per day across all age bands (Series indexed by date).
# Note that this rebinds `dat`; the grid itself stays reachable as `data`.
dat = data.groupby('date_reported')['n'].sum()

# Percentage weight applied to each age band (the four values sum to 100).
# Presumably each band's share of the BC population -- TODO confirm source.
split = {1:20.45, 2:25.77, 3:28.54, 4:25.24}

# Vectorised replacement for the former row-by-row loop, which mixed label
# (`.loc[i]`) and positional (`.iloc[i]`) indexing and wrote floats into a
# column initialised with the integer 1.
# n_tot:       the day's total, repeated on every row of that day
# n_corrected: the count the band would have if the day's cases were spread
#              according to `split`
data['n_tot'] = data['date_reported'].map(dat).astype(int)
data['n_corrected'] = data['grouping'].map(split) / 100 * data['n_tot']

# Fixed colour per band, matching the palette of the earlier figures.
colour = {1:'#000000', 2:'#EF553B', 3:'#AB63FA', 4:'#109618'}
data['colour'] = data['grouping'].map(colour)
data

Unnamed: 0,date_reported,age_group,n,grouping,n_tot,n_corrected,colour
0,2020-01-26,<20,0,1,1,0.2045,#000000
1,2020-01-26,20-39,0,2,1,0.2577,#EF553B
2,2020-01-26,40-59,1,3,1,0.2854,#AB63FA
3,2020-01-26,60+,0,4,1,0.2524,#109618
4,2020-02-02,<20,0,1,1,0.2045,#000000
...,...,...,...,...,...,...,...
1135,2020-12-03,60+,121,4,650,164.0600,#109618
1136,2020-12-04,<20,2,1,10,2.0450,#000000
1137,2020-12-04,20-39,3,2,10,2.5770,#EF553B
1138,2020-12-04,40-59,0,3,10,2.8540,#AB63FA


In [88]:
import gif

# Expected counts are taken from the frame currently bound to `data`. It is
# kept under its own name because `data` is rebound to the saved frame below.
expected = data

traces_ = []
for age in sorted(expected['grouping'].unique()):
    subset = expected[expected['grouping'] == age]
    traces_.append(
        go.Scatter(
            x=subset['date_reported'],
            y=subset['n_corrected'],
            mode='markers',
            opacity=0.2,
            marker=dict(size=3, color='black'),
            name='Expected',
        )
    )

# Observed counts come from the frame saved earlier as `data__`.
data = data__
traces = []
for age in sorted(data['grouping'].unique()):
    subset = data[data['grouping'] == age]
    traces.append(
        go.Scatter(
            x=subset['date_reported'],
            y=subset['n'],
            mode='markers',
            marker=dict(size=3, color=subset['colour']),
            name='Actual',
        )
    )

# The y-range is the same for every frame, so it is computed once here rather
# than inside `plot`. It is derived from the two series that are actually
# drawn (expected values from `expected`, observed values from `data`) so that
# neither can fall outside the axis.
y_range = [-10, float(max(expected['n_corrected'].max(), data['n'].max())) * 1.1]


@gif.frame
def plot(trace, trace_, grp_name):
    """Build one animation frame comparing observed and expected counts.

    Parameters
    ----------
    trace : plotly trace
        Observed daily counts for one age band.
    trace_ : plotly trace
        Expected daily counts for the same band.
    grp_name : str
        Caption placed above the plot area.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure for this frame, with fixed axis ranges and size so that
        consecutive frames line up.
    """
    fig = go.Figure()
    fig.add_trace(trace)
    fig.add_trace(trace_)
    fig.update_layout(
        title='Daily COVID-19 cases in BC',
        legend={'itemsizing': 'constant'},
        width=875,
        height=435,
    )
    fig.update_xaxes(title='Time', nticks=6, range=[np.datetime64('2020-01-15'), np.datetime64('2020-12-15')])
    fig.update_yaxes(title='No. of people', range=y_range)
    # Paper coordinates: horizontally centred, just above the plotting area.
    fig.add_annotation(text=f'{grp_name}',
                       xref="paper", yref="paper",
                       x=0.5, y=1.1, showarrow=False)
    return fig


# Captions pair positionally with the trace lists (ascending band order).
grps = ['Children (<20)', 'Millennials (20-39)', 'Gen X (40-59)', 'Boomers (60+)']
f = [plot(trace, trace_, grp) for trace, trace_, grp in zip(traces, traces_, grps)]

gif.save(f, './img/Pyplot_BC_covid_daily_cases_age_colour_split_diff.gif', duration=1500)

In [80]:
# Show the last ten rows of `data` as a quick sanity check.
data.tail(10)

Unnamed: 0,date_reported,age_group,n,grouping,n_tot,n_corrected
1130,2020-12-02,40-59,169,3,698,199.2092
1131,2020-12-02,60+,150,4,698,176.1752
1132,2020-12-03,<20,82,1,650,132.925
1133,2020-12-03,20-39,234,2,650,167.505
1134,2020-12-03,40-59,213,3,650,185.51
1135,2020-12-03,60+,121,4,650,164.06
1136,2020-12-04,<20,2,1,10,2.045
1137,2020-12-04,20-39,3,2,10,2.577
1138,2020-12-04,40-59,0,3,10,2.854
1139,2020-12-04,60+,5,4,10,2.524
