In [1]:
import panel as pn
pn.extension('plotly')
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
import hvplot.pandas
from hvplot import hvPlot
import holoviews as hv
import matplotlib.pyplot as plt
hv.extension('bokeh')
import os
import numpy as np
import datetime as dt
from pathlib import Path
from dotenv import load_dotenv
from panel.interact import interact, fixed
from panel import widgets
from sklearn import preprocessing
from trend_search import google_lookup

KeyboardInterrupt: 

In [None]:
# Load environment variables via python-dotenv, then hand the value stored
# under the "mapbox" key to Plotly Express as the Mapbox access token.
load_dotenv()
map_box_api = os.environ.get("mapbox")
px.set_mapbox_access_token(map_box_api)

In [None]:
# Business and labour activity: load, min-max normalise per (GEO, Industry)
# series, and build one line chart per dataset for the industries both share.
business_activity_path = Path("business_out.csv")
labour_activity_path = Path("labour_out.csv")


def _load_activity(csv_path, unused_column):
    """Read an activity CSV, drop ``unused_column`` and index it by ``month_year``.

    The original passed ``infer_datetime_format=True, parse_dates=True`` to
    ``read_csv``.  Without ``index_col`` there is no index to parse, so both
    were no-ops (and ``infer_datetime_format`` is deprecated in pandas 2.x);
    the dates are parsed explicitly with ``pd.to_datetime`` instead.
    """
    frame = pd.read_csv(csv_path).drop(columns=unused_column)
    frame["month_year"] = pd.to_datetime(frame["month_year"])
    return frame.set_index("month_year")


def _normalise_activity(frame):
    """Pivot ``frame`` wide, min-max scale every column, and stack it back.

    Returns ``(pivot, scaler, normalised, melted)`` so that every intermediate
    the notebook exposes at module level is still available.
    """
    pivot = frame.pivot_table(index=["month_year"], columns=["GEO", "Industry"])
    scaler = preprocessing.MinMaxScaler()
    normalised = pd.DataFrame(
        scaler.fit_transform(pivot.values),
        columns=pivot.columns,
        index=pivot.index,
    )
    # Move GEO and Industry from the column levels back into the row index.
    melted = normalised.stack(["GEO", "Industry"])
    return pivot, scaler, normalised, melted


df_business = _load_activity(business_activity_path, "Business dynamics measure")
df_labour = _load_activity(labour_activity_path, "month_year.1")

df_business_pivot, scaler, normalized_business_df, melted_business = _normalise_activity(df_business)
df_labour_pivot, scaler2, normalized_labour_df, melted_labour = _normalise_activity(df_labour)

# Only industries present in both datasets can be compared on one chart.
industry_list2 = set(df_labour["Industry"])
industry_list = list(set(df_business["Industry"]).intersection(industry_list2))

business_for_charting = melted_business[melted_business.index.get_level_values("Industry").isin(industry_list)]
labour_for_charting = melted_labour[melted_labour.index.get_level_values("Industry").isin(industry_list)]

# Shared plot settings so the two charts can be overlaid with matching axes.
_CHART_OPTIONS = dict(x="month_year", y="VALUE", ylabel="Percentage", rot=90, groupby=["Industry", "GEO"])
business_chart = business_for_charting.hvplot.line(label="Businesses", **_CHART_OPTIONS)
labour_chart = labour_for_charting.hvplot.line(label="Labour", **_CHART_OPTIONS)

In [None]:
# Line chart of the normalised business data, grouped by Industry and GEO,
# with the Industry dimension overlaid and the legend placed on the right.
business_all = (
    melted_business.hvplot.line(
        x="month_year",
        y="VALUE",
        xlabel="month_year",
        ylabel="VALUE",
        rot=90,
        height=550,
        width=1000,
        groupby=["Industry", "GEO"],
        subplots=True,
    )
    .overlay("Industry")
    .opts(legend_position='right')
)

In [None]:
# Look up two search terms with trend_search.google_lookup and lay out the
# "Ontario" series of each result side by side.
term1, term2 = "Restaurant", "Tourism"
new1 = google_lookup(term1)
new2 = google_lookup(term2)
first_term_plot = new1.hvplot.line(title="Google searches for " + term1, y="Ontario")
second_term_plot = new2.hvplot.line(title="Google searches for " + term2, y="Ontario")
google_searches = first_term_plot + second_term_plot

In [None]:
# Overlay the business and labour charts and move the legend to the right.
business_labour_overlay = business_chart * labour_chart
combined_business_labour = business_labour_overlay.opts(legend_position='right')

In [None]:
# Load the cleaned labour-market data, indexed by REF_DATE, and drop the
# metadata columns that are not needed further down.
file_path = Path("Resources/final_data.csv")
LABOUR_METADATA_COLUMNS = [
    'DGUID', 'Labour force characteristics', 'Sex', 'UOM', 'UOM_ID',
    'SCALAR_FACTOR', 'SCALAR_ID', 'VECTOR', 'COORDINATE', 'STATUS',
    'SYMBOL', 'TERMINATED', 'DECIMALS',
]
# Chained, non-mutating drop.  The original also had a bare `labour_df`
# expression in the middle of the cell, which displays nothing there.
labour_df = pd.read_csv(file_path, index_col='REF_DATE').drop(columns=LABOUR_METADATA_COLUMNS)
# REF_DATE is parsed as abbreviated month name + two-digit year ("%b-%y").
labour_df.index = pd.to_datetime(labour_df.index, format="%b-%y")

In [None]:
#Labour Market
#Comparing the Hvplots for three years
LABOUR_GROUPBY = ['North American Industry Classification System (NAICS)', 'GEO', 'Age group']


def _labour_year_plot(year):
    """Return the hvplot line chart for the rows of ``labour_df`` selected by ``.loc[year]``.

    ``year`` is a string such as ``"2019"``; on the datetime index built from
    REF_DATE this selects every row dated within that year.
    """
    return labour_df.loc[year].hvplot.line(
        xlabel='REF_DATE',
        ylabel='VALUE',
        groupby=LABOUR_GROUPBY,
        rot=90,
    )


labour_2019_plot = _labour_year_plot("2019")
labour_2020_plot = _labour_year_plot("2020")
labour_2021_plot = _labour_year_plot("2021")

In [None]:
# Lay the 2019, 2020 and 2021 labour plots out next to each other, each with
# its own axes (shared_axes disabled).
yearly_labour_layout = labour_2019_plot + labour_2020_plot + labour_2021_plot
combined_labour_data = yearly_labour_layout.opts(shared_axes=False)

In [None]:
# Community mobility (2020 & 2021) combined with hospitalisation data.
# Uses pd, Path, pn, hvPlot and the hvplot pandas accessor from the notebook's import cell.

# Read the 2020 & 2021 community mobility data & hospitalisation data for the 5 provinces into a Pandas DataFrame
file_path_2020 = Path("2020.csv")
file_path_2021 = Path("2021.csv")
file_path_3 = Path("COVID-19_by_Province.csv")
Mobility_2020_data = pd.read_csv(file_path_2020)
Mobility_2021_data = pd.read_csv(file_path_2021)
Hospitalisation_data = pd.read_csv(file_path_3)
Mobility_data_actual = pd.concat([Mobility_2020_data, Mobility_2021_data])

# Columns of the raw mobility export that are not used in the analysis.
MOBILITY_UNUSED_COLUMNS = [
    'country_region_code', 'country_region', 'sub_region_2', 'metro_area',
    'iso_3166_2_code', 'census_fips_code', 'place_id',
]
# Raw column name -> display name used in every plot below.
MOBILITY_COLUMN_LABELS = {
    'sub_region_1': 'Province',
    'retail_and_recreation_percent_change_from_baseline': 'Retail & Recreation',
    'grocery_and_pharmacy_percent_change_from_baseline': 'Grocery & Pharmacy',
    'parks_percent_change_from_baseline': 'Parks',
    'transit_stations_percent_change_from_baseline': 'Transit Stations',
    'workplaces_percent_change_from_baseline': 'Workplaces',
    'residential_percent_change_from_baseline': 'Residential',
}
# The six mobility categories, in plotting order.
MOBILITY_METRICS = [
    'Retail & Recreation',
    'Grocery & Pharmacy',
    'Parks',
    'Transit Stations',
    'Workplaces',
    'Residential',
]

# Drop columns and save the output in a csv which will be the final database to be used for analysis
Mobility_data_edit_1 = Mobility_data_actual.drop(columns=MOBILITY_UNUSED_COLUMNS)
Mobility_data_edit_2 = Mobility_data_edit_1.dropna()
# Not used by anything below (edit_4 is derived from edit_2); kept so that any
# other cell referring to this name keeps working.
Mobility_data_edit_3 = Mobility_data_edit_2.set_index('date')
Mobility_data_edit_4 = Mobility_data_edit_2.rename(columns=MOBILITY_COLUMN_LABELS)
Mobility_data_edit_5 = Mobility_data_edit_4.groupby(["date", "Province"]).mean()
Mobility_data_edit_5.to_csv("Mobility_Data_Canada_2020_21.csv")
Mobility_data_edit_6 = Mobility_data_edit_5.reset_index()

Hospitalisation_data_1 = Hospitalisation_data.rename(columns={'GEO': 'Province'})
Hospitalisation_data_2 = Hospitalisation_data_1.set_index('Date')

#Combine datapoints of hospitalisation and mobility data
# 'Date' is the index level of the right-hand frame; merge accepts index level
# names in right_on.
Mob_hosp_data = pd.merge(
    Mobility_data_edit_6,
    Hospitalisation_data_2,
    how='left',
    left_on=['date', 'Province'],
    right_on=['Date', 'Province'],
)
# The original called `Mob_hosp_data.dropna()` here without assigning the
# result, so it had no effect.  Rows without hospitalisation figures therefore
# stay in the per-province plots, exactly as before; the national aggregate
# below drops incomplete rows explicitly.
Mob_hosp_data['date'] = pd.to_datetime(Mob_hosp_data['date'])
Mob_hosp_data = Mob_hosp_data.set_index('date')


# Visualisation of data province-wise for all the places visited
def _province_mobility_plot(metric):
    """Line plot of one mobility ``metric`` over time, selectable by Province."""
    return Mob_hosp_data.hvplot.line(
        x="date", y=metric, groupby="Province", rot=90
    ).opts(shared_axes=False)


# One plot per mobility category; the single-letter names are kept for
# compatibility with the original cell.
a, b, c, d, e, f = (_province_mobility_plot(metric) for metric in MOBILITY_METRICS)

hospital_column = pn.Column(a, b, c, d, e, f)

#Hospitalisation data grouped across provinces and mean considered for plotting
# numeric_only=True: the frame still carries the text 'Province' column, and
# from pandas 2.0 on mean() raises on non-numeric columns instead of silently
# dropping them.
Mob_hosp_data_Canada = Mob_hosp_data.groupby("date").mean(numeric_only=True)
Mob_hosp_data_Canada_1 = Mob_hosp_data_Canada.dropna()

#Hospitalisation data based on mobility to places mentioned
plot = hvPlot(Mob_hosp_data_Canada_1)
mobility_plot = plot(
    y=MOBILITY_METRICS + ['Hospitalizations'],
    width=900,
    legend='top',
).opts(shared_axes=False)

In [None]:
#Assign paths
canada_hospitalizations_path = Path('Resources/COVID-19_Hospitalizations/Canadian_COVID-19_Hospitalizations_combined.csv')
canada_cpi_path = Path('Resources/Consumer_Price_Index/CPI_Cleaned.csv')
canada_coordinates_path = Path('Resources/Canada_coordinates.csv')

#Read CSV files
# parse_dates=True parses the 'Date' index column.  The deprecated
# `infer_datetime_format` argument (pandas 2.0+) is no longer passed.
canada_hospitalizations_df = pd.read_csv(canada_hospitalizations_path, index_col='Date', parse_dates=True)
canada_cpi_df = pd.read_csv(canada_cpi_path, index_col='Date', parse_dates=True)
canada_coordinates_df = pd.read_csv(canada_coordinates_path)

#Define function for Canada CPI
def canada_cpi_plot():
    """Return an hvplot of CPI ``VALUE`` over ``Date`` from ``canada_cpi_df``.

    The plot is grouped by ``GEO`` and ``Products and product groups``.
    """
    plot_options = dict(
        x='Date',
        y='VALUE',
        groupby=['GEO', 'Products and product groups'],
        title='Canada CPI 2019-2021',
    )
    return canada_cpi_df.hvplot(**plot_options)

#Define function for Canada CPI percent change
def cpi_percent_change(cpi_df, group_columns=('GEO', 'Products and product groups')):
    """Return a copy of ``cpi_df`` with a ``Percent_Change`` column added.

    The change in ``VALUE`` is computed within each ``group_columns`` series.
    Running ``pct_change()`` over the whole long-format column (as the original
    did) compares each row with the previous row regardless of which region or
    product it belongs to, which yields meaningless values wherever
    neighbouring rows come from different series.

    Parameters:
        cpi_df: frame with a ``VALUE`` column plus the ``group_columns``.
        group_columns: columns identifying one series.

    Returns:
        A new DataFrame; ``cpi_df`` itself is not modified.  The first row of
        each series has ``NaN`` in ``Percent_Change``.
    """
    pct = cpi_df.groupby(list(group_columns))['VALUE'].pct_change()
    # Assign positionally: the result is in the original row order, and this
    # avoids any index alignment on a Date index that repeats per series.
    return cpi_df.assign(Percent_Change=pct.to_numpy())


def canada_cpi_pct_plot():
    """Return an hvplot of the CPI rate of change, grouped by GEO and product group."""
    canada_cpi_pct_change = cpi_percent_change(canada_cpi_df)
    return canada_cpi_pct_change.hvplot(
        x='Date',
        y='Percent_Change',
        groupby=['GEO', 'Products and product groups'],
        title='Canada CPI Rate of Change 2019-2021',
    )

#Define function for overlaid hvplot of Canada hospitalizations
def canada_hospitalizations_plot():
    """Return an hvplot of ``canada_hospitalizations_df`` with one overlaid curve per GEO.

    The original began with ``canada_hospitalizations_df.set_index('GEO')``
    whose result was discarded, so it had no effect; it has been removed.
    """
    return canada_hospitalizations_df.hvplot(
        x='Date',
        groupby=['GEO'],
        title='Total Canada COVID-19 Hospitalizations and Per Selected Provinces',
        width=2000,
        height=500,
    ).overlay('GEO')

def mapbox_canada_summary(canada_coordinates_df):
    """Return a Mapbox scatter of hospitalization summary statistics per region.

    For Canada and five provinces, ``describe()`` is run on that region's rows
    of ``canada_hospitalizations_df``; the statistics are joined with the
    coordinates in ``canada_coordinates_df`` (which needs ``GEO``, ``lat`` and
    ``lon`` columns) and plotted with marker size driven by ``max``.
    """
    # Order matters: it fixes the row order of the combined table.
    regions = ['Canada', 'Alberta', 'Ontario', 'Manitoba', 'Saskatchewan', 'Quebec']

    # One describe() table per region, its 'Hospitalizations' column renamed to the region.
    per_region_stats = [
        canada_hospitalizations_df.loc[canada_hospitalizations_df['GEO'] == region]
        .describe()
        .rename(columns={'Hospitalizations': region})
        for region in regions
    ]

    # Side-by-side concat, then transpose so each region becomes a row indexed by GEO.
    stats_by_region = pd.concat(per_region_stats, axis=1, join='inner').transpose()
    stats_by_region.index.names = ['GEO']

    # Attach lat/lon by matching on the GEO index, then expose GEO as a column again.
    coordinates_by_region = canada_coordinates_df.set_index('GEO')
    stats_with_coordinates = pd.concat(
        [stats_by_region, coordinates_by_region], axis=1, join='inner'
    ).reset_index()

    return px.scatter_mapbox(
        stats_with_coordinates,
        lat='lat',
        lon='lon',
        color='GEO',
        size='max',
        title='Canada & Select Provinces COVID-19 Hospitalizations Summary Statistics',
        hover_data=['GEO', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'],
        zoom=4,
        height=700,
        width=2000,
        size_max=100,
    )

#Define function for mapbox visualization of hospitalizations
def mapbox_hospitalizations():
    """Return an animated Mapbox scatter of hospitalizations per region over time.

    ``canada_hospitalizations_df`` is joined with ``canada_coordinates_df`` on
    ``GEO`` and animated frame-by-frame over ``Date``.

    The original reset the index of the shared ``canada_hospitalizations_df``
    in place, twice.  That left a stray ``index`` column on the global frame,
    made a second call to this function fail, and changed what later
    ``describe()`` calls on the frame return.  This version works on a local
    copy and leaves the global untouched.
    """
    # reset_index() returns a new frame with 'Date' as a regular column.
    hospitalizations = canada_hospitalizations_df.reset_index()
    #Merge Canada hospitalizations dataframe with Canada coordinates dataframe
    hospitalizations_coordinates = hospitalizations.merge(canada_coordinates_df, how='outer', on='GEO')
    #Cast Date column to string for mapbox animation plot
    hospitalizations_coordinates['Date'] = hospitalizations_coordinates['Date'].apply(str)
    return px.scatter_mapbox(
        hospitalizations_coordinates,
        lat='lat',
        lon='lon',
        color='Hospitalizations',
        size='Hospitalizations',
        title='Canada and Select Provinces COVID-19 Hospitalizations - March 9th, 2020 - January 5, 2022',
        color_continuous_scale=px.colors.sequential.Jet,
        hover_data=['GEO'],
        zoom=4,
        height=800,
        width=2000,
        size_max=100,
        animation_frame='Date',
        animation_group='GEO',
    )

In [None]:
# Build the summary-statistics map on its own; as the last expression of the
# cell, the returned figure is what the cell displays.
mapbox_canada_summary(canada_coordinates_df)

In [None]:
# Build one panel per topic, then expose them as tabs of a servable dashboard.
business_column = pn.Column(business_all, google_searches, combined_business_labour)
hospitalizations_column = pn.Column(
    pn.Row(canada_hospitalizations_plot()),
    pn.Row(mapbox_canada_summary(canada_coordinates_df)),
    pn.Row(mapbox_hospitalizations()),
)
cpi_column = pn.Column(canada_cpi_plot(), canada_cpi_pct_plot())

# (tab title, content) pairs in display order.
dashboard_tabs = [
    ("Business", business_column),
    ("Labour", combined_labour_data),
    ("Hospitalization", hospitalizations_column),
    ("Mobility", pn.Column(mobility_plot, hospital_column)),
    ("CPI", cpi_column),
]
dashboard = pn.Tabs(*dashboard_tabs)
dashboard.servable()