In [2]:
library(tidyverse)


── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.0     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors


In [66]:
# Tidy the quarterly population estimates into one row per geography and year.
populations <- read.csv("../data/population_per_province_data.csv")
head(populations)

# Wide -> long: every column except Geography (e.g. "Q1.2012") becomes a row,
# with the column name stored in `period` and the value in `count`.
pops_pivoted <- pivot_longer(
  populations,
  cols = !Geography,
  names_to = "period",
  values_to = "count"
)
head(pops_pivoted)

# Split `period` at the dot into separate `quarter` and `year` columns.
pops_split <- separate_wider_delim(
  pops_pivoted,
  cols = period,
  delim = ".",
  names = c("quarter", "year")
)
head(pops_split)

# Keep only the Q4 rows, since the Q4 figure is taken as the population at the
# end of each year; the quarter column is then redundant and is dropped.
pops_sum <- pops_split |>
  filter(quarter == "Q4") |>
  select(!quarter)
pops_sum
write.csv(
  pops_sum,
  file = "../data/tidied_population_estimates.csv",
  row.names = FALSE
)

Unnamed: 0_level_0,Geography,Q1.2012,Q2.2012,Q3.2012,Q4.2012,Q1.2013,Q2.2013,Q3.2013,Q4.2013,Q1.2014,⋯,Q3.2019,Q4.2019,Q1.2020,Q2.2020,Q3.2020,Q4.2020,Q1.2021,Q2.2021,Q3.2021,Q4.2021
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
1,Canada,34516032,34592192,34713395,34834973,34881794,34956567,35080992,35209597,35247023,⋯,37618495,37828162,37928208,38006941,38028638,38027406,38058291,38140918,38239864,38451454
2,Newfoundland and Labrador,526115,525550,526235,527065,527129,527177,526960,527948,528065,⋯,527643,528442,528231,527733,526884,526046,525895,526195,527056,528335
3,Prince Edward Island,144208,144365,144415,144396,144041,143807,143942,143943,143828,⋯,155792,157025,157494,158401,159193,159179,159240,160334,162133,163775
4,Nova Scotia,943907,943317,943163,942829,942045,940647,939808,939442,939166,⋯,975799,982592,984130,986204,989168,989154,990025,993946,999908,1006805
5,New Brunswick,757039,757024,758121,758408,758026,757854,758261,758379,758713,⋯,777387,780907,781054,782512,783432,783814,784950,787002,790802,795755
6,Quebec,8028516,8038332,8059752,8076838,8081006,8089004,8108825,8122462,8123138,⋯,8483186,8521542,8537376,8550900,8551095,8551865,8550561,8556015,8572020,8603553


Geography,period,count
<chr>,<chr>,<int>
Canada,Q1.2012,34516032
Canada,Q2.2012,34592192
Canada,Q3.2012,34713395
Canada,Q4.2012,34834973
Canada,Q1.2013,34881794
Canada,Q2.2013,34956567


Geography,quarter,year,count
<chr>,<chr>,<chr>,<int>
Canada,Q1,2012,34516032
Canada,Q2,2012,34592192
Canada,Q3,2012,34713395
Canada,Q4,2012,34834973
Canada,Q1,2013,34881794
Canada,Q2,2013,34956567


Geography,year,count
<chr>,<chr>,<int>
Canada,2012,34834973
Canada,2013,35209597
Canada,2014,35555305
Canada,2015,35823591
Canada,2016,36257421
Canada,2017,36722075
Canada,2018,37259485
Canada,2019,37828162
Canada,2020,38027406
Canada,2021,38451454


In [67]:
# Tidy the manufacturing employment dataset: normalise the column names, then
# reshape the per-year columns into one row per industry, province and year.
employment <- read.csv("../data/manufacturing_employment_data.csv")
# head(employment)

# Old column names and their replacements, paired by position in these two
# vectors. The year columns carry an "X" prefix after loading (presumably added
# by read.csv() to make the names syntactic), which is stripped here.
old_names <- c("naics_industry", "Province", "X2012", "X2013", "X2014",
               "X2015", "X2016", "X2017", "X2018", "X2019", "X2020", "X2021")
new_names <- c("naics_industry", "province", "2012", "2013", "2014",
               "2015", "2016", "2017", "2018", "2019", "2020", "2021")
stopifnot(length(old_names) == length(new_names))

# Rename by name rather than by position: each old name is mapped to its own
# replacement regardless of the column order in the CSV, and all_of() raises an
# error if an expected column is missing instead of silently mislabelling data.
employment <- employment |>
  rename(all_of(setNames(old_names, new_names)))
# head(employment)

# Wide -> long: every column other than naics_industry..province becomes a row,
# with the column name stored in `year` and the value in `count`.
emp_pivoted <- employment |>
  pivot_longer(
    cols = !(naics_industry:province),
    names_to = "year",
    values_to = "count"
  )
# head(emp_pivoted)

write.csv(
  emp_pivoted,
  "../data/tidied_employment_estimates.csv",
  row.names = FALSE
)

naics_industry,province,year,count
<chr>,<chr>,<chr>,<dbl>
Manufacturing,Canada,2012,1776.6
Manufacturing,Canada,2013,1732.6
Manufacturing,Canada,2014,1721.7
Manufacturing,Canada,2015,1734.4
Manufacturing,Canada,2016,1729.8
Manufacturing,Canada,2017,1767.5


In [77]:
# Tidy the emissions dataset: normalise the column names, then reshape the
# per-year columns into one row per province, industry and year.
emissions <- read.csv("../data/emissions_data_per_province.csv")
# head(emissions)

# Old column names and their replacements, paired by position in these two
# vectors. The year columns carry an "X" prefix after loading (presumably added
# by read.csv() to make the names syntactic), which is stripped here.
old_names <- c("Geography", "Sector", "X2012", "X2013", "X2014",
               "X2015", "X2016", "X2017", "X2018", "X2019", "X2020", "X2021")
new_names <- c("province", "ioic_industry", "2012", "2013", "2014",
               "2015", "2016", "2017", "2018", "2019", "2020", "2021")
stopifnot(length(old_names) == length(new_names))

# Rename by name rather than by position: each old name is mapped to its own
# replacement regardless of the column order in the CSV, and all_of() raises an
# error if an expected column is missing instead of silently mislabelling data.
emissions <- emissions |>
  rename(all_of(setNames(old_names, new_names)))
# head(emissions)

# Wide -> long: every column other than province..ioic_industry becomes a row,
# with the column name stored in `year` and the value in `count`.
emissions_pivoted <- emissions |>
  pivot_longer(
    cols = !(province:ioic_industry),
    names_to = "year",
    values_to = "count"
  )
# head(emissions_pivoted)

write.csv(
  emissions_pivoted,
  "../data/tidied_emissions_estimates.csv",
  row.names = FALSE
)

Unnamed: 0_level_0,province,ioic_industry,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
Unnamed: 0_level_1,<chr>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
1,Canada,"Total, industries and households",765177,774546,775519,780254,753719,766876,780372,775612,693849,708880
2,Canada,"Total, industries",636925,639396,642314,647912,622641,632545,642939,639121,579689,594661
3,Canada,Animal food manufacturing [BS31110],254,272,457,321,341,286,384,433,434,421
4,Canada,Sugar and confectionery product manufacturing [BS31130],352,361,427,466,397,607,370,440,412,484
5,Canada,Fruit and vegetable preserving and specialty food manufacturing [BS31140],805,625,485,526,457,595,484,472,502,493
6,Canada,Dairy product manufacturing [BS31150],451,540,343,385,383,392,434,510,509,524


province,ioic_industry,year,count
<chr>,<chr>,<chr>,<int>
Canada,"Total, industries and households",2012,765177
Canada,"Total, industries and households",2013,774546
Canada,"Total, industries and households",2014,775519
Canada,"Total, industries and households",2015,780254
Canada,"Total, industries and households",2016,753719
Canada,"Total, industries and households",2017,766876
