# 2. Cancer overview

The cancer overview covers five aspects:

2. Cancer Overview <br>
    2.1. Age distribution <br>
    2.2. Gender variance of cancer types <br>
    2.3. Regional distribution <br>
    2.4. Most common region for each cancer <br>
    2.5. Time changes <br>


In [2]:
# Load necessary packages for data manipulation, visualization, and processing
library(tidyverse)
library(ggsci)
library(ggpubr)
library(ggrepel)
library(sf)
library(flextable)
source('tools.R')

"package 'tidyverse' was built under R version 4.2.3"
"package 'ggplot2' was built under R version 4.2.3"
"package 'tibble' was built under R version 4.2.3"
"package 'tidyr' was built under R version 4.2.3"
"package 'readr' was built under R version 4.2.3"
"package 'purrr' was built under R version 4.2.3"
"package 'dplyr' was built under R version 4.2.3"
"package 'stringr' was built under R version 4.2.3"
"package 'forcats' was built under R version 4.2.3"
"package 'lubridate' was built under R version 4.2.3"
── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.2     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.3     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.1     
── [1mConflicts[22m ──────────

ERROR: Error in file(filename, "r", encoding = encoding): cannot open the connection


In [None]:
# Load the cancer-deaths-by-age CSV into `age`
age <- read_csv(file = "data/cancer-deaths-by-age.csv")

## 2.1 Age distribution

In [None]:
# Load the cleaned cancer incidence CSV into `incidence`
incidence <- read_csv(file = "data/clean/incidence.csv")

In [None]:
# Build Table 1: yearly totals of `Number`, split into child vs. adult.
# Steps:
# - Replace the first "S" in each Rate value with "0" and convert Rate to
#   numeric ("S" is presumably a suppression marker -- TODO confirm). Rate is
#   not carried into the final table: summarize() keeps only the grouping
#   columns and the total.
# - Label rows with agegrpid <= 3 as "child", otherwise "adult"
# - Group by 'population' and 'Year' and sum Number within each group
# - Drop the grouping that summarize() would otherwise leave on 'population'
#   (.groups = "drop"), so the reshaped table is ungrouped and no regrouping
#   message is printed
# - Reshape to a wider format with one column per Year
# - Display the data in a table format
# NOTE(review): the input file is named cancer-deaths-by-age.csv, while the
# column name and table title refer to incidence / new registrations --
# confirm which description is correct.

age %>%
  mutate(Rate = as.numeric(str_replace(Rate, "S", "0"))) %>%
  # filter(Number > 5) %>%
  mutate(population = ifelse(agegrpid <= 3, "child", "adult")) %>%
  group_by(population, Year) %>%
  summarize(
    `Incidence number` = as.integer(sum(Number)),
    .groups = "drop"
  ) %>%
  pivot_wider(names_from = Year, values_from = `Incidence number`) %>%
  rempsyc::nice_table(title = "Table 1: Number of New Registered Cancers")

## 2.2 Gender variance of cancer types

In [None]:
# Load the cleaned incidence and mortality tables
incidence <- read_csv("data/clean/incidence.csv")
mortality <- read_csv("data/clean/mortality.csv")

# Use the custom function 'draw_sex_variance' to create one gender-variance
# plot for incidence and one for mortality
# (the helper is not defined in this notebook; presumably it comes from
# tools.R -- confirm)
g1 <- draw_sex_variance(incidence, type = "Incidence", fulltitle = FALSE)
g2 <- draw_sex_variance(mortality, type = "Mortality", fulltitle = FALSE)

# Arrange the two plots side by side
ggarrange(g1, g2)


## 2.3 Regional distribution

In [None]:
# Read incidence and mortality data filtered by sex
incidence_sexfiltered <- read_csv("data/clean/incidence_sexfiltered.csv")
mortality_sexfiltered <- read_csv("data/clean/mortality_sexfiltered.csv")

# Use the custom function 'regional_heatmap' to create a heatmap for each
# measure, then render it with both legends placed on the right.
# NOTE(review): draw() is not provided by any package attached in this
# notebook's library() calls; presumably tools.R loads it -- confirm.
ht_incidence <- regional_heatmap(incidence_sexfiltered, type = "Incidence")
draw(
  ht_incidence,
  heatmap_legend_side = "right",
  annotation_legend_side = "right",
  legend_grouping = "original"
)

# NOTE(review): 'mortality' is lowercase here, whereas the other helper calls
# in this notebook pass "Mortality" / "Incidence" capitalised. Left unchanged
# because the helper's handling of case is not visible -- confirm and align.
ht_mortality <- regional_heatmap(mortality_sexfiltered, type = "mortality")
draw(
  ht_mortality,
  heatmap_legend_side = "right",
  annotation_legend_side = "right",
  legend_grouping = "original"
)

## 2.4 Most common region for each cancer

In [None]:
# sf provides st_read(), used below to load the boundary file
library(sf)

# Read incidence and mortality data filtered by sex
incidence_sexfiltered <- read_csv("data/clean/incidence_sexfiltered.csv")
mortality_sexfiltered <- read_csv("data/clean/mortality_sexfiltered.csv")

# Read the map data of New Zealand District Health Board boundaries
DHB_map <- st_read(
  "data/NZ_District_Health_Board_boundaries_-_generalised.kml",
  quiet = TRUE
)

# Use the custom function 'cancer_region_map' to create maps showing the most
# common regions for each cancer type, first for incidence ...
ggmap_incidence <- cancer_region_map(
  data = incidence_sexfiltered,
  type = "Incidence",
  map = DHB_map
)
ggmap_incidence

# ... then for mortality. The mortality map gets its own name so that it does
# not overwrite the incidence map.
ggmap_mortality <- cancer_region_map(
  data = mortality_sexfiltered,
  type = "Mortality",
  map = DHB_map
)
ggmap_mortality

## 2.5 Time changes

In [None]:
# Load the sex-filtered incidence and mortality tables
incidence_sexfiltered <- read_csv(file = "data/clean/incidence_sexfiltered.csv")
mortality_sexfiltered <- read_csv(file = "data/clean/mortality_sexfiltered.csv")

# Plot how cancer incidence rates change over time with the custom function
# 'time_change_plot'.
# The label number is calculated as the difference between the mean rates of
# 2016-2020 and 2011-2015.
gg_time <- time_change_plot(
  data = incidence_sexfiltered,
  type = "Incidence",
  label.size = 2.5
)
gg_time