### **01 - Incidence of MLTC**
#### **01H02 - Manuscript outputs - all condition incidence by age - plots**

**Imports**

In [1]:
%%pyspark
# required imports

# requires blank line after last import


In [3]:
%%sparkr

# Install svglite only when it cannot already be loaded in this session,
# so repeated runs of the notebook skip the download.
svglite_missing <- !requireNamespace("svglite", quietly = TRUE)
if (svglite_missing) {
  install.packages("svglite")
}

In [4]:
# Load necessary libraries
library(SparkR)
library(ggplot2)
library(patchwork)
library(svglite)

**Parameter cell**

In [5]:
%%pyspark
# parameter cell
# incidence_schema: name of the schema holding the incidence output tables.
# The next cell publishes it to the Spark conf as "param.incidence_schema", and
# the load-data cell places it in front of the table name in its SQL string.
# The default is blank; the trailing comment shows an example value.
# NOTE(review): presumably the real value is injected when the notebook is run
# with parameters - with the blank default the load query would have no schema
# name before the ".output_01h01_..." table; confirm.
incidence_schema = ""  # "mltc_incidence_outputs_v40_20230331"

# optional, can be blank


In [6]:
%%pyspark
# Set parameters in Spark configuration with 'param.' prefix (for use in SQL cells)
# The load-data cell refers to this key through the ${param.incidence_schema}
# placeholder inside its SQL string.
spark.conf.set("param.incidence_schema", incidence_schema)


---

#### **01H02 - Manuscript outputs - all condition incidence by age - plots**

This notebook produces a single chart that combines one plot per subsegment, each showing the incidence rate by age band with a separate line for each gender.

**a - Load data**

In [7]:
# Load the all-condition incidence output (by subsegment and age) as a Spark DataFrame.
# ${param.incidence_schema} is a placeholder for the Spark conf value set in the
# parameter cells above.
# NOTE(review): presumably resolved by Spark SQL variable substitution rather
# than by R string interpolation - confirm.
data <- sql("SELECT * FROM ${param.incidence_schema}.output_01h01_all_incidence_by_subsegment_and_age")

**b - Create plot**

In [9]:
# Replace incidence_rate with a double-typed version of itself
rate_as_double <- cast(data$incidence_rate, "double")
data <- withColumn(data, "incidence_rate", rate_as_double)

# Tidy the subsegment labels in one pass: underscores become spaces first,
# then the apostrophe is restored in "Parkinson's Disease"
spaced_subsegment <- regexp_replace(data$subsegment, "_", " ")
data <- withColumn(
  data,
  "subsegment",
  regexp_replace(spaced_subsegment, "Parkinsons Disease", "Parkinson's Disease")
)

# Pull the Spark DataFrame into a local R data frame for plotting
local_data <- collect(data)

# Split by subsegment to create a named list of data frames (one per subsegment)
data_list <- split(local_data, local_data$subsegment)

# Build the incidence-by-age line chart for a single subsegment.
#
#   subsegment_data  data frame with the columns age_band, incidence_rate and
#                    gender_description for one subsegment
#   subsegment_name  text shown as the plot title
#
# Returns a ggplot object with one line per gender_description value and no
# legend (the legend is supplied once for the whole figure elsewhere).
#
# NOTE(review): age_band is plotted as-is; if it is a character column the
# x axis will follow alphabetical order - confirm the bands sort as intended.
make_subsegment_plot <- function(subsegment_data, subsegment_name) {
  font_family <- "Helvetica"

  ggplot(
    subsegment_data,
    aes(
      x = age_band,
      y = incidence_rate,
      group = gender_description,
      color = gender_description
    )
  ) +
    geom_line() +
    scale_color_manual(values = c("MALE" = "#4a86e8", "FEMALE" = "#dc3e5c")) +
    ylim(0, NA) +  # anchor the y axis at zero, let the upper limit follow the data
    labs(
      title = subsegment_name,
      x = "Age",
      y = "Incidence rate per 100,000"
    ) +
    theme_minimal() +
    theme(
      text = element_text(size = 8, family = font_family),
      plot.title = element_text(size = 8, hjust = 0.5, family = font_family),
      axis.title = element_text(size = 6, family = font_family),
      axis.text.x = element_text(
        size = 6, angle = 45, hjust = 1, family = font_family
      ),
      axis.text.y = element_text(size = 6, family = font_family),
      legend.title = element_blank(),
      legend.position = "none"
    )
}

# One plot per subsegment, in the same order as data_list. lapply builds the
# list in a single step instead of growing it inside a for loop; iterating over
# positions keeps the result an unnamed list.
plots_list <- lapply(
  seq_along(data_list),
  function(idx) make_subsegment_plot(data_list[[idx]], names(data_list)[idx])
)

# Arrange the per-subsegment plots in a four-column grid (nothing is drawn yet)
plot_matrix <- wrap_plots(plots_list, ncol = 4)

# Legend-only helper plot: two dummy line layers exist purely so that a
# MALE/FEMALE colour legend is generated; theme_void() removes everything else.
gender_colours <- c("MALE" = "#4a86e8", "FEMALE" = "#dc3e5c")
legend_theme <- theme(
  legend.position = "bottom",
  legend.title = element_blank(),
  text = element_text(family = "Helvetica")  # keep legend text in Helvetica too
)
# NOTE(review): the guide title "Gender" set below appears to be suppressed by
# legend.title = element_blank() in legend_theme - confirm which is intended.
legend_plot <- ggplot() +
  geom_line(aes(y = c(1, 1), x = c(1, 1), color = "MALE"), show.legend = TRUE) +
  geom_line(aes(y = c(1, 1), x = c(1, 1), color = "FEMALE"), show.legend = TRUE) +
  scale_color_manual(values = gender_colours) +
  theme_void() +
  legend_theme +
  guides(color = guide_legend(title = "Gender"))

# Stack the grid above the legend plot, then give the legend row a tiny share
# of the height (tune the second value to change the legend row height)
stacked_layout <- plot_matrix / legend_plot
final_plot <- stacked_layout + plot_layout(heights = c(1, 0.01))

# Render the combined figure, legend included
print(final_plot)