### **01 - Incidence of MLTC**
#### **01E - Manuscript outputs - trend**

**Chart**: Trend of progression to MLTC

**Imports**

In [1]:
%%pyspark
# required imports

# requires blank line after last import


In [2]:
%%sparkr

# svglite is attached with library() in the next cell; install it first, but
# only when it cannot already be found, so repeated runs skip the download.
svglite_installed <- requireNamespace("svglite", quietly = TRUE)
if (!svglite_installed) {
  install.packages("svglite")
}

In [3]:
# Load necessary libraries
library(SparkR)
library(ggplot2)
library(patchwork)
library(svglite)

**Parameter cell**

<blockquote style="color: #D8000C; background-color: #FFD2D2; padding: 10px; border-left: 6px solid #D8000C;">
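  <strong>⚠️ Warning:</strong> The selected financial years (the excluded 2016/17 year and the ordered list of years to plot) are hard-coded in the plotting cells below. Update them there if the data covers a different period.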
</blockquote>

In [4]:
%%pyspark
# parameter cell
# incidence_schema: name of the schema that holds the incidence output tables.
# It is published to the Spark configuration as `param.incidence_schema` and
# used to qualify the table name in the SQL queries further down.
# Left blank here; the trailing comment shows an example value.
# NOTE(review): presumably overridden at run time (e.g. by a pipeline) - confirm.
incidence_schema = ""  # "mltc_incidence_outputs_v40_20230331"

# optional parameters (can be blank) - none are currently defined in this cell


In [5]:
%%pyspark
# Set parameters in Spark configuration with 'param.' prefix (for use in SQL cells)
spark.conf.set("param.incidence_schema", incidence_schema)


---

#### **Creating the plot**

**a - Load data**

In [6]:
# Query for the trend results table; ${param.incidence_schema} is left in the
# string exactly as written so Spark can substitute the configured schema.
trend_query <- "SELECT * FROM ${param.incidence_schema}.output_01E_incidence_results_trend"

# Spark DataFrame holding the trend results (collected to R in a later cell)
data <- sql(trend_query)

In [7]:
%%sql
-- Preview the trend results table (the same table loaded into `data` above)
SELECT * FROM ${param.incidence_schema}.output_01E_incidence_results_trend

**b - Create plot**

Create charts

In [34]:
# Convert Spark DataFrame to R DataFrame for ggplot2
data_local <- collect(data)

# Remove rows containing NAs if they exist
data_local <- na.omit(data_local)

# Financial years to plot, in display order (hard-coded: update if required).
selected_years <- c(
  "2017/18", "2018/19", "2019/20", "2020/21", "2021/22", "2022/23"
)

# 2016/17 is deliberately excluded due to a potential register effect.
excluded_years <- c("2016/17")

# Flag any year that is neither selected nor deliberately excluded, so an
# out-of-date hard-coded list is noticed instead of silently producing an
# "NA" category on the x-axis.
years_in_data <- unique(as.character(data_local$financial_year))
unexpected_years <- setdiff(years_in_data, c(selected_years, excluded_years))
if (length(unexpected_years) > 0) {
  warning(
    "Dropping financial years not in the hard-coded selection: ",
    paste(sort(unexpected_years), collapse = ", "),
    call. = FALSE
  )
}

# Keep only the selected years (this also removes 2016/17)
keep_row <- as.character(data_local$financial_year) %in% selected_years
data_local <- data_local[keep_row, , drop = FALSE]

# Convert financial_year to a factor with the levels in plotting order
data_local$financial_year <- factor(
  as.character(data_local$financial_year),
  levels = selected_years
)


In [35]:
# Theme shared by both panels: minimal base, Helvetica, large axis titles and
# labels with generous margins, and no minor grid lines.
axis_text_style <- element_text(
  size = 25,
  margin = margin(t = 20, r = 20, b = 20, l = 20)
)
manuscript_theme <- theme_minimal() +
  theme(
    text = element_text(family = "Helvetica"),
    axis.title = axis_text_style,
    axis.text = axis_text_style,
    panel.grid.minor = element_blank()
  )

# Create the bar chart for progression_rate_1_2_plus: one bar per financial
# year, error bars from the confidence limits, and the rate (1 decimal place)
# printed in white inside each bar.
bar_chart <- ggplot(
  data_local,
  aes(x = financial_year, y = progression_rate_1_2_plus)
) +
  geom_col(fill = "#6cb1beff", width = 0.7) +
  geom_errorbar(aes(ymin = lower_cl_1_2, ymax = upper_cl_1_2), width = 0.2) +
  geom_text(
    aes(
      label = formatC(
        round(progression_rate_1_2_plus, 1),
        format = "f",
        big.mark = ",",
        digits = 1
      )
    ),
    vjust = 3,
    color = "white",
    fontface = "bold",
    size = 8
  ) +
  labs(
    title = NULL,
    y = "Progression rate (from 1 to 2+ conditions) per 100 person years",
    x = NULL
  ) +
  manuscript_theme

# Create the box plot from pre-computed percentiles (stat = "identity"):
# whiskers span the 5th-95th percentiles, the box the 25th-75th, and the
# middle line is the median.
# The original whisker.linetype / whisker.size arguments are omitted:
# whisker.size is not a geom_boxplot() parameter and only raised an
# "Ignoring unknown parameters" warning, and solid whiskers at the default
# line width are what geom_boxplot() draws anyway.
box_plot <- ggplot(data_local, aes(x = financial_year)) +
  geom_boxplot(
    aes(
      ymin = perc_05,
      lower = perc_25,
      middle = perc_50,
      upper = perc_75,
      ymax = perc_95
    ),
    stat = "identity",
    fill = "#a7d0d8ff",
    color = "#6cb1beff"
  ) +
  # Horizontal black caps at the whisker ends (5th and 95th percentiles);
  # the factor is converted to its numeric position to set the cap width.
  geom_segment(
    aes(
      x = as.numeric(financial_year) - 0.3,
      xend = as.numeric(financial_year) + 0.3,
      y = perc_05,
      yend = perc_05
    ),
    color = "black"
  ) +
  geom_segment(
    aes(
      x = as.numeric(financial_year) - 0.3,
      xend = as.numeric(financial_year) + 0.3,
      y = perc_95,
      yend = perc_95
    ),
    color = "black"
  ) +
  # Median value printed below the median line
  geom_text(
    aes(y = perc_50, label = round(perc_50, 2)),
    vjust = 3,
    color = "#5fa3b1",
    fontface = "bold",
    size = 8
  ) +
  labs(
    title = NULL,
    y = "Age (years) at progression from 1 to 2+ conditions",
    x = NULL
  ) +
  manuscript_theme

# Combine the plots using patchwork: bar chart stacked above the box plot in
# a single column, with equal panel heights
final_plot <- bar_chart / box_plot + plot_layout(heights = c(1, 1), ncol = 1)

# Display the combined plot
print(final_plot)