### **01 - Incidence of MLTC**
#### **01D02 - Manuscript outputs - age standardisation outputs**

**Chart**: Progression rate ratios by age and gender

**Imports**

In [1]:
%%pyspark
# required import

# requires blank line after last import


In [2]:
%%sparkr

# svglite is attached by a later library() call; install it first when the
# namespace cannot be loaded in this session.
if (isFALSE(requireNamespace("svglite", quietly = TRUE))) {
  install.packages("svglite")
}

In [3]:
# Load necessary libraries
library(SparkR)
library(ggplot2)
library(patchwork)
library(svglite)

**Parameter cell**

In [4]:
%%pyspark
# parameter cell
# Name of the schema from which the age-standardisation results table is read.
# It is blank here; the trailing comment on the assignment shows an example.
# NOTE(review): presumably supplied when the notebook is run - if it stays
# blank, the query that uses ${param.incidence_schema} has no schema name.
incidence_schema = ""  # "mltc_incidence_outputs_v40_20230331"

# optional, can be blank


In [5]:
%%pyspark
# Publish the schema name in the Spark session configuration under the
# 'param.' prefix so SQL text can refer to it as ${param.incidence_schema}.
spark.conf.set(key="param.incidence_schema", value=incidence_schema)


---

#### **Creating the plot**

**a - Load data**

In [6]:
# Read every column of the age-standardisation results table; the schema name
# is taken from the 'param.incidence_schema' Spark configuration entry.
data <- SparkR::sql(
  "SELECT * FROM ${param.incidence_schema}.output_01d02_incidence_results_age_standardisation"
)

**b - Create plot**

Data cleaning steps

In [7]:
# Recode the literal text "NA" as "All" in the categorical columns.
# NOTE(review): the pattern only matches the string "NA"; true NULLs are left
# unchanged - confirm the source table stores missing values as "NA".
for (label_col in c("gender_description",
                    "breakdown_type",
                    "socio_demographic_breakdown")) {
  data <- withColumn(
    data,
    label_col,
    regexp_replace(data[[label_col]], "^NA$", "All")
  )
}

# Cast the rate ratios and their confidence limits to double
for (numeric_col in c("prr_1_2", "prr_2_3",
                      "lower_cl_prr_1_2", "upper_cl_prr_1_2",
                      "lower_cl_prr_2_3", "upper_cl_prr_2_3")) {
  data <- withColumn(data, numeric_col, cast(data[[numeric_col]], "double"))
}

# Split by gender, keeping rows whose breakdown_type is "Age" or "All"
is_age_or_all <- data$breakdown_type %in% c("Age", "All")

female_data <- SparkR::filter(
  data,
  data$gender_description == "FEMALE" & is_age_or_all
)

male_data <- SparkR::filter(
  data,
  data$gender_description == "MALE" & is_age_or_all
)

Unpivot data for easier charting

In [8]:
# Build the long-format rows for one progression rate ratio.
#   df       : SparkDataFrame holding the wide prr_* columns
#   prr_name : "prr_1_2" or "prr_2_3"; also stored in the PRR column
# Returns a SparkDataFrame with the columns socio_demographic_breakdown,
# Value, PRR, Lower_CL and Upper_CL.
reshape_prr_long <- function(df, prr_name) {
  SparkR::select(
    df,
    df$socio_demographic_breakdown,
    alias(df[[prr_name]], "Value"),
    alias(lit(prr_name), "PRR"),
    alias(df[[paste0("lower_cl_", prr_name)]], "Lower_CL"),
    alias(df[[paste0("upper_cl_", prr_name)]], "Upper_CL")
  )
}

# Female: stack the 1-to-2 and 2-to-3 ratios on top of each other
female_long_1_2 <- reshape_prr_long(female_data, "prr_1_2")
female_long_2_3 <- reshape_prr_long(female_data, "prr_2_3")
female_long <- unionAll(female_long_1_2, female_long_2_3)

# Male: same reshaping as for the female rows
male_long_1_2 <- reshape_prr_long(male_data, "prr_1_2")
male_long_2_3 <- reshape_prr_long(male_data, "prr_2_3")
male_long <- unionAll(male_long_1_2, male_long_2_3)

Create R data frames

In [9]:
# Bring both long tables back to the driver as ordinary R data frames, which
# is what ggplot2 needs (as.data.frame on a SparkDataFrame collects it).
female_long_local <- SparkR::as.data.frame(female_long)
male_long_local <- SparkR::as.data.frame(male_long)

Create charts

In [27]:
# Shared upper y-axis limit: 10% above the largest upper confidence limit
# found in either dataset, so both panels are drawn on the same scale.
max_value <- max(
  c(female_long_local$Upper_CL, male_long_local$Upper_CL),
  na.rm = TRUE
) * 1.1

# The y-axis starts at 1.0 and its breaks come from seq(1.0, max_value, 0.5);
# fail with a clear message instead of an obscure seq() error when there is
# no usable upper limit.
if (!is.finite(max_value) || max_value < 1) {
  stop(
    "Cannot build the y-axis: the largest Upper_CL must be finite and give ",
    "an upper limit of at least 1.0 (got ", max_value, ").",
    call. = FALSE
  )
}

# Build the dodged bar chart with error bars for one gender.
#   plot_data   : local data frame with the columns
#                 socio_demographic_breakdown, Value, PRR, Lower_CL, Upper_CL
#   panel_title : title shown above the panel
#   y_max       : upper limit of the visible y-axis range
# Returns a ggplot object.
build_prr_plot <- function(plot_data, panel_title, y_max) {
  dodge_width <- 0.95
  fill_colours <- c("prr_1_2" = "#28b8d1", "prr_2_3" = "#005494")
  # Labels are named by fill level so they cannot be matched to the wrong
  # colour if a level is missing or the level order changes.
  fill_labels <- c(
    "prr_1_2" = "PRR 1 to 2+ conditions",
    "prr_2_3" = "PRR 2 to 3+ conditions"
  )

  ggplot(
    plot_data,
    aes(x = socio_demographic_breakdown, y = Value, fill = PRR)
  ) +
    geom_col(position = position_dodge(width = dodge_width)) +
    geom_errorbar(
      aes(ymin = Lower_CL, ymax = Upper_CL),
      width = 0.2,
      position = position_dodge(width = dodge_width)
    ) +
    labs(title = panel_title, y = "Progression Rate Ratio", x = "Age group") +
    scale_fill_manual(values = fill_colours, labels = fill_labels) +
    theme_minimal() +
    theme(
      text = element_text(family = "Helvetica"),
      plot.title = element_text(size = 25),
      axis.title = element_text(size = 20),
      axis.text = element_text(size = 15),
      legend.title = element_blank(),
      legend.text = element_text(size = 25)
    ) +
    # Zoom into the y-axis range without dropping any data
    coord_cartesian(ylim = c(1.0, y_max)) +
    # Explicit breaks every 0.5 and no padding around the axis limits
    scale_y_continuous(breaks = seq(1.0, y_max, by = 0.5), expand = c(0, 0))
}

# One panel per gender, built from the same specification
plot_female <- build_prr_plot(female_long_local, "Female", max_value)
plot_male <- build_prr_plot(male_long_local, "Male", max_value)

# Combine plots using patchwork and add a shared legend
final_plot <- (plot_female | plot_male) +
  plot_layout(guides = "collect") &
  theme(legend.position = "bottom")

# Display the combined plot
print(final_plot)