### **01 - Incidence of MLTC**
#### **01D02 - Manuscript outputs - age standardisation outputs**

**Chart**: Progression rates by IMD and ethnicity - 0 to 1+ conditions

**Imports**

In [1]:
%%pyspark
# required imports

# requires blank line after last import


In [2]:
%%sparkr

# Install svglite on demand: requireNamespace() reports (without attaching the
# package, and quietly) whether it can be loaded; install only when it cannot.
if (isFALSE(requireNamespace("svglite", quietly = TRUE))) {
  install.packages("svglite")
}

In [2]:
# Load necessary libraries
library(SparkR)
library(ggplot2)
library(patchwork)
library(svglite)
library(stringr)

**Parameter cell**

In [3]:
%%pyspark
# parameter cell
# Name of the schema that holds the incidence output tables read by this
# notebook. It is blank here; presumably the real value is supplied at run
# time (e.g. by a pipeline parameter or by editing this cell) - TODO confirm.
incidence_schema = ""  # e.g. "mltc_incidence_outputs_v40_20230331"

# optional, can be blank


In [4]:
%%pyspark
# Set parameters in Spark configuration with 'param.' prefix (for use in SQL cells)
# Fail fast on a blank schema: the load cell interpolates this value into
# "<schema>.output_01d02_..." and a blank value would otherwise only surface
# later as a hard-to-read SQL parse error.
if not str(incidence_schema or "").strip():
    raise ValueError(
        "incidence_schema is blank; set it in the parameter cell "
        "before running the rest of the notebook"
    )
spark.conf.set("param.incidence_schema", incidence_schema)


---

#### **Creating the plot**

**a - Load data**

In [5]:
# Read the age-standardised ethnicity x IMD results table as a Spark DataFrame.
# The ${param.incidence_schema} placeholder is expected to be resolved by Spark
# from the 'param.incidence_schema' configuration value set in the cell above.
data <- SparkR::sql(
  "SELECT * FROM ${param.incidence_schema}.output_01d02_incidence_results_age_standardisation_ethnicity_imd"
)

**b - Create plot**

Data cleaning steps

In [6]:
# Columns cast to double: the three progression rates, followed by the
# lower/upper confidence limits for each progression step.
numeric_columns <- c(
  "progression_rate_0_1_plus",
  "progression_rate_1_2_plus",
  "progression_rate_2_3_plus",
  "lower_cl_0_1", "upper_cl_0_1",
  "lower_cl_1_2", "upper_cl_1_2",
  "lower_cl_2_3", "upper_cl_2_3"
)
for (column_name in numeric_columns) {
  data <- withColumn(data, column_name, cast(data[[column_name]], "double"))
}

# Shorten the ethnicity labels. Each name is the pattern passed to
# regexp_replace and each value is its replacement; they are applied in order.
ethnicity_labels <- c(
  "Asian or Asian British" = "Asian",
  "Black, African, Caribbean or Black British" = "Black",
  "Mixed or Multiple ethnic groups" = "Mixed",
  "Other ethnic group" = "Other"
)
for (long_label in names(ethnicity_labels)) {
  data <- withColumn(
    data,
    "ethnicity",
    regexp_replace(data$ethnicity, long_label, ethnicity_labels[[long_label]])
  )
}

# Split by gender, dropping rows whose IMD or ethnicity is "Unknown".
# The exclusion condition is shared by both subsets.
known_imd_and_ethnicity <- data$IMD != "Unknown" & data$ethnicity != "Unknown"

female_data <- SparkR::filter(
  data,
  data$gender_description == "FEMALE" & known_imd_and_ethnicity
)

male_data <- SparkR::filter(
  data,
  data$gender_description == "MALE" & known_imd_and_ethnicity
)

Reshape data for easier charting (keep only the 0 to 1+ columns, renamed to generic Value / Lower_CL / Upper_CL)

In [7]:
# Build the plotting layout for the "0 to 1+ conditions" progression step:
# keep IMD and ethnicity, expose the rate and its confidence limits under the
# generic names Value / Lower_CL / Upper_CL, and add a constant label column.
select_progression_0_1 <- function(sdf) {
  SparkR::select(
    sdf,
    sdf$IMD,
    sdf$ethnicity,
    alias(sdf$progression_rate_0_1_plus, "Value"),
    alias(lit("0 to 1+ conditions"), "Progression_Rate"),
    alias(sdf$lower_cl_0_1, "Lower_CL"),
    alias(sdf$upper_cl_0_1, "Upper_CL")
  )
}

# Same reshaping for each gender subset
female_long <- select_progression_0_1(female_data)
male_long <- select_progression_0_1(male_data)

Create R data frames

In [8]:
# Pull both reshaped Spark DataFrames back to the driver as ordinary R data
# frames so they can be handed to ggplot2 in the plotting cell.
female_long_local <- SparkR::collect(x = female_long)
male_long_local <- SparkR::collect(x = male_long)

Create charts

In [9]:
# Wrap the x-axis labels using str_wrap so long ethnicity names break across
# lines instead of overlapping
female_long_local$ethnicity <- str_wrap(female_long_local$ethnicity, width = 15)
male_long_local$ethnicity <- str_wrap(male_long_local$ethnicity, width = 15)

# Shared y-axis upper limit: the largest upper confidence limit across both
# datasets plus 10% headroom, so the two panels are directly comparable.
upper_limits <- c(female_long_local$Upper_CL, male_long_local$Upper_CL)
if (!any(is.finite(upper_limits))) {
  # max(..., na.rm = TRUE) would return -Inf here and ylim() would then fail
  # with an unrelated-looking error, so stop with an explicit message instead.
  stop(
    "No finite Upper_CL values found; cannot set the shared y-axis limit.",
    call. = FALSE
  )
}
max_value <- max(upper_limits, na.rm = TRUE) * 1.1

# Build one dodged bar chart (with error bars) of progression rate by
# ethnicity, filled by IMD quintile.
#   plot_data  - local data frame with ethnicity, IMD, Value, Lower_CL, Upper_CL
#   plot_title - panel title
#   y_max      - upper y-axis limit shared between panels
# Returns a ggplot object.
build_progression_plot <- function(plot_data, plot_title, y_max) {
  # Colours and legend labels are both keyed by IMD level, so each label stays
  # attached to its own quintile even if a quintile is absent from the data.
  imd_colours <- c(
    "1" = "#6e0003ff",
    "2" = "#db000eff",
    "3" = "#DEA62A",
    "4" = "#00ab57ff",
    "5" = "#8bc680ff"
  )
  imd_labels <- c(
    "1" = "1 - most deprived",
    "2" = "2",
    "3" = "3",
    "4" = "4",
    "5" = "5 - least deprived"
  )
  # Bars and error bars must use the same dodge width to stay aligned
  dodge <- position_dodge(width = 0.95)

  ggplot(plot_data, aes(x = ethnicity, y = Value, fill = IMD)) +
    geom_col(position = dodge) +
    geom_errorbar(
      aes(ymin = Lower_CL, ymax = Upper_CL),
      width = 0.2,
      position = dodge
    ) +
    labs(
      title = plot_title,
      y = "Progression rate per 100 person years",
      x = NULL
    ) +
    scale_fill_manual(values = imd_colours, labels = imd_labels) +
    theme_minimal() +
    theme(
      text = element_text(family = "Helvetica"),
      plot.title = element_text(size = 25),
      axis.title = element_text(size = 20),
      axis.text = element_text(size = 15),
      legend.title = element_blank(),
      legend.text = element_text(size = 25)
    ) +
    ylim(0, y_max)
}

# Create the plot for Female
plot_female <- build_progression_plot(
  female_long_local, "Female: 0 to 1+ conditions", max_value
)

# Create the plot for Male
plot_male <- build_progression_plot(
  male_long_local, "Male: 0 to 1+ conditions", max_value
)

# Combine plots side by side using patchwork; collect the two identical
# legends into one and place it at the bottom of the combined figure
final_plot <- (plot_female | plot_male) + plot_layout(guides = "collect") &
  theme(legend.position = "bottom")

# Display the combined plot
print(final_plot)