### **01 - Incidence of MLTC**
#### **01G - Manuscript outputs - first condition incidence**

**Chart**: Progression from 0 to 1+ by initial condition count

**Imports**

In [1]:
%%pyspark
# required imports

# requires blank line after last import


In [2]:
%%sparkr

# Install svglite on demand: requireNamespace() only checks availability (it
# does not attach the package), so the install runs just when it is missing.
svglite_available <- requireNamespace("svglite", quietly = TRUE)
if (!svglite_available) {
  install.packages("svglite")
}

In [2]:
# Load necessary libraries
library(SparkR)
library(ggplot2)
library(patchwork)
library(svglite)

**Parameter cell**

In [3]:
%%pyspark
# parameter cell
# Name of the schema that holds the incidence output tables. The value is
# published to the Spark conf as 'param.incidence_schema' and substituted into
# the SQL queries further down as ${param.incidence_schema}.
# NOTE(review): the default is empty, so it is presumably supplied at run time
# (the trailing comment looks like an example value) - confirm with the caller.
incidence_schema = ""  # "mltc_incidence_outputs_v40_20230331"

# optional, can be blank


In [4]:
%%pyspark
# Set parameters in Spark configuration with 'param.' prefix (for use in SQL cells)
# Fail fast when the schema is blank: the queries below are written as
# "${param.incidence_schema}.output_01G_...", so an empty value would expand to
# ".output_01G_..." and only surface later as an opaque SQL parse error.
if not str(incidence_schema or "").strip():
    raise ValueError(
        "incidence_schema is empty; set it in the parameter cell "
        "(e.g. 'mltc_incidence_outputs_v40_20230331') before running this notebook"
    )
spark.conf.set("param.incidence_schema", incidence_schema)


---

#### **Creating the plot**

**a - Load data**

In [5]:
# Read the first-condition incidence results as a Spark DataFrame. The
# ${param.incidence_schema} placeholder is left in the query text for Spark to
# substitute from the 'param.incidence_schema' conf entry set above.
data <- sql(
  "SELECT * FROM ${param.incidence_schema}.output_01G_incidence_results_first_conditions"
)

In [6]:
%%sql

-- Preview of the same results table that is loaded into `data` for plotting
SELECT * FROM ${param.incidence_schema}.output_01G_incidence_results_first_conditions

**b - Create plot**

Create charts

In [7]:
# Convert Spark DataFrame to R DataFrame for ggplot2
data_local <- collect(data)

# Remove rows that contain an NA in any column, and report how many were
# dropped so that categories cannot silently disappear from the figure
rows_before <- nrow(data_local)
data_local <- na.omit(data_local)
rows_dropped <- rows_before - nrow(data_local)
if (rows_dropped > 0) {
  message(sprintf(
    "Dropped %d of %d rows containing NA values before plotting",
    rows_dropped, rows_before
  ))
}

# Each subsegment must appear exactly once: the descriptions are used as factor
# levels below, and factor() rejects duplicated levels with a cryptic message.
# Raise a readable error that names the offending values instead.
repeated_subsegments <- unique(
  data_local$subsegment_description[duplicated(data_local$subsegment_description)]
)
if (length(repeated_subsegments) > 0) {
  stop(
    "Duplicate subsegment_description values in the results table: ",
    paste(repeated_subsegments, collapse = ", "),
    call. = FALSE
  )
}

# Order the DataFrame in descending order by progression rate
data_local_ordered <- data_local[order(-data_local$progression_rate_0_1_plus), ]

# Convert subsegment_description to a factor whose levels follow the sorted row
# order, so ggplot keeps this order on the x axis instead of sorting alphabetically
data_local_ordered$subsegment_description <- factor(
  data_local_ordered$subsegment_description,
  levels = data_local_ordered$subsegment_description
)


In [14]:
# Build the two-panel figure from `data_local_ordered`:
#   top    - bar chart of progression_rate_0_1_plus with its confidence limits
#   bottom - box plot drawn from the precomputed perc_05 ... perc_95 columns
# Both panels use subsegment_description on the x axis, so the categories line
# up vertically once the panels are stacked.

# Theme shared by both panels, defined once so fonts, sizes and label rotation
# cannot drift apart between the two charts
panel_theme <- theme_minimal() +
  theme(
    text = element_text(family = "Helvetica"),
    axis.title = element_text(size = 20, margin = margin(t = 20, r = 20, b = 20, l = 20)),
    axis.text = element_text(size = 17, margin = margin(t = 20, r = 20, b = 20, l = 20)),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),  # Rotate labels
    panel.grid.minor = element_blank()
  )

# Create the bar chart for progression_rate_0_1_plus
bar_chart <- ggplot(
  data_local_ordered,
  aes(x = subsegment_description, y = progression_rate_0_1_plus)
) +
  # geom_col() draws bars at the given y values (same as geom_bar(stat = "identity"))
  geom_col(fill = "#6cb1beff", width = 0.7) +
  geom_errorbar(aes(ymin = lower_cl_0_1, ymax = upper_cl_0_1), width = 0.2) +
  geom_text(
    # Whole-number label with thousands separators
    aes(label = formatC(round(progression_rate_0_1_plus, 0), format = "f", big.mark = ",", digits = 0)),
    hjust = -0.2,  # Position outside the bars
    angle = 90,    # Rotate vertically
    color = "darkgrey",  # Set color to dark grey
    fontface = "bold",
    size = 6
  ) +
  labs(title = NULL, y = "Incidence rate per 100,000 person years with 0 conditions", x = NULL) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +  # Add space above the bars
  panel_theme

# Half-length (in x-axis units) of the horizontal caps drawn at the whisker ends
cap_half_width <- 0.3

# Create the box plot; stat = "identity" uses the supplied percentiles directly
# instead of computing box statistics from raw observations
box_plot <- ggplot(data_local_ordered, aes(x = subsegment_description)) +
  geom_boxplot(
    aes(
      ymin = perc_05,
      lower = perc_25,
      middle = perc_50,
      upper = perc_75,
      ymax = perc_95
    ),
    stat = "identity",
    fill = "#a7d0d8ff",
    color = "#6cb1beff",
    whisker.linetype = "solid",
    # ggplot2 names line-width arguments `linewidth`; `whisker.size` is not a
    # geom_boxplot() parameter and was ignored with a warning
    whisker.linewidth = 0.5
  ) +
  # Caps at the 5th and 95th percentile; as.numeric() turns the factor into its
  # integer position so the segment can extend either side of the category
  geom_segment(
    aes(
      x = as.numeric(subsegment_description) - cap_half_width,
      xend = as.numeric(subsegment_description) + cap_half_width,
      y = perc_05,
      yend = perc_05
    ),
    color = "black"
  ) +
  geom_segment(
    aes(
      x = as.numeric(subsegment_description) - cap_half_width,
      xend = as.numeric(subsegment_description) + cap_half_width,
      y = perc_95,
      yend = perc_95
    ),
    color = "black"
  ) +
  # Median value printed just below the median line
  geom_text(
    aes(y = perc_50, label = round(perc_50, 2)),
    vjust = 1.5,
    color = "#5fa3b1",
    fontface = "bold",
    size = 6
  ) +
  labs(title = NULL, y = "Age (years)", x = NULL) +
  panel_theme

# Stack the two panels in a single column with equal heights using patchwork
final_plot <- bar_chart / box_plot + plot_layout(heights = c(1, 1), ncol = 1)

# Display the combined plot
print(final_plot)