In [1]:
# Attach packages with library() so a missing dependency stops the notebook
# here; require() only returns FALSE and lets later cells fail less clearly.
library(dplyr)
library(ggpubr)
library(data.table)
library(tidyverse)
# NOTE(review): warn = -1 silences every warning for the rest of the session,
# including any raised by the statistical tests in later cells.
options(warn = -1)

Loading required package: dplyr


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: ggpubr

Loading required package: ggplot2

Loading required package: data.table


Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’:

    between, first, last


── [1mAttaching core tidyverse packages[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mreadr    [39m 2.1.4     
── [1mConflicts[22m ──────────────────────

**Parameters**

In [2]:
# Inputs: STAT1/SP1 activity tables for the two cohorts at annotation level 1
inputpath_perez2022 <- "data/Compare_SLE_subgroups/STAT1_SP1_perez2022_level1.csv"
inputpath_scgt00 <- "data/Compare_SLE_subgroups/STAT1_SP1_scgt00_level1.csv"

# Same two cohorts at annotation level 2
inputpath_perez2022_level2 <- "data/Compare_SLE_subgroups/STAT1_SP1_perez2022_Level2.csv"
inputpath_scgt00_level2 <- "data/Compare_SLE_subgroups/STAT1_SP1_scgt00_Level2.csv"

# Outputs: folder prefix (with trailing slash) used when saving figures
outputpath <- "results/"

# Palette: sourcing this script is presumably what defines cell_colors,
# cell_level2_colors and tfs_colors, which later cells use without
# assigning them -- confirm against the palette file.
color_palette_path <- "data/colors_palette.R"
source(color_palette_path)

# Load data

In [3]:
# Read the level-1 activity tables of both cohorts.
perez2022_level1 <- read.csv(file = inputpath_perez2022, header = TRUE)
scgt00_level1 <- read.csv(file = inputpath_scgt00, header = TRUE)

# Read the level-2 activity tables of both cohorts.
perez2022_level2 <- read.csv(file = inputpath_perez2022_level2, header = TRUE)
scgt00_level2 <- read.csv(file = inputpath_scgt00_level2, header = TRUE)

# Paper figures

## Supplementary B: Boxplots

In [4]:
# Display order of the two transcription factors
tf_order <- c("STAT1", "SP1")
# Cell-type display order follows the order of names in the colour vectors
# (cell_colors / cell_level2_colors are not assigned in this notebook)
l1_cell_type_order <- names(cell_colors)
l2_cell_type_order <- names(cell_level2_colors)

In [5]:
# Drop three Level1 groups from each cohort before plotting.
toplot_perez <- filter(
  perez2022_level2,
  !(Level1 %in% c("Cycling_cells", "Progenitors", "Platelets"))
)

toplot_scgt00 <- filter(
  scgt00_level2,
  !(Level1 %in% c("Cycling_cells", "Progenitors", "Platelets"))
)

# Stack both cohorts, reshape the STAT1 and SP1 columns into TF/activity
# pairs, and fix the factor level order used for plotting.
toplot_level2 <- rbind(toplot_perez, toplot_scgt00) %>%
  pivot_longer(
    cols = c(STAT1, SP1),
    names_to = "TF",
    values_to = "activity"
  ) %>%
  mutate(
    Level1 = factor(Level1, levels = l1_cell_type_order),
    Level2 = factor(Level2, levels = l2_cell_type_order),
    TF = factor(TF, levels = tf_order)
  )

**P-values (one-vs-rest Wilcoxon tests)**

In [40]:
library(dplyr)
library(broom)
library(tidyr)
library(purrr)

# One-vs-rest Wilcoxon rank-sum tests of TF activity.
#
# Within every Level1 group of `data`, each Level2 value is compared against
# all other Level2 values of that same Level1 group, separately for each TF.
#
# @param data Long-format data frame with columns Level1, Level2, TF and
#   activity. Rows whose activity is NA are ignored.
# @return Data frame with one row per tested Level1/Level2/TF combination:
#   the broom::tidy() columns of the two-sided test plus `comparison`,
#   Level1, Level2, TF, mean_one and mean_all. Combinations where either
#   side has fewer than three observations are skipped.
compare_one_vs_all <- function(data) {
  results <- list()

  for (level1 in unique(data$Level1)) {
    # Restrict to the current Level1 group and drop missing activities
    level1_data <- data %>%
      filter(Level1 == level1, !is.na(activity))

    # Every Level2 x TF pair observed in this Level1 group
    combinations <- expand.grid(
      Level2 = unique(level1_data$Level2),
      TF = unique(level1_data$TF)
    )

    # seq_len() gives an empty loop for zero rows; 1:nrow() would run over
    # c(1, 0) instead.
    for (i in seq_len(nrow(combinations))) {
      level2 <- combinations$Level2[i]
      tf <- combinations$TF[i]

      # "one" = the subtype under test, "all" = the remaining subtypes
      one_data <- level1_data %>% filter(Level2 == level2, TF == tf)
      all_data <- level1_data %>% filter(Level2 != level2, TF == tf)

      # Require at least three observations on each side of the comparison
      if (nrow(one_data) >= 3 && nrow(all_data) >= 3) {
        # Two-sided Wilcoxon rank-sum test
        test_result <- wilcox.test(
          one_data$activity,
          all_data$activity,
          alternative = "two.sided"
        )

        tidy_result <- tidy(test_result)
        tidy_result$Level1 <- level1
        tidy_result$Level2 <- level2
        tidy_result$TF <- tf
        tidy_result$mean_one <- mean(one_data$activity)
        tidy_result$mean_all <- mean(all_data$activity)
        results[[paste(level1, level2, tf)]] <- tidy_result
      }
    }
  }

  # The list names become the `comparison` column
  bind_rows(results, .id = "comparison")
}

# Run the one-vs-rest tests on the combined level-2 table (toplot_level2)
final_comparison_results <- compare_one_vs_all(toplot_level2)

In [42]:
library(dplyr)

# Benjamini-Hochberg adjustment over all tests at once (every Level1 group
# and both TFs form a single family), then sort within Level1/TF by the
# adjusted p-value. No significance filter is applied.
final_comparison_results_adjusted <- final_comparison_results %>%
  mutate(pval_adj = p.adjust(p.value, method = "BH")) %>%
  arrange(Level1, TF, pval_adj)

output_path <- "results/supplementary_tables/SLE_Level2_pval.csv"
# write.csv() does not create missing folders, so make sure the target exists
dir.create(dirname(output_path), recursive = TRUE, showWarnings = FALSE)
write.csv(final_comparison_results_adjusted, output_path, row.names = FALSE)

# Show the full adjusted table
final_comparison_results_adjusted


comparison,statistic,p.value,method,alternative,Level1,Level2,TF,mean_one,mean_all,pval_adj
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<fct>,<fct>,<dbl>,<dbl>,<dbl>
ILC NK_lowRibocontent SP1,8964.0,1.283159e-09,Wilcoxon rank sum test with continuity correction,two.sided,ILC,NK_lowRibocontent,SP1,-0.342425391,0.20283399,1.154843e-08
ILC NK_adaptive SP1,27821.0,3.469194e-05,Wilcoxon rank sum test with continuity correction,two.sided,ILC,NK_adaptive,SP1,0.409337594,0.06087694,0.0001135373
ILC NK_CD56dimCD16 SP1,31022.0,0.001289171,Wilcoxon rank sum test with continuity correction,two.sided,ILC,NK_CD56dimCD16,SP1,0.312912263,0.06838008,0.002900636
ILC NK_IFN1response SP1,10179.0,0.08454847,Wilcoxon rank sum test with continuity correction,two.sided,ILC,NK_IFN1response,SP1,0.003605688,0.14268336,0.1449402
ILC NK_CD56high SP1,22554.0,0.1123243,Wilcoxon rank sum test with continuity correction,two.sided,ILC,NK_CD56high,SP1,-0.009442693,0.17171453,0.1838033
ILC NK_CD16high SP1,12245.0,0.4722113,Wilcoxon rank sum test with continuity correction,two.sided,ILC,NK_CD16high,SP1,0.204182768,0.12204616,0.6299905
Mono Mono_IFNresponse STAT1,51741.0,3.066814e-21,Wilcoxon rank sum test with continuity correction,two.sided,Mono,Mono_IFNresponse,STAT1,2.450774366,1.14031489,1.1040529999999998e-19
Mono Mono_regulatory STAT1,12725.0,3.095138e-09,Wilcoxon rank sum test with continuity correction,two.sided,Mono,Mono_regulatory,STAT1,0.529648683,1.56074646,2.228499e-08
Mono Mono_nonClassical STAT1,26507.0,0.0002326705,Wilcoxon rank sum test with continuity correction,two.sided,Mono,Mono_nonClassical,STAT1,0.941861006,1.56626926,0.0005982955
Mono Mono_classical STAT1,31484.0,0.12015,Wilcoxon rank sum test with continuity correction,two.sided,Mono,Mono_classical,STAT1,1.298654408,1.46953068,0.1880608


**Boxplots**

In [None]:
# Activity boxplots per Level2 subtype: one facet row per TF, one facet
# column per Level1 group, x axis free across facets.
plot_l2 <- ggboxplot(
  data = toplot_level2,
  x = "Level2",
  y = "activity",
  fill = "Level2"
) +
  facet_grid(TF ~ Level1, scales = "free_x") +
  scale_fill_manual(values = cell_level2_colors) +
  theme_bw() +
  ggtitle(label = "") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))

In [36]:
# Write the level-2 boxplot figure as a 20 x 8 PDF under `outputpath`.
ggsave(
  filename = "sle_TFactivity_level2_test.pdf",
  plot = plot_l2,
  device = "pdf",
  path = outputpath,
  width = 20,
  height = 8
)

In [37]:
outputpath

## Flare comparison

In [4]:
# Same ordering vectors as in the boxplot section, repeated so this section
# can be run without it
tf_order <- c("STAT1", "SP1")
l1_cell_type_order <- names(cell_colors)
l2_cell_type_order <- names(cell_level2_colors)

In [5]:
# Fill colours keyed by the recoded Flare labels ("F" and "notF")
flare_palette <- c(
  "F" = "#e76f51",
  "notF" = "#e9c46a"
)

In [6]:
# Long-format level-2 table for the flare comparison: relabel "not_F" as
# "notF", keep the two flare labels, stack STAT1/SP1 into TF/activity pairs,
# drop missing activities and fix the factor level order.
data_flare_level2 <- perez2022_level2 %>%
  mutate(Flare = replace(as.character(Flare), Flare %in% "not_F", "notF")) %>%
  filter(Flare %in% c("notF", "F")) %>%
  tidyr::pivot_longer(
    cols = c(STAT1, SP1),
    names_to = "TF",
    values_to = "activity"
  ) %>%
  filter(!is.na(activity)) %>%
  mutate(
    Level1 = factor(Level1, levels = l1_cell_type_order),
    Level2 = factor(Level2, levels = l2_cell_type_order),
    TF = factor(TF, levels = tf_order)
  )



In [20]:
# Number of distinct sampleID values per Level2 subtype and Flare label
flare_counts <- data_flare_level2 %>%
  group_by(Level2, Flare) %>%
  summarise(sample_count = n_distinct(sampleID))
flare_counts

[1m[22m`summarise()` has grouped output by 'Level2'. You can override using the `.groups` argument.


Level2,Flare,sample_count
<fct>,<chr>,<int>
Plasma_IGHA,notF,5
Plasma_IGHG,F,1
pDC,F,2
pDC,notF,60
Mono_classical,F,9
Mono_classical,notF,115
Mono_nonClassical,F,7
Mono_nonClassical,notF,114
Mono_inflammatory,F,6
Mono_inflammatory,notF,107


In [41]:
# Level2 subtypes that have at least three distinct samples in BOTH Flare
# labels; only these are kept for the heatmap.
flare_counts <- data_flare_level2 %>%
  group_by(Level2, Flare) %>%
  summarise(sample_count = n_distinct(sampleID)) %>%
  # Drop Level2/Flare pairs with fewer than three samples
  filter(sample_count >= 3) %>%
  group_by(Level2) %>%
  # Keep a subtype only if both labels survived the previous filter
  filter(all(c("F", "notF") %in% Flare)) %>%
  ungroup()
level2_categories <- unique(flare_counts$Level2)
level2_categories

[1m[22m`summarise()` has grouped output by 'Level2'. You can override using the `.groups` argument.


In [42]:
# Mean activity per (level_1, Level1, Level2, TF, Flare) combination.
# NOTE(review): level_1 appears to be a per-sample, per-subtype identifier --
# confirm against the input table.
mean_activity_df <- data_flare_level2 %>%
  group_by(level_1, Level1, Level2, TF, Flare) %>%
  summarize(mean_activity = mean(activity, na.rm = TRUE)) %>%
  ungroup() %>%
  # Row names become "<level_1>_<TF>"
  mutate(level_1_TF = paste(level_1, TF, sep = "_")) %>%
  column_to_rownames(var = "level_1_TF") %>%
  # Keep only the subtypes listed in level2_categories
  filter(Level2 %in% level2_categories)


[1m[22m`summarise()` has grouped output by 'level_1', 'Level1', 'Level2', 'TF'. You can override using the `.groups` argument.


In [43]:
unique(mean_activity_df$Level2)

In [44]:
head(mean_activity_df)

Unnamed: 0_level_0,level_1,Level1,Level2,TF,Flare,mean_activity
Unnamed: 0_level_1,<chr>,<fct>,<fct>,<fct>,<chr>,<dbl>
Perez2022_1004_T0_Mono_IFNresponse-3_STAT1,Perez2022_1004_T0_Mono_IFNresponse-3,Mono,Mono_IFNresponse,STAT1,notF,1.3177752
Perez2022_1004_T0_Mono_IFNresponse-3_SP1,Perez2022_1004_T0_Mono_IFNresponse-3,Mono,Mono_IFNresponse,SP1,notF,0.3957995
Perez2022_1004_T0_Mono_classical-3_STAT1,Perez2022_1004_T0_Mono_classical-3,Mono,Mono_classical,STAT1,notF,0.2079604
Perez2022_1004_T0_Mono_classical-3_SP1,Perez2022_1004_T0_Mono_classical-3,Mono,Mono_classical,SP1,notF,1.5004512
Perez2022_1004_T0_Mono_inflammatory-3_STAT1,Perez2022_1004_T0_Mono_inflammatory-3,Mono,Mono_inflammatory,STAT1,notF,0.9097472
Perez2022_1004_T0_Mono_inflammatory-3_SP1,Perez2022_1004_T0_Mono_inflammatory-3,Mono,Mono_inflammatory,SP1,notF,1.1708406


In [45]:
# Per-TF min-max scaling of the mean activities.
# mean_activity_processed shifts each TF's values by |min|; the min-max step
# that follows subtracts the minimum again, so the shift does not change
# TF_normalized_mean_activity.
# NOTE(review): diff(range(...)) is zero when all values of a TF are equal,
# which would make the division return NaN.
mean_activity_df <-  mean_activity_df %>%
  group_by(TF) %>%     
  mutate(mean_activity_processed = mean_activity + abs(min(mean_activity, na.rm = TRUE))) %>%
  mutate(TF_normalized_mean_activity = (mean_activity_processed - min(mean_activity_processed, na.rm = TRUE)) / diff(range(mean_activity_processed, na.rm = TRUE))) %>%
  ungroup()

In [46]:
print(head(mean_activity_df))

[90m# A tibble: 6 × 8[39m
  level_1         Level1 Level2 TF    Flare mean_activity mean_activity_proces…¹
  [3m[90m<chr>[39m[23m           [3m[90m<fct>[39m[23m  [3m[90m<fct>[39m[23m  [3m[90m<fct>[39m[23m [3m[90m<chr>[39m[23m         [3m[90m<dbl>[39m[23m                  [3m[90m<dbl>[39m[23m
[90m1[39m Perez2022_1004… Mono   Mono_… STAT1 notF          1.32                    3.90
[90m2[39m Perez2022_1004… Mono   Mono_… SP1   notF          0.396                   3.51
[90m3[39m Perez2022_1004… Mono   Mono_… STAT1 notF          0.208                   2.79
[90m4[39m Perez2022_1004… Mono   Mono_… SP1   notF          1.50                    4.62
[90m5[39m Perez2022_1004… Mono   Mono_… STAT1 notF          0.910                   3.50
[90m6[39m Perez2022_1004… Mono   Mono_… SP1   notF          1.17                    4.29
[90m# ℹ abbreviated name: ¹​mean_activity_processed[39m
[90m# ℹ 1 more variable: TF_normalized_mean_activity <dbl>[39m


In [47]:
# Heatmap matrix: rows are "<Flare>_<TF>", columns are Level2 subtypes, and
# each cell is the mean activity min-max scaled within its TF.
heatmap_df <- mean_activity_df %>%
  mutate(Flare_TF = paste(Flare, TF, sep = "_")) %>%
  group_by(Flare_TF, TF, Level2) %>%
  summarize(
    mean_activity = mean(mean_activity, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  group_by(TF) %>%
  mutate(
    mean_activity_norm =
      (mean_activity - min(mean_activity, na.rm = TRUE)) /
        diff(range(mean_activity, na.rm = TRUE))
  ) %>%
  ungroup() %>%
  # Keep only the columns needed to build the wide matrix
  select(Flare_TF, Level2, mean_activity_norm) %>%
  pivot_wider(names_from = Level2, values_from = mean_activity_norm) %>%
  column_to_rownames(var = "Flare_TF")

In [48]:
# Column annotation for the heatmap: one row per Level2 subtype (as row
# name) with its Level1 group, ordered like names(cell_level2_colors).
col_annot <- mean_activity_df %>%
  select(Level2, Level1) %>%
  distinct() %>%
  # Clear existing row names before Level2 is moved into them
  {rownames(.) <- NULL; .} %>%
  arrange(match(Level2, names(cell_level2_colors))) %>%
  column_to_rownames(var = "Level2") 
# Re-add Level2 as a regular column so it can be shown as an annotation track
col_annot$Level2 <- rownames(col_annot)
# Column order used when indexing heatmap_df
col_order <- rownames(col_annot)

In [49]:
# Row annotation for the heatmap: one row per "<Flare>_<TF>" (as row name)
# with its TF and Flare label, ordered by tf_order.
row_annot <- mean_activity_df %>%
  select(TF, Flare) %>%
  distinct() %>%
  mutate(Flare_TF = paste(Flare, TF, sep = "_")) %>%
  # Clear existing row names before Flare_TF is moved into them
  {rownames(.) <- NULL; .} %>%
  column_to_rownames(var = "Flare_TF") %>%
  arrange(match(TF, tf_order))
# Row order used when indexing heatmap_df
row_order <- rownames(row_annot) 

In [50]:
row_annot

Unnamed: 0_level_0,TF,Flare
Unnamed: 0_level_1,<fct>,<chr>
notF_STAT1,STAT1,notF
F_STAT1,STAT1,F
notF_SP1,SP1,notF
F_SP1,SP1,F


In [51]:
# Annotation colours, one named vector per annotation column, each
# restricted to the values present in row_annot / col_annot.
# tfs_colors, cell_colors and cell_level2_colors are not assigned in this
# notebook (presumably they come from the sourced palette script).
my_colour_annot <- list(
    Flare = flare_palette,
    TF = tfs_colors[names(tfs_colors) %in% levels(row_annot$TF)],
    Level1 = cell_colors[names(cell_colors) %in% col_annot$Level1],
    Level2 = cell_level2_colors[names(cell_level2_colors) %in% col_annot$Level2]
)

In [32]:
my_colour_annot

In [55]:
figure_path <- paste0(outputpath, "SP1_STAT1_heatmap.pdf")

# Draw the heatmap once into a single, explicitly sized PDF device.
# Passing `filename` to pheatmap() as well would open a second device on the
# same file, and print(a) afterwards would draw the heatmap a second time.
pdf(figure_path, width = 15, height = 12)
a <- pheatmap::pheatmap(
    heatmap_df[row_order, col_order],
    border_color = NA,
    na_col = "white",
    color = hcl.colors(50, "Plasma"),
    #breaks = my_breaks,
    annotation_colors = my_colour_annot,
    cluster_cols = FALSE,
    cluster_rows = FALSE,
    cellheight = 10,
    cellwidth = 10,
    annotation_col = col_annot,
    annotation_row = row_annot,
    cex = 1,
    fontsize = 9,
    # labels_col is left at its default: col_annot only has Level1 and
    # Level2 columns, so the former `col_annot$disease` was always NULL.
    main = "IFN_response",
    # NOTE(review): gap positions are hard-coded column/row indices; they
    # need updating if the set of Level2 subtypes changes.
    gaps_col = c(4, 5, 6, 14),
    gaps_row = c(2)
    )
dev.off()

### Main (Mono)

**Select celltypes for main figure**

In [13]:
# Level1 groups shown in the main figure
selected_celltypes <- c("Mono")

In [14]:
# SP1 activity for the Level1 groups chosen in selected_celltypes (instead
# of repeating the "Mono" literal here).
# Needs the data_flare_level2 version that has a `TF` column.
main_toplot <- data_flare_level2 %>%
    filter(Level1 %in% selected_celltypes,
           TF == "SP1")

In [16]:
# Boxplots of main_toplot activity per Level2 subtype, filled by Flare
# label, titled "Flare".
plot_flare <- ggboxplot(main_toplot, x = "Level2", y = "activity", fill = "Flare") +
    #facet_grid(tfs ~ Level1, scales = "free") +
    scale_fill_manual(values = flare_palette) +
    theme_bw() +
    ggtitle(label = "Flare") +
    theme(
        axis.text.x = element_text(angle = 25, vjust = 0.5, hjust=0.5)
)

# NOTE(review): the main_plot cell saves to this same file name in
# "results/", so whichever cell runs last determines the file -- confirm
# which version is intended.
ggsave(
    plot_flare,
    filename = "Main_plot_sp1_mono.pdf",
    device = "pdf",
    width = 10,
    height = 2,
    path = "results/")

In [15]:
# Untitled version of the main-figure boxplots: main_toplot activity per
# Level2 subtype, filled by Flare label.
main_plot <- ggboxplot(
  data = main_toplot,
  x = "Level2",
  y = "activity",
  fill = "Flare"
) +
  scale_fill_manual(values = flare_palette) +
  theme_bw() +
  ggtitle(label = "") +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5, hjust = 0.5))

# Saved as a 10 x 2 PDF in "results/".
ggsave(
  filename = "Main_plot_sp1_mono.pdf",
  plot = main_plot,
  device = "pdf",
  path = "results/",
  width = 10,
  height = 2
)

### Supplementary (all)

In [9]:
# Long-format flare table for the supplementary plots.
# The source is perez2022_level2: no object called `perez2022` is created in
# this notebook, and the Level2 column used below requires the level-2 table.
toplot_flare <-
    perez2022_level2 %>%
    mutate(
        # Harmonise the non-flare label with the keys of flare_palette
        Flare = case_when(Flare == "not_F" ~ "notF",
        TRUE ~ as.character(Flare))
    ) %>%
    filter(
        Flare %in% c("notF", "F")
    ) %>%
  pivot_longer(cols = c(STAT1, SP1), names_to = "TF", values_to = "activity") %>%
  mutate(
    Level1 = factor(Level1, levels = l1_cell_type_order),
    Level2 = factor(Level2, levels = l2_cell_type_order),
    TF = factor(TF, levels = tf_order)
  )

In [26]:
# SP1 activity per Level2 subtype, one facet per Level1 group, by Flare.
# toplot_flare is in long format (TF / activity columns), so the SP1 rows are
# selected and `activity` is plotted; there is no `SP1` column any more.
plot_flare_sp1 <- ggboxplot(
    filter(toplot_flare, TF == "SP1"),
    x = "Level2", y = "activity", fill = "Flare") +
    facet_wrap(~Level1, scales = "free", ncol = 4, nrow = 2) +
    scale_fill_manual(values = flare_palette) +
    theme_bw() +
    ggtitle(label = "Flare SP1") +
    theme(
        axis.text.x = element_text(angle = 25, vjust = 0.5, hjust=0.5)
    )

ggsave(
    plot_flare_sp1,
    filename = "flare_sp1.pdf",
    device = "pdf",
    width = 20,
    height = 8,
    path = "results/SLE_subgroups/")

# Flare comparison

**Palette**

In [4]:
# Flare ---------------------
# Fill colours keyed by the recoded Flare labels ("F" and "notF")
flare_palette <- c(
  "F" = "#e76f51",
  "notF" = "#e9c46a"
)


## Level 1

**Filter input DF**

In [5]:
# Long-format level-1 table for the flare comparison: relabel "not_F" as
# "notF", keep the two flare labels, stack STAT1/SP1 into tfs/activity pairs
# and drop rows without an activity value.
data_flare_level1 <- perez2022_level1 %>%
  mutate(
    Flare = case_when(
      Flare == "not_F" ~ "notF",
      TRUE ~ as.character(Flare)
    )
  ) %>%
  filter(Flare %in% c("notF", "F")) %>%
  tidyr::pivot_longer(
    cols = c(STAT1, SP1),
    names_to = "tfs",
    values_to = "activity"
  ) %>%
  filter(!is.na(activity))

**p-values**

In [15]:
# Per Level1 group and TF: t-test of activity between Flare labels, with
# BH-adjusted p-values; only rows with raw p < 0.5 are kept.
pvals_flare <- data_flare_level1 %>%
  group_by(Level1, tfs) %>%
  filter(n_distinct(Flare) == 2) %>% # Keep groups where both Flare labels occur
  filter(all(table(Flare) >= 3)) %>% # Require at least three rows per Flare label
  summarise(
    t_test = list(broom::tidy(t.test(activity ~ Flare))),
    # NOTE(review): these are row counts; they equal patient counts only if
    # each patient contributes one row per group -- confirm.
    n_patients_F = sum(Flare == "F"),
    n_patients_nF = sum(Flare == "notF"),  
    .groups = "drop"
  ) %>%
  tidyr::unnest(t_test) %>%
  select(Level1, tfs, p.value, n_patients_F, n_patients_nF) %>%
  mutate(
    # Adjusted over all Level1/TF tests together
    pval_adj = p.adjust(p.value, method = "BH")
  ) %>%
  group_by(tfs) %>% # Second adjustment, computed separately within each TF
  mutate(
    pval_adj_indep = p.adjust(p.value, method = "BH")
  ) %>%
  ungroup() %>%
  filter(
    p.value < 0.5
  ) %>%
  arrange(p.value)

In [16]:
pvals_flare

Level1,tfs,p.value,n_patients_F,n_patients_nF,pval_adj,pval_adj_indep
<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<dbl>
T_CD8_Naive,STAT1,0.001263,9,111,0.008841,0.003789
Mono,SP1,0.031628,9,115,0.110698,0.1174516
T_CD4_Naive,SP1,0.05872581,9,115,0.1370269,0.1174516
ILC,SP1,0.21007653,8,113,0.3676339,0.280102
T_CD8_NonNaive,STAT1,0.46756171,9,115,0.610264,0.7013426


## Level 2

In [56]:
# Long-format level-2 table for the flare tests.
# NOTE(review): this reassigns data_flare_level2 with the TF column named
# `tfs`; the heatmap and main-figure cells earlier in the notebook use a
# version whose column is named `TF`, so they will not work after this cell
# has run.
data_flare_level2 <-
    perez2022_level2 %>%
    mutate(
        # Harmonise the non-flare label with the keys of flare_palette
        Flare = case_when(Flare == "not_F" ~ "notF",
        TRUE ~ as.character(Flare))
    ) %>%
    filter(
        Flare %in% c("notF", "F")
    ) %>%
    tidyr::pivot_longer(
        cols = c(STAT1, SP1),
        names_to = "tfs",
        values_to = "activity"
        ) %>%
    filter(
        !is.na(activity),
        #Level1 %in% pvals_flare$Level1
    ) %>%
    mutate(tfs = factor(tfs, levels = c("STAT1", "SP1")))


In [63]:
# Per Level2 subtype and TF: t-test of activity between Flare labels, with
# BH-adjusted p-values. The unfiltered table is written as a supplementary
# table.
pvals_flare_supp_table <- data_flare_level2 %>%
  group_by(Level2, tfs) %>%
  filter(n_distinct(Flare) == 2) %>% # Keep groups where both Flare labels occur
  filter(all(table(Flare) >= 2)) %>% # Require at least two rows per Flare label
  summarise(
    t_test = list(broom::tidy(t.test(activity ~ Flare))),
    # Row counts per label (labelled as patients)
    n_patients_F = sum(Flare == "F"),
    n_patients_nF = sum(Flare == "notF"),  
    .groups = "drop"
  ) %>%
  tidyr::unnest(t_test) %>%
  select(Level2, tfs, p.value, n_patients_F, n_patients_nF) %>%
  mutate(
    # Adjusted over all Level2/TF tests together
    pval_adj = p.adjust(p.value, method = "BH")
  ) %>%
  group_by(tfs) %>% # Second adjustment, computed separately within each TF
  mutate(
    pval_adj_indep = p.adjust(p.value, method = "BH")
  ) %>%
  ungroup() %>%
  arrange(p.value)

write.csv(pvals_flare_supp_table, "results/supplementary_tables/Flare_comparison_pvalues.csv", row.names = FALSE)


In [60]:
# Level-2 flare tests with an unadjusted p-value below 0.05, smallest first.
pvals_flare <- arrange(
  filter(pvals_flare_supp_table, p.value < 0.05),
  p.value
)
pvals_flare

Level2,tfs,p.value,n_patients_F,n_patients_nF,pval_adj,pval_adj_indep
<chr>,<fct>,<dbl>,<int>,<int>,<dbl>,<dbl>
Mono_classical,SP1,0.0002221261,9,115,0.006885908,0.003776143
T_CD8_Naive,STAT1,0.0015835846,9,111,0.024545562,0.022170185
Mono_IFNresponse,SP1,0.0031170107,9,114,0.02715654,0.019856395
T_CD8_CM_stem,SP1,0.0035040697,3,19,0.02715654,0.019856395
Mono_inflammatory,SP1,0.0052947684,6,107,0.032827564,0.022502766
T_CD8_eff_HOBIT,STAT1,0.0100411587,4,39,0.051298005,0.070288111
Mono_nonClassical,SP1,0.0115834206,7,114,0.051298005,0.03938363


In [None]:
# NOTE(review): this cell repeats the pvals_flare_supp_table computation from
# the earlier cell (without the write.csv call) and then prints pvals_flare;
# it looks like a leftover copy -- confirm whether it can be removed.
pvals_flare_supp_table <- data_flare_level2 %>%
  group_by(Level2, tfs) %>%
  filter(n_distinct(Flare) == 2) %>% # Keep groups where both Flare labels occur
  filter(all(table(Flare) >= 2)) %>% # Require at least two rows per Flare label
  summarise(
    t_test = list(broom::tidy(t.test(activity ~ Flare))),
    n_patients_F = sum(Flare == "F"),
    n_patients_nF = sum(Flare == "notF"),  
    .groups = "drop"
  ) %>%
  tidyr::unnest(t_test) %>%
  select(Level2, tfs, p.value, n_patients_F, n_patients_nF) %>%
  mutate(
    pval_adj = p.adjust(p.value, method = "BH")
  ) %>%
  group_by(tfs) %>% # Second adjustment, computed separately within each TF
  mutate(
    pval_adj_indep = p.adjust(p.value, method = "BH")
  ) %>%
  ungroup() %>%
  arrange(p.value)

pvals_flare

### Plot

In [30]:
# Flare boxplots for every Level2 subtype: facet rows are TFs, facet columns
# are Level1 groups, both axes free.
plot_flare <- ggboxplot(
  data = data_flare_level2,
  x = "Level2",
  y = "activity",
  fill = "Flare"
) +
  facet_grid(rows = vars(tfs), cols = vars(Level1), scales = "free") +
  scale_fill_manual(values = flare_palette) +
  theme_bw() +
  ggtitle(label = "Flare") +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5, hjust = 0.5))

# Saved as a 15 x 12 PDF.
ggsave(
  filename = "plot_flare.pdf",
  plot = plot_flare,
  device = "pdf",
  path = "results/Compare_SLE_subgroups/",
  width = 15,
  height = 12
)

# Response comparison

**Palette**

In [25]:
# Response ---------------------
# Fill colours keyed by the Responder labels ("R" and "NR")
response_palette <- c(
    "R" = "#70e000",
    "NR" = "#007200"
)


## Level 1

**Filter input DF**

In [76]:
# Long-format level-1 table for the responder comparison: keep rows labelled
# "R" or "NR", stack STAT1/SP1 into tfs/activity pairs and drop rows without
# an activity value.
data_response_level1 <- scgt00_level1 %>%
  filter(Responder %in% c("R", "NR")) %>%
  tidyr::pivot_longer(
    cols = c(STAT1, SP1),
    names_to = "tfs",
    values_to = "activity"
  ) %>%
  filter(!is.na(activity))

**p-values**

In [77]:
# Per Level1 group and TF: t-test of activity between responders ("R") and
# non-responders ("NR"), with BH-adjusted p-values.
pvals_response <- data_response_level1 %>%
  group_by(Level1, tfs) %>%
  filter(n_distinct(Responder) == 2) %>% # Keep groups where both Responder labels occur
  filter(all(table(Responder) >= 2)) %>% # Require at least two rows per Responder label
  summarise(
    t_test = list(broom::tidy(t.test(activity ~ Responder))),
    # Row counts per label (labelled as patients)
    n_patients_R = sum(Responder == "R"),
    n_patients_NR = sum(Responder == "NR"),
    .groups = "drop"
  ) %>%
  tidyr::unnest(t_test) %>%
  select(Level1, tfs, p.value, n_patients_R, n_patients_NR) %>%
  mutate(
    # Adjusted over all Level1/TF tests together. "BH" is the same method
    # as the "fdr" alias used before; spelled like the other sections.
    pval_adj = p.adjust(p.value, method = "BH")
  ) %>%
  group_by(tfs) %>% # Second adjustment, computed separately within each TF
  mutate(
    pval_adj_indep = p.adjust(p.value, method = "BH")
  ) %>%
  ungroup() %>%
  filter(
    p.value < 1
  ) %>%
  arrange(p.value)

In [78]:
pvals_response

Level1,tfs,p.value,n_patients_R,n_patients_NR,pval_adj,pval_adj_indep
<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<dbl>
T_CD4_Naive,SP1,0.08256271,8,8,0.7101654,0.3302509
Plasma,STAT1,0.15781454,3,3,0.7101654,0.7640626
Mono,STAT1,0.41754813,8,8,0.9959693,0.7640626
pDC,STAT1,0.48499994,4,6,0.9959693,0.7640626
T_CD8_Naive,STAT1,0.61125007,8,8,0.9959693,0.7640626
ILC,SP1,0.81912622,8,8,0.9959693,0.9959693
T_CD8_NonNaive,STAT1,0.89300462,8,8,0.9959693,0.8930046
T_CD8_NonNaive,SP1,0.94809089,8,8,0.9959693,0.9959693
Mono,SP1,0.99596934,8,8,0.9959693,0.9959693


## Level 2

In [79]:
# Long-format level-2 table for the responder comparison, with `tfs` as a
# factor ordered STAT1 then SP1.
data_response_level2 <- scgt00_level2 %>%
  filter(Responder %in% c("R", "NR")) %>%
  tidyr::pivot_longer(
    cols = c(STAT1, SP1),
    names_to = "tfs",
    values_to = "activity"
  ) %>%
  filter(!is.na(activity)) %>%
  mutate(tfs = factor(tfs, levels = c("STAT1", "SP1")))

In [43]:
# Per Level2 subtype and TF: t-test of activity between Responder labels,
# with BH-adjusted p-values. This reassigns pvals_response, replacing the
# level-1 result of the same name.
pvals_response <- data_response_level2 %>%
  group_by(Level2, tfs) %>%
  filter(n_distinct(Responder) == 2) %>% # Keep groups where both Responder labels occur
  filter(all(table(Responder) >= 2)) %>% # Require at least two rows per Responder label
  summarise(
    t_test = list(broom::tidy(t.test(activity ~ Responder))),
    # Row counts per label (labelled as patients)
    n_patients_R = sum(Responder == "R"),
    n_patients_NR = sum(Responder == "NR"),  
    .groups = "drop"
  ) %>%
  tidyr::unnest(t_test) %>%
  select(Level2, tfs, p.value, n_patients_R, n_patients_NR) %>%
  mutate(
    # Adjusted over all Level2/TF tests together
    pval_adj = p.adjust(p.value, method = "BH")
  ) %>%
  group_by(tfs) %>% # Second adjustment, computed separately within each TF
  mutate(
    pval_adj_indep = p.adjust(p.value, method = "BH")
  ) %>%
  ungroup() %>%
  filter(
    p.value < 1
  ) %>%
  arrange(p.value)
pvals_response

Level2,tfs,p.value,n_patients_R,n_patients_NR,pval_adj,pval_adj_indep
<chr>,<fct>,<dbl>,<int>,<int>,<dbl>,<dbl>
Plasma_IGHA,STAT1,0.03910844,2,3,0.951166,0.5475181
T_CD4_Naive,SP1,0.10231064,8,8,0.951166,0.9883599
Mono_IFNresponse,STAT1,0.31797313,7,7,0.951166,0.8481143
Mono_nonClassical,STAT1,0.3302873,6,7,0.951166,0.8481143
NK_adaptive,SP1,0.34739137,7,3,0.951166,0.9883599
pDC,STAT1,0.35960776,4,6,0.951166,0.8481143
Mono_inflammatory,STAT1,0.37076575,8,8,0.951166,0.8481143
Mono_regulatory,STAT1,0.42106943,8,8,0.951166,0.8481143
T_CD8_IFNresponse,SP1,0.49570302,8,8,0.951166,0.9883599
NK_CD56dimCD16,SP1,0.49667236,6,4,0.951166,0.9883599


### Plot

In [111]:
# Responder boxplots for every Level2 subtype: facet rows are TFs, facet
# columns are Level1 groups, both axes free.
plot_response <- ggboxplot(
  data = data_response_level2,
  x = "Level2",
  y = "activity",
  fill = "Responder"
) +
  facet_grid(rows = vars(tfs), cols = vars(Level1), scales = "free") +
  scale_fill_manual(values = response_palette) +
  theme_bw() +
  ggtitle(label = "Response") +
  theme(axis.text.x = element_text(angle = 25, vjust = 0.5, hjust = 0.5))

# Saved as a 15 x 12 PDF.
ggsave(
  filename = "plot_response.pdf",
  plot = plot_response,
  device = "pdf",
  path = "results/Compare_SLE_subgroups/",
  width = 15,
  height = 12
)

# SLEDAI comparison

**Palette**

In [55]:
# SLEDAI ---------------------
# Fill colours keyed by the SLEDAI_category labels ("High" and "Low")
SLEDAI_palette <- c(
    "High" = "#103783",
    "Low" = "#9bafd9"
)


## Level 1

**Filter input DF**

In [56]:
# Long-format level-1 table (perez2022) for the SLEDAI comparison: keep rows
# labelled "High" or "Low", stack STAT1/SP1 into tfs/activity pairs and drop
# rows without an activity value.
data_sledai_perez_level1 <- perez2022_level1 %>%
  filter(SLEDAI_category %in% c("High", "Low")) %>%
  tidyr::pivot_longer(
    cols = c(STAT1, SP1),
    names_to = "tfs",
    values_to = "activity"
  ) %>%
  filter(!is.na(activity))

In [57]:
# Long-format level-1 table (scgt00) for the SLEDAI comparison: keep rows
# labelled "High" or "Low", stack STAT1/SP1 into tfs/activity pairs and drop
# rows without an activity value.
data_sledai_scgt_level1 <- scgt00_level1 %>%
  filter(SLEDAI_category %in% c("High", "Low")) %>%
  tidyr::pivot_longer(
    cols = c(STAT1, SP1),
    names_to = "tfs",
    values_to = "activity"
  ) %>%
  filter(!is.na(activity))

**p-values**

In [64]:
# perez2022, per Level1 group and TF: t-test of activity between
# SLEDAI_category labels, with BH-adjusted p-values.
pvals_sledai_perez <- data_sledai_perez_level1 %>%
  group_by(Level1, tfs) %>%
  filter(n_distinct(SLEDAI_category) == 2) %>% # Keep groups where both SLEDAI categories occur
  filter(all(table(SLEDAI_category) >= 2)) %>% # Require at least two rows per category
  summarise(
    t_test = list(broom::tidy(t.test(activity ~ SLEDAI_category))),
    # Row counts per category (labelled as patients)
    n_patients_high = sum(SLEDAI_category == "High"),
    n_patients_low = sum(SLEDAI_category == "Low"),  
    .groups = "drop"
  ) %>%
  tidyr::unnest(t_test) %>%
  select(Level1, tfs, p.value, n_patients_high, n_patients_low) %>%
  mutate(
    # Adjusted over all Level1/TF tests together
    pval_adj = p.adjust(p.value, method = "BH")
  ) %>%
  group_by(tfs) %>% # Second adjustment, computed separately within each TF
  mutate(
    pval_adj_indep = p.adjust(p.value, method = "BH")
  ) %>%
  ungroup() %>%
  filter(
    pval_adj < 1
  ) %>%
  arrange(p.value)
pvals_sledai_perez

Level1,tfs,p.value,n_patients_high,n_patients_low,pval_adj,pval_adj_indep
<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<dbl>
ILC,SP1,0.4845364,5,107,0.7976467,0.5414977
Mono,SP1,0.5220713,5,108,0.7976467,0.5414977
T_CD4_Naive,SP1,0.5308101,5,108,0.7976467,0.5414977
T_CD8_NonNaive,SP1,0.5414977,5,108,0.7976467,0.5414977
T_CD8_NonNaive,STAT1,0.5697476,5,108,0.7976467,0.8990487
T_CD8_Naive,STAT1,0.868656,5,104,0.8990487,0.8990487
Mono,STAT1,0.8990487,5,108,0.8990487,0.8990487


In [63]:
# scgt00, per Level1 group and TF: t-test of activity between
# SLEDAI_category labels, with BH-adjusted p-values.
pvals_sledai_scgt <- data_sledai_scgt_level1 %>%
  group_by(Level1, tfs) %>%
  filter(n_distinct(SLEDAI_category) == 2) %>% # Keep groups where both SLEDAI categories occur
  filter(all(table(SLEDAI_category) >= 2)) %>% # Require at least two rows per category
  summarise(
    t_test = list(broom::tidy(t.test(activity ~ SLEDAI_category))),
    # Row counts per category (labelled as patients)
    n_patients_high = sum(SLEDAI_category == "High"),
    n_patients_low = sum(SLEDAI_category == "Low"),  
    .groups = "drop"
  ) %>%
  tidyr::unnest(t_test) %>%
  select(Level1, tfs, p.value, n_patients_high, n_patients_low) %>%
  mutate(
    # Adjusted over all Level1/TF tests together
    pval_adj = p.adjust(p.value, method = "BH")
  ) %>%
  group_by(tfs) %>% # Second adjustment, computed separately within each TF
  mutate(
    pval_adj_indep = p.adjust(p.value, method = "BH")
  ) %>%
  ungroup() %>%
  filter(
    pval_adj < 1
  ) %>%
  arrange(p.value)
pvals_sledai_scgt

Level1,tfs,p.value,n_patients_high,n_patients_low,pval_adj,pval_adj_indep
<chr>,<chr>,<dbl>,<int>,<int>,<dbl>,<dbl>
Mono,SP1,0.1046637,7,9,0.6559515,0.4186549
T_CD8_Naive,STAT1,0.145767,7,9,0.6559515,0.728835
ILC,SP1,0.2791148,7,9,0.7442559,0.4399581
T_CD8_NonNaive,SP1,0.3307804,7,9,0.7442559,0.4399581
T_CD4_Naive,SP1,0.4399581,7,9,0.7919246,0.4399581
Plasma,STAT1,0.6079418,2,4,0.9119127,0.9895141
Mono,STAT1,0.8898138,7,9,0.9895141,0.9895141
pDC,STAT1,0.9227868,4,6,0.9895141,0.9895141
T_CD8_NonNaive,STAT1,0.9895141,7,9,0.9895141,0.9895141


## Level 2

In [66]:
# Long-format level-2 tables for the SLEDAI comparison, one per cohort:
# keep rows labelled "High" or "Low", stack STAT1/SP1 into tfs/activity
# pairs, drop missing activities, and order `tfs` as STAT1 then SP1.
data_sledai_perez_level2 <- perez2022_level2 %>%
  filter(SLEDAI_category %in% c("High", "Low")) %>%
  tidyr::pivot_longer(
    cols = c(STAT1, SP1),
    names_to = "tfs",
    values_to = "activity"
  ) %>%
  filter(!is.na(activity)) %>%
  mutate(tfs = factor(tfs, levels = c("STAT1", "SP1")))

data_sledai_scgt_level2 <- scgt00_level2 %>%
  filter(SLEDAI_category %in% c("High", "Low")) %>%
  tidyr::pivot_longer(
    cols = c(STAT1, SP1),
    names_to = "tfs",
    values_to = "activity"
  ) %>%
  filter(!is.na(activity)) %>%
  mutate(tfs = factor(tfs, levels = c("STAT1", "SP1")))

In [72]:
# perez2022, per Level2 subtype and TF: t-test of activity between
# SLEDAI_category labels, with BH-adjusted p-values; only rows with raw
# p < 0.5 are kept.
pvals_sledai_perez_level2 <- data_sledai_perez_level2 %>%
  group_by(Level2, tfs) %>%
  filter(n_distinct(SLEDAI_category) == 2) %>% # Keep groups where both SLEDAI categories occur
  filter(all(table(SLEDAI_category) >= 2)) %>% # Require at least two rows per category
  summarise(
    t_test = list(broom::tidy(t.test(activity ~ SLEDAI_category))),
    # Row counts per category (labelled as patients)
    n_patients_high = sum(SLEDAI_category == "High"),
    n_patients_low = sum(SLEDAI_category == "Low"),  
    .groups = "drop"
  ) %>%
  tidyr::unnest(t_test) %>%
  select(Level2, tfs, p.value, n_patients_high, n_patients_low) %>%
  mutate(
    # Adjusted over all Level2/TF tests together
    pval_adj = p.adjust(p.value, method = "BH")
  ) %>%
  group_by(tfs) %>% # Second adjustment, computed separately within each TF
  mutate(
    pval_adj_indep = p.adjust(p.value, method = "BH")
  ) %>%
  ungroup() %>%
  filter(
    p.value < 0.5
  ) %>%
  arrange(p.value)
pvals_sledai_perez_level2

Level2,tfs,p.value,n_patients_high,n_patients_low,pval_adj,pval_adj_indep
<chr>,<fct>,<dbl>,<int>,<int>,<dbl>,<dbl>
Mono_regulatory,STAT1,0.0001527686,2,60,0.004735825,0.001985991
T_CD8_eff_HOBIT,SP1,0.0004229089,2,36,0.006555087,0.007612359
T_CD8_activated,SP1,0.025010499,4,87,0.258441823,0.225094491
T_CD8_eff_HOBIT,STAT1,0.0987575796,2,36,0.765371242,0.641924267
T_CD8_EM_CX3CR1int,STAT1,0.2058104531,4,95,0.83661449,0.660407053
NK_adaptive,SP1,0.2270014522,4,83,0.83661449,0.842012003
T_CD8_Mem_cytotoxic,STAT1,0.2909070066,4,99,0.83661449,0.660407053
Mono_classical,STAT1,0.3111002409,5,108,0.83661449,0.660407053
T_CD8_CM,SP1,0.3585504896,5,107,0.83661449,0.842012003
NK_CD56dimCD16,SP1,0.3708857195,5,106,0.83661449,0.842012003


In [71]:
# scgt00, per Level2 subtype and TF: t-test of activity between
# SLEDAI_category labels, with BH-adjusted p-values; only rows with raw
# p < 0.5 are kept.
pvals_sledai_scgt_level2 <- data_sledai_scgt_level2 %>%
  group_by(Level2, tfs) %>%
  filter(n_distinct(SLEDAI_category) == 2) %>% # Keep groups where both SLEDAI categories occur
  filter(all(table(SLEDAI_category) >= 2)) %>% # Require at least two rows per category
  summarise(
    t_test = list(broom::tidy(t.test(activity ~ SLEDAI_category))),
    # Row counts per category (labelled as patients)
    n_patients_high = sum(SLEDAI_category == "High"),
    n_patients_low = sum(SLEDAI_category == "Low"),  
    .groups = "drop"
  ) %>%
  tidyr::unnest(t_test) %>%
  select(Level2, tfs, p.value, n_patients_high, n_patients_low) %>%
  mutate(
    # Adjusted over all Level2/TF tests together
    pval_adj = p.adjust(p.value, method = "BH")
  ) %>%
  group_by(tfs) %>% # Second adjustment, computed separately within each TF
  mutate(
    pval_adj_indep = p.adjust(p.value, method = "BH")
  ) %>%
  ungroup() %>%
  filter(
    p.value < 0.5
  ) %>%
  arrange(p.value)
pvals_sledai_scgt_level2

Level2,tfs,p.value,n_patients_high,n_patients_low,pval_adj,pval_adj_indep
<chr>,<fct>,<dbl>,<int>,<int>,<dbl>,<dbl>
Mono_regulatory,SP1,0.044259,7,9,0.4959954,0.3399968
Mono_inflammatory,SP1,0.05828507,7,9,0.4959954,0.3399968
T_CD8_Naive,STAT1,0.07307147,7,9,0.4959954,0.8302714
Mono_IFNresponse,SP1,0.07765514,7,7,0.4959954,0.3399968
Mono_classical,SP1,0.07999925,7,9,0.4959954,0.3399968
Mono_nonClassical,SP1,0.13421888,5,8,0.6128193,0.4563442
T_CD8_CM_stem,STAT1,0.16921096,5,6,0.6128193,0.8302714
T_CD8_CM_stem,SP1,0.17161043,5,6,0.6128193,0.4862295
Mono_inflammatory,STAT1,0.17791529,7,9,0.6128193,0.8302714
Mono_IFNresponse,STAT1,0.26054512,7,7,0.6982245,0.9075016


### Plot

In [74]:
# Stack the level-2 SLEDAI tables of both cohorts for plotting
toplot_sledai <- rbind(data_sledai_perez_level2, data_sledai_scgt_level2)

In [75]:
# SLEDAI boxplots for every Level2 subtype: facet rows are TF x studyID,
# facet columns are Level1 groups, both axes free.
# Stored as plot_sledai so that the responder figure held in plot_response
# is not overwritten.
plot_sledai <- ggboxplot(toplot_sledai, x = "Level2", y = "activity", fill = "SLEDAI_category") +
    facet_grid(tfs + studyID ~ Level1, scales = "free") +
    scale_fill_manual(values = SLEDAI_palette) +
    theme_bw() +
    ggtitle(label = "SLEDAI category") +
    theme(
        axis.text.x = element_text(angle = 25, vjust = 0.5, hjust=0.5)
)

ggsave(
    plot_sledai,
    filename = "plot_sledai.pdf",
    device = "pdf",
    width = 15,
    height = 12,
    path = "results/Compare_SLE_subgroups/")