In [None]:
source(paste0(dirname(dirname(dirname(getwd()))),'/map.r'))
source(paste0(HELP_DIR, "shortcuts.r"))
source(paste0(HELP_DIR, "helpers.r"))
source(paste0(HELP_DIR, "fisher.r"))

In [None]:
library(cluster)
library(purrr)

# Read prepared cohort data

## Extract top results

- Read in the prepared cohort data and the univariate Fisher's exact test output

In [None]:
# Univariate Fisher's exact test output, restricted to rows whose feature
# name contains "driver". `fi` is a project shortcut (presumably
# dplyr::filter -- defined in the sourced helper scripts).
univariate_results <-
  fread(paste0(SHARE_DIR, "1_run_fishers_exact.csv")) %>%
  fi(grepl("driver", feature))

# Per-sample table read by the clustering and combination steps below.
fisher_base <-
  fread(paste0(SHARE_DIR, "fisher_base.csv"))

In [None]:
# Minimum event count. Not referenced anywhere else in the visible notebook.
# NOTE(review): possibly read by a sourced helper or left over -- confirm
# before removing.
min_events <- 1

- Collect top independent features

In [None]:
# Features passing the p-value cut-off, ranked by p-value within each cohort.
# `combination_threshold` is not defined in this notebook; it is expected to
# come from one of the sourced scripts (TODO confirm).
top <- univariate_results %>%
  fi(fisher_pval < combination_threshold) %>%
  gb(cohortGo) %>%
  mu(rk = row_number(fisher_pval)) %>%
  ug()

# Add Clusters

- Add cluster labels for combination features

In [None]:
#' Cluster features by absolute correlation, choosing k by silhouette width
#'
#' Builds a distance of `1 - |correlation|` between the columns of `data`,
#' runs hierarchical clustering on it, cuts the tree at every k from 2 up to
#' `min(max_clusters, n_features - 1)` and keeps the cut with the highest
#' mean silhouette width.
#'
#' @param data Numeric table with features in columns. If it has fewer rows
#'   than columns it is transposed first (with a warning).
#' @param method Linkage method passed to `hclust()`.
#' @param max_clusters Largest number of clusters to evaluate.
#' @param corr_method Correlation method passed to `cor()`.
#' @return The `cutree()` assignment (one cluster id per feature) for the
#'   best-scoring k. The chosen k is also printed with `cat()`.
#' @details Stops with an error when fewer than two candidate values of k
#'   exist (i.e. fewer than 3 features, or `max_clusters < 2`), because a
#'   silhouette cannot be scored in that case.
auto_cluster_features <- function(data, method = "average", max_clusters = 5, corr_method = "pearson") {
  # Orientation heuristic: wider-than-tall input is assumed to have features
  # in rows. NOTE(review): this also fires for legitimately wide data with
  # more features than samples -- confirm that callers never pass such input.
  if (nrow(data) < ncol(data)) {
    warning("Data has more features than samples; transposing for correlation.")
    data <- t(data)
  }

  # silhouette() needs 2 <= k <= n - 1. Without this guard `2:max_k` would
  # count downwards and fail later with an unrelated indexing error.
  n_features <- ncol(data)
  max_k <- min(max_clusters, n_features - 1)
  if (max_k < 2) {
    stop(
      "auto_cluster_features() needs at least 3 features and max_clusters >= 2 ",
      "to score a clustering; got ", n_features, " feature(s) and max_clusters = ",
      max_clusters, ".",
      call. = FALSE
    )
  }

  # Step 1: correlation matrix -> distance (strong +/- correlation = close)
  corr_matrix <- cor(data, method = corr_method, use = "pairwise.complete.obs")
  dist_matrix <- as.dist(1 - abs(corr_matrix))

  # Step 2: hierarchical clustering
  hc <- hclust(dist_matrix, method = method)

  # Step 3: keep the cut with the highest mean silhouette width
  best_score <- -Inf
  best_k <- 2
  best_clusters <- NULL

  for (k in 2:max_k) {
    clusters <- cutree(hc, k = k)
    sil <- silhouette(clusters, dist_matrix)
    avg_sil <- mean(sil[, "sil_width"])

    # Strict `>` keeps the smallest k on ties
    if (avg_sil > best_score) {
      best_score <- avg_sil
      best_k <- k
      best_clusters <- clusters
    }
  }

  cat("Best number of clusters:", best_k, "\n")
  return(best_clusters)
}

In [None]:
# Cluster the top features of one cohort and return one row per feature.
#
# i: cohort label matched against `cohortGo` in the global tables `top` and
#    `fisher_base`.
# Returns a data frame with columns `feature`, `cluster` and `cohortGo`.
add_cluster_labels <- function(i = "Skin Melanoma ## Immunotherapy"){
  # Names of the top features recorded for this cohort
  cohort_features <- top %>% fi(cohortGo == i) %>% pu(feature)

  # Cohort rows restricted to those feature columns (missing ones are ignored)
  cohort_values <- fisher_base %>%
    fi(cohortGo == i) %>%
    se(any_of(cohort_features))

  # Cluster id per feature, named by feature
  assignments <- auto_cluster_features(cohort_values)

  data.frame(
    feature = names(assignments),
    cluster = unname(assignments),
    stringsAsFactors = FALSE
  ) %>%
    mu(cohortGo = i)
}

- Compute cluster labels

In [None]:
# Collect cluster labels for every cohort present in `top`.
# A cohort whose clustering fails is skipped, but the reason is reported
# instead of being silently discarded.
cluster_labels <- data.frame()
for (i in unique(top$cohortGo)) {
  print(i)
  tmp <- tryCatch(
    add_cluster_labels(i),
    error = function(e) {
      message("Skipping cluster labels for '", i, "': ", conditionMessage(e))
      NA
    }
  )
  # Only successful runs return a data frame
  if (is.data.frame(tmp)) cluster_labels <- rbind(cluster_labels, tmp)
}

- Append cluster labels to top features

In [None]:
# Attach cluster labels to the top features, re-rank by p-value inside each
# (cohort, cluster) group and keep the first-ranked feature of every group.
top_go <- top %>%
  lj(cluster_labels, by = c("cohortGo", "feature")) %>%
  gb(cohortGo, cluster) %>%
  mu(rk = row_number(fisher_pval)) %>%
  fi(rk <= 1) %>%
  se(cohortGo, feature, cluster) %>%
  ug()

- Build all pairwise combinations of the top features within each cohort

In [None]:
# For every cohort, enumerate all unordered pairs of its selected features.
# Cohorts with fewer than two selected features get no entry.
feature_pair <- list()
for (i in unique(top$cohortGo)) {
  features <- top_go %>% fi(i == cohortGo) %>% pu(feature)
  if (length(features) <= 1) next
  feature_pair[[i]][["pairs"]] <- combn(features, 2, simplify = FALSE)
}

# Compute combination features

- Compute combination features (AND / OR) for a pair of features

In [None]:
# Build the AND / OR combination columns for one pair of features in a cohort.
#
# i:    cohort label matched against `cohortGo` in the global `fisher_base`.
# pair: two column names of `fisher_base`. The arithmetic below assumes the
#       columns are coded 0/1 -- TODO confirm against the data preparation.
#
# Returns the cohort's rows with `sampleId`, `cohortGo`, `non_response` and
# two new columns, `<a>_and_<b>` and `<a>_or_<b>`; both are NA wherever either
# input is NA. The two input columns are dropped.
add_combination_feature <- function(i = "Skin Melanoma ## Immunotherapy", pair = c('clin_hasRadiotherapyPreTreatment','driver_B2M')){
  stopifnot(length(pair) == 2L)

  first_col <- sym(pair[1])
  second_col <- sym(pair[2])
  and_name <- paste0(pair[1], "_and_", pair[2])
  or_name <- paste0(pair[1], "_or_", pair[2])

  fisher_base %>%
    filter(cohortGo == i) %>%
    select(sampleId, cohortGo, non_response, any_of(pair)) %>%
    mutate(
      # NA_real_ (not bare NA) keeps every branch numeric, which older dplyr
      # releases require in case_when()
      !!and_name := case_when(
        is.na(!!first_col) | is.na(!!second_col) ~ NA_real_,
        (!!first_col + !!second_col) == 2 ~ 1,
        TRUE ~ 0
      ),
      !!or_name := case_when(
        is.na(!!first_col) | is.na(!!second_col) ~ NA_real_,
        (!!first_col + !!second_col) >= 1 ~ 1,
        TRUE ~ 0
      )
    ) %>% se(-any_of(pair))
}

- Build and join the combination features for every feature pair in a cohort

In [None]:
# Build the combination features for every feature pair of one cohort and
# join them into a single table.
#
# i: cohort label; used to look up pairs in the global `feature_pair` and
#    cluster ids in the global `top_go`.
#
# Returns one data frame keyed by `sampleId`, `cohortGo` and `non_response`
# with the AND / OR columns of every pair whose two features fall in
# different clusters, or NULL when the cohort has no such pair.
cohort_combinations <- function(i = "Skin Melanoma ## Immunotherapy"){
  pairs <- feature_pair[[i]][['pairs']]
  combos <- list()
  # seq_along() iterates zero times on an empty/NULL `pairs`
  for (j in seq_along(pairs)) {
    pair_ready <- pairs[[j]]
    print(pair_ready)
    clusters <- length(unique(top_go %>% fi(cohortGo == i, feature %in% pair_ready) %>% pu(cluster)))
    # Only combine features that come from different clusters
    if (clusters > 1) {
      combos[[j]] <- add_combination_feature(i = i, pair = pair_ready)
    }
  }
  # Skipped pairs leave NULL holes in `combos`
  clean_combos <- Filter(Negate(is.null), combos)
  if (length(clean_combos) == 0) {
    return(NULL)
  }
  reduce(
    clean_combos,
    function(x, y) inner_join(x, y, by = c("sampleId", "cohortGo", "non_response"))
  )
}

- Collect values over cohorts

In [None]:
# One joined combination table per cohort that has at least one feature pair.
combos_ready <- list()
for (i in names(feature_pair)) {
  # Progress output, flushed so it shows up while the loop is still running
  print(i)
  flush.console()
  combos_ready[[i]] <- cohort_combinations(i = i)
}

# Compute Fisher's Exact tests

- Store combination results

In [None]:
# Empty accumulator; the test loop further down row-binds each cohort's results onto it.
combo_results <- data.frame()

In [None]:
# Drop columns that hold a single distinct value, but always keep `cohortGo`.
#
# df: a data frame that contains a `cohortGo` column.
# Returns `df` restricted to the varying columns (in their original order)
# followed by `cohortGo` if it was not already among them. NA counts as a
# distinct value.
remove_no_variation_except_cohortGo <- function(df) {
  # vapply() always yields a logical vector, even for zero columns
  varies <- vapply(df, function(col) length(unique(col)) > 1, logical(1))
  keep_cols <- union(names(df)[varies], "cohortGo")  # Always keep cohortGo
  df %>% select(all_of(keep_cols))
}

In [None]:
# Run the Fisher tests on every cohort's combination table and stack the
# results onto `combo_results`. A cohort whose test run fails is skipped, and
# the reason is reported instead of being silently discarded.
for (i in names(combos_ready)) {
  print(i); flush.console()
  tmp <- tryCatch(
    ra_formatter_and_test(combos_ready[[i]] %>% se(-sampleId)),
    error = function(e) {
      message("Fisher tests failed for '", i, "': ", conditionMessage(e))
      NA
    }
  )

  # Only successful runs return a data frame
  if (is.data.frame(tmp)) {
    combo_results <- rbind(combo_results, tmp)
  }
}

# Combine Results back together

In [None]:
# Stack univariate and combination results, tagging each row with its origin.
together <- rbind(
  univariate_results %>% mu(type = "univariate"),
  combo_results %>% mu(type = "combination")
)

In [None]:
# Pan-Cancer combination results in the Non-Response direction, excluding
# features whose name contains "perplexity".
top <- together %>%
  fi(
    cohortGo == "Pan-Cancer",
    direction == "Non-Response",
    type == "combination",
    !grepl("perplexity", feature)
  )

In [None]:
# Display the Pan-Cancer combination results, ordered by p-value.
combo_results %>%
  mu(type = "combination") %>%
  ar(fisher_pval) %>%
  fi(cohortGo == "Pan-Cancer")

In [None]:
# Names of the first 16 Pan-Cancer univariate Non-Response features after
# ordering by `fisher_pval`, excluding features whose name contains
# "perplexity". `ar` / `pu` are project shortcuts (presumably dplyr::arrange /
# dplyr::pull -- confirm in the sourced shortcuts script).
# This overwrites the `top` table used by the earlier cells.
top <- 
together %>% 
 fi(cohortGo == "Pan-Cancer", direction == "Non-Response", type == "univariate", !grepl("perplexity", feature)) %>% 
 ar(fisher_pval) %>% 
 head(16) %>% 
 pu(feature)

In [None]:
# Notebook plot display size; same width/height as the ggsave() call for this figure.
options(repr.plot.width = 24, repr.plot.height = 12)

In [None]:
# Forest-style plot of the selected features (`top` holds their names), one
# facet per feature with one row per Pan-Cancer cohort.
# The x axis is log(OR + 1), so the null effect OR = 1 is drawn at log(2).
together %>%
  ar(fisher_pval) %>%
  fi(
    feature %in% top,
    !cohortGo %in% c("Prostate ## Hormonal therapy", "Pan-Cancer ## Anti-AR"),
    grepl("Pan-Cancer", cohortGo)
  ) %>%
  mu(cohortGo = fct_reorder(cohortGo, or, .desc = TRUE)) %>%
  ggplot(aes(x = log(or + 1), y = cohortGo)) +
  facet_wrap(~feature, ncol = 8) +
  geom_point(size = 3, color = "blue") +
  # Horizontal bars for the confidence intervals (same transform as the points)
  geom_errorbarh(aes(xmin = log(ci_low + 1), xmax = log(ci_high + 1)), height = 0.2, color = "red") +
  # Null effect: OR = 1  ->  log(1 + 1) = log(2) on this axis
  geom_vline(xintercept = log(2), linetype = "dashed", color = "gray") +
  # Axis label names the transform actually plotted
  labs(x = "log(Odds Ratio of Response + 1)", y = "Study", title = "Prognostic Driver Signals") +
  theme_minimal() +
  # NOTE(review): xlim() drops rows whose point or interval falls outside
  # [0, 2]; coord_cartesian(xlim = c(0, 2)) would zoom without dropping.
  xlim(0, 2) +
  theme(
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    title = element_text(size = 14)
  )

In [None]:
# Save the figure; no `plot` is passed, so ggsave() uses its default plot.
ggsave(
  filename = "prognostic_driver_signals.png",
  width = 24,
  height = 12
)

- Combinations

In [None]:
# Unique feature names among the first 24 Non-Response combination rows
# (across all cohorts) after ordering by `fisher_pval`, excluding features
# whose name contains "perplexity". Because unique() runs after head(24), the
# result may hold fewer than 24 names.
top <- 
together %>% 
 fi(direction == "Non-Response", type == "combination", !grepl("perplexity", feature)) %>% 
 ar(fisher_pval) %>% 
 head(24) %>% 
 pu(feature) %>% 
 unique()

In [None]:
# Notebook plot display size; same width/height as the ggsave() call for this figure.
options(repr.plot.width = 16, repr.plot.height = 8)

In [None]:
# Forest-style plot of the selected combination features (`top` holds their
# names), one facet per Pan-Cancer cohort with one row per feature.
# The x axis is log(OR + 1), so the null effect OR = 1 is drawn at log(2).
together %>%
  ar(fisher_pval) %>%
  fi(
    feature %in% top,
    !cohortGo %in% c("Prostate ## Hormonal therapy", "Pan-Cancer ## Anti-AR"),
    grepl("Pan-Cancer", cohortGo)
  ) %>%
  mu(cohortGo = fct_reorder(cohortGo, or, .desc = TRUE)) %>%
  ggplot(aes(x = log(or + 1), y = feature)) +
  facet_wrap(~cohortGo, ncol = 8) +
  geom_point(size = 3, color = "blue") +
  # Horizontal bars for the confidence intervals (same transform as the points)
  geom_errorbarh(aes(xmin = log(ci_low + 1), xmax = log(ci_high + 1)), height = 0.2, color = "red") +
  # Null effect: OR = 1  ->  log(1 + 1) = log(2) on this axis
  geom_vline(xintercept = log(2), linetype = "dashed", color = "gray") +
  # The y axis shows features here, not studies
  labs(x = "log(Odds Ratio of Response + 1)", y = "Feature", title = "Prognostic Driver Signals") +
  theme_minimal() +
  # NOTE(review): xlim() drops rows whose point or interval falls outside
  # [0, 2]; coord_cartesian(xlim = c(0, 2)) would zoom without dropping.
  xlim(0, 2) +
  theme(
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    title = element_text(size = 14)
  )

In [None]:
# Save the figure; no `plot` is passed, so ggsave() uses its default plot.
ggsave(
  filename = "prognostic_driver_signals_combo.png",
  width = 16,
  height = 8
)