Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ License: CC BY-NC-SA 4.0
Encoding: UTF-8
Imports:
dplyr,
broom,
ggplot2,
ggpubr,
tsne,
cowplot,
ggraph,
graphics,
magrittr,
rlang,
Expand All @@ -35,9 +38,11 @@ Imports:
GenomeInfoDb,
GenomicRanges,
stats,
survival,
S4Vectors,
grDevices,
tools
tools,
pROC
Suggests:
biomaRt,
circlize,
Expand All @@ -48,15 +53,13 @@ Suggests:
rmarkdown,
DESeq2,
ggnewscale,
ggraph,
ggrepel,
igraph,
matrixStats,
mockery,
openxlsx,
pheatmap,
readr,
survival,
survminer,
tidygraph,
tidyr,
Expand Down
49 changes: 49 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
# Generated by roxygen2: do not edit by hand

S3method(print,circos_genome)
S3method(print,get_glm_result)
export(add_annotations)
export(addgenesPA)
export(concordanceDE)
export(crossLayerCorr)
export(detect_filter)
export(do_clust)
export(geneset_similarity)
export(get_annotations)
export(get_cox)
export(get_glm)
export(get_network_communities)
export(get_stars)
export(get_superterm)
Expand All @@ -21,32 +26,76 @@ export(merge_PA)
export(multiplot_PA)
export(network_clust)
export(network_clust_gg)
export(nice_ConcordanceScatter)
export(nice_GenomeTrack)
export(nice_KM)
export(nice_PCA)
export(nice_ROC)
export(nice_UMAP)
export(nice_VSB)
export(nice_VSB_DEseq2)
export(nice_Volcano)
export(nice_circos)
export(nice_forest)
export(nice_tSNE)
export(power_analysis)
export(resolve_genome)
export(save_results)
export(split_cases)
export(splot_PA)
export(tpm)
export(trend_filter)
import(ggplot2)
importFrom(S4Vectors,"mcols<-")
importFrom(S4Vectors,mcols)
importFrom(broom,tidy)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,filter)
importFrom(dplyr,mutate)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(ggplot2,aes)
importFrom(ggplot2,annotate)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_rect)
importFrom(ggplot2,element_text)
importFrom(ggplot2,geom_abline)
importFrom(ggplot2,geom_errorbar)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,geom_vline)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_color_manual)
importFrom(ggplot2,scale_x_log10)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_classic)
importFrom(ggplot2,theme_minimal)
importFrom(grDevices,adjustcolor)
importFrom(grDevices,dev.off)
importFrom(grDevices,pdf)
importFrom(grid,gpar)
importFrom(magrittr,"%>%")
importFrom(pROC,auc)
importFrom(pROC,ci.auc)
importFrom(pROC,roc)
importFrom(pROC,roc.test)
importFrom(pROC,smooth)
importFrom(patchwork,plot_layout)
importFrom(rlang,.data)
importFrom(stats,AIC)
importFrom(stats,as.formula)
importFrom(stats,complete.cases)
importFrom(stats,glm)
importFrom(stats,na.omit)
importFrom(stats,nobs)
importFrom(stats,p.adjust)
importFrom(stats,predict)
importFrom(stats,relevel)
importFrom(stats,setNames)
importFrom(survival,Surv)
importFrom(survival,coxph)
importFrom(tibble,tibble)
importFrom(tools,file_ext)
importFrom(utils,modifyList)
183 changes: 183 additions & 0 deletions R/concordanceDE.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
##############################
# Function concordanceDE #
##############################

#' Classify differential results by cross-layer concordance.
#'
#' Compares two differential-analysis tables from paired omics layers, such as
#' RNA-seq and total-protein RPPA, by merging shared genes and classifying each
#' gene according to statistical significance and log-fold-change direction.
#'
#' Genes are assigned to one of five categories: `concordant`, `discordant`,
#' `only_x`, `only_y`, or `not_significant`. A concordance score is also
#' computed as the fraction of genes significant in both layers that have the
#' same effect direction.
#'
#' @param de_x Data frame for layer X. Must contain columns defined by
#' `gene_col`, `logfc_col`, and `padj_col`.
#' @param de_y Data frame for layer Y. Must contain columns defined by
#' `gene_col`, `logfc_col`, and `padj_col`.
#' @param gene_col Column name containing gene identifiers. Default: `"gene"`.
#' @param logfc_col Column name containing log-fold changes. Default: `"logFC"`.
#' @param padj_col Column name containing adjusted p-values/FDR values.
#' Default: `"padj"`.
#' @param padj_threshold Numeric threshold for adjusted p-values. Use a single
#' value for both layers or a length-two vector for layer X and layer Y,
#' respectively. Default: `0.05`.
#' @param logfc_threshold Numeric absolute log-fold-change threshold. Use a
#' single value for both layers or a length-two vector for layer X and layer Y,
#' respectively. Default: `1`.
#'
#' @return A list with:
#' \itemize{
#' \item `table`: merged data frame with one `category` column.
#' \item `concordance_score`: fraction of genes significant in both layers
#' with matching log-fold-change direction.
#' \item `summary`: table with counts per concordance category.
#' \item `n_shared_genes`: number of genes shared by both layers.
#' \item `n_both_significant`: number of genes significant in both layers.
#' }
#'
#' @examples
#' de_rna <- data.frame(
#' gene = c("ESR1", "PGR", "EGFR", "MKI67", "GATA3"),
#' logFC = c(3.2, 2.1, -1.6, -1.4, 1.8),
#' padj = c(1e-6, 0.002, 0.01, 0.03, 0.04)
#' )
#'
#' de_protein <- data.frame(
#' gene = c("ESR1", "PGR", "EGFR", "MKI67", "GATA3"),
#' logFC = c(0.7, 0.4, -0.5, 0.3, 0.05),
#' padj = c(1e-4, 0.01, 0.02, 0.04, 0.40)
#' )
#'
#' res <- concordanceDE(
#' de_x = de_rna,
#' de_y = de_protein,
#' logfc_threshold = c(1, 0.2)
#' )
#' res$summary
#' res$concordance_score
#'
#' @importFrom dplyr case_when
#'
#' @seealso
#' [nice_ConcordanceScatter()] to visualize the output of `concordanceDE()`;
#' [crossLayerCorr()] to estimate sample-level concordance between two omics
#' layers.
#'
#' @export
concordanceDE <- function(
de_x,
de_y,
gene_col = "gene",
logfc_col = "logFC",
padj_col = "padj",
padj_threshold = 0.05,
logfc_threshold = 1
) {
de_x <- as.data.frame(de_x)
de_y <- as.data.frame(de_y)

required_cols <- c(gene_col, logfc_col, padj_col)
missing_x <- setdiff(required_cols, names(de_x))
missing_y <- setdiff(required_cols, names(de_y))

if (length(missing_x) > 0) {
stop(
"`de_x` is missing required columns: ",
paste(missing_x, collapse = ", "),
call. = FALSE
)
}

if (length(missing_y) > 0) {
stop(
"`de_y` is missing required columns: ",
paste(missing_y, collapse = ", "),
call. = FALSE
)
}

de_x <- de_x[!is.na(de_x[[gene_col]]) & nzchar(trimws(de_x[[gene_col]])), , drop = FALSE]
de_y <- de_y[!is.na(de_y[[gene_col]]) & nzchar(trimws(de_y[[gene_col]])), , drop = FALSE]

if (anyDuplicated(de_x[[gene_col]]) > 0) {
stop("`de_x` must contain one row per gene in `gene_col`.", call. = FALSE)
}

if (anyDuplicated(de_y[[gene_col]]) > 0) {
stop("`de_y` must contain one row per gene in `gene_col`.", call. = FALSE)
}

if (!is.numeric(padj_threshold) || length(padj_threshold) > 2) {
stop("`padj_threshold` must be a numeric vector of length 1 or 2.", call. = FALSE)
}

if (!is.numeric(logfc_threshold) || length(logfc_threshold) > 2) {
stop("`logfc_threshold` must be a numeric vector of length 1 or 2.", call. = FALSE)
}

padj_threshold <- rep(padj_threshold, length.out = 2)
logfc_threshold <- rep(logfc_threshold, length.out = 2)

shared <- merge(de_x, de_y, by = gene_col, suffixes = c("_x", "_y"))

if (nrow(shared) == 0) {
stop("No shared genes were found between `de_x` and `de_y`.", call. = FALSE)
}

lfc_x_col <- paste0(logfc_col, "_x")
lfc_y_col <- paste0(logfc_col, "_y")
padj_x_col <- paste0(padj_col, "_x")
padj_y_col <- paste0(padj_col, "_y")

lfc_x <- suppressWarnings(as.numeric(shared[[lfc_x_col]]))
lfc_y <- suppressWarnings(as.numeric(shared[[lfc_y_col]]))
padj_x <- suppressWarnings(as.numeric(shared[[padj_x_col]]))
padj_y <- suppressWarnings(as.numeric(shared[[padj_y_col]]))

sig_x <- !is.na(lfc_x) & !is.na(padj_x) &
abs(lfc_x) >= logfc_threshold[1] & padj_x < padj_threshold[1]
sig_y <- !is.na(lfc_y) & !is.na(padj_y) &
abs(lfc_y) >= logfc_threshold[2] & padj_y < padj_threshold[2]

dir_x <- sign(lfc_x)
dir_y <- sign(lfc_y)

shared$category <- dplyr::case_when(
sig_x & sig_y & dir_x == dir_y ~ "concordant",
sig_x & sig_y & dir_x != dir_y ~ "discordant",
sig_x & !sig_y ~ "only_x",
!sig_x & sig_y ~ "only_y",
TRUE ~ "not_significant"
)

category_levels <- c(
"concordant",
"discordant",
"only_x",
"only_y",
"not_significant"
)

shared$category <- factor(shared$category, levels = category_levels)

both_sig <- sig_x & sig_y
concordance_score <- if (any(both_sig)) {
mean(dir_x[both_sig] == dir_y[both_sig], na.rm = TRUE)
} else {
NA_real_
}

out <- list(
table = shared,
concordance_score = round(concordance_score, 4),
summary = table(shared$category),
n_shared_genes = nrow(shared),
n_both_significant = sum(both_sig, na.rm = TRUE)
)

class(out) <- c("omicskit_concordanceDE", "list")
out
}
Loading
Loading