In [48]:
library(tidyverse)

library(grid)
library(gridExtra)
library(patchwork)

source("../evaluation_utils/plots_eda.R")
source("../evaluation_utils/evaluation_funcs.R")

In [49]:
# Print the version string of the running R session so that the notebook
# output records which R version was used.
R.version.string

In [50]:
# Six hex colours, presumably used by the plotting code.
# NOTE(review): the values look like the colour-blind-friendly Okabe-Ito set - confirm.
cbPalette <- c("#CC79A7", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00")

# Load data

In [51]:
# Root directory of the simulated data; the loops below append "<mode>/..." to it,
# so the trailing slash is required.
path_to_data <- "../evaluation_data/simulated/"
# Number of runs per mode; run files are addressed by the prefix 1..n_runs.
n_runs <- 30


In [53]:
# Compare the FedSim-corrected data with the R-corrected data for every
# simulation mode and run, and compute the LMPV table of the FedSim result.
#
# Reads (per mode / run j):
#   <path_to_data><mode>/all_metadata.tsv
#   <path_to_data><mode>/after/runs/<j>_FedSim_corrected.tsv
#   <path_to_data><mode>/after/runs/<j>_R_corrected.tsv
# Writes (per mode):
#   eval_simulation/<mode>/lmvp/<j>_after_correction_lmpv.tsv   (one per run)
#   eval_simulation/<mode>/differences_fedRBE_R.tsv        min/max/mean |R - FedSim| per run
#   eval_simulation/<mode>/error_expr_levels_fedRBE_R.tsv  min/max/mean relative difference (%) per run
for (mode in c("balanced", "mild_imbalanced", "strong_imbalanced")) {
    print(paste0("Processing mode: ", mode))

    # write.table() does not create missing directories, so make sure the
    # output tree exists. NOTE(review): "lmvp" is kept as in the original
    # path; it looks like a typo for "lmpv", but readers of these files may
    # rely on it.
    dir.create(paste0("eval_simulation/", mode, "/lmvp"),
               recursive = TRUE, showWarnings = FALSE)

    # One row per run; preallocated instead of being grown inside the loop.
    differences_table <- data.frame(
        min = rep(NA_real_, n_runs),
        max = rep(NA_real_, n_runs),
        mean = rep(NA_real_, n_runs)
    )
    error_expr_levels_table <- data.frame(
        min = rep(NA_real_, n_runs),
        max = rep(NA_real_, n_runs),
        mean = rep(NA_real_, n_runs)
    )

    metadata <- read.csv(paste0(path_to_data, mode, "/all_metadata.tsv"), sep = "\t")
    rownames(metadata) <- metadata$file

    # Model formula handed to lmpv_plot(); it is the same for every run.
    form <- ~ condition + lab

    # seq_len() gives an empty loop for n_runs == 0 (1:n_runs would visit 1 and 0).
    for (j in seq_len(n_runs)) {
        fedsim_results <- read.csv(
                paste0(path_to_data, mode, "/after/runs/", j, "_FedSim_corrected.tsv"),
                sep = "\t") %>%
            column_to_rownames("rowname")
        # Keep only the samples listed in the metadata, in metadata order.
        fedsim_results <- fedsim_results[, metadata$file]

        r_corrected <- read.csv(
                paste0(path_to_data, mode, "/after/runs/", j, "_R_corrected.tsv"),
                sep = "\t") %>%
            column_to_rownames("rowname")

        # Report row and column mismatches independently, so that a column
        # mismatch is not hidden when the row counts differ as well.
        if (nrow(r_corrected) != nrow(fedsim_results)) {
            print(paste0("Warning: size of R_corrected is different from FedSim_corrected for run ", j))
        }
        if (ncol(r_corrected) != ncol(fedsim_results)) {
            print(paste0("Warning: number of samples in R_corrected is different from FedSim_corrected for run ", j))
        }

        # Bring both tables to the same sample order and the same row order.
        r_corrected <- r_corrected[, metadata$file]
        fedsim_results <- fedsim_results[rownames(r_corrected), colnames(r_corrected)]

        # Calculate absolute element-wise differences.
        differences <- as.matrix(abs(r_corrected - fedsim_results))
        differences_table[j, ] <- c(
            min(differences, na.rm = TRUE),
            max(differences, na.rm = TRUE),
            mean(differences, na.rm = TRUE)
        )

        # Differences relative to the value in r_corrected, in percent.
        error_expr_levels <- abs(differences * 100 / as.matrix(r_corrected))
        # A reference value of exactly 0 yields Inf (or NaN for 0/0). na.rm
        # drops NaN but not Inf, which would turn max and mean into Inf, so
        # undefined ratios are excluded from the summary.
        error_expr_levels[!is.finite(error_expr_levels)] <- NA
        error_expr_levels_table[j, ] <- c(
            min(error_expr_levels, na.rm = TRUE),
            max(error_expr_levels, na.rm = TRUE),
            mean(error_expr_levels, na.rm = TRUE)
        )

        # Calculate the LMPV table after correction (only_table = TRUE is
        # passed, presumably to skip plotting - see lmpv_plot()).
        after_correction_lmpv <- lmpv_plot(fedsim_results, metadata,
                                           form = form, only_table = TRUE)
        write.table(after_correction_lmpv,
                    paste0("eval_simulation/", mode, "/lmvp/", j, "_after_correction_lmpv.tsv"),
                    sep = "\t", quote = FALSE, row.names = FALSE)
    }

    # Absolute differences per run, with columns run, min, max, mean.
    write.table(differences_table %>% rownames_to_column("run"),
                paste0("eval_simulation/", mode, "/differences_fedRBE_R.tsv"),
                sep = "\t", quote = FALSE, row.names = FALSE)

    # Relative differences (%) per run, with columns run, min, max, mean.
    write.table(error_expr_levels_table %>% rownames_to_column("run"),
                paste0("eval_simulation/", mode, "/error_expr_levels_fedRBE_R.tsv"),
                sep = "\t", quote = FALSE, row.names = FALSE)
}

[1] "Processing mode: balanced"
[1] "Processing mode: mild_imbalanced"
[1] "Processing mode: strong_imbalanced"


Before-correction LMPV

In [52]:
# Compute the LMPV table of the uncorrected data for every simulation mode
# and run.
#
# Reads (per mode / run j):
#   <path_to_data><mode>/all_metadata.tsv
#   <path_to_data><mode>/before/intermediate/<j>_intensities_data.tsv
# Writes (per mode / run j):
#   eval_simulation/<mode>/lmvp/<j>_before_correction_lmpv.tsv
for (mode in c("balanced", "mild_imbalanced", "strong_imbalanced")) {
    print(paste0("Processing mode: ", mode))

    # write.table() does not create missing directories, so make sure the
    # output directory exists. NOTE(review): "lmvp" is kept as in the
    # original path; it looks like a typo for "lmpv", but readers of these
    # files may rely on it.
    dir.create(paste0("eval_simulation/", mode, "/lmvp"),
               recursive = TRUE, showWarnings = FALSE)

    metadata <- read.csv(paste0(path_to_data, mode, "/all_metadata.tsv"), sep = "\t")
    rownames(metadata) <- metadata$file

    # Model formula handed to lmpv_plot(); it is the same for every run.
    form <- ~ condition + lab

    # seq_len() gives an empty loop for n_runs == 0 (1:n_runs would visit 1 and 0).
    for (j in seq_len(n_runs)) {
        before_corr <- read.csv(
                paste0(path_to_data, mode, "/before/intermediate/", j, "_intensities_data.tsv"),
                sep = "\t") %>%
            column_to_rownames("rowname")
        # Keep only the samples listed in the metadata, in metadata order.
        before_corr <- before_corr[, metadata$file]

        # Calculate the LMPV table before correction (only_table = TRUE is
        # passed, presumably to skip plotting - see lmpv_plot()).
        before_correction_lmpv <- lmpv_plot(before_corr, metadata,
                                            form = form, only_table = TRUE)
        write.table(before_correction_lmpv,
                    paste0("eval_simulation/", mode, "/lmvp/", j, "_before_correction_lmpv.tsv"),
                    sep = "\t", quote = FALSE, row.names = FALSE)
    }
}

[1] "Processing mode: balanced"
[1] "Processing mode: mild_imbalanced"
[1] "Processing mode: strong_imbalanced"
