In [77]:
library(tidyverse)

source("../../evaluation_utils/plots_eda.R")

In [78]:
# Build a three-panel figure for one dataset: a PCA plot on top, grouped
# boxplots in the middle and intensity density curves at the bottom.
#
# intensities: intensity table forwarded unchanged to the plotting helpers.
# metadata:    sample annotation; the helpers are told to use its `file`,
#              `lab` and `A` columns.
# name:        title shown on each of the three panels.
#
# Returns the three panels stacked vertically with the `/` operator.
plots_multiple <- function(intensities, metadata, name) {
    pca_panel <- pca_plot(
        intensities, metadata,
        title = name,
        quantitative_col_name = "file",
        col_col = "lab",
        shape_col = "A"
    )

    box_panel <- boxplot_plot_groupped(
        intensities, metadata,
        title = name,
        color_col = "lab",
        quantitativeColumnName = "file",
        path = ""
    )

    density_panel <- plotIntensityDensity(
        intensities, metadata,
        quantitativeColumnName = "file",
        colorColumnName = "lab",
        title = name
    )

    # Top-to-bottom order: PCA, boxplots, densities.
    pca_panel / box_panel / density_panel
}


# Load data

In [80]:
# Assemble one pooled dataset per simulation mode from the per-lab inputs.
# For every mode the three labs' `intensities.tsv` tables are outer-joined on
# their row identifiers and the `design.tsv` tables are stacked, with a `lab`
# column recording which lab each metadata row came from. The pooled tables
# are written to `<mode>/all_intensities.tsv` and `<mode>/all_metadata.tsv`
# and kept in `simulated_data[[mode]]` as list(intensities, metadata).
simulated_data <- list()

for(mode in c("balanced", "mild_imbalanced", "strong_imbalanced")){

    mode_intensities <- NULL
    mode_metadata <- NULL

    for(lab in c("lab1", "lab2", "lab3")){
        path_to_data <- paste0(mode, "/before/", lab, "/")
        # Row names of the table become an explicit `file` column so that
        # they can serve as the merge key below.
        intensities <- read.csv(paste0(path_to_data, "intensities.tsv"), sep = "\t") %>%
            rownames_to_column("file") %>%
            as.data.frame()
        metadata <- read.csv(paste0(path_to_data, "design.tsv"), sep = "\t") %>%
            as.data.frame()

        metadata$lab <- lab

        if (is.null(mode_intensities)){
            mode_intensities <- intensities
            mode_metadata <- metadata
        } else {
            # all = TRUE keeps rows that are missing in some labs (filled with NA)
            mode_intensities <- merge(mode_intensities, intensities,
                                   by = "file", all = TRUE)
            mode_metadata <- rbind(mode_metadata, metadata)
        }
    }

    # dim() returns c(rows, cols); collapse each pair so that one message is
    # printed instead of two recycled ones that mix rows with columns.
    # At this point mode_intensities still contains the `file` key column.
    print(paste0("Mode: ", mode
                , " Intensities: ", paste(dim(mode_intensities), collapse = " x ")
                , " Metadata: ", paste(dim(mode_metadata), collapse = " x ")))


    mode_intensities <- mode_intensities %>%
        column_to_rownames("file")
    # sort rows by the numeric suffix of their identifier (prt1, prt2, prt3, ...)
    # instead of the lexicographic order (prt1, prt10, prt100, ...) left by merge();
    # the identifier itself breaks ties so the result is deterministic
    row_ids <- rownames(mode_intensities)
    row_numbers <- as.numeric(sub("^[^0-9]*", "", row_ids))
    mode_intensities <- mode_intensities[order(row_numbers, row_ids), , drop = FALSE]

    # write to file
    # NOTE(review): write.table() defaults to row.names = TRUE, so the
    # intensities file also gets a leading unnamed index column in addition to
    # `file`. Left unchanged because downstream readers may rely on this layout.
    write.table(mode_intensities %>% rownames_to_column("file"),
        file = paste0(mode, "/all_intensities.tsv"), sep = "\t", quote = FALSE)
    write.table(mode_metadata, file = paste0(mode, "/all_metadata.tsv"), sep = "\t", quote = FALSE)

    # Metadata row names are used as the sample identifiers; put the
    # intensity columns in the same order as the metadata rows.
    mode_metadata$file <- rownames(mode_metadata)
    mode_intensities <- mode_intensities[, mode_metadata$file]

    simulated_data[[mode]] <- list(intensities = mode_intensities, metadata = mode_metadata)
}

[1] "Mode: balanced Intensities: 6000 Metadata: 600"
[2] "Mode: balanced Intensities: 601 Metadata: 3"   
[1] "Mode: mild_imbalanced Intensities: 6000 Metadata: 600"
[2] "Mode: mild_imbalanced Intensities: 601 Metadata: 3"   
[1] "Mode: strong_imbalanced Intensities: 6000 Metadata: 600"
[2] "Mode: strong_imbalanced Intensities: 601 Metadata: 3"   


In [81]:
# Draw the diagnostic figure for the uncorrected data of every mode and save
# it as `<mode>/plots/before_plots.png`.
for (mode in c("balanced", "mild_imbalanced", "strong_imbalanced")){

    data_for_plot <- simulated_data[[mode]]$intensities
    # replace NA with 0
    data_for_plot[is.na(data_for_plot)] <- 0


    meta <- simulated_data[[mode]]$metadata
    meta <- meta %>% mutate(A = as.factor(A))

    layout <- plots_multiple(
        # plot only the first 200 rows; head() returns fewer rows instead of
        # padding with NA rows when the table is shorter than that
        head(data_for_plot, 200),
        meta,
        mode)

    plot_path <- paste0(mode, "/plots/before_plots.png")
    # make sure the target directory exists before saving
    dir.create(dirname(plot_path), recursive = TRUE, showWarnings = FALSE)
    ggsave(
        plot_path,
        plot = layout, width = 8, height = 12)

    print(paste0("Saved plots for mode: ", mode))
}

No id variables; using all as measure variables



[1] "Saved plots for mode: balanced"


No id variables; using all as measure variables



[1] "Saved plots for mode: mild_imbalanced"


No id variables; using all as measure variables



[1] "Saved plots for mode: strong_imbalanced"


# Correction

In [82]:
# For every mode: remove the lab effect from the pooled intensities with
# limma::removeBatchEffect while passing a design built from `A`, write the
# corrected table to `<mode>/after/intensities_R_corrected.tsv`, and save the
# diagnostic figure as `<mode>/plots/afterR_plots.png`.
for (mode in c("balanced", "mild_imbalanced", "strong_imbalanced")){

    metadata <- simulated_data[[mode]]$metadata
    intensities <- simulated_data[[mode]]$intensities
    # keep the intensity columns in the same order as the metadata rows
    intensities <- intensities[, metadata$file]

    metadata <- metadata %>%
        mutate(A = as.factor(A), lab = as.factor(lab))

    design <- model.matrix(~ A, metadata)
    # NOTE(review): assigning exactly two names assumes `A` has two levels;
    # with more levels this assignment fails - confirm against the data.
    colnames(design) <- c("Intercept", "A")

    # The lab factor is passed explicitly as `batch` rather than by position.
    intensities_corrected <- limma::removeBatchEffect(
        intensities,
        batch = metadata$lab,
        design = design) %>% as.data.frame()

    # write to file
    corrected_path <- paste0(mode, "/after/intensities_R_corrected.tsv")
    # make sure the target directory exists before writing
    dir.create(dirname(corrected_path), recursive = TRUE, showWarnings = FALSE)
    write.table(intensities_corrected %>% rownames_to_column("file"),
                corrected_path, sep = "\t", quote = FALSE)

    print(paste0("Saved corrected intensities for mode: ", mode))

    # plot
    # missing values are replaced with 0 for plotting only; the file written
    # above keeps them
    intensities_corrected[is.na(intensities_corrected)] <- 0
    layout <- plots_multiple(
        # first 200 rows only; head() does not pad with NA rows when the
        # table is shorter than that
        head(intensities_corrected, 200),
        metadata,
        paste0(mode, " R corrected"))

    plot_path <- paste0(mode, "/plots/afterR_plots.png")
    dir.create(dirname(plot_path), recursive = TRUE, showWarnings = FALSE)
    ggsave(
        plot_path,
        plot = layout, width = 8, height = 12)


}

[1] "Saved corrected intensities for mode: balanced"


No id variables; using all as measure variables



[1] "Saved corrected intensities for mode: mild_imbalanced"


No id variables; using all as measure variables



[1] "Saved corrected intensities for mode: strong_imbalanced"


No id variables; using all as measure variables

