In [24]:
library(tidyverse)

source("../../evaluation_utils/plots_eda.R")

In [25]:
plots_multiple <- function(intensities, metadata, name){
    pca_plot_study <- pca_plot(
        intensities, metadata, 
        title = name,
        quantitative_col_name = 'file',
        col_col = "lab", shape_col = "condition")

    boxplot <- boxplot_plot_groupped(
        intensities, metadata, 
        title = name,
        color_col = 'lab', quantitativeColumnName = 'file', 
        path = '')

    density_plot <- plotIntensityDensity(
        intensities, metadata, 
        quantitativeColumnName = 'file', 
        colorColumnName = 'lab',
        title = name)

    layout <- pca_plot_study /
                boxplot /
                density_plot

    return(layout)
}


# Load and central correction

In [26]:
path_to_data <- "/home/yuliya/repos/cosybio/removeBatch/evaluation_data/simulated/"

n_runs = 30

In [None]:
for(mode in c(
    "balanced",
    "mild_imbalanced", "strong_imbalanced"
)){

    print(paste0("Processing mode: ", mode))

    metadata <- read.csv(paste0(path_to_data, mode, "/all_metadata.tsv"), sep = "\t") %>%
        as.data.frame()

    for(j in 1:n_runs){
        intensities <- read.csv(paste0(path_to_data, mode, "/before/intermediate/", j, "_intensities_data.tsv"), sep = "\t") %>%
            as.data.frame() %>%
            column_to_rownames("rowname")

        metadata <- metadata %>%
          mutate(condition = as.factor(condition), lab = as.factor(lab))

        design <- model.matrix(~ condition, metadata)
        colnames(design) <- c("Intercept", "condition")

        intensities_corrected <- limma::removeBatchEffect(
                intensities[,metadata$file], 
                metadata$lab, 
                design = design) %>%
            as.data.frame()

        # write to file
        write.table(intensities_corrected %>% rownames_to_column("rowname"),
                    paste0(path_to_data, mode, "/after/runs/", j, "_R_corrected.tsv"),
                    sep = "\t", row.names = F)

        print(paste0("\t\tSaved corrected intensities for mode: ", mode))
    }
}

[1] "Processing mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"
[1] "\t\tSaved corrected intensities for mode: balanced"

# Run fedRBE app simulation

In [28]:
for(mode in c(
    "balanced",
  "mild_imbalanced", "strong_imbalanced"
)){

    print(paste0("Processing mode: ", mode))

    metadata <- read.csv(paste0(path_to_data, mode, "/all_metadata.tsv"), sep = "\t") %>%
        as.data.frame()

    for(j in 1:n_runs){
        intensities <- read.csv(paste0(path_to_data, mode, "/before/intermediate/", j, "_intensities_data.tsv"), sep = "\t") %>%
            as.data.frame() %>%
            column_to_rownames("rowname")
        
        for(lab_name in unique(metadata$lab)){

            metadata_lab <- metadata %>%
                filter(lab == lab_name)

            print(paste0("Processing lab: ", lab_name, " in mode: ", mode, " number of samples: ", nrow(metadata_lab)))
            intensities_lab <- intensities[, metadata_lab$file]
            intensities_lab %>%
                rownames_to_column("rowname") %>%
                write.table(paste0(path_to_data, mode, "/before/", lab_name, "/intensities.tsv"),
                            sep = "\t", row.names = F)

            
            metadata_lab$A <- as.integer(as.factor(metadata_lab$condition))
            metadata_lab$A <- metadata_lab$A - 1
            write.table(metadata_lab, file = paste0(path_to_data, mode, "/before/", lab_name, "/design.tsv"), sep = "\t", quote = T, row.names = FALSE)

        }

        print(paste0("Saved intensities for mode: ", mode))

        # run simulations
        system(paste("PYTHONPATH=../../ python3 -m evaluation_utils.fedRBE_simulation_scrip_simdata", mode))
        system(paste("mv", paste0(path_to_data, mode, "/after/FedSim_corrected_data_v2.tsv"), paste0(path_to_data, mode, "/after/runs/", j, "_FedSim_corrected.tsv")))

    }
}


[1] "Processing mode: balanced"
[1] "Processing lab: lab3 in mode: balanced number of samples: 200"
[1] "Processing lab: lab1 in mode: balanced number of samples: 200"
[1] "Processing lab: lab2 in mode: balanced number of samples: 200"
[1] "Saved intensities for mode: balanced"
[1] "Processing lab: lab3 in mode: balanced number of samples: 200"
[1] "Processing lab: lab1 in mode: balanced number of samples: 200"
[1] "Processing lab: lab2 in mode: balanced number of samples: 200"
[1] "Saved intensities for mode: balanced"
[1] "Processing lab: lab3 in mode: balanced number of samples: 200"
[1] "Processing lab: lab1 in mode: balanced number of samples: 200"
[1] "Processing lab: lab2 in mode: balanced number of samples: 200"
[1] "Saved intensities for mode: balanced"
[1] "Processing lab: lab3 in mode: balanced number of samples: 200"
[1] "Processing lab: lab1 in mode: balanced number of samples: 200"
[1] "Processing lab: lab2 in mode: balanced number of samples: 200"
[1] "Saved intensities 

# Run test central correction

In [1]:
library(tidyverse)

source("../../evaluation_utils/plots_eda.R")

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

Attaching package: ‘gridExtra’


The following object is masked from ‘package:dplyr’:

    combine


Loading required package: viridisLite



In [2]:
path_to_data <- "/home/yuliya/repos/cosybio/removeBatch/evaluation_data/simulated/"

In [None]:
for(mode in c(
    "balanced",
    "mild_imbalanced", "strong_imbalanced"
)){

  j = 1

  print(paste0("Processing mode: ", mode))
  metadata <- read.csv(paste0(path_to_data, mode, "/all_metadata.tsv"), sep = "\t") %>%
    as.data.frame()

  intensities <- read.csv(paste0(path_to_data, mode, "/before/intermediate/", j, "_intensities_data.tsv"), sep = "\t") %>%
            as.data.frame() %>%
            column_to_rownames("rowname")

  for (lab_name in unique(metadata$lab)){
    metadata_lab <- metadata %>%
      filter(lab == lab_name)

    lab_intensities <- intensities[, lab_meta$file]
    lab_intensities %>%
        rownames_to_column("rowname") %>%
        write.table(paste0(path_to_data, mode, "/before/", lab_name, "/intensities.tsv"),
                    sep = "\t", row.names = F)

    metadata_lab$A <- as.integer(as.factor(metadata_lab$condition))
    metadata_lab$A <- metadata_lab$A - 1
    write.table(metadata_lab, file = paste0(path_to_data, mode, "/before/", lab_name, "/design.tsv"), sep = "\t", quote = T, row.names = FALSE)

  }
  
  metadata <- metadata %>%
    mutate(condition = as.factor(condition), lab = as.factor(lab))

  design <- model.matrix(~ condition, metadata)
  colnames(design) <- c("Intercept", "condition")

  intensities_corrected <- limma::removeBatchEffect(
                intensities[,metadata$file], 
                metadata$lab, 
                design = design) %>%
            as.data.frame()

  # write to file
  write.table(intensities_corrected %>% rownames_to_column("rowname"),
              paste0(path_to_data, mode, "/after/intensities_R_corrected.tsv"),
              sep = "\t", row.names = F)

}

[1] "Processing mode: balanced"


ERROR: Error: object 'metadata_lab' not found
