In [1]:
library(tidyverse)

source("../../evaluation_utils/plots_eda.R")

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.2     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.4     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

Attaching package: ‘gridExtra’


The following object is masked from ‘package:dplyr’:

    combine


Loading required package: viridisLite



This notebook may take several hours to run. You can skip it and go straight to the `evaluation` folder to see the results.

# Load data and run central correction

In [2]:
# Base directory that holds one sub-folder per simulation mode
# ("balanced", "mild_imbalanced", "strong_imbalanced").
path_to_data <- "./"

# Number of simulated runs per mode; input files are indexed 1..n_runs
# ("<j>_intensities_data.tsv").
n_runs <- 30

In [3]:
# Central (pooled) batch-effect correction.
#
# For every simulation mode and every run j in 1..n_runs:
#   1. read "<mode>/before/intermediate/<j>_intensities_data.tsv",
#   2. remove the `lab` batch effect with limma::removeBatchEffect() while
#      passing a `~ condition` design so the condition effect is retained,
#   3. write the result to "<mode>/after/runs/<j>_R_corrected.tsv".
#
# Relies on `path_to_data` and `n_runs` being defined in an earlier cell.
for (mode in c("balanced", "mild_imbalanced", "strong_imbalanced")) {
    # Create the output folder on first use.
    runs_dir <- paste0(path_to_data, mode, "/after/runs")
    if (!dir.exists(runs_dir)) {
        dir.create(runs_dir, recursive = TRUE)
    }

    print(paste0("Processing mode: ", mode))

    # The metadata is shared by all runs of a mode, so the factor conversion
    # and the design matrix are built once here instead of once per run.
    metadata <- read.csv(
        paste0(path_to_data, mode, "/all_metadata.tsv"),
        sep = "\t"
    ) %>%
        as.data.frame() %>%
        mutate(condition = as.factor(condition), lab = as.factor(lab))

    design <- model.matrix(~ condition, metadata)
    # NOTE: naming exactly two columns assumes `condition` has two levels;
    # this assignment errors for any other number of levels.
    colnames(design) <- c("Intercept", "condition")

    for (j in seq_len(n_runs)) {
        intensities <- read.csv(
            paste0(path_to_data, mode, "/before/intermediate/", j, "_intensities_data.tsv"),
            sep = "\t"
        ) %>%
            as.data.frame() %>%
            column_to_rownames("rowname")

        # Columns are reordered to the metadata row order so that samples,
        # batch labels and design rows line up.
        intensities_corrected <- limma::removeBatchEffect(
            intensities[, metadata$file],
            metadata$lab,
            design = design
        ) %>%
            as.data.frame()

        # write to file
        write.table(
            intensities_corrected %>% rownames_to_column("rowname"),
            paste0(runs_dir, "/", j, "_R_corrected.tsv"),
            sep = "\t", row.names = FALSE
        )

        print(paste0("\t\tSaved corrected intensities for mode: ", mode))
    }
}

# Run fedRBE app simulation

In [8]:
# Federated (fedRBE) simulation.
#
# For every simulation mode and every run j in 1..n_runs:
#   1. split the run's intensities by lab and write, per lab,
#      "<mode>/before/<lab>/intensities.tsv" and "<mode>/before/<lab>/design.tsv",
#   2. launch the Python simulation module for that mode,
#   3. move its output "<mode>/after/FedSim_corrected_data_v2.tsv" to
#      "<mode>/after/runs/<j>_FedSim_corrected.tsv".
#
# Relies on `path_to_data` and `n_runs` being defined in an earlier cell.
for (mode in c("balanced", "mild_imbalanced", "strong_imbalanced")) {

    print(paste0("Processing mode: ", mode))

    metadata <- read.csv(
        paste0(path_to_data, mode, "/all_metadata.tsv"),
        sep = "\t"
    ) %>%
        as.data.frame()

    # Take the condition levels from the full metadata so that the 0/1 coding
    # of `A` is the same in every lab. Coding from per-lab levels would give
    # 0 to whatever single condition a one-condition lab happens to contain.
    condition_levels <- levels(as.factor(metadata$condition))

    # Make sure the archive folder exists even if the central-correction cell
    # was not run beforehand.
    runs_dir <- paste0(path_to_data, mode, "/after/runs")
    dir.create(runs_dir, recursive = TRUE, showWarnings = FALSE)

    for (j in seq_len(n_runs)) {
        intensities <- read.csv(
            paste0(path_to_data, mode, "/before/intermediate/", j, "_intensities_data.tsv"),
            sep = "\t"
        ) %>%
            as.data.frame() %>%
            column_to_rownames("rowname")

        for (lab_name in unique(metadata$lab)) {

            metadata_lab <- metadata %>%
                filter(lab == lab_name)

            print(paste0("Processing lab: ", lab_name, " in mode: ", mode, " number of samples: ", nrow(metadata_lab)))

            lab_dir <- paste0(path_to_data, mode, "/before/", lab_name)

            # drop = FALSE keeps a data frame even for a lab with one sample.
            intensities_lab <- intensities[, metadata_lab$file, drop = FALSE]
            intensities_lab %>%
                rownames_to_column("rowname") %>%
                write.table(paste0(lab_dir, "/intensities.tsv"),
                            sep = "\t", row.names = FALSE)

            # Zero-based integer code of the condition (first level -> 0).
            metadata_lab$A <- as.integer(
                factor(metadata_lab$condition, levels = condition_levels)
            ) - 1L
            write.table(metadata_lab,
                        file = paste0(lab_dir, "/design.tsv"),
                        sep = "\t", quote = TRUE, row.names = FALSE)
        }

        print(paste0("Saved intensities for mode: ", mode))

        # run simulations
        sim_cmd <- paste(
            "PYTHONPATH=../../ python3 -m evaluation_utils.fedRBE_simulation_scrip_simdata",
            mode
        )
        sim_status <- system(sim_cmd)
        # Stop instead of archiving a missing or stale result file.
        if (sim_status != 0) {
            stop("fedRBE simulation failed for mode '", mode, "', run ", j,
                 " (exit status ", sim_status, ")", call. = FALSE)
        }

        # Archive this run's output under a run-specific name.
        sim_output <- paste0(path_to_data, mode, "/after/FedSim_corrected_data_v2.tsv")
        run_output <- paste0(runs_dir, "/", j, "_FedSim_corrected.tsv")
        if (!file.rename(sim_output, run_output)) {
            stop("Could not move '", sim_output, "' to '", run_output, "'",
                 call. = FALSE)
        }
    }
}


[1] "Processing mode: balanced"
[1] "Processing lab: lab2 in mode: balanced number of samples: 200"
[1] "Processing lab: lab1 in mode: balanced number of samples: 200"
[1] "Processing lab: lab3 in mode: balanced number of samples: 200"
[1] "Saved intensities for mode: balanced"
[1] "Processing lab: lab2 in mode: balanced number of samples: 200"
[1] "Processing lab: lab1 in mode: balanced number of samples: 200"
[1] "Processing lab: lab3 in mode: balanced number of samples: 200"
[1] "Saved intensities for mode: balanced"
[1] "Processing lab: lab2 in mode: balanced number of samples: 200"
[1] "Processing lab: lab1 in mode: balanced number of samples: 200"
[1] "Processing lab: lab3 in mode: balanced number of samples: 200"
[1] "Saved intensities for mode: balanced"
[1] "Processing lab: lab2 in mode: balanced number of samples: 200"
[1] "Processing lab: lab1 in mode: balanced number of samples: 200"
[1] "Processing lab: lab3 in mode: balanced number of samples: 200"
[1] "Saved intensities 

# Run test central correction

In [6]:
library(tidyverse)

source("../../evaluation_utils/plots_eda.R")

In [7]:
# Base directory that holds one sub-folder per simulation mode.
path_to_data <- "./"

In [8]:
# Single-run test of the central correction.
#
# For every simulation mode, using run 1 only:
#   1. write the per-lab inputs ("<mode>/before/<lab>/intensities.tsv" and
#      "<mode>/before/<lab>/design.tsv"),
#   2. remove the `lab` batch effect on the pooled data with
#      limma::removeBatchEffect() and a `~ condition` design,
#   3. write the result to "<mode>/after/intensities_R_corrected.tsv".
#
# Relies on `path_to_data` being defined in an earlier cell.

# Only the first run is used; it is the same for every mode.
j <- 1

for (mode in c("balanced", "mild_imbalanced", "strong_imbalanced")) {

  print(paste0("Processing mode: ", mode))
  metadata <- read.csv(
    paste0(path_to_data, mode, "/all_metadata.tsv"),
    sep = "\t"
  ) %>%
    as.data.frame()

  intensities <- read.csv(
    paste0(path_to_data, mode, "/before/intermediate/", j, "_intensities_data.tsv"),
    sep = "\t"
  ) %>%
    as.data.frame() %>%
    column_to_rownames("rowname")

  # Take the condition levels from the full metadata so that the 0/1 coding
  # of `A` is the same in every lab, even one that holds a single condition.
  condition_levels <- levels(as.factor(metadata$condition))

  for (lab_name in unique(metadata$lab)) {
    metadata_lab <- metadata %>%
      filter(lab == lab_name)

    lab_dir <- paste0(path_to_data, mode, "/before/", lab_name)

    # drop = FALSE keeps a data frame even for a lab with one sample.
    lab_intensities <- intensities[, metadata_lab$file, drop = FALSE]
    lab_intensities %>%
      rownames_to_column("rowname") %>%
      write.table(paste0(lab_dir, "/intensities.tsv"),
                  sep = "\t", row.names = FALSE)

    # Zero-based integer code of the condition (first level -> 0).
    metadata_lab$A <- as.integer(
      factor(metadata_lab$condition, levels = condition_levels)
    ) - 1L
    write.table(metadata_lab,
                file = paste0(lab_dir, "/design.tsv"),
                sep = "\t", quote = TRUE, row.names = FALSE)
  }

  # Factor conversion happens after the per-lab export so the loop above
  # works on the columns exactly as they were read.
  metadata <- metadata %>%
    mutate(condition = as.factor(condition), lab = as.factor(lab))

  design <- model.matrix(~ condition, metadata)
  # NOTE: naming exactly two columns assumes `condition` has two levels;
  # this assignment errors for any other number of levels.
  colnames(design) <- c("Intercept", "condition")

  intensities_corrected <- limma::removeBatchEffect(
    intensities[, metadata$file],
    metadata$lab,
    design = design
  ) %>%
    as.data.frame()

  # write to file
  write.table(
    intensities_corrected %>% rownames_to_column("rowname"),
    paste0(path_to_data, mode, "/after/intensities_R_corrected.tsv"),
    sep = "\t", row.names = FALSE
  )

}

[1] "Processing mode: balanced"
[1] "Processing mode: mild_imbalanced"
[1] "Processing mode: strong_imbalanced"


# Session info

In [9]:
sessionInfo()

R version 4.3.3 (2024-02-29)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 24.04.2 LTS

Matrix products: default
BLAS/LAPACK: /home/yuliya-cosybio/miniforge3/envs/fedRBE/lib/libopenblasp-r0.3.29.so;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Berlin
tzcode source: system (glibc)

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] viridis_0.6.5     viridisLite_0.4.2 ggsci_3.2.0       umap_0.2.10.0    
 [5] patchwork_1.3.0   gridExtra_2.3     lubridate_1.9.4   forcats_1.0.0    
 [9] stringr_1.5.1     dplyr_1.1.4       purrr_1