In [1]:
library(diann)
library(tidyverse)
library(gtools)
library(grid)
library(cowplot)

“package ‘tidyverse’ was built under R version 4.2.2”
“package ‘ggplot2’ was built under R version 4.2.3”
“package ‘tibble’ was built under R version 4.2.3”
“package ‘tidyr’ was built under R version 4.2.2”
“package ‘readr’ was built under R version 4.2.2”
“package ‘purrr’ was built under R version 4.2.3”
“package ‘dplyr’ was built under R version 4.2.3”
“package ‘stringr’ was built under R version 4.2.3”
“package ‘forcats’ was built under R version 4.2.2”
“package ‘lubridate’ was built under R version 4.2.2”
── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.2     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.2     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.1     
── [1mConflicts[22m ──────────

# Paths to report files

In [2]:
lab_A_path = '/home/yuliya/repos/cosybio/Proteomics/Data/lab_A/server_run/2023-04-23_21-10-50_diann_run/report.tsv'
lab_B_path = '/home/yuliya/repos/cosybio/Proteomics/Data/lab_B/DIA-NN/Clinspect_FedProt_E_coli_trial_LabB_report.tsv'
lab_C_path = '/home/yuliya/repos/cosybio/Proteomics/Data/lab_C/server_run/2023-04-18_14-59-30_diann_run/report.tsv'
lab_D_path = '/home/yuliya/repos/cosybio/Proteomics/Data/lab_D/2023-06-16_12-05-48_diann_run/report.tsv'
lab_E_path = '/home/yuliya/repos/cosybio/Proteomics/Data/lab_E/2023-05-05_18-41-36_diann_run/report.tsv'


labs_list = list(lab_A = lab_A_path, lab_B = lab_B_path, lab_C = lab_C_path, lab_D = lab_D_path, lab_E = lab_E_path)

# Functions to plot

In [3]:
pca_plot <- function(df, batch_info, title, path) {
  pca <- prcomp(t(na.omit(df)))
  pca_df <- pca$x %>%
    as.data.frame() %>%
    rownames_to_column("file") %>% 
    left_join(batch_info,  by = "file") 
  var_expl <- pca$sdev^2 / sum(pca$sdev^2)
  names(var_expl) <- paste0("PC", 1:length(var_expl))

  pca_plot <- pca_df %>%
    ggplot(aes(PC1, PC2)) +
    geom_point(aes(col=condition, shape=lab), size=3)  +
    theme_classic() +
    labs(title = title,
         x = glue::glue("PC1 [{round(var_expl['PC1']*100, 2)}%]"),
         y = glue::glue("PC2 [{round(var_expl['PC2']*100, 2)}%]"))
  
  # Check if "S37" exists in the pca_df dataframe
  if("S37" %in% pca_df$file) {
    pca_plot <- pca_plot +
      geom_text(data = pca_df[pca_df$file == "S37", ], aes(label = file), vjust = -1)
  }
  
  ggsave(path, pca_plot, width = 5, height = 5)
  return(pca_plot)
}


In [4]:
# boxplot
boxplot_pg <- function(protein_matrix, title, path) {
  # Reshape data into long format
  long_data <- tidyr::gather(protein_matrix, 
                             key = "file", value = "Intensity")
  # Log tranformed scale
  boxplot <- ggplot(long_data, aes(x = file, y = Intensity)) + 
    geom_boxplot() +
    stat_summary(fun = mean, geom = "point", shape = 4, size = 3, color = "red") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    labs(title = title) 

  ggsave(path, boxplot)
  return(boxplot)
}

In [5]:
heatmap_plot <- function(pg_matrix, batch_info, name, plot_name_prefix){
    cor_matrix <- cor(na.omit(pg_matrix), use = "pairwise.complete.obs")
    pheatmap::pheatmap(cor_matrix, 
                        annotation_col = select(batch_info, c(condition, lab)),
                        treeheight_row = 0, treeheight_col = 0, 
                        main = paste0(name, ' heatmap'),
                        filename = paste0(plot_name_prefix, "_heatmap.png"))
}


In [6]:
plot_three_in_one <- function(pg_matrix, metadata_with_ref_samples, name, plot_name_prefix) {

    pca <- pca_plot(pg_matrix, metadata_with_ref_samples, paste0(name, ' pca'), paste0(plot_name_prefix, '_pca.png'))
    box <- boxplot_pg(pg_matrix, paste0(name, ' boxplot'), paste0(plot_name_prefix, '_boxplot.png'))
    
    combined_plot <- plot_grid(pca, box, ncol = 1, align = "v")

    # Save the combined plot
    ggsave(paste0(plot_name_prefix, "_combined_two.png"), combined_plot, width = 7, height = 10)
}

# Preprocessing - Client side 
Load data and prepare for the analysis 
    - QC plots
    - remove low-quality samples
    - save  to a new files with new names

Clients:
- use filter - Lib.Q.Value <= 0.01 & Lib.PG.Q.Value <= 0.01
- use column - PG.MaxLFQ
- check PCA
- plot boxplot
- plot hetmap

And also save user-specific metadata files to the new folder and peptide-counts.

## Metadata

In [7]:
create_metadata <- function(data, name) {
    # if(name == 'lab_A') {
    #   condition = c(rep("A", 4), rep("B", 4), rep("A", 9), rep("B", 10))
    # test like we didnt see S37 sample
    if(name == 'lab_A') {
      condition = c(rep("Pyr", 2), rep("Glu", 2), rep("Pyr", 10), rep("Glu", 10))
    } else if (name == 'lab_B') {
      condition = c(rep("Pyr", 12), rep("Glu", 11))
    } else if (name == 'lab_C') {
      condition = c(rep("Pyr", 11), rep("Glu", 12))
    } else if (name == 'lab_D') {
      condition = c(rep("Pyr", 10), rep("Glu", 10), rep("Pyr", 2), rep("Glu", 2))
    } else if (name == 'lab_E') {
      condition = c(rep("Glu", 1), rep("Pyr", 2), rep("Glu", 1), rep("Pyr", 10), rep("Glu", 10))
    } else {
      stop('Wrong lab name')
    }
    batch_info <- data.frame(
        file = c(gtools::mixedsort(unique(data$File.Name))),
        lab = as.factor(c(rep(name, length(unique(data$File.Name))))),
        condition = as.factor(condition)
    )
    rownames(batch_info) <- batch_info$file
    return(batch_info)
}

## Functions

In [12]:
read_and_preprocc <- function(path, name) {
    # read data
    data <- diann::diann_load(path)
    data$File.Name <- data$Run
    
    # outlier samples based on name
    filter_sample_out <- switch(name,
                                'lab_C' = c("BBM_673_P283_01_VEB_008_R2"),
                                'lab_A' = c("Ref8537_QC1_20230414_2", 'Ref8537_QC2_20230414_2', 'Ref8537_QC3_20230414_2', 'Ref8537_QC4_20230414_2'),
                                'lab_B' = c('Clinspect_E_coli_A_S29_Slot1-19_1_8668'),
                                NULL)
    
    # filter data if filter_sample_out is not NULL
    if (!is.null(filter_sample_out)) {
        data <- data[!data$File.Name %in% filter_sample_out, ]
    }

    # further filter data
    data <- data %>% filter(Lib.Q.Value <= 0.01 & Lib.PG.Q.Value <= 0.01)


    if(name == 'lab_B'){
        data <- data %>% mutate(File.Name = ifelse(File.Name == 'Clinspect_E_coli_B_66_Slot1-13_1_8647', 'Clinspect_E_coli_B_S66_Slot1-13_1_8647', File.Name)) %>%
            mutate(Run = ifelse(Run == 'Clinspect_E_coli_B_66_Slot1-13_1_8647', 'Clinspect_E_coli_B_S66_Slot1-13_1_8647',Run))
    }
    
    return(data)
}

In [13]:
# function to create expression matrix
extract_pg_matrix <- function(data) {
  # extract protein group matrix
  pg_matrix <- data %>% 
    select(Protein.Group, Run, PG.MaxLFQ) %>% 
    unique() %>% 
    pivot_wider(names_from = Run, values_from = PG.MaxLFQ) %>% 
    column_to_rownames("Protein.Group")
  return(pg_matrix)
}

In [14]:
get_pep_counts_table <- function(data, sample_names=NULL) {
    if (is.null(sample_names)) {
        pre_prec.count.table <- data %>%
        filter(Lib.Q.Value <= 0.01 & Lib.PG.Q.Value <= 0.01) %>%
        select(c(Protein.Group, Precursor.Id, Run)) %>%
        unique() 
    } else {
       pre_prec.count.table <- data %>%
        filter(Lib.Q.Value <= 0.01 & Lib.PG.Q.Value <= 0.01) %>%
        filter(Run %in% sample_names) %>%
        select(c(Protein.Group, Precursor.Id, Run)) %>%
        unique()
    }
    
    prec.count.table <- pre_prec.count.table %>%
        select(c(Protein.Group, Precursor.Id)) %>%
        unique()
        
    summ.prec.count.table <- pre_prec.count.table %>%
        group_by(Run, Protein.Group) %>% 
        summarise(count = n_distinct(Precursor.Id)) %>% 
        pivot_wider(names_from = Run, values_from = count) %>%
        # add new column with the minimum number per row, with rm.na = TRUE
        mutate(count = pmin(!!!.[-1], na.rm = TRUE)) %>%
        select(Protein.Group, count) %>%
        as.data.frame(.) 

    rownames(summ.prec.count.table) <- summ.prec.count.table$Protein.Group
    summ.prec.count.table$Protein.Group <- NULL

    return(list(summ.prec.count.table, prec.count.table))
}

## Preprocessing

In [16]:
# start redirection
#sink("/home/yuliya/repos/cosybio/FedDEqMS/data/00_prot_matrices/log.txt")

log_file_path <- "/home/yuliya/repos/cosybio/FedProt/bacterial_data/balanced/log.txt"

# Remove the file if it exists
if (file.exists(log_file_path)) {
  file.remove(log_file_path)
}
# Create a new, empty file
file.create(log_file_path)

bath_info_all <- NULL

for (name in names(labs_list)) {
    lab_path <- labs_list[[name]]
    # check if we have lab results
    if (is.null(lab_path)) {next}
    # create plots file prefix
    plot_name_prefix <- paste0('/home/yuliya/repos/cosybio/FedProt/bacterial_data/balanced/', name, '/plots/', name)
    file_name_prefix <- paste0('/home/yuliya/repos/cosybio/FedProt/bacterial_data/balanced/', name, "/")

    # read data
    data <- read_and_preprocc(lab_path, name)
    
    # create metadata file
    batch_info <- create_metadata(data, name)
    # add  into metadata for centralized run
    if(is.null(bath_info_all)){
      bath_info_all <- batch_info
    } else {
       bath_info_all <- rbind(bath_info_all, batch_info)
    }
    # create design matrix for fedDEqMS
    dummy_df <- model.matrix(~condition - 1, batch_info)
    colnames(dummy_df) <- gsub("condition", "", colnames(dummy_df))
    batch_info <- batch_info %>% select(-condition) %>% cbind(dummy_df)
    write_tsv(batch_info %>% rownames_to_column(), 
              paste0(file_name_prefix, "design.tsv"))
    
    # create protein groups matrix
    pg_matrix <- extract_pg_matrix(data)[, batch_info$file]
    # pg_matrix <- pg_matrix[apply(pg_matrix, 1, function(x) any(!is.na(x))), ]
    write_tsv(pg_matrix %>% rownames_to_column(), 
              paste0(file_name_prefix, "protein_groups_matrix.tsv"))
    # create precursor counts table
    both_counts_table  <- get_pep_counts_table(data)
    summ_prec_counts_table <- both_counts_table[[1]]
    prec_counts_table <- both_counts_table[[2]]
    write_tsv(summ_prec_counts_table %>% rownames_to_column(),
              paste0(file_name_prefix, "protein_counts.tsv"))

    # log2 transform for plots
    pg_matrix <- pg_matrix %>% 
      mutate(across(everything(), 
                      ~ {.x <- log2(.)
                      replace(.x, .x < 0 | is.infinite(.x), NA)
                      }))
    
    metadata_with_ref_samples <-create_metadata_with_ref(data, name)
    # rename
    if (name == 'lab_A' | name == 'lab_E'){
      metadata_with_ref_samples$file <- sapply(strsplit(as.character(metadata_with_ref_samples$file), "_"), function(x) {
          if (length(x) == 4) {  # if there are four parts
            paste(x[2], x[4], sep = "_")
          } else {  # otherwise, there are three parts
            x[2]
          }
        })
      rownames(metadata_with_ref_samples) <- metadata_with_ref_samples$file
      colnames(pg_matrix) <- metadata_with_ref_samples$file
    }
    else if (name == 'lab_C') {
      metadata_with_ref_samples$file <- sapply(strsplit(as.character(metadata_with_ref_samples$file), "_"), function(x) {
          paste(x[5], x[6], sep = "_")
        })
      rownames(metadata_with_ref_samples) <- metadata_with_ref_samples$file
      colnames(pg_matrix) <- metadata_with_ref_samples$file
    
    } else if (name == 'lab_B') {
        metadata_with_ref_samples$file <- sapply(strsplit(as.character(metadata_with_ref_samples$file), "_"), function(x) {
          x[5]
        })
      rownames(metadata_with_ref_samples) <- metadata_with_ref_samples$file
      colnames(pg_matrix) <- metadata_with_ref_samples$file
    
    } else if (name == 'lab_D') {
      metadata_with_ref_samples$file <- sapply(strsplit(as.character(metadata_with_ref_samples$file), "_"), function(x) {
          x[8]
        })
      rownames(metadata_with_ref_samples) <- metadata_with_ref_samples$file
      colnames(pg_matrix) <- metadata_with_ref_samples$file
    }

    # plots
    #boxplot_pg(pg_matrix, paste0(name, ' boxplot'), paste0(plot_name_prefix, '_boxplot.png'))
    #pca_plot(pg_matrix, metadata_with_ref_samples, paste0(name, ' pca'), paste0(plot_name_prefix, '_pca.png'))
    heatmap_plot(pg_matrix, metadata_with_ref_samples, name, paste0(plot_name_prefix, ""))
    plot_three_in_one(pg_matrix, metadata_with_ref_samples, name, plot_name_prefix)
    # print info
    print_info <- capture.output({
        print(paste0(name, " processed. Info:  Number of proteins: ", dim(pg_matrix)[1], "   Numer of samples: ", dim(pg_matrix)[2]))
    })
    write(print_info, file = log_file_path, append = TRUE)

}

write_tsv(bath_info_all %>% rownames_to_column(), 
          '/home/yuliya/repos/cosybio/FedProt/bacterial_data/balanced/bath_info_all.tsv')


#sink()

[1m[22m`summarise()` has grouped output by 'Run'. You can override using the `.groups`
argument.
[1m[22mSaving 7 x 7 in image
“[1m[22mRemoved 1516 rows containing non-finite values (`stat_boxplot()`).”
“[1m[22mRemoved 1516 rows containing non-finite values (`stat_summary()`).”
“[1m[22mRemoved 1516 rows containing non-finite values (`stat_boxplot()`).”
“[1m[22mRemoved 1516 rows containing non-finite values (`stat_summary()`).”
“Graphs cannot be vertically aligned unless the axis parameter is set. Placing graphs unaligned.”
[1m[22m`summarise()` has grouped output by 'Run'. You can override using the `.groups`
argument.
[1m[22mSaving 7 x 7 in image
“[1m[22mRemoved 1540 rows containing non-finite values (`stat_boxplot()`).”
“[1m[22mRemoved 1540 rows containing non-finite values (`stat_summary()`).”
“[1m[22mRemoved 1540 rows containing non-finite values (`stat_boxplot()`).”
“[1m[22mRemoved 1540 rows containing non-finite values (`stat_summary()`).”
“Graphs cannot be 

```
[1] "lab_A processed. Info:  Number of proteins: 2571   Numer of samples: 24"
[1] "lab_B processed. Info:  Number of proteins: 2806   Numer of samples: 23"
[1] "lab_C processed. Info:  Number of proteins: 2803   Numer of samples: 23"
[1] "lab_D processed. Info:  Number of proteins: 2824   Numer of samples: 24"
[1] "lab_E processed. Info:  Number of proteins: 2425   Numer of samples: 24"

```

# Create imbalanced datasets

In [17]:
# start redirection

log_file_path <- "/home/yuliya/repos/cosybio/FedProt/bacterial_data/imbalanced/log.txt"

# Remove the file if it exists
if (file.exists(log_file_path)) {
  file.remove(log_file_path)
}
# Create a new, empty file
file.create(log_file_path)

bath_info_all <- NULL

for (name in names(labs_list)) {
    lab_path <- labs_list[[name]]
    # check if we have lab results
    if (is.null(lab_path)) {next}
    # create plots file prefix
    plot_name_prefix <- paste0('/home/yuliya/repos/cosybio/FedProt/bacterial_data/imbalanced/', name, '/plots/', name)
    file_name_prefix <- paste0('/home/yuliya/repos/cosybio/FedProt/bacterial_data/imbalanced/', name, '/')

    # read data
    data <- read_and_preprocc(lab_path, name)
    
    # create metadata file
    batch_info <- create_metadata(data, name)
    # select only some samples
    # set.seed(1)
    if (name == "lab_A") {
      sample_names <- c(rownames(batch_info[batch_info$condition == 'Pyr', ][sample(1:nrow(batch_info[batch_info$condition == 'Pyr', ]), 4), ]), 
                        rownames(batch_info[batch_info$condition == 'Glu', ][sample(1:nrow(batch_info[batch_info$condition == 'Glu', ]), 12), ]))
      
    } else if (name == "lab_B") {
       sample_names <- c(rownames(batch_info[batch_info$condition == 'Pyr', ][sample(1:nrow(batch_info[batch_info$condition == 'Pyr', ]),9), ]), 
                         rownames(batch_info[batch_info$condition == 'Glu', ][sample(1:nrow(batch_info[batch_info$condition == 'Glu', ]), 4), ]))

    } else if (name == "lab_C") {
      sample_names <- c(rownames(batch_info[batch_info$condition == 'Pyr', ]), 
                        rownames(batch_info[batch_info$condition == 'Glu', ][sample(1:nrow(batch_info[batch_info$condition == 'Glu', ]), 3), ]))
      
    } else if (name == "lab_D") {
      sample_names <- c(rownames(batch_info[batch_info$condition == 'Pyr', ][sample(1:nrow(batch_info[batch_info$condition == 'Pyr', ]), 5), ]), 
                        rownames(batch_info[batch_info$condition == 'Glu', ][sample(1:nrow(batch_info[batch_info$condition == 'Glu', ]), 10), ]))
      
    } else if (name == "lab_E") {
      sample_names <- c(rownames(batch_info[batch_info$condition == 'Pyr', ][sample(1:nrow(batch_info[batch_info$condition == 'Pyr', ]), 10), ]), #10
                        rownames(batch_info[batch_info$condition == 'Glu', ][sample(1:nrow(batch_info[batch_info$condition == 'Glu', ]), 5), ])) # 5
    }
    batch_info <- batch_info[batch_info$file %in% sample_names, ]
    # add  into metadata for centralized run
    if(is.null(bath_info_all)){
      bath_info_all <- batch_info
    } else {
       bath_info_all <- rbind(bath_info_all, batch_info)
    }
    metadata <- batch_info
    # create design matrix for fedDEqMS
    dummy_df <- model.matrix(~condition - 1, batch_info)
    colnames(dummy_df) <- gsub("condition", "", colnames(dummy_df))
    batch_info <- batch_info %>% select(-condition) %>% cbind(dummy_df)
    write_tsv(batch_info %>% rownames_to_column(), 
              paste0(file_name_prefix, "design.tsv"))
    
    # create protein groups matrix
    pg_matrix <- extract_pg_matrix(data)[, batch_info$file]
    pg_matrix <- pg_matrix[apply(pg_matrix, 1, function(x) any(!is.na(x))), ]
    write_tsv(pg_matrix %>% rownames_to_column(), 
              paste0(file_name_prefix, "protein_groups_matrix.tsv"))
    # create precursor counts table
    both_counts_table  <- get_pep_counts_table(data, sample_names)
    summ_prec_counts_table <- both_counts_table[[1]]
    prec_counts_table <- both_counts_table[[2]]
    write_tsv(summ_prec_counts_table %>% rownames_to_column(),
              paste0(file_name_prefix, "protein_counts.tsv"))

    # log2 transform for plots
    pg_matrix <- pg_matrix %>% 
      mutate(across(everything(), 
                      ~ {.x <- log2(.)
                      replace(.x, .x < 0 | is.infinite(.x), NA)
                      }))
    pg_matrix <- pg_matrix[, metadata$file]
   
    if (name == 'lab_A' | name == 'lab_E'){
      metadata$file <- sapply(strsplit(as.character(metadata$file), "_"), function(x) {
          if (length(x) == 4) {  # if there are four parts
            paste(x[2], x[4], sep = "_")
          } else {  # otherwise, there are three parts
            x[2]
          }
        })
      rownames(metadata) <- metadata$file

      colnames(pg_matrix) <- metadata$file
    
    } else if (name == 'lab_B') {
      metadata$file <- sapply(strsplit(as.character( metadata$file), "_"), function(x) {
          x[5]
        })
      rownames(metadata) <- metadata$file
      colnames(pg_matrix) <- metadata$file
    
    } else if (name == 'lab_C') {
      metadata$file <- sapply(strsplit(as.character(metadata$file), "_"), function(x) {
          paste(x[5], x[6], sep = "_")
        })
      rownames(metadata) <- metadata$file
      colnames(pg_matrix) <- metadata$file
    }
    else if (name == 'lab_D') {
      metadata$file <- sapply(strsplit(as.character(metadata$file), "_"), function(x) {
          x[8]
        })
      rownames(metadata) <- metadata$file
      colnames(pg_matrix) <- metadata$file
    }

    # plots
    #boxplot_pg(pg_matrix, paste0(name, ' boxplot'), paste0(plot_name_prefix, '_boxplot.png'))
    #pca_plot(pg_matrix, metadata_with_ref_samples, paste0(name, ' pca'), paste0(plot_name_prefix, '_pca.png'))
    heatmap_plot(pg_matrix, metadata, name, paste0(plot_name_prefix, ""))
    plot_three_in_one(pg_matrix, metadata, name, plot_name_prefix)
    # print info
    print_info <- capture.output({
        print(paste0(name, " processed. Info:  Number of proteins: ", dim(pg_matrix)[1], "   Numer of samples: ", dim(pg_matrix)[2]))
    })
    write(print_info, file = log_file_path, append = TRUE)

}

write_tsv(bath_info_all %>% rownames_to_column(), 
          '/home/yuliya/repos/cosybio/FedProt/bacterial_data/imbalanced/bath_info_all.tsv')
#sink()

[1m[22m`summarise()` has grouped output by 'Run'. You can override using the `.groups`
argument.
[1m[22mSaving 7 x 7 in image
“[1m[22mRemoved 1055 rows containing non-finite values (`stat_boxplot()`).”
“[1m[22mRemoved 1055 rows containing non-finite values (`stat_summary()`).”
“[1m[22mRemoved 1055 rows containing non-finite values (`stat_boxplot()`).”
“[1m[22mRemoved 1055 rows containing non-finite values (`stat_summary()`).”
“Graphs cannot be vertically aligned unless the axis parameter is set. Placing graphs unaligned.”
[1m[22m`summarise()` has grouped output by 'Run'. You can override using the `.groups`
argument.
[1m[22mSaving 7 x 7 in image
“[1m[22mRemoved 705 rows containing non-finite values (`stat_boxplot()`).”
“[1m[22mRemoved 705 rows containing non-finite values (`stat_summary()`).”
“[1m[22mRemoved 705 rows containing non-finite values (`stat_boxplot()`).”
“[1m[22mRemoved 705 rows containing non-finite values (`stat_summary()`).”
“Graphs cannot be vert

```

[1] "lab_A processed. Info:  Number of proteins: 2569   Numer of samples: 16"
[1] "lab_B processed. Info:  Number of proteins: 2806   Numer of samples: 13"
[1] "lab_C processed. Info:  Number of proteins: 2796   Numer of samples: 14"
[1] "lab_D processed. Info:  Number of proteins: 2824   Numer of samples: 15"
[1] "lab_E processed. Info:  Number of proteins: 2423   Numer of samples: 15"


```