In [None]:
#' Median-normalize sample columns to a common scale
#'
#' Each column (sample) is divided by its own median and then multiplied by
#' the mean of all column medians, so that (for non-zero medians) every column
#' ends up with the same median. Missing values are ignored when computing the
#' medians and their mean.
#'
#' NOTE(review): the scaling is multiplicative (ratio to the median). If the
#' inputs really are log-transformed, an additive shift may have been
#' intended -- confirm before changing.
#'
#' @param log_intensities Numeric matrix or data frame with one column per
#'   sample and one row per feature.
#' @return A data.frame with the same dimensions, column names and row names
#'   as `log_intensities`, holding the normalized values.
medianNorm <- function(log_intensities) {
  # Work on a plain matrix: unlike data.table, it keeps the row names, so the
  # feature identifiers survive the round trip.
  intensities <- as.matrix(log_intensities)

  # Median of each sample (column), and the mean of those medians
  sample_med <- apply(intensities, 2, stats::median, na.rm = TRUE)
  mean_med <- mean(sample_med, na.rm = TRUE)

  # Divide every column by its own median, then rescale to the mean of medians
  norm_mat <- sweep(intensities, 2, sample_med, "/") * mean_med

  as.data.frame(norm_mat)
}

In [None]:
library(data.table) # For data manipulation
library(dplyr) # For data manipulation

#' Internal reference scaling (IRS) normalization across pools
#'
#' For every pool (batch) listed in the metadata, the sample flagged as
#' "Common Reference" is used to compute a per-row scaling factor: the
#' geometric mean of the reference intensities across all pools divided by the
#' reference intensity of that pool. Every sample in the pool is multiplied by
#' that factor.
#'
#' @param df_data Data frame (or matrix) of intensities with one column per
#'   sample; column names must match `Quantitative.column.name` in
#'   `df_metadata`. Columns not listed in the metadata are dropped.
#' @param df_metadata Data frame with the columns `Pool`, `Group` and
#'   `Quantitative.column.name`. Rows whose `Group` equals
#'   "Common Reference" identify the reference sample of each pool.
#' @return A data.frame of normalized intensities. Columns are grouped by pool
#'   (pools in order of first appearance in the metadata, samples in metadata
#'   order within a pool); row names of `df_data` are preserved.
irsNormDF <- function(df_data, df_metadata) {
  data_df <- as.data.frame(df_data)
  meta_df <- as.data.frame(df_metadata)

  required_cols <- c("Pool", "Group", "Quantitative.column.name")
  missing_cols <- setdiff(required_cols, colnames(meta_df))
  if (length(missing_cols) > 0) {
    stop("df_metadata is missing required column(s): ",
         paste(missing_cols, collapse = ", "))
  }

  sample_names <- as.character(meta_df$Quantitative.column.name)
  sample_pools <- as.character(meta_df$Pool)
  # %in% (rather than ==) so that NA groups count as "not a reference"
  is_ref <- meta_df$Group %in% "Common Reference"

  if (!any(is_ref)) {
    stop("No reference samples specified")
  }

  unknown_samples <- setdiff(sample_names, colnames(data_df))
  if (length(unknown_samples) > 0) {
    stop("Samples listed in df_metadata are missing from df_data: ",
         paste(unknown_samples, collapse = ", "))
  }

  pools <- unique(sample_pools)
  ref_names <- sample_names[is_ref]
  ref_pools <- sample_pools[is_ref]

  # Exactly one reference per pool is required: it is looked up by pool name.
  if (anyDuplicated(ref_pools) > 0) {
    stop("More than one \"Common Reference\" sample in pool(s): ",
         paste(unique(ref_pools[duplicated(ref_pools)]), collapse = ", "))
  }
  pools_without_ref <- setdiff(pools, ref_pools)
  if (length(pools_without_ref) > 0) {
    stop("No \"Common Reference\" sample in pool(s): ",
         paste(pools_without_ref, collapse = ", "))
  }

  # Reference intensities, one column per pool
  ref_mat <- as.matrix(data_df[, ref_names, drop = FALSE])
  # Geometric mean of the references across pools, per row
  ref_geo_mean <- unname(exp(rowMeans(log(ref_mat), na.rm = TRUE)))

  # Group samples by pool; order() is stable, so metadata order is kept
  # within each pool.
  by_pool <- order(match(sample_pools, pools))
  sample_names <- sample_names[by_pool]
  sample_pools <- sample_pools[by_pool]

  values <- as.matrix(data_df[, sample_names, drop = FALSE])
  # Reference column belonging to each sample's pool, aligned with `values`
  ref_for_sample <- ref_mat[, match(sample_pools, ref_pools), drop = FALSE]

  # ref_geo_mean has one entry per row and is recycled down each column
  normalized <- values * (ref_geo_mean / ref_for_sample)

  result <- as.data.frame(normalized)
  colnames(result) <- sample_names
  result
}