In [38]:
library(tidyverse)
library(DESeq2)
library(sva)

source("../../utils/plots_eda.R")

# Load data

In [39]:
# Identifiers of the datasets combined in this analysis (presumably GEO series
# accessions -- TODO confirm). Not referenced again in the visible cells.
datasets <- c(
  "GSE129508",
  "GSE58135",
  "GSE149276"
)

In [40]:
# Read the sample design table and the combined expression table; the
# expression table's `gene_id` column becomes the row names.
all_metadata <- read.table("before/all_design.tsv", header = TRUE, sep = "\t")
all_expression <- read.table(
  "before/all_expr_for_correction.tsv",
  header = TRUE, sep = "\t"
) %>%
  column_to_rownames("gene_id")

# Fail early, with a readable message, if the design table lists samples that
# have no column in the expression table.
missing_samples <- setdiff(
  as.character(all_metadata$sample_id),
  colnames(all_expression)
)
if (length(missing_samples) > 0) {
  stop(
    "Samples listed in the design table are missing from the expression data: ",
    paste(missing_samples, collapse = ", "),
    call. = FALSE
  )
}

# Put the expression columns in the same order as the metadata rows.
# Indexing by character (not by a possible factor, whose integer codes would be
# used as column positions) guarantees selection by name; `drop = FALSE` keeps
# a data.frame even when only one sample is present.
all_expression <- all_expression[
  , as.character(all_metadata$sample_id),
  drop = FALSE
]

# Remove genes (rows) containing any NA.
all_expression <- na.omit(all_expression)
# Remove genes with zero variance across samples; the variance is computed on
# an explicit numeric matrix rather than relying on apply() coercing the
# data.frame.
all_expression <- all_expression[
  apply(as.matrix(all_expression), 1, var) > 0, ,
  drop = FALSE
]

In [41]:
# Report the dimensions of the two tables loaded in the previous cell
# (the data is already in memory when this message is printed).
print("Loading metadata and expression data")
invisible(Map(
  function(label, tbl) {
    print(label)
    print(dim(tbl))
  },
  c("Metadata dimensions:", "Expression data dimensions:"),
  list(all_metadata, all_expression)
))

[1] "Loading metadata and expression data"
[1] "Metadata dimensions:"
[1] 131   3
[1] "Expression data dimensions:"
[1] 28823   131


In [42]:
# Design matrix holding the `lum` covariate; it is passed to ComBat as `mod`
# alongside the batch labels.
design <- model.matrix(~all_metadata$lum)

# Run ComBat with `batch` as the batch variable, then convert the result to a
# data.frame for the plotting and export steps below.
corrected_expr <- sva::ComBat(
  dat = all_expression,
  batch = all_metadata$batch,
  mod = design
) %>%
  as.data.frame()

Found3batches

Adjusting for1covariate(s) or covariate level(s)

Standardizing Data across genes

Fitting L/S model and finding priors

Finding parametric adjustments

Adjusting the Data




In [43]:
# Add the `Status` and `Dataset` columns (copies of `lum` and of `batch` as
# character); presumably these are the names plot_diagnostic() expects --
# TODO confirm against ../../utils/plots_eda.R.
all_metadata <- all_metadata %>%
  mutate(
    Status = lum,
    Dataset = as.character(batch)
  )

In [44]:
# Draw the diagnostic plots for the batch-corrected expression data.
print("Plotting combined corrected data")
# plot_diagnostic() is not defined in this file (presumably it comes from the
# sourced ../../utils/plots_eda.R); its result is indexed below as a list of
# at least three plots.
plot_res <- plot_diagnostic(
  corrected_expr, all_metadata, "Combined Corrected",
  log_transform = TRUE, with_rowname = TRUE
)
# First two plots side by side on top, third plot underneath. The `+` and `/`
# operators on plots presumably come from patchwork -- TODO confirm it is
# loaded by the sourced helpers.
layout <- (plot_res[[1]] + plot_res[[2]]) / plot_res[[3]]
ggsave(
  "after/diagnostic_plot_corrected.png",
  plot = layout,
  width = 12,
  height = 12
)


[1] "Plotting combined corrected data"


[1] "..plotting.."


No id variables; using all as measure variables



In [45]:
# Export the corrected expression table as tab-separated text, with the row
# names moved into a leading `gene_id` column.
write.table(
  rownames_to_column(corrected_expr, "gene_id"),
  file = "after/all_corrected_R_expr.tsv",
  sep = "\t",
  quote = FALSE,
  col.names = TRUE,
  row.names = FALSE
)