---
title: Analysis with DESeq2
format:
    confluence-html:
        code-fold: true
---

[DESeq2](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-014-0550-8?ref=https://githubhelp.com) is a program for differential abundance analysis which was originally developed for RNASeq experiments. It models counts using a negative binomial distribution and can handle complex design formulations. An evaluation of tools specifically adapted for compositional data analysis and more traditional tools for differential expression analysis of RNA-seq data showed that DESeq2 had consistent results also on metagenomic datasets ([Calgaro et al, 2020](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02104-1)).

Specific considerations for metagenomic data used here is the `poscounts` setting which normalizes the data using non-zero counts.

In [18]:
library(DESeq2)
library(mixOmics)
library(arrow)
library(tibble)
library(stringr)
library(dplyr)

In [19]:
cpus <- 10

In [20]:
save_results <- function(r, name) {
    f = paste0("../atlas/stats/DESeq2/", name, ".tsv")
    r$feature <- rownames(r)
    r <- r[, c("feature",colnames(r)[colnames(r)!="feature"])]
    isDE <- plotMA(r, returnData = TRUE)$isDE
    r <- cbind(r, isDE)
    write.table(file = f, x = r, sep = "\t", quote = FALSE, row.names = FALSE)
}

In [21]:
low.count.removal <- function(
                        data, # feature count df of size n (sample) x p (feature)
                        percent=0.01 # cutoff chosen
                        ) 
  {
    keep.feat = which(rowSums(data)*100/(sum(rowSums(data))) > percent)
    data.filter = data[keep.feat, ]
    return(list(data.filter = data.filter, keep.feat = keep.feat))
}

## Read the data

In [22]:
mag <- list()
mag_data <- as.matrix(
    column_to_rownames(
        read_parquet("../atlas/genomes/counts/median_coverage_genomes.parquet"), 
        var="index")
    )
mag$cov <- t(mag_data)

In [23]:
mag_genus <- read.csv("../atlas/genomes/counts/genus.coverage.tsv", sep = "\t", header = TRUE, row.names = 1)
colnames(mag_genus) <- gsub(".t.r", "-t.r", colnames(mag_genus))
mag$cov_genus <- mag_genus

In [24]:
mag_tax = read.table("../atlas/genomes/taxonomy/gtdb_taxonomy.tsv", sep = "\t", header = TRUE, row.names = 1)

In [25]:
pfam <- list()
pfam_data <- read.csv("../atlas/Genecatalog/counts/PFAMs.median_coverage.tsv", sep = "\t", header = TRUE, row.names=1)
colnames(pfam_data) <- gsub(".t.r", "-t.r", colnames(pfam_data))
pfam$cov <- pfam_data

In [26]:
ko <- list()
ko_data <- read.csv("../atlas/Genecatalog/counts/KO.median_coverage.tsv", sep = "\t", header = TRUE, row.names=1)
colnames(ko_data) <- gsub(".t.r", "-t.r", colnames(ko_data))
ko$cov <- ko_data

In [27]:
cazy <- list()
cazy_data <- read.csv("../atlas/Genecatalog/counts/CAZy.median_coverage.tsv", sep = "\t", header = TRUE, row.names=1)
colnames(cazy_data) <- gsub(".t.r", "-t.r", colnames(cazy_data))
cazy$cov <- cazy_data

In [67]:
rgi <- list()
rgi_data <- read.csv("../atlas/Genecatalog/counts/rgi.best_hit_aro.median_coverage.tsv", sep = "\t", header = TRUE, row.names=1)
colnames(rgi_data) <- gsub(".t.r", "-t.r", colnames(rgi_data))
rgi$cov <- rgi_data

In [2]:
uniref <- list()
uniref_data <- read.csv("../atlas/taxonomy/UniRef100.median_fold.species.allsamples.tsv", sep = "\t", header = TRUE, row.names = 1)
uniref_taxonomy <- read.csv("../atlas/taxonomy/UniRef100.median_fold.tsv", sep="\t", header = TRUE)
colnames(uniref_data) <- gsub(".t.r", "-t.r", colnames(uniref_data))
# Remove unclassified and unknown
uc <- rownames(uniref_data)[grep("^uc_", rownames(uniref_data))]
# Remove uncultured
unc <- rownames(uniref_data)[grep("^uncultured", rownames(uniref_data))]
# Remove species with unknown phylum
unk <- uniref_taxonomy[uniref_taxonomy$phylum=="unknown","species"]
uniref_data_filt <- uniref_data[!c(rownames(uniref_data)%in%c(unc, unk, uc)),]
uniref_data_filt <- uniref_data_filt[!c(rownames(uniref_data_filt)%in%c("unknown")), ]
# Keep species with proper name
keep <- grep("^[A-Z][a-z]+ ", rownames(uniref_data_filt))
uniref_data_filt <- uniref_data_filt[keep,]
uniref$cov <- uniref_data_filt

In [4]:
uniref.genus <- list()
uniref_genus_data <- read.csv("../atlas/taxonomy//UniRef100.median_fold.genus.tsv", sep="\t", header = TRUE, row.names = 1)
colnames(uniref_genus_data) <- gsub(".t.r", "-t.r", colnames(uniref_genus_data))
# Remove unclassified and unknown
uc <- rownames(uniref_genus_data)[grep("^uc_", rownames(uniref_genus_data))]
# Remove uncultured
unc <- rownames(uniref_genus_data)[grep("^uncultured", rownames(uniref_genus_data))]
# Remove genera with unknown phylum
unk <- uniref_taxonomy[uniref_taxonomy$phylum=="unknown","genus"]
uniref_genus_filt <- uniref_genus_data[!c(rownames(uniref_genus_data)%in%c(unc, unk, uc)),]
uniref_genus_filt <- uniref_genus_filt[!c(rownames(uniref_genus_filt)%in%c("unknown")), ]
uniref.genus$cov <- uniref_genus_filt

In [68]:
# Only keep best replicates
drop_reps <- c("C11-t.r", "C12", "C19-t.r", "H10", "H13-t.r", "H32", "L11-t.r", "L29", "L6")
mag$cov <- mag$cov[, !c(colnames(mag$cov)%in%drop_reps)]
pfam$cov <- pfam$cov[, !c(colnames(pfam$cov)%in%drop_reps)]
ko$cov <- ko$cov[, !c(colnames(ko$cov)%in%drop_reps)]
cazy$cov <- cazy$cov[, !c(colnames(cazy$cov)%in%drop_reps)]
rgi$cov <- rgi$cov[, !c(colnames(rgi$cov)%in%drop_reps)]
uniref$cov <- uniref$cov[, !c(colnames(uniref$cov)%in%drop_reps)]
uniref.genus$cov <- uniref.genus$cov[,!c(colnames(uniref.genus$cov)%in%drop_reps)]

In [69]:
# Make all dataframes line up
samples <- colnames(mag$cov)
mag$cov <- mag$cov[, samples]
pfam$cov <- pfam$cov[, samples]
ko$cov <- ko$cov[, samples]
cazy$cov <- cazy$cov[, samples]
rgi$cov <- rgi$cov[, samples]
uniref$cov <- uniref$cov[, samples]
uniref.genus$cov <- uniref.genus$cov[, samples]

In [32]:
# Read sample info and set up the relevant columns
info <- read.csv("../data/sample_groups.csv", header = T, row.names = 1)
info[c("Generation","Treatment")] <- str_split_fixed(info$group, "_", 2)
# Reorder the metadata
info <- info[samples, ]

info$group <- factor(info$group)
info$Generation <- factor(info$Generation)
info$Generation <- relevel(info$Generation, ref = "F0")
info$Treatment <- factor(info$Treatment)
info$Treatment <- relevel(info$Treatment, ref = "C")
info$sample <- factor(gsub("-t.r", "", rownames(info)))

In [70]:
all(all(rownames(info) == colnames(mag$cov)),
all(rownames(info) == colnames(pfam$cov)),
all(rownames(info) == colnames(ko$cov)),
all(rownames(info) == colnames(cazy$cov)),
all(rownames(info) == colnames(rgi$cov)),
all(rownames(info) == colnames(uniref$cov)),
all(rownames(info) == colnames(uniref.genus$cov)))

## Differential expression analysis

### MAGs

#### MAGs - generational differences

In [94]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.percent_occ <- 10
# Quantile used for removing low abundance features
mag.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov[rowSums(mag$cov) > 0, ])
mag$nz <- mag$cov[rownames(mag$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz) / sum(rowSums(mag$nz)) * 100, probs = mag.quantile_thresh)
mag$filtered <- low.count.removal(mag$nz, percent = low_threshold)$data.filter

mag$occ <- rowSums(mag$filtered>0) / ncol(mag$filtered) * 100
# Keep features occurring in defined percentage of samples
mag$filtered <- mag$filtered[which(mag$occ >= mag.percent_occ), ]
paste0(c(dim(mag$cov)[1] - dim(mag$filtered)[1], "MAGs removed", dim(mag$filtered)[1], "remaining"))

mag$filtered <- ceiling(mag$filtered)

## DESeq2 analysis
info.mag <- info[, c("run","sample","Generation")]
dds.mag <- DESeqDataSetFromMatrix(countData = mag$filtered,
                              colData = info.mag,
                              design = ~ 1 + Generation)
dds.mag$sample <- info$sample
dds.mag$run <- info$run
#dds.mag <- collapseReplicates(dds.mag, dds.mag$sample, dds.mag$run)
dds.mag <- DESeq(dds.mag, sfType = "poscounts", parallel = TRUE)

# Multi-factor (treatment + generation)
#dds.magMF <- dds.mag
#design(dds.magMF) <- formula(~ group)
#dds.magMF <- DESeq(dds.magMF, sfType = "poscounts")

# Contrast F2 vs F0
res.mag.F2vsF0 <- results(dds.mag, contrast = list("Generation_F2_vs_F0"))
res.mag.F2vsF0 <- res.mag.F2vsF0[order(res.mag.F2vsF0$padj),]
save_results(res.mag.F2vsF0, "res.mag.F2vsF0")

# Contrast F1 vs F0
res.mag.F1vsF0 <- results(dds.mag, contrast = list("Generation_F1_vs_F0"))
res.mag.F1vsF0 <- res.mag.F1vsF0[order(res.mag.F1vsF0$padj),]
save_results(res.mag.F1vsF0, "res.mag.F1vsF0")

# Contrast F2 vs F1
res.mag.F2vsF1 <- results(dds.mag, contrast = list("Generation_F2_vs_F0", "Generation_F1_vs_F0"))
res.mag.F2vsF1 <- res.mag.F2vsF1[order(res.mag.F2vsF1$padj),]
save_results(res.mag.F2vsF1, "res.mag.F2vsF1")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 11 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### MAGs - all samples with generation as a covariate

In [156]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.percent_occ <- 10
# Quantile used for removing low abundance features
mag.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov[rowSums(mag$cov) > 0, ])
mag$nz <- mag$cov[rownames(mag$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz) / sum(rowSums(mag$nz)) * 100, probs = mag.quantile_thresh)
mag$filtered <- low.count.removal(mag$nz, percent = low_threshold)$data.filter

mag$occ <- rowSums(mag$filtered>0) / ncol(mag$filtered) * 100
# Keep features occurring in defined percentage of samples
mag$filtered <- mag$filtered[which(mag$occ >= mag.percent_occ), ]
paste0(c(dim(mag$cov)[1] - dim(mag$filtered)[1], "MAGs removed", dim(mag$filtered)[1], "remaining"))

mag$filtered <- ceiling(mag$filtered)

## DESeq2 analysis
info.mag <- info[, c("run","sample","Generation", "Treatment")]
dds.mag.co <- DESeqDataSetFromMatrix(countData = mag$filtered,
                              colData = info.mag,
                              design = ~ 1 + Generation + Treatment)
dds.mag.co$sample <- info$sample
dds.mag.co$run <- info$run
#dds.mag <- collapseReplicates(dds.mag, dds.mag$sample, dds.mag$run)
dds.mag.co <- DESeq(dds.mag.co, sfType = "poscounts", parallel = TRUE)

res.mag.co.HvsC <- results(dds.mag.co, contrast=list("Treatment_H_vs_C"))
res.mag.co.HvsC <- res.mag.co.HvsC[order(res.mag.co.HvsC$padj),]
save_results(res.mag.co.HvsC, "res.mag.co.HvsC")

res.mag.co.LvsC <- results(dds.mag.co, contrast=list("Treatment_L_vs_C"))
res.mag.co.LvsC <- res.mag.co.LvsC[order(res.mag.co.LvsC$padj),]
save_results(res.mag.co.LvsC, "res.mag.co.LvsC")

res.mag.co.HvsL <- results(dds.mag.co, contrast=list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.mag.co.HvsL <- res.mag.co.HvsL[order(res.mag.co.HvsL$padj),]
save_results(res.mag.co.HvsL, "res.mag.co.HvsL")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 9 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



In [159]:
res.mag.co.LvsC

log2 fold change (MLE): Treatment_L_vs_C effect 
Wald test p-value: Treatment_L_vs_C effect 
DataFrame with 232 rows and 6 columns
        baseMean log2FoldChange     lfcSE        stat      pvalue      padj
       <numeric>      <numeric> <numeric>   <numeric>   <numeric> <numeric>
MAG219   1.69769        2.09479  0.552281     3.79298 0.000148848 0.0172664
MAG226   0.90631        2.00385  0.524510     3.82042 0.000133222 0.0172664
MAG172  10.73429       -4.09456  1.287750    -3.17962 0.001474682 0.0855316
MAG183   1.93392       -1.98054  0.617381    -3.20797 0.001336752 0.0855316
MAG086   3.72290       -1.08586  0.357094    -3.04081 0.002359405 0.0912303
...          ...            ...       ...         ...         ...       ...
MAG141  0.797269     0.02890951  0.746282  0.03873807    0.969099  0.993241
MAG149  5.487468     0.00558991  0.475127  0.01176510    0.990613  0.993241
MAG202  0.974295    -0.01029338  1.215017 -0.00847180    0.993241  0.993241
MAG228  3.496216     0.00401855  

#### MAGs - F0 generation

In [144]:
# Extract and filter F0 samples
mag$cov.F0 <- mag$cov[, rownames(info[info$Generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F0.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F0.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F0[rowSums(mag$cov.F0) > 0, ])
mag$nz.F0 <- mag$cov.F0[rownames(mag$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F0) / sum(rowSums(mag$nz.F0)) * 100, probs = mag.F0.quantile_thresh)
mag$filtered.F0 <- low.count.removal(mag$nz.F0, percent = low_threshold)$data.filter

mag$occ.F0 <- rowSums(mag$filtered.F0>0) / ncol(mag$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F0 <- mag$filtered.F0[which(mag$occ.F0 >= mag.F0.percent_occ), ]
paste0(c(dim(mag$cov.F0)[1] - dim(mag$filtered.F0)[1], "MAGs removed", dim(mag$filtered.F0)[1], "remaining"))


mag$filtered.F0 <- ceiling(mag$filtered.F0)


info.F0 <- info[colnames(mag$filtered.F0), c("run","sample","Treatment")]

# Create DESeq dataset
dds.mag.F0 <- DESeqDataSetFromMatrix(countData = mag$filtered.F0,
                                    colData = info.F0,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.mag.F0$sample <- info.F0$sample
dds.mag.F0$run <- info.F0$run
#dds.mag.F0 <- collapseReplicates(dds.mag.F0, dds.mag.F0$sample, dds.mag.F0$run)

# Run DESeq2
dds.mag.F0 <- DESeq(dds.mag.F0, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.mag.F0.HvsC <- results(dds.mag.F0, contrast = list("Treatment_H_vs_C"))
res.mag.F0.HvsC <- res.mag.F0.HvsC[order(res.mag.F0.HvsC$padj),]
save_results(res.mag.F0.HvsC, "res.mag.F0_HvsF0_C")

# Contrast L vs C
res.mag.F0.LvsC <- results(dds.mag.F0, contrast = list("Treatment_L_vs_C"))
res.mag.F0.LvsC <- res.mag.F0.LvsC[order(res.mag.F0.LvsC$padj),]
save_results(res.mag.F0.LvsC, "res.mag.F0_LvsF0_C")

# Contrast H vs L
res.mag.F0.HvsL <- results(dds.mag.F0, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.mag.F0.HvsL <- res.mag.F0.HvsL[order(res.mag.F0.HvsL$padj),]
save_results(res.mag.F0.HvsL, "res.mag.F0_HvsF0_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 2 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### MAGs - F0 generation (C+L vs H)

In [96]:
info.F0_2 <- info.F0
info.F0_2$Treatment <- gsub(x = info.F0_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.mag.F0_2 <- DESeqDataSetFromMatrix(countData = mag$filtered.F0,
                                    colData = info.F0_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.mag.F0_2 <- DESeq(dds.mag.F0_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.mag.F0.HvsCL <- results(dds.mag.F0_2, contrast = list("Treatment_H_vs_CL"))
res.mag.F0.HvsCL <- res.mag.F0.HvsCL[order(res.mag.F0.HvsCL$padj),]
save_results(res.mag.F0.HvsCL, "res.mag.F0_HvsF0_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 5 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### MAGs - F1 generation

In [97]:
# Extract and filter F1 samples
mag$cov.F1 <- mag$cov[, rownames(info[info$Generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F1.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F1.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F1[rowSums(mag$cov.F1) > 0, ])
mag$nz.F1 <- mag$cov.F1[rownames(mag$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F1) / sum(rowSums(mag$nz.F1)) * 100, probs = mag.F1.quantile_thresh)
mag$filtered.F1 <- low.count.removal(mag$nz.F1, percent = low_threshold)$data.filter

mag$occ.F1 <- rowSums(mag$filtered.F1>0) / ncol(mag$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F1 <- mag$filtered.F1[which(mag$occ.F1 >= mag.F1.percent_occ), ]
paste0(c(dim(mag$cov.F1)[1] - dim(mag$filtered.F1)[1], "MAGs removed", dim(mag$filtered.F1)[1], "remaining"))


mag$filtered.F1 <- ceiling(mag$filtered.F1)


info.F1 <- info[colnames(mag$filtered.F1), c("run","sample","Treatment")]

# Create DESeq dataset
dds.mag.F1 <- DESeqDataSetFromMatrix(countData = mag$filtered.F1,
                                    colData = info.F1,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.mag.F1$sample <- info.F1$sample
dds.mag.F1$run <- info.F1$run
#dds.mag.F1 <- collapseReplicates(dds.mag.F1, dds.mag.F1$sample, dds.mag.F1$run)

# Run DESeq2
dds.mag.F1 <- DESeq(dds.mag.F1, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.mag.F1.HvsC <- results(dds.mag.F1, contrast = list("Treatment_H_vs_C"))
res.mag.F1.HvsC <- res.mag.F1.HvsC[order(res.mag.F1.HvsC$padj),]
save_results(res.mag.F1.HvsC, "res.mag.F1_HvsF1_C")

# Contrast L vs C
res.mag.F1.LvsC <- results(dds.mag.F1, contrast = list("Treatment_L_vs_C"))
res.mag.F1.LvsC <- res.mag.F1.LvsC[order(res.mag.F1.LvsC$padj),]
save_results(res.mag.F1.LvsC, "res.mag.F1_LvsF1_C")

# Contrast H vs L
res.mag.F1.HvsL <- results(dds.mag.F1, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.mag.F1.HvsL <- res.mag.F1.HvsL[order(res.mag.F1.HvsL$padj),]
save_results(res.mag.F1.HvsL, "res.mag.F1_HvsF1_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 5 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### MAGs - F1 generation (C vs H+L)

In [98]:
info.F1_2 <- info.F1
info.F1_2$Treatment <- gsub(x = info.F1_2$Treatment, pattern = "[HL]", replacement = "HL")
# Create DESeq dataset
dds.mag.F1_2 <- DESeqDataSetFromMatrix(countData = mag$filtered.F1,
                                    colData = info.F1_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.mag.F1_2 <- DESeq(dds.mag.F1_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.mag.F1.HLvsC <- results(dds.mag.F1_2, contrast = list("Treatment_HL_vs_C"))
res.mag.F1.HvLsC <- res.mag.F1.HLvsC[order(res.mag.F1.HLvsC$padj),]
save_results(res.mag.F1.HLvsC, "res.mag.F1_HLvsF1_C")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 12 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### MAGs - F1 generation (C+L vs H)

In [99]:
info.F1_3 <- info.F1
info.F1_3$Treatment <- gsub(x = info.F1_3$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.mag.F1_3 <- DESeqDataSetFromMatrix(countData = mag$filtered.F1,
                                    colData = info.F1_3,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.mag.F1_3 <- DESeq(dds.mag.F1_3, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.mag.F1.HvsCL <- results(dds.mag.F1_3, contrast = list("Treatment_H_vs_CL"))
res.mag.F1.HvsCL <- res.mag.F1.HvsCL[order(res.mag.F1.HvsCL$padj),]
save_results(res.mag.F1.HvsCL, "res.mag.F1_HvsF1_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 9 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### MAGs - F2 generation

In [100]:
# Extract and filter F2 samples
mag$cov.F2 <- mag$cov[, rownames(info[info$Generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F2.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F2.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F2[rowSums(mag$cov.F2) > 0, ])
mag$nz.F2 <- mag$cov.F2[rownames(mag$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F2) / sum(rowSums(mag$nz.F2)) * 100, probs = mag.F2.quantile_thresh)
mag$filtered.F2 <- low.count.removal(mag$nz.F2, percent = low_threshold)$data.filter

mag$occ.F2 <- rowSums(mag$filtered.F2>0) / ncol(mag$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F2 <- mag$filtered.F2[which(mag$occ.F2 >= mag.F2.percent_occ), ]
paste0(c(dim(mag$cov.F2)[1] - dim(mag$filtered.F2)[1], "MAGs removed", dim(mag$filtered.F2)[1], "remaining"))


mag$filtered.F2 <- ceiling(mag$filtered.F2)


info.F2 <- info[colnames(mag$filtered.F2), c("run","sample","Treatment")]

# Create DESeq dataset
dds.mag.F2 <- DESeqDataSetFromMatrix(countData = mag$filtered.F2,
                                    colData = info.F2,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.mag.F2$sample <- info.F2$sample
dds.mag.F2$run <- info.F2$run
#dds.mag.F2 <- collapseReplicates(dds.mag.F2, dds.mag.F2$sample, dds.mag.F2$run)

# Run DESeq2
dds.mag.F2 <- DESeq(dds.mag.F2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.mag.F2.HvsC <- results(dds.mag.F2, contrast = list("Treatment_H_vs_C"))
res.mag.F2.HvsC <- res.mag.F2.HvsC[order(res.mag.F2.HvsC$padj),]
save_results(res.mag.F2.HvsC, "res.mag.F2_HvsF2_C")

# Contrast L vs C
res.mag.F2.LvsC <- results(dds.mag.F2, contrast = list("Treatment_L_vs_C"))
res.mag.F2.LvsC <- res.mag.F2.LvsC[order(res.mag.F2.LvsC$padj),]
save_results(res.mag.F2.LvsC, "res.mag.F2_LvsF2_C")

# Contrast H vs L
res.mag.F2.HvsL <- results(dds.mag.F2, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.mag.F2.HvsL <- res.mag.F2.HvsL[order(res.mag.F2.HvsL$padj),]
save_results(res.mag.F2.HvsL, "res.mag.F2_HvsF2_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 32 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### MAGs - F2 generation (C+L vs H)

In [101]:
info.F2_2 <- info.F2
info.F2_2$Treatment <- gsub(x = info.F2_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.mag.F2_2 <- DESeqDataSetFromMatrix(countData = mag$filtered.F2,
                                    colData = info.F2_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.mag.F2_2 <- DESeq(dds.mag.F2_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.mag.F2.HvsCL <- results(dds.mag.F2_2, contrast = list("Treatment_H_vs_CL"))
res.mag.F2.HvsCL <- res.mag.F2.HvsCL[order(res.mag.F2.HvsCL$padj),]
save_results(res.mag.F2.HvsCL, "res.mag.F2_HvsF2_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 32 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



### UniRef species

#### UniRef species - generational differences

In [102]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov[rowSums(uniref$cov) > 0, ])
uniref$nz <- uniref$cov[rownames(uniref$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz) / sum(rowSums(uniref$nz)) * 100, probs = uniref.quantile_thresh)
uniref$filtered <- low.count.removal(uniref$nz, percent = low_threshold)$data.filter

uniref$occ <- rowSums(uniref$filtered>0) / ncol(uniref$filtered) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered <- uniref$filtered[which(uniref$occ >= uniref.percent_occ), ]
paste0(c(dim(uniref$cov)[1] - dim(uniref$filtered)[1], "species removed", dim(uniref$filtered)[1], "remaining"))

uniref$filtered <- ceiling(uniref$filtered)

## DESeq2 analysis
info.uniref <- info[, c("run","sample","Generation")]
dds.uniref <- DESeqDataSetFromMatrix(countData = uniref$filtered,
                              colData = info.uniref,
                              design = ~ 1 + Generation)
dds.uniref$sample <- info$sample
dds.uniref$run <- info$run
#dds.uniref <- collapseReplicates(dds.uniref, dds.uniref$sample, dds.uniref$run)
dds.uniref <- DESeq(dds.uniref, sfType = "poscounts", parallel = TRUE)

# Multi-factor (treatment + generation)
#dds.unirefMF <- dds.uniref
#design(dds.unirefMF) <- formula(~ group)
#dds.unirefMF <- DESeq(dds.unirefMF, sfType = "poscounts")

# Contrast H vs C
res.uniref.F2vsF0 <- results(dds.uniref, contrast = list("Generation_F2_vs_F0"))
res.uniref.F2vsF0 <- res.uniref.F2vsF0[order(res.uniref.F2vsF0$padj),]
save_results(res.uniref.F2vsF0, "res.uniref.F2vsF0")

# Contrast L vs C
res.uniref.F1vsF0 <- results(dds.uniref, contrast = list("Generation_F1_vs_F0"))
res.uniref.F1vsF0 <- res.uniref.F1vsF0[order(res.uniref.F1vsF0$padj),]
save_results(res.uniref.F1vsF0, "res.uniref.F1vsF0")

# Contrast H vs L
res.uniref.F2vsF1 <- results(dds.uniref, contrast = list("Generation_F2_vs_F0", "Generation_F1_vs_F0"))
res.uniref.F2vsF1 <- res.uniref.F2vsF1[order(res.uniref.F2vsF1$padj),]
save_results(res.uniref.F2vsF1, "res.uniref.F2vsF1")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 32 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef species - F0 generation

In [103]:
# Extract and filter F0 samples
uniref$cov.F0 <- uniref$cov[, rownames(info[info$Generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F0.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F0.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F0[rowSums(uniref$cov.F0) > 0, ])
uniref$nz.F0 <- uniref$cov.F0[rownames(uniref$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F0) / sum(rowSums(uniref$nz.F0)) * 100, probs = uniref.F0.quantile_thresh)
uniref$filtered.F0 <- low.count.removal(uniref$nz.F0, percent = low_threshold)$data.filter

uniref$occ.F0 <- rowSums(uniref$filtered.F0>0) / ncol(uniref$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F0 <- uniref$filtered.F0[which(uniref$occ.F0 >= uniref.F0.percent_occ), ]
paste0(c(dim(uniref$cov.F0)[1] - dim(uniref$filtered.F0)[1], "species removed", dim(uniref$filtered.F0)[1], "remaining"))


uniref$filtered.F0 <- ceiling(uniref$filtered.F0)


info.F0 <- info[colnames(uniref$filtered.F0), c("run","sample","Treatment")]

# Create DESeq dataset
dds.uniref.F0 <- DESeqDataSetFromMatrix(countData = uniref$filtered.F0,
                                    colData = info.F0,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.uniref.F0$sample <- info.F0$sample
dds.uniref.F0$run <- info.F0$run
#dds.uniref.F0 <- collapseReplicates(dds.uniref.F0, dds.uniref.F0$sample, dds.uniref.F0$run)

# Run DESeq2
dds.uniref.F0 <- DESeq(dds.uniref.F0, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.uniref.F0.HvsC <- results(dds.uniref.F0, contrast = list("Treatment_H_vs_C"))
res.uniref.F0.HvsC <- res.uniref.F0.HvsC[order(res.uniref.F0.HvsC$padj),]
save_results(res.uniref.F0.HvsC, "res.uniref.F0_HvsF0_C")

# Contrast L vs C
res.uniref.F0.LvsC <- results(dds.uniref.F0, contrast = list("Treatment_L_vs_C"))
res.uniref.F0.LvsC <- res.uniref.F0.LvsC[order(res.uniref.F0.LvsC$padj),]
save_results(res.uniref.F0.LvsC, "res.uniref.F0_LvsF0_C")

# Contrast H vs L
res.uniref.F0.HvsL <- results(dds.uniref.F0, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.uniref.F0.HvsL <- res.uniref.F0.HvsL[order(res.uniref.F0.HvsL$padj),]
save_results(res.uniref.F0.HvsL, "res.uniref.F0_HvsF0_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 45 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef species - F0 generation (C+L vs H)

In [104]:
info.F0_2 <- info.F0
info.F0_2$Treatment <- gsub(x = info.F0_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.uniref.F0_2 <- DESeqDataSetFromMatrix(countData = uniref$filtered.F0,
                                    colData = info.F0_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.uniref.F0_2 <- DESeq(dds.uniref.F0_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.uniref.F0.HvsCL <- results(dds.uniref.F0_2, contrast = list("Treatment_H_vs_CL"))
res.uniref.F0.HvsCL <- res.uniref.F0.HvsCL[order(res.uniref.F0.HvsCL$padj),]
save_results(res.uniref.F0.HvsCL, "res.uniref.F0_HvsF0_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 97 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef species - F1 generation

In [105]:
# Extract and filter F1 samples
uniref$cov.F1 <- uniref$cov[, rownames(info[info$Generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F1.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F1.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F1[rowSums(uniref$cov.F1) > 0, ])
uniref$nz.F1 <- uniref$cov.F1[rownames(uniref$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F1) / sum(rowSums(uniref$nz.F1)) * 100, probs = uniref.F1.quantile_thresh)
uniref$filtered.F1 <- low.count.removal(uniref$nz.F1, percent = low_threshold)$data.filter

uniref$occ.F1 <- rowSums(uniref$filtered.F1>0) / ncol(uniref$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F1 <- uniref$filtered.F1[which(uniref$occ.F1 >= uniref.F1.percent_occ), ]
paste0(c(dim(uniref$cov.F1)[1] - dim(uniref$filtered.F1)[1], "species removed", dim(uniref$filtered.F1)[1], "remaining"))


uniref$filtered.F1 <- ceiling(uniref$filtered.F1)


info.F1 <- info[colnames(uniref$filtered.F1), c("run","sample","Treatment")]

# Create DESeq dataset
dds.uniref.F1 <- DESeqDataSetFromMatrix(countData = uniref$filtered.F1,
                                    colData = info.F1,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.uniref.F1$sample <- info.F1$sample
dds.uniref.F1$run <- info.F1$run
#dds.uniref.F1 <- collapseReplicates(dds.uniref.F1, dds.uniref.F1$sample, dds.uniref.F1$run)

# Run DESeq2
dds.uniref.F1 <- DESeq(dds.uniref.F1, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.uniref.F1.HvsC <- results(dds.uniref.F1, contrast = list("Treatment_H_vs_C"))
res.uniref.F1.HvsC <- res.uniref.F1.HvsC[order(res.uniref.F1.HvsC$padj),]
save_results(res.uniref.F1.HvsC, "res.uniref.F1_HvsF1_C")

# Contrast L vs C
res.uniref.F1.LvsC <- results(dds.uniref.F1, contrast = list("Treatment_L_vs_C"))
res.uniref.F1.LvsC <- res.uniref.F1.LvsC[order(res.uniref.F1.LvsC$padj),]
save_results(res.uniref.F1.LvsC, "res.uniref.F1_LvsF1_C")

# Contrast H vs L
res.uniref.F1.HvsL <- results(dds.uniref.F1, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.uniref.F1.HvsL <- res.uniref.F1.HvsL[order(res.uniref.F1.HvsL$padj),]
save_results(res.uniref.F1.HvsL, "res.uniref.F1_HvsF1_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 50 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef species - F1 generation (C+L vs H)

In [106]:
info.F1_2 <- info.F1
info.F1_2$Treatment <- gsub(x = info.F1_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.uniref.F1_2 <- DESeqDataSetFromMatrix(countData = uniref$filtered.F1,
                                    colData = info.F1_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.uniref.F1_2 <- DESeq(dds.uniref.F1_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.uniref.F1.HvsCL <- results(dds.uniref.F1_2, contrast = list("Treatment_H_vs_CL"))
res.uniref.F1.HvsCL <- res.uniref.F1.HvsCL[order(res.uniref.F1.HvsCL$padj),]
save_results(res.uniref.F1.HvsCL, "res.uniref.F1_HvsF1_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 101 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef species - F2 generation

In [107]:
# Extract and filter F2 samples
uniref$cov.F2 <- uniref$cov[, rownames(info[info$Generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F2.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F2.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F2[rowSums(uniref$cov.F2) > 0, ])
uniref$nz.F2 <- uniref$cov.F2[rownames(uniref$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F2) / sum(rowSums(uniref$nz.F2)) * 100, probs = uniref.F2.quantile_thresh)
uniref$filtered.F2 <- low.count.removal(uniref$nz.F2, percent = low_threshold)$data.filter

uniref$occ.F2 <- rowSums(uniref$filtered.F2>0) / ncol(uniref$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F2 <- uniref$filtered.F2[which(uniref$occ.F2 >= uniref.F2.percent_occ), ]
paste0(c(dim(uniref$cov.F2)[1] - dim(uniref$filtered.F2)[1], "species removed", dim(uniref$filtered.F2)[1], "remaining"))


uniref$filtered.F2 <- ceiling(uniref$filtered.F2)


info.F2 <- info[colnames(uniref$filtered.F2), c("run","sample","Treatment")]

# Create DESeq dataset
dds.uniref.F2 <- DESeqDataSetFromMatrix(countData = uniref$filtered.F2,
                                    colData = info.F2,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.uniref.F2$sample <- info.F2$sample
dds.uniref.F2$run <- info.F2$run
#dds.uniref.F2 <- collapseReplicates(dds.uniref.F2, dds.uniref.F2$sample, dds.uniref.F2$run)

# Run DESeq2
dds.uniref.F2 <- DESeq(dds.uniref.F2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.uniref.F2.HvsC <- results(dds.uniref.F2, contrast = list("Treatment_H_vs_C"))
res.uniref.F2.HvsC <- res.uniref.F2.HvsC[order(res.uniref.F2.HvsC$padj),]
save_results(res.uniref.F2.HvsC, "res.uniref.F2_HvsF2_C")

# Contrast L vs C
res.uniref.F2.LvsC <- results(dds.uniref.F2, contrast = list("Treatment_L_vs_C"))
res.uniref.F2.LvsC <- res.uniref.F2.LvsC[order(res.uniref.F2.LvsC$padj),]
save_results(res.uniref.F2.LvsC, "res.uniref.F2_LvsF2_C")

# Contrast H vs L
res.uniref.F2.HvsL <- results(dds.uniref.F2, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.uniref.F2.HvsL <- res.uniref.F2.HvsL[order(res.uniref.F2.HvsL$padj),]
save_results(res.uniref.F2.HvsL, "res.uniref.F2_HvsF2_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 121 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef species - F2 generation (C+L vs H)

In [108]:
info.F2_2 <- info.F2
info.F2_2$Treatment <- gsub(x = info.F2_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.uniref.F2_2 <- DESeqDataSetFromMatrix(countData = uniref$filtered.F2,
                                    colData = info.F2_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.uniref.F2_2 <- DESeq(dds.uniref.F2_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.uniref.F2.HvsCL <- results(dds.uniref.F2_2, contrast = list("Treatment_H_vs_CL"))
res.uniref.F2.HvsCL <- res.uniref.F2.HvsCL[order(res.uniref.F2.HvsCL$padj),]
save_results(res.uniref.F2.HvsCL, "res.uniref.F2_HvsF2_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 76 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



### UniRef genera

#### UniRef genera - generational differences

In [109]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov[rowSums(uniref.genus$cov) > 0, ])
uniref.genus$nz <- uniref.genus$cov[rownames(uniref.genus$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz) / sum(rowSums(uniref.genus$nz)) * 100, probs = uniref.genus.quantile_thresh)
uniref.genus$filtered <- low.count.removal(uniref.genus$nz, percent = low_threshold)$data.filter

uniref.genus$occ <- rowSums(uniref.genus$filtered>0) / ncol(uniref.genus$filtered) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered <- uniref.genus$filtered[which(uniref.genus$occ >= uniref.genus.percent_occ), ]
paste0(c(dim(uniref.genus$cov)[1] - dim(uniref.genus$filtered)[1], "species removed", dim(uniref.genus$filtered)[1], "remaining"))

uniref.genus$filtered <- ceiling(uniref.genus$filtered)

## DESeq2 analysis
info.uniref.genus <- info[, c("run","sample","Generation")]
dds.uniref.genus <- DESeqDataSetFromMatrix(countData = uniref.genus$filtered,
                              colData = info.uniref.genus,
                              design = ~ 1 + Generation)
dds.uniref.genus$sample <- info$sample
dds.uniref.genus$run <- info$run
#dds.uniref.genus <- collapseReplicates(dds.uniref.genus, dds.uniref.genus$sample, dds.uniref.genus$run)
dds.uniref.genus <- DESeq(dds.uniref.genus, sfType = "poscounts", parallel = TRUE)

# Multi-factor (treatment + generation)
#dds.uniref.genusMF <- dds.uniref.genus
#design(dds.uniref.genusMF) <- formula(~ group)
#dds.uniref.genusMF <- DESeq(dds.uniref.genusMF, sfType = "poscounts")

# Contrast H vs C
res.uniref.genus.F2vsF0 <- results(dds.uniref.genus, contrast = list("Generation_F2_vs_F0"))
res.uniref.genus.F2vsF0 <- res.uniref.genus.F2vsF0[order(res.uniref.genus.F2vsF0$padj),]
save_results(res.uniref.genus.F2vsF0, "res.uniref.genus.F2vsF0")

# Contrast L vs C
res.uniref.genus.F1vsF0 <- results(dds.uniref.genus, contrast = list("Generation_F1_vs_F0"))
res.uniref.genus.F1vsF0 <- res.uniref.genus.F1vsF0[order(res.uniref.genus.F1vsF0$padj),]
save_results(res.uniref.genus.F1vsF0, "res.uniref.genus.F1vsF0")

# Contrast H vs L
res.uniref.genus.F2vsF1 <- results(dds.uniref.genus, contrast = list("Generation_F2_vs_F0", "Generation_F1_vs_F0"))
res.uniref.genus.F2vsF1 <- res.uniref.genus.F2vsF1[order(res.uniref.genus.F2vsF1$padj),]
save_results(res.uniref.genus.F2vsF1, "res.uniref.genus.F2vsF1")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 18 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef genera - F0 generation

In [110]:
# Extract and filter F0 samples
uniref.genus$cov.F0 <- uniref.genus$cov[, rownames(info[info$Generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F0.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F0.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F0[rowSums(uniref.genus$cov.F0) > 0, ])
uniref.genus$nz.F0 <- uniref.genus$cov.F0[rownames(uniref.genus$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F0) / sum(rowSums(uniref.genus$nz.F0)) * 100, probs = uniref.genus.F0.quantile_thresh)
uniref.genus$filtered.F0 <- low.count.removal(uniref.genus$nz.F0, percent = low_threshold)$data.filter

uniref.genus$occ.F0 <- rowSums(uniref.genus$filtered.F0>0) / ncol(uniref.genus$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F0 <- uniref.genus$filtered.F0[which(uniref.genus$occ.F0 >= uniref.genus.F0.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F0)[1] - dim(uniref.genus$filtered.F0)[1], "genera removed", dim(uniref.genus$filtered.F0)[1], "remaining"))


uniref.genus$filtered.F0 <- ceiling(uniref.genus$filtered.F0)


info.F0 <- info[colnames(uniref.genus$filtered.F0), c("run","sample","Treatment")]

# Create DESeq dataset
dds.uniref.genus.F0 <- DESeqDataSetFromMatrix(countData = uniref.genus$filtered.F0,
                                    colData = info.F0,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.uniref.genus.F0$sample <- info.F0$sample
dds.uniref.genus.F0$run <- info.F0$run
#dds.uniref.genus.F0 <- collapseReplicates(dds.uniref.genus.F0, dds.uniref.genus.F0$sample, dds.uniref.genus.F0$run)

# Run DESeq2
dds.uniref.genus.F0 <- DESeq(dds.uniref.genus.F0, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.uniref.genus.F0.HvsC <- results(dds.uniref.genus.F0, contrast = list("Treatment_H_vs_C"))
res.uniref.genus.F0.HvsC <- res.uniref.genus.F0.HvsC[order(res.uniref.genus.F0.HvsC$padj),]
save_results(res.uniref.genus.F0.HvsC, "res.uniref.genus.F0_HvsF0_C")

# Contrast L vs C
res.uniref.genus.F0.LvsC <- results(dds.uniref.genus.F0, contrast = list("Treatment_L_vs_C"))
res.uniref.genus.F0.LvsC <- res.uniref.genus.F0.LvsC[order(res.uniref.genus.F0.LvsC$padj),]
save_results(res.uniref.genus.F0.LvsC, "res.uniref.genus.F0_LvsF0_C")

# Contrast H vs L
res.uniref.genus.F0.HvsL <- results(dds.uniref.genus.F0, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.uniref.genus.F0.HvsL <- res.uniref.genus.F0.HvsL[order(res.uniref.genus.F0.HvsL$padj),]
save_results(res.uniref.genus.F0.HvsL, "res.uniref.genus.F0_HvsF0_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 15 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef genera - F0 generation (C+L vs H)

In [111]:
info.F0_2 <- info.F0
info.F0_2$Treatment <- gsub(x = info.F0_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.uniref.genus.F0_2 <- DESeqDataSetFromMatrix(countData = uniref.genus$filtered.F0,
                                    colData = info.F0_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.uniref.genus.F0_2 <- DESeq(dds.uniref.genus.F0_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.uniref.genus.F0.HvsCL <- results(dds.uniref.genus.F0_2, contrast = list("Treatment_H_vs_CL"))
res.uniref.genus.F0.HvsCL <- res.uniref.genus.F0.HvsCL[order(res.uniref.genus.F0.HvsCL$padj),]
save_results(res.uniref.genus.F0.HvsCL, "res.uniref.genus.F0_HvsF0_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 31 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef genera - F1 generation

In [112]:
# Extract and filter F1 samples
uniref.genus$cov.F1 <- uniref.genus$cov[, rownames(info[info$Generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F1.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F1.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F1[rowSums(uniref.genus$cov.F1) > 0, ])
uniref.genus$nz.F1 <- uniref.genus$cov.F1[rownames(uniref.genus$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F1) / sum(rowSums(uniref.genus$nz.F1)) * 100, probs = uniref.genus.F1.quantile_thresh)
uniref.genus$filtered.F1 <- low.count.removal(uniref.genus$nz.F1, percent = low_threshold)$data.filter

uniref.genus$occ.F1 <- rowSums(uniref.genus$filtered.F1>0) / ncol(uniref.genus$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F1 <- uniref.genus$filtered.F1[which(uniref.genus$occ.F1 >= uniref.genus.F1.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F1)[1] - dim(uniref.genus$filtered.F1)[1], "genera removed", dim(uniref.genus$filtered.F1)[1], "remaining"))


uniref.genus$filtered.F1 <- ceiling(uniref.genus$filtered.F1)


info.F1 <- info[colnames(uniref.genus$filtered.F1), c("run","sample","Treatment")]

# Create DESeq dataset
dds.uniref.genus.F1 <- DESeqDataSetFromMatrix(countData = uniref.genus$filtered.F1,
                                    colData = info.F1,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.uniref.genus.F1$sample <- info.F1$sample
dds.uniref.genus.F1$run <- info.F1$run
#dds.uniref.genus.F1 <- collapseReplicates(dds.uniref.genus.F1, dds.uniref.genus.F1$sample, dds.uniref.genus.F1$run)

# Run DESeq2
dds.uniref.genus.F1 <- DESeq(dds.uniref.genus.F1, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.uniref.genus.F1.HvsC <- results(dds.uniref.genus.F1, contrast = list("Treatment_H_vs_C"))
res.uniref.genus.F1.HvsC <- res.uniref.genus.F1.HvsC[order(res.uniref.genus.F1.HvsC$padj),]
save_results(res.uniref.genus.F1.HvsC, "res.uniref.genus.F1_HvsF1_C")

# Contrast L vs C
res.uniref.genus.F1.LvsC <- results(dds.uniref.genus.F1, contrast = list("Treatment_L_vs_C"))
res.uniref.genus.F1.LvsC <- res.uniref.genus.F1.LvsC[order(res.uniref.genus.F1.LvsC$padj),]
save_results(res.uniref.genus.F1.LvsC, "res.uniref.genus.F1_LvsF1_C")

# Contrast H vs L
res.uniref.genus.F1.HvsL <- results(dds.uniref.genus.F1, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.uniref.genus.F1.HvsL <- res.uniref.genus.F1.HvsL[order(res.uniref.genus.F1.HvsL$padj),]
save_results(res.uniref.genus.F1.HvsL, "res.uniref.genus.F1_HvsF1_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 15 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef genera - F1 generation (C+L vs H)

In [113]:
info.F1_2 <- info.F1
info.F1_2$Treatment <- gsub(x = info.F1_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.uniref.genus.F1_2 <- DESeqDataSetFromMatrix(countData = uniref.genus$filtered.F1,
                                    colData = info.F1_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.uniref.genus.F1_2 <- DESeq(dds.uniref.genus.F1_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.uniref.genus.F1.HvsCL <- results(dds.uniref.genus.F1_2, contrast = list("Treatment_H_vs_CL"))
res.uniref.genus.F1.HvsCL <- res.uniref.genus.F1.HvsCL[order(res.uniref.genus.F1.HvsCL$padj),]
save_results(res.uniref.genus.F1.HvsCL, "res.uniref.genus.F1_HvsF1_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 38 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef genera - F2 generation

In [114]:
# Extract and filter F2 samples
uniref.genus$cov.F2 <- uniref.genus$cov[, rownames(info[info$Generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F2.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F2.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F2[rowSums(uniref.genus$cov.F2) > 0, ])
uniref.genus$nz.F2 <- uniref.genus$cov.F2[rownames(uniref.genus$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F2) / sum(rowSums(uniref.genus$nz.F2)) * 100, probs = uniref.genus.F2.quantile_thresh)
uniref.genus$filtered.F2 <- low.count.removal(uniref.genus$nz.F2, percent = low_threshold)$data.filter

uniref.genus$occ.F2 <- rowSums(uniref.genus$filtered.F2>0) / ncol(uniref.genus$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F2 <- uniref.genus$filtered.F2[which(uniref.genus$occ.F2 >= uniref.genus.F2.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F2)[1] - dim(uniref.genus$filtered.F2)[1], "genera removed", dim(uniref.genus$filtered.F2)[1], "remaining"))


uniref.genus$filtered.F2 <- ceiling(uniref.genus$filtered.F2)


info.F2 <- info[colnames(uniref.genus$filtered.F2), c("run","sample","Treatment")]

# Create DESeq dataset
dds.uniref.genus.F2 <- DESeqDataSetFromMatrix(countData = uniref.genus$filtered.F2,
                                    colData = info.F2,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.uniref.genus.F2$sample <- info.F2$sample
dds.uniref.genus.F2$run <- info.F2$run
#dds.uniref.genus.F2 <- collapseReplicates(dds.uniref.genus.F2, dds.uniref.genus.F2$sample, dds.uniref.genus.F2$run)

# Run DESeq2
dds.uniref.genus.F2 <- DESeq(dds.uniref.genus.F2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.uniref.genus.F2.HvsC <- results(dds.uniref.genus.F2, contrast = list("Treatment_H_vs_C"))
res.uniref.genus.F2.HvsC <- res.uniref.genus.F2.HvsC[order(res.uniref.genus.F2.HvsC$padj),]
save_results(res.uniref.genus.F2.HvsC, "res.uniref.genus.F2_HvsF2_C")

# Contrast L vs C
res.uniref.genus.F2.LvsC <- results(dds.uniref.genus.F2, contrast = list("Treatment_L_vs_C"))
res.uniref.genus.F2.LvsC <- res.uniref.genus.F2.LvsC[order(res.uniref.genus.F2.LvsC$padj),]
save_results(res.uniref.genus.F2.LvsC, "res.uniref.genus.F2_LvsF2_C")

# Contrast H vs L
res.uniref.genus.F2.HvsL <- results(dds.uniref.genus.F2, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.uniref.genus.F2.HvsL <- res.uniref.genus.F2.HvsL[order(res.uniref.genus.F2.HvsL$padj),]
save_results(res.uniref.genus.F2.HvsL, "res.uniref.genus.F2_HvsF2_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 57 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### UniRef genera - F2 generation (C+L vs H)

In [115]:
info.F2_2 <- info.F2
info.F2_2$Treatment <- gsub(x = info.F2_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.uniref.genus.F2_2 <- DESeqDataSetFromMatrix(countData = uniref.genus$filtered.F2,
                                    colData = info.F2_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.uniref.genus.F2_2 <- DESeq(dds.uniref.genus.F2_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.uniref.genus.F2.HvsCL <- results(dds.uniref.genus.F2_2, contrast = list("Treatment_H_vs_CL"))
res.uniref.genus.F2.HvsCL <- res.uniref.genus.F2.HvsCL[order(res.uniref.genus.F2.HvsCL$padj),]
save_results(res.uniref.genus.F2.HvsCL, "res.uniref.genus.F2_HvsF2_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 36 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



### PFAM

#### PFAM - generational differences

In [116]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov[rowSums(pfam$cov) > 0, ])
pfam$nz <- pfam$cov[rownames(pfam$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz) / sum(rowSums(pfam$nz)) * 100, probs = pfam.quantile_thresh)
pfam$filtered <- low.count.removal(pfam$nz, percent = low_threshold)$data.filter

pfam$occ <- rowSums(pfam$filtered>0) / ncol(pfam$filtered) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered <- pfam$filtered[which(pfam$occ >= pfam.percent_occ), ]
paste0(c(dim(pfam$cov)[1] - dim(pfam$filtered)[1], "PFAMs removed", dim(pfam$filtered)[1], "remaining"))

pfam$filtered <- ceiling(pfam$filtered)

## DESeq2 analysis
info.pfam <- info[, c("run","sample","Generation")]
dds.pfam <- DESeqDataSetFromMatrix(countData = pfam$filtered,
                              colData = info.pfam,
                              design = ~ 1 + Generation)
dds.pfam$sample <- info$sample
dds.pfam$run <- info$run
#dds.pfam <- collapseReplicates(dds.pfam, dds.pfam$sample, dds.pfam$run)
dds.pfam <- DESeq(dds.pfam, sfType = "poscounts", parallel = TRUE)

# Multi-factor (treatment + generation)
#dds.pfamMF <- dds.pfam
#design(dds.pfamMF) <- formula(~ group)
#dds.pfamMF <- DESeq(dds.pfamMF, sfType = "poscounts")

# Contrast H vs C
res.pfam.F2vsF0 <- results(dds.pfam, contrast = list("Generation_F2_vs_F0"))
res.pfam.F2vsF0 <- res.pfam.F2vsF0[order(res.pfam.F2vsF0$padj),]
save_results(res.pfam.F2vsF0, "res.pfam.F2vsF0")

# Contrast L vs C
res.pfam.F1vsF0 <- results(dds.pfam, contrast = list("Generation_F1_vs_F0"))
res.pfam.F1vsF0 <- res.pfam.F1vsF0[order(res.pfam.F1vsF0$padj),]
save_results(res.pfam.F1vsF0, "res.pfam.F1vsF0")

# Contrast H vs L
res.pfam.F2vsF1 <- results(dds.pfam, contrast = list("Generation_F2_vs_F0", "Generation_F1_vs_F0"))
res.pfam.F2vsF1 <- res.pfam.F2vsF1[order(res.pfam.F2vsF1$padj),]
save_results(res.pfam.F2vsF1, "res.pfam.F2vsF1")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 106 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### PFAMs - F0 generation

In [117]:
# Extract and filter F0 samples
pfam$cov.F0 <- pfam$cov[, rownames(info[info$Generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F0.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F0.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F0[rowSums(pfam$cov.F0) > 0, ])
pfam$nz.F0 <- pfam$cov.F0[rownames(pfam$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F0) / sum(rowSums(pfam$nz.F0)) * 100, probs = pfam.F0.quantile_thresh)
pfam$filtered.F0 <- low.count.removal(pfam$nz.F0, percent = low_threshold)$data.filter

pfam$occ.F0 <- rowSums(pfam$filtered.F0>0) / ncol(pfam$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F0 <- pfam$filtered.F0[which(pfam$occ.F0 >= pfam.F0.percent_occ), ]


pfam$filtered.F0 <- ceiling(pfam$filtered.F0)


info.F0 <- info[colnames(pfam$filtered.F0), c("run","sample","Treatment")]

# Create DESeq dataset
dds.pfam.F0 <- DESeqDataSetFromMatrix(countData = pfam$filtered.F0,
                                    colData = info.F0,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.pfam.F0$sample <- info.F0$sample
dds.pfam.F0$run <- info.F0$run
#dds.pfam.F0 <- collapseReplicates(dds.pfam.F0, dds.pfam.F0$sample, dds.pfam.F0$run)

# Run DESeq2
dds.pfam.F0 <- DESeq(dds.pfam.F0, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.pfam.F0.HvsC <- results(dds.pfam.F0, contrast = list("Treatment_H_vs_C"))
res.pfam.F0.HvsC <- res.pfam.F0.HvsC[order(res.pfam.F0.HvsC$padj),]
save_results(res.pfam.F0.HvsC, "res.pfam.F0_HvsF0_C")

# Contrast L vs C
res.pfam.F0.LvsC <- results(dds.pfam.F0, contrast = list("Treatment_L_vs_C"))
res.pfam.F0.LvsC <- res.pfam.F0.LvsC[order(res.pfam.F0.LvsC$padj),]
save_results(res.pfam.F0.LvsC, "res.pfam.F0_LvsF0_C")

# Contrast H vs L
res.pfam.F0.HvsL <- results(dds.pfam.F0, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.pfam.F0.HvsL <- res.pfam.F0.HvsL[order(res.pfam.F0.HvsL$padj),]
save_results(res.pfam.F0.HvsL, "res.pfam.F0_HvsF0_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 5 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### PFAMs - F0 generation (C+L vs H)

In [118]:
info.F0_2 <- info.F0
info.F0_2$Treatment <- gsub(x = info.F0_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.pfam.F0_2 <- DESeqDataSetFromMatrix(countData = pfam$filtered.F0,
                                    colData = info.F0_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.pfam.F0_2 <- DESeq(dds.pfam.F0_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.pfam.F0.HvsCL <- results(dds.pfam.F0_2, contrast = list("Treatment_H_vs_CL"))
res.pfam.F0.HvsCL <- res.pfam.F0.HvsCL[order(res.pfam.F0.HvsCL$padj),]
save_results(res.pfam.F0.HvsCL, "res.pfam.F0_HvsF0_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 19 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### PFAMs - F1 generation

In [119]:
# Extract and filter F1 samples
pfam$cov.F1 <- pfam$cov[, rownames(info[info$Generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F1.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F1.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F1[rowSums(pfam$cov.F1) > 0, ])
pfam$nz.F1 <- pfam$cov.F1[rownames(pfam$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F1) / sum(rowSums(pfam$nz.F1)) * 100, probs = pfam.F1.quantile_thresh)
pfam$filtered.F1 <- low.count.removal(pfam$nz.F1, percent = low_threshold)$data.filter

pfam$occ.F1 <- rowSums(pfam$filtered.F1>0) / ncol(pfam$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F1 <- pfam$filtered.F1[which(pfam$occ.F1 >= pfam.F1.percent_occ), ]


pfam$filtered.F1 <- ceiling(pfam$filtered.F1)


info.F1 <- info[colnames(pfam$filtered.F1), c("run","sample","Treatment")]

# Create DESeq dataset
dds.pfam.F1 <- DESeqDataSetFromMatrix(countData = pfam$filtered.F1,
                                    colData = info.F1,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.pfam.F1$sample <- info.F1$sample
dds.pfam.F1$run <- info.F1$run
#dds.pfam.F1 <- collapseReplicates(dds.pfam.F1, dds.pfam.F1$sample, dds.pfam.F1$run)

# Run DESeq2
dds.pfam.F1 <- DESeq(dds.pfam.F1, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.pfam.F1.HvsC <- results(dds.pfam.F1, contrast = list("Treatment_H_vs_C"))
res.pfam.F1.HvsC <- res.pfam.F1.HvsC[order(res.pfam.F1.HvsC$padj),]
save_results(res.pfam.F1.HvsC, "res.pfam.F1_HvsF1_C")

# Contrast L vs C
res.pfam.F1.LvsC <- results(dds.pfam.F1, contrast = list("Treatment_L_vs_C"))
res.pfam.F1.LvsC <- res.pfam.F1.LvsC[order(res.pfam.F1.LvsC$padj),]
save_results(res.pfam.F1.LvsC, "res.pfam.F1_LvsF1_C")

# Contrast H vs L
res.pfam.F1.HvsL <- results(dds.pfam.F1, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.pfam.F1.HvsL <- res.pfam.F1.HvsL[order(res.pfam.F1.HvsL$padj),]
save_results(res.pfam.F1.HvsL, "res.pfam.F1_HvsF1_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 17 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### PFAMs - F1 generation (C+L vs H)

In [120]:
info.F1_2 <- info.F1
info.F1_2$Treatment <- gsub(x = info.F1_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.pfam.F1_2 <- DESeqDataSetFromMatrix(countData = pfam$filtered.F1,
                                    colData = info.F1_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.pfam.F1_2 <- DESeq(dds.pfam.F1_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.pfam.F1.HvsCL <- results(dds.pfam.F1_2, contrast = list("Treatment_H_vs_CL"))
res.pfam.F1.HvsCL <- res.pfam.F1.HvsCL[order(res.pfam.F1.HvsCL$padj),]
save_results(res.pfam.F1.HvsCL, "res.pfam.F1_HvsF1_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 58 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### PFAMs - F2 generation

In [121]:
# Extract and filter F2 samples
pfam$cov.F2 <- pfam$cov[, rownames(info[info$Generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F2.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F2.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F2[rowSums(pfam$cov.F2) > 0, ])
pfam$nz.F2 <- pfam$cov.F2[rownames(pfam$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F2) / sum(rowSums(pfam$nz.F2)) * 100, probs = pfam.F2.quantile_thresh)
pfam$filtered.F2 <- low.count.removal(pfam$nz.F2, percent = low_threshold)$data.filter

pfam$occ.F2 <- rowSums(pfam$filtered.F2>0) / ncol(pfam$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F2 <- pfam$filtered.F2[which(pfam$occ.F2 >= pfam.F2.percent_occ), ]


pfam$filtered.F2 <- ceiling(pfam$filtered.F2)


info.F2 <- info[colnames(pfam$filtered.F2), c("run","sample","Treatment")]

# Create DESeq dataset
dds.pfam.F2 <- DESeqDataSetFromMatrix(countData = pfam$filtered.F2,
                                    colData = info.F2,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.pfam.F2$sample <- info.F2$sample
dds.pfam.F2$run <- info.F2$run
#dds.pfam.F2 <- collapseReplicates(dds.pfam.F2, dds.pfam.F2$sample, dds.pfam.F2$run)

# Run DESeq2
dds.pfam.F2 <- DESeq(dds.pfam.F2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.pfam.F2.HvsC <- results(dds.pfam.F2, contrast = list("Treatment_H_vs_C"))
res.pfam.F2.HvsC <- res.pfam.F2.HvsC[order(res.pfam.F2.HvsC$padj),]
save_results(res.pfam.F2.HvsC, "res.pfam.F2_HvsF2_C")

# Contrast L vs C
res.pfam.F2.LvsC <- results(dds.pfam.F2, contrast = list("Treatment_L_vs_C"))
res.pfam.F2.LvsC <- res.pfam.F2.LvsC[order(res.pfam.F2.LvsC$padj),]
save_results(res.pfam.F2.LvsC, "res.pfam.F2_LvsF2_C")

# Contrast H vs L
res.pfam.F2.HvsL <- results(dds.pfam.F2, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.pfam.F2.HvsL <- res.pfam.F2.HvsL[order(res.pfam.F2.HvsL$padj),]
save_results(res.pfam.F2.HvsL, "res.pfam.F2_HvsF2_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 327 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### PFAMs - F2 generation (C+L vs H)

In [122]:
info.F2_2 <- info.F2
info.F2_2$Treatment <- gsub(x = info.F2_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.pfam.F2_2 <- DESeqDataSetFromMatrix(countData = pfam$filtered.F2,
                                    colData = info.F2_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.pfam.F2_2 <- DESeq(dds.pfam.F2_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.pfam.F2.HvsCL <- results(dds.pfam.F2_2, contrast = list("Treatment_H_vs_CL"))
res.pfam.F2.HvsCL <- res.pfam.F2.HvsCL[order(res.pfam.F2.HvsCL$padj),]
save_results(res.pfam.F2.HvsCL, "res.pfam.F2_HvsF2_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 274 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



### KO

#### ko - generational differences

In [123]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.percent_occ <- 10
# Quantile used for removing low abundance features
ko.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov[rowSums(ko$cov) > 0, ])
ko$nz <- ko$cov[rownames(ko$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz) / sum(rowSums(ko$nz)) * 100, probs = ko.quantile_thresh)
ko$filtered <- low.count.removal(ko$nz, percent = low_threshold)$data.filter

ko$occ <- rowSums(ko$filtered>0) / ncol(ko$filtered) * 100
# Keep features occurring in defined percentage of samples
ko$filtered <- ko$filtered[which(ko$occ >= ko.percent_occ), ]
paste0(c(dim(ko$cov)[1] - dim(ko$filtered)[1], "kos removed", dim(ko$filtered)[1], "remaining"))

ko$filtered <- ceiling(ko$filtered)

## DESeq2 analysis
info.ko <- info[, c("run","sample","Generation")]
dds.ko <- DESeqDataSetFromMatrix(countData = ko$filtered,
                              colData = info.ko,
                              design = ~ 1 + Generation)
dds.ko$sample <- info$sample
dds.ko$run <- info$run
#dds.ko <- collapseReplicates(dds.ko, dds.ko$sample, dds.ko$run)
dds.ko <- DESeq(dds.ko, sfType = "poscounts", parallel = TRUE)

# Multi-factor (treatment + generation)
#dds.koMF <- dds.ko
#design(dds.koMF) <- formula(~ group)
#dds.koMF <- DESeq(dds.koMF, sfType = "poscounts")

# Contrast H vs C
res.ko.F2vsF0 <- results(dds.ko, contrast = list("Generation_F2_vs_F0"))
res.ko.F2vsF0 <- res.ko.F2vsF0[order(res.ko.F2vsF0$padj),]
save_results(res.ko.F2vsF0, "res.ko.F2vsF0")

# Contrast L vs C
res.ko.F1vsF0 <- results(dds.ko, contrast = list("Generation_F1_vs_F0"))
res.ko.F1vsF0 <- res.ko.F1vsF0[order(res.ko.F1vsF0$padj),]
save_results(res.ko.F1vsF0, "res.ko.F1vsF0")

# Contrast H vs L
res.ko.F2vsF1 <- results(dds.ko, contrast = list("Generation_F2_vs_F0", "Generation_F1_vs_F0"))
res.ko.F2vsF1 <- res.ko.F2vsF1[order(res.ko.F2vsF1$padj),]
save_results(res.ko.F2vsF1, "res.ko.F2vsF1")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 103 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### kos - F0 generation

In [124]:
# Extract and filter F0 samples
ko$cov.F0 <- ko$cov[, rownames(info[info$Generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F0.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F0.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F0[rowSums(ko$cov.F0) > 0, ])
ko$nz.F0 <- ko$cov.F0[rownames(ko$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F0) / sum(rowSums(ko$nz.F0)) * 100, probs = ko.F0.quantile_thresh)
ko$filtered.F0 <- low.count.removal(ko$nz.F0, percent = low_threshold)$data.filter

ko$occ.F0 <- rowSums(ko$filtered.F0>0) / ncol(ko$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F0 <- ko$filtered.F0[which(ko$occ.F0 >= ko.F0.percent_occ), ]


ko$filtered.F0 <- ceiling(ko$filtered.F0)


info.F0 <- info[colnames(ko$filtered.F0), c("run","sample","Treatment")]

# Create DESeq dataset
dds.ko.F0 <- DESeqDataSetFromMatrix(countData = ko$filtered.F0,
                                    colData = info.F0,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.ko.F0$sample <- info.F0$sample
dds.ko.F0$run <- info.F0$run
#dds.ko.F0 <- collapseReplicates(dds.ko.F0, dds.ko.F0$sample, dds.ko.F0$run)

# Run DESeq2
dds.ko.F0 <- DESeq(dds.ko.F0, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.ko.F0.HvsC <- results(dds.ko.F0, contrast = list("Treatment_H_vs_C"))
res.ko.F0.HvsC <- res.ko.F0.HvsC[order(res.ko.F0.HvsC$padj),]
save_results(res.ko.F0.HvsC, "res.ko.F0_HvsF0_C")

# Contrast L vs C
res.ko.F0.LvsC <- results(dds.ko.F0, contrast = list("Treatment_L_vs_C"))
res.ko.F0.LvsC <- res.ko.F0.LvsC[order(res.ko.F0.LvsC$padj),]
save_results(res.ko.F0.LvsC, "res.ko.F0_LvsF0_C")

# Contrast H vs L
res.ko.F0.HvsL <- results(dds.ko.F0, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.ko.F0.HvsL <- res.ko.F0.HvsL[order(res.ko.F0.HvsL$padj),]
save_results(res.ko.F0.HvsL, "res.ko.F0_HvsF0_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 8 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### kos - F0 generation (C+L vs H)

In [125]:
info.F0_2 <- info.F0
info.F0_2$Treatment <- gsub(x = info.F0_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.ko.F0_2 <- DESeqDataSetFromMatrix(countData = ko$filtered.F0,
                                    colData = info.F0_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.ko.F0_2 <- DESeq(dds.ko.F0_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.ko.F0.HvsCL <- results(dds.ko.F0_2, contrast = list("Treatment_H_vs_CL"))
res.ko.F0.HvsCL <- res.ko.F0.HvsCL[order(res.ko.F0.HvsCL$padj),]
save_results(res.ko.F0.HvsCL, "res.ko.F0_HvsF0_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 32 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### kos - F1 generation

In [126]:
# Extract and filter F1 samples
ko$cov.F1 <- ko$cov[, rownames(info[info$Generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F1.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F1.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F1[rowSums(ko$cov.F1) > 0, ])
ko$nz.F1 <- ko$cov.F1[rownames(ko$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F1) / sum(rowSums(ko$nz.F1)) * 100, probs = ko.F1.quantile_thresh)
ko$filtered.F1 <- low.count.removal(ko$nz.F1, percent = low_threshold)$data.filter

ko$occ.F1 <- rowSums(ko$filtered.F1>0) / ncol(ko$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F1 <- ko$filtered.F1[which(ko$occ.F1 >= ko.F1.percent_occ), ]


ko$filtered.F1 <- ceiling(ko$filtered.F1)


info.F1 <- info[colnames(ko$filtered.F1), c("run","sample","Treatment")]

# Create DESeq dataset
dds.ko.F1 <- DESeqDataSetFromMatrix(countData = ko$filtered.F1,
                                    colData = info.F1,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.ko.F1$sample <- info.F1$sample
dds.ko.F1$run <- info.F1$run
#dds.ko.F1 <- collapseReplicates(dds.ko.F1, dds.ko.F1$sample, dds.ko.F1$run)

# Run DESeq2
dds.ko.F1 <- DESeq(dds.ko.F1, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.ko.F1.HvsC <- results(dds.ko.F1, contrast = list("Treatment_H_vs_C"))
res.ko.F1.HvsC <- res.ko.F1.HvsC[order(res.ko.F1.HvsC$padj),]
save_results(res.ko.F1.HvsC, "res.ko.F1_HvsF1_C")

# Contrast L vs C
res.ko.F1.LvsC <- results(dds.ko.F1, contrast = list("Treatment_L_vs_C"))
res.ko.F1.LvsC <- res.ko.F1.LvsC[order(res.ko.F1.LvsC$padj),]
save_results(res.ko.F1.LvsC, "res.ko.F1_LvsF1_C")

# Contrast H vs L
res.ko.F1.HvsL <- results(dds.ko.F1, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.ko.F1.HvsL <- res.ko.F1.HvsL[order(res.ko.F1.HvsL$padj),]
save_results(res.ko.F1.HvsL, "res.ko.F1_HvsF1_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 15 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### kos - F1 generation (C+L vs H)

In [127]:
info.F1_2 <- info.F1
info.F1_2$Treatment <- gsub(x = info.F1_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.ko.F1_2 <- DESeqDataSetFromMatrix(countData = ko$filtered.F1,
                                    colData = info.F1_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.ko.F1_2 <- DESeq(dds.ko.F1_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.ko.F1.HvsCL <- results(dds.ko.F1_2, contrast = list("Treatment_H_vs_CL"))
res.ko.F1.HvsCL <- res.ko.F1.HvsCL[order(res.ko.F1.HvsCL$padj),]
save_results(res.ko.F1.HvsCL, "res.ko.F1_HvsF1_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 68 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### kos - F2 generation

In [128]:
# Extract and filter F2 samples
ko$cov.F2 <- ko$cov[, rownames(info[info$Generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F2.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F2.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F2[rowSums(ko$cov.F2) > 0, ])
ko$nz.F2 <- ko$cov.F2[rownames(ko$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F2) / sum(rowSums(ko$nz.F2)) * 100, probs = ko.F2.quantile_thresh)
ko$filtered.F2 <- low.count.removal(ko$nz.F2, percent = low_threshold)$data.filter

ko$occ.F2 <- rowSums(ko$filtered.F2>0) / ncol(ko$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F2 <- ko$filtered.F2[which(ko$occ.F2 >= ko.F2.percent_occ), ]


ko$filtered.F2 <- ceiling(ko$filtered.F2)


info.F2 <- info[colnames(ko$filtered.F2), c("run","sample","Treatment")]

# Create DESeq dataset
dds.ko.F2 <- DESeqDataSetFromMatrix(countData = ko$filtered.F2,
                                    colData = info.F2,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.ko.F2$sample <- info.F2$sample
dds.ko.F2$run <- info.F2$run
#dds.ko.F2 <- collapseReplicates(dds.ko.F2, dds.ko.F2$sample, dds.ko.F2$run)

# Run DESeq2
dds.ko.F2 <- DESeq(dds.ko.F2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.ko.F2.HvsC <- results(dds.ko.F2, contrast = list("Treatment_H_vs_C"))
res.ko.F2.HvsC <- res.ko.F2.HvsC[order(res.ko.F2.HvsC$padj),]
save_results(res.ko.F2.HvsC, "res.ko.F2_HvsF2_C")

# Contrast L vs C
res.ko.F2.LvsC <- results(dds.ko.F2, contrast = list("Treatment_L_vs_C"))
res.ko.F2.LvsC <- res.ko.F2.LvsC[order(res.ko.F2.LvsC$padj),]
save_results(res.ko.F2.LvsC, "res.ko.F2_LvsF2_C")

# Contrast H vs L
res.ko.F2.HvsL <- results(dds.ko.F2, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.ko.F2.HvsL <- res.ko.F2.HvsL[order(res.ko.F2.HvsL$padj),]
save_results(res.ko.F2.HvsL, "res.ko.F2_HvsF2_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 362 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### kos - F2 generation (C+L vs H)

In [129]:
info.F2_2 <- info.F2
info.F2_2$Treatment <- gsub(x = info.F2_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.ko.F2_2 <- DESeqDataSetFromMatrix(countData = ko$filtered.F2,
                                    colData = info.F2_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.ko.F2_2 <- DESeq(dds.ko.F2_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.ko.F2.HvsCL <- results(dds.ko.F2_2, contrast = list("Treatment_H_vs_CL"))
res.ko.F2.HvsCL <- res.ko.F2.HvsCL[order(res.ko.F2.HvsCL$padj),]
save_results(res.ko.F2.HvsCL, "res.ko.F2_HvsF2_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 298 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



### cazy

#### cazy - generational differences

In [130]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
cazy.percent_occ <- 10
# Quantile used for removing low abundance features
cazy.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(cazy$cov[rowSums(cazy$cov) > 0, ])
cazy$nz <- cazy$cov[rownames(cazy$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(cazy$nz) / sum(rowSums(cazy$nz)) * 100, probs = cazy.quantile_thresh)
cazy$filtered <- low.count.removal(cazy$nz, percent = low_threshold)$data.filter

cazy$occ <- rowSums(cazy$filtered>0) / ncol(cazy$filtered) * 100
# Keep features occurring in defined percentage of samples
cazy$filtered <- cazy$filtered[which(cazy$occ >= cazy.percent_occ), ]
paste0(c(dim(cazy$cov)[1] - dim(cazy$filtered)[1], "cazys removed", dim(cazy$filtered)[1], "remaining"))

cazy$filtered <- ceiling(cazy$filtered)

## DESeq2 analysis
info.cazy <- info[, c("run","sample","Generation")]
dds.cazy <- DESeqDataSetFromMatrix(countData = cazy$filtered,
                              colData = info.cazy,
                              design = ~ 1 + Generation)
dds.cazy$sample <- info$sample
dds.cazy$run <- info$run
#dds.cazy <- collapseReplicates(dds.cazy, dds.cazy$sample, dds.cazy$run)
dds.cazy <- DESeq(dds.cazy, sfType = "poscounts", parallel = TRUE)

# Multi-factor (treatment + generation)
#dds.cazyMF <- dds.cazy
#design(dds.cazyMF) <- formula(~ group)
#dds.cazyMF <- DESeq(dds.cazyMF, sfType = "poscounts")

# Contrast H vs C
res.cazy.F2vsF0 <- results(dds.cazy, contrast = list("Generation_F2_vs_F0"))
res.cazy.F2vsF0 <- res.cazy.F2vsF0[order(res.cazy.F2vsF0$padj),]
save_results(res.cazy.F2vsF0, "res.cazy.F2vsF0")

# Contrast L vs C
res.cazy.F1vsF0 <- results(dds.cazy, contrast = list("Generation_F1_vs_F0"))
res.cazy.F1vsF0 <- res.cazy.F1vsF0[order(res.cazy.F1vsF0$padj),]
save_results(res.cazy.F1vsF0, "res.cazy.F1vsF0")

# Contrast H vs L
res.cazy.F2vsF1 <- results(dds.cazy, contrast = list("Generation_F2_vs_F0", "Generation_F1_vs_F0"))
res.cazy.F2vsF1 <- res.cazy.F2vsF1[order(res.cazy.F2vsF1$padj),]
save_results(res.cazy.F2vsF1, "res.cazy.F2vsF1")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 1 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### cazys - F0 generation

In [131]:
# Extract and filter F0 samples
cazy$cov.F0 <- cazy$cov[, rownames(info[info$Generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
cazy.F0.percent_occ <- 10
# Quantile used for removing low abundance features
cazy.F0.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(cazy$cov.F0[rowSums(cazy$cov.F0) > 0, ])
cazy$nz.F0 <- cazy$cov.F0[rownames(cazy$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(cazy$nz.F0) / sum(rowSums(cazy$nz.F0)) * 100, probs = cazy.F0.quantile_thresh)
cazy$filtered.F0 <- low.count.removal(cazy$nz.F0, percent = low_threshold)$data.filter

cazy$occ.F0 <- rowSums(cazy$filtered.F0>0) / ncol(cazy$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
cazy$filtered.F0 <- cazy$filtered.F0[which(cazy$occ.F0 >= cazy.F0.percent_occ), ]


cazy$filtered.F0 <- ceiling(cazy$filtered.F0)


info.F0 <- info[colnames(cazy$filtered.F0), c("run","sample","Treatment")]

# Create DESeq dataset
dds.cazy.F0 <- DESeqDataSetFromMatrix(countData = cazy$filtered.F0,
                                    colData = info.F0,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.cazy.F0$sample <- info.F0$sample
dds.cazy.F0$run <- info.F0$run
#dds.cazy.F0 <- collapseReplicates(dds.cazy.F0, dds.cazy.F0$sample, dds.cazy.F0$run)

# Run DESeq2
dds.cazy.F0 <- DESeq(dds.cazy.F0, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.cazy.F0.HvsC <- results(dds.cazy.F0, contrast = list("Treatment_H_vs_C"))
res.cazy.F0.HvsC <- res.cazy.F0.HvsC[order(res.cazy.F0.HvsC$padj),]
save_results(res.cazy.F0.HvsC, "res.cazy.F0_HvsF0_C")

# Contrast L vs C
res.cazy.F0.LvsC <- results(dds.cazy.F0, contrast = list("Treatment_L_vs_C"))
res.cazy.F0.LvsC <- res.cazy.F0.LvsC[order(res.cazy.F0.LvsC$padj),]
save_results(res.cazy.F0.LvsC, "res.cazy.F0_LvsF0_C")

# Contrast H vs L
res.cazy.F0.HvsL <- results(dds.cazy.F0, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.cazy.F0.HvsL <- res.cazy.F0.HvsL[order(res.cazy.F0.HvsL$padj),]
save_results(res.cazy.F0.HvsL, "res.cazy.F0_HvsF0_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers



#### cazys - F0 generation (C+L vs H)

In [132]:
info.F0_2 <- info.F0
info.F0_2$Treatment <- gsub(x = info.F0_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.cazy.F0_2 <- DESeqDataSetFromMatrix(countData = cazy$filtered.F0,
                                    colData = info.F0_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.cazy.F0_2 <- DESeq(dds.cazy.F0_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.cazy.F0.HvsCL <- results(dds.cazy.F0_2, contrast = list("Treatment_H_vs_CL"))
res.cazy.F0.HvsCL <- res.cazy.F0.HvsCL[order(res.cazy.F0.HvsCL$padj),]
save_results(res.cazy.F0.HvsCL, "res.cazy.F0_HvsF0_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers



#### cazys - F1 generation

In [133]:
# Extract and filter F1 samples
cazy$cov.F1 <- cazy$cov[, rownames(info[info$Generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
cazy.F1.percent_occ <- 10
# Quantile used for removing low abundance features
cazy.F1.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(cazy$cov.F1[rowSums(cazy$cov.F1) > 0, ])
cazy$nz.F1 <- cazy$cov.F1[rownames(cazy$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(cazy$nz.F1) / sum(rowSums(cazy$nz.F1)) * 100, probs = cazy.F1.quantile_thresh)
cazy$filtered.F1 <- low.count.removal(cazy$nz.F1, percent = low_threshold)$data.filter

cazy$occ.F1 <- rowSums(cazy$filtered.F1>0) / ncol(cazy$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
cazy$filtered.F1 <- cazy$filtered.F1[which(cazy$occ.F1 >= cazy.F1.percent_occ), ]


cazy$filtered.F1 <- ceiling(cazy$filtered.F1)


info.F1 <- info[colnames(cazy$filtered.F1), c("run","sample","Treatment")]

# Create DESeq dataset
dds.cazy.F1 <- DESeqDataSetFromMatrix(countData = cazy$filtered.F1,
                                    colData = info.F1,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.cazy.F1$sample <- info.F1$sample
dds.cazy.F1$run <- info.F1$run
#dds.cazy.F1 <- collapseReplicates(dds.cazy.F1, dds.cazy.F1$sample, dds.cazy.F1$run)

# Run DESeq2
dds.cazy.F1 <- DESeq(dds.cazy.F1, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.cazy.F1.HvsC <- results(dds.cazy.F1, contrast = list("Treatment_H_vs_C"))
res.cazy.F1.HvsC <- res.cazy.F1.HvsC[order(res.cazy.F1.HvsC$padj),]
save_results(res.cazy.F1.HvsC, "res.cazy.F1_HvsF1_C")

# Contrast L vs C
res.cazy.F1.LvsC <- results(dds.cazy.F1, contrast = list("Treatment_L_vs_C"))
res.cazy.F1.LvsC <- res.cazy.F1.LvsC[order(res.cazy.F1.LvsC$padj),]
save_results(res.cazy.F1.LvsC, "res.cazy.F1_LvsF1_C")

# Contrast H vs L
res.cazy.F1.HvsL <- results(dds.cazy.F1, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.cazy.F1.HvsL <- res.cazy.F1.HvsL[order(res.cazy.F1.HvsL$padj),]
save_results(res.cazy.F1.HvsL, "res.cazy.F1_HvsF1_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers



#### cazys - F1 generation (C+L vs H)

In [134]:
info.F1_2 <- info.F1
info.F1_2$Treatment <- gsub(x = info.F1_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.cazy.F1_2 <- DESeqDataSetFromMatrix(countData = cazy$filtered.F1,
                                    colData = info.F1_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.cazy.F1_2 <- DESeq(dds.cazy.F1_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.cazy.F1.HvsCL <- results(dds.cazy.F1_2, contrast = list("Treatment_H_vs_CL"))
res.cazy.F1.HvsCL <- res.cazy.F1.HvsCL[order(res.cazy.F1.HvsCL$padj),]
save_results(res.cazy.F1.HvsCL, "res.cazy.F1_HvsF1_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 1 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### cazys - F2 generation

In [135]:
# Extract and filter F2 samples
cazy$cov.F2 <- cazy$cov[, rownames(info[info$Generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
cazy.F2.percent_occ <- 10
# Quantile used for removing low abundance features
cazy.F2.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(cazy$cov.F2[rowSums(cazy$cov.F2) > 0, ])
cazy$nz.F2 <- cazy$cov.F2[rownames(cazy$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(cazy$nz.F2) / sum(rowSums(cazy$nz.F2)) * 100, probs = cazy.F2.quantile_thresh)
cazy$filtered.F2 <- low.count.removal(cazy$nz.F2, percent = low_threshold)$data.filter

cazy$occ.F2 <- rowSums(cazy$filtered.F2>0) / ncol(cazy$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
cazy$filtered.F2 <- cazy$filtered.F2[which(cazy$occ.F2 >= cazy.F2.percent_occ), ]


cazy$filtered.F2 <- ceiling(cazy$filtered.F2)


info.F2 <- info[colnames(cazy$filtered.F2), c("run","sample","Treatment")]

# Create DESeq dataset
dds.cazy.F2 <- DESeqDataSetFromMatrix(countData = cazy$filtered.F2,
                                    colData = info.F2,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.cazy.F2$sample <- info.F2$sample
dds.cazy.F2$run <- info.F2$run
#dds.cazy.F2 <- collapseReplicates(dds.cazy.F2, dds.cazy.F2$sample, dds.cazy.F2$run)

# Run DESeq2
dds.cazy.F2 <- DESeq(dds.cazy.F2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.cazy.F2.HvsC <- results(dds.cazy.F2, contrast = list("Treatment_H_vs_C"))
res.cazy.F2.HvsC <- res.cazy.F2.HvsC[order(res.cazy.F2.HvsC$padj),]
save_results(res.cazy.F2.HvsC, "res.cazy.F2_HvsF2_C")

# Contrast L vs C
res.cazy.F2.LvsC <- results(dds.cazy.F2, contrast = list("Treatment_L_vs_C"))
res.cazy.F2.LvsC <- res.cazy.F2.LvsC[order(res.cazy.F2.LvsC$padj),]
save_results(res.cazy.F2.LvsC, "res.cazy.F2_LvsF2_C")

# Contrast H vs L
res.cazy.F2.HvsL <- results(dds.cazy.F2, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.cazy.F2.HvsL <- res.cazy.F2.HvsL[order(res.cazy.F2.HvsL$padj),]
save_results(res.cazy.F2.HvsL, "res.cazy.F2_HvsF2_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 3 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### cazys - F2 generation (C+L vs H)

In [136]:
info.F2_2 <- info.F2
info.F2_2$Treatment <- gsub(x = info.F2_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.cazy.F2_2 <- DESeqDataSetFromMatrix(countData = cazy$filtered.F2,
                                    colData = info.F2_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.cazy.F2_2 <- DESeq(dds.cazy.F2_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.cazy.F2.HvsCL <- results(dds.cazy.F2_2, contrast = list("Treatment_H_vs_CL"))
res.cazy.F2.HvsCL <- res.cazy.F2.HvsCL[order(res.cazy.F2.HvsCL$padj),]
save_results(res.cazy.F2.HvsCL, "res.cazy.F2_HvsF2_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 5 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



### rgis

#### rgis - generational differences

In [137]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
rgi.percent_occ <- 10
# Quantile used for removing low abundance features
rgi.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(rgi$cov[rowSums(rgi$cov) > 0, ])
rgi$nz <- rgi$cov[rownames(rgi$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(rgi$nz) / sum(rowSums(rgi$nz)) * 100, probs = rgi.quantile_thresh)
#rgi$filtered <- low.count.removal(rgi$nz, percent = low_threshold)$data.filter

#rgi$occ <- rowSums(rgi$filtered>0) / ncol(rgi$filtered) * 100
# Keep features occurring in defined percentage of samples
#rgi$filtered <- rgi$filtered[which(rgi$occ >= rgi.percent_occ), ]
rgi$filtered <- rgi$nz
paste0(c(dim(rgi$cov)[1] - dim(rgi$filtered)[1], "rgis removed", dim(rgi$filtered)[1], "remaining"))

rgi$filtered <- ceiling(rgi$filtered)

## DESeq2 analysis
info.rgi <- info[, c("run","sample","Generation")]
dds.rgi <- DESeqDataSetFromMatrix(countData = rgi$filtered,
                              colData = info.rgi,
                              design = ~ 1 + Generation)
dds.rgi$sample <- info$sample
dds.rgi$run <- info$run
#dds.rgi <- collapseReplicates(dds.rgi, dds.rgi$sample, dds.rgi$run)
dds.rgi <- DESeq(dds.rgi, sfType = "poscounts", parallel = TRUE)

# Multi-factor (treatment + generation)
#dds.rgiMF <- dds.rgi
#design(dds.rgiMF) <- formula(~ group)
#dds.rgiMF <- DESeq(dds.rgiMF, sfType = "poscounts")

# Contrast H vs C
res.rgi.F2vsF0 <- results(dds.rgi, contrast = list("Generation_F2_vs_F0"))
res.rgi.F2vsF0 <- res.rgi.F2vsF0[order(res.rgi.F2vsF0$padj),]
save_results(res.rgi.F2vsF0, "res.rgi.F2vsF0")

# Contrast L vs C
res.rgi.F1vsF0 <- results(dds.rgi, contrast = list("Generation_F1_vs_F0"))
res.rgi.F1vsF0 <- res.rgi.F1vsF0[order(res.rgi.F1vsF0$padj),]
save_results(res.rgi.F1vsF0, "res.rgi.F1vsF0")

# Contrast H vs L
res.rgi.F2vsF1 <- results(dds.rgi, contrast = list("Generation_F2_vs_F0", "Generation_F1_vs_F0"))
res.rgi.F2vsF1 <- res.rgi.F2vsF1[order(res.rgi.F2vsF1$padj),]
save_results(res.rgi.F2vsF1, "res.rgi.F2vsF1")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers



#### rgis - F0 generation

In [138]:
# Extract and filter F0 samples
rgi$cov.F0 <- rgi$cov[, rownames(info[info$Generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
rgi.F0.percent_occ <- 10
# Quantile used for removing low abundance features
rgi.F0.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(rgi$cov.F0[rowSums(rgi$cov.F0) > 0, ])
rgi$nz.F0 <- rgi$cov.F0[rownames(rgi$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(rgi$nz.F0) / sum(rowSums(rgi$nz.F0)) * 100, probs = rgi.F0.quantile_thresh)
#rgi$filtered.F0 <- low.count.removal(rgi$nz.F0, percent = low_threshold)$data.filter

#rgi$occ.F0 <- rowSums(rgi$filtered.F0>0) / ncol(rgi$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
#rgi$filtered.F0 <- rgi$filtered.F0[which(rgi$occ.F0 >= rgi.F0.percent_occ), ]

rgi$filtered.F0 <- rgi$nz.F0
paste0(c(dim(rgi$cov.F0)[1] - dim(rgi$filtered.F0)[1], "rgis removed", dim(rgi$filtered.F0)[1], "remaining"))


rgi$filtered.F0 <- ceiling(rgi$filtered.F0)


info.F0 <- info[colnames(rgi$filtered.F0), c("run","sample","Treatment")]

# Create DESeq dataset
dds.rgi.F0 <- DESeqDataSetFromMatrix(countData = rgi$filtered.F0,
                                    colData = info.F0,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.rgi.F0$sample <- info.F0$sample
dds.rgi.F0$run <- info.F0$run
#dds.rgi.F0 <- collapseReplicates(dds.rgi.F0, dds.rgi.F0$sample, dds.rgi.F0$run)

# Run DESeq2
dds.rgi.F0 <- DESeq(dds.rgi.F0, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.rgi.F0.HvsC <- results(dds.rgi.F0, contrast = list("Treatment_H_vs_C"))
res.rgi.F0.HvsC <- res.rgi.F0.HvsC[order(res.rgi.F0.HvsC$padj),]
save_results(res.rgi.F0.HvsC, "res.rgi.F0_HvsF0_C")

# Contrast L vs C
res.rgi.F0.LvsC <- results(dds.rgi.F0, contrast = list("Treatment_L_vs_C"))
res.rgi.F0.LvsC <- res.rgi.F0.LvsC[order(res.rgi.F0.LvsC$padj),]
save_results(res.rgi.F0.LvsC, "res.rgi.F0_LvsF0_C")

# Contrast H vs L
res.rgi.F0.HvsL <- results(dds.rgi.F0, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.rgi.F0.HvsL <- res.rgi.F0.HvsL[order(res.rgi.F0.HvsL$padj),]
save_results(res.rgi.F0.HvsL, "res.rgi.F0_HvsF0_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers



#### rgis - F0 generation (C+L vs H)

In [139]:
info.F0_2 <- info.F0
info.F0_2$Treatment <- gsub(x = info.F0_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.rgi.F0_2 <- DESeqDataSetFromMatrix(countData = rgi$filtered.F0,
                                    colData = info.F0_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.rgi.F0_2 <- DESeq(dds.rgi.F0_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.rgi.F0.HvsCL <- results(dds.rgi.F0_2, contrast = list("Treatment_H_vs_CL"))
res.rgi.F0.HvsCL <- res.rgi.F0.HvsCL[order(res.rgi.F0.HvsCL$padj),]
save_results(res.rgi.F0.HvsCL, "res.rgi.F0_HvsF0_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 1 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### rgis - F1 generation

In [140]:
# Extract and filter F1 samples
rgi$cov.F1 <- rgi$cov[, rownames(info[info$Generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
rgi.F1.percent_occ <- 10
# Quantile used for removing low abundance features
rgi.F1.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(rgi$cov.F1[rowSums(rgi$cov.F1) > 0, ])
rgi$nz.F1 <- rgi$cov.F1[rownames(rgi$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(rgi$nz.F1) / sum(rowSums(rgi$nz.F1)) * 100, probs = rgi.F1.quantile_thresh)
#rgi$filtered.F1 <- low.count.removal(rgi$nz.F1, percent = low_threshold)$data.filter

#rgi$occ.F1 <- rowSums(rgi$filtered.F1>0) / ncol(rgi$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
#rgi$filtered.F1 <- rgi$filtered.F1[which(rgi$occ.F1 >= rgi.F1.percent_occ), ]

rgi$filtered.F1 <- rgi$nz.F1
paste0(c(dim(rgi$cov.F1)[1] - dim(rgi$filtered.F1)[1], "rgis removed", dim(rgi$filtered.F1)[1], "remaining"))


rgi$filtered.F1 <- ceiling(rgi$filtered.F1)


info.F1 <- info[colnames(rgi$filtered.F1), c("run","sample","Treatment")]

# Create DESeq dataset
dds.rgi.F1 <- DESeqDataSetFromMatrix(countData = rgi$filtered.F1,
                                    colData = info.F1,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.rgi.F1$sample <- info.F1$sample
dds.rgi.F1$run <- info.F1$run
#dds.rgi.F1 <- collapseReplicates(dds.rgi.F1, dds.rgi.F1$sample, dds.rgi.F1$run)

# Run DESeq2
dds.rgi.F1 <- DESeq(dds.rgi.F1, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.rgi.F1.HvsC <- results(dds.rgi.F1, contrast = list("Treatment_H_vs_C"))
res.rgi.F1.HvsC <- res.rgi.F1.HvsC[order(res.rgi.F1.HvsC$padj),]
save_results(res.rgi.F1.HvsC, "res.rgi.F1_HvsF1_C")

# Contrast L vs C
res.rgi.F1.LvsC <- results(dds.rgi.F1, contrast = list("Treatment_L_vs_C"))
res.rgi.F1.LvsC <- res.rgi.F1.LvsC[order(res.rgi.F1.LvsC$padj),]
save_results(res.rgi.F1.LvsC, "res.rgi.F1_LvsF1_C")

# Contrast H vs L
res.rgi.F1.HvsL <- results(dds.rgi.F1, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.rgi.F1.HvsL <- res.rgi.F1.HvsL[order(res.rgi.F1.HvsL$padj),]
save_results(res.rgi.F1.HvsL, "res.rgi.F1_HvsF1_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers



#### rgis - F1 generation (C+L vs H)

In [141]:
info.F1_2 <- info.F1
info.F1_2$Treatment <- gsub(x = info.F1_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.rgi.F1_2 <- DESeqDataSetFromMatrix(countData = rgi$filtered.F1,
                                    colData = info.F1_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.rgi.F1_2 <- DESeq(dds.rgi.F1_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.rgi.F1.HvsCL <- results(dds.rgi.F1_2, contrast = list("Treatment_H_vs_CL"))
res.rgi.F1.HvsCL <- res.rgi.F1.HvsCL[order(res.rgi.F1.HvsCL$padj),]
save_results(res.rgi.F1.HvsCL, "res.rgi.F1_HvsF1_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 1 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### rgis - F2 generation

In [142]:
# Extract and filter F2 samples
rgi$cov.F2 <- rgi$cov[, rownames(info[info$Generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
rgi.F2.percent_occ <- 10
# Quantile used for removing low abundance features
rgi.F2.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(rgi$cov.F2[rowSums(rgi$cov.F2) > 0, ])
rgi$nz.F2 <- rgi$cov.F2[rownames(rgi$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(rgi$nz.F2) / sum(rowSums(rgi$nz.F2)) * 100, probs = rgi.F2.quantile_thresh)
#rgi$filtered.F2 <- low.count.removal(rgi$nz.F2, percent = low_threshold)$data.filter

#rgi$occ.F2 <- rowSums(rgi$filtered.F2>0) / ncol(rgi$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
#rgi$filtered.F2 <- rgi$filtered.F2[which(rgi$occ.F2 >= rgi.F2.percent_occ), ]

rgi$filtered.F2 <- rgi$nz.F2
paste0(c(dim(rgi$cov.F2)[1] - dim(rgi$filtered.F2)[1], "rgis removed", dim(rgi$filtered.F2)[1], "remaining"))


rgi$filtered.F2 <- ceiling(rgi$filtered.F2)


info.F2 <- info[colnames(rgi$filtered.F2), c("run","sample","Treatment")]

# Create DESeq dataset
dds.rgi.F2 <- DESeqDataSetFromMatrix(countData = rgi$filtered.F2,
                                    colData = info.F2,
                                    design = ~ 1 + Treatment)

# Add information in case of collapsing technical replicates
dds.rgi.F2$sample <- info.F2$sample
dds.rgi.F2$run <- info.F2$run
#dds.rgi.F2 <- collapseReplicates(dds.rgi.F2, dds.rgi.F2$sample, dds.rgi.F2$run)

# Run DESeq2
dds.rgi.F2 <- DESeq(dds.rgi.F2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C
res.rgi.F2.HvsC <- results(dds.rgi.F2, contrast = list("Treatment_H_vs_C"))
res.rgi.F2.HvsC <- res.rgi.F2.HvsC[order(res.rgi.F2.HvsC$padj),]
save_results(res.rgi.F2.HvsC, "res.rgi.F2_HvsF2_C")

# Contrast L vs C
res.rgi.F2.LvsC <- results(dds.rgi.F2, contrast = list("Treatment_L_vs_C"))
res.rgi.F2.LvsC <- res.rgi.F2.LvsC[order(res.rgi.F2.LvsC$padj),]
save_results(res.rgi.F2.LvsC, "res.rgi.F2_LvsF2_C")

# Contrast H vs L
res.rgi.F2.HvsL <- results(dds.rgi.F2, contrast = list("Treatment_H_vs_C", "Treatment_L_vs_C"))
res.rgi.F2.HvsL <- res.rgi.F2.HvsL[order(res.rgi.F2.HvsL$padj),]
save_results(res.rgi.F2.HvsL, "res.rgi.F2_HvsF2_L")

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 1 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing



#### rgis - F2 generation (C+L vs H)

In [143]:
info.F2_2 <- info.F2
info.F2_2$Treatment <- gsub(x = info.F2_2$Treatment, pattern = "[CL]", replacement = "CL")
# Create DESeq dataset
dds.rgi.F2_2 <- DESeqDataSetFromMatrix(countData = rgi$filtered.F2,
                                    colData = info.F2_2,
                                    design = ~ 1 + Treatment)


# Run DESeq2
dds.rgi.F2_2 <- DESeq(dds.rgi.F2_2, sfType = "poscounts", parallel = TRUE)

# Contrast H vs C+L
res.rgi.F2.HvsCL <- results(dds.rgi.F2_2, contrast = list("Treatment_H_vs_CL"))
res.rgi.F2.HvsCL <- res.rgi.F2.HvsCL[order(res.rgi.F2.HvsCL$padj),]
save_results(res.rgi.F2.HvsCL, "res.rgi.F2_HvsF2_CL")

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates: 8 workers

mean-dispersion relationship

final dispersion estimates, fitting model and testing: 8 workers

-- replacing outliers and refitting for 1 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing

