---
title: ALDEx2 analysis
format:
    confluence-html:
        code-fold: true
---

[ALDEx2](https://microbiomejournal.biomedcentral.com/articles/10.1186/2049-2618-2-15) is a software package for differential relative abundance analysis of high-throughput data. It uses log ratio transformation of counts and Monte-Carlo sampling to estimate technical variation in samples and outputs effect-sizes of features given the model design. The authors recommend an effect-size cutoff of 1 for HTS datasets. Further, for studies with more than two conditions (*e.g.* 'control', 'low' and 'high' as in the current study), the `aldex.glm` function is recommended which uses a generalized linear model.

Here we use the inter-quartile log-ratio or 'iqlr' method to transform the counts. This approach is recommended for asymmetric datasets.

In [1]:
library(ALDEx2)
library(arrow)
library(tibble)
library(stringr)
library(dplyr)

Loading required package: zCompositions

Loading required package: MASS

Loading required package: NADA

Loading required package: survival


Attaching package: ‘NADA’


The following object is masked from ‘package:stats’:

    cor


Loading required package: truncnorm


Attaching package: ‘arrow’


The following object is masked from ‘package:utils’:

    timestamp



Attaching package: ‘dplyr’


The following object is masked from ‘package:MASS’:

    select


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [25]:
save_results <- function(e, name) {
    f = paste0("../atlas/stats/ALDEx2/", name, ".tsv")
    data <- as.data.frame(e)
    data$feature <- rownames(data)
    data <- data[, c("feature",colnames(data)[colnames(data)!="feature"])]
    write.table(file = f, x = data, sep = "\t", quote = FALSE, row.names = FALSE)
}

In [26]:
low.count.removal <- function(
                        data, # feature count df of size n (sample) x p (feature)
                        percent=0.01 # cutoff chosen
                        ) 
  {
    keep.feat = which(rowSums(data)*100/(sum(rowSums(data))) > percent)
    data.filter = data[keep.feat, ]
    return(list(data.filter = data.filter, keep.feat = keep.feat))
}

# Read data

In [27]:
#set.seed(5249)
cpus <- 10

**MAG coverage**

In [28]:
mag <- list()
mag_data <- as.matrix(
    column_to_rownames(
        read_parquet("../atlas/genomes/counts//median_coverage_genomes.parquet"), 
        var="index")
    )
mag$cov <- t(mag_data)

**PFAMs**

In [29]:
pfam <- list()
pfam_data <- read.csv("../atlas/Genecatalog/counts/PFAMs.median_coverage.tsv", sep = "\t", header = TRUE, row.names=1)
colnames(pfam_data) <- gsub(".t.r", "-t.r", colnames(pfam_data))
pfam$cov <- pfam_data

**Kegg orthologs (KOs)**

In [30]:
ko <- list()
ko_data <- read.csv("../atlas/Genecatalog/counts/KO.median_coverage.tsv", sep = "\t", header = TRUE, row.names=1)
colnames(ko_data) <- gsub(".t.r", "-t.r", colnames(ko_data))
ko$cov <- ko_data

**CAZY**

In [31]:
cazy <- list()
cazy_data <- read.csv("../atlas/Genecatalog/counts/CAZy.median_coverage.tsv", sep = "\t", header = TRUE, row.names=1)
colnames(cazy_data) <- gsub(".t.r", "-t.r", colnames(cazy_data))
cazy$cov <- cazy_data

**UniRef100 taxonomy**

In [32]:
uniref <- list()
uniref_data <- read.csv("../atlas/taxonomy/UniRef100.median_fold.species.allsamples.tsv", sep = "\t", header = TRUE, row.names = 1)
uniref_taxonomy <- read.csv("../atlas/taxonomy/UniRef100.median_fold.tsv", sep="\t", header = TRUE)
colnames(uniref_data) <- gsub(".t.r", "-t.r", colnames(uniref_data))
# Remove unclassified and unknown
uc <- rownames(uniref_data)[grep("^uc_", rownames(uniref_data))]
# Remove uncultured
unc <- rownames(uniref_data)[grep("^uncultured", rownames(uniref_data))]
# Remove species with unknown phylum
unk <- uniref_taxonomy[uniref_taxonomy$phylum=="unknown","species"]
uniref_data_filt <- uniref_data[!c(rownames(uniref_data)%in%c(unc, unk, uc)),]
uniref_data_filt <- uniref_data_filt[!c(rownames(uniref_data_filt)%in%c("unknown")), ]
# Keep species with proper name
keep <- grep("^[A-Z][a-z]+ ", rownames(uniref_data_filt))
uniref_data_filt <- uniref_data_filt[keep,]
uniref$cov <- uniref_data_filt

In [33]:
uniref.genus <- list()
uniref_genus_data <- read.csv("../atlas/taxonomy//UniRef100.median_fold.genus.tsv", sep="\t", header = TRUE, row.names = 1)
colnames(uniref_genus_data) <- gsub(".t.r", "-t.r", colnames(uniref_genus_data))
# Remove unclassified and unknown
uc <- rownames(uniref_genus_data)[grep("^uc_", rownames(uniref_genus_data))]
# Remove uncultured
unc <- rownames(uniref_genus_data)[grep("^uncultured", rownames(uniref_genus_data))]
# Remove genera with unknown phylum
unk <- uniref_taxonomy[uniref_taxonomy$phylum=="unknown","genus"]
uniref_genus_filt <- uniref_genus_data[!c(rownames(uniref_genus_data)%in%c(unc, unk, uc)),]
uniref_genus_filt <- uniref_genus_filt[!c(rownames(uniref_genus_filt)%in%c("unknown")), ]
uniref.genus$cov <- uniref_genus_filt

In [34]:
cbind(dim(mag$cov),
dim(pfam$cov),
dim(ko$cov),
dim(cazy$cov),
dim(uniref$cov))

0,1,2,3,4
262,7363,5926,89,4060
105,105,105,105,105


Drop replicates

In [35]:
drop_reps <- c("C11-t.r", "C12", "C19-t.r", "H10", "H13-t.r", "H32", "L11-t.r", "L29", "L6")
samples <- colnames(mag$cov)[!colnames(mag$cov)%in%drop_reps]
mag$cov <- mag$cov[,samples]
pfam$cov <- pfam$cov[,samples]
ko$cov <- ko$cov[,samples]
cazy$cov <- cazy$cov[,samples]
uniref$cov <- uniref$cov[,samples]
uniref.genus$cov <- uniref.genus$cov[,!c(colnames(uniref.genus$cov)%in%drop_reps)]

In [36]:
cbind(dim(mag$cov),
dim(pfam$cov),
dim(ko$cov),
dim(cazy$cov),
dim(uniref$cov),
dim(uniref.genus$cov))

0,1,2,3,4,5
262,7363,5926,89,4060,1396
96,96,96,96,96,96


Make all dataframes line up

In [37]:
# Make all dataframes line up
samples <- colnames(mag$cov)
mag$cov <- mag$cov[, samples]
pfam$cov <- pfam$cov[, samples]
ko$cov <- ko$cov[, samples]
cazy$cov <- cazy$cov[, samples]
uniref$cov <- uniref$cov[, samples]
uniref.genus$cov <- uniref.genus$cov[, samples]

In [38]:
cbind(dim(mag$cov),
dim(pfam$cov),
dim(ko$cov),
dim(cazy$cov),
dim(uniref$cov),
dim(uniref.genus$cov))

0,1,2,3,4,5
262,7363,5926,89,4060,1396
96,96,96,96,96,96


In [39]:
# Read sample info and set up the relevant columns
info <- read.csv("../data/sample_groups.csv", header = T, row.names = 1)
# Reorder the metadata
info <- info[colnames(mag$cov), ]
info$group <- factor(info$group)
info$generation <- factor(info$generation)
info$treatment <- factor(info$treatment)
mm.generation <- model.matrix(~ generation, info)
mm.generation.treatment <- model.matrix(~ generation + treatment, info)
mm.F0.treatment <- model.matrix(~ treatment, info[info$generation == "F0", ])
mm.F1.treatment <- model.matrix(~ treatment, info[info$generation == "F1", ])
mm.F2.treatment <- model.matrix(~ treatment, info[info$generation == "F2", ])

In [40]:
all(all(rownames(info) == colnames(mag$cov)),
all(rownames(info) == colnames(pfam$cov)),
all(rownames(info) == colnames(ko$cov)),
all(rownames(info) == colnames(cazy$cov)),
all(rownames(info) == colnames(uniref$cov)),
all(rownames(info) == colnames(uniref.genus$cov)))

## ALDEX2 analysis

### MAGs

#### MAGs - generational differences

In [25]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.percent_occ <- 10
# Quantile used for removing low abundance features
mag.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov[rowSums(mag$cov) > 0, ])
mag$nz <- mag$cov[rownames(mag$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz) / sum(rowSums(mag$nz)) * 100, probs = mag.quantile_thresh)
mag$filtered <- low.count.removal(mag$nz, percent = low_threshold)$data.filter

mag$occ <- rowSums(mag$filtered>0) / ncol(mag$filtered) * 100
# Keep features occurring in defined percentage of samples
mag$filtered <- mag$filtered[which(mag$occ >= mag.percent_occ), ]
paste0(c(dim(mag$cov)[1] - dim(mag$filtered)[1], "MAGs removed", dim(mag$filtered)[1], "remaining"))

mag$filtered <- ceiling(mag$filtered)

denoms <- aldex.set.mode(ceiling(mag$filtered), info[colnames(mag$filtered),"generation"], denom="iqlr")

x.mag <- aldex.clr(ceiling(mag$filtered), mm.generation, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.mag <- aldex.glm(x.mag, mm.generation)
save_results(glm.test.mag, "glm.test.mag")
glm.effect.mag <- aldex.glm.effect(x.mag)
save_results(glm.effect.mag, "glm.effect.mag")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - treatment with generation as covariate

In [37]:
x.mag.co <- aldex.clr(ceiling(mag$filtered), mm.generation, mc.samples=mc.samples, denom="all", verbose=TRUE, useMC = TRUE)
glm.test.mag.co <- aldex.glm(x.mag.co, mm.generation.treatment)
save_results(glm.test.mag.co, "glm.test.mag.co")
glm.effect.mag.co <- aldex.glm.effect(x.mag.co)
save_results(glm.effect.mag.co, "glm.effect.mag.co")

checking for condition length disabled!

using all features for denominator

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

computing center with all features

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F0

In [87]:
info[info$generation == "F0",] %>% count(treatment)
ceiling(1000 / 9)

treatment,n
<fct>,<int>
C,11
H,11
L,9


In [88]:
# Extract and filter F0 samples
mag$cov.F0 <- mag$cov[, rownames(info[info$generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F0.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F0.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F0[rowSums(mag$cov.F0) > 0, ])
mag$nz.F0 <- mag$cov.F0[rownames(mag$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F0) / sum(rowSums(mag$nz.F0)) * 100, probs = mag.F0.quantile_thresh)
mag$filtered.F0 <- low.count.removal(mag$nz.F0, percent = low_threshold)$data.filter

mag$occ.F0 <- rowSums(mag$filtered.F0>0) / ncol(mag$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F0 <- mag$filtered.F0[which(mag$occ.F0 >= mag.F0.percent_occ), ]
paste0(c(dim(mag$cov.F0)[1] - dim(mag$filtered.F0)[1], "MAGs removed", dim(mag$filtered.F0)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(mag$filtered.F0), info[colnames(mag$filtered.F0),"treatment"], denom="iqlr")

x.mag.F0 <- aldex.clr(ceiling(mag$filtered.F0), mm.F0.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.mag.F0 <- aldex.glm(x.mag.F0, mm.F0.treatment)
save_results(glm.test.mag.F0, "glm.test.mag.F0")
glm.effect.mag.F0 <- aldex.glm.effect(x.mag.F0)
save_results(glm.effect.mag.F0, "glm.effect.mag.F0")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F0 generation (C+L vs H)

In [90]:
#info.F0_2 %>% count(treatment)
#ceiling(1000 / 11)

treatment,n
<fct>,<int>
CL,20
H,11


In [91]:
info.F0 <- info[info$generation=="F0",]
info.F0_2 <- info.F0
info.F0_2$treatment <- gsub(x = info.F0_2$treatment, pattern = "[CL]", replacement = "CL")
info.F0_2$treatment <- factor(info.F0_2$treatment)
mm.F0_2.treatment <- model.matrix(~ treatment, info.F0_2)
mc.samples <- 128

denoms <- aldex.set.mode(ceiling(mag$filtered.F0), info[colnames(mag$filtered.F0),"treatment"], denom="iqlr")
conds <- info.F0_2[colnames(mag$filtered.F0),"treatment"]
x.mag.F0_2 <- aldex(ceiling(mag$filtered.F0), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.mag.F0_2[order(x.mag.F0_2$effect, decreasing = TRUE),], "mag.F0_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



In [65]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F0_HC.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F0_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F0_HC[rowSums(mag$cov.F0_HC) > 0, ])
mag$nz.F0_HC <- mag$cov.F0_HC[rownames(mag$cov.F0_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F0_HC) / sum(rowSums(mag$nz.F0_HC)) * 100, probs = mag.F0_HC.quantile_thresh)
mag$filtered.F0_HC <- low.count.removal(mag$nz.F0_HC, percent = low_threshold)$data.filter

mag$occ.F0_HC <- rowSums(mag$filtered.F0_HC>0) / ncol(mag$filtered.F0_HC) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F0_HC <- mag$filtered.F0_HC[which(mag$occ.F0_HC >= mag.F0_HC.percent_occ), ]
paste0(c(dim(mag$cov.F0_HC)[1] - dim(mag$filtered.F0_HC)[1], "MAGs removed", dim(mag$filtered.F0_HC)[1], "remaining"))


#### MAGs - F0 generation (H vs C)

In [71]:
#info.F0_HC %>% count(treatment)
#1000 / 11

treatment,n
<fct>,<int>
C,11
H,11


In [66]:
info.F0_HC <- info[info$generation=="F0",]
info.F0_HC <- info.F0_HC[rownames(info.F0_HC)[info.F0_HC$treatment%in%c("C","H")],]
mc.samples <- 128
mag$cov.F0_HC <- mag$cov[, rownames(info.F0_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F0_HC.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F0_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F0_HC[rowSums(mag$cov.F0_HC) > 0, ])
mag$nz.F0_HC <- mag$cov.F0_HC[rownames(mag$cov.F0_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F0_HC) / sum(rowSums(mag$nz.F0_HC)) * 100, probs = mag.F0_HC.quantile_thresh)
mag$filtered.F0_HC <- low.count.removal(mag$nz.F0_HC, percent = low_threshold)$data.filter

mag$occ.F0_HC <- rowSums(mag$filtered.F0_HC>0) / ncol(mag$filtered.F0_HC) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F0_HC <- mag$filtered.F0_HC[which(mag$occ.F0_HC >= mag.F0_HC.percent_occ), ]
paste0(c(dim(mag$cov.F0_HC)[1] - dim(mag$filtered.F0_HC)[1], "MAGs removed", dim(mag$filtered.F0_HC)[1], "remaining"))

X <- mag$filtered.F0_HC
Y <- info.F0_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.mag.F0_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.mag.F0_HC[order(x.mag.F0_HC$effect, decreasing = TRUE),], "mag.F0_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F0 generation (L vs C)

In [73]:
#info.F0_LC %>% count(treatment)
#1000 / 9

treatment,n
<fct>,<int>
C,11
L,9


In [69]:
info.F0_LC <- info[info$generation=="F0",]
info.F0_LC <- info.F0_LC[rownames(info.F0_LC)[info.F0_LC$treatment%in%c("C","L")],]
mc.samples <- 128
mag$cov.F0_LC <- mag$cov[, rownames(info.F0_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F0_LC.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F0_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F0_LC[rowSums(mag$cov.F0_LC) > 0, ])
mag$nz.F0_LC <- mag$cov.F0_LC[rownames(mag$cov.F0_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F0_LC) / sum(rowSums(mag$nz.F0_LC)) * 100, probs = mag.F0_LC.quantile_thresh)
mag$filtered.F0_LC <- low.count.removal(mag$nz.F0_LC, percent = low_threshold)$data.filter

mag$occ.F0_LC <- rowSums(mag$filtered.F0_LC>0) / ncol(mag$filtered.F0_LC) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F0_LC <- mag$filtered.F0_LC[which(mag$occ.F0_LC >= mag.F0_LC.percent_occ), ]
paste0(c(dim(mag$cov.F0_LC)[1] - dim(mag$filtered.F0_LC)[1], "MAGs removed", dim(mag$filtered.F0_LC)[1], "remaining"))

X <- mag$filtered.F0_LC
Y <- info.F0_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.mag.F0_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.mag.F0_LC[order(x.mag.F0_LC$effect, decreasing = TRUE),], "mag.F0_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F1 generation

In [92]:
#info[info$generation == "F1",] %>% count(treatment)
#ceiling(1000 / 5)

treatment,n
<fct>,<int>
C,7
H,7
L,5


In [115]:
# Extract and filter F1 samples
mag$cov.F1 <- mag$cov[, rownames(info[info$generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F1.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F1.quantile_thresh <- 0.1
mc.samples <- 200

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F1[rowSums(mag$cov.F1) > 0, ])
mag$nz.F1 <- mag$cov.F1[rownames(mag$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F1) / sum(rowSums(mag$nz.F1)) * 100, probs = mag.F1.quantile_thresh)
mag$filtered.F1 <- low.count.removal(mag$nz.F1, percent = low_threshold)$data.filter

mag$occ.F1 <- rowSums(mag$filtered.F1>0) / ncol(mag$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F1 <- mag$filtered.F1[which(mag$occ.F1 >= mag.F1.percent_occ), ]
paste0(c(dim(mag$cov.F1)[1] - dim(mag$filtered.F1)[1], "MAGs removed", dim(mag$filtered.F1)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(mag$filtered.F1), info[colnames(mag$filtered.F1),"treatment"], denom="iqlr")

x.mag.F1 <- aldex.clr(ceiling(mag$filtered.F1), mm.F1.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.mag.F1 <- aldex.glm(x.mag.F1, mm.F1.treatment)
save_results(glm.test.mag.F1, "glm.test.mag.F1")
glm.effect.mag.F1 <- aldex.glm.effect(x.mag.F1)
save_results(glm.effect.mag.F1, "glm.effect.mag.F1")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F1 generation (C+L vs H)

In [97]:
#info.F1_2 %>% count(treatment)
#ceiling(1000 / 7)

treatment,n
<fct>,<int>
CL,12
H,7


In [116]:
info.F1 <- info[info$generation=="F1",]
info.F1_2 <- info.F1
info.F1_2$treatment <- gsub(x = info.F1_2$treatment, pattern = "[CL]", replacement = "CL")
info.F1_2$treatment <- factor(info.F1_2$treatment)
mm.F1_2.treatment <- model.matrix(~ treatment, info.F1_2)
mc.samples <- 200

denoms <- aldex.set.mode(ceiling(mag$filtered.F1), info[colnames(mag$filtered.F1),"treatment"], denom="iqlr")
conds <- info.F1_2[colnames(mag$filtered.F1),"treatment"]
x.mag.F1_2 <- aldex(ceiling(mag$filtered.F1), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.mag.F1_2[order(x.mag.F1_2$effect, decreasing = TRUE),], "mag.F1_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F1 generation (H vs C)

In [75]:
info.F1_HC <- info[info$generation=="F1",]
info.F1_HC <- info.F1_HC[rownames(info.F1_HC)[info.F1_HC$treatment%in%c("C","H")],]
info.F1_HC %>% count(treatment)
ceiling(1000 / 7)

treatment,n
<fct>,<int>
C,7
H,7


In [67]:
info.F1_HC <- info[info$generation=="F1",]
info.F1_HC <- info.F1_HC[rownames(info.F1_HC)[info.F1_HC$treatment%in%c("C","H")],]
mc.samples <- 200
mag$cov.F1_HC <- mag$cov[, rownames(info.F1_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F1_HC.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F1_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F1_HC[rowSums(mag$cov.F1_HC) > 0, ])
mag$nz.F1_HC <- mag$cov.F1_HC[rownames(mag$cov.F1_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F1_HC) / sum(rowSums(mag$nz.F1_HC)) * 100, probs = mag.F1_HC.quantile_thresh)
mag$filtered.F1_HC <- low.count.removal(mag$nz.F1_HC, percent = low_threshold)$data.filter

mag$occ.F1_HC <- rowSums(mag$filtered.F1_HC>0) / ncol(mag$filtered.F1_HC) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F1_HC <- mag$filtered.F1_HC[which(mag$occ.F1_HC >= mag.F1_HC.percent_occ), ]
paste0(c(dim(mag$cov.F1_HC)[1] - dim(mag$filtered.F1_HC)[1], "MAGs removed", dim(mag$filtered.F1_HC)[1], "remaining"))

X <- mag$filtered.F1_HC
Y <- info.F1_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.mag.F1_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.mag.F1_HC[order(x.mag.F1_HC$effect, decreasing = TRUE),], "mag.F1_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F1 generation (L vs C)

In [77]:
info.F1_LC <- info[info$generation=="F1",]
info.F1_LC <- info.F1_LC[rownames(info.F1_LC)[info.F1_LC$treatment%in%c("C","L")],]
info.F1_LC %>% count(treatment)
ceiling(1000 / 5)

treatment,n
<fct>,<int>
C,7
L,5


In [78]:
info.F1_LC <- info[info$generation=="F1",]
info.F1_LC <- info.F1_LC[rownames(info.F1_LC)[info.F1_LC$treatment%in%c("C","L")],]
mc.samples <- 200
mag$cov.F1_LC <- mag$cov[, rownames(info.F1_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F1_LC.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F1_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F1_LC[rowSums(mag$cov.F1_LC) > 0, ])
mag$nz.F1_LC <- mag$cov.F1_LC[rownames(mag$cov.F1_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F1_LC) / sum(rowSums(mag$nz.F1_LC)) * 100, probs = mag.F1_LC.quantile_thresh)
mag$filtered.F1_LC <- low.count.removal(mag$nz.F1_LC, percent = low_threshold)$data.filter

mag$occ.F1_LC <- rowSums(mag$filtered.F1_LC>0) / ncol(mag$filtered.F1_LC) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F1_LC <- mag$filtered.F1_LC[which(mag$occ.F1_LC >= mag.F1_LC.percent_occ), ]
paste0(c(dim(mag$cov.F1_LC)[1] - dim(mag$filtered.F1_LC)[1], "MAGs removed", dim(mag$filtered.F1_LC)[1], "remaining"))

X <- mag$filtered.F1_LC
Y <- info.F1_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.mag.F1_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.mag.F1_LC[order(x.mag.F1_LC$effect, decreasing = TRUE),], "mag.F1_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F2 generation

In [103]:
#info[info$generation == "F2",] %>% count(treatment)
#ceiling(1000 / 15)

treatment,n
<fct>,<int>
C,15
H,15
L,16


In [117]:
# Extract and filter F2 samples
mag$cov.F2 <- mag$cov[, rownames(info[info$generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F2.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F2.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F2[rowSums(mag$cov.F2) > 0, ])
mag$nz.F2 <- mag$cov.F2[rownames(mag$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F2) / sum(rowSums(mag$nz.F2)) * 100, probs = mag.F2.quantile_thresh)
mag$filtered.F2 <- low.count.removal(mag$nz.F2, percent = low_threshold)$data.filter

mag$occ.F2 <- rowSums(mag$filtered.F2>0) / ncol(mag$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F2 <- mag$filtered.F2[which(mag$occ.F2 >= mag.F2.percent_occ), ]
paste0(c(dim(mag$cov.F2)[1] - dim(mag$filtered.F2)[1], "MAGs removed", dim(mag$filtered.F2)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(mag$filtered.F2), info[colnames(mag$filtered.F2),"treatment"], denom="iqlr")

x.mag.F2 <- aldex.clr(ceiling(mag$filtered.F2), mm.F2.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.mag.F2 <- aldex.glm(x.mag.F2, mm.F2.treatment)
save_results(glm.test.mag.F2, "glm.test.mag.F2")
glm.effect.mag.F2 <- aldex.glm.effect(x.mag.F2)
save_results(glm.effect.mag.F2, "glm.effect.mag.F2")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F2 generation (C+L vs H)

In [107]:
#info.F2_2 %>% count(treatment)
#ceiling(1000 / 15)

treatment,n
<fct>,<int>
CL,31
H,15


In [118]:
info.F2 <- info[info$generation=="F2",]
info.F2_2 <- info.F2
info.F2_2$treatment <- gsub(x = info.F2_2$treatment, pattern = "[CL]", replacement = "CL")
info.F2_2$treatment <- factor(info.F2_2$treatment)
mm.F2_2.treatment <- model.matrix(~ treatment, info.F2_2)
mc.samples <- 128

denoms <- aldex.set.mode(ceiling(mag$filtered.F2), info[colnames(mag$filtered.F2),"treatment"], denom="iqlr")
conds <- info.F2_2[colnames(mag$filtered.F2),"treatment"]
x.mag.F2_2 <- aldex(ceiling(mag$filtered.F2), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.mag.F2_2[order(x.mag.F2_2$effect, decreasing = TRUE),], "mag.F2_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F2 generation (H vs C)

In [80]:
info.F2_HC <- info[info$generation=="F2",]
info.F2_HC <- info.F2_HC[rownames(info.F2_HC)[info.F2_HC$treatment%in%c("C","L")],]
info.F2_HC %>% count(treatment)
ceiling(1000 / 15)

treatment,n
<fct>,<int>
C,15
L,16


In [81]:
info.F2_HC <- info[info$generation=="F2",]
info.F2_HC <- info.F2_HC[rownames(info.F2_HC)[info.F2_HC$treatment%in%c("C","H")],]
mc.samples <- 128
mag$cov.F2_HC <- mag$cov[, rownames(info.F2_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F2_HC.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F2_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F2_HC[rowSums(mag$cov.F2_HC) > 0, ])
mag$nz.F2_HC <- mag$cov.F2_HC[rownames(mag$cov.F2_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F2_HC) / sum(rowSums(mag$nz.F2_HC)) * 100, probs = mag.F2_HC.quantile_thresh)
mag$filtered.F2_HC <- low.count.removal(mag$nz.F2_HC, percent = low_threshold)$data.filter

mag$occ.F2_HC <- rowSums(mag$filtered.F2_HC>0) / ncol(mag$filtered.F2_HC) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F2_HC <- mag$filtered.F2_HC[which(mag$occ.F2_HC >= mag.F2_HC.percent_occ), ]
paste0(c(dim(mag$cov.F2_HC)[1] - dim(mag$filtered.F2_HC)[1], "MAGs removed", dim(mag$filtered.F2_HC)[1], "remaining"))

X <- mag$filtered.F2_HC
Y <- info.F2_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.mag.F2_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.mag.F2_HC[order(x.mag.F2_HC$effect, decreasing = TRUE),], "mag.F2_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### MAGs - F2 generation (L vs C)

In [82]:
info.F2_LC <- info[info$generation=="F2",]
info.F2_LC <- info.F2_LC[rownames(info.F2_LC)[info.F2_LC$treatment%in%c("C","H")],]
info.F2_LC %>% count(treatment)
ceiling(1000 / 15)

treatment,n
<fct>,<int>
C,15
H,15


In [83]:
info.F2_LC <- info[info$generation=="F2",]
info.F2_LC <- info.F2_LC[rownames(info.F2_LC)[info.F2_LC$treatment%in%c("C","L")],]
mc.samples <- 128
mag$cov.F2_LC <- mag$cov[, rownames(info.F2_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
mag.F2_LC.percent_occ <- 10
# Quantile used for removing low abundance features
mag.F2_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(mag$cov.F2_LC[rowSums(mag$cov.F2_LC) > 0, ])
mag$nz.F2_LC <- mag$cov.F2_LC[rownames(mag$cov.F2_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(mag$nz.F2_LC) / sum(rowSums(mag$nz.F2_LC)) * 100, probs = mag.F2_LC.quantile_thresh)
mag$filtered.F2_LC <- low.count.removal(mag$nz.F2_LC, percent = low_threshold)$data.filter

mag$occ.F2_LC <- rowSums(mag$filtered.F2_LC>0) / ncol(mag$filtered.F2_LC) * 100
# Keep features occurring in defined percentage of samples
mag$filtered.F2_LC <- mag$filtered.F2_LC[which(mag$occ.F2_LC >= mag.F2_LC.percent_occ), ]
paste0(c(dim(mag$cov.F2_LC)[1] - dim(mag$filtered.F2_LC)[1], "MAGs removed", dim(mag$filtered.F2_LC)[1], "remaining"))

X <- mag$filtered.F2_LC
Y <- info.F2_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.mag.F2_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.mag.F2_LC[order(x.mag.F2_LC$effect, decreasing = TRUE),], "mag.F2_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



### UniRef species

#### UniRef species - Generational differences

In [18]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov[rowSums(uniref$cov) > 0, ])
uniref$nz <- uniref$cov[rownames(uniref$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz) / sum(rowSums(uniref$nz)) * 100, probs = uniref.quantile_thresh)
uniref$filtered <- low.count.removal(uniref$nz, percent = low_threshold)$data.filter

uniref$occ <- rowSums(uniref$filtered>0) / ncol(uniref$filtered) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered <- uniref$filtered[which(uniref$occ >= uniref.percent_occ), ]
paste0(c(dim(uniref$cov)[1] - dim(uniref$filtered)[1], "species removed", dim(uniref$filtered)[1], "remaining"))

uniref$filtered <- ceiling(uniref$filtered)

denoms <- aldex.set.mode(ceiling(uniref$filtered), info[colnames(uniref$filtered),"generation"], denom="iqlr")

x.uniref <- aldex.clr(ceiling(uniref$filtered), mm.generation, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.uniref <- aldex.glm(x.uniref, mm.generation)
save_results(glm.test.uniref, "glm.test.uniref")
glm.effect.uniref <- aldex.glm.effect(x.uniref)
save_results(glm.effect.uniref, "glm.effect.uniref")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F0

In [19]:
# Extract and filter F0 samples
uniref$cov.F0 <- uniref$cov[, rownames(info[info$generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F0.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F0.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F0[rowSums(uniref$cov.F0) > 0, ])
uniref$nz.F0 <- uniref$cov.F0[rownames(uniref$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F0) / sum(rowSums(uniref$nz.F0)) * 100, probs = uniref.F0.quantile_thresh)
uniref$filtered.F0 <- low.count.removal(uniref$nz.F0, percent = low_threshold)$data.filter

uniref$occ.F0 <- rowSums(uniref$filtered.F0>0) / ncol(uniref$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F0 <- uniref$filtered.F0[which(uniref$occ.F0 >= uniref.F0.percent_occ), ]
paste0(c(dim(uniref$cov.F0)[1] - dim(uniref$filtered.F0)[1], "species removed", dim(uniref$filtered.F0)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(uniref$filtered.F0), info[colnames(uniref$filtered.F0),"treatment"], denom="iqlr")

x.uniref.F0 <- aldex.clr(ceiling(uniref$filtered.F0), mm.F0.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.uniref.F0 <- aldex.glm(x.uniref.F0, mm.F0.treatment)
save_results(glm.test.uniref.F0, "glm.test.uniref.F0")
glm.effect.uniref.F0 <- aldex.glm.effect(x.uniref.F0)
save_results(glm.effect.uniref.F0, "glm.effect.uniref.F0")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F0 generation (C+L vs H)

In [20]:
info.F0 <- info[info$generation=="F0",]
info.F0_2 <- info.F0
info.F0_2$treatment <- gsub(x = info.F0_2$treatment, pattern = "[CL]", replacement = "CL")
info.F0_2$treatment <- factor(info.F0_2$treatment)
mm.F0_2.treatment <- model.matrix(~ treatment, info.F0_2)
mc.samples <- 128

denoms <- aldex.set.mode(ceiling(uniref$filtered.F0), info[colnames(uniref$filtered.F0),"treatment"], denom="iqlr")
conds <- info.F0_2[colnames(uniref$filtered.F0),"treatment"]
x.uniref.F0_2 <- aldex(ceiling(uniref$filtered.F0), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.F0_2[order(x.uniref.F0_2$effect, decreasing = TRUE),], "uniref.F0_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F0 generation (H vs C)

In [84]:
info.F0_HC <- info[info$generation=="F0",]
info.F0_HC <- info.F0_HC[rownames(info.F0_HC)[info.F0_HC$treatment%in%c("C","H")],]
mc.samples <- 128
uniref$cov.F0_HC <- uniref$cov[, rownames(info.F0_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F0_HC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F0_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F0_HC[rowSums(uniref$cov.F0_HC) > 0, ])
uniref$nz.F0_HC <- uniref$cov.F0_HC[rownames(uniref$cov.F0_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F0_HC) / sum(rowSums(uniref$nz.F0_HC)) * 100, probs = uniref.F0_HC.quantile_thresh)
uniref$filtered.F0_HC <- low.count.removal(uniref$nz.F0_HC, percent = low_threshold)$data.filter

uniref$occ.F0_HC <- rowSums(uniref$filtered.F0_HC>0) / ncol(uniref$filtered.F0_HC) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F0_HC <- uniref$filtered.F0_HC[which(uniref$occ.F0_HC >= uniref.F0_HC.percent_occ), ]
paste0(c(dim(uniref$cov.F0_HC)[1] - dim(uniref$filtered.F0_HC)[1], "species removed", dim(uniref$filtered.F0_HC)[1], "remaining"))

X <- uniref$filtered.F0_HC
Y <- info.F0_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.F0_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.F0_HC[order(x.uniref.F0_HC$effect, decreasing = TRUE),], "uniref.F0_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F0 generation (L vs C)

In [85]:
info.F0_LC <- info[info$generation=="F0",]
info.F0_LC <- info.F0_LC[rownames(info.F0_LC)[info.F0_LC$treatment%in%c("C","L")],]
mc.samples <- 128
uniref$cov.F0_LC <- uniref$cov[, rownames(info.F0_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F0_LC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F0_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F0_LC[rowSums(uniref$cov.F0_LC) > 0, ])
uniref$nz.F0_LC <- uniref$cov.F0_LC[rownames(uniref$cov.F0_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F0_LC) / sum(rowSums(uniref$nz.F0_LC)) * 100, probs = uniref.F0_LC.quantile_thresh)
uniref$filtered.F0_LC <- low.count.removal(uniref$nz.F0_LC, percent = low_threshold)$data.filter

uniref$occ.F0_LC <- rowSums(uniref$filtered.F0_LC>0) / ncol(uniref$filtered.F0_LC) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F0_LC <- uniref$filtered.F0_LC[which(uniref$occ.F0_LC >= uniref.F0_LC.percent_occ), ]
paste0(c(dim(uniref$cov.F0_LC)[1] - dim(uniref$filtered.F0_LC)[1], "species removed", dim(uniref$filtered.F0_LC)[1], "remaining"))

X <- uniref$filtered.F0_LC
Y <- info.F0_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.F0_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.F0_LC[order(x.uniref.F0_LC$effect, decreasing = TRUE),], "uniref.F0_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F1 generation

In [21]:
# Extract and filter F1 samples
uniref$cov.F1 <- uniref$cov[, rownames(info[info$generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F1.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F1.quantile_thresh <- 0.1
mc.samples <- 200

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F1[rowSums(uniref$cov.F1) > 0, ])
uniref$nz.F1 <- uniref$cov.F1[rownames(uniref$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F1) / sum(rowSums(uniref$nz.F1)) * 100, probs = uniref.F1.quantile_thresh)
uniref$filtered.F1 <- low.count.removal(uniref$nz.F1, percent = low_threshold)$data.filter

uniref$occ.F1 <- rowSums(uniref$filtered.F1>0) / ncol(uniref$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F1 <- uniref$filtered.F1[which(uniref$occ.F1 >= uniref.F1.percent_occ), ]
paste0(c(dim(uniref$cov.F1)[1] - dim(uniref$filtered.F1)[1], "species removed", dim(uniref$filtered.F1)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(uniref$filtered.F1), info[colnames(uniref$filtered.F1),"treatment"], denom="iqlr")

x.uniref.F1 <- aldex.clr(ceiling(uniref$filtered.F1), mm.F1.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.uniref.F1 <- aldex.glm(x.uniref.F1, mm.F1.treatment)
save_results(glm.test.uniref.F1, "glm.test.uniref.F1")
glm.effect.uniref.F1 <- aldex.glm.effect(x.uniref.F1)
save_results(glm.effect.uniref.F1, "glm.effect.uniref.F1")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F1 generation (C+L vs H)

In [22]:
info.F1 <- info[info$generation=="F1",]
info.F1_2 <- info.F1
info.F1_2$treatment <- gsub(x = info.F1_2$treatment, pattern = "[CL]", replacement = "CL")
info.F1_2$treatment <- factor(info.F1_2$treatment)
mm.F1_2.treatment <- model.matrix(~ treatment, info.F1_2)
mc.samples <- 200

denoms <- aldex.set.mode(ceiling(uniref$filtered.F1), info[colnames(uniref$filtered.F1),"treatment"], denom="iqlr")
conds <- info.F1_2[colnames(uniref$filtered.F1),"treatment"]
x.uniref.F1_2 <- aldex(ceiling(uniref$filtered.F1), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.F1_2[order(x.uniref.F1_2$effect, decreasing = TRUE),], "uniref.F1_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F1 generation (H vs C)

In [86]:
info.F1_HC <- info[info$generation=="F1",]
info.F1_HC <- info.F1_HC[rownames(info.F1_HC)[info.F1_HC$treatment%in%c("C","H")],]
mc.samples <- 200
uniref$cov.F1_HC <- uniref$cov[, rownames(info.F1_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F1_HC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F1_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F1_HC[rowSums(uniref$cov.F1_HC) > 0, ])
uniref$nz.F1_HC <- uniref$cov.F1_HC[rownames(uniref$cov.F1_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F1_HC) / sum(rowSums(uniref$nz.F1_HC)) * 100, probs = uniref.F1_HC.quantile_thresh)
uniref$filtered.F1_HC <- low.count.removal(uniref$nz.F1_HC, percent = low_threshold)$data.filter

uniref$occ.F1_HC <- rowSums(uniref$filtered.F1_HC>0) / ncol(uniref$filtered.F1_HC) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F1_HC <- uniref$filtered.F1_HC[which(uniref$occ.F1_HC >= uniref.F1_HC.percent_occ), ]
paste0(c(dim(uniref$cov.F1_HC)[1] - dim(uniref$filtered.F1_HC)[1], "unirefs removed", dim(uniref$filtered.F1_HC)[1], "remaining"))

X <- uniref$filtered.F1_HC
Y <- info.F1_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.F1_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.F1_HC[order(x.uniref.F1_HC$effect, decreasing = TRUE),], "uniref.F1_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F1 generation (L vs C)

In [87]:
info.F1_LC <- info[info$generation=="F1",]
info.F1_LC <- info.F1_LC[rownames(info.F1_LC)[info.F1_LC$treatment%in%c("C","L")],]
mc.samples <- 200
uniref$cov.F1_LC <- uniref$cov[, rownames(info.F1_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F1_LC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F1_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F1_LC[rowSums(uniref$cov.F1_LC) > 0, ])
uniref$nz.F1_LC <- uniref$cov.F1_LC[rownames(uniref$cov.F1_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F1_LC) / sum(rowSums(uniref$nz.F1_LC)) * 100, probs = uniref.F1_LC.quantile_thresh)
uniref$filtered.F1_LC <- low.count.removal(uniref$nz.F1_LC, percent = low_threshold)$data.filter

uniref$occ.F1_LC <- rowSums(uniref$filtered.F1_LC>0) / ncol(uniref$filtered.F1_LC) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F1_LC <- uniref$filtered.F1_LC[which(uniref$occ.F1_LC >= uniref.F1_LC.percent_occ), ]
paste0(c(dim(uniref$cov.F1_LC)[1] - dim(uniref$filtered.F1_LC)[1], "unirefs removed", dim(uniref$filtered.F1_LC)[1], "remaining"))

X <- uniref$filtered.F1_LC
Y <- info.F1_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.F1_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.F1_LC[order(x.uniref.F1_LC$effect, decreasing = TRUE),], "uniref.F1_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F2 generation

In [23]:
# Extract and filter F2 samples
uniref$cov.F2 <- uniref$cov[, rownames(info[info$generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F2.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F2.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F2[rowSums(uniref$cov.F2) > 0, ])
uniref$nz.F2 <- uniref$cov.F2[rownames(uniref$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F2) / sum(rowSums(uniref$nz.F2)) * 100, probs = uniref.F2.quantile_thresh)
uniref$filtered.F2 <- low.count.removal(uniref$nz.F2, percent = low_threshold)$data.filter

uniref$occ.F2 <- rowSums(uniref$filtered.F2>0) / ncol(uniref$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F2 <- uniref$filtered.F2[which(uniref$occ.F2 >= uniref.F2.percent_occ), ]
paste0(c(dim(uniref$cov.F2)[1] - dim(uniref$filtered.F2)[1], "species removed", dim(uniref$filtered.F2)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(uniref$filtered.F2), info[colnames(uniref$filtered.F2),"treatment"], denom="iqlr")

x.uniref.F2 <- aldex.clr(ceiling(uniref$filtered.F2), mm.F2.treatment, mc.samples=128, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.uniref.F2 <- aldex.glm(x.uniref.F2, mm.F2.treatment)
save_results(glm.test.uniref.F2, "glm.test.uniref.F2")
glm.effect.uniref.F2 <- aldex.glm.effect(x.uniref.F2)
save_results(glm.effect.uniref.F2, "glm.effect.uniref.F2")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F2 generation (C+L vs H)

In [24]:
info.F2 <- info[info$generation=="F2",]
info.F2_2 <- info.F2
info.F2_2$treatment <- gsub(x = info.F2_2$treatment, pattern = "[CL]", replacement = "CL")
info.F2_2$treatment <- factor(info.F2_2$treatment)
mm.F2_2.treatment <- model.matrix(~ treatment, info.F2_2)
mc.samples <- 128

denoms <- aldex.set.mode(ceiling(uniref$filtered.F2), info[colnames(uniref$filtered.F2),"treatment"], denom="iqlr")
conds <- info.F2_2[colnames(uniref$filtered.F2),"treatment"]
x.uniref.F2_2 <- aldex(ceiling(uniref$filtered.F2), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.F2_2[order(x.uniref.F2_2$effect, decreasing = TRUE),], "uniref.F2_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F2 generation (H vs C)

In [88]:
info.F2_HC <- info[info$generation=="F2",]
info.F2_HC <- info.F2_HC[rownames(info.F2_HC)[info.F2_HC$treatment%in%c("C","H")],]
mc.samples <- 128
uniref$cov.F2_HC <- uniref$cov[, rownames(info.F2_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F2_HC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F2_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F2_HC[rowSums(uniref$cov.F2_HC) > 0, ])
uniref$nz.F2_HC <- uniref$cov.F2_HC[rownames(uniref$cov.F2_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F2_HC) / sum(rowSums(uniref$nz.F2_HC)) * 100, probs = uniref.F2_HC.quantile_thresh)
uniref$filtered.F2_HC <- low.count.removal(uniref$nz.F2_HC, percent = low_threshold)$data.filter

uniref$occ.F2_HC <- rowSums(uniref$filtered.F2_HC>0) / ncol(uniref$filtered.F2_HC) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F2_HC <- uniref$filtered.F2_HC[which(uniref$occ.F2_HC >= uniref.F2_HC.percent_occ), ]
paste0(c(dim(uniref$cov.F2_HC)[1] - dim(uniref$filtered.F2_HC)[1], "species removed", dim(uniref$filtered.F2_HC)[1], "remaining"))

X <- uniref$filtered.F2_HC
Y <- info.F2_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.F2_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.F2_HC[order(x.uniref.F2_HC$effect, decreasing = TRUE),], "uniref.F2_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef species - F2 generation (L vs C)

In [89]:
info.F2_LC <- info[info$generation=="F2",]
info.F2_LC <- info.F2_LC[rownames(info.F2_LC)[info.F2_LC$treatment%in%c("C","L")],]
mc.samples <- 128
uniref$cov.F2_LC <- uniref$cov[, rownames(info.F2_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.F2_LC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.F2_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref$cov.F2_LC[rowSums(uniref$cov.F2_LC) > 0, ])
uniref$nz.F2_LC <- uniref$cov.F2_LC[rownames(uniref$cov.F2_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref$nz.F2_LC) / sum(rowSums(uniref$nz.F2_LC)) * 100, probs = uniref.F2_LC.quantile_thresh)
uniref$filtered.F2_LC <- low.count.removal(uniref$nz.F2_LC, percent = low_threshold)$data.filter

uniref$occ.F2_LC <- rowSums(uniref$filtered.F2_LC>0) / ncol(uniref$filtered.F2_LC) * 100
# Keep features occurring in defined percentage of samples
uniref$filtered.F2_LC <- uniref$filtered.F2_LC[which(uniref$occ.F2_LC >= uniref.F2_LC.percent_occ), ]
paste0(c(dim(uniref$cov.F2_LC)[1] - dim(uniref$filtered.F2_LC)[1], "species removed", dim(uniref$filtered.F2_LC)[1], "remaining"))

X <- uniref$filtered.F2_LC
Y <- info.F2_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.F2_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.F2_LC[order(x.uniref.F2_LC$effect, decreasing = TRUE),], "uniref.F2_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



### UniRef genera

#### UniRef genera - Generational differences

In [25]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov[rowSums(uniref.genus$cov) > 0, ])
uniref.genus$nz <- uniref.genus$cov[rownames(uniref.genus$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz) / sum(rowSums(uniref.genus$nz)) * 100, probs = uniref.genus.quantile_thresh)
uniref.genus$filtered <- low.count.removal(uniref.genus$nz, percent = low_threshold)$data.filter

uniref.genus$occ <- rowSums(uniref.genus$filtered>0) / ncol(uniref.genus$filtered) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered <- uniref.genus$filtered[which(uniref.genus$occ >= uniref.genus.percent_occ), ]
paste0(c(dim(uniref.genus$cov)[1] - dim(uniref.genus$filtered)[1], "species removed", dim(uniref.genus$filtered)[1], "remaining"))

uniref.genus$filtered <- ceiling(uniref.genus$filtered)

denoms <- aldex.set.mode(ceiling(uniref.genus$filtered), info[colnames(uniref.genus$filtered),"generation"], denom="iqlr")

x.uniref.genus <- aldex.clr(ceiling(uniref.genus$filtered), mm.generation, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.uniref.genus <- aldex.glm(x.uniref.genus, mm.generation)
save_results(glm.test.uniref.genus, "glm.test.uniref.genus")
glm.effect.uniref.genus <- aldex.glm.effect(x.uniref.genus)
save_results(glm.effect.uniref.genus, "glm.effect.uniref.genus")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F0

In [26]:
# Extract and filter F0 samples
uniref.genus$cov.F0 <- uniref.genus$cov[, rownames(info[info$generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F0.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F0.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F0[rowSums(uniref.genus$cov.F0) > 0, ])
uniref.genus$nz.F0 <- uniref.genus$cov.F0[rownames(uniref.genus$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F0) / sum(rowSums(uniref.genus$nz.F0)) * 100, probs = uniref.genus.F0.quantile_thresh)
uniref.genus$filtered.F0 <- low.count.removal(uniref.genus$nz.F0, percent = low_threshold)$data.filter

uniref.genus$occ.F0 <- rowSums(uniref.genus$filtered.F0>0) / ncol(uniref.genus$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F0 <- uniref.genus$filtered.F0[which(uniref.genus$occ.F0 >= uniref.genus.F0.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F0)[1] - dim(uniref.genus$filtered.F0)[1], "species removed", dim(uniref.genus$filtered.F0)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(uniref.genus$filtered.F0), info[colnames(uniref.genus$filtered.F0),"treatment"], denom="iqlr")

x.uniref.genus.F0 <- aldex.clr(ceiling(uniref.genus$filtered.F0), mm.F0.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.uniref.genus.F0 <- aldex.glm(x.uniref.genus.F0, mm.F0.treatment)
save_results(glm.test.uniref.genus.F0, "glm.test.uniref.genus.F0")
glm.effect.uniref.genus.F0 <- aldex.glm.effect(x.uniref.genus.F0)
save_results(glm.effect.uniref.genus.F0, "glm.effect.uniref.genus.F0")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F0 generation (C+L vs H)

In [27]:
info.F0 <- info[info$generation=="F0",]
info.F0_2 <- info.F0
info.F0_2$treatment <- gsub(x = info.F0_2$treatment, pattern = "[CL]", replacement = "CL")
info.F0_2$treatment <- factor(info.F0_2$treatment)
mm.F0_2.treatment <- model.matrix(~ treatment, info.F0_2)
mc.samples <- 128

denoms <- aldex.set.mode(ceiling(uniref.genus$filtered.F0), info[colnames(uniref.genus$filtered.F0),"treatment"], denom="iqlr")
conds <- info.F0_2[colnames(uniref.genus$filtered.F0),"treatment"]
x.uniref.genus.F0_2 <- aldex(ceiling(uniref.genus$filtered.F0), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.genus.F0_2[order(x.uniref.genus.F0_2$effect, decreasing = TRUE),], "uniref.genus.F0_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F0 generation (H vs C)

In [90]:
info.F0_HC <- info[info$generation=="F0",]
info.F0_HC <- info.F0_HC[rownames(info.F0_HC)[info.F0_HC$treatment%in%c("C","H")],]
mc.samples <- 128
uniref.genus$cov.F0_HC <- uniref.genus$cov[, rownames(info.F0_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F0_HC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F0_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F0_HC[rowSums(uniref.genus$cov.F0_HC) > 0, ])
uniref.genus$nz.F0_HC <- uniref.genus$cov.F0_HC[rownames(uniref.genus$cov.F0_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F0_HC) / sum(rowSums(uniref.genus$nz.F0_HC)) * 100, probs = uniref.genus.F0_HC.quantile_thresh)
uniref.genus$filtered.F0_HC <- low.count.removal(uniref.genus$nz.F0_HC, percent = low_threshold)$data.filter

uniref.genus$occ.F0_HC <- rowSums(uniref.genus$filtered.F0_HC>0) / ncol(uniref.genus$filtered.F0_HC) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F0_HC <- uniref.genus$filtered.F0_HC[which(uniref.genus$occ.F0_HC >= uniref.genus.F0_HC.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F0_HC)[1] - dim(uniref.genus$filtered.F0_HC)[1], "genera removed", dim(uniref.genus$filtered.F0_HC)[1], "remaining"))

X <- uniref.genus$filtered.F0_HC
Y <- info.F0_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.genus.F0_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.genus.F0_HC[order(x.uniref.genus.F0_HC$effect, decreasing = TRUE),], "uniref.genus.F0_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F0 generation (L vs C)

In [91]:
info.F0_LC <- info[info$generation=="F0",]
info.F0_LC <- info.F0_LC[rownames(info.F0_LC)[info.F0_LC$treatment%in%c("C","L")],]
mc.samples <- 128
uniref.genus$cov.F0_LC <- uniref.genus$cov[, rownames(info.F0_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F0_LC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F0_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F0_LC[rowSums(uniref.genus$cov.F0_LC) > 0, ])
uniref.genus$nz.F0_LC <- uniref.genus$cov.F0_LC[rownames(uniref.genus$cov.F0_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F0_LC) / sum(rowSums(uniref.genus$nz.F0_LC)) * 100, probs = uniref.genus.F0_LC.quantile_thresh)
uniref.genus$filtered.F0_LC <- low.count.removal(uniref.genus$nz.F0_LC, percent = low_threshold)$data.filter

uniref.genus$occ.F0_LC <- rowSums(uniref.genus$filtered.F0_LC>0) / ncol(uniref.genus$filtered.F0_LC) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F0_LC <- uniref.genus$filtered.F0_LC[which(uniref.genus$occ.F0_LC >= uniref.genus.F0_LC.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F0_LC)[1] - dim(uniref.genus$filtered.F0_LC)[1], "genera removed", dim(uniref.genus$filtered.F0_LC)[1], "remaining"))

X <- uniref.genus$filtered.F0_LC
Y <- info.F0_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.genus.F0_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.genus.F0_LC[order(x.uniref.genus.F0_LC$effect, decreasing = TRUE),], "uniref.genus.F0_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F1 generation

In [28]:
# Extract and filter F1 samples
uniref.genus$cov.F1 <- uniref.genus$cov[, rownames(info[info$generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F1.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F1.quantile_thresh <- 0.1
mc.samples <- 200

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F1[rowSums(uniref.genus$cov.F1) > 0, ])
uniref.genus$nz.F1 <- uniref.genus$cov.F1[rownames(uniref.genus$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F1) / sum(rowSums(uniref.genus$nz.F1)) * 100, probs = uniref.genus.F1.quantile_thresh)
uniref.genus$filtered.F1 <- low.count.removal(uniref.genus$nz.F1, percent = low_threshold)$data.filter

uniref.genus$occ.F1 <- rowSums(uniref.genus$filtered.F1>0) / ncol(uniref.genus$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F1 <- uniref.genus$filtered.F1[which(uniref.genus$occ.F1 >= uniref.genus.F1.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F1)[1] - dim(uniref.genus$filtered.F1)[1], "species removed", dim(uniref.genus$filtered.F1)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(uniref.genus$filtered.F1), info[colnames(uniref.genus$filtered.F1),"treatment"], denom="iqlr")

x.uniref.genus.F1 <- aldex.clr(ceiling(uniref.genus$filtered.F1), mm.F1.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.uniref.genus.F1 <- aldex.glm(x.uniref.genus.F1, mm.F1.treatment)
save_results(glm.test.uniref.genus.F1, "glm.test.uniref.genus.F1")
glm.effect.uniref.genus.F1 <- aldex.glm.effect(x.uniref.genus.F1)
save_results(glm.effect.uniref.genus.F1, "glm.effect.uniref.genus.F1")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F1 generation (C+L vs H)

In [29]:
info.F1 <- info[info$generation=="F1",]
info.F1_2 <- info.F1
info.F1_2$treatment <- gsub(x = info.F1_2$treatment, pattern = "[CL]", replacement = "CL")
info.F1_2$treatment <- factor(info.F1_2$treatment)
mm.F1_2.treatment <- model.matrix(~ treatment, info.F1_2)
mc.samples <- 200

denoms <- aldex.set.mode(ceiling(uniref.genus$filtered.F1), info[colnames(uniref.genus$filtered.F1),"treatment"], denom="iqlr")
conds <- info.F1_2[colnames(uniref.genus$filtered.F1),"treatment"]
x.uniref.genus.F1_2 <- aldex(ceiling(uniref.genus$filtered.F1), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.genus.F1_2[order(x.uniref.genus.F1_2$effect, decreasing = TRUE),], "uniref.genus.F1_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F1 generation (H vs C)

In [92]:
info.F1_HC <- info[info$generation=="F1",]
info.F1_HC <- info.F1_HC[rownames(info.F1_HC)[info.F1_HC$treatment%in%c("C","H")],]
mc.samples <- 200
uniref.genus$cov.F1_HC <- uniref.genus$cov[, rownames(info.F1_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F1_HC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F1_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F1_HC[rowSums(uniref.genus$cov.F1_HC) > 0, ])
uniref.genus$nz.F1_HC <- uniref.genus$cov.F1_HC[rownames(uniref.genus$cov.F1_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F1_HC) / sum(rowSums(uniref.genus$nz.F1_HC)) * 100, probs = uniref.genus.F1_HC.quantile_thresh)
uniref.genus$filtered.F1_HC <- low.count.removal(uniref.genus$nz.F1_HC, percent = low_threshold)$data.filter

uniref.genus$occ.F1_HC <- rowSums(uniref.genus$filtered.F1_HC>0) / ncol(uniref.genus$filtered.F1_HC) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F1_HC <- uniref.genus$filtered.F1_HC[which(uniref.genus$occ.F1_HC >= uniref.genus.F1_HC.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F1_HC)[1] - dim(uniref.genus$filtered.F1_HC)[1], "genera removed", dim(uniref.genus$filtered.F1_HC)[1], "remaining"))

X <- uniref.genus$filtered.F1_HC
Y <- info.F1_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.genus.F1_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.genus.F1_HC[order(x.uniref.genus.F1_HC$effect, decreasing = TRUE),], "uniref.genus.F1_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F1 generation (L vs C)

In [93]:
info.F1_LC <- info[info$generation=="F1",]
info.F1_LC <- info.F1_LC[rownames(info.F1_LC)[info.F1_LC$treatment%in%c("C","L")],]
mc.samples <- 200
uniref.genus$cov.F1_LC <- uniref.genus$cov[, rownames(info.F1_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F1_LC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F1_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F1_LC[rowSums(uniref.genus$cov.F1_LC) > 0, ])
uniref.genus$nz.F1_LC <- uniref.genus$cov.F1_LC[rownames(uniref.genus$cov.F1_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F1_LC) / sum(rowSums(uniref.genus$nz.F1_LC)) * 100, probs = uniref.genus.F1_LC.quantile_thresh)
uniref.genus$filtered.F1_LC <- low.count.removal(uniref.genus$nz.F1_LC, percent = low_threshold)$data.filter

uniref.genus$occ.F1_LC <- rowSums(uniref.genus$filtered.F1_LC>0) / ncol(uniref.genus$filtered.F1_LC) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F1_LC <- uniref.genus$filtered.F1_LC[which(uniref.genus$occ.F1_LC >= uniref.genus.F1_LC.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F1_LC)[1] - dim(uniref.genus$filtered.F1_LC)[1], "genera removed", dim(uniref.genus$filtered.F1_LC)[1], "remaining"))

X <- uniref.genus$filtered.F1_LC
Y <- info.F1_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.genus.F1_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.genus.F1_LC[order(x.uniref.genus.F1_LC$effect, decreasing = TRUE),], "uniref.genus.F1_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F2 generation

In [30]:
# Extract and filter F2 samples
uniref.genus$cov.F2 <- uniref.genus$cov[, rownames(info[info$generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F2.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F2.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F2[rowSums(uniref.genus$cov.F2) > 0, ])
uniref.genus$nz.F2 <- uniref.genus$cov.F2[rownames(uniref.genus$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F2) / sum(rowSums(uniref.genus$nz.F2)) * 100, probs = uniref.genus.F2.quantile_thresh)
uniref.genus$filtered.F2 <- low.count.removal(uniref.genus$nz.F2, percent = low_threshold)$data.filter

uniref.genus$occ.F2 <- rowSums(uniref.genus$filtered.F2>0) / ncol(uniref.genus$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F2 <- uniref.genus$filtered.F2[which(uniref.genus$occ.F2 >= uniref.genus.F2.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F2)[1] - dim(uniref.genus$filtered.F2)[1], "species removed", dim(uniref.genus$filtered.F2)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(uniref.genus$filtered.F2), info[colnames(uniref.genus$filtered.F2),"treatment"], denom="iqlr")

x.uniref.genus.F2 <- aldex.clr(ceiling(uniref.genus$filtered.F2), mm.F2.treatment, mc.samples=128, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.uniref.genus.F2 <- aldex.glm(x.uniref.genus.F2, mm.F2.treatment)
save_results(glm.test.uniref.genus.F2, "glm.test.uniref.genus.F2")
glm.effect.uniref.genus.F2 <- aldex.glm.effect(x.uniref.genus.F2)
save_results(glm.effect.uniref.genus.F2, "glm.effect.uniref.genus.F2")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F2 generation (C+L vs H)

In [31]:
info.F2 <- info[info$generation=="F2",]
info.F2_2 <- info.F2
info.F2_2$treatment <- gsub(x = info.F2_2$treatment, pattern = "[CL]", replacement = "CL")
info.F2_2$treatment <- factor(info.F2_2$treatment)
mm.F2_2.treatment <- model.matrix(~ treatment, info.F2_2)
mc.samples <- 128

denoms <- aldex.set.mode(ceiling(uniref.genus$filtered.F2), info[colnames(uniref.genus$filtered.F2),"treatment"], denom="iqlr")
conds <- info.F2_2[colnames(uniref.genus$filtered.F2),"treatment"]
x.uniref.genus.F2_2 <- aldex(ceiling(uniref.genus$filtered.F2), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.genus.F2_2[order(x.uniref.genus.F2_2$effect, decreasing = TRUE),], "uniref.genus.F2_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F2 generation (H vs C)

In [94]:
info.F2_HC <- info[info$generation=="F2",]
info.F2_HC <- info.F2_HC[rownames(info.F2_HC)[info.F2_HC$treatment%in%c("C","H")],]
mc.samples <- 128
uniref.genus$cov.F2_HC <- uniref.genus$cov[, rownames(info.F2_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F2_HC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F2_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F2_HC[rowSums(uniref.genus$cov.F2_HC) > 0, ])
uniref.genus$nz.F2_HC <- uniref.genus$cov.F2_HC[rownames(uniref.genus$cov.F2_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F2_HC) / sum(rowSums(uniref.genus$nz.F2_HC)) * 100, probs = uniref.genus.F2_HC.quantile_thresh)
uniref.genus$filtered.F2_HC <- low.count.removal(uniref.genus$nz.F2_HC, percent = low_threshold)$data.filter

uniref.genus$occ.F2_HC <- rowSums(uniref.genus$filtered.F2_HC>0) / ncol(uniref.genus$filtered.F2_HC) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F2_HC <- uniref.genus$filtered.F2_HC[which(uniref.genus$occ.F2_HC >= uniref.genus.F2_HC.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F2_HC)[1] - dim(uniref.genus$filtered.F2_HC)[1], "genera removed", dim(uniref.genus$filtered.F2_HC)[1], "remaining"))

X <- uniref.genus$filtered.F2_HC
Y <- info.F2_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.genus.F2_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.genus.F2_HC[order(x.uniref.genus.F2_HC$effect, decreasing = TRUE),], "uniref.genus.F2_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### UniRef genera - F2 generation (L vs C)

In [95]:
info.F2_LC <- info[info$generation=="F2",]
info.F2_LC <- info.F2_LC[rownames(info.F2_LC)[info.F2_LC$treatment%in%c("C","L")],]
mc.samples <- 128
uniref.genus$cov.F2_LC <- uniref.genus$cov[, rownames(info.F2_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
uniref.genus.F2_LC.percent_occ <- 10
# Quantile used for removing low abundance features
uniref.genus.F2_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(uniref.genus$cov.F2_LC[rowSums(uniref.genus$cov.F2_LC) > 0, ])
uniref.genus$nz.F2_LC <- uniref.genus$cov.F2_LC[rownames(uniref.genus$cov.F2_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(uniref.genus$nz.F2_LC) / sum(rowSums(uniref.genus$nz.F2_LC)) * 100, probs = uniref.genus.F2_LC.quantile_thresh)
uniref.genus$filtered.F2_LC <- low.count.removal(uniref.genus$nz.F2_LC, percent = low_threshold)$data.filter

uniref.genus$occ.F2_LC <- rowSums(uniref.genus$filtered.F2_LC>0) / ncol(uniref.genus$filtered.F2_LC) * 100
# Keep features occurring in defined percentage of samples
uniref.genus$filtered.F2_LC <- uniref.genus$filtered.F2_LC[which(uniref.genus$occ.F2_LC >= uniref.genus.F2_LC.percent_occ), ]
paste0(c(dim(uniref.genus$cov.F2_LC)[1] - dim(uniref.genus$filtered.F2_LC)[1], "genera removed", dim(uniref.genus$filtered.F2_LC)[1], "remaining"))

X <- uniref.genus$filtered.F2_LC
Y <- info.F2_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.uniref.genus.F2_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.uniref.genus.F2_LC[order(x.uniref.genus.F2_LC$effect, decreasing = TRUE),], "uniref.genus.F2_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



### PFAMs

#### PFAMs - Generational differences

In [None]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov[rowSums(pfam$cov) > 0, ])
pfam$nz <- pfam$cov[rownames(pfam$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz) / sum(rowSums(pfam$nz)) * 100, probs = pfam.quantile_thresh)
pfam$filtered <- low.count.removal(pfam$nz, percent = low_threshold)$data.filter

pfam$occ <- rowSums(pfam$filtered>0) / ncol(pfam$filtered) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered <- pfam$filtered[which(pfam$occ >= pfam.percent_occ), ]
paste0(c(dim(pfam$cov)[1] - dim(pfam$filtered)[1], "pfams removed", dim(pfam$filtered)[1], "remaining"))

pfam$filtered <- ceiling(pfam$filtered)

denoms <- aldex.set.mode(ceiling(pfam$filtered), info[colnames(pfam$filtered),"generation"], denom="iqlr")

x.pfam <- aldex.clr(ceiling(pfam$filtered), mm.generation, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.pfam <- aldex.glm(x.pfam, mm.generation)
save_results(glm.test.pfam, "glm.test.pfam")
glm.effect.pfam <- aldex.glm.effect(x.pfam)
save_results(glm.effect.pfam, "glm.effect.pfam")

#### PFAM - F0

In [42]:
# Extract and filter F0 samples
pfam$cov.F0 <- pfam$cov[, rownames(info[info$generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F0.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F0.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F0[rowSums(pfam$cov.F0) > 0, ])
pfam$nz.F0 <- pfam$cov.F0[rownames(pfam$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F0) / sum(rowSums(pfam$nz.F0)) * 100, probs = pfam.F0.quantile_thresh)
pfam$filtered.F0 <- low.count.removal(pfam$nz.F0, percent = low_threshold)$data.filter

pfam$occ.F0 <- rowSums(pfam$filtered.F0>0) / ncol(pfam$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F0 <- pfam$filtered.F0[which(pfam$occ.F0 >= pfam.F0.percent_occ), ]
paste0(c(dim(pfam$cov.F0)[1] - dim(pfam$filtered.F0)[1], "pfams removed", dim(pfam$filtered.F0)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(pfam$filtered.F0), info[colnames(pfam$filtered.F0),"treatment"], denom="iqlr")

x.pfam.F0 <- aldex.clr(ceiling(pfam$filtered.F0), mm.F0.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.pfam.F0 <- aldex.glm(x.pfam.F0, mm.F0.treatment)
save_results(glm.test.pfam.F0, "glm.test.pfam.F0")
glm.effect.pfam.F0 <- aldex.glm.effect(x.pfam.F0)
save_results(glm.effect.pfam.F0, "glm.effect.pfam.F0")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F0 generation (C+L vs H)

In [43]:
info.F0 <- info[info$generation=="F0",]
info.F0_2 <- info.F0
info.F0_2$treatment <- gsub(x = info.F0_2$treatment, pattern = "[CL]", replacement = "CL")
info.F0_2$treatment <- factor(info.F0_2$treatment)
mm.F0_2.treatment <- model.matrix(~ treatment, info.F0_2)
mc.samples <- 128

denoms <- aldex.set.mode(ceiling(pfam$filtered.F0), info[colnames(pfam$filtered.F0),"treatment"], denom="iqlr")
conds <- info.F0_2[colnames(pfam$filtered.F0),"treatment"]
x.pfam.F0_2 <- aldex(ceiling(pfam$filtered.F0), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.pfam.F0_2[order(x.pfam.F0_2$effect, decreasing = TRUE),], "pfam.F0_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F0 generation (H vs C)

In [96]:
info.F0_HC <- info[info$generation=="F0",]
info.F0_HC <- info.F0_HC[rownames(info.F0_HC)[info.F0_HC$treatment%in%c("C","H")],]
mc.samples <- 128
pfam$cov.F0_HC <- pfam$cov[, rownames(info.F0_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F0_HC.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F0_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F0_HC[rowSums(pfam$cov.F0_HC) > 0, ])
pfam$nz.F0_HC <- pfam$cov.F0_HC[rownames(pfam$cov.F0_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F0_HC) / sum(rowSums(pfam$nz.F0_HC)) * 100, probs = pfam.F0_HC.quantile_thresh)
pfam$filtered.F0_HC <- low.count.removal(pfam$nz.F0_HC, percent = low_threshold)$data.filter

pfam$occ.F0_HC <- rowSums(pfam$filtered.F0_HC>0) / ncol(pfam$filtered.F0_HC) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F0_HC <- pfam$filtered.F0_HC[which(pfam$occ.F0_HC >= pfam.F0_HC.percent_occ), ]
paste0(c(dim(pfam$cov.F0_HC)[1] - dim(pfam$filtered.F0_HC)[1], "pfams removed", dim(pfam$filtered.F0_HC)[1], "remaining"))

X <- pfam$filtered.F0_HC
Y <- info.F0_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.pfam.F0_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.pfam.F0_HC[order(x.pfam.F0_HC$effect, decreasing = TRUE),], "pfam.F0_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F0 generation (L vs C)

In [97]:
info.F0_LC <- info[info$generation=="F0",]
info.F0_LC <- info.F0_LC[rownames(info.F0_LC)[info.F0_LC$treatment%in%c("C","L")],]
mc.samples <- 128
pfam$cov.F0_LC <- pfam$cov[, rownames(info.F0_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F0_LC.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F0_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F0_LC[rowSums(pfam$cov.F0_LC) > 0, ])
pfam$nz.F0_LC <- pfam$cov.F0_LC[rownames(pfam$cov.F0_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F0_LC) / sum(rowSums(pfam$nz.F0_LC)) * 100, probs = pfam.F0_LC.quantile_thresh)
pfam$filtered.F0_LC <- low.count.removal(pfam$nz.F0_LC, percent = low_threshold)$data.filter

pfam$occ.F0_LC <- rowSums(pfam$filtered.F0_LC>0) / ncol(pfam$filtered.F0_LC) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F0_LC <- pfam$filtered.F0_LC[which(pfam$occ.F0_LC >= pfam.F0_LC.percent_occ), ]
paste0(c(dim(pfam$cov.F0_LC)[1] - dim(pfam$filtered.F0_LC)[1], "pfams removed", dim(pfam$filtered.F0_LC)[1], "remaining"))

X <- pfam$filtered.F0_LC
Y <- info.F0_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.pfam.F0_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.pfam.F0_LC[order(x.pfam.F0_LC$effect, decreasing = TRUE),], "pfam.F0_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F1 generation

In [44]:
# Extract and filter F1 samples
pfam$cov.F1 <- pfam$cov[, rownames(info[info$generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F1.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F1.quantile_thresh <- 0.1
mc.samples <- 200

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F1[rowSums(pfam$cov.F1) > 0, ])
pfam$nz.F1 <- pfam$cov.F1[rownames(pfam$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F1) / sum(rowSums(pfam$nz.F1)) * 100, probs = pfam.F1.quantile_thresh)
pfam$filtered.F1 <- low.count.removal(pfam$nz.F1, percent = low_threshold)$data.filter

pfam$occ.F1 <- rowSums(pfam$filtered.F1>0) / ncol(pfam$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F1 <- pfam$filtered.F1[which(pfam$occ.F1 >= pfam.F1.percent_occ), ]
paste0(c(dim(pfam$cov.F1)[1] - dim(pfam$filtered.F1)[1], "pfams removed", dim(pfam$filtered.F1)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(pfam$filtered.F1), info[colnames(pfam$filtered.F1),"treatment"], denom="iqlr")

x.pfam.F1 <- aldex.clr(ceiling(pfam$filtered.F1), mm.F1.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.pfam.F1 <- aldex.glm(x.pfam.F1, mm.F1.treatment)
save_results(glm.test.pfam.F1, "glm.test.pfam.F1")
glm.effect.pfam.F1 <- aldex.glm.effect(x.pfam.F1)
save_results(glm.effect.pfam.F1, "glm.effect.pfam.F1")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F1 generation (C+L vs H)

In [45]:
info.F1 <- info[info$generation=="F1",]
info.F1_2 <- info.F1
info.F1_2$treatment <- gsub(x = info.F1_2$treatment, pattern = "[CL]", replacement = "CL")
info.F1_2$treatment <- factor(info.F1_2$treatment)
mm.F1_2.treatment <- model.matrix(~ treatment, info.F1_2)
mc.samples <- 200

denoms <- aldex.set.mode(ceiling(pfam$filtered.F1), info[colnames(pfam$filtered.F1),"treatment"], denom="iqlr")
conds <- info.F1_2[colnames(pfam$filtered.F1),"treatment"]
x.pfam.F1_2 <- aldex(ceiling(pfam$filtered.F1), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.pfam.F1_2[order(x.pfam.F1_2$effect, decreasing = TRUE),], "pfam.F1_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F1 generation (H vs C)

In [102]:
info.F1_HC <- info[info$generation=="F1",]
info.F1_HC <- info.F1_HC[rownames(info.F1_HC)[info.F1_HC$treatment%in%c("C","H")],]
pfam$cov.F1_HC <- pfam$cov[, rownames(info.F1_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F1_HC.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F1_HC.quantile_thresh <- 0.1
mc.samples <- 200

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F1_HC[rowSums(pfam$cov.F1_HC) > 0, ])
pfam$nz.F1_HC <- pfam$cov.F1_HC[rownames(pfam$cov.F1_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F1_HC) / sum(rowSums(pfam$nz.F1_HC)) * 100, probs = pfam.F1_HC.quantile_thresh)
pfam$filtered.F1_HC <- low.count.removal(pfam$nz.F1_HC, percent = low_threshold)$data.filter

pfam$occ.F1_HC <- rowSums(pfam$filtered.F1_HC>0) / ncol(pfam$filtered.F1_HC) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F1_HC <- pfam$filtered.F1_HC[which(pfam$occ.F1_HC >= pfam.F1_HC.percent_occ), ]
paste0(c(dim(pfam$cov.F1_HC)[1] - dim(pfam$filtered.F1_HC)[1], "pfams removed", dim(pfam$filtered.F1_HC)[1], "remaining"))

X <- pfam$filtered.F1_HC
Y <- info.F1_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.pfam.F1_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.pfam.F1_HC[order(x.pfam.F1_HC$effect, decreasing = TRUE),], "pfam.F1_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F1 generation (L vs C)

In [103]:
info.F1_LC <- info[info$generation=="F1",]
info.F1_LC <- info.F1_LC[rownames(info.F1_LC)[info.F1_LC$treatment%in%c("C","L")],]
mc.samples <- 200
pfam$cov.F1_LC <- pfam$cov[, rownames(info.F1_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F1_LC.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F1_LC.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F1_LC[rowSums(pfam$cov.F1_LC) > 0, ])
pfam$nz.F1_LC <- pfam$cov.F1_LC[rownames(pfam$cov.F1_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F1_LC) / sum(rowSums(pfam$nz.F1_LC)) * 100, probs = pfam.F1_LC.quantile_thresh)
pfam$filtered.F1_LC <- low.count.removal(pfam$nz.F1_LC, percent = low_threshold)$data.filter

pfam$occ.F1_LC <- rowSums(pfam$filtered.F1_LC>0) / ncol(pfam$filtered.F1_LC) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F1_LC <- pfam$filtered.F1_LC[which(pfam$occ.F1_LC >= pfam.F1_LC.percent_occ), ]
paste0(c(dim(pfam$cov.F1_LC)[1] - dim(pfam$filtered.F1_LC)[1], "pfams removed", dim(pfam$filtered.F1_LC)[1], "remaining"))

X <- pfam$filtered.F1_LC
Y <- info.F1_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.pfam.F1_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.pfam.F1_LC[order(x.pfam.F1_LC$effect, decreasing = TRUE),], "pfam.F1_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F2 generation

In [18]:
# Extract and filter F2 samples
pfam$cov.F2 <- pfam$cov[, rownames(info[info$generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F2.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F2.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F2[rowSums(pfam$cov.F2) > 0, ])
pfam$nz.F2 <- pfam$cov.F2[rownames(pfam$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F2) / sum(rowSums(pfam$nz.F2)) * 100, probs = pfam.F2.quantile_thresh)
pfam$filtered.F2 <- low.count.removal(pfam$nz.F2, percent = low_threshold)$data.filter

pfam$occ.F2 <- rowSums(pfam$filtered.F2>0) / ncol(pfam$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F2 <- pfam$filtered.F2[which(pfam$occ.F2 >= pfam.F2.percent_occ), ]
paste0(c(dim(pfam$cov.F2)[1] - dim(pfam$filtered.F2)[1], "pfams removed", dim(pfam$filtered.F2)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(pfam$filtered.F2), info[colnames(pfam$filtered.F2),"treatment"], denom="iqlr")

x.pfam.F2 <- aldex.clr(ceiling(pfam$filtered.F2), mm.F2.treatment, mc.samples=128, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.pfam.F2 <- aldex.glm(x.pfam.F2, mm.F2.treatment)
save_results(glm.test.pfam.F2, "glm.test.pfam.F2")
glm.effect.pfam.F2 <- aldex.glm.effect(x.pfam.F2)
save_results(glm.effect.pfam.F2, "glm.effect.pfam.F2")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F2 generation (C+L vs H)

In [19]:
info.F2 <- info[info$generation=="F2",]
info.F2_2 <- info.F2
info.F2_2$treatment <- gsub(x = info.F2_2$treatment, pattern = "[CL]", replacement = "CL")
info.F2_2$treatment <- factor(info.F2_2$treatment)
mm.F2_2.treatment <- model.matrix(~ treatment, info.F2_2)
mc.samples <- 128

denoms <- aldex.set.mode(ceiling(pfam$filtered.F2), info[colnames(pfam$filtered.F2),"treatment"], denom="iqlr")
conds <- info.F2_2[colnames(pfam$filtered.F2),"treatment"]
x.pfam.F2_2 <- aldex(ceiling(pfam$filtered.F2), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.pfam.F2_2[order(x.pfam.F2_2$effect, decreasing = TRUE),], "pfam.F2_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F2 generation (H vs C)

In [100]:
info.F2_HC <- info[info$generation=="F2",]
info.F2_HC <- info.F2_HC[rownames(info.F2_HC)[info.F2_HC$treatment%in%c("C","H")],]
mc.samples <- 128
pfam$cov.F2_HC <- pfam$cov[, rownames(info.F2_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F2_HC.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F2_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F2_HC[rowSums(pfam$cov.F2_HC) > 0, ])
pfam$nz.F2_HC <- pfam$cov.F2_HC[rownames(pfam$cov.F2_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F2_HC) / sum(rowSums(pfam$nz.F2_HC)) * 100, probs = pfam.F2_HC.quantile_thresh)
pfam$filtered.F2_HC <- low.count.removal(pfam$nz.F2_HC, percent = low_threshold)$data.filter

pfam$occ.F2_HC <- rowSums(pfam$filtered.F2_HC>0) / ncol(pfam$filtered.F2_HC) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F2_HC <- pfam$filtered.F2_HC[which(pfam$occ.F2_HC >= pfam.F2_HC.percent_occ), ]
paste0(c(dim(pfam$cov.F2_HC)[1] - dim(pfam$filtered.F2_HC)[1], "pfams removed", dim(pfam$filtered.F2_HC)[1], "remaining"))

X <- pfam$filtered.F2_HC
Y <- info.F2_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.pfam.F2_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.pfam.F2_HC[order(x.pfam.F2_HC$effect, decreasing = TRUE),], "pfam.F2_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### PFAM - F2 generation (L vs C)

In [101]:
info.F2_LC <- info[info$generation=="F2",]
info.F2_LC <- info.F2_LC[rownames(info.F2_LC)[info.F2_LC$treatment%in%c("C","L")],]
mc.samples <- 128
pfam$cov.F2_LC <- pfam$cov[, rownames(info.F2_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
pfam.F2_LC.percent_occ <- 10
# Quantile used for removing low abundance features
pfam.F2_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(pfam$cov.F2_LC[rowSums(pfam$cov.F2_LC) > 0, ])
pfam$nz.F2_LC <- pfam$cov.F2_LC[rownames(pfam$cov.F2_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(pfam$nz.F2_LC) / sum(rowSums(pfam$nz.F2_LC)) * 100, probs = pfam.F2_LC.quantile_thresh)
pfam$filtered.F2_LC <- low.count.removal(pfam$nz.F2_LC, percent = low_threshold)$data.filter

pfam$occ.F2_LC <- rowSums(pfam$filtered.F2_LC>0) / ncol(pfam$filtered.F2_LC) * 100
# Keep features occurring in defined percentage of samples
pfam$filtered.F2_LC <- pfam$filtered.F2_LC[which(pfam$occ.F2_LC >= pfam.F2_LC.percent_occ), ]
paste0(c(dim(pfam$cov.F2_LC)[1] - dim(pfam$filtered.F2_LC)[1], "pfams removed", dim(pfam$filtered.F2_LC)[1], "remaining"))

X <- pfam$filtered.F2_LC
Y <- info.F2_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.pfam.F2_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.pfam.F2_LC[order(x.pfam.F2_LC$effect, decreasing = TRUE),], "pfam.F2_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



### KOs

#### KOs - Generational differences

In [19]:
## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.percent_occ <- 10
# Quantile used for removing low abundance features
ko.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov[rowSums(ko$cov) > 0, ])
ko$nz <- ko$cov[rownames(ko$cov)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz) / sum(rowSums(ko$nz)) * 100, probs = ko.quantile_thresh)
ko$filtered <- low.count.removal(ko$nz, percent = low_threshold)$data.filter

ko$occ <- rowSums(ko$filtered>0) / ncol(ko$filtered) * 100
# Keep features occurring in defined percentage of samples
ko$filtered <- ko$filtered[which(ko$occ >= ko.percent_occ), ]
paste0(c(dim(ko$cov)[1] - dim(ko$filtered)[1], "kos removed", dim(ko$filtered)[1], "remaining"))

ko$filtered <- ceiling(ko$filtered)

denoms <- aldex.set.mode(ceiling(ko$filtered), info[colnames(ko$filtered),"generation"], denom="iqlr")

x.ko <- aldex.clr(ceiling(ko$filtered), mm.generation, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.ko <- aldex.glm(x.ko, mm.generation)
save_results(glm.test.ko, "glm.test.ko")
glm.effect.ko <- aldex.glm.effect(x.ko)
save_results(glm.effect.ko, "glm.effect.ko")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### KO - F0

In [18]:
# Extract and filter F0 samples
ko$cov.F0 <- ko$cov[, rownames(info[info$generation=="F0",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F0.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F0.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F0[rowSums(ko$cov.F0) > 0, ])
ko$nz.F0 <- ko$cov.F0[rownames(ko$cov.F0)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F0) / sum(rowSums(ko$nz.F0)) * 100, probs = ko.F0.quantile_thresh)
ko$filtered.F0 <- low.count.removal(ko$nz.F0, percent = low_threshold)$data.filter

ko$occ.F0 <- rowSums(ko$filtered.F0>0) / ncol(ko$filtered.F0) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F0 <- ko$filtered.F0[which(ko$occ.F0 >= ko.F0.percent_occ), ]
paste0(c(dim(ko$cov.F0)[1] - dim(ko$filtered.F0)[1], "kos removed", dim(ko$filtered.F0)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(ko$filtered.F0), info[colnames(ko$filtered.F0),"treatment"], denom="iqlr")

x.ko.F0 <- aldex.clr(ceiling(ko$filtered.F0), mm.F0.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.ko.F0 <- aldex.glm(x.ko.F0, mm.F0.treatment)
save_results(glm.test.ko.F0, "glm.test.ko.F0")
glm.effect.ko.F0 <- aldex.glm.effect(x.ko.F0)
save_results(glm.effect.ko.F0, "glm.effect.ko.F0")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### KO - F0 generation (C+L vs H)

In [19]:
info.F0 <- info[info$generation=="F0",]
info.F0_2 <- info.F0
info.F0_2$treatment <- gsub(x = info.F0_2$treatment, pattern = "[CL]", replacement = "CL")
info.F0_2$treatment <- factor(info.F0_2$treatment)
mm.F0_2.treatment <- model.matrix(~ treatment, info.F0_2)
mc.samples <- 128

denoms <- aldex.set.mode(ceiling(ko$filtered.F0), info[colnames(ko$filtered.F0),"treatment"], denom="iqlr")
conds <- info.F0_2[colnames(ko$filtered.F0),"treatment"]
x.ko.F0_2 <- aldex(ceiling(ko$filtered.F0), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.ko.F0_2[order(x.ko.F0_2$effect, decreasing = TRUE),], "ko.F0_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### ko - F0 generation (H vs C)

In [104]:
info.F0_HC <- info[info$generation=="F0",]
info.F0_HC <- info.F0_HC[rownames(info.F0_HC)[info.F0_HC$treatment%in%c("C","H")],]
ko$cov.F0_HC <- ko$cov[, rownames(info.F0_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F0_HC.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F0_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F0_HC[rowSums(ko$cov.F0_HC) > 0, ])
ko$nz.F0_HC <- ko$cov.F0_HC[rownames(ko$cov.F0_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F0_HC) / sum(rowSums(ko$nz.F0_HC)) * 100, probs = ko.F0_HC.quantile_thresh)
ko$filtered.F0_HC <- low.count.removal(ko$nz.F0_HC, percent = low_threshold)$data.filter

ko$occ.F0_HC <- rowSums(ko$filtered.F0_HC>0) / ncol(ko$filtered.F0_HC) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F0_HC <- ko$filtered.F0_HC[which(ko$occ.F0_HC >= ko.F0_HC.percent_occ), ]
paste0(c(dim(ko$cov.F0_HC)[1] - dim(ko$filtered.F0_HC)[1], "kos removed", dim(ko$filtered.F0_HC)[1], "remaining"))

X <- ko$filtered.F0_HC
Y <- info.F0_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.ko.F0_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.ko.F0_HC[order(x.ko.F0_HC$effect, decreasing = TRUE),], "ko.F0_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### ko - F0 generation (L vs C)

In [18]:
info.F0_LC <- info[info$generation=="F0",]
info.F0_LC <- info.F0_LC[rownames(info.F0_LC)[info.F0_LC$treatment%in%c("C","L")],]
mc.samples <- 128
ko$cov.F0_LC <- ko$cov[, rownames(info.F0_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F0_LC.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F0_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F0_LC[rowSums(ko$cov.F0_LC) > 0, ])
ko$nz.F0_LC <- ko$cov.F0_LC[rownames(ko$cov.F0_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F0_LC) / sum(rowSums(ko$nz.F0_LC)) * 100, probs = ko.F0_LC.quantile_thresh)
ko$filtered.F0_LC <- low.count.removal(ko$nz.F0_LC, percent = low_threshold)$data.filter

ko$occ.F0_LC <- rowSums(ko$filtered.F0_LC>0) / ncol(ko$filtered.F0_LC) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F0_LC <- ko$filtered.F0_LC[which(ko$occ.F0_LC >= ko.F0_LC.percent_occ), ]
paste0(c(dim(ko$cov.F0_LC)[1] - dim(ko$filtered.F0_LC)[1], "kos removed", dim(ko$filtered.F0_LC)[1], "remaining"))

X <- ko$filtered.F0_LC
Y <- info.F0_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.ko.F0_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.ko.F0_LC[order(x.ko.F0_LC$effect, decreasing = TRUE),], "ko.F0_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### KO - F1 generation

In [20]:
# Extract and filter F1 samples
ko$cov.F1 <- ko$cov[, rownames(info[info$generation=="F1",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F1.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F1.quantile_thresh <- 0.1
mc.samples <- 200

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F1[rowSums(ko$cov.F1) > 0, ])
ko$nz.F1 <- ko$cov.F1[rownames(ko$cov.F1)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F1) / sum(rowSums(ko$nz.F1)) * 100, probs = ko.F1.quantile_thresh)
ko$filtered.F1 <- low.count.removal(ko$nz.F1, percent = low_threshold)$data.filter

ko$occ.F1 <- rowSums(ko$filtered.F1>0) / ncol(ko$filtered.F1) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F1 <- ko$filtered.F1[which(ko$occ.F1 >= ko.F1.percent_occ), ]
paste0(c(dim(ko$cov.F1)[1] - dim(ko$filtered.F1)[1], "species removed", dim(ko$filtered.F1)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(ko$filtered.F1), info[colnames(ko$filtered.F1),"treatment"], denom="iqlr")

x.ko.F1 <- aldex.clr(ceiling(ko$filtered.F1), mm.F1.treatment, mc.samples=mc.samples, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.ko.F1 <- aldex.glm(x.ko.F1, mm.F1.treatment)
save_results(glm.test.ko.F1, "glm.test.ko.F1")
glm.effect.ko.F1 <- aldex.glm.effect(x.ko.F1)
save_results(glm.effect.ko.F1, "glm.effect.ko.F1")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### KO - F1 generation (C+L vs H)

In [21]:
info.F1 <- info[info$generation=="F1",]
info.F1_2 <- info.F1
info.F1_2$treatment <- gsub(x = info.F1_2$treatment, pattern = "[CL]", replacement = "CL")
info.F1_2$treatment <- factor(info.F1_2$treatment)
mm.F1_2.treatment <- model.matrix(~ treatment, info.F1_2)
mc.samples <- 200

denoms <- aldex.set.mode(ceiling(ko$filtered.F1), info[colnames(ko$filtered.F1),"treatment"], denom="iqlr")
conds <- info.F1_2[colnames(ko$filtered.F1),"treatment"]
x.ko.F1_2 <- aldex(ceiling(ko$filtered.F1), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.ko.F1_2[order(x.ko.F1_2$effect, decreasing = TRUE),], "ko.F1_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### KO - F1 (H vs C)

In [19]:
info.F1_HC <- info[info$generation=="F1",]
info.F1_HC <- info.F1_HC[rownames(info.F1_HC)[info.F1_HC$treatment%in%c("C","H")],]
ko$cov.F1_HC <- ko$cov[, rownames(info.F1_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F1_HC.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F1_HC.quantile_thresh <- 0.1
mc.samples <- 200

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F1_HC[rowSums(ko$cov.F1_HC) > 0, ])
ko$nz.F1_HC <- ko$cov.F1_HC[rownames(ko$cov.F1_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F1_HC) / sum(rowSums(ko$nz.F1_HC)) * 100, probs = ko.F1_HC.quantile_thresh)
ko$filtered.F1_HC <- low.count.removal(ko$nz.F1_HC, percent = low_threshold)$data.filter

ko$occ.F1_HC <- rowSums(ko$filtered.F1_HC>0) / ncol(ko$filtered.F1_HC) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F1_HC <- ko$filtered.F1_HC[which(ko$occ.F1_HC >= ko.F1_HC.percent_occ), ]
paste0(c(dim(ko$cov.F1_HC)[1] - dim(ko$filtered.F1_HC)[1], "kos removed", dim(ko$filtered.F1_HC)[1], "remaining"))

X <- ko$filtered.F1_HC
Y <- info.F1_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.ko.F1_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.ko.F1_HC[order(x.ko.F1_HC$effect, decreasing = TRUE),], "ko.F1_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### KO - F1 generation (L vs C)

In [20]:
info.F1_LC <- info[info$generation=="F1",]
info.F1_LC <- info.F1_LC[rownames(info.F1_LC)[info.F1_LC$treatment%in%c("C","L")],]
mc.samples <- 200
ko$cov.F1_LC <- ko$cov[, rownames(info.F1_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F1_LC.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F1_LC.quantile_thresh <- 0.1

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F1_LC[rowSums(ko$cov.F1_LC) > 0, ])
ko$nz.F1_LC <- ko$cov.F1_LC[rownames(ko$cov.F1_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F1_LC) / sum(rowSums(ko$nz.F1_LC)) * 100, probs = ko.F1_LC.quantile_thresh)
ko$filtered.F1_LC <- low.count.removal(ko$nz.F1_LC, percent = low_threshold)$data.filter

ko$occ.F1_LC <- rowSums(ko$filtered.F1_LC>0) / ncol(ko$filtered.F1_LC) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F1_LC <- ko$filtered.F1_LC[which(ko$occ.F1_LC >= ko.F1_LC.percent_occ), ]
paste0(c(dim(ko$cov.F1_LC)[1] - dim(ko$filtered.F1_LC)[1], "kos removed", dim(ko$filtered.F1_LC)[1], "remaining"))

X <- ko$filtered.F1_LC
Y <- info.F1_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.ko.F1_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.ko.F1_LC[order(x.ko.F1_LC$effect, decreasing = TRUE),], "ko.F1_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### KO - F2 generation

In [22]:
# Extract and filter F2 samples
ko$cov.F2 <- ko$cov[, rownames(info[info$generation=="F2",])]
## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F2.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F2.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F2[rowSums(ko$cov.F2) > 0, ])
ko$nz.F2 <- ko$cov.F2[rownames(ko$cov.F2)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F2) / sum(rowSums(ko$nz.F2)) * 100, probs = ko.F2.quantile_thresh)
ko$filtered.F2 <- low.count.removal(ko$nz.F2, percent = low_threshold)$data.filter

ko$occ.F2 <- rowSums(ko$filtered.F2>0) / ncol(ko$filtered.F2) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F2 <- ko$filtered.F2[which(ko$occ.F2 >= ko.F2.percent_occ), ]
paste0(c(dim(ko$cov.F2)[1] - dim(ko$filtered.F2)[1], "species removed", dim(ko$filtered.F2)[1], "remaining"))

# Get denominators from IQLR transformation
denoms <- aldex.set.mode(ceiling(ko$filtered.F2), info[colnames(ko$filtered.F2),"treatment"], denom="iqlr")

x.ko.F2 <- aldex.clr(ceiling(ko$filtered.F2), mm.F2.treatment, mc.samples=128, denom=denoms, verbose=TRUE, useMC = TRUE)
glm.test.ko.F2 <- aldex.glm(x.ko.F2, mm.F2.treatment)
save_results(glm.test.ko.F2, "glm.test.ko.F2")
glm.effect.ko.F2 <- aldex.glm.effect(x.ko.F2)
save_results(glm.effect.ko.F2, "glm.effect.ko.F2")

computing iqlr centering

checking for condition length disabled!

user-defined denominator used

multicore environment is is OK -- using the BiocParallel package

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### KO - F2 generation (C+L vs H)

In [23]:
info.F2 <- info[info$generation=="F2",]
info.F2_2 <- info.F2
info.F2_2$treatment <- gsub(x = info.F2_2$treatment, pattern = "[CL]", replacement = "CL")
info.F2_2$treatment <- factor(info.F2_2$treatment)
mm.F2_2.treatment <- model.matrix(~ treatment, info.F2_2)
mc.samples <- 128

denoms <- aldex.set.mode(ceiling(ko$filtered.F2), info[colnames(ko$filtered.F2),"treatment"], denom="iqlr")
conds <- info.F2_2[colnames(ko$filtered.F2),"treatment"]
x.ko.F2_2 <- aldex(ceiling(ko$filtered.F2), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.ko.F2_2[order(x.ko.F2_2$effect, decreasing = TRUE),], "ko.F2_2")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### KO - F2 generation (H vs C)

In [21]:
info.F2_HC <- info[info$generation=="F2",]
info.F2_HC <- info.F2_HC[rownames(info.F2_HC)[info.F2_HC$treatment%in%c("C","H")],]
mc.samples <- 128
ko$cov.F2_HC <- ko$cov[, rownames(info.F2_HC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F2_HC.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F2_HC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F2_HC[rowSums(ko$cov.F2_HC) > 0, ])
ko$nz.F2_HC <- ko$cov.F2_HC[rownames(ko$cov.F2_HC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F2_HC) / sum(rowSums(ko$nz.F2_HC)) * 100, probs = ko.F2_HC.quantile_thresh)
ko$filtered.F2_HC <- low.count.removal(ko$nz.F2_HC, percent = low_threshold)$data.filter

ko$occ.F2_HC <- rowSums(ko$filtered.F2_HC>0) / ncol(ko$filtered.F2_HC) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F2_HC <- ko$filtered.F2_HC[which(ko$occ.F2_HC >= ko.F2_HC.percent_occ), ]
paste0(c(dim(ko$cov.F2_HC)[1] - dim(ko$filtered.F2_HC)[1], "kos removed", dim(ko$filtered.F2_HC)[1], "remaining"))

X <- ko$filtered.F2_HC
Y <- info.F2_HC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.ko.F2_HC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.ko.F2_HC[order(x.ko.F2_HC$effect, decreasing = TRUE),], "ko.F2_HC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output



#### KO - F2 generation (L vs C)

In [22]:
info.F2_LC <- info[info$generation=="F2",]
info.F2_LC <- info.F2_LC[rownames(info.F2_LC)[info.F2_LC$treatment%in%c("C","L")],]
mc.samples <- 128
ko$cov.F2_LC <- ko$cov[, rownames(info.F2_LC)]

## PARAMS ##
# Percentage of samples in which features must have >0 sum
ko.F2_LC.percent_occ <- 10
# Quantile used for removing low abundance features
ko.F2_LC.quantile_thresh <- 0.1
mc.samples <- 128

## PREFILTERING ##
# Remove zero sum features
nz <- rownames(ko$cov.F2_LC[rowSums(ko$cov.F2_LC) > 0, ])
ko$nz.F2_LC <- ko$cov.F2_LC[rownames(ko$cov.F2_LC)%in%nz, ]

# Filter low abundance features
low_threshold = quantile(rowSums(ko$nz.F2_LC) / sum(rowSums(ko$nz.F2_LC)) * 100, probs = ko.F2_LC.quantile_thresh)
ko$filtered.F2_LC <- low.count.removal(ko$nz.F2_LC, percent = low_threshold)$data.filter

ko$occ.F2_LC <- rowSums(ko$filtered.F2_LC>0) / ncol(ko$filtered.F2_LC) * 100
# Keep features occurring in defined percentage of samples
ko$filtered.F2_LC <- ko$filtered.F2_LC[which(ko$occ.F2_LC >= ko.F2_LC.percent_occ), ]
paste0(c(dim(ko$cov.F2_LC)[1] - dim(ko$filtered.F2_LC)[1], "kos removed", dim(ko$filtered.F2_LC)[1], "remaining"))

X <- ko$filtered.F2_LC
Y <- info.F2_LC

denoms <- aldex.set.mode(ceiling(X), Y[colnames(X),"treatment"], denom="iqlr")
conds <- factor(Y[colnames(X),"treatment"])
x.ko.F2_LC <- aldex(ceiling(X), conds, mc.samples=mc.samples, denom=denoms, test="t", effect=TRUE,
     include.sample.summary=FALSE, verbose=TRUE, paired.test=FALSE)
save_results(x.ko.F2_LC[order(x.ko.F2_LC$effect, decreasing = TRUE),], "ko.F2_LC")

computing iqlr centering

aldex.clr: generating Monte-Carlo instances and clr values

operating in serial mode

removed rows with sums equal to zero

data format is OK

dirichlet samples complete

transformation complete

aldex.ttest: doing t-test

running tests for each MC instance:



|------------(25%)----------(50%)----------(75%)----------|


aldex.effect: calculating effect sizes

operating in serial mode

sanity check complete

rab.all  complete

rab.win  complete

rab of samples complete

within sample difference calculated

between group difference calculated

group summaries calculated

unpaired effect size calculated

summarizing output

