This notebook is to apply the Redundancy Analysis (RDA) to metabolomics data to compare the effect size for treatment and cage effects (reference: Falony, Science, 2016).

In [1]:
library(vegan)

“package ‘vegan’ was built under R version 3.2.5”Loading required package: permute
“package ‘permute’ was built under R version 3.2.5”Loading required package: lattice
This is vegan 2.4-4


## Data Preparation for RDA Analysis

In [2]:
mf = read.csv('haddad_6week_metadata_matched_rare2k.txt', sep='\t')

In [3]:
pc = read.csv('gower_pc.txt', header=FALSE, sep='\t')

In [4]:
dim(mf)
dim(pc)

In [5]:
head(mf)

X.SampleID,BarcodeSequence,LinkerPrimerSequence,center_name,experiment_design_description,extraction_robot,extractionkit_lot,instrument_model,library_construction_protocol,linker,⋯,physical_specimen_location,physical_specimen_remaining,sample_type,scientific_name,sex,title,weekly_cage_food_consumption,weight,weight_units,Description
10422.17.F.10,GTTGTTCTGGGA,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF1,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,Missing: Not provided,25.6,g,feces mouse 17 collection 10 of 13
10422.17.F.11,TGTGCTTGTAGG,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,71.8,25.2,g,feces mouse 17 collection 11 of 13
10422.17.F.12,AGAATCCACCAC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF1,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,Missing: Not provided,25.7,g,feces mouse 17 collection 12 of 13
10422.17.F.13,CTGTAAAGGTTG,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,71.7,26.3,g,final feces mouse 17 collection 13 of 13
10422.17.F.3,CTCCCGAGCTCC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,105.7,24.9,g,feces mouse 17 collection 3 of 13
10422.17.F.4,GGTCTTAGCACC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,Missing: Not provided,24.0,g,feces mouse 17 collection 4 of 13


In [6]:
head(pc)

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,⋯,V174,V175,V176,V177,V178,V179,V180,V181,V182,V183
10422.17.F.10,23.142174,-2.547855,-34.9972372,49.657923,-7.3553,0.3921491,-3.8325385,18.452123,-3.775725,⋯,0,0,0,0,0,0,0,0,0,0
10422.17.F.11,-43.346726,43.45334,-7.4992363,54.433617,10.30729,22.6996433,-24.2811032,-7.96314,8.977522,⋯,0,0,0,0,0,0,0,0,0,0
10422.17.F.12,1.315299,12.354304,-19.7943174,36.322173,21.299721,21.0990025,0.5893194,2.770653,-7.943843,⋯,0,0,0,0,0,0,0,0,0,0
10422.17.F.13,-40.949758,28.272926,-18.5197684,5.734954,4.773362,52.2757097,18.4996261,9.205369,1.69249,⋯,0,0,0,0,0,0,0,0,0,0
10422.17.F.3,17.244964,-112.443567,-0.6944819,-24.985465,-52.762613,7.2012673,-8.647524,18.240301,6.145081,⋯,0,0,0,0,0,0,0,0,0,0
10422.17.F.4,22.093192,-92.224312,0.3704718,-33.686568,1.310252,17.2442619,0.7064991,2.679129,-33.160098,⋯,0,0,0,0,0,0,0,0,0,0


In [7]:
pc_10 = pc[, 1:11]
colnames(pc_10) = c('X.SampleID', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6', 'PC7', 'PC8', 'PC9', 'PC10')
dim(pc_10)
head(pc_10)

X.SampleID,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10
10422.17.F.10,23.142174,-2.547855,-34.9972372,49.657923,-7.3553,0.3921491,-3.8325385,18.452123,-3.775725,10.488982
10422.17.F.11,-43.346726,43.45334,-7.4992363,54.433617,10.30729,22.6996433,-24.2811032,-7.96314,8.977522,38.068554
10422.17.F.12,1.315299,12.354304,-19.7943174,36.322173,21.299721,21.0990025,0.5893194,2.770653,-7.943843,9.566073
10422.17.F.13,-40.949758,28.272926,-18.5197684,5.734954,4.773362,52.2757097,18.4996261,9.205369,1.69249,28.634358
10422.17.F.3,17.244964,-112.443567,-0.6944819,-24.985465,-52.762613,7.2012673,-8.647524,18.240301,6.145081,23.414405
10422.17.F.4,22.093192,-92.224312,0.3704718,-33.686568,1.310252,17.2442619,0.7064991,2.679129,-33.160098,1.361084


In [8]:
dat = merge(mf, pc_10, by = "X.SampleID") 
dim(dat)

In [9]:
head(dat)

X.SampleID,BarcodeSequence,LinkerPrimerSequence,center_name,experiment_design_description,extraction_robot,extractionkit_lot,instrument_model,library_construction_protocol,linker,⋯,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10
10422.17.F.10,GTTGTTCTGGGA,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF1,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,23.142174,-2.547855,-34.9972372,49.657923,-7.3553,0.3921491,-3.8325385,18.452123,-3.775725,10.488982
10422.17.F.11,TGTGCTTGTAGG,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,-43.346726,43.45334,-7.4992363,54.433617,10.30729,22.6996433,-24.2811032,-7.96314,8.977522,38.068554
10422.17.F.12,AGAATCCACCAC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF1,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,1.315299,12.354304,-19.7943174,36.322173,21.299721,21.0990025,0.5893194,2.770653,-7.943843,9.566073
10422.17.F.13,CTGTAAAGGTTG,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,-40.949758,28.272926,-18.5197684,5.734954,4.773362,52.2757097,18.4996261,9.205369,1.69249,28.634358
10422.17.F.3,CTCCCGAGCTCC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,17.244964,-112.443567,-0.6944819,-24.985465,-52.762613,7.2012673,-8.647524,18.240301,6.145081,23.414405
10422.17.F.4,GGTCTTAGCACC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,22.093192,-92.224312,0.3704718,-33.686568,1.310252,17.2442619,0.7064991,2.679129,-33.160098,1.361084


In [10]:
X = dat[, c('cage_number', 'exposure_type')]
Y = dat[, c('PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6', 'PC7', 'PC8', 'PC9', 'PC10')]

In [11]:
dim(X)
dim(Y)

In [12]:
summary(X)

  cage_number    exposure_type
 Min.   :5.000   Air:90       
 1st Qu.:5.250   IHH:92       
 Median :6.000                
 Mean   :6.495                
 3rd Qu.:7.750                
 Max.   :8.000                

## RDA Analysis for Effect Size Calculation

In [15]:
mod1 <- rda(Y ~ 1, X)  # Model with intercept only
mod2 <- rda(Y ~ ., X)  # Model with all explanatory variables

In [16]:
mdl <- ordiR2step(mod1, mod2, perm.max = 1000)

Step: R2.adj= 0 
Call: Y ~ 1 
 
                R2.adjusted
<All variables>  0.06836575
+ exposure_type  0.06158682
+ cage_number    0.04791669
<none>           0.00000000

                Df    AIC      F Pr(>F)   
+ exposure_type  1 1783.6 12.879  0.002 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.06158682 
Call: Y ~ exposure_type 
 
                R2.adjusted
+ cage_number    0.06836575
<All variables>  0.06836575
<none>           0.06158682



## Output Result

In [18]:
table = mdl$anova
table

Unnamed: 0,R2.adj,Df,AIC,F,Pr(>F)
+ exposure_type,0.06158682,1.0,1783.632,12.87879,0.002
<All variables>,0.06836575,,,,


In [None]:
                R2.adjusted
<All variables>  0.06836575
+ exposure_type  0.06158682
+ cage_number    0.04791669
<none>           0.00000000

In [19]:
table$ES.independent = c(0.06158682, 0.06836575)
table$ES.RDA = c(table$R2.adj[1], table$R2.adj[2]-table$R2.adj[1])
table

Unnamed: 0,R2.adj,Df,AIC,F,Pr(>F),ES.independent,ES.RDA
+ exposure_type,0.06158682,1.0,1783.632,12.87879,0.002,0.06158682,0.061586816
<All variables>,0.06836575,,,,,0.06836575,0.006778934
