This notebook is to apply the Redundancy Analysis (RDA) to microbiome data to compare the effect size for treatment and cage effects (reference: Falony, Science, 2016).

In [1]:
library(vegan)

“package ‘vegan’ was built under R version 3.2.5”Loading required package: permute
“package ‘permute’ was built under R version 3.2.5”Loading required package: lattice
This is vegan 2.4-4


## Data Preparation for RDA analysis

In [2]:
mf = read.csv('haddad_6week_metadata_matched_rare2k.txt', sep='\t')

In [3]:
pc = read.csv('unweighted_unifrac_pc.txt', header=FALSE, sep='\t')

In [4]:
dim(mf)
dim(pc)

In [5]:
head(mf)

X.SampleID,BarcodeSequence,LinkerPrimerSequence,center_name,experiment_design_description,extraction_robot,extractionkit_lot,instrument_model,library_construction_protocol,linker,⋯,physical_specimen_location,physical_specimen_remaining,sample_type,scientific_name,sex,title,weekly_cage_food_consumption,weight,weight_units,Description
10422.17.F.10,GTTGTTCTGGGA,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF1,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,Missing: Not provided,25.6,g,feces mouse 17 collection 10 of 13
10422.17.F.11,TGTGCTTGTAGG,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,71.8,25.2,g,feces mouse 17 collection 11 of 13
10422.17.F.12,AGAATCCACCAC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF1,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,Missing: Not provided,25.7,g,feces mouse 17 collection 12 of 13
10422.17.F.13,CTGTAAAGGTTG,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,71.7,26.3,g,final feces mouse 17 collection 13 of 13
10422.17.F.3,CTCCCGAGCTCC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,105.7,24.9,g,feces mouse 17 collection 3 of 13
10422.17.F.4,GGTCTTAGCACC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,UCSD LBR -80 freezer,True,stool,mouse gut metagenome,male,OSA,Missing: Not provided,24.0,g,feces mouse 17 collection 4 of 13


In [6]:
pc_10 = pc[, 1:11]
colnames(pc_10) = c('X.SampleID', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6', 'PC7', 'PC8', 'PC9', 'PC10')
dim(pc_10)
head(pc_10)

X.SampleID,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10
10422.17.F.10,-0.05976827,0.02645879,0.01189369,-0.0454783,0.01172535,-0.08154547,0.05848336,-0.054970208,-0.072858399,0.007478379
10422.17.F.11,-0.06582195,0.03003458,0.04061219,-0.08585809,0.09319215,-0.064036326,0.04895493,0.009016226,-0.031530171,0.029190749
10422.17.F.12,-0.0837718,0.03660798,0.03076735,-0.06438793,0.09441188,-0.016211537,0.05954417,0.004364525,-0.02868282,0.032081034
10422.17.F.13,-0.08596504,0.07530612,0.0337558,-0.08652844,0.09139416,-0.007040922,-0.0470271,0.003740529,0.005535416,0.034974764
10422.17.F.3,-0.08772823,0.05638292,0.07824703,-0.08641231,-0.06389593,-0.07453922,0.05786154,-0.039576721,-0.027889781,-0.024444281
10422.17.F.4,-0.09195258,0.05062091,0.07417381,-0.08887333,-0.02018471,-0.035864497,0.08128657,-0.042169148,-0.023215416,0.024866731


In [7]:
dat = merge(mf, pc_10, by = "X.SampleID") 
dim(dat)

In [8]:
head(dat)

X.SampleID,BarcodeSequence,LinkerPrimerSequence,center_name,experiment_design_description,extraction_robot,extractionkit_lot,instrument_model,library_construction_protocol,linker,⋯,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10
10422.17.F.10,GTTGTTCTGGGA,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF1,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,-0.05976827,0.02645879,0.01189369,-0.0454783,0.01172535,-0.08154547,0.05848336,-0.054970208,-0.072858399,0.007478379
10422.17.F.11,TGTGCTTGTAGG,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,-0.06582195,0.03003458,0.04061219,-0.08585809,0.09319215,-0.064036326,0.04895493,0.009016226,-0.031530171,0.029190749
10422.17.F.12,AGAATCCACCAC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF1,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,-0.0837718,0.03660798,0.03076735,-0.06438793,0.09441188,-0.016211537,0.05954417,0.004364525,-0.02868282,0.032081034
10422.17.F.13,CTGTAAAGGTTG,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,-0.08596504,0.07530612,0.0337558,-0.08652844,0.09139416,-0.007040922,-0.0470271,0.003740529,0.005535416,0.034974764
10422.17.F.3,CTCCCGAGCTCC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,-0.08772823,0.05638292,0.07824703,-0.08641231,-0.06389593,-0.07453922,0.05786154,-0.039576721,-0.027889781,-0.024444281
10422.17.F.4,GGTCTTAGCACC,GTGTGCCAGCMGCCGCGGTAA,UCSDMI,Mouse cohort exposed to apnea and controls to assess the effect on the gut microbiome in mice.,HOWE_KF2,PM16B24,Illumina HiSeq 2500,"EMP 16S V4 protocol 515fbc, 806r",GT,⋯,-0.09195258,0.05062091,0.07417381,-0.08887333,-0.02018471,-0.035864497,0.08128657,-0.042169148,-0.023215416,0.024866731


In [9]:
X = dat[, c('age', 'cage_number', 'exposure_type', 'weight')]
Y = dat[, c('PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6', 'PC7', 'PC8', 'PC9', 'PC10')]

In [10]:
dim(X)
dim(Y)

In [11]:
summary(X)

      age         cage_number    exposure_type     weight     
 Min.   :10.00   Min.   :5.000   Air:90        Min.   :22.10  
 1st Qu.:12.00   1st Qu.:5.250   IHH:92        1st Qu.:25.40  
 Median :13.50   Median :6.000                 Median :26.50  
 Mean   :13.28   Mean   :6.495                 Mean   :26.60  
 3rd Qu.:14.50   3rd Qu.:7.750                 3rd Qu.:27.88  
 Max.   :16.00   Max.   :8.000                 Max.   :32.60  

## RDA analysis to calculate effect size

In [14]:
mod1 <- rda(Y ~ 1, X)  # Model with intercept only
mod2 <- rda(Y ~ ., X)  # Model with all explanatory variables

In [15]:
mdl <- ordiR2step(mod1, mod2, perm.max = 1000)

Step: R2.adj= 0 
Call: Y ~ 1 
 
                R2.adjusted
<All variables>  0.29271674
+ exposure_type  0.11596935
+ cage_number    0.09760473
+ age            0.08244343
+ weight         0.06921500
<none>           0.00000000

                Df     AIC      F Pr(>F)   
+ exposure_type  1 -519.27 24.744  0.002 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.1159693 
Call: Y ~ exposure_type 
 
                R2.adjusted
<All variables>   0.2927167
+ cage_number     0.2045359
+ age             0.1994344
+ weight          0.1655510
<none>            0.1159693

              Df    AIC      F Pr(>F)   
+ cage_number  1 -537.5 21.041  0.002 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Step: R2.adj= 0.2045359 
Call: Y ~ exposure_type + cage_number 
 
                R2.adjusted
<All variables>   0.2927167
+ age             0.2877463
+ weight          0.2550476
<none>            0.2045359

      Df     AIC      F Pr(>F)   
+ a

## Output Result

In [17]:
table = mdl$anova
table

Unnamed: 0,R2.adj,Df,AIC,F,Pr(>F)
+ exposure_type,0.1159693,1.0,-519.2703,24.74403,0.002
+ cage_number,0.2045359,1.0,-537.4973,21.04111,0.002
<All variables>,0.2045359,,,,


In [None]:
                R2.adjusted
<All variables>  0.29271674
+ exposure_type  0.11596935
+ cage_number    0.09760473
+ age            0.08244343
+ weight         0.06921500
<none>           0.00000000

In [38]:
table$ES.independent = c(0.11596935, 0.09760473, 0.08244343, 0.29271674)
table$ES.RDA = c(table$R2.adj[1], table$R2.adj[2]-table$R2.adj[1], 
                       table$R2.adj[3]-table$R2.adj[2], table$R2.adj[4]-table$R2.adj[3])
table

Unnamed: 0,R2.adj,Df,AIC,F,Pr(>F),ES.independent,ES.RDA
+ exposure_type,0.1159693,1.0,-519.2703,24.74403,0.002,0.11596935,0.115969347
+ cage_number,0.2045359,1.0,-537.4973,21.04111,0.002,0.09760473,0.088566554
+ age,0.2877463,1.0,-556.6263,21.91202,0.002,0.08244343,0.083210398
<All variables>,0.2927167,,,,,0.29271674,0.004970444
