This repository has been archived by the owner on Oct 1, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 44
/
06-A-unconstrained-ordination-bonus.Rmd
130 lines (102 loc) · 5.68 KB
/
06-A-unconstrained-ordination-bonus.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
---
title: "Prepping data for unconstrained ordination"
author: "Anna Rasmussen"
date: "11/1/2017"
output: html_document
---
```{r include = FALSE}
# This code block sets up the r session when the page is rendered to html
# include = FALSE means that it will not be included in the html document
# Write every code block to the html document
knitr::opts_chunk$set(echo = TRUE)
# Write the results of every code block to the html document
knitr::opts_chunk$set(eval = TRUE)
# Define the directory where images generated by knit will be saved
knitr::opts_chunk$set(fig.path = "images/06-A/")
# Set the web address where R will look for files from this repository
# Do not change this address
repo_url <- "https://raw.githubusercontent.com/fukamilab/BIO202/master/"
```
```{r defining terms, echo=FALSE, message=FALSE, warning=FALSE}
## Defining terms, samples, month names, etc
salinities <- c("0 - <.5", ".5 - <5", "5 - <18", "18 - <30", "30 - <40" )
system.order<-c("Fresh", "Oligohaline", "Mesohaline", "Polyhaline", "Euhaline")
stat.order<-as.character(c(657, 649, 3, 6, 9, 13, 15, 18, 21, 24, 27, 30, 34, 36))
seas.order <- c("Spring", "Summer", "Fall", "Winter")
surf.samp<- c("C29_657S", "C29_3S", "C29_6S", "C29_9S")
months <- c("Apr", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Jan", "Feb", "Mar")
cruise.dates <- c("4/23/2013", "7/23/2013", "8/28/2013","9/26/2013","10/24/2013","11/19/2013", "12/3/2013", "1/14/2014","2/11/2014","3/11/2014")
cruise.names <- c("C29", "C30","C31","C32","C33","C34","C35","C36","C37","C38")
```
```{r, echo=FALSE, message=FALSE, warning=FALSE}
## make dataframe of environmental variables
envmat.all <-read.csv("~/Documents/SFB_JGI_first_plate/timeseries_all_data.csv", header=T, row.names = 1)
envmat.all$Station.Number<-as.factor(envmat.all$Station.Number)
envmat.all$system<-factor(envmat.all$system, levels=system.order)
```
```{r, echo=FALSE, message=FALSE, warning=FALSE}
variables.cols <- c(31, 7,9, 11, 14,15, 21, 22, 34:36, 72, 74, 29, 73, 5, 32)
envdat.corr <- na.omit(envmat.all %>%
select (., variables.cols))
rownames(envdat.corr) = envdat.corr$sample
envdat.corr <- envdat.corr[,-c(1)]
```
```{r V4, echo=F, message=FALSE, warning=FALSE}
### Making a V4 phyloseq object
#Phyloseq object and environmental variables for sequenced samples
library(phyloseq)
library(ggplot2)
otumat.V4 <- read.csv("~/Documents/SFB_JGI_first_plate/SFBayDeltaitags/itags/16S-V4-ver2/2-1992333/otu.tsv", sep= "") %>%
select(-taxonomy) ## remove non-numeric data
rownames(otumat.V4) <- paste(otumat.V4$OTU) ##make OTU numbers the row names
otumat.V4 <- otumat.V4 %>%
select(-OTU) ##remove OTU column
taxmat.V4 <- read.table("~/Documents/SFB_JGI_first_plate/SFBayDeltaitags/itags/16S-V4-ver2/2-1992333/tax.tsv", sep= "") %>%
select(c(1,3))
colnames(taxmat.V4) <- c("OTU", "tax") ## Define columns
taxmat.V4<-taxmat.V4 %>%
separate(tax, into=c("Domain", "Phylum", "Class", "Order", "Family", "Genus"), sep= ",") ## separate taxa names
rownames(taxmat.V4) <- paste0(taxmat.V4$OTU) ### Make rownames from the column "OTU"
taxmat.V4 <- taxmat.V4 %>%
select(-OTU) %>% ## remove column with OTU names
as.matrix(.)
JGItre.V4<-read_tree("~/Documents/SFB_JGI_first_plate/SFBayDeltaitags/itags/16S-V4-ver2/2-1992333/otu.tre")
OTU.V4 = otu_table(otumat.V4, taxa_are_rows = TRUE)
TAX.V4 = tax_table(taxmat.V4)
ENV.V4= sample_data(envdat.corr)
physeq.V4 <- phyloseq(OTU.V4, TAX.V4, ENV.V4, JGItre.V4)
physeq.nd<- physeq.V4 %>%
filter_taxa(., function(x) sum(x > 2) > 1, TRUE) ### remove doubltons
```
```{r , echo=FALSE, message=FALSE, warning=FALSE}
### DESeq 2 variance stabilization and geometric mean stabilizes variance from different library size.
### This normailizationn and transformation process is based on a negative binomial hierarchical model.
### In the formula, differences in libary size are accounted for. This transformation is basically a log
### transformation at larger values with wider spread in lower count values. See "Waste not want not: why rarefying ### microbiome data is inadmissable" paper by McMurdie and Holmes to get the following info in more detail:
### The negative binomial distribution is used to model the counts, K, for OTU i, in sample j according to:
### Kij ~ NB(sj, mui, phii)
### where sj is a linear scaling factor for sample j that accounts for its library size, mui is the mean proportion ### for OTU i, and phii is the dispersion parameter for OTU i. The variance is vi = sj*mui+phii*sj^2*mui^2 , with the ### NB distribution becoming Poisson when phi = 0
library(DESeq2)
gm_mean = function(x, na.rm=TRUE){
exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) ##geometric mean function
}
ps_dds1 = phyloseq_to_deseq2(physeq.nd, ~system) ##convert phyloseq object to deseq object, "treatments" are salinity level
geoMeans = apply(counts(ps_dds1), 1, gm_mean)
ps_dds1 = estimateSizeFactors(ps_dds1, geoMeans = geoMeans)
ps_dds1 = estimateDispersions(ps_dds1)
abund=getVarianceStabilizedData(ps_dds1) #this is the long processeing step, stabilizing the variance
rownames(abund)=substr(rownames(abund),1,5) %>% make.names(unique=T)
otusf1 <- as.numeric(row.names(otu_table(physeq.nd))) #Get original OTU names
abund2 <- abund
rownames(abund2) <- otusf1 #change new OTU names back to old OTU names so matches taxonomy and tree files
physeq.VST <- physeq.nd
otu_table(physeq.VST) <- otu_table(abund2, taxa_are_rows=T) #update the otu table of phyloseq object
```
```{r}
otusnorm <-as.data.frame(abund2)
write.csv(otusnorm, row.names=TRUE, "~/Documents/Stanford/Eco_stats_202/data/OTUsnormalized.csv")
```
```{r, echo=TRUE, message=FALSE, warning=FALSE}
envdat.phy <- as.data.frame(sample_data(physeq2))
write.csv(envdat.phy, row.names = TRUE, "~/Documents/Stanford/Eco_stats_202/data/envdat_phy.csv")
```