/
1_load_data.R
69 lines (58 loc) · 3.58 KB
/
1_load_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# load packages
source("Desktop/AMRplusplus_bioinformatic_workshop/R_analysis/scripts/load_packages.R")
######## LOADING DATA ########
#### TE DATA ####
# read in AMR count matrix
amr_data <- read.table('Desktop/AMRplusplus_bioinformatic_workshop/R_analysis/data/TE_AMR_analytic_matrix.csv', header=T, row.names=1, sep=',', quote = "")
# convert this into an 'otu_table' format required for phyloseq
amr_data <- otu_table(amr_data, taxa_are_rows = T)
#read in gene annotations
annotations <- read.table('Desktop/AMRplusplus_bioinformatic_workshop/R_analysis/data/megares_modified_annotations_v2.00.csv', header=T, row.names=1, sep=",", quote = "")
#convert this into a 'taxonomy table' format required for phyloseq
annotations <- tax_table(as.matrix(annotations))
# read in TE metadata
amr_metadata <- read.table('Desktop/AMRplusplus_bioinformatic_workshop/R_analysis/data/TE_metadata.csv', header=T, sep=',', row.names = 1, quote = "")
# convert to 'sample_data' format for phyloseq
amr_metadata <- sample_data(amr_metadata)
# merge the annotations, the count matrix, and metadata into a phyloseq object
amr.ps <- merge_phyloseq(amr_data, annotations, amr_metadata)
#### 16S DATA ####
# read in ASV table, tree, representative sequences
microbiome_data <- import_biom('Desktop/AMRplusplus_bioinformatic_workshop/R_analysis/data/table-with-taxonomy.biom','Desktop/AMRplusplus_bioinformatic_workshop/R_analysis/data/tree.nwk','Desktop/AMRplusplus_bioinformatic_workshop/R_analysis/data/dna-sequences.fasta')
# read in metadata
microbiome_metadata <- read.table('Desktop/AMRplusplus_bioinformatic_workshop/R_analysis/data/16S_metadata.csv', header=T, sep=',', row.names = 1, quote = "")
# convert to 'sample_data' format for phyloseq
microbiome_metadata <- sample_data(microbiome_metadata)
# merge the ASVs, tree, rep-seqs, and metadata into phyloseq object
microbiome.ps <- merge_phyloseq(microbiome_data, microbiome_metadata)
# convert columns of taxonomy table to match taxonomic levels
colnames(tax_table(microbiome.ps)) <- c("Domain","Kingdom","Phylum","Class","Order","Family","Genus")
# changing the SILVA style naming (k__Bacteria, etc.)
tax.data <- data.frame(tax_table(microbiome.ps)) # extract the taxonomy table as a data frame
tax.data.names <- changeSILVAtaxa(tax.data) # this gets rid of the SILVA style
# now to change the NAs to a better naming scheme
for (i in 1:7){ tax.data.names[,i] <- as.character(tax.data.names[,i])} # converting all columns to characters
tax.data.names[is.na(tax.data.names)] <- "" # replacing the NAs with an empty string
# now filling in the empty slots with the highest assigned taxonomy
for (i in 1:nrow(tax.data.names)){
if (tax.data.names[i,2] == ""){
domain <- paste("unclassified ", tax.data.names[i,1], sep = "")
tax.data.names[i, 2:7] <- domain
} else if (tax.data.names[i,3] == ""){
kingdom <- paste("unclassified ", tax.data.names[i,2], sep = "")
tax.data.names[i, 3:7] <- kingdom
} else if (tax.data.names[i,4] == ""){
phylum <- paste("unclassified ", tax.data.names[i,3], sep = "")
tax.data.names[i, 4:7] <- phylum
} else if (tax.data.names[i,5] == ""){
class <- paste("unclassified ", tax.data.names[i,4], sep = "")
tax.data.names[i, 5:7] <- class
} else if (tax.data.names[i,6] == ""){
order <- paste("unclassified ", tax.data.names[i,5], sep = "")
tax.data.names[i, 6:7] <- order
} else if (tax.data.names[i,7] == ""){
tax.data.names$Genus[i] <- paste("unclassified ",tax.data.names$Family[i], sep = "")
}
}
tax_table(microbiome.ps) <- as.matrix(tax.data.names) # re-insert the taxonomy table into the phyloseq object
tax_table(microbiome.ps)