In [2]:
#load packages in this order to avoid masking issues
library(ShortRead)
library(dada2)
library(tidyverse)

In [3]:
# ---- Parameters to set before running ----

# Number of reads each sample is randomly subsampled to before taxonomic
# assignment (after filtering and trimming). For no subsampling see
# Nanopore_no_rarefaction.R under "backups".
subsample_depth <- 1000

# Location of the taxonomy database relative to the working directory
# (easiest is to copy the taxonomy database into the working directory).
path_to_taxonomy_database <- "/media/zaramela/dumbo/databases/silva_nr99_v138.1_train_set.fa.gz"

# Leave as "." to set the working directory manually in RStudio via
# "Session" --> "Set Directory" --> "Choose Directory".
path_to_working_directory <- "."

# minBoot for assignTaxonomy: the minimum bootstrap support required to return
# a taxonomic classification. Choose a number between 0 and 100, with 100
# being the most stringent.
minBoot <- 50

# ---- End of parameters ----

In [6]:
# Directory that list.files() scans for the filtered fastq files.
path <- "/media/zaramela/dumbo/vania/raw_data/filtered"

In [10]:
# Collect the filtered fastq files under `path`, sorted by full path.
# The sample-name step further down strips a "_filt.fastq" suffix, so files are
# expected to be named like "<samplename>_filt.fastq".
# `pattern` is a regular expression: the dot is escaped and the match is
# anchored to the end of the name so that only real ".fastq" (optionally
# gzipped ".fastq.gz") files are picked up, not any name that merely contains
# "fastq" after an arbitrary character.
filtFiles <- sort(list.files(path, pattern = "\\.fastq(\\.gz)?$", full.names = TRUE))

In [11]:
# Derive a name for each file: everything in the base filename before the first
# ".", e.g. "ID1-Sample-1_filt.fastq" -> "ID1-Sample-1_filt".
# vapply() (rather than sapply()) guarantees a character vector even when
# `filtFiles` is empty.
sample.names <- vapply(
  strsplit(basename(filtFiles), "\\."),
  function(parts) parts[1],
  character(1)
)
names(filtFiles) <- sample.names

In [13]:
# Import sequences and assign taxonomy, subsampling each sample to at most
# `subsample_depth` reads first.
# This creates one csv file per sample ("tax.<sample>.csv", written relative to
# the current working directory) with the sequences and their assigned
# taxonomy.
# NOTE(review): sample() is not seeded here, so the reads drawn differ between
# runs; call set.seed() beforehand if reproducible subsampling is required.
for (fastq in filtFiles) {
  print(fastq)
  seqs <- getSequences(fastq)
  n_reads <- length(seqs)

  # Nothing to classify: skip instead of failing inside sample()/assignTaxonomy().
  if (n_reads == 0) {
    warning("No sequences found in ", fastq, "; skipping this sample.",
            call. = FALSE)
    next
  }

  # sample() without replacement errors when asked for more items than exist,
  # which would abort the whole loop on the first shallow sample. Cap the depth
  # at the number of available reads and tell the user about it.
  depth <- min(subsample_depth, n_reads)
  if (depth < subsample_depth) {
    warning("Only ", n_reads, " sequences in ", fastq,
            " (fewer than subsample_depth = ", subsample_depth,
            "); using all of them.", call. = FALSE)
  }

  sub <- sample(seq_len(n_reads), depth, replace = FALSE)
  seq2 <- seqs[sub]
  tax_rc <- assignTaxonomy(seq2, path_to_taxonomy_database,
                           multithread = TRUE, tryRC = TRUE, minBoot = minBoot)

  # Strip the literal "_filt.fastq" suffix; fixed = TRUE keeps the "." from
  # being interpreted as a regex wildcard.
  base <- basename(fastq)
  samples <- gsub("_filt.fastq", "", base, fixed = TRUE)
  write.csv(tax_rc, paste("tax", samples, "csv", sep = "."))
}

[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID1-Sample-1_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID10-Sample-10_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID11-Sample-11_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID12-Sample-12_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID13-Sample-13_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID14-Sample-14_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID15-Sample-15_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID16-Sample-16_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID17-Sample-17_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID18-Sample-18_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID19-Sample-19_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID2-Sample-2_filt.fastq"
[1] "/media/zaramela/dumbo/vania/raw_data/filtered/ID20-Sample-20_fi