# Pipeline for analyzing local data (ITS2)

Arstingstall, K.A., S.J. DeBano, X. Li, D.E. Wooster, M.M. Rowland, S. Burrows, & K. Frost; 2020; DNA metabarcoding of native bee pollen loads

In [None]:
import os
from metaBarTools import metaBar_PreX

In [None]:
# Print the installed QIIME 2 version (this notebook was run with 2020.11.1)
!qiime --version #version 2020.11.1

In [None]:
# Helper object from metaBarTools; used below to create result folders and
# to build the QIIME 2 manifest files.
metaBar = metaBar_PreX()

In [None]:
# Sample metadata file and plate-setup workbook (resolved to absolute paths,
# because the working directory is changed several times further down)
metafile = os.path.abspath("./meta_beepollen_all.csv")
platesetup = os.path.abspath("./beepollen_all.xlsx")

# Folder holding the local ITS reads, with one sub-folder per location
ITS_reads_local = os.path.abspath("./Local_reads/ITS_reads_local/reads_copy_by_location/")

In [None]:
# Lengths of the forward and reverse primer sequences. They are passed to
# DADA2 as --p-trim-left-f / --p-trim-left-r so that the primers are removed
# from the start of the reads.
ITS_f_len, ITS_r_len = map(
    len,
    ("ATGCGATACTTGGTGTGAAT", "TCCTCCGCTTATTGATATGC"),
)

In [None]:
# Create the result folder(s) via the metaBarTools helper.
# NOTE(review): presumably this creates Local_Results/ITS_result; the return
# value is indexed below (path_ITS[0]) as the ITS_result path -- confirm
# against metaBar_makeSubDir.
path_ITS = metaBar.metaBar_makeSubDir("Local_Results", ["ITS_result"])

In [None]:
# Display the returned result path(s) as a visual check
path_ITS

In [None]:
# Non-curated ITS2 classifiers (updated versions), resolved to absolute paths.
# Used below as: starky -> Starky section, threemile -> RDO section,
# zum -> ZUM section.
ncrt_starky_classifier = os.path.abspath("./classifiers_updated/ITS2/classifiers/local/starky_ITS2_noncurated_classifier.qza")

ncrt_3m_classifier = os.path.abspath("./classifiers_updated/ITS2/classifiers/local/threemile_ITS2_noncurated_classifier.qza")

ncrt_zum_classifier = os.path.abspath("./classifiers_updated/ITS2/classifiers/local/zum_ITS2_noncurated_classifier.qza")


## Analyze ITS local data (with local ITS database)

In [None]:
# Change the working directory to ITS_result (first entry returned by
# metaBar_makeSubDir); the per-location folders are created relative to it.
os.chdir(path_ITS[0])

In [None]:
# Create one result sub-folder per sampling location (inside the current
# working directory) and collect their absolute paths in `subpaths`.
#
# The sections below index this list positionally (subpaths[0] -> ZUM,
# subpaths[1] -> Star, subpaths[2] -> RDO), so the folders are collected in
# that fixed order. os.listdir() returns entries in arbitrary order and must
# not decide the order of `subpaths`.

location_names = ["ZUM", "Star", "RDO"]
available_reads = set(os.listdir(ITS_reads_local))

subpaths = []

for location in location_names:
    # Only locations that actually have a reads folder get a result folder.
    if location in available_reads:
        os.makedirs(location, exist_ok=True)
        subpaths.append(os.path.abspath(location))

In [None]:
# Display the per-location result folders; the sections below index this list
subpaths

## ZUM

In [None]:
# Work inside the ZUM result folder. This relies on subpaths[0] being the ZUM
# directory -- check the printed path before running the cells below.
os.chdir(subpaths[0])
print(os.getcwd())

In [None]:
# Build the QIIME 2 manifest for the ZUM reads from the plate-setup workbook.
# NOTE(review): the helper apparently writes ITSS2F@ITS4R_manifest.csv into
# the current directory (that file is renamed in the next cell) -- confirm.
manifest_zum = metaBar.metaBar_Qiime2_Manifest(os.path.join(ITS_reads_local, "ZUM"), platesetup, sheetname=0, matchby="sample")

In [None]:
# Give the manifest a location-specific name
!mv ITSS2F@ITS4R_manifest.csv zum_its_manifest.csv

In [None]:
# Import the paired-end FASTQ files listed in the manifest into a QIIME 2
# artifact (zum_its.qza); the manifest format declares Phred33 qualities.

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./zum_its_manifest.csv \
--output-path zum_its.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a visualization (zum_its_seq.qzv) for
# inspecting read quality
!qiime demux summarize \
--i-data zum_its.qza \
--o-visualization zum_its_seq.qzv

In [None]:
# Make sure the output folders for the DADA2 statistics and the feature
# tables exist in the current location folder.
for out_dir in ("dada2-stats", "feature-tables"):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

In [None]:
# DADA2 denoising of the paired-end ZUM reads.
# Truncation lengths used below (presumably picked at quality score 25 -- confirm):
# f 299
# r 258
# The primer lengths (ITS_f_len / ITS_r_len) are trimmed from the start of
# the forward / reverse reads.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs zum_its.qza \
--output-dir dada2 \
--o-table feature-tables/table-zum \
--o-representative-sequences rep_seq_zum \
--p-trim-left-f $ITS_f_len \
--p-trim-left-r $ITS_r_len \
--p-trunc-len-f 299 \
--p-trunc-len-r 258 \
--o-denoising-stats dada2-stats/dada2_stats.qza \
--p-n-threads 12 

In [None]:
# Tabulate the DADA2 denoising statistics as a visualization (.qzv)

!qiime metadata tabulate \
--m-input-file dada2-stats/dada2_stats.qza \
--o-visualization dada2-stats/dada2_stats.qzv

In [None]:
# Summarize the ZUM feature table (OTU table) as a visualization, annotated
# with the sample metadata file

!qiime feature-table summarize \
--i-table feature-tables/table-zum.qza \
--o-visualization feature-tables/table-zum.qzv \
--m-sample-metadata-file $metafile

In [None]:
# Make sure the folder receiving the taxonomy results exists
taxonomy_dir = "taxonomy_updated"
if not os.path.exists(taxonomy_dir):
    os.makedirs(taxonomy_dir)

In [None]:
# Assign taxonomy to the ZUM representative sequences with the non-curated
# ZUM classifier
!qiime feature-classifier classify-sklearn \
--i-classifier $ncrt_zum_classifier \
--i-reads rep_seq_zum.qza \
--o-classification ./taxonomy_updated/taxonomy_updated_noncurated_zum_ITS.qza

# Build the taxonomy bar plot from the feature table, the assigned taxonomy
# and the sample metadata
!qiime taxa barplot \
--i-table feature-tables/table-zum.qza \
--i-taxonomy taxonomy_updated/taxonomy_updated_noncurated_zum_ITS.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy_updated/barplot_updated_ZUM_noncurated_ITS2.qzv

## Starky

In [None]:
# Work inside the Starky result folder. This relies on subpaths[1] being the
# Star directory -- check the printed path before running the cells below.
os.chdir(subpaths[1])
print(os.getcwd())

In [None]:
# Build the QIIME 2 manifest for the Starky reads from the plate-setup workbook.
# NOTE(review): the helper apparently writes ITSS2F@ITS4R_manifest.csv into
# the current directory (that file is renamed in the next cell) -- confirm.
manifest_star = metaBar.metaBar_Qiime2_Manifest(os.path.join(ITS_reads_local, "Star"), platesetup, sheetname=0, matchby="sample")

In [None]:
# Give the manifest a location-specific name
!mv ITSS2F@ITS4R_manifest.csv star_its_manifest.csv

In [None]:
# Import the paired-end FASTQ files listed in the manifest into a QIIME 2
# artifact (star_its.qza); the manifest format declares Phred33 qualities.

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./star_its_manifest.csv \
--output-path star_its.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a visualization (star_its_seq.qzv) for
# inspecting read quality
!qiime demux summarize \
--i-data star_its.qza \
--o-visualization star_its_seq.qzv

In [None]:
# Make sure the output folders for the DADA2 statistics and the feature
# tables exist in the current location folder.
for out_dir in ("dada2-stats", "feature-tables"):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

In [None]:
# DADA2 denoising of the paired-end Starky reads.
# Noted truncation lengths (presumably picked at quality score 25 -- confirm):
# f 300
# r 253
# NOTE(review): the command below truncates the reverse reads at 249, not at
# the 253 noted above -- confirm which value is intended.
# The primer lengths (ITS_f_len / ITS_r_len) are trimmed from the start of
# the forward / reverse reads.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs star_its.qza \
--output-dir dada2 \
--o-table feature-tables/table-star \
--o-representative-sequences rep_seq_star \
--p-trim-left-f $ITS_f_len \
--p-trim-left-r $ITS_r_len \
--p-trunc-len-f 300 \
--p-trunc-len-r 249 \
--o-denoising-stats dada2-stats/dada2_stats.qza \
--p-n-threads 25

In [None]:
# Tabulate the DADA2 denoising statistics as a visualization (.qzv)
!qiime metadata tabulate \
--m-input-file dada2-stats/dada2_stats.qza \
--o-visualization dada2-stats/dada2_stats.qzv

In [None]:
# Summarize the Starky feature table (OTU table) as a visualization,
# annotated with the sample metadata file

!qiime feature-table summarize \
--i-table feature-tables/table-star.qza \
--o-visualization feature-tables/table-star.qzv \
--m-sample-metadata-file $metafile

In [None]:
# Make sure the folder receiving the taxonomy results exists.
# NOTE(review): the ZUM section names this folder "taxonomy_updated".
taxonomy_dir = "taxonomy_update"
if not os.path.exists(taxonomy_dir):
    os.makedirs(taxonomy_dir)

In [None]:
# Assign taxonomy to the Starky representative sequences with the
# non-curated Starky classifier
!qiime feature-classifier classify-sklearn \
--i-classifier $ncrt_starky_classifier \
--i-reads rep_seq_star.qza \
--o-classification ./taxonomy_update/taxonomy_update_noncurated_star_ITS.qza

# Build the taxonomy bar plot from the feature table, the assigned taxonomy
# and the sample metadata
!qiime taxa barplot \
--i-table feature-tables/table-star.qza \
--i-taxonomy taxonomy_update/taxonomy_update_noncurated_star_ITS.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy_update/barplot_update_star_noncurated_ITS2.qzv

## RDO

In [None]:
# Work inside the RDO result folder. This relies on subpaths[2] being the RDO
# directory -- check the printed path before running the cells below.
os.chdir(subpaths[2])
print(os.getcwd())

In [None]:
# make manifest file
manifest_star = metaBar.metaBar_Qiime2_Manifest(os.path.join(ITS_reads_local, "RDO"), platesetup, sheetname=0, matchby="sample")

!mv ITSS2F@ITS4R_manifest.csv rdo_its_manifest.csv

In [None]:
# Import the paired-end FASTQ files listed in the manifest into a QIIME 2
# artifact (rdo_its.qza); the manifest format declares Phred33 qualities.
!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./rdo_its_manifest.csv \
--output-path rdo_its.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a visualization (rdo_its_seq.qzv) for
# inspecting read quality
!qiime demux summarize \
--i-data rdo_its.qza \
--o-visualization rdo_its_seq.qzv

In [None]:
# Make sure the output folders for the DADA2 statistics and the feature
# tables exist in the current location folder.
for out_dir in ("dada2-stats", "feature-tables"):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

In [None]:
# DADA2 denoising of the paired-end RDO reads.
# Truncation lengths used below (presumably picked at quality score 25 -- confirm):
# f 300
# r 258
# The primer lengths (ITS_f_len / ITS_r_len) are trimmed from the start of
# the forward / reverse reads.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs rdo_its.qza \
--output-dir dada2 \
--o-table feature-tables/table-rdo \
--o-representative-sequences rep_seq_rdo \
--p-trim-left-f $ITS_f_len \
--p-trim-left-r $ITS_r_len \
--p-trunc-len-f 300 \
--p-trunc-len-r 258 \
--o-denoising-stats dada2-stats/dada2_stats.qza \
--p-n-threads 25

In [None]:
# Tabulate the DADA2 denoising statistics as a visualization (.qzv)
!qiime metadata tabulate \
--m-input-file dada2-stats/dada2_stats.qza \
--o-visualization dada2-stats/dada2_stats.qzv

In [None]:
# Summarize the RDO feature table (OTU table) as a visualization, annotated
# with the sample metadata file

!qiime feature-table summarize \
--i-table feature-tables/table-rdo.qza \
--o-visualization feature-tables/table-rdo.qzv \
--m-sample-metadata-file $metafile

In [None]:
# Make sure the folder receiving the taxonomy results exists.
# NOTE(review): the ZUM section names this folder "taxonomy_updated".
taxonomy_dir = "taxonomy_update"
if not os.path.exists(taxonomy_dir):
    os.makedirs(taxonomy_dir)

In [None]:
# Assign taxonomy to the RDO representative sequences with the non-curated
# classifier held in ncrt_3m_classifier (the "threemile" classifier file).
# NOTE(review): confirm that the threemile classifier is the intended one
# for the RDO reads.
!qiime feature-classifier classify-sklearn \
--i-classifier $ncrt_3m_classifier \
--i-reads rep_seq_rdo.qza \
--o-classification ./taxonomy_update/taxonomy_update_noncurated_rdo_ITS.qza

In [None]:
# Build the taxonomy bar plot from the feature table, the assigned taxonomy
# and the sample metadata
!qiime taxa barplot \
--i-table feature-tables/table-rdo.qza \
--i-taxonomy taxonomy_update/taxonomy_update_noncurated_rdo_ITS.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy_update/barplot_update_rdo_noncurated_ITS2.qzv