# Pipeline for analyzing local data (rbcL)

Arstingstall, K.A., S.J. DeBano, X. Li, D.E. Wooster, M.M. Rowland, S. Burrows, & K. Frost; 2020; DNA metabarcoding of native bee pollen loads

In [None]:
import os
from metaBarTools import metaBar_PreX

In [None]:
!qiime --version #version 2020.11.1

In [None]:
metaBar = metaBar_PreX()

In [None]:
# metafile and platesetup file
metafile = os.path.abspath("./meta_beepollen_all.csv")
platesetup = os.path.abspath("./beepollen_all.xlsx")

In [None]:
rbcL_reads_local = os.path.abspath("./Local_reads/rbcL_reads_local/reads_copy_by_location/")

In [None]:
path_rbcL = metaBar.metaBar_makeSubDir("Local_Results", ["rbcL_result"]) # note: the analysis is run on paired-end reads

In [None]:
path_rbcL

In [None]:
# lengths of the two rbcL primer sequences; the dada2 cells below pass them as
# --p-trim-left-f / --p-trim-left-r
# NOTE(review): presumably the first string is the forward primer and the
# second the reverse primer (going by the _f/_r names) — confirm
rbcL_f_len = len("TGGCAGCATTYCGAGTAACTC")
rbcL_r_len = len("GTAAAATCAAGTCCACCRCG")

In [None]:
# non-curated classifiers
# use the updated classifiers, which include the previously missed species
ncrt_starky_classifier = os.path.abspath("./classifiers_updated/rbcL/classifiers/local/starky_rbcL_noncurated_classifier.qza")
ncrt_3m_classifier = os.path.abspath("./classifiers_updated/rbcL/classifiers/local/threemile_rbcL_noncurated_classifier.qza")
ncrt_zum_classifier = os.path.abspath("./classifiers_updated/rbcL/classifiers/local/zum_rbcL_noncurated_classifier.qza")

## Analyze rbcL local data (with local rbcL database)

In [None]:
# change working directory to rbcL_result
os.chdir(path_rbcL[0])
print(os.getcwd())

In [None]:
# make sub folders for each location
#
# The cells below select a location by position (subpaths[0] for ZUM,
# subpaths[1] for Star, subpaths[2] for RDO). os.listdir() returns entries in
# arbitrary order, so the list is built in that fixed order rather than in
# directory-listing order.
# A location whose read folder is absent from rbcL_reads_local is skipped,
# which shifts the positions of the locations after it.
locations = ["ZUM", "Star", "RDO"]
available = set(os.listdir(rbcL_reads_local))

subpaths = []

for location in locations:
    if location not in available:
        continue
    # exist_ok=True lets this cell be re-run without a separate existence check
    os.makedirs(location, exist_ok=True)
    subpaths.append(os.path.abspath(location))

In [None]:
subpaths

## ZUM

In [None]:
os.chdir(subpaths[0])
print(os.getcwd())

In [None]:
# make manifest file for the ZUM reads (plate setup workbook, sheet index 0, matched by "sample")
# NOTE(review): the next cell renames ITSS2F@ITS4R_manifest.csv, so this call
# presumably writes that file into the current directory — confirm in metaBarTools
manifest_zum = metaBar.metaBar_Qiime2_Manifest(os.path.join(rbcL_reads_local, "ZUM"), platesetup, sheetname=0, matchby="sample")

In [None]:
!mv ITSS2F@ITS4R_manifest.csv zum_rbcL_manifest.csv

In [None]:
# load reads into qiime2

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./zum_rbcL_manifest.csv \
--output-path zum_rbcL.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# view a summary of the read quality
!qiime demux summarize \
--i-data zum_rbcL.qza \
--o-visualization zum_rbcL_seq.qzv

In [None]:
# create the folders that the dada2 cell below writes its stats and feature
# table into; exist_ok=True makes re-running this cell a no-op
os.makedirs("dada2-stats", exist_ok=True)
os.makedirs("feature-tables", exist_ok=True)

In [None]:
# dada2 denoising

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs zum_rbcL.qza \
--output-dir dada2 \
--o-table feature-tables/table_rbcL_zum.qza \
--o-representative-sequences rep_rbcL_zum.qza \
--p-trim-left-f $rbcL_f_len \
--p-trim-left-r $rbcL_r_len \
--p-trunc-len-f 300 \
--p-trunc-len-r 262 \
--o-denoising-stats dada2-stats/dada2_stats.qza \
--p-n-threads 12

In [None]:
# visualize the dada2 stats
!qiime metadata tabulate \
--m-input-file dada2-stats/dada2_stats.qza \
--o-visualization dada2-stats/dada2_stats.qzv

In [None]:
# create the folder that receives the taxonomy assignment and barplot outputs;
# exist_ok=True makes re-running this cell a no-op
os.makedirs("taxonomy_update", exist_ok=True)

In [None]:
# visualize the feature table (OTU table)

!qiime feature-table summarize \
--i-table feature-tables/table_rbcL_zum.qza \
--o-visualization feature-tables/table_rbcL_zum.qzv \
--m-sample-metadata-file $metafile

In [None]:
# assign taxonomy - noncurated
!qiime feature-classifier classify-sklearn \
--i-classifier $ncrt_zum_classifier \
--i-reads rep_rbcL_zum.qza \
--o-classification ./taxonomy_update/taxonomy_update_noncurated_zum_rbcL.qza

# making relative taxonomy barplot
!qiime taxa barplot \
--i-table feature-tables/table_rbcL_zum.qza \
--i-taxonomy taxonomy_update/taxonomy_update_noncurated_zum_rbcL.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy_update/barplot_update_ZUM_noncurated_rbcL.qzv

## Star

In [None]:
os.chdir(subpaths[1])
print(os.getcwd())

In [None]:
# make manifest file for the Star reads (plate setup workbook, sheet index 0, matched by "sample")
# NOTE(review): the next cell renames ITSS2F@ITS4R_manifest.csv, so this call
# presumably writes that file into the current directory — confirm in metaBarTools
manifest_star = metaBar.metaBar_Qiime2_Manifest(os.path.join(rbcL_reads_local, "Star"), platesetup, sheetname=0, matchby="sample")

In [None]:
!mv ITSS2F@ITS4R_manifest.csv star_rbcL_manifest.csv

In [None]:
# load reads into qiime2

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./star_rbcL_manifest.csv \
--output-path star_rbcL.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# view a summary of the read quality
!qiime demux summarize \
--i-data star_rbcL.qza \
--o-visualization star_rbcL_seq.qzv

In [None]:
# create the folders that the dada2 cell below writes its stats and feature
# table into; exist_ok=True makes re-running this cell a no-op
os.makedirs("dada2-stats", exist_ok=True)
os.makedirs("feature-tables", exist_ok=True)

In [None]:
# dada2 denoising
# qscore >= 25
# f 300
# r 265
!qiime dada2 denoise-paired \
--i-demultiplexed-seqs star_rbcL.qza \
--output-dir dada2 \
--o-table feature-tables/table_rbcL_star.qza \
--o-representative-sequences rep_rbcL_star.qza \
--p-trim-left-f $rbcL_f_len \
--p-trim-left-r $rbcL_r_len \
--p-trunc-len-f 300 \
--p-trunc-len-r 265 \
--o-denoising-stats dada2-stats/dada2_stats.qza \
--p-n-threads 20

In [None]:
# visualize the dada2 stats
!qiime metadata tabulate \
--m-input-file dada2-stats/dada2_stats.qza \
--o-visualization dada2-stats/dada2_stats.qzv

In [None]:
# create the folder that receives the taxonomy assignment and barplot outputs;
# exist_ok=True makes re-running this cell a no-op
os.makedirs("taxonomy_update", exist_ok=True)

In [None]:
# visualize the feature table (OTU table)

!qiime feature-table summarize \
--i-table feature-tables/table_rbcL_star.qza \
--o-visualization feature-tables/table_rbcL_star.qzv \
--m-sample-metadata-file $metafile

In [None]:
# assign taxonomy - noncurated
!qiime feature-classifier classify-sklearn \
--i-classifier $ncrt_starky_classifier \
--i-reads rep_rbcL_star.qza \
--o-classification ./taxonomy_update/taxonomy_update_noncurated_star_rbcL.qza

# making relative taxonomy barplot
!qiime taxa barplot \
--i-table feature-tables/table_rbcL_star.qza \
--i-taxonomy taxonomy_update/taxonomy_update_noncurated_star_rbcL.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy_update/barplot_update_star_noncurated_rbcL.qzv

## RDO

In [None]:
os.chdir(subpaths[2])
print(os.getcwd())

In [None]:
# make manifest file for the RDO reads (plate setup workbook, sheet index 0, matched by "sample")
# The result is stored under its own name so it no longer overwrites the Star
# manifest held in manifest_star.
# NOTE(review): the next cell renames ITSS2F@ITS4R_manifest.csv, so this call
# presumably writes that file into the current directory — confirm in metaBarTools
manifest_rdo = metaBar.metaBar_Qiime2_Manifest(os.path.join(rbcL_reads_local, "RDO"), platesetup, sheetname=0, matchby="sample")

In [None]:
!mv ITSS2F@ITS4R_manifest.csv rdo_rbcL_manifest.csv

In [None]:
# load reads into qiime2
!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./rdo_rbcL_manifest.csv \
--output-path rdo_rbcL.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# view a summary of the read quality
!qiime demux summarize \
--i-data rdo_rbcL.qza \
--o-visualization rdo_rbcL_seq.qzv

In [None]:
# create the folders that the dada2 cell below writes its stats and feature
# table into; exist_ok=True makes re-running this cell a no-op
os.makedirs("dada2-stats", exist_ok=True)
os.makedirs("feature-tables", exist_ok=True)

In [None]:
# dada2 denoising
# qscore >= 25
# f 300
# r 259
!qiime dada2 denoise-paired \
--i-demultiplexed-seqs rdo_rbcL.qza \
--output-dir dada2 \
--o-table feature-tables/table_rbcL_rdo.qza \
--o-representative-sequences rep_rbcL_rdo.qza \
--p-trim-left-f $rbcL_f_len \
--p-trim-left-r $rbcL_r_len \
--p-trunc-len-f 300 \
--p-trunc-len-r 259 \
--o-denoising-stats dada2-stats/dada2_stats.qza \
--p-n-threads 20

In [None]:
# visualize the dada2 stats
!qiime metadata tabulate \
--m-input-file dada2-stats/dada2_stats.qza \
--o-visualization dada2-stats/dada2_stats.qzv

In [None]:
# create the folder that receives the taxonomy assignment and barplot outputs;
# exist_ok=True makes re-running this cell a no-op
os.makedirs("taxonomy_update", exist_ok=True)

In [None]:
# visualize the feature table (OTU table)

!qiime feature-table summarize \
--i-table feature-tables/table_rbcL_rdo.qza \
--o-visualization feature-tables/table_rbcL_rdo.qzv \
--m-sample-metadata-file $metafile

In [None]:
# assign taxonomy - noncurated
!qiime feature-classifier classify-sklearn \
--i-classifier $ncrt_3m_classifier \
--i-reads rep_rbcL_rdo.qza \
--o-classification ./taxonomy_update/taxonomy_update_noncurated_rdo_rbcL.qza

# making relative taxonomy barplot
!qiime taxa barplot \
--i-table feature-tables/table_rbcL_rdo.qza \
--i-taxonomy taxonomy_update/taxonomy_update_noncurated_rdo_rbcL.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy_update/barplot_update_rdo_noncurated_rbcL.qzv