# Pipeline for analyzing bee pollen data (ITS2)

Arstingstall, K.A., S.J. DeBano, X. Li, D.E. Wooster, M.M. Rowland, S. Burrows, & K. Frost; 2020; DNA metabarcoding of native bee pollen loads

##### Note: before you start the analysis, copy the following files into your working directory: (1) `metaBarTools.py`, (2) the metadata file (.csv), and (3) the plate setup file (.xlsx).

In [None]:
import os
from metaBarTools import metaBar_PreX

In [None]:
# Print the installed QIIME 2 version so the run is reproducible
# (2020.11.1 when this notebook was written).
!qiime --version #version 2020.11.1

In [None]:
# Instantiate the helper imported from metaBarTools; its methods are used below
# to set up the output directories and to build the QIIME 2 manifest.
metaBar = metaBar_PreX()

In [None]:
# Directory holding the ITS reads. Stored as an absolute path because the
# working directory is changed with os.chdir() later in this notebook.
ITS_reads = os.path.abspath("./Regional_reads/ITS-reads")

In [None]:
# Input files expected in the starting working directory: the plate setup
# workbook and the sample metadata table. Both are resolved to absolute paths
# here so they stay valid after the later os.chdir() into the results folder.
platesetup, metafile = map(
    os.path.abspath,
    ('./beepollen_all.xlsx', './meta_beepollen_all.csv'),
)

## Note (Feb 23, 2021): updated to add previously missed species to the classifier

In [None]:
# Set up subdirectories for the analysis: the helper receives the parent folder
# name plus three subfolder names, and its result is unpacked into one path per
# subfolder.
# NOTE(review): presumably the directories are created on disk when missing —
# confirm against metaBar_makeSubDir in metaBarTools.py.
path_ITS, path_rbcL, path_rbcLSE = metaBar.metaBar_makeSubDir("Regional_output", ["ITS_results", "rbcL_results", "rbcL_results_SE"])

In [None]:
# Show the three output directories, one per line, as a sanity check.
print(path_ITS, path_rbcL, path_rbcLSE, sep="\n")

# Example output from the original run (leading directories elided):
#...../beepollen/Regional_output/ITS_results
#...../beepollen/Regional_output/rbcL_results
#...../2021/beepollen/Regional_output/rbcL_results_SE

## classifier path

* We used the non-curated classifier in the paper.

In [None]:
# Classifier artifact (.qza) for the non-curated regional ITS2 reference; it is
# passed as --i-classifier to `qiime feature-classifier classify-sklearn` below.
# NOTE(review): the leading "....." looks like an elided directory prefix —
# replace it with the real location before running.
noncurated_ITS_reg = os.path.abspath("...../regional/ITS2_NONcurated_ref_classifier.qza")

## Analysis

In [None]:
# Working directory for the ITS2 analysis.
# NOTE(review): this overwrites the path_ITS value unpacked from
# metaBar_makeSubDir earlier in the notebook with a hard-coded string whose
# "....." prefix looks like an elided directory — confirm the real path, or
# reuse the returned value instead.
path_ITS = "...../2021/beepollen/Regional_output/ITS_results"

In [None]:
# Run everything below from the ITS results folder, so that the relative paths
# used by the qiime/biom/shell commands resolve inside it.
os.chdir(path_ITS)

In [None]:
# ITS2 primer pair: forward ITS-S2F, reverse ITS4R.
# Only the primer lengths are kept; they are passed to DADA2 as the
# --p-trim-left-f / --p-trim-left-r values further down.
ITS_f_len, ITS_r_len = map(
    len,
    ("ATGCGATACTTGGTGTGAAT", "TCCTCCGCTTATTGATATGC"),
)

In [None]:
# Create the manifest file that QIIME 2 uses to find the reads, from the reads
# directory and the first sheet (sheetname=0) of the plate setup workbook.
# NOTE(review): presumably matchby="sample" pairs read files with plate entries
# by sample name — confirm in metaBarTools.py.
manifest = metaBar.metaBar_Qiime2_Manifest(ITS_reads, platesetup, sheetname=0, matchby="sample")

In [None]:
# Rename the manifest; the import step below reads regional_ITS2_manifest.csv.
# NOTE(review): ITSS2F@ITS4R_manifest.csv is presumably the file written by
# metaBar_Qiime2_Manifest in the previous cell — confirm.
!mv ITSS2F@ITS4R_manifest.csv regional_ITS2_manifest.csv

In [None]:
# Import the paired-end reads listed in the manifest (Phred33 manifest format)
# into a single QIIME 2 artifact, reg_ITS2.qza.
!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./regional_ITS2_manifest.csv \
--output-path reg_ITS2.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a visualization (reg_ITS2.qzv) for
# inspection before denoising.
!qiime demux summarize \
--i-data ./reg_ITS2.qza \
--o-visualization ./reg_ITS2.qzv

In [None]:
# Make the folders that store the DADA2 statistics and the feature tables.
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() first; it also raises immediately if the
# name is already taken by a regular file instead of failing later in qiime.
os.makedirs("dada2-stats", exist_ok=True)
os.makedirs("feature-tables", exist_ok=True)

In [None]:
# Run DADA2 to denoise the paired-end reads.
# - --p-trim-left-f / --p-trim-left-r: number of leading bases to trim from the
#   forward / reverse reads, set to the primer lengths computed above
#   (IPython expands $name to the Python variable inside `!` commands).
# - --p-trunc-len-f 298 / --p-trunc-len-r 235: fixed truncation positions.
#   NOTE(review): the original note here read "set truncating quality cutoff at
#   30", but no --p-trunc-q option is passed; presumably 298/235 are the
#   positions where read quality dropped below 30 — confirm.
# - Recorded from the original run: "dada2 stats 48%".
# NOTE(review): all three outputs are named explicitly in addition to
# --output-dir dada2 — confirm whether --output-dir is still needed.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs reg_ITS2.qza \
--output-dir dada2 \
--o-table feature-tables/table_reg_ITS2.qza \
--o-representative-sequences rep_seq_reg_ITS.qza \
--p-trim-left-f $ITS_f_len \
--p-trim-left-r $ITS_r_len \
--p-trunc-len-f 298 \
--p-trunc-len-r 235 \
--p-n-threads 20 \
--o-denoising-stats dada2-stats/dada2_stats.qza

In [None]:
# Visualize the DADA2 denoising statistics written by the previous step
# (dada2_stats.qza -> dada2_stats.qzv).

!qiime metadata tabulate \
--m-input-file dada2-stats/dada2_stats.qza \
--o-visualization dada2-stats/dada2_stats.qzv

In [None]:
# Visualize the feature table (OTU table) produced by DADA2, annotated with the
# sample metadata file ($metafile is expanded by IPython to the absolute path
# set at the top of the notebook).

!qiime feature-table summarize \
--i-table feature-tables/table_reg_ITS2.qza \
--o-visualization feature-tables/table_reg_ITS2.qzv \
--m-sample-metadata-file $metafile

In [None]:
# Folder read by the taxonomy export cells further down.
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() first.
os.makedirs("taxonomy", exist_ok=True)

In [None]:
# Folder for the taxonomy assignment and barplot written by the classifier
# cell below. exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs("taxonomy_updated_missedsp", exist_ok=True)

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the non-curated
# classifier; the result goes to taxonomy_updated_missedsp/.
!qiime feature-classifier classify-sklearn \
--i-classifier $noncurated_ITS_reg \
--i-reads rep_seq_reg_ITS.qza \
--o-classification ./taxonomy_updated_missedsp/taxonomy_updated_noncurated_ITS2.qza

# Build the taxonomy barplot from the feature table, the taxonomy assigned
# just above, and the sample metadata file.
!qiime taxa barplot \
--i-table feature-tables/table_reg_ITS2.qza \
--i-taxonomy taxonomy_updated_missedsp/taxonomy_updated_noncurated_ITS2.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy_updated_missedsp/barplot_updated_noncurated_ITS2.qzv

## export for R

In [None]:
# Export the feature table artifact into R_process/; the biom steps below read
# R_process/feature-table.biom from there.
!qiime tools export \
--input-path feature-tables/table_reg_ITS2.qza \
--output-path R_process/

In [None]:
# Export the non-curated taxonomy assignment into R_process/noncurated
# (the following cells expect a taxonomy.tsv there).
# NOTE(review): the classification cell visible in this notebook writes to
# taxonomy_updated_missedsp/taxonomy_updated_noncurated_ITS2.qza, not to
# taxonomy/taxonomy_noncurated_ITS2.qza — confirm this input is the intended
# (possibly earlier) assignment and that the file exists.
!qiime tools export \
--input-path taxonomy/taxonomy_noncurated_ITS2.qza \
--output-path R_process/noncurated

In [None]:
# Export the curated taxonomy assignment into R_process/curated/
# (the following cells expect a taxonomy.tsv there).
# NOTE(review): no cell visible in this notebook produces
# taxonomy/taxonomy_curated_ITS2.qza — confirm it exists from an earlier run.
!qiime tools export \
--input-path taxonomy/taxonomy_curated_ITS2.qza \
--output-path R_process/curated/

In [None]:
# Prefix the exported taxonomy table so it is distinguishable from the curated one.
!mv R_process/noncurated/taxonomy.tsv R_process/noncurated/noncurated_taxonomy.tsv

In [None]:
# Prefix the exported taxonomy table so it is distinguishable from the non-curated one.
!mv R_process/curated/taxonomy.tsv R_process/curated/curated_taxonomy.tsv

In [None]:
# Put a copy of the non-curated taxonomy table next to the exported feature table.
!cp R_process/noncurated/noncurated_taxonomy.tsv R_process/

In [None]:
# Put a copy of the curated taxonomy table next to the exported feature table.
!cp R_process/curated/curated_taxonomy.tsv R_process/

In [None]:
# Work on a separate copy for biom: its header is rewritten by the sed step
# below, leaving noncurated_taxonomy.tsv untouched.
!cp R_process/noncurated_taxonomy.tsv R_process/noncurated_biom-taxonomy.tsv

In [None]:
# Work on a separate copy for biom: its header is rewritten by the sed step
# below, leaving curated_taxonomy.tsv untouched.
!cp R_process/curated_taxonomy.tsv R_process/curated_biom-taxonomy.tsv

In [None]:
# change header
!sed -i '1 s/Feature ID/#OTUID/g; s/Taxon/taxonomy/g; s/Confidence/confidence/g' R_process/noncurated_biom-taxonomy.tsv

In [None]:
# change header
!sed -i '1 s/Feature ID/#OTUID/g; s/Taxon/taxonomy/g; s/Confidence/confidence/g' R_process/curated_biom-taxonomy.tsv

In [None]:
# Attach the non-curated taxonomy (as observation metadata) and the sample
# metadata to the exported feature table. --sc-separated taxonomy marks the
# taxonomy field as semicolon-separated.
!biom add-metadata \
-i R_process/feature-table.biom \
-o R_process/noncurated_feature-table-tax.biom \
--observation-metadata-fp R_process/noncurated_biom-taxonomy.tsv \
--sample-metadata-fp $metafile \
--sc-separated taxonomy

In [None]:
# Attach the curated taxonomy (as observation metadata) and the sample
# metadata to the exported feature table. --sc-separated taxonomy marks the
# taxonomy field as semicolon-separated.
!biom add-metadata \
-i R_process/feature-table.biom \
-o R_process/curated_feature-table-tax.biom \
--observation-metadata-fp R_process/curated_biom-taxonomy.tsv \
--sample-metadata-fp $metafile \
--sc-separated taxonomy

In [None]:
# Convert both annotated biom tables to tab-separated text for use in R.
# NOTE(review): no --header-key taxonomy is passed; as far as I can tell biom
# then leaves the taxonomy column out of the TSV — confirm whether the
# taxonomy is meant to be included in these files.
!biom convert \
-i R_process/noncurated_feature-table-tax.biom \
-o R_process/noncurated_feature-table.tsv \
--to-tsv

!biom convert \
-i R_process/curated_feature-table-tax.biom \
-o R_process/curated_feature-table.tsv \
--to-tsv