# Pipeline for analyzing bee pollen data (rbcL)

Arstingstall, K.A., S.J. DeBano, X. Li, D.E. Wooster, M.M. Rowland, S. Burrows, & K. Frost; 2020; DNA metabarcoding of native bee pollen loads

##### Note: before you start the analysis, move the following files to your working directory: (1) `metaBarTools.py`, (2) the metadata file (.csv), and (3) the plate setup file (.xlsx).

In [None]:
import os
from metaBarTools import metaBar_PreX

In [None]:
# Print the installed QIIME 2 version; the trailing note records the version this notebook was run with.
!qiime --version #version 2020.11.1

In [None]:
# Instantiate the helper class imported from metaBarTools; it is used below to build the QIIME 2 manifest.
metaBar = metaBar_PreX()

In [None]:
# Directory containing the rbcL reads.
# Resolved to an absolute path so it stays valid after the later os.chdir() into the output directory.
rbcL_reads = os.path.abspath("./Regional_reads/rbcL-reads/")

In [None]:
# Input files: plate setup workbook (.xlsx) and sample metadata (.csv).
# Both are expected in the current working directory and are resolved to absolute
# paths so they stay valid after the later os.chdir() into the output directory.
platesetup = os.path.abspath('./beepollen_all.xlsx')
metafile = os.path.abspath('./meta_beepollen_all.csv')

In [None]:
# Output directory; the notebook changes into it (os.chdir) before running QIIME 2.
# NOTE(review): the leading "....." looks like a redacted/placeholder prefix —
# replace it with the real absolute path before running, otherwise os.chdir() will fail.
path_rbcL = "...../beepollen/Regional_output/rbcL_results"

## classifier path

In [None]:
# Non-curated regional rbcL reference classifier (.qza) used for taxonomy assignment.
# This is the newer classifier (presumably the one that adds the previously missed species — confirm).
# NOTE(review): the leading "....." looks like a redacted/placeholder prefix — replace it with the real path before running.

noncurated_rbcL_reg = os.path.abspath("...../classifiers/regional/rbcL_NONcurated_ref_classifier.qza")

## Analysis

* Use paired-end reads

In [None]:
# Work inside the output directory so the relative output paths in the following cells are created there.
os.chdir(path_rbcL)

In [None]:
# Build the QIIME 2 manifest from the reads directory and the first sheet (sheetname=0)
# of the plate setup workbook, matching entries by "sample".
# NOTE(review): the variable is named "_single" although this pipeline imports paired-end
# reads — presumably a leftover name; confirm before reusing it elsewhere.
manifest_rbcL_single = metaBar.metaBar_Qiime2_Manifest(rbcL_reads, platesetup, sheetname=0, matchby="sample")

In [None]:
# Rename the generated manifest to the name expected by the import step below.
# NOTE(review): the source filename carries ITS primer names (ITSS2F/ITS4R) although this is the
# rbcL pipeline — presumably the name written by metaBar_Qiime2_Manifest; confirm it matches.
!mv ITSS2F@ITS4R_manifest.csv rbcl_PE_manifest.csv

In [None]:
# Lengths of the forward and reverse primer sequences. They are passed to DADA2 as
# --p-trim-left-f / --p-trim-left-r so that many leading bases are trimmed from each read.
rbcL_f_len, rbcL_r_len = map(len, ("TGGCAGCATTYCGAGTAACTC", "GTAAAATCAAGTCCACCRCG"))

In [None]:
# Create the output folders used by the DADA2 stats and feature-table steps.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create gap;
# it also fails early if one of the names already exists as a regular file.
for out_dir in ("dada2-stats", "feature-tables"):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Import the reads into QIIME 2.
# Reads the paired-end manifest (rbcl_PE_manifest.csv) and writes the sequence
# artifact reg_rbcL_seq.qza; the chosen input format expects Phred33 quality scores.

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./rbcl_PE_manifest.csv \
--output-path reg_rbcL_seq.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a .qzv visualization; inspect it (e.g. with
# `qiime tools view`) before choosing the truncation lengths for DADA2.
!qiime demux summarize \
--i-data ./reg_rbcL_seq.qza \
--o-visualization ./reg_rbcL_seq.qzv

In [None]:
# DADA2 denoising for paired-end reads.
# Truncation lengths: forward reads at 300, reverse reads at 244
# (presumably chosen from the demux summary quality plots — confirm).
# The primer lengths (rbcL_f_len / rbcL_r_len) are trimmed from the start of each read.
# Runs with 20 threads; adjust --p-n-threads to the machine.
# NOTE(review): --output-dir is given although every output is named explicitly;
# QIIME 2 may refuse to re-run this cell if the "dada2" directory already exists — confirm it is needed.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs reg_rbcL_seq.qza \
--output-dir dada2 \
--o-table feature-tables/table_reg_rbcL.qza \
--o-representative-sequences rep_reg_rbcL.qza \
--p-trim-left-f $rbcL_f_len \
--p-trim-left-r $rbcL_r_len \
--p-trunc-len-f 300 \
--p-trunc-len-r 244 \
--p-n-threads 20 \
--o-denoising-stats dada2-stats/dada2_stats.qza

In [None]:
# Turn the DADA2 denoising statistics artifact into a viewable table (.qzv).
!qiime metadata tabulate \
--m-input-file dada2-stats/dada2_stats.qza \
--o-visualization dada2-stats/dada2_stats.qzv

In [None]:
# Summarize the feature table produced by DADA2 (feature counts per sample),
# annotated with the sample metadata file.

!qiime feature-table summarize \
--i-table feature-tables/table_reg_rbcL.qza \
--o-visualization feature-tables/table_reg_rbcL.qzv \
--m-sample-metadata-file $metafile

In [None]:
# Create the folder that receives the taxonomy assignment and barplot outputs.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create gap.
os.makedirs("taxonomy_updated_missedsp", exist_ok=True)

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the non-curated
# regional rbcL classifier; the result is written to taxonomy_updated_missedsp/.
!qiime feature-classifier classify-sklearn \
--i-classifier $noncurated_rbcL_reg \
--i-reads rep_reg_rbcL.qza \
--o-classification ./taxonomy_updated_missedsp/taxonomy_updated_noncurated_rbcL.qza

# Build the taxonomy barplot from the feature table, the taxonomy assignment above,
# and the sample metadata.
!qiime taxa barplot \
--i-table feature-tables/table_reg_rbcL.qza \
--i-taxonomy taxonomy_updated_missedsp/taxonomy_updated_noncurated_rbcL.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy_updated_missedsp/barplot_updated_noncurated_rbcL.qzv

In [None]:
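# NOTE: the export steps in this cell are disabled. They reference artifacts that the
# cells in this notebook do not create (taxonomy/taxonomy_noncurated_rbcL.qza, a curated
# taxonomy, rooted_tree.qza); update the paths before re-enabling any of them.
# !qiime tools export \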
# --input-path feature-tables/table_reg_rbcL.qza \
# --output-path R_process/

# !qiime tools export \
# --input-path taxonomy/taxonomy_noncurated_rbcL.qza \
# --output-path R_process/noncurated

# !qiime tools export \
# --input-path taxonomy/taxonomy_curated_rbcL.qza \
# --output-path R_process/curated/

# !mv R_process/noncurated/taxonomy.tsv R_process/noncurated/noncurated_taxonomy.tsv

# !mv R_process/curated/taxonomy.tsv R_process/curated/curated_taxonomy.tsv

# !cp R_process/noncurated/noncurated_taxonomy.tsv R_process/

# !cp R_process/curated/curated_taxonomy.tsv R_process/

# !cp R_process/noncurated_taxonomy.tsv R_process/noncurated_biom-taxonomy.tsv

# !cp R_process/curated_taxonomy.tsv R_process/curated_biom-taxonomy.tsv

# # change header
# !sed -i '1 s/Feature ID/#OTUID/g; s/Taxon/taxonomy/g; s/Confidence/confidence/g' R_process/noncurated_biom-taxonomy.tsv

# # change header
# !sed -i '1 s/Feature ID/#OTUID/g; s/Taxon/taxonomy/g; s/Confidence/confidence/g' R_process/curated_biom-taxonomy.tsv

# !biom add-metadata \
# -i R_process/feature-table.biom \
# -o R_process/noncurated_feature-table-tax.biom \
# --observation-metadata-fp R_process/noncurated_biom-taxonomy.tsv \
# --sample-metadata-fp $metafile \
# --sc-separated taxonomy

# !biom add-metadata \
# -i R_process/feature-table.biom \
# -o R_process/curated_feature-table-tax.biom \
# --observation-metadata-fp R_process/curated_biom-taxonomy.tsv \
# --sample-metadata-fp $metafile \
# --sc-separated taxonomy

# !biom convert \
# -i R_process/noncurated_feature-table-tax.biom \
# -o R_process/noncurated_feature-table.tsv \
# --to-tsv

# !biom convert \
# -i R_process/curated_feature-table-tax.biom \
# -o R_process/curated_feature-table.tsv \
# --to-tsv

# !qiime tools export \
# --input-path rooted_tree.qza \
# --output-path R_process/