# Pipeline for analyzing bee pollen data (local database)

Author: Xiaoping Li  
Organization: Oregon State University Hermiston Agricultural Research and Extension Center

In [None]:
from metaBarTools import metaBar_PreX
from sort_reads import Sort_reads
import os

In [None]:
# Helper object from metaBarTools; used below to create the result folders
# and to generate the QIIME 2 manifest files.
metaBar = metaBar_PreX()

In [None]:
# Absolute paths are resolved once, up front, because later cells change the
# working directory with os.chdir().

# Sample metadata table and plate-setup workbook.
metafile, platesetup = map(
    os.path.abspath,
    ("./meta_beepollen_all.csv", "./beepollen_all.xlsx"),
)

# Read folders for each marker; the site folders (ZUM, Star, RDO) are looked up inside them.
ITS_reads_local, rbcL_reads_local = map(
    os.path.abspath,
    (
        "./Local_reads/ITS_reads_local/reads_copy_by_location/",
        "./Local_reads/rbcL_reads_local/reads_copy_by_location/",
    ),
)


In [None]:
# Site-specific local reference databases: for every marker (ITS2, rbcL) and
# site there is a FASTA file of reference sequences and a taxonomy mapping file.
# Note that the "rdo" variables point at the Threemile files.

# ITS2
ITS_starkey_db = os.path.abspath("./database_beepollen/Local/ITS2_Starkey_Local.fasta")
ITS_starkey_mapping = os.path.abspath("./database_beepollen/Local/ITS2_Starkey_Local.mapping")

ITS_zum_db = os.path.abspath("./database_beepollen/Local/ITS2_Zumwalt_Local.fasta")
ITS_zum_mapping = os.path.abspath("./database_beepollen/Local/ITS2_Zumwalt_Local.mapping")

ITS_rdo_db = os.path.abspath("./database_beepollen/Local/ITS2_Threemile_Local.fasta")
ITS_rdo_mapping = os.path.abspath("./database_beepollen/Local/ITS2_Threemile_Local.mapping")

# rbcL
rbcl_starkey_db = os.path.abspath("./database_beepollen/Local/rbcL_Starkey_Local.fasta")
rbcl_starkey_mapping = os.path.abspath("./database_beepollen/Local/rbcL_Starkey_Local.mapping")

rbcl_zum_db = os.path.abspath("./database_beepollen/Local/rbcL_Zumwalt_Local.fasta")
rbcl_zum_mapping = os.path.abspath("./database_beepollen/Local/rbcL_Zumwalt_Local.mapping")

rbcl_rdo_db = os.path.abspath("./database_beepollen/Local/rbcL_Threemile_Local.fasta")
rbcl_rdo_mapping = os.path.abspath("./database_beepollen/Local/rbcL_Threemile_Local.mapping")

In [None]:
# Number of leading bases trimmed from the forward / reverse ITS reads in the
# DADA2 cells below (passed as --p-trim-left-f / --p-trim-left-r).
ITS_f_len, ITS_r_len = map(len, ("ATGCGATACTTGGTGTGAAT", "TCCTCCGCTTATTGATATGC"))

In [None]:
# Set up the three result folders; the returned paths are used below as working directories.
# NOTE(review): presumably creates Local_Results/<name> for each listed name — confirm in metaBarTools.
path_ITS, path_rbcL, path_rbcL_single = metaBar.metaBar_makeSubDir("Local_Results", ["ITS_result", "rbcL_result", "rbcL_result_single"])

## ITS2 analysis with local database

In [None]:
# Switch to the ITS result folder; relative paths in the following cells resolve against it.
os.chdir(path_ITS)

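> Assumes the `reads_copy_by_location` folders defined above (`ITS_reads_local`, `rbcL_reads_local`) already exist and contain one sub-folder per site (`ZUM`, `Star`, `RDO`).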

In [None]:
# Make one working sub-folder per sampling site in the current directory.
#
# The sites are visited in a fixed order so that subpaths[0], subpaths[1] and
# subpaths[2] always refer to ZUM, Star and RDO, which is how the cells below
# index the list. os.listdir() returns entries in arbitrary order, so the
# listing is only used to check which site folders actually hold reads.
site_dirs = set(os.listdir(ITS_reads_local))

subpaths = []

for i in ["ZUM", "Star", "RDO"]:
    if i in site_dirs:
        os.makedirs(i, exist_ok=True)
        subpaths.append(os.path.abspath(i))


### Zumwalt

In [None]:
# Work inside the Zumwalt folder.
# NOTE(review): assumes subpaths[0] is the "ZUM" entry, which depends on the
# order in which subpaths was filled — confirm.
os.chdir(subpaths[0])

In [None]:
# Display the current working directory to check that the chdir took effect.
os.getcwd()

In [None]:
# Generate the QIIME 2 manifest for the ZUM ITS read folder from the plate-setup
# workbook (sheetname=0, samples matched by "sample").
manifest_zum = metaBar.metaBar_Qiime2_Manifest(os.path.join(ITS_reads_local, "ZUM"), platesetup, sheetname=0, matchby="sample")

In [None]:
# Give the generated manifest a site-specific name.
# NOTE(review): presumably the default file name comes from the primer pair in the plate setup — confirm.
!mv ITSS2F@ITS4R_manifest.csv zum_its_manifest.csv

In [None]:
# Import the paired-end FASTQ files listed in the manifest into a QIIME 2 artifact (zum_its.qza).

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./zum_its_manifest.csv \
--output-path zum_its.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a .qzv visualization for inspecting read quality.
!qiime demux summarize \
--i-data zum_its.qza \
--o-visualization zum_its_seq.qzv

In [None]:
# Denoise the paired-end reads with DADA2.
# The first ITS_f_len / ITS_r_len bases are trimmed from the forward / reverse
# reads, which are then truncated at positions 288 / 237.
# NOTE(review): the original note says "quality score 22" — presumably the
# truncation positions were picked where quality drops to about Q22; confirm.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs zum_its.qza \
--output-dir dada2 \
--o-table table-zum \
--o-representative-sequences rep_seq_zum \
--p-trim-left-f $ITS_f_len \
--p-trim-left-r $ITS_r_len \
--p-trunc-len-f 288 \
--p-trunc-len-r 237 \
--p-n-threads 12 

In [None]:
# Summarize the DADA2 feature table, together with the sample metadata, into a .qzv.

!qiime feature-table summarize \
--i-table table-zum.qza \
--o-visualization table-zum.qzv \
--m-sample-metadata-file $metafile

In [None]:
# Filter the feature table with a minimum total frequency of 1000 and a minimum of 4 samples per feature.
!qiime feature-table filter-features \
--i-table table-zum.qza \
--p-min-frequency 1000 \
--p-min-samples 4 \
--o-filtered-table filtered-table.qza

In [None]:
# Output folders for the classifier artifacts and the taxonomy results.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs("classifier", exist_ok=True)
os.makedirs("taxonomy", exist_ok=True)

In [None]:
# Import the Zumwalt ITS2 reference sequences into QIIME 2 as FeatureData[Sequence].
!qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $ITS_zum_db \
--output-path ./classifier/ITS2_loc_ZUM_site.qza

In [None]:
# Import the matching taxonomy mapping (headerless TSV) into QIIME 2 as FeatureData[Taxonomy].

!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $ITS_zum_mapping \
--output-path ./classifier/ITS2_taxonomy_loc_ZUM_site.qza

In [None]:
# Train a naive Bayes classifier on the Zumwalt ITS2 reference sequences and taxonomy.
!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/ITS2_loc_ZUM_site.qza \
--i-reference-taxonomy ./classifier/ITS2_taxonomy_loc_ZUM_site.qza \
--o-classifier ./classifier/classifier_ITS2_loc_ZUM.qza

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the trained classifier.
!qiime feature-classifier classify-sklearn \
--i-classifier ./classifier/classifier_ITS2_loc_ZUM.qza \
--i-reads rep_seq_zum.qza \
--o-classification ./taxonomy/taxonomy_ITS2_loc_ZUM.qza

In [None]:
# Tabulate the taxonomy assignments into a .qzv visualization.
!qiime metadata tabulate \
--m-input-file taxonomy/taxonomy_ITS2_loc_ZUM.qza \
--o-visualization taxonomy/taxonomy_ITS2_loc_ZUM.qzv

In [None]:
# Taxonomic composition bar plot built from the filtered table, the taxonomy and the sample metadata.
!qiime taxa barplot \
--i-table filtered-table.qza \
--i-taxonomy taxonomy/taxonomy_ITS2_loc_ZUM.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy/barplot_ITS2_loc_ZUM.qzv

### Starkey

In [None]:
# Work inside the Starkey folder.
# NOTE(review): assumes subpaths[1] is the "Star" entry, which depends on the
# order in which subpaths was filled — confirm.
os.chdir(subpaths[1])

In [None]:
# Generate the QIIME 2 manifest for the Star ITS read folder from the plate-setup
# workbook (sheetname=0, samples matched by "sample"), then give the generated
# file a site-specific name.
manifest_star = metaBar.metaBar_Qiime2_Manifest(os.path.join(ITS_reads_local, "Star"), platesetup, sheetname=0, matchby="sample")

!mv ITSS2F@ITS4R_manifest.csv star_its_manifest.csv

In [None]:
# Import the paired-end FASTQ files listed in the manifest into a QIIME 2 artifact (star_its.qza).

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./star_its_manifest.csv \
--output-path star_its.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a .qzv visualization for inspecting read quality.

!qiime demux summarize \
--i-data star_its.qza \
--o-visualization star_its_seq.qzv

In [None]:
# Denoise the paired-end reads with DADA2.
# The first ITS_f_len / ITS_r_len bases are trimmed from the forward / reverse
# reads, which are then truncated at positions 299 / 244.
# NOTE(review): the original note says "Truncate quality at 22" — presumably the
# truncation positions were picked where quality drops to about Q22; confirm.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs star_its.qza \
--output-dir dada2 \
--o-table table-star \
--o-representative-sequences rep_seq_star \
--p-trim-left-f $ITS_f_len \
--p-trim-left-r $ITS_r_len \
--p-trunc-len-f 299 \
--p-trunc-len-r 244 \
--p-n-threads 12 

In [None]:
# Filter the feature table with a minimum total frequency of 1000 and a minimum of 4 samples per feature.

!qiime feature-table filter-features \
--i-table table-star.qza \
--p-min-frequency 1000 \
--p-min-samples 4 \
--o-filtered-table filtered-table.qza

In [None]:
# Output folders for the classifier artifacts and the taxonomy results.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs("classifier", exist_ok=True)
os.makedirs("taxonomy", exist_ok=True)

In [None]:
# Import the Starkey ITS2 reference sequences (FeatureData[Sequence]) and the
# matching headerless-TSV taxonomy mapping (FeatureData[Taxonomy]) into QIIME 2.

!qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $ITS_starkey_db \
--output-path ./classifier/ITS2_loc_STAR_site.qza

!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $ITS_starkey_mapping \
--output-path ./classifier/ITS2_taxonomy_loc_STAR_site.qza

In [None]:
# Train a naive Bayes classifier on the Starkey ITS2 reference sequences and taxonomy.
!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/ITS2_loc_STAR_site.qza \
--i-reference-taxonomy ./classifier/ITS2_taxonomy_loc_STAR_site.qza \
--o-classifier ./classifier/classifier_ITS2_loc_STAR.qza

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the trained classifier.
!qiime feature-classifier classify-sklearn \
--i-classifier ./classifier/classifier_ITS2_loc_STAR.qza \
--i-reads rep_seq_star.qza \
--o-classification ./taxonomy/taxonomy_ITS2_loc_STAR.qza

In [None]:
# Tabulate the taxonomy assignments into a .qzv visualization.

!qiime metadata tabulate \
--m-input-file taxonomy/taxonomy_ITS2_loc_STAR.qza \
--o-visualization taxonomy/taxonomy_ITS2_loc_STAR.qzv

In [None]:
# Taxonomic composition bar plot built from the filtered table, the taxonomy and the sample metadata.

!qiime taxa barplot \
--i-table filtered-table.qza \
--i-taxonomy taxonomy/taxonomy_ITS2_loc_STAR.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy/barplot_ITS2_loc_STAR.qzv

### Threemile farm

In [None]:
# Work inside the Threemile (RDO) folder.
# NOTE(review): assumes subpaths[2] is the "RDO" entry, which depends on the
# order in which subpaths was filled — confirm.
os.chdir(subpaths[2])

In [None]:
# Generate the QIIME 2 manifest for the RDO ITS read folder from the plate-setup
# workbook (sheetname=0, samples matched by "sample"), then give the generated
# file a site-specific name.
manifest_rdo = metaBar.metaBar_Qiime2_Manifest(os.path.join(ITS_reads_local, "RDO"), platesetup, sheetname=0, matchby="sample")

!mv ITSS2F@ITS4R_manifest.csv rdo_its_manifest.csv

In [None]:
# Import the paired-end FASTQ files listed in the manifest into a QIIME 2 artifact (rdo_its.qza).

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./rdo_its_manifest.csv \
--output-path rdo_its.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a .qzv visualization for inspecting read quality.
!qiime demux summarize \
--i-data rdo_its.qza \
--o-visualization rdo_its_seq.qzv

In [None]:
# Denoise the paired-end reads with DADA2.
# The first ITS_f_len / ITS_r_len bases are trimmed from the forward / reverse
# reads, which are then truncated at positions 299 / 243.
# NOTE(review): the original note says "truncate cutoff 22" — presumably the
# truncation positions were picked where quality drops to about Q22; confirm.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs rdo_its.qza \
--output-dir dada2 \
--o-table table-rdo \
--o-representative-sequences rep_seq_rdo \
--p-trim-left-f $ITS_f_len \
--p-trim-left-r $ITS_r_len \
--p-trunc-len-f 299 \
--p-trunc-len-r 243 \
--p-n-threads 12

In [None]:
# Filter the feature table with a minimum total frequency of 1000 and a minimum of 4 samples per feature.

!qiime feature-table filter-features \
--i-table table-rdo.qza \
--p-min-frequency 1000 \
--p-min-samples 4 \
--o-filtered-table filtered-table.qza

In [None]:
# Output folders for the classifier artifacts and the taxonomy results.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs("classifier", exist_ok=True)
os.makedirs("taxonomy", exist_ok=True)

In [None]:
# Import the Threemile ITS2 reference sequences (FeatureData[Sequence]) and the
# matching headerless-TSV taxonomy mapping (FeatureData[Taxonomy]) into QIIME 2.

!qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $ITS_rdo_db \
--output-path ./classifier/ITS2_loc_rdo_site.qza

!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $ITS_rdo_mapping \
--output-path ./classifier/ITS2_taxonomy_loc_rdo_site.qza

In [None]:
# Train a naive Bayes classifier on the Threemile ITS2 reference sequences and taxonomy.

!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/ITS2_loc_rdo_site.qza \
--i-reference-taxonomy ./classifier/ITS2_taxonomy_loc_rdo_site.qza \
--o-classifier ./classifier/classifier_ITS2_loc_rdo.qza

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the trained classifier.
!qiime feature-classifier classify-sklearn \
--i-classifier ./classifier/classifier_ITS2_loc_rdo.qza \
--i-reads rep_seq_rdo.qza \
--o-classification ./taxonomy/taxonomy_ITS2_loc_rdo.qza

In [None]:
# Tabulate the taxonomy assignments into a .qzv visualization.

!qiime metadata tabulate \
--m-input-file taxonomy/taxonomy_ITS2_loc_rdo.qza \
--o-visualization taxonomy/taxonomy_ITS2_loc_rdo.qzv

In [None]:
# Taxonomic composition bar plot built from the filtered table, the taxonomy and the sample metadata.

!qiime taxa barplot \
--i-table filtered-table.qza \
--i-taxonomy taxonomy/taxonomy_ITS2_loc_rdo.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy/barplot_ITS2_loc_rdo.qzv

## rbcL Paired End (local)

In [None]:
# Number of leading bases trimmed from the forward / reverse rbcL reads in the
# DADA2 cells below (passed as --p-trim-left-f / --p-trim-left-r / --p-trim-left).
rbcL_f_len, rbcL_r_len = map(len, ("TGGCAGCATTYCGAGTAACTC", "GTAAAATCAAGTCCACCRCG"))

In [None]:
# Switch to the paired-end rbcL result folder; relative paths in the following cells resolve against it.
os.chdir(path_rbcL)

In [None]:
# Make one working sub-folder per sampling site in the current directory.
#
# The sites are visited in a fixed order so that subpaths[0], subpaths[1] and
# subpaths[2] always refer to ZUM, Star and RDO, which is how the cells below
# index the list. os.listdir() returns entries in arbitrary order, so the
# listing is only used to check which site folders actually hold reads.
site_dirs = set(os.listdir(rbcL_reads_local))

subpaths = []

for i in ["ZUM", "Star", "RDO"]:
    if i in site_dirs:
        os.makedirs(i, exist_ok=True)
        subpaths.append(os.path.abspath(i))

### Zumwalt

In [None]:
# Work inside the Zumwalt folder.
# NOTE(review): assumes subpaths[0] is the "ZUM" entry, which depends on the
# order in which subpaths was filled — confirm.
os.chdir(subpaths[0])

In [None]:
# Generate the QIIME 2 manifest for the ZUM rbcL read folder from the plate-setup
# workbook (sheetname=0, samples matched by "sample").

manifest_zum = metaBar.metaBar_Qiime2_Manifest(os.path.join(rbcL_reads_local, "ZUM"), platesetup, sheetname=0, matchby="sample")

# The manifest file name defaults to the primer name given in the plate setup,
# so it is renamed to a site- and marker-specific name here.
!mv ITSS2F@ITS4R_manifest.csv zum_rbcl_manifest.csv

In [None]:
# Import the paired-end FASTQ files listed in the manifest into a QIIME 2 artifact (zum_rbcl.qza).

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./zum_rbcl_manifest.csv \
--output-path zum_rbcl.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a .qzv visualization for inspecting read quality.
!qiime demux summarize \
--i-data zum_rbcl.qza \
--o-visualization zum_rbcl_seq.qzv

In [None]:
# Denoise the paired-end reads with DADA2.
# The first rbcL_f_len / rbcL_r_len bases are trimmed from the forward / reverse
# reads, which are then truncated at positions 299 / 243.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs zum_rbcl.qza \
--output-dir dada2 \
--o-table table_rbcl_zum.qza \
--o-representative-sequences rep_seq_zum_rbcl.qza \
--p-trim-left-f $rbcL_f_len \
--p-trim-left-r $rbcL_r_len \
--p-trunc-len-f 299 \
--p-trunc-len-r 243 \
--p-n-threads 12

In [None]:
# Filter the feature table with a minimum total frequency of 1000 and a minimum of 4 samples per feature.
!qiime feature-table filter-features \
--i-table table_rbcl_zum.qza \
--p-min-frequency 1000 \
--p-min-samples 4 \
--o-filtered-table filtered_table.qza

In [None]:
# Output folders for the classifier artifacts and the taxonomy results.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs("classifier", exist_ok=True)
os.makedirs("taxonomy", exist_ok=True)

In [None]:
# Import the Zumwalt rbcL reference sequences (FeatureData[Sequence]) and the
# matching headerless-TSV taxonomy mapping (FeatureData[Taxonomy]) into QIIME 2.

!qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $rbcl_zum_db \
--output-path ./classifier/rbcL_loc_zum_site.qza

!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $rbcl_zum_mapping \
--output-path ./classifier/rbcL_taxonomy_loc_zum_site.qza

In [None]:
# Train a naive Bayes classifier on the Zumwalt rbcL reference sequences and taxonomy.
# NOTE(review): this is the only fit-classifier call in the notebook that sets
# --p-feat-ext--ngram-range; confirm the difference from the other sites is intended.

!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/rbcL_loc_zum_site.qza \
--i-reference-taxonomy ./classifier/rbcL_taxonomy_loc_zum_site.qza \
--p-feat-ext--ngram-range '[32, 32]' \
--o-classifier ./classifier/classifier_rbcL_loc_zum.qza

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the trained classifier.
# NOTE(review): this is the only classify-sklearn call in the notebook that sets
# --p-confidence (0.6); confirm the difference from the other sites is intended.

!qiime feature-classifier classify-sklearn \
--i-classifier ./classifier/classifier_rbcL_loc_zum.qza \
--i-reads rep_seq_zum_rbcl.qza \
--p-confidence 0.6 \
--o-classification ./taxonomy/taxonomy_rbcL_loc_zum.qza

In [None]:
# Tabulate the taxonomy assignments into a .qzv visualization.

!qiime metadata tabulate \
--m-input-file taxonomy/taxonomy_rbcL_loc_zum.qza \
--o-visualization taxonomy/taxonomy_rbcL_loc_zum.qzv

In [None]:
# Taxonomic composition bar plot built from the filtered table, the taxonomy and the sample metadata.

!qiime taxa barplot \
--i-table filtered_table.qza \
--i-taxonomy taxonomy/taxonomy_rbcL_loc_zum.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy/barplot_rbcL_loc_zum.qzv

### Starkey

In [None]:
# Work inside the Starkey folder.
# NOTE(review): assumes subpaths[1] is the "Star" entry, which depends on the
# order in which subpaths was filled — confirm.

os.chdir(subpaths[1])

In [None]:
# Generate the QIIME 2 manifest for the Star rbcL read folder from the plate-setup
# workbook (sheetname=0, samples matched by "sample"), then give the generated
# file a site- and marker-specific name.

manifest_star = metaBar.metaBar_Qiime2_Manifest(os.path.join(rbcL_reads_local, "Star"), platesetup, sheetname=0, matchby="sample")

!mv ITSS2F@ITS4R_manifest.csv star_rbcl_manifest.csv

In [None]:
# Import the paired-end FASTQ files listed in the manifest into a QIIME 2 artifact (star_rbcl.qza).

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./star_rbcl_manifest.csv \
--output-path star_rbcl.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a .qzv visualization for inspecting read quality.
!qiime demux summarize \
--i-data star_rbcl.qza \
--o-visualization star_rbcl_seq.qzv

In [None]:
# Denoise the paired-end reads with DADA2.
# The first rbcL_f_len / rbcL_r_len bases are trimmed from the forward / reverse
# reads, which are then truncated at positions 299 / 268.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs star_rbcl.qza \
--output-dir dada2 \
--o-table table_rbcl_star.qza \
--o-representative-sequences rep_seq_star_rbcl.qza \
--p-trim-left-f $rbcL_f_len \
--p-trim-left-r $rbcL_r_len \
--p-trunc-len-f 299 \
--p-trunc-len-r 268 \
--p-n-threads 12

In [None]:
# Filter the feature table with a minimum total frequency of 1000 and a minimum of 4 samples per feature.

!qiime feature-table filter-features \
--i-table table_rbcl_star.qza \
--p-min-frequency 1000 \
--p-min-samples 4 \
--o-filtered-table filtered_table.qza

In [None]:
# Output folders for the taxonomy results and the classifier artifacts.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs("taxonomy", exist_ok=True)
os.makedirs("classifier", exist_ok=True)

In [None]:
# Import the Starkey rbcL reference sequences (FeatureData[Sequence]) and the
# matching headerless-TSV taxonomy mapping (FeatureData[Taxonomy]) into QIIME 2.

!qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $rbcl_starkey_db \
--output-path ./classifier/rbcl_loc_starkey_site.qza

!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $rbcl_starkey_mapping \
--output-path ./classifier/rbcL_taxonomy_loc_starkey_site.qza

In [None]:
# Train a naive Bayes classifier on the Starkey rbcL reference sequences and taxonomy.

!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/rbcl_loc_starkey_site.qza \
--i-reference-taxonomy ./classifier/rbcL_taxonomy_loc_starkey_site.qza \
--o-classifier ./classifier/classifier_rbcL_loc_star.qza

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the trained classifier.

!qiime feature-classifier classify-sklearn \
--i-classifier ./classifier/classifier_rbcL_loc_star.qza \
--i-reads rep_seq_star_rbcl.qza \
--o-classification ./taxonomy/taxonomy_rbcL_loc_star.qza

In [None]:
# Tabulate the taxonomy assignments into a .qzv visualization.

!qiime metadata tabulate \
--m-input-file taxonomy/taxonomy_rbcL_loc_star.qza \
--o-visualization taxonomy/taxonomy_rbcL_loc_star.qzv

In [None]:
# Taxonomic composition bar plot built from the filtered table, the taxonomy and the sample metadata.

!qiime taxa barplot \
--i-table filtered_table.qza \
--i-taxonomy taxonomy/taxonomy_rbcL_loc_star.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy/barplot_rbcL_loc_star.qzv

### Threemile farm

In [None]:
# Work inside the Threemile (RDO) folder.
# NOTE(review): assumes subpaths[2] is the "RDO" entry, which depends on the
# order in which subpaths was filled — confirm.

os.chdir(subpaths[2])

In [None]:
# Generate the QIIME 2 manifest for the RDO rbcL read folder from the plate-setup
# workbook (sheetname=0, samples matched by "sample"), then give the generated
# file a site- and marker-specific name.

manifest_rdo = metaBar.metaBar_Qiime2_Manifest(os.path.join(rbcL_reads_local, "RDO"), platesetup, sheetname=0, matchby="sample")

!mv ITSS2F@ITS4R_manifest.csv rdo_rbcl_manifest.csv

In [None]:
# Import the paired-end FASTQ files listed in the manifest into a QIIME 2 artifact (rdo_rbcl.qza).

!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path ./rdo_rbcl_manifest.csv \
--output-path rdo_rbcl.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a .qzv visualization for inspecting read quality.
!qiime demux summarize \
--i-data rdo_rbcl.qza \
--o-visualization rdo_rbcl_seq.qzv

In [None]:
# Denoise the paired-end reads with DADA2.
# The first rbcL_f_len / rbcL_r_len bases are trimmed from the forward / reverse
# reads, which are then truncated at positions 299 / 243.

!qiime dada2 denoise-paired \
--i-demultiplexed-seqs rdo_rbcl.qza \
--output-dir dada2 \
--o-table table_rbcl_rdo.qza \
--o-representative-sequences rep_seq_rdo_rbcl.qza \
--p-trim-left-f $rbcL_f_len \
--p-trim-left-r $rbcL_r_len \
--p-trunc-len-f 299 \
--p-trunc-len-r 243 \
--p-n-threads 12

In [None]:
# Filter the feature table with a minimum total frequency of 1000 and a minimum of 4 samples per feature.

!qiime feature-table filter-features \
--i-table table_rbcl_rdo.qza \
--p-min-frequency 1000 \
--p-min-samples 4 \
--o-filtered-table filtered_table.qza

In [None]:
# Output folders for the taxonomy results and the classifier artifacts.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs("taxonomy", exist_ok=True)
os.makedirs("classifier", exist_ok=True)

In [None]:
# Import the Threemile rbcL reference sequences (FeatureData[Sequence]) and the
# matching headerless-TSV taxonomy mapping (FeatureData[Taxonomy]) into QIIME 2.

!qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $rbcl_rdo_db \
--output-path ./classifier/rbcl_loc_rdo_site.qza

!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $rbcl_rdo_mapping \
--output-path ./classifier/rbcL_taxonomy_loc_rdo_site.qza

In [None]:
# Train a naive Bayes classifier on the Threemile rbcL reference sequences and taxonomy.
!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/rbcl_loc_rdo_site.qza \
--i-reference-taxonomy ./classifier/rbcL_taxonomy_loc_rdo_site.qza \
--o-classifier ./classifier/classifier_rbcL_loc_rdo.qza

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the trained classifier.
!qiime feature-classifier classify-sklearn \
--i-classifier ./classifier/classifier_rbcL_loc_rdo.qza \
--i-reads rep_seq_rdo_rbcl.qza \
--o-classification ./taxonomy/taxonomy_rbcL_loc_rdo.qza

In [None]:
# Tabulate the taxonomy assignments into a .qzv visualization.
!qiime metadata tabulate \
--m-input-file taxonomy/taxonomy_rbcL_loc_rdo.qza \
--o-visualization taxonomy/taxonomy_rbcL_loc_rdo.qzv

In [None]:
# Taxonomic composition bar plot built from the filtered table, the taxonomy and the sample metadata.
!qiime taxa barplot \
--i-table filtered_table.qza \
--i-taxonomy taxonomy/taxonomy_rbcL_loc_rdo.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy/barplot_rbcL_loc_rdo.qzv

## rbcL analysis (single end)

In [None]:
# Switch to the single-end rbcL result folder; relative paths in the following cells resolve against it.
os.chdir(path_rbcL_single)

In [None]:
# Make one working sub-folder per sampling site in the current directory.
#
# The sites are visited in a fixed order so that subpaths[0], subpaths[1] and
# subpaths[2] always refer to ZUM, Star and RDO, which is how the cells below
# index the list. os.listdir() returns entries in arbitrary order, so the
# listing is only used to check which site folders actually hold reads.
site_dirs = set(os.listdir(rbcL_reads_local))

subpaths = []

for i in ["ZUM", "Star", "RDO"]:
    if i in site_dirs:
        os.makedirs(i, exist_ok=True)
        subpaths.append(os.path.abspath(i))

### Zumwalt

In [None]:
# Work inside the Zumwalt folder.
# NOTE(review): assumes subpaths[0] is the "ZUM" entry, which depends on the
# order in which subpaths was filled — confirm.
os.chdir(subpaths[0])

In [None]:
# os.getcwd()

In [None]:
# Generate the single-end (paired=False) QIIME 2 manifest for the ZUM rbcL read
# folder from the plate-setup workbook (sheetname=0, samples matched by "sample"),
# then give the generated file a site- and marker-specific name.
manifest_zum = metaBar.metaBar_Qiime2_Manifest(os.path.join(rbcL_reads_local, "ZUM"), platesetup, sheetname=0, matchby="sample", paired=False)

!mv ITSS2F@ITS4R_manifest.csv zum_rbcl_manifest.csv

In [None]:
# Import the single-end FASTQ files listed in the manifest into a QIIME 2 artifact (zum_rbcl.qza).

!qiime tools import \
--type 'SampleData[SequencesWithQuality]' \
--input-path ./zum_rbcl_manifest.csv \
--output-path zum_rbcl.qza \
--input-format SingleEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a .qzv visualization for inspecting read quality.
!qiime demux summarize \
--i-data zum_rbcl.qza \
--o-visualization zum_rbcl_seq.qzv

In [None]:
# Denoise the single-end reads with DADA2: the first rbcL_f_len bases are
# trimmed and reads are truncated at position 299.

!qiime dada2 denoise-single \
--i-demultiplexed-seqs zum_rbcl.qza \
--output-dir dada2 \
--o-table table-zum.qza \
--o-representative-sequences rep_zum \
--p-trim-left $rbcL_f_len \
--p-trunc-len 299 \
--p-n-threads 12

In [None]:
# Filter the feature table with a minimum total frequency of 1000 and a minimum of 4 samples per feature.

!qiime feature-table filter-features \
--i-table table-zum.qza \
--p-min-frequency 1000 \
--p-min-samples 4 \
--o-filtered-table filtered_table.qza

In [None]:
# Output folders for the classifier artifacts and the taxonomy results.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs("classifier", exist_ok=True)
os.makedirs("taxonomy", exist_ok=True)

In [None]:
# Import the Zumwalt rbcL reference sequences (FeatureData[Sequence]) and the
# matching headerless-TSV taxonomy mapping (FeatureData[Taxonomy]) into QIIME 2.

!qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $rbcl_zum_db \
--output-path ./classifier/rbcL_loc_zum_site.qza

!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $rbcl_zum_mapping \
--output-path ./classifier/rbcL_taxonomy_loc_zum_site.qza

In [None]:
# Train a naive Bayes classifier on the Zumwalt rbcL reference sequences and taxonomy.

!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/rbcL_loc_zum_site.qza \
--i-reference-taxonomy ./classifier/rbcL_taxonomy_loc_zum_site.qza \
--o-classifier ./classifier/classifier_rbcL_loc_zum.qza

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the trained classifier.

!qiime feature-classifier classify-sklearn \
--i-classifier ./classifier/classifier_rbcL_loc_zum.qza \
--i-reads rep_zum.qza \
--o-classification ./taxonomy/taxonomy_rbcL_loc_zum.qza

In [None]:
# Tabulate the taxonomy assignments into a .qzv visualization.

!qiime metadata tabulate \
--m-input-file taxonomy/taxonomy_rbcL_loc_zum.qza \
--o-visualization taxonomy/taxonomy_rbcL_loc_zum.qzv

In [None]:
# Taxonomic composition bar plot built from the filtered table, the taxonomy and the sample metadata.

!qiime taxa barplot \
--i-table filtered_table.qza \
--i-taxonomy taxonomy/taxonomy_rbcL_loc_zum.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy/barplot_rbcL_loc_zum.qzv

### Starkey

In [None]:
# Work inside the Starkey folder.
# NOTE(review): assumes subpaths[1] is the "Star" entry, which depends on the
# order in which subpaths was filled — confirm.
os.chdir(subpaths[1])

In [None]:
# Generate the single-end (paired=False) QIIME 2 manifest for the Star rbcL read
# folder from the plate-setup workbook (sheetname=0, samples matched by "sample"),
# then give the generated file a site- and marker-specific name.

manifest_star = metaBar.metaBar_Qiime2_Manifest(os.path.join(rbcL_reads_local, "Star"), platesetup, sheetname=0, matchby="sample", paired=False)

!mv ITSS2F@ITS4R_manifest.csv star_rbcl_manifest.csv

In [None]:
# Import the single-end FASTQ files listed in the manifest into a QIIME 2 artifact (star_rbcl.qza).

!qiime tools import \
--type 'SampleData[SequencesWithQuality]' \
--input-path ./star_rbcl_manifest.csv \
--output-path star_rbcl.qza \
--input-format SingleEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a .qzv visualization for inspecting read quality.

!qiime demux summarize \
--i-data star_rbcl.qza \
--o-visualization star_rbcl_seq.qzv

In [None]:
# Denoise the single-end reads with DADA2: the first rbcL_f_len bases are
# trimmed and reads are truncated at position 299.

!qiime dada2 denoise-single \
--i-demultiplexed-seqs star_rbcl.qza \
--output-dir dada2 \
--o-table table-star.qza \
--o-representative-sequences rep_star \
--p-trim-left $rbcL_f_len \
--p-trunc-len 299 \
--p-n-threads 12

In [None]:
# Filter the feature table with a minimum total frequency of 1000 and a minimum of 4 samples per feature.

!qiime feature-table filter-features \
--i-table table-star.qza \
--p-min-frequency 1000 \
--p-min-samples 4 \
--o-filtered-table filtered_table.qza

In [None]:
# Output folders for the taxonomy results and the classifier artifacts.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs("taxonomy", exist_ok=True)
os.makedirs("classifier", exist_ok=True)

In [None]:
# Import the Starkey rbcL reference sequences (FeatureData[Sequence]) and the
# matching headerless-TSV taxonomy mapping (FeatureData[Taxonomy]) into QIIME 2.
!qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $rbcl_starkey_db \
--output-path ./classifier/rbcl_loc_starkey_site.qza

!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $rbcl_starkey_mapping \
--output-path ./classifier/rbcL_taxonomy_loc_starkey_site.qza

In [None]:
# Train a naive Bayes classifier on the Starkey rbcL reference sequences and taxonomy.

!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/rbcl_loc_starkey_site.qza \
--i-reference-taxonomy ./classifier/rbcL_taxonomy_loc_starkey_site.qza \
--o-classifier ./classifier/classifier_rbcL_loc_star.qza

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the trained classifier.

!qiime feature-classifier classify-sklearn \
--i-classifier ./classifier/classifier_rbcL_loc_star.qza \
--i-reads rep_star.qza \
--o-classification ./taxonomy/taxonomy_rbcL_loc_star.qza


In [None]:
# Tabulate the taxonomy assignments into a .qzv visualization.

!qiime metadata tabulate \
--m-input-file taxonomy/taxonomy_rbcL_loc_star.qza \
--o-visualization taxonomy/taxonomy_rbcL_loc_star.qzv

In [None]:
# Taxonomic composition bar plot built from the filtered table, the taxonomy and the sample metadata.

!qiime taxa barplot \
--i-table filtered_table.qza \
--i-taxonomy taxonomy/taxonomy_rbcL_loc_star.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy/barplot_rbcL_loc_star.qzv

### Threemile farm

In [None]:
# Work inside the Threemile (RDO) folder.
# NOTE(review): assumes subpaths[2] is the "RDO" entry, which depends on the
# order in which subpaths was filled — confirm.
os.chdir(subpaths[2])

In [None]:
# Generate the single-end (paired=False) QIIME 2 manifest for the RDO rbcL read
# folder from the plate-setup workbook (sheetname=0, samples matched by "sample"),
# then give the generated file a site- and marker-specific name.

manifest_rdo = metaBar.metaBar_Qiime2_Manifest(os.path.join(rbcL_reads_local, "RDO"), platesetup, sheetname=0, matchby="sample", paired=False)

!mv ITSS2F@ITS4R_manifest.csv rdo_rbcl_manifest.csv

In [None]:
# Import the single-end FASTQ files listed in the manifest into a QIIME 2 artifact (rdo_rbcl.qza).
!qiime tools import \
--type 'SampleData[SequencesWithQuality]' \
--input-path ./rdo_rbcl_manifest.csv \
--output-path rdo_rbcl.qza \
--input-format SingleEndFastqManifestPhred33

In [None]:
# Summarize the imported reads into a .qzv visualization for inspecting read quality.
!qiime demux summarize \
--i-data rdo_rbcl.qza \
--o-visualization rdo_rbcl_seq.qzv

In [None]:
# Denoise the single-end reads with DADA2: the first rbcL_f_len bases are
# trimmed and reads are truncated at position 299.

!qiime dada2 denoise-single \
--i-demultiplexed-seqs rdo_rbcl.qza \
--output-dir dada2 \
--o-table table-rdo.qza \
--o-representative-sequences rep_rdo \
--p-trim-left $rbcL_f_len \
--p-trunc-len 299 \
--p-n-threads 12

In [None]:
# Filter the feature table with a minimum total frequency of 1000 and a minimum of 4 samples per feature.

!qiime feature-table filter-features \
--i-table table-rdo.qza \
--p-min-frequency 1000 \
--p-min-samples 4 \
--o-filtered-table filtered_table.qza

In [None]:
# Output folders for the taxonomy results and the classifier artifacts.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs("taxonomy", exist_ok=True)
os.makedirs("classifier", exist_ok=True)

In [None]:
# Import the Threemile rbcL reference sequences (FeatureData[Sequence]) and the
# matching headerless-TSV taxonomy mapping (FeatureData[Taxonomy]) into QIIME 2.

!qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $rbcl_rdo_db \
--output-path ./classifier/rbcl_loc_rdo_site.qza

!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $rbcl_rdo_mapping \
--output-path ./classifier/rbcL_taxonomy_loc_rdo_site.qza

In [None]:
# Train a naive Bayes classifier on the Threemile rbcL reference sequences and taxonomy.

!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/rbcl_loc_rdo_site.qza \
--i-reference-taxonomy ./classifier/rbcL_taxonomy_loc_rdo_site.qza \
--o-classifier ./classifier/classifier_rbcL_loc_rdo.qza

In [None]:
# Assign taxonomy to the DADA2 representative sequences with the trained classifier.

!qiime feature-classifier classify-sklearn \
--i-classifier ./classifier/classifier_rbcL_loc_rdo.qza \
--i-reads rep_rdo.qza \
--o-classification ./taxonomy/taxonomy_rbcL_loc_rdo.qza

In [None]:
# Tabulate the taxonomy assignments into a .qzv visualization.

!qiime metadata tabulate \
--m-input-file taxonomy/taxonomy_rbcL_loc_rdo.qza \
--o-visualization taxonomy/taxonomy_rbcL_loc_rdo.qzv

In [None]:
# Taxonomic composition bar plot built from the filtered table, the taxonomy and the sample metadata.

!qiime taxa barplot \
--i-table filtered_table.qza \
--i-taxonomy taxonomy/taxonomy_rbcL_loc_rdo.qza \
--m-metadata-file $metafile \
--o-visualization taxonomy/barplot_rbcL_loc_rdo.qzv