In [2]:
# importing all required packages at the start of the notebook
import IPython
import os
import pandas as pd
import qiime2 as q2
from qiime2 import Visualization
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [3]:
# Root folder for the project's artifacts; the cells below build their paths from it.
data_dir = "project_data"

# 3. Taxonomy classification

## 3.1 Reference database construction

First we are going to download the UNITE database using RESCRIPt:

In [3]:
# Create the folder that will hold the UNITE reference files.
# Pure-Python equivalent of `mkdir -p`: parents are created as needed and an
# already-existing folder is not an error, without depending on a POSIX shell.
os.makedirs(os.path.join(data_dir, "uniteDB"), exist_ok=True)

In [4]:
!qiime rescript get-unite-data \
    --p-version "2025-02-19" \
    --p-taxon-group eukaryotes \
    --p-cluster-id dynamic \
    --p-no-singletons \
    --verbose \
    --o-taxonomy $data_dir/uniteDB/taxonomy.qza \
    --o-sequences $data_dir/uniteDB/sequences.qza

  import pkg_resources
[32mSaved FeatureData[Taxonomy] to: project_data/uniteDB/taxonomy.qza[0m
[32mSaved FeatureData[Sequence] to: project_data/uniteDB/sequences.qza[0m
[0m[?25h

In [5]:
! ls -lh $data_dir/uniteDB

total 51M
-rwxr-xr-x 1 jovyan jovyan 281K Oct 15 14:37 its-region.qza
-rwxr-xr-x 1 jovyan jovyan  39K Oct 15 14:07 sequences-discarded.qza
-rwxr-xr-x 1 jovyan jovyan  25M Oct 15 14:07 sequences-filtered.qza
-rwxr-xr-x 1 jovyan jovyan  22M Oct 21 11:57 sequences.qza
-rwxr-xr-x 1 jovyan jovyan 3.9M Oct 21 11:56 taxonomy.qza


In [6]:
!qiime rescript filter-seqs-length \
  --i-sequences $data_dir/uniteDB/sequences.qza \
  --p-global-min 100 \
  --o-filtered-seqs $data_dir/uniteDB/sequences-filtered.qza \
  --o-discarded-seqs $data_dir/uniteDB/sequences-discarded.qza

  import pkg_resources
[32mSaved FeatureData[Sequence] to: project_data/uniteDB/sequences-filtered.qza[0m
[32mSaved FeatureData[Sequence] to: project_data/uniteDB/sequences-discarded.qza[0m
[0m[?25h

In [8]:
!qiime feature-classifier extract-reads \
  --i-sequences $data_dir/uniteDB/sequences-filtered.qza \
  --p-f-primer "CTTGGTCATTTAGAGGAAGTAA" \
  --p-r-primer "GCATCGATGAAGAACGCAGC" \
  --p-read-orientation "forward" \
  --o-reads $data_dir/uniteDB/sequences-filtered-its1.qza

  import pkg_resources
[32mSaved FeatureData[Sequence] to: project_data/uniteDB/sequences-filtered-its1.qza[0m
[0m[?25h

## 3.2 Training taxonomy classifier

In [13]:
! qiime feature-classifier fit-classifier-naive-bayes \
    --i-reference-reads $data_dir/uniteDB/sequences-filtered-its1.qza \
    --i-reference-taxonomy $data_dir/uniteDB/taxonomy.qza \
    --o-classifier $data_dir/uniteDB/classifier.qza

  import pkg_resources
[32mSaved TaxonomicClassifier to: project_data/uniteDB/classifier.qza[0m
[0m[?25h

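To evaluate the classifier, we apply it to our representative sequences in the next section and inspect the resulting taxonomy assignments.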

## 3.3 Taxonomy assignment

In [14]:
! qiime feature-classifier classify-sklearn \
    --i-classifier $data_dir/uniteDB/classifier.qza \
    --i-reads $data_dir/dada2_rep_set.qza \
    --o-classification $data_dir/taxonomy.qza

  import pkg_resources
[32mSaved FeatureData[Taxonomy] to: project_data/taxonomy.qza[0m
[0m[?25h

In [6]:
# Same but with trimmed

! qiime feature-classifier classify-sklearn \
    --i-classifier $data_dir/uniteDB/classifier.qza \
    --i-reads $data_dir/dada2_trimmed_rep_set.qza \
    --o-classification $data_dir/taxonomy_trimmed.qza

  import pkg_resources
[32mSaved FeatureData[Taxonomy] to: project_data/taxonomy_trimmed.qza[0m
[0m[?25h

In [15]:
! qiime metadata tabulate \
    --m-input-file $data_dir/taxonomy.qza \
    --o-visualization $data_dir/taxonomy.qzv

  import pkg_resources
[32mSaved Visualization to: project_data/taxonomy.qzv[0m
[0m[?25h

In [7]:
# Same but with trimmed
! qiime metadata tabulate \
    --m-input-file $data_dir/taxonomy_trimmed.qza \
    --o-visualization $data_dir/taxonomy_trimmed.qzv

  import pkg_resources
[32mSaved Visualization to: project_data/taxonomy_trimmed.qzv[0m
[0m[?25h

In [4]:
# Display the taxonomy table inline (the loaded visualization is the cell's output).
taxonomy_viz_path = f"{data_dir}/taxonomy.qzv"
Visualization.load(taxonomy_viz_path)

In [8]:
# Display the taxonomy table for the trimmed sequences inline.
taxonomy_trimmed_viz_path = f"{data_dir}/taxonomy_trimmed.qzv"
Visualization.load(taxonomy_trimmed_viz_path)

In [12]:
! qiime taxa filter-table \
    --i-table $data_dir/dada2_table.qza \
    --i-taxonomy $data_dir/taxonomy.qza \
    --p-include c__ \ #can be modified according to the classification level you want
    --o-filtered-table $data_dir/dada2_table_filtered.qza

! qiime taxa filter-seqs \
    --i-sequences $data_dir/dada2_rep_set.qza \
    --i-taxonomy $data_dir/taxonomy.qza \
    --p-include c__ \
    --o-filtered-sequences $data_dir/dada2_rep_set_filtered.qza

IndentationError: unexpected indent (2697389468.py, line 2)