# 3. Taxonomic Classification
## Import data & packages

In [12]:
# 1 - Import all packages
import IPython
import pandas as pd
import matplotlib.pyplot as plt
import os
import qiime2 as q2
from qiime2 import Visualization

%matplotlib inline

In [13]:
# 2 - Set working directory
# The project root defaults to the original hard-coded location. Set the
# TUMMYTRIBE_PROJECT_DIR environment variable to run this notebook from a
# different checkout without editing the cell.
project_dir = os.environ.get(
    "TUMMYTRIBE_PROJECT_DIR",
    "/home/jovyan/Project/MicrobiomeAnalysis_TummyTribe/",
)
os.chdir(project_dir)

# Verify that your working directory is the overall project folder (.../MicrobiomeAnalysis_TummyTribe)
print("Current working directory:", os.getcwd())

Current working directory: /home/jovyan/Project/MicrobiomeAnalysis_TummyTribe


In [14]:
# 3 - Data locations, relative to the project working directory
processed_data_dir = "data/processed-pre_trained"  # QIIME 2 artifacts read and written by this notebook
data_dir = "data/raw"  # raw inputs: sample metadata and the downloaded classifier

##  Set-up SILVA (ran!)

Assign taxonomy using a SILVA v4 pre-trained classifier (as stated in the project description).
- https://forum.qiime2.org/t/processing-filtering-and-evaluating-the-silva-database-and-other-reference-sequence-data-with-rescript/15494

In [4]:
# also takes a long time to run all these cells! 

# ! qiime rescript get-silva-data \
#     --p-version '138.2' \
#     --p-target 'SSURef_NR99' \
#     --o-silva-sequences  $processed_data_dir/silva-138.2-ssu-nr99-rna-seqs.qza \
#     --o-silva-taxonomy $processed_data_dir/silva-138.2-ssu-nr99-tax.qza

In [5]:
# ! qiime rescript reverse-transcribe \
#     --i-rna-sequences $processed_data_dir/silva-138.2-ssu-nr99-rna-seqs.qza \
#     --o-dna-sequences $processed_data_dir/silva-138.2-ssu-nr99-seqs.qza

In [6]:
# ! qiime rescript cull-seqs \
#     --i-sequences $processed_data_dir/silva-138.2-ssu-nr99-seqs.qza \
#     --o-clean-sequences $processed_data_dir/silva-138.2-ssu-nr99-seqs-cleaned.qza

In [7]:
# ! qiime rescript filter-seqs-length-by-taxon \
#     --i-sequences $processed_data_dir/silva-138.2-ssu-nr99-seqs-cleaned.qza \
#     --i-taxonomy $processed_data_dir/silva-138.2-ssu-nr99-tax.qza \
#     --p-labels Archaea Bacteria Eukaryota \
#     --p-min-lens 900 1200 1400 \
#     --o-filtered-seqs $processed_data_dir/silva-138.2-ssu-nr99-seqs-filt.qza \
#     --o-discarded-seqs $processed_data_dir/silva-138.2-ssu-nr99-seqs-discard.qza 

In [8]:
# ! qiime rescript dereplicate \
#     --i-sequences $processed_data_dir/silva-138.2-ssu-nr99-seqs-filt.qza  \
#     --i-taxa $processed_data_dir/silva-138.2-ssu-nr99-tax.qza \
#     --p-mode 'uniq' \
#     --o-dereplicated-sequences $processed_data_dir/silva-138.2-ssu-nr99-seqs-derep-uniq.qza \
#     --o-dereplicated-taxa $processed_data_dir/silva-138.2-ssu-nr99-tax-derep-uniq.qza

In [9]:
# ! qiime feature-classifier fit-classifier-naive-bayes \
#   --i-reference-reads  $processed_data_dir/silva-138.2-ssu-nr99-seqs-derep-uniq.qza \
#   --i-reference-taxonomy $processed_data_dir/silva-138.2-ssu-nr99-tax-derep-uniq.qza \
#   --p-classify--chunk-size 10 \
#   --o-classifier $processed_data_dir/silva-138.2-ssu-nr99-classifier.qza ; \
#   if [ ! $? -eq 0 ]; then echo "Command failed - most likely reason is not enough memory."; fi

##  Assign Taxonomy

Pre-trained classifier from https://library.qiime2.org/data-resources (EXPERIMENTAL: human-stool-weighted SILVA 138 99% OTUs, full-length sequences).



In [None]:
# It denies permission for me :/
! wget -O $raw_data/silva-138-99-nb-human-stool-weighted-classifier.qza \
https://data.qiime2.org/classifiers/sklearn-1.4.2/silva/silva-138-99-nb-human-stool-weighted-classifier.qza

--2025-10-21 11:05:44--  https://data.qiime2.org/classifiers/sklearn-1.4.2/silva/silva-138-99-nb-human-stool-weighted-classifier.qza
Resolving data.qiime2.org (data.qiime2.org)... 54.200.1.12
Connecting to data.qiime2.org (data.qiime2.org)|54.200.1.12|:443... connected.
HTTP request sent, awaiting response... 302 FOUND
Location: https://s3-us-west-2.amazonaws.com/qiime2-data/classifiers/sklearn-1.4.2/silva/silva-138-99-nb-human-stool-weighted-classifier.qza [following]
--2025-10-21 11:05:45--  https://s3-us-west-2.amazonaws.com/qiime2-data/classifiers/sklearn-1.4.2/silva/silva-138-99-nb-human-stool-weighted-classifier.qza
Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 3.5.86.24, 52.92.236.144, 52.218.234.80, ...
Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|3.5.86.24|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 218311668 (208M) [binary/octet-stream]
Saving to: ‘data/raw/silva-138-99-nb-human-stool-weighted-class

In [None]:
! qiime feature-classifier classify-sklearn \
    --i-classifier $data_dir/silva-138-99-nb-human-stool-weighted-classifier.qza \
    --i-reads $processed_data_dir/dada2_rep_set_140.qza \
    --o-classification $processed_data_dir/taxonomy_140.qza

  import pkg_resources
[32mSaved FeatureData[Taxonomy] to: data/processed-pre_trained/taxonomy_140.qza[0m
[0m[?25h

In [None]:
! qiime tools peek $processed_data_dir/taxonomy_140.qza

<<<<<<< local <modified: >


[32mUUID[0m:        5fae7627-7901-48e3-8b7a-22db477b8a22
[32mType[0m:        FeatureData[Taxonomy]
[32mData format[0m: TSVTaxonomyDirectoryFormat




[32mUUID[0m:        b2e2ff48-8578-496e-89d2-44d637e73c98
[32mType[0m:        FeatureData[Taxonomy]
[32mData format[0m: TSVTaxonomyDirectoryFormat


>>>>>>> remote <modified: >


In [None]:
! qiime metadata tabulate \
    --m-input-file $processed_data_dir/taxonomy_140.qza \
    --o-visualization $processed_data_dir/taxonomy_140.qzv

  import pkg_resources
[32mSaved Visualization to: data/processed-pre_trained/taxonomy_140.qzv[0m
[0m[?25h

In [None]:
# Display the tabulated taxonomy inline; the last expression is the cell output.
taxonomy_viz_path = f"{processed_data_dir}/taxonomy_140.qzv"
Visualization.load(taxonomy_viz_path)

<<<<<<< local


>>>>>>> remote
<<<<<<< local <removed>


>>>>>>> remote <modified: text/html, text/plain>


In [None]:
! qiime taxa barplot \
    --i-table $processed_data_dir/dada2_table_140.qza \
    --i-taxonomy $processed_data_dir/taxonomy_140.qza \
    --m-metadata-file $data_dir/metadata.tsv \
    --o-visualization $processed_data_dir/taxa-bar-plots.qzv

  import pkg_resources
[32mSaved Visualization to: data/processed-pre_trained/taxa-bar-plots.qzv[0m
[0m[?25h

In [None]:
# Display the unfiltered taxa bar plot inline.
barplot_viz_path = f"{processed_data_dir}/taxa-bar-plots.qzv"
Visualization.load(barplot_viz_path)

<<<<<<< local




>>>>>>> remote


## Filtering feature tables and sequences based on taxonomy annotations

In [None]:
! qiime taxa filter-table \
    --i-table $processed_data_dir/dada2_table_140.qza \
    --i-taxonomy $processed_data_dir/taxonomy_140.qza \
    --p-exclude mitochondria,chloroplast \
    --p-include c__ \
    --o-filtered-table $processed_data_dir/table-filtered_140.qza

! qiime taxa filter-seqs \
    --i-sequences $processed_data_dir/dada2_rep_set_140.qza \
    --i-taxonomy $processed_data_dir/taxonomy_140.qza \
    --p-exclude mitochondria,chloroplast \
    --p-include c__ \
    --o-filtered-sequences $processed_data_dir/rep-seqs-filtered_140.qza

<<<<<<< local <removed>


  import pkg_resources
[32mSaved FeatureTable[Frequency] to: data/processed-pre_trained/table-filtered_140.qza[0m
  import pkg_resources
[32mSaved FeatureData[Sequence] to: data/processed-pre_trained/rep-seqs-filtered_140.qza[0m
[0m[?25h

>>>>>>> remote <modified: >


In [None]:
! qiime taxa barplot \
  --i-table $processed_data_dir/table-filtered_140.qza \
  --i-taxonomy $processed_data_dir/taxonomy_140.qza \
  --m-metadata-file $data_dir/metadata.tsv \
  --o-visualization $processed_data_dir/taxa-bar-plots_filtered.qzv

<<<<<<< local <removed>


  import pkg_resources
[32mSaved Visualization to: data/processed-pre_trained/taxa-bar-plots_filtered.qzv[0m
[0m[?25h

>>>>>>> remote <modified: >


In [None]:
# Display the filtered taxa bar plot inline.
filtered_barplot_viz_path = f"{processed_data_dir}/taxa-bar-plots_filtered.qzv"
Visualization.load(filtered_barplot_viz_path)