In [2]:
import os
import pandas as pd
from qiime2 import Visualization
import matplotlib.pyplot as plt
import numpy as np
import qiime2 as q2

# Directory that holds every downloaded input and every artifact generated
# by the cells of this notebook.
data_dir = "data"

# exist_ok=True makes this cell safe to re-run and removes the
# check-then-create race of a separate os.path.isdir() test. It still raises
# FileExistsError if the path exists but is not a directory.
os.makedirs(data_dir, exist_ok=True)

In [3]:
# Data import: download the filtered representative sequences, the taxonomy
# classification and the feature table from Polybox into data_dir.
# The import links are still being corrected (midterm).
# NOTE(review): the recorded output of this cell shows a different share ID
# (c3rjz7z9GBUXvE8) for taxonomy_classification.qza than the URL used here
# (X8AoIryeOnLaCn5) - confirm which link is current and re-run the cell.
! wget -nv -O $data_dir/rep-seq-filtered.qza 'https://polybox.ethz.ch/index.php/s/vtDJzaCrmebL2u7/download'
! wget -nv -O $data_dir/taxonomy_classification.qza 'https://polybox.ethz.ch/index.php/s/X8AoIryeOnLaCn5/download'
! wget -nv -O $data_dir/feature-table.qza 'https://polybox.ethz.ch/index.php/s/IAxJdqn5l3NqIQC/download'

2022-10-30 11:50:09 URL:https://polybox.ethz.ch/index.php/s/vtDJzaCrmebL2u7/download [552613/552613] -> "data/rep-seq-filtered.qza" [1]
2022-10-30 11:50:09 URL:https://polybox.ethz.ch/index.php/s/c3rjz7z9GBUXvE8/download [1967910/1967910] -> "data/taxonomy_classification.qza" [1]
2022-10-30 11:50:10 URL:https://polybox.ethz.ch/index.php/s/IAxJdqn5l3NqIQC/download [919161/919161] -> "data/feature-table.qza" [1]


In [4]:
# Control check: peek at the downloaded representative-sequences artifact
# (prints its UUID, semantic type and data format).
! qiime tools peek $data_dir/rep-seq-filtered.qza

[32mUUID[0m:        5f62041b-744a-4726-8a47-fcec99c76474
[32mType[0m:        FeatureData[Sequence]
[32mData format[0m: DNASequencesDirectoryFormat


### 1. Reference-based fragment insertion 

In [10]:
# Download the SEPP reference (Greengenes 13_8 database at 99% identity) into
# data_dir; it is passed as --i-reference-database to the fragment insertion.
! wget -nv -O $data_dir/sepp-refs-gg-13-8.qza https://data.qiime2.org/2021.4/common/sepp-refs-gg-13-8.qza

2022-10-25 14:25:12 URL:https://s3-us-west-2.amazonaws.com/qiime2-data/2021.4/common/sepp-refs-gg-13-8.qza [50161069/50161069] -> "data/sepp-refs-gg-13-8.qza" [1]


In [11]:
# Insert the representative sequences into the reference tree with SEPP,
# using 2 threads. Writes the resulting rooted phylogeny to reference-tree.qza
# and the placements to reference-tree-placements.qza.
! qiime fragment-insertion sepp \
    --i-representative-sequences $data_dir/rep-seq-filtered.qza \
    --i-reference-database $data_dir/sepp-refs-gg-13-8.qza \
    --p-threads 2 \
    --o-tree $data_dir/reference-tree.qza \
    --o-placements $data_dir/reference-tree-placements.qza

[32mSaved Phylogeny[Rooted] to: data/reference-tree.qza[0m
[32mSaved Placements to: data/reference-tree-placements.qza[0m
[0m

In [12]:
# Control check: peek at the taxonomy classification artifact before it is
# used as feature metadata for the tree visualization.
! qiime tools peek $data_dir/taxonomy_classification.qza

[32mUUID[0m:        9d6f3b3c-61b6-4853-b358-4d139367de63
[32mType[0m:        FeatureData[Taxonomy]
[32mData format[0m: TSVTaxonomyDirectoryFormat


In [14]:
# Tree visualization: build an Empress plot of the insertion tree, with the
# taxonomy classification attached as feature metadata, and save it as
# fragment-tree.qzv.
! qiime empress tree-plot \
    --i-tree $data_dir/reference-tree.qza \
    --m-feature-metadata-file $data_dir/taxonomy_classification.qza \
    --o-visualization $data_dir/fragment-tree.qzv

[32mSaved Visualization to: data/fragment-tree.qzv[0m
[0m

In [15]:
# Load the Empress tree visualization written by the tree-plot step; it is
# the last expression of the cell, so its result is the cell's output.
Visualization.load(data_dir + '/fragment-tree.qzv')

### 2. Filter feature tables 

The feature table is split into two outputs: one contains only the fragments that are present in the insertion tree, and the other contains the fragments that are NOT in the insertion tree (SEPP may reject some fragments). **For the calculation of Faith's PD (alpha diversity), use only the fragments in phylogeny_filtered_table.qza.**

In [16]:
# Control check: peek at the feature table artifact before filtering it
# against the insertion tree.
! qiime tools peek $data_dir/feature-table.qza

[32mUUID[0m:        3bac291f-31ad-4ae0-9eb2-7aa61da41ad2
[32mType[0m:        FeatureTable[Frequency]
[32mData format[0m: BIOMV210DirFmt


In [21]:
# Split the feature table against the insertion tree: fragments present in the
# tree are written to phylogeny_filtered_table.qza, the remaining fragments to
# phylogeny_removed_table.qza.
! qiime fragment-insertion filter-features \
    --i-table $data_dir/feature-table.qza \
    --i-tree $data_dir/reference-tree.qza \
    --o-filtered-table $data_dir/phylogeny_filtered_table.qza \
    --o-removed-table $data_dir/phylogeny_removed_table.qza

[32mSaved FeatureTable[Frequency] to: data/phylogeny_filtered_table.qza[0m
[32mSaved FeatureTable[Frequency] to: data/phylogeny_removed_table.qza[0m
[0m

Polybox link to the data: https://polybox.ethz.ch/index.php/s/pNA39R0rl2xMMj9