# 1. Import packages

In [13]:
# Importing all required packages at the start of the notebook
import IPython

from qiime2 import Visualization

import qiime2 as q2
import pandas as pd
import matplotlib.pyplot as plt
import os

%matplotlib inline

# 2. Import the data

In [14]:
# Location of the project's data.
# The directory is created from Python instead of shelling out to `mkdir`, so
# the cell is portable and guarantees that `data_dir` itself exists (not only
# its parent folder). `exist_ok=True` keeps the cell safe to re-run.
data_dir = "Project_data/Import_and_Denoising"
os.makedirs(data_dir, exist_ok=True)

In [15]:
%%bash -s $data_dir
# Download the forward-reads artifact into the data directory, which is passed
# in as the first positional argument ($1).
# Stop at the first failing command: without this, a failed download would be
# masked by the exit status of the trailing chmod and the cell would appear
# to succeed.
set -euo pipefail

mkdir -p "$1"

# -nc: do not download again when the file is already present in "$1".
wget -nc --progress=dot:giga -P "$1" https://polybox.ethz.ch/index.php/s/uV06vmm96ZzB5eM/download/fungut_forward_reads.qza

# Add read/write/execute permissions recursively under the data directory.
chmod -R +rxw "$1"

File ‘Project_data/Import_and_Denoising/fungut_forward_reads.qza’ already there; not retrieving.



# 3. Feature table construction

In [4]:
# Visual summary of the data
! qiime demux summarize \
    --i-data $data_dir/fungut_forward_reads.qza \
    --o-visualization $data_dir/fungut_forward_reads_demux_seqs.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Import_and_Denoizing/fungut_forward_reads_demux_seqs.qzv[0m
[0m[?25h

In [11]:
# Show the summary of the unfiltered forward reads inline.
Visualization.load("{}/fungut_forward_reads_demux_seqs.qzv".format(data_dir))

The mean quality score is quite high across the nucleotide positions (quality score = 38 at the last position). However, the lower whisker drops sharply at an early position, suggesting that quality is heterogeneous among the reads.
Because of this, and because ITS regions vary in length, we decided to first filter the reads based on their PHRED quality score and not to truncate them to a fixed length.

# 4. Quality filtering

In [6]:
! qiime quality-filter q-score \
    --i-demux $data_dir/fungut_forward_reads.qza \
    --p-min-quality 30 \
    --o-filtered-sequences $data_dir/fungut_forward_reads_quality_filtered.qza \
    --o-filter-stats $data_dir/quality_filtering_stats.qza 

  import pkg_resources
[32mSaved SampleData[SequencesWithQuality] to: Project_data/Import_and_Denoizing/fungut_forward_reads_quality_filtered.qza[0m
[32mSaved QualityFilterStats to: Project_data/Import_and_Denoizing/quality_filtering_stats.qza[0m
[0m[?25h

In [7]:
! qiime metadata tabulate \
    --m-input-file $data_dir/quality_filtering_stats.qza \
    --o-visualization $data_dir/quality_filtering_stats.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Import_and_Denoizing/quality_filtering_stats.qzv[0m
[0m[?25h

In [5]:
# Show the quality-filtering statistics inline.
Visualization.load("{}/quality_filtering_stats.qzv".format(data_dir))

In [6]:
! qiime demux summarize \
    --i-data $data_dir/fungut_forward_reads_quality_filtered.qza \
    --o-visualization $data_dir/fungut_forward_reads_filtered_demux_seqs.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Import_and_Denoising/fungut_forward_reads_filtered_demux_seqs.qzv[0m
[0m[?25h

In [7]:
# Show the summary of the quality-filtered reads inline.
Visualization.load("{}/fungut_forward_reads_filtered_demux_seqs.qzv".format(data_dir))

# 5. Denoising

In [9]:
! qiime dada2 denoise-single \
    --i-demultiplexed-seqs $data_dir/fungut_forward_reads_quality_filtered.qza \
    --p-trunc-len 0 \
    --p-n-threads 3 \
    --o-table $data_dir/dada2_table.qza \
    --o-representative-sequences $data_dir/dada2_rep_set.qza \
    --o-denoising-stats $data_dir/dada2_stats.qza

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: Project_data/Import_and_Denoizing/dada2_table.qza[0m
[32mSaved FeatureData[Sequence] to: Project_data/Import_and_Denoizing/dada2_rep_set.qza[0m
[32mSaved SampleData[DADA2Stats] to: Project_data/Import_and_Denoizing/dada2_stats.qza[0m
[0m[?25h

In [10]:
! qiime metadata tabulate \
  --m-input-file $data_dir/dada2_stats.qza \
  --o-visualization $data_dir/dada2_stats.qzv

! qiime feature-table tabulate-seqs \
  --i-data $data_dir/dada2_rep_set.qza \
  --o-visualization $data_dir/dada2_rep_set.qzv

! qiime feature-table summarize \
  --i-table $data_dir/dada2_table.qza \
  --m-sample-metadata-file Project_data/Metadata/updated_fungut_metadata.tsv \
  --o-visualization $data_dir/dada2_table.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Import_and_Denoizing/dada2_stats.qzv[0m
  import pkg_resources
[32mSaved Visualization to: Project_data/Import_and_Denoizing/dada2_rep_set.qzv[0m
  import pkg_resources
[32mSaved Visualization to: Project_data/Import_and_Denoizing/dada2_table.qzv[0m
[0m[?25h

In [8]:
# Show the DADA2 denoising statistics inline.
Visualization.load("{}/dada2_stats.qzv".format(data_dir))

In [9]:
# Show the DADA2 representative sequences inline.
Visualization.load("{}/dada2_rep_set.qzv".format(data_dir))

In [10]:
# Show the DADA2 feature table summary inline.
Visualization.load("{}/dada2_table.qzv".format(data_dir))