In [3]:
import IPython

import pandas as pd
import matplotlib.pyplot as plt
import qiime2 as q2
import seaborn as sns
from qiime2 import Visualization


import os

import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Directory that holds the input data and every artifact written by this notebook.
data_dir = "livia_data"
# Tab-separated sample metadata; derived from data_dir so the directory name
# is spelled out in exactly one place.
PATH = f"{data_dir}/fungut_metadata.tsv"

In [5]:
# Load the sample metadata; read_table uses a tab separator by default.
surveys_df = pd.read_table(PATH)

In [19]:
# Preview the first five metadata rows to sanity-check the parsed columns.
surveys_df.head(n=5)

Unnamed: 0,ID,country_sample,state_sample,latitude_sample,longitude_sample,sex_sample,age_years_sample,height_cm_sample,weight_kg_sample,bmi_sample,diet_type_sample,ibd_sample,gluten_sample
0,ERR5327198,USA,TN,36.1,-86.8,female,67.0,152.0,41.0,17.75,Omnivore,I do not have this condition,No
1,ERR5327199,USA,DC,38.9,-77.1,male,55.0,182.0,79.0,23.73,Omnivore,I do not have this condition,I was diagnosed with gluten allergy (anti-glut...
2,ERR5327266,USA,VA,38.9,-77.1,female,28.0,175.0,61.0,19.94,Omnivore,I do not have this condition,I do not eat gluten because it makes me feel bad
3,ERR5327282,United Kingdom,Not provided,51.6,-0.2,female,26.0,166.0,60.0,21.77,Omnivore,I do not have this condition,No
4,ERR5327284,United Kingdom,Not provided,51.5,-0.2,female,25.0,173.0,59.0,20.01,Vegetarian but eat seafood,I do not have this condition,No


In [6]:
# Working directory referenced by the QIIME 2 commands and Visualization.load calls below.
data_dir = "livia_data"

In [5]:
# Print the UUID, semantic type and data format of the forward-reads artifact.
!qiime tools peek {data_dir}/fungut_forward_reads.qza


[32mUUID[0m:        3638611d-1767-413b-9390-70ee3d78e4ff
[32mType[0m:        SampleData[SequencesWithQuality]
[32mData format[0m: SingleLanePerSampleSingleEndFastqDirFmt


In [7]:
# Summarize the demultiplexed forward reads and save the result as a .qzv visualization.
!qiime demux summarize \
    --i-data {data_dir}/fungut_forward_reads.qza \
    --o-visualization {data_dir}/demux_summary.qzv

  import pkg_resources
[32mSaved Visualization to: livia_data/demux_summary.qzv[0m
[0m[?25h

In [18]:
# Load the demux summary; as the cell's last expression it is shown by the notebook.
Visualization.load(data_dir + "/demux_summary.qzv")

# Trimming the primers

In [10]:
# Trim the 5' primer from the single-end forward reads with cutadapt.
# NOTE(review): the --p-front sequence is presumably the forward amplification
# primer used for this dataset; confirm against the study protocol.
# --verbose echoes the underlying cutadapt commands and their logs.
!qiime cutadapt trim-single \
    --i-demultiplexed-sequences {data_dir}/fungut_forward_reads.qza \
    --p-front CTTGGTCATTTAGAGGAAGTAA \
    --o-trimmed-sequences {data_dir}/fungut_forward_reads_trimmed.qza \
    --verbose

  import pkg_resources
Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: cutadapt -u 0 --error-rate 0.1 --times 1 --overlap 3 --minimum-length 1 -q 0,0 --quality-base 33 --cores 1 -o /tmp/qiime2/jovyan/processes/627-1761215648.58@jovyan/tmp/q2-OutPath-trnksspa/ERR5327198_01_L001_R1_001.fastq.gz --front CTTGGTCATTTAGAGGAAGTAA /tmp/qiime2/jovyan/data/3638611d-1767-413b-9390-70ee3d78e4ff/data/ERR5327198_01_L001_R1_001.fastq.gz

This is cutadapt 5.1 with Python 3.10.14
Command line parameters: -u 0 --error-rate 0.1 --times 1 --overlap 3 --minimum-length 1 -q 0,0 --quality-base 33 --cores 1 -o /tmp/qiime2/jovyan/processes/627-1761215648.58@jovyan/tmp/q2-OutPath-trnksspa/ERR5327198_01_L001_R1_001.fastq.gz --front CTTGGTCATTTAGAGGAAGTAA /tmp/qiime2/jovyan/data/3638611d-1767-413b-9390-70ee3d78e4ff/data/E

# Denoising

In [12]:
# Denoise the original (untrimmed) forward reads with DADA2.
# trim-left and trunc-len are both 0, so no fixed-position trimming or
# truncation is requested. Writes the feature table, the representative
# sequences and the per-sample denoising statistics into data_dir.
!qiime dada2 denoise-single \
    --i-demultiplexed-seqs {data_dir}/fungut_forward_reads.qza \
    --p-trim-left 0 \
    --p-trunc-len 0 \
    --p-min-fold-parent-over-abundance 4 \
    --p-max-ee 4 \
    --o-representative-sequences {data_dir}/rep_seqs.qza \
    --o-table {data_dir}/table.qza \
    --o-denoising-stats {data_dir}/stats.qza

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: livia_data/table.qza[0m
[32mSaved FeatureData[Sequence] to: livia_data/rep_seqs.qza[0m
[32mSaved SampleData[DADA2Stats] to: livia_data/stats.qza[0m
[0m[?25h

In [13]:
# Summarize the denoised feature table, attaching the sample metadata file.
!qiime feature-table summarize \
    --i-table {data_dir}/table.qza \
    --o-visualization {data_dir}/table_summary.qzv \
    --m-sample-metadata-file {data_dir}/fungut_metadata.tsv

  import pkg_resources
[32mSaved Visualization to: livia_data/table_summary.qzv[0m
[0m[?25h

In [14]:
# Tabulate the representative sequences from the untrimmed run into a .qzv visualization.
!qiime feature-table tabulate-seqs \
    --i-data {data_dir}/rep_seqs.qza \
    --o-visualization {data_dir}/rep_seqs.qzv

  import pkg_resources
[32mSaved Visualization to: livia_data/rep_seqs.qzv[0m
[0m[?25h

In [21]:
# Load the representative-sequence table from the untrimmed run for display.
Visualization.load(data_dir + "/rep_seqs.qzv")

## Denoising with trimmed sequences

In [11]:
# Denoise the primer-trimmed forward reads with DADA2, using the same
# parameters as the untrimmed run. Outputs carry a `_trimmed` suffix so the
# untrimmed results are not overwritten.
!qiime dada2 denoise-single \
    --i-demultiplexed-seqs {data_dir}/fungut_forward_reads_trimmed.qza \
    --p-trim-left 0 \
    --p-trunc-len 0 \
    --p-min-fold-parent-over-abundance 4 \
    --p-max-ee 4 \
    --o-representative-sequences {data_dir}/rep_seqs_trimmed.qza \
    --o-table {data_dir}/table_trimmed.qza \
    --o-denoising-stats {data_dir}/stats_trimmed.qza

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: livia_data/table_trimmed.qza[0m
[32mSaved FeatureData[Sequence] to: livia_data/rep_seqs_trimmed.qza[0m
[32mSaved SampleData[DADA2Stats] to: livia_data/stats_trimmed.qza[0m
[0m[?25h

In [15]:
# Tabulate the representative sequences from the trimmed run into a .qzv visualization.
!qiime feature-table tabulate-seqs \
    --i-data {data_dir}/rep_seqs_trimmed.qza \
    --o-visualization {data_dir}/rep_seqs_trimmed.qzv

  import pkg_resources
[32mSaved Visualization to: livia_data/rep_seqs_trimmed.qzv[0m
[0m[?25h

In [22]:
# Load the representative-sequence table from the trimmed run for display.
Visualization.load(data_dir + "/rep_seqs_trimmed.qzv")

# Taxonomy