# Setup

In [1]:
import IPython

import pandas as pd
import matplotlib.pyplot as plt
import qiime2 as q2
import seaborn as sns
from qiime2 import Visualization


import os

import matplotlib.pyplot as plt
%matplotlib inline

data_dir = 'livia_data'

In [2]:
# Load the tab-separated metadata file fungut_metadata.tsv from data_dir.
surveys_df = pd.read_table(f"{data_dir}/fungut_metadata.tsv")

In [3]:
!qiime tools peek $data_dir/fungut_forward_reads.qza


[32mUUID[0m:        3638611d-1767-413b-9390-70ee3d78e4ff
[32mType[0m:        SampleData[SequencesWithQuality]
[32mData format[0m: SingleLanePerSampleSingleEndFastqDirFmt


In [4]:
!qiime demux summarize \
  --i-data $data_dir/fungut_forward_reads.qza \
  --o-visualization $data_dir/demux_summary.qzv

  import pkg_resources
^C

Aborted!
[0m[?25h

In [5]:
# Load the demux summary written by the previous cell; as the cell's last
# expression it is rendered in the notebook output.
Visualization.load(os.path.join(data_dir, "demux_summary.qzv"))

# Adjusting Metadata

In [6]:
# Load the tab-separated cleaned metadata file (metadata_cleaned_BMI_habitat.tsv).
metadata_cleaned = pd.read_table(f"{data_dir}/metadata_cleaned_BMI_habitat.tsv")

Removing samples (by ID) that have too many unassigned reads

In [7]:
# IDs to exclude from the metadata (too many unassigned reads).
rem = [
    "ERR5327575",
    "ERR5327509",
    "ERR5327351",
    "ERR5327544",
    "ERR5327300",
    "ERR5327338",
    "ERR5327529",
    "ERR5327533",
    "ERR5327364",
    "ERR5327535",
]

In [8]:
# Keep only the rows whose "ID" value is not listed in `rem`.
metadata_cleaned_rem = metadata_cleaned.loc[
    ~metadata_cleaned["ID"].isin(rem)
]

In [9]:
# Write the filtered metadata to met_cle_rem.tsv (tab-separated, without the
# DataFrame index).
metadata_cleaned_rem.to_csv(
    os.path.join(data_dir, "met_cle_rem.tsv"),
    sep="\t",
    index=False,
)


In [10]:
# Display the filtered metadata table as this cell's output.
metadata_cleaned_rem

Unnamed: 0,fid,ID,country_sample,state_sample,latitude_sample,longitude_sample,sex_sample,age_years_sample,height_cm_sample,weight_kg_sample,bmi_sample,diet_type_sample,ibd_sample,gluten_sample,BMI_category,habitat_density1,density_percent,urban/rural/peri
0,1,ERR5327198,USA,TN,36.1,-86.8,female,67.0,152.0,41.0,17.75,Omnivore,I do not have this condition,No,Underweight,167906.0,42,peri-urban
1,2,ERR5327199,USA,DC,38.9,-77.1,male,55.0,182.0,79.0,23.73,Omnivore,I do not have this condition,I was diagnosed with gluten allergy (anti-glut...,Normal,149537.0,37,peri-urban
2,3,ERR5327266,USA,VA,38.9,-77.1,female,28.0,175.0,61.0,19.94,Omnivore,I do not have this condition,I do not eat gluten because it makes me feel bad,Normal,149537.0,37,peri-urban
3,4,ERR5327282,United Kingdom,,51.6,-0.2,female,26.0,166.0,60.0,21.77,Omnivore,I do not have this condition,No,Normal,269388.0,67,urban
4,5,ERR5327284,United Kingdom,,51.5,-0.2,female,25.0,173.0,59.0,20.01,Vegetarian but eat seafood,I do not have this condition,No,Normal,380431.0,95,urban
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,146,ERR5327599,Isle of Man,,54.2,-4.7,female,58.0,172.0,65.0,21.97,Omnivore,I do not have this condition,No,Normal,1586.0,0,rural
146,147,ERR5327604,United Kingdom,,51.4,-0.4,female,64.0,157.0,60.0,24.19,Omnivore but do not eat red meat,I do not have this condition,No,Normal,50696.0,13,rural
147,148,ERR5327605,United Kingdom,,52.8,-1.3,male,80.0,175.0,73.0,23.77,Vegetarian,I do not have this condition,No,Normal,2313.0,1,rural
148,149,ERR5327615,United Kingdom,,51.8,-1.3,female,53.0,176.0,64.0,20.66,Omnivore,I do not have this condition,No,Normal,9648.0,2,rural


# Trimming the primers

In [10]:
!qiime cutadapt trim-single \
  --i-demultiplexed-sequences $data_dir/fungut_forward_reads.qza \
  --p-front CTTGGTCATTTAGAGGAAGTAA \
  --o-trimmed-sequences $data_dir/fungut_forward_reads_trimmed.qza \
  --verbose

  import pkg_resources
Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: cutadapt -u 0 --error-rate 0.1 --times 1 --overlap 3 --minimum-length 1 -q 0,0 --quality-base 33 --cores 1 -o /tmp/qiime2/jovyan/processes/627-1761215648.58@jovyan/tmp/q2-OutPath-trnksspa/ERR5327198_01_L001_R1_001.fastq.gz --front CTTGGTCATTTAGAGGAAGTAA /tmp/qiime2/jovyan/data/3638611d-1767-413b-9390-70ee3d78e4ff/data/ERR5327198_01_L001_R1_001.fastq.gz

This is cutadapt 5.1 with Python 3.10.14
Command line parameters: -u 0 --error-rate 0.1 --times 1 --overlap 3 --minimum-length 1 -q 0,0 --quality-base 33 --cores 1 -o /tmp/qiime2/jovyan/processes/627-1761215648.58@jovyan/tmp/q2-OutPath-trnksspa/ERR5327198_01_L001_R1_001.fastq.gz --front CTTGGTCATTTAGAGGAAGTAA /tmp/qiime2/jovyan/data/3638611d-1767-413b-9390-70ee3d78e4ff/data/E

# Denoising

In [12]:
!qiime dada2 denoise-single \
   --i-demultiplexed-seqs $data_dir/fungut_forward_reads.qza \
   --p-trim-left 0 \
   --p-trunc-len 0 \
   --p-min-fold-parent-over-abundance 4 \
   --p-max-ee 4 \
    --o-representative-sequences $data_dir/rep_seqs.qza \
    --o-table $data_dir/table.qza \
    --o-denoising-stats $data_dir/stats.qza

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: livia_data/table.qza[0m
[32mSaved FeatureData[Sequence] to: livia_data/rep_seqs.qza[0m
[32mSaved SampleData[DADA2Stats] to: livia_data/stats.qza[0m
[0m[?25h

In [13]:
!qiime feature-table summarize \
  --i-table $data_dir/table.qza \
  --o-visualization $data_dir/table_summary.qzv \
--m-sample-metadata-file $data_dir/fungut_metadata.tsv

  import pkg_resources
[32mSaved Visualization to: livia_data/table_summary.qzv[0m
[0m[?25h

In [14]:
! qiime feature-table tabulate-seqs \
  --i-data $data_dir/rep_seqs.qza \
  --o-visualization $data_dir/rep_seqs.qzv

  import pkg_resources
[32mSaved Visualization to: livia_data/rep_seqs.qzv[0m
[0m[?25h

In [21]:
# Load the tabulated representative sequences; as the cell's last expression
# the visualization is rendered in the notebook output.
Visualization.load(os.path.join(data_dir, "rep_seqs.qzv"))

Denoising with the primer-trimmed sequences

In [11]:
!qiime dada2 denoise-single \
   --i-demultiplexed-seqs $data_dir/fungut_forward_reads_trimmed.qza \
   --p-trim-left 0 \
   --p-trunc-len 0 \
   --p-min-fold-parent-over-abundance 4 \
   --p-max-ee 4 \
    --o-representative-sequences $data_dir/rep_seqs_trimmed.qza \
    --o-table $data_dir/table_trimmed.qza \
    --o-denoising-stats $data_dir/stats_trimmed.qza

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: livia_data/table_trimmed.qza[0m
[32mSaved FeatureData[Sequence] to: livia_data/rep_seqs_trimmed.qza[0m
[32mSaved SampleData[DADA2Stats] to: livia_data/stats_trimmed.qza[0m
[0m[?25h

In [15]:
! qiime feature-table tabulate-seqs \
    --i-data $data_dir/rep_seqs_trimmed.qza \
    --o-visualization $data_dir/rep_seqs_trimmed.qzv

  import pkg_resources
[32mSaved Visualization to: livia_data/rep_seqs_trimmed.qzv[0m
[0m[?25h

In [22]:
# Load the tabulated representative sequences of the trimmed run; as the cell's
# last expression the visualization is rendered in the notebook output.
Visualization.load(os.path.join(data_dir, "rep_seqs_trimmed.qzv"))

# Taxonomy