# 1. Import Packages

In [1]:
# Importing all required packages at the start of the notebook
import os
import matplotlib.pyplot as plt
import pandas as pd
import qiime2 as q2
from qiime2 import Visualization
import seaborn as sns
from scipy.stats import shapiro, kruskal, f_oneway

# 2. Data Directory

In [2]:
# Output directory for every artifact written by this notebook.
data_dir = "Project_data/Differential_Abundance"
# Create it (and any missing parents) without shelling out; a no-op if it already exists.
os.makedirs(data_dir, exist_ok=True)

In [3]:
# Inputs read by this notebook: feature table and taxonomy artifacts, plus the sample metadata TSV
input_table = "Project_data/Taxonomy/table_filtered.qza"
input_taxonomy = "Project_data/Taxonomy/taxonomy_pretrained.qza"
input_metadata = "Project_data/Metadata/updated_fungut_metadata.tsv"

# 3. Differential Abundance - IBD Status

In [4]:
# Filter features: keep only reasonably frequent ASVs
! qiime feature-table filter-features \
  --i-table $input_table \
  --p-min-frequency 25 \
  --p-min-samples 4 \
  --o-filtered-table $data_dir/table_abund.qza

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: Project_data/Differential_Abundance/table_abund.qza[0m
[0m[?25h

In [5]:
# Collapse to species level (L7)
! qiime taxa collapse \
  --i-table $data_dir/table_abund.qza \
  --i-taxonomy $input_taxonomy \
  --p-level 7 \
  --o-collapsed-table $data_dir/table_abund_L7.qza

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: Project_data/Differential_Abundance/table_abund_L7.qza[0m
[0m[?25h

In [6]:
# ANCOM-BC: effect of IBD
! qiime composition ancombc \
  --i-table $data_dir/table_abund_L7.qza \
  --m-metadata-file $input_metadata \
  --p-formula "ibd_sample" \
  --o-differentials $data_dir/ancombc_ibd_L7_diffs.qza

  import pkg_resources
[32mSaved FeatureData[DifferentialAbundance] to: Project_data/Differential_Abundance/ancombc_ibd_L7_diffs.qza[0m
[0m[?25h

In [7]:
# Barplot results
! qiime composition da-barplot \
  --i-data $data_dir/ancombc_ibd_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_ibd_L7_barplot.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Differential_Abundance/ancombc_ibd_L7_barplot.qzv[0m
[0m[?25h

In [8]:
! qiime composition tabulate \
  --i-data $data_dir/ancombc_ibd_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_ibd_L7_results.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Differential_Abundance/ancombc_ibd_L7_results.qzv[0m
[0m[?25h

In [None]:
# Show the IBD bar plot inline; the path is built from data_dir so it follows the configured output directory
Visualization.load(f"{data_dir}/ancombc_ibd_L7_barplot.qzv")

In [10]:
# Show the IBD results table inline; the path is built from data_dir so it follows the configured output directory
Visualization.load(f"{data_dir}/ancombc_ibd_L7_results.qzv")

# 4. Differential Abundance - Gluten Status

In [11]:
# The gluten_sample values contain "/", so a sanitised copy of the column is
# written to a separate metadata file for the gluten analysis.
# dtype=str keeps sample IDs and numeric-looking columns exactly as written in
# the source TSV instead of letting pandas re-infer and re-format them.
meta = pd.read_csv(input_metadata, sep="\t", dtype=str)

# create a cleaned version of the gluten column without '/'
meta["gluten_clean"] = meta["gluten_sample"].str.replace("/", "_", regex=False)

# Built from data_dir so the file lands next to the other outputs of this notebook
meta_clean_path = f"{data_dir}/metadata_gluten_clean.tsv"
meta.to_csv(meta_clean_path, sep="\t", index=False)

meta_clean_path

'Project_data/Differential_Abundance/metadata_gluten_clean.tsv'

In [12]:
# ANCOM-BC: effect of Gluten
! qiime composition ancombc \
  --i-table $data_dir/table_abund_L7.qza \
  --m-metadata-file Project_data/Differential_Abundance/metadata_gluten_clean.tsv \
  --p-formula "gluten_clean" \
  --o-differentials $data_dir/ancombc_gluten_L7_diffs.qza

  import pkg_resources
[32mSaved FeatureData[DifferentialAbundance] to: Project_data/Differential_Abundance/ancombc_gluten_L7_diffs.qza[0m
[0m[?25h

In [13]:
# Barplot results
! qiime composition da-barplot \
  --i-data $data_dir/ancombc_gluten_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_gluten_L7_barplot.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Differential_Abundance/ancombc_gluten_L7_barplot.qzv[0m
[0m[?25h

In [14]:
! qiime composition tabulate \
  --i-data $data_dir/ancombc_gluten_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_gluten_L7_results.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Differential_Abundance/ancombc_gluten_L7_results.qzv[0m
[0m[?25h

In [15]:
# Show the gluten bar plot inline; the path is built from data_dir so it follows the configured output directory
Visualization.load(f"{data_dir}/ancombc_gluten_L7_barplot.qzv")

In [16]:
# Show the gluten results table inline; the path is built from data_dir so it follows the configured output directory
Visualization.load(f"{data_dir}/ancombc_gluten_L7_results.qzv")

# 5. Differential Abundance - Diet

In [17]:
# Write a metadata copy in which "/" and spaces in diet_type_sample are
# replaced by "_" (new column diet_clean) for the diet analysis.
# dtype=str keeps sample IDs and numeric-looking columns exactly as written in
# the source TSV instead of letting pandas re-infer and re-format them.
meta = pd.read_csv(input_metadata, sep="\t", dtype=str)

meta["diet_clean"] = (
    meta["diet_type_sample"]
    .str.replace("/", "_", regex=False)
    .str.replace(" ", "_", regex=False)
)
# Built from data_dir so the file lands next to the other outputs of this notebook
clean_meta_path = f"{data_dir}/metadata_diet_clean.tsv"
meta.to_csv(clean_meta_path, sep="\t", index=False)

clean_meta_path

'Project_data/Differential_Abundance/metadata_diet_clean.tsv'

In [18]:
# ANCOM-BC: effect of Diet
! qiime composition ancombc \
  --i-table $data_dir/table_abund_L7.qza \
  --m-metadata-file Project_data/Differential_Abundance/metadata_diet_clean.tsv \
  --p-formula "diet_clean" \
  --o-differentials $data_dir/ancombc_diet_L7_diffs.qza

  import pkg_resources
[32mSaved FeatureData[DifferentialAbundance] to: Project_data/Differential_Abundance/ancombc_diet_L7_diffs.qza[0m
[0m[?25h

In [19]:
# Barplot results
! qiime composition da-barplot \
  --i-data $data_dir/ancombc_diet_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_diet_L7_barplot.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Differential_Abundance/ancombc_diet_L7_barplot.qzv[0m
[0m[?25h

In [20]:
! qiime composition tabulate \
  --i-data $data_dir/ancombc_diet_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_diet_L7_results.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Differential_Abundance/ancombc_diet_L7_results.qzv[0m
[0m[?25h

In [21]:
# Show the diet bar plot inline; the path is built from data_dir so it follows the configured output directory
Visualization.load(f"{data_dir}/ancombc_diet_L7_barplot.qzv")

In [22]:
# Show the diet results table inline; the path is built from data_dir so it follows the configured output directory
Visualization.load(f"{data_dir}/ancombc_diet_L7_results.qzv")

# 6. Differential Abundance - Sex

In [23]:
# ANCOM-BC: effect of Sex
! qiime composition ancombc \
  --i-table $data_dir/table_abund_L7.qza \
  --m-metadata-file $input_metadata \
  --p-formula "sex_sample" \
  --o-differentials $data_dir/ancombc_sex_L7_diffs.qza

  import pkg_resources
[32mSaved FeatureData[DifferentialAbundance] to: Project_data/Differential_Abundance/ancombc_sex_L7_diffs.qza[0m
[0m[?25h

In [24]:
# Barplot results
! qiime composition da-barplot \
  --i-data $data_dir/ancombc_sex_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_sex_L7_barplot.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Differential_Abundance/ancombc_sex_L7_barplot.qzv[0m
[0m[?25h

In [25]:
! qiime composition tabulate \
  --i-data $data_dir/ancombc_sex_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_sex_L7_results.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Differential_Abundance/ancombc_sex_L7_results.qzv[0m
[0m[?25h

In [26]:
# Show the sex bar plot inline; the path is built from data_dir so it follows the configured output directory
Visualization.load(f"{data_dir}/ancombc_sex_L7_barplot.qzv")

In [27]:
# Show the sex results table inline; the path is built from data_dir so it follows the configured output directory
Visualization.load(f"{data_dir}/ancombc_sex_L7_results.qzv")

# 7. Differential Abundance - BMI

In [30]:
# ANCOM-BC: effect of BMI
! qiime composition ancombc \
  --i-table $data_dir/table_abund_L7.qza \
  --m-metadata-file $input_metadata \
  --p-formula "bmi_category" \
  --o-differentials $data_dir/ancombc_bmi_L7_diffs.qza

  import pkg_resources
[32mSaved FeatureData[DifferentialAbundance] to: Project_data/Differential_Abundance/ancombc_bmi_L7_diffs.qza[0m
[0m[?25h

In [31]:
# Barplot results
! qiime composition da-barplot \
  --i-data $data_dir/ancombc_bmi_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_bmi_L7_barplot.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Differential_Abundance/ancombc_bmi_L7_barplot.qzv[0m
[0m[?25h

In [32]:
! qiime composition tabulate \
  --i-data $data_dir/ancombc_bmi_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_bmi_L7_results.qzv

  import pkg_resources
[32mSaved Visualization to: Project_data/Differential_Abundance/ancombc_bmi_L7_results.qzv[0m
[0m[?25h

In [33]:
# Show the BMI bar plot inline; the path is built from data_dir so it follows the configured output directory
Visualization.load(f"{data_dir}/ancombc_bmi_L7_barplot.qzv")

In [34]:
# Show the BMI results table inline; the path is built from data_dir so it follows the configured output directory
Visualization.load(f"{data_dir}/ancombc_bmi_L7_results.qzv")