# 1. Import Packages

In [3]:
# Importing all required packages at the start of the notebook
import os
import matplotlib.pyplot as plt
import pandas as pd
import qiime2 as q2
from qiime2 import Visualization
import seaborn as sns
from scipy.stats import shapiro, kruskal, f_oneway

# 2. Data Directory

In [4]:
# Output directory for every artifact written by this notebook.
data_dir = "Project_data/Differential_Abundance"
# Create the directory (and any missing parents) from Python instead of a
# shell `mkdir -p`, so the cell does not depend on a POSIX shell being present.
# exist_ok=True keeps the cell safe to re-run.
os.makedirs(data_dir, exist_ok=True)

In [12]:
# Project inputs consumed by the QIIME 2 commands further down.
# Feature table passed to `feature-table filter-features`.
input_table = "Project_data/Taxonomy/table_filtered.qza"
# Taxonomy passed to `taxa collapse`.
# NOTE(review): the recorded `taxa collapse` run reported "Feature IDs found in
# the table are missing from the taxonomy" -- confirm this artifact was built
# from the same features as input_table.
input_taxonomy = "Project_data/Taxonomy/taxonomy_pretrained.qza"
# Sample metadata passed to `composition ancombc` and read with pandas.
input_metadata = "Project_data/Metadata/updated_fungut_metadata.tsv"

# 3. Differential Abundance - IBD Status

In [6]:
# Filter features: keep only reasonably frequent ASVs
! qiime feature-table filter-features \
  --i-table $input_table \
  --p-min-frequency 25 \
  --p-min-samples 4 \
  --o-filtered-table $data_dir/table_abund.qza

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: Project_data/Differential_Abundance/table_abund.qza[0m
[0m[?25h

In [7]:
# Collapse to species level (L7)
! qiime taxa collapse \
  --i-table $data_dir/table_abund.qza \
  --i-taxonomy $input_taxonomy \
  --p-level 7 \
  --o-collapsed-table $data_dir/table_abund_L7.qza

  import pkg_resources
[31m[1mPlugin error from taxa:

  Feature IDs found in the table are missing from the taxonomy: {'94bf9cc7f4495f54e7e22a30a4dce588', '409483f487451668612cd67444b4be6e', '19b157034e328618b3dd6fb249c9518a', 'a840b10412c17f2512e50833cce8e6dc', 'de1a3169399379c0143705619f85470c', 'f1e00c6f31a5546a15c206010ff3583c', 'd8c7c2823a15d6d0e95a8003cf1cc15b', '1eb1248d80428380dfd53918dbd9afe3', '10a02738303efb2c566db155c337896a', '9a299f5b4933f9ddbacc10580b459cc3', 'a0beafd988014f7efd809bbcf8c0e70b', '59b6eadc93f95f5de0c0e533823fbf63', 'c629357ee1f9669039392c3546ba3dc2', 'ce7ae9a30872324f53c648144647ea16', '57585472bece5ec7baa62510ced98912', '847785aa457b96c33fe3bb9fa5d84d2f', '6bf86b88bc8c09dfef658ccbf82aa1cf', '839871f76d15cc2d824ea67707a0c692', 'cf8dc9924a2f5720a61e59c8b16ad98a', '680a2e8ac45b49e7438f42e3837bee6e', 'f03e4889f73ea08958135b0757e539dd', '17904a7fd80f3cf455413ecd3e913778', 'fbba7306f12b1cd57a6bca93284c88ac', '1d0faf5fd180a8fdb2bf814609069c4a', '6f19d4e3d5661

In [8]:
# ANCOM-BC: effect of IBD
! qiime composition ancombc \
  --i-table $data_dir/table_abund_L7.qza \
  --m-metadata-file $input_metadata \
  --p-formula "ibd_sample" \
  --o-differentials $data_dir/ancombc_ibd_L7_diffs.qza

  import pkg_resources
Usage: [94mqiime composition ancombc[0m [OPTIONS]

  Apply Analysis of Compositions of Microbiomes with Bias Correction (ANCOM-
  BC) to identify features that are differentially abundant across groups.

[1mInputs[0m:
  [94m[4m--i-table[0m ARTIFACT [32mFeatureTable[Frequency][0m
                         The feature table to be used for ANCOM-BC
                         computation.                               [35m[required][0m
[1mParameters[0m:
  [94m[4m--m-metadata-file[0m METADATA...
    (multiple            The sample metadata.
     arguments will be   
     merged)                                                        [35m[required][0m
  [94m[4m--p-formula[0m TEXT       How the microbial absolute abundances for each taxon
                         depend on the variables within the `metadata`.
                                                                    [35m[required][0m
  [94m--p-p-adj-method[0m TEXT [32mChoices('holm', 'ho

In [9]:
# Barplot results
! qiime composition da-barplot \
  --i-data $data_dir/ancombc_ibd_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_ibd_L7_barplot.qzv

  import pkg_resources
Usage: [94mqiime composition da-barplot[0m [OPTIONS]

  Generate bar plot views of ANCOM-BC output. One plot will be present per
  column in the ANCOM-BC output. The `significance_threshold`,
  `effect_size_threshold` and `feature_ids` filter results are intersected,
  such that only features that remain after all three filters have been
  applied will be present in the output.

[1mInputs[0m:
  [94m[4m--i-data[0m ARTIFACT [32mFeatureData[DifferentialAbundance][0m
                         The ANCOM-BC output to be plotted.         [35m[required][0m
[1mParameters[0m:
  [94m--p-effect-size-label[0m TEXT
                         Label for effect sizes in `data`.    [35m[default: 'lfc'][0m
  [94m--p-feature-id-label[0m TEXT
                         Label for feature ids in `data`.      [35m[default: 'id'][0m
  [94m--p-error-label[0m TEXT   Label for effect size errors in `data`.
                                                               [35

In [10]:
! qiime composition tabulate \
  --i-data $data_dir/ancombc_ibd_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_ibd_L7_results.qzv

  import pkg_resources
Usage: [94mqiime composition tabulate[0m [OPTIONS]

  Generate tabular view of ANCOM-BC or ANCOM-BC2 output, which includes per-
  page views for the log-fold change (lfc), standard error (se), P values, Q
  values, and W scores.

[1mInputs[0m:
  [94m[4m--i-data[0m ARTIFACT [32mFeatureData[DifferentialAbundance | ANCOMBC2Output][0m
                         The ANCOM-BC or ANCOM-BC2 output to be tabulated.
                                                                    [35m[required][0m
[1mOutputs[0m:
  [94m[4m--o-visualization[0m VISUALIZATION
                                                                    [35m[required][0m
[1mMiscellaneous[0m:
  [94m--output-dir[0m PATH      Output unspecified results to a directory
  [94m--verbose[0m / [94m--quiet[0m    Display verbose output to stdout and/or stderr
                         during execution of this action. Or silence output if
                         execution is successful (s

In [11]:
# Display the IBD bar plot. The path is derived from data_dir so it stays in
# sync with the location the da-barplot cell writes to.
Visualization.load(f"{data_dir}/ancombc_ibd_L7_barplot.qzv")

ValueError: Project_data/Differential_Abundance/ancombc_ibd_L7_barplot.qzv does not exist.

In [None]:
# Display the IBD results table. The path is derived from data_dir so it stays
# in sync with the location the tabulate cell writes to.
Visualization.load(f"{data_dir}/ancombc_ibd_L7_results.qzv")

# 4. Differential Abundance - Gluten Status

In [None]:
# The gluten column contains "/" characters, so a cleaned copy of the column
# is added and the metadata is re-saved for the gluten ANCOM-BC run.
meta = pd.read_csv(input_metadata, sep="\t")

# gluten_clean = gluten_sample with every literal "/" replaced by "_".
meta["gluten_clean"] = meta["gluten_sample"].str.replace("/", "_", regex=False)

# Derive the output path from data_dir (instead of repeating the directory
# literal) so it follows data_dir if that is ever changed.
meta_clean_path = f"{data_dir}/metadata_gluten_clean.tsv"
meta.to_csv(meta_clean_path, sep="\t", index=False)

# Show where the cleaned metadata was written.
meta_clean_path

In [None]:
# ANCOM-BC: effect of Gluten
! qiime composition ancombc \
  --i-table $data_dir/table_abund_L7.qza \
  --m-metadata-file Project_data/Differential_Abundance/metadata_gluten_clean.tsv \
  --p-formula "gluten_clean" \
  --o-differentials $data_dir/ancombc_gluten_L7_diffs.qza

In [None]:
# Barplot results
! qiime composition da-barplot \
  --i-data $data_dir/ancombc_gluten_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_gluten_L7_barplot.qzv

In [None]:
! qiime composition tabulate \
  --i-data $data_dir/ancombc_gluten_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_gluten_L7_results.qzv

In [None]:
# Display the gluten bar plot. The path is derived from data_dir so it stays
# in sync with the location the da-barplot cell writes to.
Visualization.load(f"{data_dir}/ancombc_gluten_L7_barplot.qzv")

In [None]:
# Display the gluten results table. The path is derived from data_dir so it
# stays in sync with the location the tabulate cell writes to.
Visualization.load(f"{data_dir}/ancombc_gluten_L7_results.qzv")

# 5. Differential Abundance - Diet

In [None]:
# The diet column contains "/" and spaces, so a cleaned copy of the column is
# added and the metadata is re-saved for the diet ANCOM-BC run.
meta = pd.read_csv(input_metadata, sep="\t")

# diet_clean = diet_type_sample with every literal "/" and " " replaced by "_".
# (The former bare `meta["diet_type_sample"].unique()` line was removed: it was
# not the last expression of the cell, so its result was never displayed.)
meta["diet_clean"] = (
    meta["diet_type_sample"]
    .str.replace("/", "_", regex=False)
    .str.replace(" ", "_", regex=False)
)

# Derive the output path from data_dir (instead of repeating the directory
# literal) so it follows data_dir if that is ever changed.
clean_meta_path = f"{data_dir}/metadata_diet_clean.tsv"
meta.to_csv(clean_meta_path, sep="\t", index=False)

# Show where the cleaned metadata was written.
clean_meta_path

In [None]:
# ANCOM-BC: effect of Diet
! qiime composition ancombc \
  --i-table $data_dir/table_abund_L7.qza \
  --m-metadata-file Project_data/Differential_Abundance/metadata_diet_clean.tsv \
  --p-formula "diet_clean" \
  --o-differentials $data_dir/ancombc_diet_L7_diffs.qza

In [None]:
# Barplot results
! qiime composition da-barplot \
  --i-data $data_dir/ancombc_diet_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_diet_L7_barplot.qzv

In [None]:
! qiime composition tabulate \
  --i-data $data_dir/ancombc_diet_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_diet_L7_results.qzv

In [None]:
# Display the diet bar plot. The path is derived from data_dir so it stays in
# sync with the location the da-barplot cell writes to.
Visualization.load(f"{data_dir}/ancombc_diet_L7_barplot.qzv")

In [None]:
# Display the diet results table. The path is derived from data_dir so it
# stays in sync with the location the tabulate cell writes to.
Visualization.load(f"{data_dir}/ancombc_diet_L7_results.qzv")

# 6. Differential Abundance - Sex

In [None]:
# ANCOM-BC: effect of Sex
! qiime composition ancombc \
  --i-table $data_dir/table_abund_L7.qza \
  --m-metadata-file $input_metadata \
  --p-formula "sex_sample" \
  --o-differentials $data_dir/ancombc_sex_L7_diffs.qza

In [None]:
# Barplot results
! qiime composition da-barplot \
  --i-data $data_dir/ancombc_sex_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_sex_L7_barplot.qzv

In [None]:
! qiime composition tabulate \
  --i-data $data_dir/ancombc_sex_L7_diffs.qza \
  --o-visualization $data_dir/ancombc_sex_L7_results.qzv

In [None]:
# Display the sex bar plot. The path is derived from data_dir so it stays in
# sync with the location the da-barplot cell writes to.
Visualization.load(f"{data_dir}/ancombc_sex_L7_barplot.qzv")

In [None]:
# Display the sex results table. The path is derived from data_dir so it
# stays in sync with the location the tabulate cell writes to.
Visualization.load(f"{data_dir}/ancombc_sex_L7_results.qzv")

# 7. Differential Abundance - BMI

In [None]:
# ANCOM-BC: effect of BMI
! qiime composition ancombc \
  --i-table $data_dir/table_abund_L7.qza \
  --m-metadata-file $input_metadata \
  --p-formula "bmi_category" \
  --o-differentials $data_dir/ancombc_sex_L7_diffs.qza