# 4. Diversity Analysis (Alpha & Beta)
## Import data & packages

In [1]:
# 1 - Import all packages
import IPython
import pandas as pd
import matplotlib.pyplot as plt
import os
import qiime2 as q2
from qiime2 import Visualization

%matplotlib inline

In [2]:
# 2 - Set working directory
# All relative paths used later in this notebook are resolved against this folder.
# NOTE(review): absolute, machine-specific path (presumably the project root inside the
# Jupyter container) - adjust it when running the notebook elsewhere.
os.chdir("/home/jovyan/MicrobiomeAnalysis_TummyTribe/")

# Verify that your working directory is the overall project folder (.../MicrobiomeAnalysis_TummyTribe)
print("Current working directory:", os.getcwd())

Current working directory: /home/jovyan/MicrobiomeAnalysis_TummyTribe


In [3]:
# 3 - Data directories (relative to the working directory set above)
# data_dir: raw inputs; the metadata.tsv passed to the QIIME 2 commands is read from here.
# processed_data_dir: processed artifacts; the QIIME 2 commands read their input tables
# from here and write their results (.qza / .qzv) back into it.
data_dir = "data/raw"
processed_data_dir = "data/processed-pre_trained"

## Alpha rarefaction

In [4]:
# Run alpha rarefaction on the filtered feature table up to a maximum depth of 10000
# and save the result as alpha-rarefaction.qzv in the processed data directory.
# NOTE(review): presumably this plot is what the --p-sampling-depth values used in the
# later core-metrics cells were chosen from - confirm.
! qiime diversity alpha-rarefaction \
    --i-table $processed_data_dir/table-filtered_140.qza \
    --p-max-depth 10000 \
    --m-metadata-file $data_dir/metadata.tsv \
    --o-visualization $processed_data_dir/alpha-rarefaction.qzv

  import pkg_resources
[32mSaved Visualization to: data/processed-pre_trained/alpha-rarefaction.qzv[0m
[0m[?25h

In [5]:
# Load the alpha-rarefaction visualization saved by the previous cell; as the last
# expression of the cell it becomes the cell output.
Visualization.load(f"{processed_data_dir}/alpha-rarefaction.qzv")

## Diversity analysis

In [6]:
! qiime diversity core-metrics \
  --i-table $processed_data_dir/table-filtered_140.qza \
  --m-metadata-file $data_dir/metadata.tsv \
  --p-sampling-depth 1000 \
  --output-dir $processed_data_dir/core-metrics-results

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: data/processed-pre_trained/core-metrics-results/rarefied_table.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/processed-pre_trained/core-metrics-results/observed_features_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/processed-pre_trained/core-metrics-results/shannon_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/processed-pre_trained/core-metrics-results/evenness_vector.qza[0m
[32mSaved DistanceMatrix to: data/processed-pre_trained/core-metrics-results/jaccard_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: data/processed-pre_trained/core-metrics-results/bray_curtis_distance_matrix.qza[0m
[32mSaved PCoAResults to: data/processed-pre_trained/core-metrics-results/jaccard_pcoa_results.qza[0m
[32mSaved PCoAResults to: data/processed-pre_trained/core-metrics-results/bray_curtis_pcoa_results.qza[0m
[32mSaved Visualization to: data/processed-pre_trained/core-metrics-results/jacc

## Alpha Diversity

In [7]:
! qiime kmerizer core-metrics \
  --i-table $processed_data_dir/table-filtered_140.qza \
  --i-sequences $processed_data_dir/rep-seqs-filtered_140.qza \
  --m-metadata-file $data_dir/metadata.tsv \
  --p-sampling-depth 1500 \
  --p-kmer-size 8 \
  --output-dir $processed_data_dir/kmerizer-results

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: data/processed-pre_trained/kmerizer-results/rarefied_table.qza[0m
[32mSaved FeatureTable[Frequency] to: data/processed-pre_trained/kmerizer-results/kmer_table.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/processed-pre_trained/kmerizer-results/observed_features_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/processed-pre_trained/kmerizer-results/shannon_vector.qza[0m
[32mSaved DistanceMatrix to: data/processed-pre_trained/kmerizer-results/jaccard_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: data/processed-pre_trained/kmerizer-results/bray_curtis_distance_matrix.qza[0m
[32mSaved PCoAResults to: data/processed-pre_trained/kmerizer-results/jaccard_pcoa_results.qza[0m
[32mSaved PCoAResults to: data/processed-pre_trained/kmerizer-results/bray_curtis_pcoa_results.qza[0m
[32mSaved Visualization to: data/processed-pre_trained/kmerizer-results/scatterplot.qzv[0m
[0m[?25h

In [8]:
# Run alpha-group-significance on the Shannon diversity vector from core-metrics-results
# against the sample metadata; the visualization is saved next to the vector.
! qiime diversity alpha-group-significance \
  --i-alpha-diversity $processed_data_dir/core-metrics-results/shannon_vector.qza \
  --m-metadata-file $data_dir/metadata.tsv \
  --o-visualization $processed_data_dir/core-metrics-results/shannon-group-significance.qzv

  import pkg_resources
[32mSaved Visualization to: data/processed-pre_trained/core-metrics-results/shannon-group-significance.qzv[0m
[0m[?25h

In [9]:
# Show the Shannon group-significance visualization saved by the previous cell.
Visualization.load(f"{processed_data_dir}/core-metrics-results/shannon-group-significance.qzv")

In [10]:
# Run alpha-correlation on the same Shannon diversity vector against the sample metadata.
# NOTE(review): the output file is named "...-group-significance-numeric.qzv" although the
# action is alpha-correlation - presumably meant as the numeric-column counterpart of the
# group-significance test. If it is renamed, the cell that loads it must be changed too.
! qiime diversity alpha-correlation \
  --i-alpha-diversity $processed_data_dir/core-metrics-results/shannon_vector.qza \
  --m-metadata-file $data_dir/metadata.tsv \
  --o-visualization $processed_data_dir/core-metrics-results/shannon-group-significance-numeric.qzv

  import pkg_resources
[32mSaved Visualization to: data/processed-pre_trained/core-metrics-results/shannon-group-significance-numeric.qzv[0m
[0m[?25h

In [11]:
# Show the alpha-correlation visualization (saved under the "-group-significance-numeric" name).
Visualization.load(f"{processed_data_dir}/core-metrics-results/shannon-group-significance-numeric.qzv")

## Beta Diversity

In [12]:
# Show the scatterplot written by the `qiime kmerizer core-metrics` run.
Visualization.load(f"{processed_data_dir}/kmerizer-results/scatterplot.qzv")

In [13]:
# Show the Bray-Curtis Emperor plot stored in core-metrics-results.
# NOTE(review): presumably written by the `qiime diversity core-metrics` cell; its saved
# output is truncated, so the creation of this file is not visible there - confirm.
Visualization.load(f"{processed_data_dir}/core-metrics-results/bray_curtis_emperor.qzv")

Using the Bray–Curtis distance matrix from the k-mer based analysis (`kmerizer-results`)

In [14]:
# Pairwise beta-group-significance test across the geo_location_name metadata column,
# using the Bray-Curtis distance matrix.
# NOTE(review): the matrix is taken from kmerizer-results (k-mer based), whereas the
# Jaccard test further down uses the matrix from core-metrics-results - confirm that
# comparing the two different sources is intended.
! qiime diversity beta-group-significance \
    --i-distance-matrix $processed_data_dir/kmerizer-results/bray_curtis_distance_matrix.qza \
    --m-metadata-file $data_dir/metadata.tsv \
    --m-metadata-column geo_location_name \
    --p-pairwise \
    --o-visualization $processed_data_dir/kmerizer-results/bray_curtis-geo_location_name-significance.qzv

  import pkg_resources
[32mSaved Visualization to: data/processed-pre_trained/kmerizer-results/bray_curtis-geo_location_name-significance.qzv[0m
[0m[?25h

In [15]:
# Show the Bray-Curtis group-significance visualization saved by the previous cell.
Visualization.load(f"{processed_data_dir}/kmerizer-results/bray_curtis-geo_location_name-significance.qzv")

Using the Jaccard distance matrix from `core-metrics-results`

In [16]:
# Pairwise beta-group-significance test across the geo_location_name metadata column,
# using the Jaccard distance matrix from core-metrics-results.
! qiime diversity beta-group-significance \
    --i-distance-matrix $processed_data_dir/core-metrics-results/jaccard_distance_matrix.qza \
    --m-metadata-file $data_dir/metadata.tsv \
    --m-metadata-column geo_location_name \
    --p-pairwise \
    --o-visualization $processed_data_dir/core-metrics-results/jaccard-geo_location_name-significance.qzv

  import pkg_resources
[32mSaved Visualization to: data/processed-pre_trained/core-metrics-results/jaccard-geo_location_name-significance.qzv[0m
[0m[?25h

In [17]:
# Show the Jaccard group-significance visualization saved by the previous cell.
Visualization.load(f"{processed_data_dir}/core-metrics-results/jaccard-geo_location_name-significance.qzv")