1 Processing of the raw sequence data<br>
    1.1 Filter and trim<br>
    1.2 Learn the error rates<br>
    1.3 Sample inference<br>
    1.4 Merge paired reads<br>
    1.5 Construct sequence table and remove chimeras<br>
    1.6 Track reads through the pipeline<br>
    1.7 Assign taxonomy<br>
    1.8 Renaming the samples and bringing them in the right order<br>
2 Bacterial community composition (BCC)<br>
    2.1 Visualise alpha-diversity<br>
    2.2 BCC on Phylum-, Class-, Order-, Family- and Genus-level<br>

In [1]:
import os
import glob
import wget
import qiime2
import subprocess
import pandas as pd
from qiime2.plugins import demux
from qiime2.plugins import dada2
from qiime2.plugins import phylogeny
from qiime2.plugins import empress
from qiime2.plugins import feature_table
from qiime2.plugins import diversity
from qiime2.plugins import feature_classifier
from qiime2.plugins import taxa
from qiime2.plugins import composition

In [2]:
def save_results(variable_name, output_dir):
    """
    Write every QIIME 2 result held by a results container to disk.

    The container is expected to expose a ``_fields`` attribute listing the
    names of its members. Each member that is a ``qiime2.Artifact`` is saved
    as ``<name>.qza`` and each ``qiime2.Visualization`` as ``<name>.qzv``
    inside ``output_dir``; any other member is reported and skipped.
    ``output_dir`` is created if it does not exist.

    Parameters:
        variable_name (object): The results container whose members are saved.
        output_dir (str): Directory that receives the saved files.

    Returns:
        None

    Example usage:
        save_results(core_metrics, "output/diversity/")
    """
    os.makedirs(output_dir, exist_ok=True)

    # Without _fields there is nothing to enumerate: report and stop.
    if not hasattr(variable_name, '_fields'):
        print(f"Save failed! Provided {variable_name} objects could not be saved.")
        return

    for field in variable_name._fields:
        result = getattr(variable_name, field)

        # The file extension depends on the kind of QIIME 2 result.
        if isinstance(result, qiime2.Artifact):
            extension = ".qza"
        elif isinstance(result, qiime2.Visualization):
            extension = ".qzv"
        else:
            print(f"Matrix {field} is not a valid Artifact or Visualization. Skipping.")
            continue

        destination = os.path.join(output_dir, f"{field}{extension}")
        result.save(destination)
        print(f"Saved matrix {field} successfully at {destination}.")

## Import data into QIIME 2

In [3]:
data_dir = "jakob_primer/Raw_sequence_data"

In [105]:
def get_paired_end_manifest_file(data_dir, out_filepath="input/paired_end_manifest.csv"):
    """
    Generate a paired-end manifest file for DNA sequencing data.

    FASTQ files are searched exactly one directory level below ``data_dir``
    (glob pattern ``<data_dir>/*/*.fastq*``). The sample ID is the part of the
    file name before the first underscore. A file counts as the forward read
    when "R1" (case-insensitive) occurs in the file name after the sample ID,
    and as the reverse read otherwise.

    Args:
        data_dir (str): The directory containing the sequencing data files.
        out_filepath (str): Where the manifest is written. Defaults to
            "input/paired_end_manifest.csv"; the parent directory is created
            if needed.

    Returns:
        str: The filepath of the generated paired-end manifest file.

    Raises:
        ValueError: If a sample does not have exactly one forward and exactly
            one reverse file. Nothing is written in that case.
    """
    # Create the directory that will hold the manifest file
    out_dir = os.path.dirname(out_filepath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Sort so the manifest is reproducible; glob order is filesystem-dependent
    pattern = os.path.join(data_dir, "*/*.fastq*")
    fastq_paths = sorted(os.path.abspath(path) for path in glob.glob(pattern))

    # Group the files per sample and per read direction
    reads_by_sample = {}
    for path in fastq_paths:
        filename = os.path.basename(path).strip()
        sample_id = filename.split("_")[0]
        # Look for the R1 marker only after the sample ID, so that an ID such
        # as "SR1" does not turn the reverse read into a forward read. Names
        # without an underscore fall back to the whole file name.
        remainder = filename[len(sample_id):] or filename
        direction = "forward" if "R1" in remainder.upper() else "reverse"
        sample_reads = reads_by_sample.setdefault(
            sample_id, {"forward": [], "reverse": []})
        sample_reads[direction].append(path)

    # Validate every sample before touching the output file, so a broken
    # data set never leaves a half-written manifest behind
    rows = []
    for sample_id, sample_reads in reads_by_sample.items():
        for direction in ("forward", "reverse"):
            candidates = sample_reads[direction]
            if len(candidates) != 1:
                raise ValueError(
                    f"Sample '{sample_id}' needs exactly one {direction} "
                    f"FASTQ file, found {len(candidates)}: {candidates}"
                )
            rows.append(f"{sample_id},{candidates[0]},{direction}\n")

    # Write the manifest file
    with open(out_filepath, "w") as manifest:
        manifest.write("sample-id,absolute-filepath,direction\n")
        manifest.writelines(rows)

    return out_filepath

In [106]:
get_paired_end_manifest_file(data_dir)

'input/paired_end_manifest.csv'

In [8]:
# Import data
# Read the paired-end manifest written by get_paired_end_manifest_file() and
# import the listed FASTQ files as one QIIME 2 artifact.
paired_end_manifest = "input/paired_end_manifest.csv"
sequences = qiime2.Artifact.import_data('SampleData[PairedEndSequencesWithQuality]',
                                        paired_end_manifest,
                                        view_type='PairedEndFastqManifestPhred33')
# This is the first write into output/, so make sure the directory exists;
# without it the save fails on a fresh checkout.
os.makedirs("output", exist_ok=True)
sequences.save("output/sequences.qza")

'output/sequences.qza'

### Metadata

In [9]:
metadata = qiime2.Metadata.load('metadata.tsv')

### q2-feature-classifier

In [10]:
def get_classifier(url, out_dir):
    """
    Fetch a classifier file into a local directory, reusing an existing copy.

    The output directory is created when missing. The local file name is the
    last path component of ``url``. If a file with that name already exists
    in ``out_dir``, nothing is downloaded.

    Parameters:
        url (str): The URL of the classifier to be downloaded.
        out_dir (str): The directory where the downloaded classifier is stored.

    Returns:
        str: The file path of the classifier inside ``out_dir``.

    Example usage:
        classifier_path = get_classifier("https://data.qiime2.org/2023.5/common/silva-138-99-nb-classifier.qza",
        "classifiers/")
    """
    os.makedirs(out_dir, exist_ok=True)
    out_filepath = os.path.join(out_dir, os.path.basename(url))

    # A previously downloaded copy is reused as-is.
    if os.path.exists(out_filepath):
        print(f"Classifier already exists at {out_filepath}. Skipping download.")
        return out_filepath

    wget.download(url=url, out=out_dir)
    print(f"Downloaded classifier from {url} to {out_filepath}")
    return out_filepath

# Example usage (uncomment to download a pre-trained classifier; the URL must
# point at a classifier artifact, e.g. one of the entries of classifier_urls below)
#classifier_path = get_classifier("https://data.qiime2.org/2023.5/common/silva-138-99-515-806-nb-classifier.qza",
#                                 "input/taxonomy_classifier/")


In [11]:
# Short names mapped to download URLs of classifier artifacts (.qza) hosted on
# data.qiime2.org. Judging by the file names, "silva_*" / "gg_*" refer to the
# SILVA and Greengenes reference databases, "*_region" to the 515-806 / V4
# region, and "*_weighted_*" to the weighted classifier variants.
classifier_urls = {
    "silva_full": "https://data.qiime2.org/2023.5/common/silva-138-99-nb-classifier.qza",
    "silva_region": "https://data.qiime2.org/2023.5/common/silva-138-99-515-806-nb-classifier.qza",
    "gg_full": "https://data.qiime2.org/classifiers/greengenes/gg_2022_10_backbone_full_length.nb.qza",
    "gg_region": "https://data.qiime2.org/classifiers/greengenes/gg_2022_10_backbone.v4.nb.qza",
    "silva_weighted_full": "https://data.qiime2.org/2023.5/common/silva-138-99-nb-weighted-classifier.qza",
    "gg_weighted_full": "https://data.qiime2.org/2023.5/common/gg-13-8-99-nb-weighted-classifier.qza",
    "gg_weighted_region": "https://data.qiime2.org/2023.5/common/gg-13-8-99-515-806-nb-weighted-classifier.qza"}

In [12]:
classifier_urls.keys()

dict_keys(['silva_full', 'silva_region', 'gg_full', 'gg_region', 'silva_weighted_full', 'gg_weighted_full', 'gg_weighted_region'])

In [107]:
print(", ".join(['silva_full', 'silva_region', 'gg_full', 'gg_region', 'silva_weighted_full', 'gg_weighted_full', 'gg_weighted_region']))

silva_full, silva_region, gg_full, gg_region, silva_weighted_full, gg_weighted_full, gg_weighted_region


## Demultiplexing

In [13]:
# Demux summarize
# Build the summary visualization of the imported paired-end reads and store
# it as output/qualities.qzv for inspection.
demux_summary = demux.visualizers.summarize(sequences)
demux_summary.visualization.save("output/qualities.qzv")

'output/qualities.qzv'

In [14]:
demux_summary.visualization

## Denoising and QC filtering

In [59]:
# DADA2 denoise-paired
# Denoise the paired-end reads. The result exposes `table`,
# `representative_sequences` and `denoising_stats`, which later cells use.
# Forward reads are truncated at position 285 and reverse reads at 240.
# NOTE(review): max_ee_f / max_ee_r = 15 looks very permissive for an
# expected-error filter — confirm this is intentional.
dada2_denoised = dada2.methods.denoise_paired(demultiplexed_seqs=sequences,
                                              trunc_len_f=285,
                                              trunc_len_r=240,
                                              max_ee_f=15,
                                              max_ee_r=15,
                                              n_threads=4)

Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada_paired.R /tmp/tmpic9i4t0u/forward /tmp/tmpic9i4t0u/reverse /tmp/tmpic9i4t0u/output.tsv.biom /tmp/tmpic9i4t0u/track.tsv /tmp/tmpic9i4t0u/filt_f /tmp/tmpic9i4t0u/filt_r 285 240 0 0 15 15 2 independent consensus 1.0 4 1000000



In [60]:
# Summarize the denoised feature table together with the sample metadata, and
# tabulate the representative sequences; both results are visualizations that
# are displayed in later cells.
summary_table = feature_table.visualizers.summarize(table=dada2_denoised.table,
                                         sample_metadata=metadata)
summary_rep_seq = feature_table.visualizers.tabulate_seqs(data=dada2_denoised.representative_sequences)

  os.path.join(output_dir, 'sample-frequency-detail.csv'))
  os.path.join(output_dir, 'feature-frequency-detail.csv'))


In [61]:
summary_table.visualization

In [62]:
save_results(dada2_denoised, "output/dada2/")

Saved matrix table successfully at output/dada2/table.qza.
Saved matrix representative_sequences successfully at output/dada2/representative_sequences.qza.
Saved matrix denoising_stats successfully at output/dada2/denoising_stats.qza.


In [63]:
table_core_features = feature_table.visualizers.core_features(table=dada2_denoised.table)
table_core_features.visualization

Invalid limit will be ignored.
  ax.set_ybound(0, max(df['Feature count']) + 1)


In [64]:
summary_table.visualization

In [65]:
summary_rep_seq.visualization

## Build a phylogenetic tree

In [66]:
# Phylogeny align-to-tree-mafft-fasttree
# Align the representative sequences and build a tree from the alignment. The
# result exposes `alignment`, `masked_alignment`, `tree` and `rooted_tree`.
representative_sequences = dada2_denoised.representative_sequences
phylo_tree = phylogeny.pipelines.align_to_tree_mafft_fasttree(representative_sequences)

Running external command line application. This may print messages to stdout and/or stderr.
The command being run is below. This command cannot be manually re-run as it will depend on temporary files that no longer exist.

Command: mafft --preservecase --inputorder --thread 1 /tmp/qiime2-archive-y8m_hexb/1cadd265-865f-4e88-9164-8b172ad7d83b/data/dna-sequences.fasta

Running external command line application. This may print messages to stdout and/or stderr.
The command being run is below. This command cannot be manually re-run as it will depend on temporary files that no longer exist.

Command: FastTree -quote -nt /tmp/qiime2-archive-z32uycpf/cc374102-575d-4f39-8bc2-bba60ffaf23d/data/aligned-dna-sequences.fasta



In [67]:
save_results(phylo_tree, "output/tree/")

Saved matrix alignment successfully at output/tree/alignment.qza.
Saved matrix masked_alignment successfully at output/tree/masked_alignment.qza.
Saved matrix tree successfully at output/tree/tree.qza.
Saved matrix rooted_tree successfully at output/tree/rooted_tree.qza.


In [68]:
# Empress tree-plot
# Render the rooted tree with Empress and store the visualization next to the
# saved tree artifacts.
# Alternative: reload the tree saved by save_results() instead of using the
# in-memory result:
#tree = qiime2.Artifact.load("output/tree/rooted_tree.qza")
emp_tree_plot = empress.visualizers.tree_plot(phylo_tree.rooted_tree)
emp_tree_plot.visualization.save("output/tree/rooted_tree.qzv")



'output/tree/rooted_tree.qzv'

In [69]:
#jupyter serverextension enable --py qiime2 --sys-prefix

In [70]:
# Reload the Empress visualization from disk and display it inline as the
# cell's last expression.
visualization = qiime2.Visualization.load('output/tree/rooted_tree.qzv')
visualization

In [71]:
#!qiime tools view output/tree/rooted_tree.qzv

In [72]:
# Alpha rarefaction curves for the denoised table, using the rooted tree for
# the phylogenetic metrics and the sample metadata for grouping.
# NOTE(review): max_depth=20 is tiny compared with the per-sample counts
# visible in the collapsed tables further down (hundreds of reads per sample)
# — confirm this is not a leftover from a quick test run.
alpha_rarefaction_viz = diversity.visualizers.alpha_rarefaction(
                            table=dada2_denoised.table,
                            max_depth=20,
                            phylogeny=phylo_tree.rooted_tree,
                            metadata=metadata)

alpha_rarefaction_viz.visualization

## Alpha Diversity

In [73]:
# Alpha diversity
# `table` is the denoised feature table used by all following diversity and
# taxonomy cells. Its summary visualization is saved under output/dada2/.
table = dada2_denoised.table
table_summary = feature_table.visualizers.summarize(table, sample_metadata=metadata)
table_summary.visualization.save("output/dada2/table_summary.qzv")

  os.path.join(output_dir, 'sample-frequency-detail.csv'))
  os.path.join(output_dir, 'feature-frequency-detail.csv'))


'output/dada2/table_summary.qzv'

In [74]:
table_summary.visualization

In [75]:
# Core metrics phylogenetic
# Compute the core diversity results from the feature table and rooted tree.
# Later cells read `shannon_vector`, `faith_pd_vector` and
# `unweighted_unifrac_distance_matrix` from the result.
# NOTE(review): sampling_depth=16 keeps only 16 reads per sample, while the
# collapsed tables further down show hundreds of reads per sample — confirm
# this depth is intentional.
core_metrics = diversity.pipelines.core_metrics_phylogenetic(
    table=table,
    phylogeny=phylo_tree.rooted_tree,
    sampling_depth=16,
    metadata=metadata,
)



In [76]:
# Write every artifact and visualization held by core_metrics into
# output/diversity/ (one file per result, named after the result field).
save_results(core_metrics, "output/diversity/")

Saved matrix rarefied_table successfully at output/diversity/rarefied_table.qza.
Saved matrix faith_pd_vector successfully at output/diversity/faith_pd_vector.qza.
Saved matrix observed_features_vector successfully at output/diversity/observed_features_vector.qza.
Saved matrix shannon_vector successfully at output/diversity/shannon_vector.qza.
Saved matrix evenness_vector successfully at output/diversity/evenness_vector.qza.
Saved matrix unweighted_unifrac_distance_matrix successfully at output/diversity/unweighted_unifrac_distance_matrix.qza.
Saved matrix weighted_unifrac_distance_matrix successfully at output/diversity/weighted_unifrac_distance_matrix.qza.
Saved matrix jaccard_distance_matrix successfully at output/diversity/jaccard_distance_matrix.qza.
Saved matrix bray_curtis_distance_matrix successfully at output/diversity/bray_curtis_distance_matrix.qza.
Saved matrix unweighted_unifrac_pcoa_results successfully at output/diversity/unweighted_unifrac_pcoa_results.qza.
Saved matrix

In [77]:
# Alpha group significance
# Compare the Shannon diversity vector from core_metrics across the groups
# defined in the sample metadata.
alpha_group_significance = diversity.visualizers.alpha_group_significance(
    alpha_diversity=core_metrics.shannon_vector,
    metadata=metadata,
)

In [78]:
alpha_group_significance.visualization

In [79]:
# Same group comparison as for Shannon diversity, here for the Faith PD vector.
alpha_group_significance_faith = diversity.visualizers.alpha_group_significance(
    alpha_diversity=core_metrics.faith_pd_vector,
    metadata=metadata)

# Disabled: the equivalent comparison for the evenness vector.
#alpha_group_significance_even = diversity.visualizers.alpha_group_significance(
#    alpha_diversity=core_metrics.evenness_vector,
#    metadata=metadata)

In [80]:
alpha_group_significance_faith.visualization

In [81]:
#alpha_group_significance_even.visualization

In [82]:
# Beta diversity adonis
# Run adonis on the unweighted UniFrac distance matrix with the metadata
# column "SampleName" as the model formula.
# NOTE(review): if SampleName is unique per sample the test has no
# replication within groups — confirm this is the intended grouping column.
adonis_result = diversity.visualizers.adonis(
    distance_matrix=core_metrics.unweighted_unifrac_distance_matrix,
    metadata=metadata,
    formula="SampleName",
    n_jobs=4
)

Running external command line application. This may print messages to stdout and/or stderr.
The command being run is below. This command cannot be manually re-run as it will depend on temporary files that no longer exist.

Command: run_adonis.R /tmp/tmppuokzzvs/dm.tsv /tmp/tmppuokzzvs/md.tsv SampleName 999 4 /tmp/qiime2-temp-9v0qqy1h/adonis.tsv



In [83]:
adonis_result.visualization.save("output/diversity/permanova.qzv")

'output/diversity/permanova.qzv'

In [84]:
dada2_denoised

Results (name = value)
---------------------------------------------------------------------------------------------------------
table                    = <artifact: FeatureTable[Frequency] uuid: c5af5e8b-e74c-46ba-b8a7-d0ebb4603f85>
representative_sequences = <artifact: FeatureData[Sequence] uuid: 1cadd265-865f-4e88-9164-8b172ad7d83b>
denoising_stats          = <artifact: SampleData[DADA2Stats] uuid: bb16f405-b37e-42ad-960a-e37dbe5511b2>

In [85]:
# Taxonomy classification
# Assign taxonomy to the DADA2 representative sequences with a pre-trained
# classifier and save the classification as output/taxa.qza.
reads = dada2_denoised.representative_sequences
# Path to the classifier. get_classifier() downloads the file on first use and
# reuses the local copy afterwards, so the notebook also runs on a machine
# where input/taxonomy_classifier/ has not been populated by hand. The
# resulting path is the same file that was previously hard-coded here.
classifier_path = get_classifier(classifier_urls["gg_weighted_region"],
                                 "input/taxonomy_classifier/")
classifier = qiime2.Artifact.load(classifier_path)
taxa_classified = feature_classifier.methods.classify_sklearn(
    reads=reads,
    classifier=classifier,
    n_jobs=4
)
taxa_classified.classification.save("output/taxa.qza")

'output/taxa.qza'

In [86]:
# Taxa barplot
# Combine the feature table, the assigned taxonomy and the sample metadata
# into a bar plot visualization, saved as output/taxa_barplot.qzv.
taxonomy = taxa_classified.classification
taxa_barplot = taxa.visualizers.barplot(
    table=table,
    taxonomy=taxonomy,
    metadata=metadata,
)
taxa_barplot.visualization.save("output/taxa_barplot.qzv")


'output/taxa_barplot.qzv'

In [87]:
taxa_barplot.visualization

In [121]:
# Taxa collapse
# Collapse the feature table to taxonomic level 2 (kingdom;phylum labels in
# the output below), save it as the phylum-level table and view it as a
# DataFrame with one row per sample. `bcc_table` is reassigned by each of the
# following collapse cells.
bcc_table = taxa.methods.collapse(
    table=table,
    taxonomy=taxonomy,
    level=2)
bcc_table.collapsed_table.save("output/bcc_phylum.qza")

df2 = bcc_table.collapsed_table.view(pd.DataFrame)
df2

Unnamed: 0,Unassigned;__,k__Archaea;p__Euryarchaeota,k__Archaea;p__[Parvarchaeota],k__Bacteria;__,k__Bacteria;p__,k__Bacteria;p__Actinobacteria,k__Bacteria;p__Bacteroidetes,k__Bacteria;p__Chlamydiae,k__Bacteria;p__Chloroflexi,k__Bacteria;p__Cyanobacteria,...,k__Bacteria;p__NC10,k__Bacteria;p__Nitrospirae,k__Bacteria;p__OD1,k__Bacteria;p__OP11,k__Bacteria;p__OP3,k__Bacteria;p__Planctomycetes,k__Bacteria;p__Proteobacteria,k__Bacteria;p__TM6,k__Bacteria;p__TM7,k__Bacteria;p__Verrucomicrobia
S152,0.0,0.0,0.0,123.0,217.0,43.0,71.0,0.0,0.0,0.0,...,0.0,51.0,104.0,0.0,0.0,52.0,303.0,0.0,0.0,27.0
S153,0.0,0.0,2.0,2.0,79.0,541.0,15.0,10.0,0.0,0.0,...,0.0,0.0,6.0,0.0,0.0,0.0,227.0,0.0,52.0,0.0
S18,0.0,0.0,5.0,8.0,82.0,242.0,20.0,0.0,0.0,0.0,...,0.0,0.0,58.0,0.0,0.0,35.0,321.0,0.0,50.0,11.0
S191,0.0,0.0,0.0,0.0,0.0,496.0,112.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,48.0,0.0,0.0,67.0
S192,0.0,0.0,0.0,0.0,431.0,109.0,14.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S193,0.0,0.0,0.0,40.0,173.0,288.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,371.0,0.0,0.0,0.0
S194,0.0,0.0,0.0,0.0,64.0,308.0,89.0,0.0,0.0,84.0,...,0.0,0.0,0.0,0.0,0.0,0.0,199.0,0.0,0.0,0.0
S195,0.0,0.0,0.0,0.0,221.0,134.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,169.0,0.0,0.0,0.0
S197,0.0,0.0,0.0,0.0,0.0,597.0,0.0,0.0,0.0,255.0,...,0.0,0.0,0.0,0.0,0.0,0.0,131.0,0.0,0.0,0.0
S254,0.0,3.0,16.0,100.0,377.0,84.0,73.0,0.0,0.0,0.0,...,0.0,32.0,119.0,0.0,0.0,47.0,417.0,0.0,17.0,22.0


In [122]:
df2.describe()

Unnamed: 0,Unassigned;__,k__Archaea;p__Euryarchaeota,k__Archaea;p__[Parvarchaeota],k__Bacteria;__,k__Bacteria;p__,k__Bacteria;p__Actinobacteria,k__Bacteria;p__Bacteroidetes,k__Bacteria;p__Chlamydiae,k__Bacteria;p__Chloroflexi,k__Bacteria;p__Cyanobacteria,...,k__Bacteria;p__NC10,k__Bacteria;p__Nitrospirae,k__Bacteria;p__OD1,k__Bacteria;p__OP11,k__Bacteria;p__OP3,k__Bacteria;p__Planctomycetes,k__Bacteria;p__Proteobacteria,k__Bacteria;p__TM6,k__Bacteria;p__TM7,k__Bacteria;p__Verrucomicrobia
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,...,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,0.611111,0.166667,2.277778,62.111111,224.833333,177.944444,53.388889,1.722222,0.277778,19.555556,...,0.611111,15.055556,95.055556,0.833333,1.444444,23.0,264.333333,0.944444,6.611111,7.055556
std,1.851514,0.707107,4.127463,61.271708,167.529541,203.792995,53.675648,3.99714,1.178511,62.008749,...,2.592725,26.831659,98.509762,3.535534,4.259553,20.905952,173.496228,4.006938,16.638506,17.024108
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.5,87.0,0.0,14.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,140.5,0.0,0.0,0.0
50%,0.0,0.0,0.0,58.5,207.0,96.5,37.0,0.0,0.0,0.0,...,0.0,0.0,81.0,0.0,0.0,26.0,236.0,0.0,0.0,0.0
75%,0.0,0.0,3.0,117.25,356.5,303.0,78.0,0.0,0.0,0.0,...,0.0,18.25,149.5,0.0,0.0,37.5,358.5,0.0,0.0,0.0
max,7.0,3.0,16.0,160.0,617.0,597.0,197.0,12.0,5.0,255.0,...,11.0,85.0,295.0,15.0,15.0,52.0,649.0,17.0,52.0,67.0


In [123]:
# Taxa collapse
# Collapse the feature table to taxonomic level 3 (labels down to class in the
# output below), save it as the class-level table and view it as a DataFrame.
bcc_table = taxa.methods.collapse(
    table=table,
    taxonomy=taxonomy,
    level=3)
bcc_table.collapsed_table.save("output/bcc_class.qza")

df3 = bcc_table.collapsed_table.view(pd.DataFrame)
df3

Unnamed: 0,Unassigned;__;__,k__Archaea;p__Euryarchaeota;c__DSEG,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea],k__Bacteria;__;__,k__Bacteria;p__;c__,k__Bacteria;p__Actinobacteria;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria,k__Bacteria;p__Bacteroidetes;__,k__Bacteria;p__Bacteroidetes;c__Bacteroidia,k__Bacteria;p__Bacteroidetes;c__Cytophagia,...,k__Bacteria;p__Proteobacteria;__,k__Bacteria;p__Proteobacteria;c__Alphaproteobacteria,k__Bacteria;p__Proteobacteria;c__Betaproteobacteria,k__Bacteria;p__Proteobacteria;c__Deltaproteobacteria,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria,k__Bacteria;p__TM6;c__SBRH58,k__Bacteria;p__TM7;c__TM7-3,k__Bacteria;p__Verrucomicrobia;c__Opitutae,k__Bacteria;p__Verrucomicrobia;c__Verrucomicrobiae,k__Bacteria;p__Verrucomicrobia;c__[Pedosphaerae]
S152,0.0,0.0,0.0,123.0,217.0,10.0,33.0,0.0,38.0,0.0,...,17.0,61.0,64.0,55.0,106.0,0.0,0.0,0.0,27.0,0.0
S153,0.0,0.0,2.0,2.0,79.0,0.0,541.0,10.0,0.0,0.0,...,0.0,56.0,113.0,0.0,58.0,0.0,52.0,0.0,0.0,0.0
S18,0.0,0.0,5.0,8.0,82.0,0.0,242.0,0.0,20.0,0.0,...,0.0,153.0,18.0,7.0,143.0,0.0,50.0,11.0,0.0,0.0
S191,0.0,0.0,0.0,0.0,0.0,0.0,496.0,0.0,0.0,62.0,...,0.0,0.0,48.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0
S192,0.0,0.0,0.0,0.0,431.0,0.0,109.0,0.0,14.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S193,0.0,0.0,0.0,40.0,173.0,0.0,288.0,0.0,0.0,0.0,...,0.0,0.0,337.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0
S194,0.0,0.0,0.0,0.0,64.0,0.0,308.0,0.0,0.0,0.0,...,0.0,0.0,199.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S195,0.0,0.0,0.0,0.0,221.0,0.0,134.0,0.0,0.0,0.0,...,0.0,0.0,169.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S197,0.0,0.0,0.0,0.0,0.0,0.0,597.0,0.0,0.0,0.0,...,0.0,0.0,131.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S254,0.0,3.0,16.0,100.0,377.0,0.0,84.0,0.0,32.0,9.0,...,6.0,301.0,14.0,25.0,71.0,0.0,17.0,0.0,18.0,4.0


In [124]:
df3.describe()

Unnamed: 0,Unassigned;__;__,k__Archaea;p__Euryarchaeota;c__DSEG,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea],k__Bacteria;__;__,k__Bacteria;p__;c__,k__Bacteria;p__Actinobacteria;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria,k__Bacteria;p__Bacteroidetes;__,k__Bacteria;p__Bacteroidetes;c__Bacteroidia,k__Bacteria;p__Bacteroidetes;c__Cytophagia,...,k__Bacteria;p__Proteobacteria;__,k__Bacteria;p__Proteobacteria;c__Alphaproteobacteria,k__Bacteria;p__Proteobacteria;c__Betaproteobacteria,k__Bacteria;p__Proteobacteria;c__Deltaproteobacteria,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria,k__Bacteria;p__TM6;c__SBRH58,k__Bacteria;p__TM7;c__TM7-3,k__Bacteria;p__Verrucomicrobia;c__Opitutae,k__Bacteria;p__Verrucomicrobia;c__Verrucomicrobiae,k__Bacteria;p__Verrucomicrobia;c__[Pedosphaerae]
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,...,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,0.611111,0.166667,2.277778,62.111111,224.833333,2.666667,175.277778,4.333333,19.888889,9.666667,...,2.944444,76.055556,101.055556,19.777778,64.5,0.944444,6.611111,4.333333,2.5,0.222222
std,1.851514,0.707107,4.127463,61.271708,167.529541,9.126561,205.910833,6.63325,25.781637,18.068497,...,6.043686,93.027176,91.218853,26.125389,67.776666,4.006938,16.638506,15.852259,7.437979,0.942809
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.5,87.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,36.0,0.0,1.75,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,58.5,207.0,0.0,96.5,0.0,11.5,0.0,...,0.0,52.0,76.0,12.0,46.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,3.0,117.25,356.5,0.0,303.0,10.0,29.75,10.5,...,0.0,135.0,131.0,26.5,103.25,0.0,0.0,0.0,0.0,0.0
max,7.0,3.0,16.0,160.0,617.0,38.0,597.0,20.0,97.0,62.0,...,17.0,301.0,337.0,95.0,245.0,17.0,52.0,67.0,27.0,4.0


In [125]:
# Taxa collapse
# Collapse the feature table to taxonomic level 4 (labels down to order in the
# output below), save it as the order-level table and view it as a DataFrame.
bcc_table = taxa.methods.collapse(
    table=table,
    taxonomy=taxonomy,
    level=4)
bcc_table.collapsed_table.save("output/bcc_order.qza")

df4 = bcc_table.collapsed_table.view(pd.DataFrame)
df4

Unnamed: 0,Unassigned;__;__;__,k__Archaea;p__Euryarchaeota;c__DSEG;o__ArcA07,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__WCHD3-30,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__YLA114,k__Bacteria;__;__;__,k__Bacteria;p__;c__;o__,k__Bacteria;p__Actinobacteria;__;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales,k__Bacteria;p__Bacteroidetes;__;__,k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales,...,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Legionellales,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Methylococcales,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pseudomonadales,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales,k__Bacteria;p__TM6;c__SBRH58;o__,k__Bacteria;p__TM7;c__TM7-3;o__EW055,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__Opitutales,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__[Cerasicoccales],k__Bacteria;p__Verrucomicrobia;c__Verrucomicrobiae;o__Verrucomicrobiales,k__Bacteria;p__Verrucomicrobia;c__[Pedosphaerae];o__[Pedosphaerales]
S152,0.0,0.0,0.0,0.0,123.0,217.0,10.0,33.0,0.0,38.0,...,35.0,0.0,30.0,26.0,0.0,0.0,0.0,0.0,27.0,0.0
S153,0.0,0.0,2.0,0.0,2.0,79.0,0.0,541.0,10.0,0.0,...,0.0,28.0,0.0,30.0,0.0,52.0,0.0,0.0,0.0,0.0
S18,0.0,0.0,5.0,0.0,8.0,82.0,0.0,242.0,0.0,20.0,...,0.0,68.0,40.0,35.0,0.0,50.0,11.0,0.0,0.0,0.0
S191,0.0,0.0,0.0,0.0,0.0,0.0,0.0,496.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0
S192,0.0,0.0,0.0,0.0,0.0,431.0,0.0,109.0,0.0,14.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S193,0.0,0.0,0.0,0.0,40.0,173.0,0.0,288.0,0.0,0.0,...,0.0,0.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0
S194,0.0,0.0,0.0,0.0,0.0,64.0,0.0,308.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S195,0.0,0.0,0.0,0.0,0.0,221.0,0.0,134.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S197,0.0,0.0,0.0,0.0,0.0,0.0,0.0,597.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S254,0.0,3.0,10.0,6.0,100.0,377.0,0.0,84.0,0.0,32.0,...,2.0,35.0,0.0,34.0,0.0,17.0,0.0,0.0,18.0,4.0


In [126]:
df4.describe()

Unnamed: 0,Unassigned;__;__;__,k__Archaea;p__Euryarchaeota;c__DSEG;o__ArcA07,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__WCHD3-30,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__YLA114,k__Bacteria;__;__;__,k__Bacteria;p__;c__;o__,k__Bacteria;p__Actinobacteria;__;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales,k__Bacteria;p__Bacteroidetes;__;__,k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales,...,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Legionellales,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Methylococcales,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pseudomonadales,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales,k__Bacteria;p__TM6;c__SBRH58;o__,k__Bacteria;p__TM7;c__TM7-3;o__EW055,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__Opitutales,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__[Cerasicoccales],k__Bacteria;p__Verrucomicrobia;c__Verrucomicrobiae;o__Verrucomicrobiales,k__Bacteria;p__Verrucomicrobia;c__[Pedosphaerae];o__[Pedosphaerales]
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,...,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,0.611111,0.166667,1.611111,0.666667,62.111111,224.833333,2.666667,175.277778,4.333333,19.888889,...,3.944444,26.722222,6.777778,10.833333,0.944444,6.611111,0.611111,3.722222,2.5,0.222222
std,1.851514,0.707107,3.08962,1.644957,61.271708,167.529541,9.126561,205.910833,6.63325,25.781637,...,11.127455,33.588873,13.554055,14.685928,4.006938,16.638506,2.592725,15.792051,7.437979,0.942809
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.5,87.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,58.5,207.0,0.0,96.5,0.0,11.5,...,0.0,17.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,1.5,0.0,117.25,356.5,0.0,303.0,10.0,29.75,...,0.0,43.0,0.0,24.75,0.0,0.0,0.0,0.0,0.0,0.0
max,7.0,3.0,10.0,6.0,160.0,617.0,38.0,597.0,20.0,97.0,...,35.0,124.0,40.0,35.0,17.0,52.0,11.0,67.0,27.0,4.0


In [128]:
# Taxa collapse
# Collapse the feature table to taxonomic level 5 (labels down to family in
# the output below), save it as the family-level table and view it as a
# DataFrame.
bcc_table = taxa.methods.collapse(
    table=table,
    taxonomy=taxonomy,
    level=5)
bcc_table.collapsed_table.save("output/bcc_family.qza")

df5 = bcc_table.collapsed_table.view(pd.DataFrame)
df5

Unnamed: 0,Unassigned;__;__;__;__,k__Archaea;p__Euryarchaeota;c__DSEG;o__ArcA07;f__,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__WCHD3-30;f__,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__YLA114;f__,k__Bacteria;__;__;__;__,k__Bacteria;p__;c__;o__;f__,k__Bacteria;p__Actinobacteria;__;__;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__ACK-M1,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Microbacteriaceae,...,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Methylococcales;f__Methylococcaceae,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pseudomonadales;f__Pseudomonadaceae,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Sinobacteraceae,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae,k__Bacteria;p__TM6;c__SBRH58;o__;f__,k__Bacteria;p__TM7;c__TM7-3;o__EW055;f__,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__Opitutales;f__Opitutaceae,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__[Cerasicoccales];f__[Cerasicoccaceae],k__Bacteria;p__Verrucomicrobia;c__Verrucomicrobiae;o__Verrucomicrobiales;f__Verrucomicrobiaceae,k__Bacteria;p__Verrucomicrobia;c__[Pedosphaerae];o__[Pedosphaerales];f__[Pedosphaeraceae]
S152,0.0,0.0,0.0,0.0,123.0,217.0,10.0,0.0,0.0,0.0,...,0.0,30.0,20.0,6.0,0.0,0.0,0.0,0.0,27.0,0.0
S153,0.0,0.0,2.0,0.0,2.0,79.0,0.0,31.0,0.0,0.0,...,0.0,0.0,0.0,30.0,0.0,52.0,0.0,0.0,0.0,0.0
S18,0.0,0.0,5.0,0.0,8.0,82.0,0.0,0.0,0.0,46.0,...,0.0,40.0,8.0,27.0,0.0,50.0,11.0,0.0,0.0,0.0
S191,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,496.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0
S192,0.0,0.0,0.0,0.0,0.0,431.0,0.0,0.0,109.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S193,0.0,0.0,0.0,0.0,40.0,173.0,0.0,0.0,288.0,0.0,...,0.0,0.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0
S194,0.0,0.0,0.0,0.0,0.0,64.0,0.0,0.0,308.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S195,0.0,0.0,0.0,0.0,0.0,221.0,0.0,0.0,134.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S197,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121.0,476.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S254,0.0,3.0,10.0,6.0,100.0,377.0,0.0,24.0,0.0,34.0,...,12.0,0.0,21.0,13.0,0.0,17.0,0.0,0.0,18.0,4.0


In [129]:
df5.describe()

Unnamed: 0,Unassigned;__;__;__;__,k__Archaea;p__Euryarchaeota;c__DSEG;o__ArcA07;f__,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__WCHD3-30;f__,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__YLA114;f__,k__Bacteria;__;__;__;__,k__Bacteria;p__;c__;o__;f__,k__Bacteria;p__Actinobacteria;__;__;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__ACK-M1,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Microbacteriaceae,...,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Methylococcales;f__Methylococcaceae,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pseudomonadales;f__Pseudomonadaceae,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Sinobacteraceae,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae,k__Bacteria;p__TM6;c__SBRH58;o__;f__,k__Bacteria;p__TM7;c__TM7-3;o__EW055;f__,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__Opitutales;f__Opitutaceae,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__[Cerasicoccales];f__[Cerasicoccaceae],k__Bacteria;p__Verrucomicrobia;c__Verrucomicrobiae;o__Verrucomicrobiales;f__Verrucomicrobiaceae,k__Bacteria;p__Verrucomicrobia;c__[Pedosphaerae];o__[Pedosphaerales];f__[Pedosphaeraceae]
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,...,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,0.611111,0.166667,1.611111,0.666667,62.111111,224.833333,2.666667,9.777778,100.611111,6.0,...,2.611111,6.777778,3.555556,7.277778,0.944444,6.611111,0.611111,3.722222,2.5,0.222222
std,1.851514,0.707107,3.08962,1.644957,61.271708,167.529541,9.126561,29.161932,171.160447,14.158764,...,5.392139,13.554055,7.285943,12.072358,4.006938,16.638506,2.592725,15.792051,7.437979,0.942809
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.5,87.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,58.5,207.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,1.5,0.0,117.25,356.5,0.0,0.0,127.75,0.0,...,0.0,0.0,0.0,11.25,0.0,0.0,0.0,0.0,0.0,0.0
max,7.0,3.0,10.0,6.0,160.0,617.0,38.0,121.0,496.0,46.0,...,18.0,40.0,21.0,34.0,17.0,52.0,11.0,67.0,27.0,4.0


In [130]:
# Collapse the feature table to taxonomic level 6 (genus) and store the
# collapsed table as a QIIME 2 artifact.
bcc_table = taxa.methods.collapse(table=table, taxonomy=taxonomy, level=6)
bcc_table.collapsed_table.save("output/bcc_genus.qza")

# View the collapsed table as a DataFrame and display it
df6 = bcc_table.collapsed_table.view(pd.DataFrame)
df6

Unnamed: 0,Unassigned;__;__;__;__;__,k__Archaea;p__Euryarchaeota;c__DSEG;o__ArcA07;f__;g__,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__WCHD3-30;f__;g__,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__YLA114;f__;g__,k__Bacteria;__;__;__;__;__,k__Bacteria;p__;c__;o__;f__;g__,k__Bacteria;p__Actinobacteria;__;__;__;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;__;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__ACK-M1;g__Planktophila,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Microbacteriaceae;__,...,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Sinobacteraceae;g__Nevskia,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae;g__Arenimonas,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae;g__Luteibacter,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae;g__Lysobacter,k__Bacteria;p__TM6;c__SBRH58;o__;f__;g__,k__Bacteria;p__TM7;c__TM7-3;o__EW055;f__;g__,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__Opitutales;f__Opitutaceae;g__,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__[Cerasicoccales];f__[Cerasicoccaceae];g__,k__Bacteria;p__Verrucomicrobia;c__Verrucomicrobiae;o__Verrucomicrobiales;f__Verrucomicrobiaceae;g__Verrucomicrobium,k__Bacteria;p__Verrucomicrobia;c__[Pedosphaerae];o__[Pedosphaerales];f__[Pedosphaeraceae];g__
S152,0.0,0.0,0.0,0.0,123.0,217.0,10.0,0.0,0.0,0.0,...,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,27.0,0.0
S153,0.0,0.0,2.0,0.0,2.0,79.0,0.0,31.0,0.0,0.0,...,0.0,0.0,0.0,30.0,0.0,52.0,0.0,0.0,0.0,0.0
S18,0.0,0.0,5.0,0.0,8.0,82.0,0.0,0.0,0.0,46.0,...,8.0,0.0,27.0,0.0,0.0,50.0,11.0,0.0,0.0,0.0
S191,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,496.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0
S192,0.0,0.0,0.0,0.0,0.0,431.0,0.0,0.0,109.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S193,0.0,0.0,0.0,0.0,40.0,173.0,0.0,0.0,288.0,0.0,...,0.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S194,0.0,0.0,0.0,0.0,0.0,64.0,0.0,0.0,308.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S195,0.0,0.0,0.0,0.0,0.0,221.0,0.0,0.0,134.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S197,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121.0,476.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S254,0.0,3.0,10.0,6.0,100.0,377.0,0.0,24.0,0.0,34.0,...,11.0,0.0,13.0,0.0,0.0,17.0,0.0,0.0,18.0,4.0


In [131]:
# Per-column summary statistics of the genus-level collapsed table
df6.describe()

Unnamed: 0,Unassigned;__;__;__;__;__,k__Archaea;p__Euryarchaeota;c__DSEG;o__ArcA07;f__;g__,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__WCHD3-30;f__;g__,k__Archaea;p__[Parvarchaeota];c__[Parvarchaea];o__YLA114;f__;g__,k__Bacteria;__;__;__;__;__,k__Bacteria;p__;c__;o__;f__;g__,k__Bacteria;p__Actinobacteria;__;__;__;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;__;__,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__ACK-M1;g__Planktophila,k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Microbacteriaceae;__,...,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Sinobacteraceae;g__Nevskia,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae;g__Arenimonas,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae;g__Luteibacter,k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Xanthomonadales;f__Xanthomonadaceae;g__Lysobacter,k__Bacteria;p__TM6;c__SBRH58;o__;f__;g__,k__Bacteria;p__TM7;c__TM7-3;o__EW055;f__;g__,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__Opitutales;f__Opitutaceae;g__,k__Bacteria;p__Verrucomicrobia;c__Opitutae;o__[Cerasicoccales];f__[Cerasicoccaceae];g__,k__Bacteria;p__Verrucomicrobia;c__Verrucomicrobiae;o__Verrucomicrobiales;f__Verrucomicrobiaceae;g__Verrucomicrobium,k__Bacteria;p__Verrucomicrobia;c__[Pedosphaerae];o__[Pedosphaerales];f__[Pedosphaeraceae];g__
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,...,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,0.611111,0.166667,1.611111,0.666667,62.111111,224.833333,2.666667,9.777778,100.611111,6.0,...,1.055556,1.888889,2.555556,2.833333,0.944444,6.611111,0.611111,3.722222,2.5,0.222222
std,1.851514,0.707107,3.08962,1.644957,61.271708,167.529541,9.126561,29.161932,171.160447,14.158764,...,3.114902,8.013877,6.93009,8.389419,4.006938,16.638506,2.592725,15.792051,7.437979,0.942809
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.5,87.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,58.5,207.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,1.5,0.0,117.25,356.5,0.0,0.0,127.75,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,7.0,3.0,10.0,6.0,160.0,617.0,38.0,121.0,496.0,46.0,...,11.0,34.0,27.0,30.0,17.0,52.0,11.0,67.0,27.0,4.0


In [89]:
# Build the QIIME 2 CLI call that renders an EMPress community plot from the
# rooted tree, the DADA2 feature table, the sample metadata and the taxonomy.
com_plot_command = [
    'qiime', 'empress', 'community-plot',
    '--i-tree', 'output/tree/rooted_tree.qza',
    '--i-feature-table', 'output/dada2/table.qza',
    '--m-sample-metadata-file', 'metadata.tsv',
    '--m-feature-metadata-file', 'output/taxa.qza',
    '--o-visualization', 'output/community-tree-viz.qzv'
]

# check=True raises subprocess.CalledProcessError on a non-zero exit status,
# so a failed QIIME 2 run stops the notebook here instead of passing silently
# and leaving a missing or stale visualization behind.
subprocess.run(com_plot_command, check=True)

CompletedProcess(args=['qiime', 'empress', 'community-plot', '--i-tree', 'output/tree/rooted_tree.qza', '--i-feature-table', 'output/dada2/table.qza', '--m-sample-metadata-file', 'metadata.tsv', '--m-feature-metadata-file', 'output/taxa.qza', '--o-visualization', 'output/community-tree-viz.qzv'], returncode=0)

## Differential abundance

In [90]:
# Filter samples of the denoised feature table using min_frequency=10
table_filter_samples = feature_table.methods.filter_samples(
    table=dada2_denoised.table,
    min_frequency=10,
)

In [91]:
# Persist the sample-filtered table as a QIIME 2 artifact
table_filter_samples.filtered_table.save("output/filtered_table.qza")

'output/filtered_table.qza'

In [92]:
# Display the sample-filtered table as a DataFrame (rows: samples, columns: feature IDs)
table_filter_samples.filtered_table.view(pd.DataFrame)

Unnamed: 0,26877eeeecad38be9b80f358f5af2481,a984da8b02e73a9385c00ab984460888,1c8f9fbe17d6594ee1decbac7b6f456b,7d27abe44e03899413bac2775bfda6c4,110e9bea64a4858bc707cc5e05d03691,094a549b1efc1c25cac0d79670643c8d,bb194447b895f2bc8768f010925dc16a,a67fa787cc33aefc85bfc491f5166226,9d1b397eeaca4f7817cb0498894eee81,5c4093eeb258839e5c602de285a47be4,...,cbff636fe7122878f0f43197618c9136,7548defac8199fbe01fb7b35624aaee7,ecab93912766e61e693d48d45253f98c,368b4252eecc2189780e110878290253,1a82638b975d09f83e1f2580fcbb45c4,0fb578672a8e71f1d1a8bb8906d9b4e0,4815450713f349a101a306c439579007,662be234c50a405ae2aa642e1c9e2d4d,e9c36a7387d30b625571138eafc7c789,6c1479180f0911f415ba6803da1a1e7f
S152,0.0,0.0,7.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S153,0.0,0.0,114.0,0.0,0.0,0.0,0.0,91.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S18,0.0,0.0,74.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S191,330.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S192,0.0,0.0,0.0,0.0,0.0,230.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S193,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,173.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S194,0.0,0.0,0.0,0.0,254.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S195,0.0,0.0,0.0,0.0,0.0,0.0,214.0,0.0,0.0,162.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S197,0.0,297.0,0.0,255.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
S254,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [93]:
# Convert the sample-filtered frequency table to relative frequencies
table_rel_features = feature_table.methods.relative_frequency(
    table=table_filter_samples.filtered_table,
)

In [94]:
# Show the Results object holding the relative_frequency_table artifact
table_rel_features

Results (name = value)
-----------------------------------------------------------------------------------------------------------------
relative_frequency_table = <artifact: FeatureTable[RelativeFrequency] uuid: e6edfd9c-0bb0-459b-a38d-f5319e549792>

In [95]:
# Core-features analysis on the sample-filtered frequency table.
# The relative-frequency table (table_rel_features) is not used here; the
# visualizer is given the filtered table directly.
core_features = feature_table.visualizers.core_features(
                        table=table_filter_samples.filtered_table)

Invalid limit will be ignored.
  ax.set_ybound(0, max(df['Feature count']) + 1)


In [96]:
core_features.visualization

In [97]:
# Add a pseudocount to the feature table so that it contains no zero counts.
# NOTE(review): this uses the unfiltered dada2_denoised.table rather than
# table_filter_samples.filtered_table — confirm that this is intended.
comp_pseudo = composition.methods.add_pseudocount(table=dada2_denoised.table)

In [98]:
# Display the pseudocount-adjusted composition table as a DataFrame
comp_pseudo.composition_table.view(pd.DataFrame)

Unnamed: 0,26877eeeecad38be9b80f358f5af2481,a984da8b02e73a9385c00ab984460888,1c8f9fbe17d6594ee1decbac7b6f456b,7d27abe44e03899413bac2775bfda6c4,110e9bea64a4858bc707cc5e05d03691,094a549b1efc1c25cac0d79670643c8d,bb194447b895f2bc8768f010925dc16a,a67fa787cc33aefc85bfc491f5166226,9d1b397eeaca4f7817cb0498894eee81,5c4093eeb258839e5c602de285a47be4,...,cbff636fe7122878f0f43197618c9136,7548defac8199fbe01fb7b35624aaee7,ecab93912766e61e693d48d45253f98c,368b4252eecc2189780e110878290253,1a82638b975d09f83e1f2580fcbb45c4,0fb578672a8e71f1d1a8bb8906d9b4e0,4815450713f349a101a306c439579007,662be234c50a405ae2aa642e1c9e2d4d,e9c36a7387d30b625571138eafc7c789,6c1479180f0911f415ba6803da1a1e7f
S152,1.0,1.0,8.0,1.0,1.0,1.0,1.0,7.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
S153,1.0,1.0,115.0,1.0,1.0,1.0,1.0,92.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
S18,1.0,1.0,75.0,1.0,1.0,1.0,1.0,29.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
S191,331.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
S192,1.0,1.0,1.0,1.0,1.0,231.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
S193,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,174.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
S194,1.0,1.0,1.0,1.0,255.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
S195,1.0,1.0,1.0,1.0,1.0,1.0,215.0,1.0,1.0,163.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
S197,1.0,298.0,1.0,256.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
S254,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [99]:
# Run ANCOM on the pseudocount-adjusted composition table, using the
# "SampleName" metadata column.
# NOTE(review): presumably "SampleName" encodes the comparison groups — verify.
comp_ancom = composition.visualizers.ancom(
    table=comp_pseudo.composition_table,
    metadata=metadata.get_column("SampleName"),
)



In [100]:
# Show the ANCOM visualization produced by the previous cell
comp_ancom.visualization