# 01: Pre-processing data with Qiime2

This notebook takes the combined data outputs from Qiita (metadata, taxonomy, and ASV BIOM files) and splits the ASV feature table into genus-level BIOM tables for downstream analysis.

In [89]:
from biom import Table
from biom.util import biom_open
from skbio import DistanceMatrix
from os.path import abspath, join
from qiime2 import Artifact
from os import makedirs
from qiime2.plugins import diversity
from qiime2.plugins.feature_table.methods import filter_samples
from qiime2 import Metadata
import pandas as pd

### Data filepaths

In [10]:
# get biom qza

biom_fp = './data/primate_micro_filtered_rarefied_table.qza' 


# get taxonomy qza

tax_fp = './data/taxonomy_assignment_primate_micro_rarefied.qza'


# get metadata

md_fp = './data/primate_micro_filtered_metadata.txt'

### Read in data

#### Biom table

In [11]:
# read biom qza into qiime2 Artifact class

biom_art = Artifact.load(abspath(biom_fp))

# load the qiime2 artifact into biom Table class

biom = biom_art.view(Table)

#### Taxonomy table

In [12]:
# read biom tax into qiime2 Artifact class

tax_art = Artifact.load(abspath(tax_fp))

# read taxonomy artifact as Pandas DF

tax_df = tax_art.view(pd.DataFrame)

#### Metadata file

In [13]:
# Load the sample metadata file at md_fp into a QIIME 2 Metadata object.
# It is later passed to split_otu_tables_by_tax and queried with
# metadata.get_column(...) when testing group significance.

metadata = Metadata.load(md_fp)

### Write separate Biom tables per genus

In [50]:
# group all the code into a single method to facilitate rerunning

def split_otu_tables_by_tax(biom_t, tax_df, output_dir,
                            metadata,
                            threshold=5,
                            level=5,
                            tax_names=['Kingdom',
                                       'Phylum',
                                       'Class',
                                       'Order',
                                       'Family', 
                                       'Genus',
                                       'Species'],
                            sampling_depth=5,
                            export_viz=False):
    """Split a feature table into one table per taxon and save each as a .qza.

    Every feature's lineage in ``tax_df['Taxon']`` is split on ``'; '`` into
    ranks, truncated at ``level`` and re-joined into a lineage string. For
    each lineage string shared by at least ``threshold`` features, the
    matching observations are filtered out of ``biom_t``, empty rows/columns
    are removed, and the result is imported as a ``FeatureTable[Frequency]``
    artifact and saved in ``output_dir``.

    Parameters
    ----------
    biom_t : biom Table
        Feature table; must provide ``filter`` and ``remove_empty``.
    tax_df : pandas.DataFrame
        Indexed by feature id, with a ``'Taxon'`` column of ``'; '``-delimited
        lineage strings.
    output_dir : str
        Directory for the output files (created if missing).
    metadata : qiime2 Metadata
        Only used when ``export_viz`` is true; forwarded to ``core_metrics``.
    threshold : int
        Minimum number of features a lineage needs in ``tax_df`` to be kept.
    level : int
        Index into ``tax_names`` of the deepest rank to keep.
    tax_names : list of str
        Names of the ranks, shallowest first. The list is never modified.
    sampling_depth : int
        Only used when ``export_viz`` is true; forwarded to ``core_metrics``.
    export_viz : bool
        If true, also run ``diversity.pipelines.core_metrics`` on each table
        and save the Jaccard and Bray-Curtis Emperor visualizations.

    Returns
    -------
    dict
        Maps each retained lineage string to its saved artifact.

    Raises
    ------
    ValueError
        If a lineage string has more ranks than there are names in
        ``tax_names``.
    """
    # Private copy: guarantees the shared default list is never touched and
    # that slicing below yields a list (a tuple would be read as a MultiIndex key).
    tax_names = list(tax_names)

    # One column per rank found in the lineage strings.
    tax_cols = tax_df['Taxon'].str.split('; ', expand=True)

    n_found = tax_cols.shape[1]
    if n_found > len(tax_names):
        raise ValueError(
            'Taxonomy strings contain {0} ranks but tax_names only names '
            '{1}'.format(n_found, len(tax_names)))

    # When no lineage reaches the deepest named rank (e.g. nothing is
    # classified to species) the split yields fewer columns than names and
    # assigning the names would fail; pad the missing ranks with empty columns.
    for missing in range(n_found, len(tax_names)):
        tax_cols[missing] = None

    tax_cols.columns = tax_names

    # Lineage string down to the requested rank; unassigned ranks become ' '.
    cat_cols = tax_names[:level+1]
    print(cat_cols)
    tax_str = tax_cols[cat_cols].fillna(' ').apply(lambda row: '; '.join(row),
                                                   axis=1)

    # Lineages represented by at least `threshold` features.
    counts = tax_str.value_counts()
    tax_thr = counts[counts >= threshold].index

    makedirs(output_dir, exist_ok=True)

    # Filtered artifacts are also kept in memory for downstream analysis.
    tax_arts = {}

    for t in tax_thr:
        # Feature ids assigned to this lineage.
        t_ids = tax_str[tax_str == t].index
        tax_otu = biom_t.filter(t_ids, axis='observation', inplace=False)
        tax_otu.remove_empty(inplace=True)

        # File-system friendly version of the lineage string.
        output_f = t.replace(';','_').replace(' ','')
        output_fn = '{0}.{1}.qza'.format(tax_names[level], output_f)
        output_fp = join(output_dir, output_fn)

        # export as q2 artifact
        tax_art = Artifact.import_data("FeatureTable[Frequency]", tax_otu)
        tax_art.save(output_fp)

        tax_arts[t] = tax_art

        if export_viz:
            # Pick the two Emperor plots by name rather than unpacking every
            # pipeline output positionally.
            core = diversity.pipelines.core_metrics(table=tax_art,
                                                    sampling_depth=sampling_depth,
                                                    metadata=metadata)

            jaccard_fp = join(output_dir, '{0}.{1}.emperor.jaccard.qzv'.format(tax_names[level], output_f))
            bc_fp = join(output_dir, '{0}.{1}.emperor.braycurtis.qzv'.format(tax_names[level], output_f))
            core.jaccard_emperor.save(jaccard_fp)
            core.bray_curtis_emperor.save(bc_fp)

    return tax_arts

In [51]:
# Folder that receives the per-genus tables; create it if it is not there yet
output_dir = './output/genus_asv_tables'
makedirs(output_dir, exist_ok=True)

In [52]:
# Build and save one table per genus-level lineage (index 5 in tax_names)
# that has at least five features in the taxonomy.
genus_tables = split_otu_tables_by_tax(
    biom_t=biom,
    tax_df=tax_df,
    output_dir=output_dir,
    metadata=metadata,
    threshold=5,
    level=5,
)

['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus']


## Calculate Sorensen-Dice beta diversity and Host Specificity metrics

In [147]:
# For every per-genus table: compute a beta-diversity distance matrix, write
# it out as a long-format TSV, then run pairwise PERMANOVA across the groups
# in `metadata_col` and export the resulting visualization data.
metric = 'dice'
distance_dir = 'output/distance'
metadata_col = 'species_geo_captivity'
makedirs(distance_dir, exist_ok=True)

for genus in genus_tables:
    # one output folder per lineage, named like the files written earlier
    tax_dir = join(distance_dir,
                   genus.replace(';','_').replace(' ',''))
    makedirs(tax_dir, exist_ok=True)

    # filter empty samples
    genus_filtered = filter_samples(genus_tables[genus],
                                    min_frequency=1)

    # get distance matrix
    dm = diversity.actions.beta(genus_filtered.filtered_table,
                                metric)

    # write distance matrix to file; the file name follows `metric` so that
    # changing the metric above does not produce a mislabelled file
    dm.distance_matrix.view(DistanceMatrix).to_series().to_csv(
        join(tax_dir, 'distance_list.{0}.tsv'.format(metric)),
        sep='\t')

    try:
        # calculate beta group significance
        bgs = diversity.actions.beta_group_significance(dm.distance_matrix,
                                                        metadata.get_column(metadata_col),
                                                        pairwise=True,
                                                        method='permanova')
    except ValueError:
        print("Problem with taxon %s" % genus)
        # No result exists for this genus. Skip the export: otherwise `bgs`
        # is either undefined (first failure) or still holds the previous
        # genus's result, which would be written into this genus's folder.
        continue

    # write results to directory
    bgs.visualization.export_data(join(tax_dir,'permanova'))
    

  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__RFN20


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Rikenellaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Verrucomicrobia; c__Verrucomicrobiae; o__Verrucomicrobiales; f__Verrucomicrobiaceae; g__Akkermansia




Problem with taxon k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Spirochaetes; c__Spirochaetes; o__Sphaerochaetales; f__Sphaerochaetaceae; g__Sphaerochaeta


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Moraxellaceae; g__Acinetobacter


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; o__Desulfovibrionales; f__Desulfovibrionaceae; g__Desulfovibrio


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__p-75-a5


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Verrucomicrobia; c__Opitutae; o__[Cerasicoccales]; f__[Cerasicoccaceae]; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Betaproteobacteria;  ;  ;  




Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__[Tissierellaceae]; g__Anaerococcus




Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Corynebacteriaceae; g__Corynebacterium


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobrevibacter




Problem with taxon k__Bacteria; p__Actinobacteria; c__Thermoleophilia; o__Solirubrobacterales; f__; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Thermoleophilia; o__Gaiellales; f__Gaiellaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Actinomycetaceae; g__Actinomyces


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Ruminococcaceae; g__Subdoligranulum




Problem with taxon k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Comamonadaceae;  




Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Ruminococcaceae; g__Butyricicoccus


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__[Barnesiellaceae]; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__[Tissierellaceae]; g__Peptoniphilus


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Coriobacteriia; o__Coriobacteriales; f__Coriobacteriaceae; g__Olsenella


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae; g__Sphingomonas




Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Rikenellaceae;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Planctomycetes; c__Planctomycetia; o__Pirellulales; f__Pirellulaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Ruminococcaceae; g__Anaerofilum


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Peptostreptococcaceae;  




Problem with taxon k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria;  ;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__[Odoribacteraceae]; g__Odoribacter


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Eubacteriaceae; g__Pseudoramibacter_Eubacterium


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Mycobacteriaceae; g__Mycobacterium




Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Elusimicrobia; c__Elusimicrobia; o__Elusimicrobiales; f__Elusimicrobiaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; o__Myxococcales; f__; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Hyphomicrobiaceae; g__Rhodoplanes


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria;  ;  ;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Bacteroidetes; c__Sphingobacteriia; o__Sphingobacteriales; f__Sphingobacteriaceae; g__Sphingobacterium


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__; c__; o__; f__; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanosphaera


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Streptococcaceae; g__Lactococcus




Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__[Paraprevotellaceae]; g__Paraprevotella


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__Lachnobacterium


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Anaerovibrio


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Acidobacteria; c__[Chloracidobacteria]; o__RB41; f__Ellin6075; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodospirillales; f__Rhodospirillaceae; g__




Problem with taxon k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Epsilonproteobacteria; o__Campylobacterales; f__Helicobacteraceae; g__Flexispira




Problem with taxon k__Archaea; p__Crenarchaeota; c__Thaumarchaeota; o__Nitrososphaerales; f__Nitrososphaeraceae; g__Candidatus Nitrososphaera


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__TM7; c__TM7-3; o__CW040; f__F16; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__Ruminococcus




Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Enterococcaceae; g__Enterococcus




Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Clostridiaceae; g__02d06


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__BS11; g__




Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Hyphomicrobiaceae; g__Devosia


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Thermoleophilia; o__Solirubrobacterales;  ;  




Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Syntrophomonadaceae; g__Syntrophomonas


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Sinobacteraceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae; g__Paracoccus


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__[Tissierellaceae]; g__ph2


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Enterococcaceae;  




Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Methylobacteriaceae; g__Methylobacterium




Problem with taxon k__Bacteria; p__Bacteroidetes; c__Sphingobacteriia; o__Sphingobacteriales; f__Sphingobacteriaceae;  




Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Frankiaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Streptomycetaceae; g__Streptomyces


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; o__GMD14H09; f__; g__




Problem with taxon k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Sinobacteraceae; g__Steroidobacter




Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Porphyromonadaceae; g__Dysgonomonas


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Synergistetes; c__Synergistia; o__Synergistales; f__Dethiosulfovibrionaceae; g__Pyramidobacter


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Coriobacteriia; o__Coriobacteriales; f__Coriobacteriaceae; g__Enterococcus
Problem with taxon k__Bacteria; p__Actinobacteria; c__Thermoleophilia; o__Solirubrobacterales; f__Solirubrobacteraceae; g__




Problem with taxon k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__cc_115




Problem with taxon k__Bacteria; p__Actinobacteria; c__Coriobacteriia; o__Coriobacteriales; f__Coriobacteriaceae; g__Eggerthella


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Lactobacillaceae;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Oxalobacteraceae;  




Problem with taxon k__Bacteria; p__Acidobacteria; c__Acidobacteriia; o__Acidobacteriales; f__Acidobacteriaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Porphyromonadaceae; g__Tannerella


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pasteurellales; f__Pasteurellaceae;  




Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Bacillaceae;  




Problem with taxon k__Bacteria; p__Planctomycetes; c__Planctomycetia; o__Gemmatales; f__Gemmataceae; g__




Problem with taxon k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__Sharpea


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae; g__Novosphingobium


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__Clostridium
Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodobacterales; f__Rhodobacteraceae;  




In [138]:

# Debugging aid: view whatever filtered table `genus_filtered` currently holds
# (it is assigned inside the distance loop) as a biom Table for inspection.
foo = genus_filtered.filtered_table.view(Table)

In [146]:
# Show the `metadata_col` value of every sample present in the table `foo`.
metadata.get_column(metadata_col).to_series()[foo.ids(axis='sample')]

#SampleID
62017.850.USygt52.M                         human_USA
62017.850.USygt9.T1                         human_USA
62017.850.USygt44.T1                        human_USA
62017.850.TS13                              human_USA
64778.12140.chimp.4         chimp_USA_captive_moeller
                                      ...            
100030.13393.SRR8978415     gorilla_DRC_wild_campbell
100030.13393.SRR8978490    chimp_USA_captive_campbell
100030.13393.SRR8978416     gorilla_DRC_wild_campbell
100030.13393.SRR8978444    chimp_USA_captive_campbell
100030.13393.SRR8978472    chimp_USA_captive_campbell
Name: species_geo_captivity, Length: 214, dtype: object

In [141]:
# Show the sample ids contained in the table `foo`.
foo.ids(axis='sample')

array(['62017.850.USygt52.M', '62017.850.USygt9.T1',
       '62017.850.USygt44.T1', '62017.850.TS13', '64778.12140.chimp.4',
       '64778.12140.chimp.3', '64778.12140.chimp.2',
       '64778.12140.chimp.5', '64778.12140.chimp.11',
       '64778.12140.chimp.8', '64778.12140.chimp.1',
       '64778.12140.chimp.6', '64778.12140.chimp.13',
       '64778.12140.chimp.7', '64778.12140.chimp.9', '65716.12173.99',
       '65716.12173.20', '65716.12173.27', '65716.12173.136',
       '65716.12173.68', '65716.12173.90', '65716.12173.153',
       '65716.12173.29', '65716.12173.42', '65716.12173.45',
       '65716.12173.61', '65716.12173.103', '65716.12173.12',
       '65716.12173.79', '65716.12173.15', '65716.12173.104',
       '65716.12173.31', '65716.12173.120', '65716.12173.145',
       '65716.12173.2', '65716.12173.70', '65716.12173.77',
       '65716.12173.5', '65716.12173.38', '65716.12173.129',
       '65716.12173.60', '65716.12173.155', '65716.12173.44',
       '65716.12173.43', '65716.121