# 01: Pre-processing data with Qiime2

This notebook takes the combined data outputs from Qiita (metadata, taxonomy, and ASV BIOM files) and splits them into genus-level BIOM tables for downstream analysis.

In [1]:
from biom import Table
from biom.util import biom_open
from skbio import DistanceMatrix
from os.path import abspath, join
from qiime2 import Artifact
from os import makedirs
from qiime2.plugins import diversity
from qiime2.plugins.feature_table.methods import filter_samples
from qiime2 import Metadata
import pandas as pd

### Data filepaths

In [2]:
# get biom qza

biom_fp = './data/american_gut_filtered_rarefied_table.qza' 


# get taxonomy qza

tax_fp = './data/taxonomy_assignment_american_gut_rarefied.qza'


# get metadata

md_fp = './data/american_gut_filtered_metadata.txt'

### Read in data

#### Biom table

In [3]:
# Load the feature-table artifact from the absolute form of its path
biom_abs_fp = abspath(biom_fp)
biom_art = Artifact.load(biom_abs_fp)

# View the QIIME 2 artifact as an in-memory biom Table
biom = biom_art.view(Table)

#### Taxonomy table

In [4]:
# Load the taxonomy artifact from the absolute form of its path
tax_abs_fp = abspath(tax_fp)
tax_art = Artifact.load(tax_abs_fp)

# View the taxonomy artifact as a pandas DataFrame
tax_df = tax_art.view(pd.DataFrame)

#### Metadata file

In [5]:
# Load the sample metadata file at md_fp as a QIIME 2 Metadata object.
# It is handed to split_otu_tables_by_tax and later queried with
# get_column() to obtain the grouping column for the PERMANOVA tests.

metadata = Metadata.load(md_fp)

### Write separate Biom tables per genus

In [6]:
# group all the code into a single method to facilitate rerunning

def split_otu_tables_by_tax(biom_t, tax_df, output_dir,
                            metadata,
                            threshold=5,
                            level=5,
                            tax_names=['Kingdom',
                                       'Phylum',
                                       'Class',
                                       'Order',
                                       'Family',
                                       'Genus',
                                       'Species'],
                            sampling_depth=5,
                            export_viz=False):
    """Split a feature table into one table per taxon at a chosen rank.

    Each row of ``tax_df`` is assigned a taxon label by splitting its
    ``'Taxon'`` string on ``'; '`` and re-joining the ranks up to and
    including ``tax_names[level]``. For every label carried by at least
    ``threshold`` rows, the matching observations are pulled out of
    ``biom_t``, empty rows/columns are dropped, and the result is saved
    in ``output_dir`` as ``<rank>.<label>.qza``.

    Parameters
    ----------
    biom_t : biom.Table
        Feature table to split; it is filtered with ``inplace=False``.
    tax_df : pandas.DataFrame
        Taxonomy table with a ``'Taxon'`` column of ``'; '``-separated
        lineage strings. Its index supplies the observation ids passed
        to ``biom_t.filter``.
    output_dir : str
        Directory for the output files; created if missing.
    metadata : qiime2.Metadata
        Passed to ``core_metrics``; only used when ``export_viz`` is True.
    threshold : int
        Minimum number of ``tax_df`` rows a taxon label must have to be
        written out.
    level : int
        Zero-based index into ``tax_names`` of the deepest rank included
        in the label (the default 5 selects ``'Genus'``).
    tax_names : sequence of str
        Rank names, in order, used as column names for the split lineage.
    sampling_depth : int
        Passed to ``core_metrics``; only used when ``export_viz`` is True.
    export_viz : bool
        When True, also run ``diversity.pipelines.core_metrics`` on each
        table and save its Jaccard and Bray-Curtis Emperor plots.

    Returns
    -------
    dict
        Maps each taxon label to its ``FeatureTable[Frequency]`` artifact.

    Raises
    ------
    ValueError
        If a lineage string has more ranks than ``tax_names`` has names.
    """
    # work on a private list: slicing a list keeps ``cat_cols`` a list,
    # which pandas needs in order to treat it as a column selection
    tax_names = list(tax_names)

    # one column per rank; shorter lineages are padded with None by pandas
    tax_cols = tax_df['Taxon'].str.split('; ', expand=True)

    if tax_cols.shape[1] > len(tax_names):
        raise ValueError('Taxonomy strings contain {0} ranks but only {1} '
                         'rank names were given'.format(tax_cols.shape[1],
                                                        len(tax_names)))

    # if no lineage reaches the deepest rank the split yields fewer columns
    # than there are names; add the missing ranks as empty columns so the
    # column assignment below cannot fail on a length mismatch
    for missing in range(tax_cols.shape[1], len(tax_names)):
        tax_cols[missing] = None

    tax_cols.columns = tax_names

    # make concatenated tax string at appropriate level
    cat_cols = tax_names[:level + 1]
    print(cat_cols)
    tax_str = tax_cols[cat_cols].fillna(' ').apply(lambda x: '; '.join(x),
                                                   axis=1)

    # find taxa at or above the threshold number of features
    tax_counts = tax_str.value_counts()
    tax_thr = tax_counts[tax_counts >= threshold].index

    # make output dir
    makedirs(output_dir, exist_ok=True)

    # every filtered table is written to file and also kept in memory
    # (keyed by taxon label) for downstream analysis
    tax_arts = {}

    for t in tax_thr:
        # ids of the features assigned to this taxon
        t_ids = tax_str.index[tax_str == t]
        tax_otu = biom_t.filter(t_ids, axis='observation', inplace=False)
        tax_otu.remove_empty(inplace=True)

        # file-name-friendly form of the label: ';' -> '_', spaces removed
        output_f = t.replace(';', '_').replace(' ', '')
        output_fn = '{0}.{1}.qza'.format(tax_names[level], output_f)
        output_fp = join(output_dir, output_fn)

        # export as q2 artifact
        tax_art = Artifact.import_data("FeatureTable[Frequency]", tax_otu)
        tax_art.save(output_fp)

        tax_arts[t] = tax_art

        if export_viz:
            # only the two Emperor plots are kept, so fetch them by name
            # from the results object instead of unpacking every output
            core = diversity.pipelines.core_metrics(
                table=tax_art,
                sampling_depth=sampling_depth,
                metadata=metadata)

            jaccard_fp = join(output_dir, '{0}.{1}.emperor.jaccard.qzv'.format(tax_names[level], output_f))
            bc_fp = join(output_dir, '{0}.{1}.emperor.braycurtis.qzv'.format(tax_names[level], output_f))
            core.jaccard_emperor.save(jaccard_fp)
            core.bray_curtis_emperor.save(bc_fp)

    return tax_arts

In [7]:
# Directory that receives the per-genus feature tables
output_dir = './output/genus_asv_tables'

# Create it (and any missing parents); exist_ok=True makes re-running this cell harmless
makedirs(output_dir, exist_ok=True)

In [8]:
# Split the table at genus level (index 5 of the rank names), keeping only
# genera that have at least 5 features assigned to them.
genus_tables = split_otu_tables_by_tax(biom_t=biom,
                                       tax_df=tax_df,
                                       output_dir=output_dir,
                                       metadata=metadata,
                                       threshold=5,
                                       level=5)

['Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus']


## Calculate Sorensen-Dice beta diversity and Host Specificity metrics

In [9]:
metric = 'dice'
distance_dir = 'output/distance'
metadata_col = 'species_geo_captivity'
makedirs(distance_dir, exist_ok=True)

# The grouping column is the same for every genus, so look it up once.
# Doing it outside the try block also means a bad column name fails loudly
# instead of being reported as a per-genus problem.
group_column = metadata.get_column(metadata_col)

for genus in genus_tables:
    # one output directory per genus, named after the taxon label with
    # ';' replaced by '_' and spaces removed
    tax_dir = join(distance_dir,
                   genus.replace(';', '_').replace(' ', ''))
    makedirs(tax_dir, exist_ok=True)

    # filter empty samples
    genus_filtered = filter_samples(genus_tables[genus],
                                    min_frequency=1)

    # get distance matrix
    dm = diversity.actions.beta(genus_filtered.filtered_table,
                                metric)

    # write distance matrix to file as a list of pairwise distances;
    # the file name follows whichever metric was used
    dm_fp = join(tax_dir, 'distance_list.{0}.tsv'.format(metric))
    dm.distance_matrix.view(DistanceMatrix).to_series().to_csv(dm_fp,
                                                               sep='\t')

    try:
        # calculate beta group significance
        bgs = diversity.actions.beta_group_significance(dm.distance_matrix,
                                                        group_column,
                                                        pairwise=True,
                                                        method='permanova')
    except ValueError:
        print("Problem with taxon %s" % genus)
        # No result exists for this genus. Skip the export so that the
        # previous genus's results are not written into this directory
        # (and so that a failure on the first genus is not a NameError).
        continue

    # write results to directory
    bgs.visualization.export_data(join(tax_dir, 'permanova'))
    

  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__RFN20


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rickettsiales; f__; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rickettsiales; f__mitochondria;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Rikenellaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria;  ;  ;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Corynebacteriaceae; g__Corynebacterium




Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Porphyromonadaceae; g__Porphyromonas


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__[Tissierellaceae]; g__Anaerococcus




Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__[Tissierellaceae]; g__Peptoniphilus




Problem with taxon k__Bacteria; p__Actinobacteria; c__Coriobacteriia; o__Coriobacteriales; f__Coriobacteriaceae; g__Atopobium


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__[Barnesiellaceae]; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Verrucomicrobia; c__Opitutae; o__[Cerasicoccales]; f__[Cerasicoccaceae]; g__




Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Paenibacillaceae; g__Paenibacillus


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Comamonadaceae;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales;  ;  




Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__Anaerostipes


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Veillonella


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Thermoleophilia; o__Gaiellales; f__Gaiellaceae; g__




Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Actinomycetaceae; g__Actinomyces


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Ruminococcaceae; g__Subdoligranulum


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria;  ;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobrevibacter


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Verrucomicrobia; c__Opitutae; o__HA64; f__; g__




Problem with taxon k__Bacteria; p__Fusobacteria; c__Fusobacteriia; o__Fusobacteriales; f__Fusobacteriaceae; g__Fusobacterium


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Peptostreptococcaceae;  




Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae; g__Sphingomonas


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Sinobacteraceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__Ruminococcus




Problem with taxon k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__[Paraprevotellaceae]; g__Paraprevotella


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli;  ;  ;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Eubacteriaceae; g__Pseudoramibacter_Eubacterium


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanosphaera


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Veillonellaceae; g__Anaerovibrio


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Synergistetes; c__Synergistia; o__Synergistales; f__Dethiosulfovibrionaceae; g__Pyramidobacter


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Streptococcaceae; g__Lactococcus


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria;  ;  ;  ;  




Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__[Tissierellaceae]; g__WAL_1855D


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Epsilonproteobacteria; o__Campylobacterales; f__Helicobacteraceae; g__Flexispira


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__Holdemania


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Hyphomicrobiaceae; g__Devosia




Problem with taxon k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Xanthomonadales; f__Xanthomonadaceae;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Enterococcaceae;  




Problem with taxon k__Bacteria; p__Bacteroidetes; c__Sphingobacteriia; o__Sphingobacteriales; f__Sphingobacteriaceae; g__Sphingobacterium


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Synergistetes; c__Synergistia; o__Synergistales; f__Synergistaceae; g__




Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Micrococcaceae;  




Problem with taxon k__Bacteria; p__Actinobacteria; c__Coriobacteriia; o__Coriobacteriales; f__Coriobacteriaceae; g__Eggerthella




Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhodospirillales; f__Rhodospirillaceae; g__




Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Microbacteriaceae;  


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__MB-A2-108; o__0319-7L14; f__; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Ruminococcaceae; g__Anaerofilum


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Bacilli; o__Lactobacillales; f__Leuconostocaceae; g__Weissella


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Streptomycetaceae; g__Streptomyces


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Methylocystaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Lachnospiraceae; g__Lachnobacterium




Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Methylobacteriaceae; g__Methylobacterium


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Planctomycetes; c__Planctomycetia; o__Gemmatales; f__Gemmataceae; g__




Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Micrococcaceae; g__Arthrobacter




Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Micromonosporaceae;  




Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Clostridiaceae; g__02d06




Problem with taxon k__Bacteria; p__Bacteroidetes; c__Flavobacteriia; o__Flavobacteriales; f__[Weeksellaceae]; g__Wautersiella




Problem with taxon k__Bacteria; p__Planctomycetes; c__Planctomycetia; o__Gemmatales; f__Isosphaeraceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Ruminococcaceae; g__Sporobacter




Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__; g__




Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae; g__Kaistobacter




Problem with taxon k__Bacteria; p__Firmicutes; c__Erysipelotrichi; o__Erysipelotrichales; f__Erysipelotrichaceae; g__cc_115


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Frankiaceae; g__


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))
  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Nocardioidaceae; g__Nocardioides




Problem with taxon k__Bacteria; p__Acidobacteria; c__Acidobacteria-6; o__iii1-15; f__mb2424; g__




Problem with taxon k__Bacteria; p__Verrucomicrobia; c__[Spartobacteria]; o__[Chthoniobacterales]; f__[Chthoniobacteraceae]; g__DA101


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Sphingomonadales; f__Sphingomonadaceae; g__Novosphingobium




Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__[Tissierellaceae]; g__1-68




Problem with taxon k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Caulobacterales; f__Caulobacteraceae; g__




Problem with taxon k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__[Tissierellaceae]; g__Finegoldia


  return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups))


Problem with taxon k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Pseudomonadales; f__Pseudomonadaceae;  


In [10]:

# Debugging aid: genus_filtered still holds the result for the last genus
# processed by the loop above; view its filtered table as a biom Table.
foo = genus_filtered.filtered_table.view(Table)

In [11]:
# Show the grouping value (metadata_col) of every sample present in foo
metadata.get_column(metadata_col).to_series()[foo.ids(axis='sample')]

#SampleID
65716.12173.2                  chimp_TZA_wild_moeller
67596.10317.000098752                       human_USA
67596.10317.000098897                       human_USA
67596.10317.000107269                       human_USA
67596.10317.000052320                       human_USA
67596.10317.000107243                       human_USA
67596.10317.000107970                       human_USA
67596.10317.000102692                       human_USA
67596.10317.000101152                       human_USA
67596.10317.000103709                       human_USA
67596.10317.000107299                       human_USA
67596.10317.000092924                       human_USA
67596.10317.000103024                       human_USA
67596.10317.000027863                       human_USA
67596.10317.000106817                       human_USA
67596.10317.000090519                       human_USA
80072.12821.EPRCMB16     douc_VNM_semicaptive_clayton
Name: species_geo_captivity, dtype: object

In [12]:
# List the sample ids present in foo
foo.ids(axis='sample')

array(['65716.12173.2', '67596.10317.000098752', '67596.10317.000098897',
       '67596.10317.000107269', '67596.10317.000052320',
       '67596.10317.000107243', '67596.10317.000107970',
       '67596.10317.000102692', '67596.10317.000101152',
       '67596.10317.000103709', '67596.10317.000107299',
       '67596.10317.000092924', '67596.10317.000103024',
       '67596.10317.000027863', '67596.10317.000106817',
       '67596.10317.000090519', '80072.12821.EPRCMB16'], dtype=object)