This notebook focuses on running ANCOM on the final fecal timepoint and on the proximal and distal colon samples.

In [1]:
from canvas.stats import ancom

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from skbio.stats.composition import ilr, ilr_inv
from skbio import TreeNode
from gneiss.balances import balanceplot, balance_basis
from gneiss.layouts import barchart_layout
from gneiss.util import match, match_tips, rename_internal_nodes
from gneiss import mixedlm

from biom import load_table
from ete3 import Tree, TreeStyle, NodeStyle, faces, AttrFace, CircleFace, BarChartFace
import statsmodels.formula.api as smf
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
def convert_biom_to_pandas(table):
    """ Split a biom table into a count dataframe and a taxonomy dataframe.

    Parameters
    ----------
    table : biom.Table
        Table whose observation metadata carries a 'taxonomy' entry.

    Returns
    -------
    pd.DataFrame
        Contingency table of counts, with samples as rows and
        features (i.e. OTUs) as columns.
    pd.DataFrame
        Taxonomy table, with one row per OTU and one column per
        taxonomic level.
    """
    sample_ids = table.ids(axis='sample')
    observation_ids = table.ids(axis='observation')

    # matrix_data is laid out observations x samples, so transpose it
    # to get one row per sample.
    dense_counts = np.array(table.matrix_data.todense())
    feature_table = pd.DataFrame(dense_counts.T,
                                 index=sample_ids,
                                 columns=observation_ids)

    # Collect the taxonomy annotation of every OTU, keyed by OTU id.
    lineages = {}
    for otu_id in observation_ids:
        otu_metadata = table.metadata(id=otu_id, axis='observation')
        lineages[otu_id] = otu_metadata['taxonomy']

    # Modify the levels below as necessary.
    # There are typically 7 levels of taxonomy.
    taxonomic_levels = ['kingdom', 'phylum', 'class', 'order',
                        'family', 'genus', 'species']
    taxonomy = pd.DataFrame(lineages, index=taxonomic_levels).T
    return feature_table, taxonomy

# Proximal

We will run ANCOM to investigate differences between the processing types within
the proximal colon samples.

In [3]:
# Load the sample metadata (re-indexed by sample id), the OTU table and the
# reference tree.
mapping = (
    pd.read_table('../processed_data/1706_1145_mapping.txt', index_col=0)
      .set_index('#SampleID')
)
table = load_table('../processed_data/1706_1145_otu_table.biom')
tree = TreeNode.read('../original_data/97_otus.tree')

In [4]:
# keep only the samples taken from the proximal colon
proximal_mapping = mapping.loc[mapping['body_site'] == 'UBERON:proximal colon']

# filter out control groups and blanks
# (each mask is built from proximal_mapping itself, so it is always aligned
# with the frame being filtered rather than with the full mapping)
proximal_mapping = proximal_mapping.loc[proximal_mapping['color'] != 'Not applicable']
proximal_mapping = proximal_mapping.loc[~proximal_mapping['processing'].isin(['HCD', 'Control'])]

In [5]:
def read_filter(val, _id, md):
    """Keep an entry only when its summed values exceed 125."""
    return sum(val) > 125


def md_filter(val, _id, md):
    """Keep a sample only when its id appears in the proximal mapping."""
    return _id in proximal_mapping.index


# Each filter call is applied directly to `table`; the last call is left as
# the final expression so the notebook displays the resulting table summary.
table.filter(md_filter, axis='sample')  # filter out samples not in the mapping file.
table.filter(read_filter, axis='observation')
table.filter(read_filter, axis='sample')

779 x 47 <class 'biom.table.Table'> with 26333 nonzero entries (71% dense)

In [6]:
otu_table, taxonomy = convert_biom_to_pandas(table)

# Store the matched mapping back in proximal_mapping so that the ANCOM call
# that follows groups exactly the samples present in proximal_table
# (the filtered table may contain fewer samples than the mapping).
proximal_table, proximal_mapping = match(otu_table, proximal_mapping)
# Alias kept for any code that still refers to the previous variable name.
prevention_mapping = proximal_mapping

In [7]:
# Every count is shifted by +1 before testing (presumably a pseudocount so the
# table contains no zeros); samples are grouped by their `processing` value.
res = ancom(
    proximal_table + 1,
    proximal_mapping.processing,
    significance_test='permutative-anova',
)

In [8]:
# Write the proximal colon ANCOM results to disk.
res.to_csv('../results/ancom_prevention_proximal.csv')

# Distal

In [44]:
# Reload the sample metadata (re-indexed by sample id), the OTU table and the
# reference tree so the distal analysis starts from unfiltered inputs.
mapping = (
    pd.read_table('../processed_data/1706_1145_mapping.txt', index_col=0)
      .set_index('#SampleID')
)
table = load_table('../processed_data/1706_1145_otu_table.biom')
tree = TreeNode.read('../original_data/97_otus.tree')

In [45]:
# keep only the samples taken from the distal colon
distal_mapping = mapping.loc[mapping['body_site'] == 'UBERON:distal colon']

# filter out control groups and blanks
distal_mapping = distal_mapping.loc[distal_mapping['color'] != 'Not applicable']
distal_mapping = distal_mapping.loc[~distal_mapping['processing'].isin(['HCD', 'Control'])]
distal_mapping.shape

(46, 89)

In [46]:
def read_filter(val, _id, md):
    """Keep an entry only when its summed values exceed 125."""
    return sum(val) > 125


def md_filter(val, _id, md):
    """Keep a sample only when its id appears in the distal mapping."""
    return _id in distal_mapping.index


# Each filter call is applied directly to `table`; the last call is left as
# the final expression so the notebook displays the resulting table summary.
table.filter(md_filter, axis='sample')  # filter out samples not in the mapping file.
table.filter(read_filter, axis='observation')
table.filter(read_filter, axis='sample')

831 x 45 <class 'biom.table.Table'> with 28270 nonzero entries (75% dense)

In [47]:
# Unpack the filtered biom table into a samples-by-OTUs count frame and a
# taxonomy frame.
otu_table, taxonomy = convert_biom_to_pandas(table)

# Rebind both names to the outputs of match() so the table and mapping used
# below come from the same call (presumably restricted to shared samples;
# see gneiss.util.match).
distal_table, distal_mapping = match(otu_table, distal_mapping)

In [48]:
# Every count is shifted by +1 before testing (presumably a pseudocount so the
# table contains no zeros); samples are grouped by their `processing` value.
res = ancom(
    distal_table + 1,
    distal_mapping.processing,
    significance_test='permutative-anova',
)

In [49]:
# Write the distal colon ANCOM results to disk.
res.to_csv('../results/ancom_prevention_distal.csv')