In [1]:
%matplotlib inline

import pandas as pd, numpy as np, seaborn as sns
import matplotlib.pyplot as plt

from qiime.parse import parse_mapping_file
from qiime.format import format_mapping_file
from skbio.io.util import open_file
from scipy.stats import pearsonr, spearmanr
from skbio.stats.distance import permanova, anosim
from skbio import DistanceMatrix

from IPython.display import Image

def load_mf(fn):
    """Read a QIIME mapping file into a DataFrame indexed by ``SampleID``.

    Parameters
    ----------
    fn
        Location of the mapping file; passed unchanged to ``open_file``,
        which opens it in ``'U'`` mode.

    Returns
    -------
    pd.DataFrame
        One row per record returned by ``parse_mapping_file``, labelled
        with the parsed header, with the ``SampleID`` column moved into
        the index.
    """
    with open_file(fn, 'U') as handle:
        # The third item returned by the parser is not used here.
        rows, columns, _ = parse_mapping_file(handle)
        return pd.DataFrame(rows, columns=columns).set_index('SampleID')

def write_mf(f, _df):
    """Write a sample-indexed DataFrame out as a QIIME mapping file.

    Parameters
    ----------
    f
        Destination; passed unchanged to ``open_file``, opened in ``'w'`` mode.
    _df : pd.DataFrame
        Metadata table whose index holds the sample identifiers.

    The text produced by ``format_mapping_file`` is written followed by a
    single trailing newline.
    """
    with open_file(f, 'w') as out:
        column_names = ['SampleID']
        column_names.extend(_df.columns.tolist())
        # itertuples() yields the index value first in every row, which
        # lines it up with the leading 'SampleID' header.
        records = [record for record in _df.itertuples()]
        out.write(format_mapping_file(column_names, records) + '\n')

# Compare the effect of antibiotics

In [5]:
# Full sample metadata table, indexed by SampleID (see load_mf).
mf = load_mf('mapping-file-full.alpha.txt')

There is some ambiguity in the antibiotic-usage information; for the rest of this analysis, we will only use the **definite answers**.

In [6]:
# How many samples fall into each antibiotic-usage category?
mf['Antibiotics'].value_counts()

definite_no     126
definite_yes     35
maybe_yes        23
maybe_no          8
Name: Antibiotics, dtype: int64

In [12]:
# Write a copy of the unweighted UniFrac distance matrix (-i) to -o, keeping
# only the samples whose Antibiotics value in the mapping file (-m) is
# definite_no or definite_yes. Later cells read the -o path, so this cell
# has to run before them.
!filter_distance_matrix.py \
-i beta/15000/unweighted_unifrac_dm.txt \
-o beta/15000/unweighted_unifrac_dm.abxs-only.txt \
-m mapping-file-full.alpha.txt \
-s 'Antibiotics:definite_no,definite_yes'

Load the distance matrix and mapping file:

In [7]:
# Distance matrix restricted to the samples with a definite antibiotics
# answer (the file written by the filter_distance_matrix.py cell).
# `read` is the I/O-registry loader that supersedes the deprecated
# `from_file`; the format is named explicitly so no sniffing is involved.
dm = DistanceMatrix.read('beta/15000/unweighted_unifrac_dm.abxs-only.txt',
                         format='lsmat')

In [8]:
# Metadata rows for exactly the samples present in the filtered distance
# matrix, in the matrix's ID order; copied so later edits do not touch `mf`.
emf = mf.loc[list(dm.ids)].copy()
# Antibiotic-usage counts within each disease state.
emf.groupby('disease_stat')['Antibiotics'].value_counts()

disease_stat  Antibiotics 
IBD           definite_no     35
              definite_yes    12
healthy       definite_no     77
              definite_yes     8
dtype: int64

Among subjects **with IBD**, test whether the samples from subjects who took antibiotics differ from those who did not.

In [12]:
# PERMANOVA on the IBD subjects only, grouped by antibiotic usage.
# The p-value comes from 10000 random permutations, so fix the RNG state
# first; otherwise it changes on every run.
# NOTE(review): assumes permanova draws from NumPy's global RNG (as older
# scikit-bio releases do) - confirm for the installed version.
np.random.seed(42)
# strict=False: IDs absent from the matrix are ignored rather than raising.
permanova(dm.filter(emf[emf.disease_stat == 'IBD'].index, strict=False),
          mf, 'Antibiotics', permutations=10000)

method name               PERMANOVA
test statistic name        pseudo-F
sample size                      47
number of groups                  2
test statistic             0.972712
p-value                     0.50155
number of permutations        10000
Name: PERMANOVA results, dtype: object

Among subjects **without IBD** (healthy), test whether the samples from subjects who took antibiotics differ from those who did not.

In [9]:
# PERMANOVA on the healthy subjects only, grouped by antibiotic usage.
# The p-value comes from 10000 random permutations, so fix the RNG state
# first; otherwise it changes on every run.
# NOTE(review): assumes permanova draws from NumPy's global RNG (as older
# scikit-bio releases do) - confirm for the installed version.
np.random.seed(42)
# strict=False: IDs absent from the matrix are ignored rather than raising.
permanova(dm.filter(emf[emf.disease_stat == 'healthy'].index, strict=False),
          mf, 'Antibiotics', permutations=10000)

method name               PERMANOVA
test statistic name        pseudo-F
sample size                      85
number of groups                  2
test statistic              1.49276
p-value                   0.0137986
number of permutations        10000
Name: PERMANOVA results, dtype: object

Compare the samples by disease state (IBD vs. healthy), using the full, unfiltered distance matrix:

In [10]:
# PERMANOVA by disease state on the full, unfiltered distance matrix.
# NOTE(review): this matrix has more samples than the abxs-only one
# (149 vs. 132 in the recorded outputs), so samples without a definite
# antibiotics answer are included here - confirm that is intended.
# Fix the RNG state so the permutation-based p-value is reproducible.
# NOTE(review): assumes permanova draws from NumPy's global RNG (as older
# scikit-bio releases do) - confirm for the installed version.
np.random.seed(42)
# `read` supersedes the deprecated `from_file` loader.
permanova(DistanceMatrix.read('beta/15000/unweighted_unifrac_dm.txt',
                              format='lsmat'),
          mf, 'disease_stat', permutations=10000)

method name               PERMANOVA
test statistic name        pseudo-F
sample size                     149
number of groups                  2
test statistic              9.46218
p-value                   9.999e-05
number of permutations        10000
Name: PERMANOVA results, dtype: object

Compare the samples by antibiotic history (definite answers only):

In [11]:
# PERMANOVA by antibiotic usage across all samples with a definite answer
# (both disease states together), read from the filtered matrix on disk.
# Fix the RNG state so the permutation-based p-value is reproducible.
# NOTE(review): assumes permanova draws from NumPy's global RNG (as older
# scikit-bio releases do) - confirm for the installed version.
np.random.seed(42)
# `read` supersedes the deprecated `from_file` loader.
permanova(DistanceMatrix.read('beta/15000/unweighted_unifrac_dm.abxs-only.txt',
                              format='lsmat'),
          mf, 'Antibiotics', permutations=10000)

method name                PERMANOVA
test statistic name         pseudo-F
sample size                      132
number of groups                   2
test statistic               1.99256
p-value                   0.00159984
number of permutations         10000
Name: PERMANOVA results, dtype: object