In [1]:
import os
import biom
import pandas as pd
from qiime2 import Artifact
from qiime2 import Metadata
from skbio import OrdinationResults
from skbio.stats.distance import permanova
from skbio.stats.distance import DistanceMatrix
from qiime2.plugins.gemelli.actions import ctf
from qiime2.plugins.diversity.actions import (beta,
                                              beta_phylogenetic,
                                              pcoa)
from qiime2.plugins.diversity.actions import beta_group_significance

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Temporal sub-samples (cumulative timepoint windows)

In [5]:
# Temporal sub-sampling.
#
# For every cumulative time window (timepoints 1..k, k = 1..8) this cell:
#   * keeps only the samples whose `timepoint` falls inside the window,
#   * drops features that sum to zero and rarefies the table,
#   * computes five beta-diversity distances (+ PCoA) and a CTF run,
#   * writes everything to OUT_DIR using "<k>-" as the file-name prefix.

DATA_DIR = '../../data/Halfvarson-IBD-Qiita-1629'
OUT_DIR = os.path.join(DATA_DIR, 'subsampled-time')


def _pcoa_and_save(dist, metric, window, out_dir):
    """Run PCoA on a distance matrix and save both artifacts.

    Parameters
    ----------
    dist
        Distance-matrix artifact (the `.distance_matrix` of a `beta` or
        `beta_phylogenetic` result).
    metric : str
        Label used in the output file names.
    window : int
        Time-window label used as the file-name prefix.
    out_dir : str
        Directory the two `.qza` files are written to.

    Returns
    -------
    The results object returned by `pcoa`; its `.pcoa` member is what gets
    saved.
    """
    ordination = pcoa(dist)
    dist.save(os.path.join(out_dir,
                           '%i-%s-distance.qza' % (window, metric)))
    ordination.pcoa.save(os.path.join(out_dir,
                                      '%i-%s-pcoa.qza' % (window, metric)))
    return ordination


# import tree for unifrac
q2tree = Artifact.load(os.path.join(DATA_DIR, 'insertion-tree.qza'))

# Loop-invariant inputs: read once instead of once per window.
bt_full = biom.load_table(os.path.join(DATA_DIR, 'table-matched.biom'))
mf = pd.read_csv(os.path.join(DATA_DIR, 'metadata-matched.tsv'),
                 sep='\t', index_col=0)

# Every output below is written here; make sure the directory exists.
os.makedirs(OUT_DIR, exist_ok=True)

# sub-sample timepoints
for t in range(2, 8+2):

    # the window covers timepoints 1..(t-1); (t-1) labels the output files
    window = t - 1

    # sub-sample time
    mf_sub_time = mf[mf.timepoint.isin(list(range(1, t)))]
    # non-inplace filter so the full table stays intact for the next window
    bt = bt_full.filter(mf_sub_time.index, inplace=False)

    # filter zero-sum taxa
    keep_taxa = bt.ids('observation')[bt.sum('observation') > 0]
    bt.filter(keep_taxa, axis='observation', inplace=True)

    # rarefy every sample to the smallest per-sample total in this window
    # NOTE(review): the subsampling is random and no seed is set in this
    # cell, so rarefied outputs may differ between runs -- confirm whether a
    # fixed seed is wanted.
    bt_rar = bt.subsample(int(bt.sum(axis='sample').min()),
                          axis='sample', with_replacement=True)

    # metadata: two columns are dropped before wrapping in Metadata, while
    # the TSV written to disk keeps every column
    q2mf = Metadata(mf_sub_time.drop(['dna_extracted',
                                      'physical_specimen_remaining'],
                                     axis=1))
    mf_sub_time.to_csv(os.path.join(OUT_DIR, '%i-metadata.tsv' % window),
                       sep='\t')

    # table
    q2table = Artifact.import_data("FeatureTable[Frequency]", bt)
    q2table_rar = Artifact.import_data("FeatureTable[Frequency]", bt_rar)

    # generate all distances to compare

    # bray-curtis (rarefied table)
    BC_dist = beta(q2table_rar, 'braycurtis').distance_matrix
    BC_OrdinationResults = _pcoa_and_save(BC_dist, 'braycurtis',
                                          window, OUT_DIR)

    # aitchison (un-rarefied table)
    ATCH_dist = beta(q2table, 'aitchison').distance_matrix
    ATCH_OrdinationResults = _pcoa_and_save(ATCH_dist, 'aitchison',
                                            window, OUT_DIR)

    # jaccard (rarefied table)
    JAC_dist = beta(q2table_rar, 'jaccard').distance_matrix
    JAC_OrdinationResults = _pcoa_and_save(JAC_dist, 'jaccard',
                                           window, OUT_DIR)

    # w-unifrac (rarefied table)
    WUNI_dist = beta_phylogenetic(q2table_rar, q2tree,
                                  'weighted_unifrac',
                                  n_jobs=4).distance_matrix
    WUNI_OrdinationResults = _pcoa_and_save(WUNI_dist, 'wunifrac',
                                            window, OUT_DIR)

    # unifrac (rarefied table)
    UNI_dist = beta_phylogenetic(q2table_rar, q2tree,
                                 'unweighted_unifrac',
                                 n_jobs=4).distance_matrix
    UNI_OrdinationResults = _pcoa_and_save(UNI_dist, 'unifrac',
                                           window, OUT_DIR)

    # CTF (un-rarefied table), subjects x timepoints
    ctf_res = ctf(q2table, q2mf,
                  'host_subject_id',
                  'timepoint',
                  max_iterations_als=5,
                  max_iterations_rptm=5,
                  n_initializations=5,
                  n_components=2)
    # save every named output of the CTF run; `_fields` lists the output
    # names, so there is no need to walk `__dict__` and skip bookkeeping keys
    for id_ in ctf_res._fields:
        art_ = getattr(ctf_res, id_)
        art_.save(os.path.join(OUT_DIR,
                               '%i-%s' % (window, id_.replace('_', '-'))))






## Single dataset (all timepoints ≤ 7)

In [11]:
# CTF on the whole dataset (no cumulative windows): load the matched metadata
# and table, keep timepoints up to MAX_TIMEPOINT, run CTF and save every
# output artifact next to the input data.

DATA_DIR = '../../data/Halfvarson-IBD-Qiita-1629'
MAX_TIMEPOINT = 7  # t=8 is removed (not many samples)

# metadata
mf = pd.read_csv(os.path.join(DATA_DIR, 'metadata-matched.tsv'),
                 sep='\t', index_col=0)
mf = mf[mf.timepoint <= MAX_TIMEPOINT]
# two columns are dropped before wrapping the frame in Metadata
q2mf = Metadata(mf.drop(['dna_extracted',
                         'physical_specimen_remaining'],
                        axis=1))

# table: keep only the samples that survived the metadata filter
bt = biom.load_table(os.path.join(DATA_DIR, 'table-matched.biom'))
bt.filter(mf.index, inplace=True)
# filter zero-sum taxa
keep_taxa = bt.ids('observation')[bt.sum('observation') > 0]
bt.filter(keep_taxa, axis='observation', inplace=True)

q2table = Artifact.import_data("FeatureTable[Frequency]", bt)

# CTF, subjects x timepoints
ctf_res = ctf(q2table, q2mf,
              'host_subject_id',
              'timepoint',
              max_iterations_als=5,
              max_iterations_rptm=5,
              n_initializations=5,
              n_components=4)

# save every named output of the CTF run; `_fields` lists the output names,
# so there is no need to walk `__dict__` and skip bookkeeping keys
for id_ in ctf_res._fields:
    art_ = getattr(ctf_res, id_)
    art_.save(os.path.join(DATA_DIR, id_.replace('_', '-')))



