In [1]:
import os
import biom
import pandas as pd
from qiime2 import Artifact
from qiime2 import Metadata
from skbio import OrdinationResults
from skbio.stats.distance import DistanceMatrix
from qiime2.plugins.gemelli.actions import ctf
from qiime2.plugins.diversity.actions import (beta,
                                              beta_phylogenetic,
                                              pcoa)
from qiime2.plugins.diversity.actions import beta_group_significance


In [2]:
# set all simulations
# Root directory whose sub-directories each hold one simulation.
sim_root = '../../data/data-driven-simulations'
# Keep only real directories: this skips stray files such as macOS
# '.DS_Store' (and any other non-directory entry that would otherwise make
# os.listdir(sim) fail further down). The listing is sorted because
# os.listdir returns entries in arbitrary order; sorting makes the
# processing order identical on every run.
sims = [os.path.join(sim_root, dir_)
        for dir_ in sorted(os.listdir(sim_root))
        if os.path.isdir(os.path.join(sim_root, dir_))]

# import tree for unifrac
# Loaded once here and shared by every simulation in the loop below.
q2tree = Artifact.load('../../data/Halfvarson-IBD-Qiita-1629/insertion-tree.qza')
def _save_distance_and_pcoa(dist, out_dir, prefix):
    """Run PCoA on a distance-matrix artifact and save both artifacts.

    Parameters
    ----------
    dist : qiime2 Artifact
        Distance matrix, i.e. the ``.distance_matrix`` result of ``beta`` or
        ``beta_phylogenetic``.
    out_dir : str
        Directory the two files are written into.
    prefix : str
        File-name prefix; the outputs are ``<prefix>-distance.qza`` and
        ``<prefix>-pcoa.qza``.
    """
    # The ordination is computed before anything is written, so a PCoA
    # failure leaves no partial output for this metric.
    ordination = pcoa(dist)
    dist.save(os.path.join(out_dir, prefix + '-distance.qza'))
    ordination.pcoa.save(os.path.join(out_dir, prefix + '-pcoa.qza'))


# run q2-pipeline (using q2-api) for each sim
for sim in sims:
    # Report simulation directories that hold four or fewer entries.
    # NOTE(review): this only prints; the simulation is processed either way.
    # Presumably it marks simulations without previous outputs -- confirm.
    if len(os.listdir(sim)) <= 4:
        print(sim)

    # Load the raw table and subsample every sample (with replacement) down
    # to the smallest per-sample total found in this table.
    # NOTE(review): no random seed is set here, so the rarefied table is
    # expected to differ between runs -- confirm whether that is intended.
    bt = biom.load_table(os.path.join(sim, 'table.biom'))
    min_depth = int(bt.sum(axis='sample').min())
    bt_rar = bt.subsample(min_depth, axis='sample', with_replacement=True)

    # Sample metadata, indexed by its first column; two columns are removed
    # before wrapping it as QIIME 2 Metadata.
    mf = pd.read_csv(os.path.join(sim, 'metadata.tsv'), sep='\t', index_col=0)
    q2mf = Metadata(mf.drop(['dna_extracted',
                             'physical_specimen_remaining'],
                            axis=1))

    # Import the raw and rarefied tables as artifacts and save them as qza.
    q2table = Artifact.import_data("FeatureTable[Frequency]", bt)
    q2table.save(os.path.join(sim, 'table.qza'))
    q2table_rar = Artifact.import_data("FeatureTable[Frequency]", bt_rar)
    q2table_rar.save(os.path.join(sim, 'table_rarefied.qza'))

    # generate all distances to compare
    # Non-phylogenetic metrics. Bray-Curtis and Jaccard use the rarefied
    # table, while Aitchison uses the un-rarefied one. The metric name is
    # also the output file prefix.
    for metric, table in (('braycurtis', q2table_rar),
                          ('aitchison', q2table),
                          ('jaccard', q2table_rar)):
        dist = beta(table, metric).distance_matrix
        _save_distance_and_pcoa(dist, sim, metric)

    # Phylogenetic metrics (weighted, then unweighted UniFrac), computed on
    # the rarefied table with the shared insertion tree.
    for prefix, metric in (('w-unifrac', 'weighted_unifrac'),
                           ('unifrac', 'unweighted_unifrac')):
        dist = beta_phylogenetic(q2table_rar, q2tree,
                                 metric,
                                 n_jobs=4).distance_matrix
        _save_distance_and_pcoa(dist, sim, prefix)

    # CTF on the un-rarefied table, with 'host_subject_id' and 'timepoint'
    # as the two metadata columns passed to gemelli's ctf action.
    ctf_res = ctf(q2table, q2mf,
                  'host_subject_id',
                  'timepoint',
                  max_iterations_als=5,
                  max_iterations_rptm=5,
                  n_initializations=5,
                  n_components=2)
    # Save every result artifact under its field name, with underscores
    # turned into dashes. '_fields' is the bookkeeping entry of the results
    # object, not an artifact, so it is skipped. The path is passed without
    # an extension, exactly as before.
    for id_, art_ in ctf_res.__dict__.items():
        if id_ != '_fields':
            art_.save(os.path.join(sim, id_.replace('_', '-')))


