# Distance-Based tests

We can examine the distance-based effects used for PERMANOVA and the Mantel test. Because the observations are not independent, permutation-based testing is required.

Due to the time requirements of permutation tests, these simulations were run using cluster computing.

In [1]:
import functools
import itertools
from multiprocessing import Pool
import os
import pickle

import numpy as np
import pandas as pd
import scipy
import skbio

from emp_power.power import subsample_power

We'll set a random seed so the same simulations are performed every time.

In [2]:
# Seeds NumPy's global random number generator
np.random.seed(25) 

We can also set up the number of CPUs to use and whether existing results should be overwritten.

In [3]:
# Number of CPUs available for parallel processing
num_cpus = 1
# Flag intended to control whether existing result files are regenerated
# NOTE(review): confirm that every processing loop honours this flag
overwrite = False

In [4]:
# Root directory under which the simulation data and power results live
sim_location = './simulations/'
# Creates the root directory if it is not there yet
if not os.path.exists(sim_location):
    os.makedirs(sim_location)

In [10]:
# Number of simulations to process: the loops below read
# simulation_0.p through simulation_<num_rounds - 1>.p
num_rounds = 5

In [7]:
# NOTE(review): not referenced by any visible cell -- presumably filled in
# elsewhere in the notebook; confirm or remove
distributions = {}

We're going to vary the number of permutations used in each test (`depths`) and the significance level of the test (`alphas`).

In [8]:
# Candidate numbers of permutations for the permutation tests
depths = [19, 99, 999]
# Candidate significance levels for the power calculation
alphas = [0.05, 0.01, 0.001]

# Power Calculation

## Permanova

In [24]:
def calculate_permanova_power(sim_fp, power_fp, depth, alpha):
    """Estimates empirical PERMANOVA power for one pickled simulation.

    The simulation is read from `sim_fp`, power is estimated by
    subsampling without bootstrapping, and a summary dictionary is
    pickled to `power_fp`.

    Parameters
    ----------
    sim_fp : str
        Path to the pickled simulation. Its 'samples' entry is unpacked
        into a distance matrix and a grouping series with labels 0 and 1.
    power_fp : str
        Path the pickled summary dictionary is written to.
    depth : int
        Number of permutations passed to the PERMANOVA test.
    alpha : float
        Significance level passed to `subsample_power`.

    Returns
    -------
    None
        The summary is written to `power_fp` rather than returned.
    """
    # Reads the pickled simulation
    with open(sim_fp, 'rb') as sim_file:
        simulation = pickle.load(sim_file)

    # Splits the sample ids by their group label
    distance_matrix, grouping = simulation['samples']
    first_group = grouping.loc[grouping == 0].index
    second_group = grouping.loc[grouping == 1].index
    group_ids = [first_group, second_group]

    # Subsample sizes are derived from the size of the first group since
    # the draws are made without bootstrapping
    num_obs = len(group_ids[0])
    counts = np.arange(5, num_obs - 10, 10)

    def permanova_p_value(ids):
        """Returns the PERMANOVA p-value for the pooled sample ids."""
        pooled = np.hstack(ids)
        result = skbio.stats.distance.permanova(
            distance_matrix=distance_matrix.filter(pooled),
            grouping=grouping.loc[pooled],
            permutations=depth,
        )
        return result['p-value']

    # Power is estimated before the full-data test so the random draws
    # happen in the same order on every run
    empirical_power = subsample_power(
        test=permanova_p_value,
        samples=group_ids,
        counts=counts,
        num_iter=1000,
        num_runs=3,
        alpha=alpha,
        bootstrap=False,
    )
    full_data_p = permanova_p_value(group_ids)

    # Collects the results and the settings that produced them
    power_summary = {
        'emperical_power': empirical_power,
        'traditional_power': None,
        'original_p': full_data_p,
        'num_obs': num_obs,
        'depth': depth,
        'alpha': alpha,
        'counts': counts,
    }

    # Writes the summary to disk
    with open(power_fp, 'wb') as power_file:
        pickle.dump(power_summary, power_file)

In [25]:
# Single (depth, alpha) combination used for this test run:
# `depth` is the number of permutations handed to the test and
# `alpha` the significance level handed to the power calculation
depth = 20
alpha = 0.05

In [26]:
# Only the single (depth, alpha) pair defined above is processed here; the
# loop over every combination is left commented out:
# for (depth, alpha) in itertools.product(depths, alphas):
# Directory holding the pickled permanova simulations
sim_dir = os.path.join(sim_location, 'data/permanova')
# Output directory, named after the permutation depth and alpha in use
power_dir = os.path.join(sim_location,
                         'power/permanova/iter_test/iter_%i-alpha_%s'
                         % (depth, alpha))
# Creates the output directory if it is not there yet
if not os.path.exists(power_dir):
    os.makedirs(power_dir)

In [27]:
# Sanity check that the output directory exists
os.path.exists(power_dir)

True

In [28]:
# Calculates permanova power for each simulation in turn. The simulations
# are processed serially; the multiprocessing pool is left disabled:
# p = Pool(num_cpus)
for i in range(num_rounds):
    sim_fp = os.path.join(sim_dir, 'simulation_%i.p' % i)
    power_fp = os.path.join(power_dir, 'simulation_%i.p' % i)
    # Existing results are kept unless `overwrite` asks for them to be
    # regenerated
    if overwrite or not os.path.exists(power_fp):
        calculate_permanova_power(sim_fp=sim_fp,
                                  power_fp=power_fp,
                                  depth=depth,
                                  alpha=alpha,
                                  )
        

## Mantel Calculation

In [None]:
def calculate_mantel_power(i, sim_dir, power_dir, alpha, depth):
    """Wrapper to calculate power for the mantel test

    Loads simulation `i` from `sim_dir`, estimates the power of the mantel
    test by subsampling the observations without bootstrapping, and pickles
    a summary dictionary into `power_dir`.

    Parameters
    ----------
    i : int
        Index of the simulation; the file read and the file written are
        both named 'simulation_<i>.p'.
    sim_dir : str
        Directory containing the pickled simulation.
    power_dir : str
        Directory the pickled summary dictionary is written to.
    alpha : float
        Significance level passed to `subsample_power`.
    depth : int
        Number of permutations passed to the mantel test.

    Returns
    -------
    None
        The summary is written to disk rather than returned.
    """
    # Loads the simulation data
    with open(os.path.join(sim_dir, 'simulation_%i.p' % i), 'rb') as f_:
        sim = pickle.load(f_)

    # Draws the two matrices (presumably skbio DistanceMatrix objects) and
    # identifies the samples. The ids are wrapped in a one-element list
    # because `subsample_power` takes a list of sample groups.
    x, y = sim['samples']
    samples = [np.array(x.ids)]

    # Sets up the counts vector from the number of observations. The length
    # of `samples` itself is always 1, which would give an empty vector.
    num_obs = len(samples[0])
    counts = np.arange(5, num_obs - 10, 10)

    def test(ids):
        """Returns the mantel p-value for the subset of ids drawn"""
        obs = ids[0]
        res = skbio.stats.distance.mantel(
            x.filter(obs),
            y.filter(obs),
            permutations=depth
        )
        # The p-value is the second element of the mantel result
        return res[1]

    # Calculates power
    power = subsample_power(test=test,
                            samples=samples,
                            counts=counts,
                            num_iter=1000,
                            num_runs=3,
                            alpha=alpha,
                            bootstrap=False,
                            draw_mode='matched'
                            )

    # Generates the summary dictionary
    power_summary = {'emperical_power': power,
                     'traditional_power': None,
                     'original_p': test(samples),
                     'num_obs': num_obs,
                     'depth': depth,
                     'alpha': alpha,
                     'counts': counts,
                     }

    # Saves the file
    with open(os.path.join(power_dir, 'simulation_%i.p' % i), 'wb') as f_:
        pickle.dump(power_summary, f_)