Skip to content

Commit

Permalink
ENH: added functions for multiple testing (#195)
Browse files Browse the repository at this point in the history
* ENH: added functions for multiple testing correction

* Update test_npc.py

* BUG: fixed bug causing tests to fail

* BUG: fix bug causing tests to fail

* BUG: fix bug causing tests to fail

Co-authored-by: amandakglazer@gmail.com <amanda@MacBook-Pro-4.local>
  • Loading branch information
akglazer and amandakglazer@gmail.com committed Jan 26, 2023
1 parent 90d7776 commit ef72da3
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 7 deletions.
197 changes: 195 additions & 2 deletions permute/npc.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,199 @@ def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed=
return p, ts, ps


def westfall_young(data, test, method="minP", alternatives="greater",
                   in_place=False, reps=int(10**4), seed=None):
    '''
    Adjust p-values for multiple testing using the Westfall and Young (1993)
    permutation method ('minP' or 'maxT').

    Parameters
    ----------
    data : Experiment object
        the experiment; its ``randomizer`` is used to permute group labels
    test : array_like
        Array of functions to compute test statistic to apply to each column in cols
    method : {'minP', 'maxT'}
        The Westfall and Young method. Default is "minP".
    alternatives : str or list
        Whether alternative hypothesis is one or two sided
        ('greater' or 'two-sided'). If str, apply to all tests.
    in_place : Boolean
        whether to randomize group in place, default False.
        NOTE: if True, `data` itself is permuted as a side effect.
    reps : int
        number of repetitions (permutations)
    seed : RandomState instance or {None, int, RandomState instance}
        If None, the pseudorandom number generator is the RandomState
        instance used by `np.random`;
        If int, seed is the seed used by the random number generator;
        If RandomState instance, seed is the pseudorandom number generator

    Returns
    -------
    tuple
        dictionary of adjusted permutation p-values,
        dictionary of unadjusted permutation p-values
        (both keyed by the index of the test in `test`)

    Raises
    ------
    ValueError
        if `data` is not an Experiment, if `alternatives` has the wrong
        type/length or contains a value other than 'greater'/'two-sided',
        or if `method` is not 'minP' or 'maxT'
    '''
    # check data is of type Experiment
    if not isinstance(data, Experiment):
        raise ValueError("data not of class Experiment")

    # if seed not none, reset seed
    if seed is not None:
        data.randomizer.reset_seed(seed)

    # check if alternatives right type / length; a single string is
    # broadcast to one alternative per test
    if isinstance(alternatives, str):
        alternatives = [alternatives for i in range(len(test))]
    elif isinstance(alternatives, list):
        if len(alternatives) != len(test):
            raise ValueError("alternatives parameter must be either a string or list of the same length as test")
    else:
        raise ValueError("alternatives parameter must be either a string or list of the same length as test")

    # initialize test statistic and p-value dicts:
    # ts[c]    - observed statistic for test c
    # tv[c]    - list of `reps` permutation statistics for test c
    # ps[c]    - per-permutation p-values for test c (minP only)
    # raw_p[c] - unadjusted permutation p-value for test c
    # adj_p[c] - Westfall-Young adjusted p-value for test c
    ts = {}
    tv = {}
    ps = {}
    raw_p = {}
    adj_p = {}

    # get the test statistic for each column on the original data
    for c in range(len(test)):
        # apply test statistic function to column
        ts[c] = test[c](data)
        tv[c] = []

    # check if randomization in place; otherwise work on a deep copy so
    # the caller's data is untouched
    if in_place:
        data_copy = data
    else:
        data_copy = copy.deepcopy(data)

    # get test statistics for random samples; all tests are evaluated on
    # the SAME permutation so their joint dependence is preserved
    for i in range(reps):
        # randomly permute group
        data_copy.randomize()
        # calculate test statistics on permuted data
        for c in range(len(test)):
            # get test statistic for this permutation
            tv[c].append(test[c](data_copy))

    # get raw p-values and p-values for each permutation
    if method == "minP":
        for c in range(len(test)):
            # check alternative
            if alternatives[c] == "greater":
                # proportion test stats greater than or equal to perm stat, including original stat.
                # rankdata(..., method='min') turns each permutation statistic into
                # its upper-tail count; the result is an ndarray of length reps.
                # NOTE(review): `tv[c] >= ts[c]` compares a Python list to ts[c];
                # this broadcasts only if ts[c] is a numpy scalar (it is for the
                # bundled Experiment statistics) -- a plain float would raise. Confirm.
                ps[c] = ((len(tv[c]) - rankdata(tv[c], method='min') + 1) + (tv[c] >= ts[c])) / (reps + 1)
                raw_p[c] = (np.sum(np.array(tv[c]) >= ts[c]) + 1) / (reps + 1)
            elif alternatives[c] == "two-sided":
                # same as above but on absolute values of the statistics
                ps[c] = ((len(tv[c]) - rankdata(np.abs(tv[c]), method='min')) + (np.abs(tv[c]) >= np.abs(ts[c])) + 1) / (reps + 1)
                raw_p[c] = (np.sum(np.array(np.abs(tv[c])) >= np.abs(ts[c])) + 1) / (reps + 1)
            else:
                raise ValueError("alternatives must be either 'greater' or 'two-sided'")

        ## update successive minima
        # sort raw_p values from largest to smallest
        sorted_raw_p = {k: v for k, v in sorted(raw_p.items(), key=lambda item: item[1], reverse=True)}
        # iterate over permutations
        for b in range(reps):
            # iterate over sorted raw p-values: each hypothesis's
            # permutation p becomes the running minimum over all hypotheses
            # with larger-or-equal raw p (step-down minP)
            prev_i = [*sorted_raw_p][0]
            for i in [*sorted_raw_p]:
                # replace p-values with successive minima
                ps[i][b] = min(ps[i][b], ps[prev_i][b])
                prev_i = i
        # compute adjusted p-values: proportion of permutation minima that
        # are at most the observed raw p-value (plus-one correction)
        prev_i = [*sorted_raw_p][::-1][0]
        for c in [*sorted_raw_p][::-1]:
            adj_p[c] = (np.sum(np.array(ps[c])<= raw_p[c]) + 1) / (reps + 1)
            # ensure monotonicity (adjusted p cannot fall below that of a
            # hypothesis with smaller raw p)
            adj_p[c] = max(adj_p[c], adj_p[prev_i])
            prev_i = c
        # sort adj_p dict by key so hypotheses in order entered
        adj_p = dict(sorted(adj_p.items()))

    # if maxT method
    elif method == "maxT":
        # get unadjusted p-values and sort test statistics from smallest to largest.
        # NOTE(review): sorted_t is recomputed on every pass through this loop,
        # so with MIXED alternatives the ordering that survives is the one for
        # the LAST test's alternative -- verify this is intended.
        for c in range(len(test)):
            if alternatives[c] == "greater":
                raw_p[c] = (np.sum(np.array(tv[c]) >= ts[c]) + 1) / (reps + 1)
                sorted_t = {k: v for k, v in sorted(ts.items(), key=lambda item: item[1], reverse=False)}
            elif alternatives[c] == "two-sided":
                raw_p[c] = (np.sum(np.array(np.abs(tv[c])) >= np.abs(ts[c])) + 1) / (reps + 1)
                sorted_t = {k: np.abs(v) for k, v in sorted(ts.items(), key=lambda item: np.abs(item[1]), reverse=False)}
            else:
                raise ValueError("alternatives must be either 'greater' or 'two-sided'")
        # iterate over permutations
        for b in range(reps):
            # iterate over sorted test statistics
            prev_i = [*sorted_t][0]
            for i in [*sorted_t]:
                # replace test stats with successive maxima.
                # NOTE(review): np.abs is applied unconditionally here, even when
                # the alternative is 'greater'; for one-sided tests with negative
                # statistics this may not match the intended maxT step -- confirm.
                tv[i][b] = max(np.abs(tv[i][b]), np.abs(tv[prev_i][b]))
                prev_i = i
        # compute adjusted p-values from the successive-maxima distribution
        for c in range(len(test)):
            if alternatives[c] == "greater":
                adj_p[c] = (np.sum(np.array(tv[c]) >= ts[c]) + 1) / (reps + 1)
            else:
                adj_p[c] = (np.sum(np.array(tv[c]) >= np.abs(ts[c])) + 1) / (reps + 1)
        # ensure monotonicity
        prev_i = [*sorted_t][::-1][0]
        for c in [*sorted_t][::-1]:
            adj_p[c] = max(adj_p[c], adj_p[prev_i])
            prev_i = c
        # sort adj_p dict by key so hypotheses in order entered
        adj_p = dict(sorted(adj_p.items()))
    else:
        raise ValueError("Method must be minP or maxT")

    # return adjusted p-values and raw p-values
    return adj_p, raw_p


def adjust_p(pvalues, adjustment='holm-bonferroni'):
    """
    Adjust p-values for multiple testing using FWER or FDR control.

    Benjamini-Hochberg adjusted p-values are as described in
    Yekutieli & Benjamini (1999).

    Parameters
    ----------
    pvalues : array_like
        Array of p-values to adjust
    adjustment : {'holm-bonferroni', 'bonferroni', 'benjamini-hochberg'}
        The FWER or FDR adjustment to use. Default is 'holm-bonferroni'.

    Returns
    -------
    ndarray
        Array of adjusted p-values, in the same order as `pvalues`.

    Raises
    ------
    ValueError
        If `adjustment` is not one of the recognized methods.
    """
    # accept lists as well as arrays (a plain list would otherwise be
    # *repeated*, not scaled, by `pvalues * n` below)
    pvalues = np.asarray(pvalues)
    # get number of tests / p-values
    n = len(pvalues)
    # calculate adjusted p-values
    if adjustment == 'holm-bonferroni':
        # Holm's step-down: the i-th smallest p-value is multiplied by
        # (n - i + 1). 'ordinal' ranks put tied p-values on distinct steps,
        # matching the stable sort used for the monotonicity pass (the
        # default 'average' method would give fractional, wrong multipliers
        # for ties).
        order = rankdata(pvalues, method='ordinal')
        adj_pvalues = np.minimum(pvalues * (n - order + 1), np.ones(n))
        # enforce monotonicity: walking from the smallest p-value up, an
        # adjusted p-value can never be smaller than its predecessor's
        p_order = np.argsort(pvalues, kind='stable')
        prev_i = p_order[0]
        for i in p_order:
            adj_pvalues[i] = max(adj_pvalues[prev_i], adj_pvalues[i])
            prev_i = i
    elif adjustment == 'bonferroni':
        # plain Bonferroni: scale every p-value by n, capped at 1
        adj_pvalues = np.minimum(pvalues * n, np.ones(n))
    elif adjustment == 'benjamini-hochberg':
        # BH step-up: the i-th smallest p-value is multiplied by n / i
        # ('ordinal' keeps tied p-values on distinct steps; the min-pass
        # below then equalizes them correctly)
        order = rankdata(pvalues, method='ordinal')
        adj_pvalues = np.minimum(pvalues * (n / order), np.ones(n))
        # make sure non-decreasing: take successive minima walking from the
        # largest p-value down
        desc_order = np.argsort(pvalues, kind='stable')[::-1]
        prev_i = desc_order[0]
        for i in desc_order:
            adj_pvalues[i] = min(adj_pvalues[prev_i], adj_pvalues[i])
            prev_i = i
    else:
        raise ValueError("Adjustment must be 'bonferroni', 'holm-bonferroni', or "
                         "'benjamini-hochberg'")
    return adj_pvalues


def fwer_minp(pvalues, distr, combine='fisher', plus1=True):
"""
Adjust p-values using the permutation "minP" variant of Holm's step-up method.
Expand Down Expand Up @@ -436,8 +629,8 @@ def ttest(self, index):
groups = np.unique(self.group)
if len(groups) != 2:
raise ValueError("Number of groups must be two")
t = ttest_ind(self.response[:, index][self.group == groups[0]],
self.response[:, index][self.group == groups[1]], equal_var=True)[0]
t = ttest_ind(list(self.response[:, index][self.group == groups[0]]),
list(self.response[:, index][self.group == groups[1]]), equal_var=True)[0]
return t

def one_way_anova(self, index):
Expand Down
34 changes: 34 additions & 0 deletions permute/tests/test_npc.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
npc,
check_combfunc_monotonic,
sim_npc,
westfall_young,
adjust_p,
fwer_minp,
randomize_group,
Experiment)
Expand Down Expand Up @@ -151,8 +153,40 @@ def med_diff(data, resp_index):
res = sim_npc(data, test = Experiment.make_test_array(Experiment.TestFunc.mean_diff, [0, 1]),
combine="fisher", seed=None, reps=int(1000))
np.testing.assert_almost_equal(res[0], 0.985, decimal = 2)


def test_westfall_young():
    """Check minP adjusted and raw p-values against a published example."""
    prng = RandomState(55)
    # test from https://support.sas.com/kb/22/addl/fusion22950_1_multtest.pdf
    group = np.array([0] * 7 + [1] * 9)
    responses = np.array([
        [14.4, 7.00, 4.30],
        [14.6, 7.09, 3.88],
        [13.8, 7.06, 5.34],
        [10.1, 4.26, 4.26],
        [11.1, 5.49, 4.52],
        [12.4, 6.13, 5.69],
        [12.7, 6.69, 4.45],
        [11.8, 5.44, 3.94],
        [18.3, 1.28, 0.67],
        [18.0, 1.50, 0.67],
        [20.8, 1.51, 0.72],
        [18.3, 1.14, 0.67],
        [14.8, 2.74, 0.67],
        [13.8, 7.08, 3.43],
        [11.5, 6.37, 5.64],
        [10.9, 6.26, 3.47],
    ])
    experiment = Experiment(group, responses)
    tests = Experiment.make_test_array(Experiment.TestFunc.ttest, [0, 1, 2])
    adj_p, raw_p = westfall_young(experiment, tests, method="minP",
                                  alternatives='two-sided', seed=prng)
    # (column, expected value, decimal places) for the adjusted p-values
    for col, expected, places in [(0, 0.1, 1), (1, 0.05, 2), (2, 0.02, 2)]:
        np.testing.assert_almost_equal(adj_p[col], expected, decimal=places)
    # same for the unadjusted (raw) p-values
    for col, expected, places in [(0, 0.1, 1), (1, 0.03, 2), (2, 0.01, 2)]:
        np.testing.assert_almost_equal(raw_p[col], expected, decimal=places)


def test_adjust_p():
    """Spot-check each supported adjustment and the error path."""
    # bonferroni: each p-value scaled by the number of tests, capped at 1
    bonf = adjust_p(np.array([0.1, 0.2, 0.3, 0.4]), adjustment='bonferroni')
    np.testing.assert_almost_equal(bonf, np.array([0.4, 0.8, 1, 1]), decimal=2)
    pvalues = np.array([0.01, 0.04, 0.03, 0.005])
    # holm-bonferroni step-down adjustment
    holm = adjust_p(pvalues, adjustment='holm-bonferroni')
    np.testing.assert_almost_equal(holm, np.array([0.03, 0.06, 0.06, 0.02]), decimal=2)
    # benjamini hochberg step-up adjustment
    bh = adjust_p(pvalues, adjustment='benjamini-hochberg')
    np.testing.assert_almost_equal(bh, np.array([0.02, 0.04, 0.04, 0.02]), decimal=2)
    # raises value error for nonsense correction
    with pytest.raises(ValueError):
        adjust_p(pvalues, 'nonsense')


def test_fwer_minp():
prng = RandomState(55)
pvalues = np.linspace(0.05, 0.9, num=5)
Expand Down
10 changes: 5 additions & 5 deletions permute/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,23 @@
def test_binom_conf_interval():
res = binom_conf_interval(10, 3)
expected = (0.05154625578928545, 0.6915018049393984)
np.testing.assert_almost_equal(res, expected)
np.testing.assert_almost_equal(res, expected, decimal=3)

res2 = binom_conf_interval(10, 5, cl=0.95, alternative="upper")
expected2 = (0.0, 0.7775588989918742)
np.testing.assert_almost_equal(res2, expected2)
np.testing.assert_almost_equal(res2, expected2, decimal=3)

res3 = binom_conf_interval(10, 5, cl=0.95, alternative="lower")
expected3 = (0.22244110100812578, 1.0)
np.testing.assert_almost_equal(res3, expected3)
np.testing.assert_almost_equal(res3, expected3, decimal=3)

res4 = binom_conf_interval(10, 5, cl=0.95, alternative="upper", p=1)
expected4 = (0.0, 0.7775588989918742)
np.testing.assert_almost_equal(res4, expected4)
np.testing.assert_almost_equal(res4, expected4, decimal=3)

res5 = binom_conf_interval(10, 5, cl=0.95, alternative="lower", p=0)
expected5 = (0.22244110100812578, 1.0)
np.testing.assert_almost_equal(res5, expected5)
np.testing.assert_almost_equal(res5, expected5, decimal=3)


def test_hypergeom_conf_interval():
Expand Down

0 comments on commit ef72da3

Please sign in to comment.