diff --git a/dabest/__init__.py b/dabest/__init__.py index 6f7d114e..109a9822 100644 --- a/dabest/__init__.py +++ b/dabest/__init__.py @@ -1,6 +1,14 @@ from ._api import load, prop_dataset from ._stats_tools import effsize as effsize +from ._stats_tools import confint_2group_diff as ci_2g from ._effsize_objects import TwoGroupsEffectSize, PermutationTest from ._dabest_object import Dabest -__version__ = "2024.03.29" + +import os +if os.environ.get('SKIP_NUMBA_COMPILE') != '1': + from ._stats_tools.precompile import precompile_all, _NUMBA_COMPILED + if not _NUMBA_COMPILED: + precompile_all() + +__version__ = "2024.03.30" \ No newline at end of file diff --git a/dabest/_bootstrap_tools.py b/dabest/_bootstrap_tools.py index 0951ffb5..7a3e979c 100644 --- a/dabest/_bootstrap_tools.py +++ b/dabest/_bootstrap_tools.py @@ -66,7 +66,9 @@ def __init__( reps: int = 5000, # Number of bootstrap iterations to perform. ): # Turn to pandas series. - x1 = pd.Series(x1).dropna() + # x1 = pd.Series(x1).dropna() + x1 = x1[~np.isnan(x1)] + diff = False # Initialise stat_function @@ -89,7 +91,9 @@ def __init__( if x2 is None: raise ValueError("Please specify x2.") - x2 = pd.Series(x2).dropna() + # x2 = pd.Series(x2).dropna() + x2 = x1[~np.isnan(x2)] + if len(x1) != len(x2): raise ValueError("x1 and x2 are not the same length.") @@ -134,7 +138,8 @@ def __init__( elif x2 is not None and paired is None: diff = True - x2 = pd.Series(x2).dropna() + # x2 = pd.Series(x2).dropna() + x2 = x2[~np.isnan(x2)] # Generate statarrays for both arrays. ref_statarray = sns.algorithms.bootstrap(x1, **sns_bootstrap_kwargs) exp_statarray = sns.algorithms.bootstrap(x2, **sns_bootstrap_kwargs) diff --git a/dabest/_dabest_object.py b/dabest/_dabest_object.py index d5f0db01..8e260887 100644 --- a/dabest/_dabest_object.py +++ b/dabest/_dabest_object.py @@ -112,7 +112,7 @@ def __init__( # Determine the kind of estimation plot we need to produce. if all([isinstance(i, (str, int, float)) for i in idx]): # flatten out idx. - all_plot_groups = pd.unique(pd.Series([t for t in idx])).tolist() + all_plot_groups = pd.Series([t for t in idx]).unique().tolist() if len(idx) > len(all_plot_groups): err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again." raise ValueError(err0) @@ -122,7 +122,7 @@ def __init__( self.__idx = (idx,) elif all([isinstance(i, (tuple, list)) for i in idx]): - all_plot_groups = pd.unique(pd.Series([tt for t in idx for tt in t])).tolist() + all_plot_groups = pd.Series([tt for t in idx for tt in t]).unique().tolist() actual_groups_given = sum([len(i) for i in idx]) diff --git a/dabest/_delta_objects.py b/dabest/_delta_objects.py index 1827c1b2..df896573 100644 --- a/dabest/_delta_objects.py +++ b/dabest/_delta_objects.py @@ -388,13 +388,14 @@ def __init__(self, effectsizedataframe, permutation_count, # compute the variances of each control group and each test group control_var=[] test_var=[] + grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)} for j, current_tuple in enumerate(idx): cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() + control = grouped_data[cname] control_var.append(np.var(control, ddof=1)) tname = current_tuple[1] - test = dat[dat[xvar] == tname][yvar].copy() + test = grouped_data[tname] test_var.append(np.var(test, ddof=1)) self.__control_var = np.array(control_var) self.__test_var = np.array(test_var) @@ -414,7 +415,7 @@ def __init__(self, effectsizedataframe, permutation_count, self.__bootstraps) # Compute the weighted average mean difference based on the raw data - self.__difference = es.weighted_delta(self.__effsizedf["difference"], + self.__difference = es.weighted_delta(np.array(self.__effsizedf["difference"]), self.__group_var) sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta) diff --git a/dabest/_effsize_objects.py b/dabest/_effsize_objects.py index b562b603..cbcb55ac 100644 --- a/dabest/_effsize_objects.py +++ b/dabest/_effsize_objects.py @@ -9,6 +9,7 @@ import pandas as pd import lqrt from scipy.stats import norm +import numpy as np from numpy import array, isnan, isinf, repeat, random, isin, abs, var from numpy import sort as npsort from numpy import nan as npnan @@ -357,12 +358,17 @@ def _perform_statistical_test(self): # References: # https://en.wikipedia.org/wiki/McNemar%27s_test - df_temp = pd.DataFrame({"control": self.__control, "test": self.__test}) - x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)]) - x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)]) - x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)]) - x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)]) - table = [[x1, x2], [x3, x4]] + # df_temp = pd.DataFrame({"control": self.__control, "test": self.__test}) + # x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)]) + # x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)]) + # x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)]) + # x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)]) + # table = [[x1, x2], [x3, x4]] + x1 = np.sum((self.__control == 0) & (self.__test == 0)) + x2 = np.sum((self.__control == 0) & (self.__test == 1)) + x3 = np.sum((self.__control == 1) & (self.__test == 0)) + x4 = np.sum((self.__control == 1) & (self.__test == 1)) + table = np.array([[x1, x2], [x3, x4]]) _mcnemar = mcnemar(table, exact=True, correction=True) self.__pvalue_mcnemar = _mcnemar.pvalue self.__statistic_mcnemar = _mcnemar.statistic @@ -861,18 +867,19 @@ def __pre_calc(self): out = [] reprs = [] + grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)} if self.__delta2: mixed_data = [] for j, current_tuple in enumerate(idx): if self.__is_paired != "sequential": cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() + control = grouped_data[cname] for ix, tname in enumerate(current_tuple[1:]): if self.__is_paired == "sequential": cname = current_tuple[ix] - control = dat[dat[xvar] == cname][yvar].copy() - test = dat[dat[xvar] == tname][yvar].copy() + control = grouped_data[cname] + test = grouped_data[tname] mixed_data.append(control) mixed_data.append(test) bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff( @@ -888,13 +895,13 @@ def __pre_calc(self): for j, current_tuple in enumerate(idx): if self.__is_paired != "sequential": cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() + control = grouped_data[cname] for ix, tname in enumerate(current_tuple[1:]): if self.__is_paired == "sequential": cname = current_tuple[ix] - control = dat[dat[xvar] == cname][yvar].copy() - test = dat[dat[xvar] == tname][yvar].copy() + control = grouped_data[cname] + test = grouped_data[tname] result = TwoGroupsEffectSize( control, @@ -1055,16 +1062,18 @@ def __calc_lqrt(self): out = [] + grouped_data = {name:group[yvar].copy() for name, group in dat.groupby(xvar)} + for j, current_tuple in enumerate(db_obj.idx): if self.__is_paired != "sequential": cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() + control = grouped_data[cname] for ix, tname in enumerate(current_tuple[1:]): if self.__is_paired == "sequential": cname = current_tuple[ix] - control = dat[dat[xvar] == cname][yvar].copy() - test = dat[dat[xvar] == tname][yvar].copy() + control = grouped_data[cname] + test = grouped_data[tname] if self.__is_paired: # Refactored here in v0.3.0 for performance issues. diff --git a/dabest/_modidx.py b/dabest/_modidx.py index eb260d99..f483a4be 100644 --- a/dabest/_modidx.py +++ b/dabest/_modidx.py @@ -25,6 +25,8 @@ 'dabest/_stats_tools/confint_2group_diff.py'), 'dabest._stats_tools.confint_2group_diff._create_two_group_jackknife_indexes': ( 'API/confint_2group_diff.html#_create_two_group_jackknife_indexes', 'dabest/_stats_tools/confint_2group_diff.py'), + 'dabest._stats_tools.confint_2group_diff.bootstrap_indices': ( 'API/confint_2group_diff.html#bootstrap_indices', + 'dabest/_stats_tools/confint_2group_diff.py'), 'dabest._stats_tools.confint_2group_diff.calculate_group_var': ( 'API/confint_2group_diff.html#calculate_group_var', 'dabest/_stats_tools/confint_2group_diff.py'), 'dabest._stats_tools.confint_2group_diff.calculate_weighted_delta': ( 'API/confint_2group_diff.html#calculate_weighted_delta', @@ -42,11 +44,17 @@ 'dabest._stats_tools.confint_2group_diff.create_jackknife_indexes': ( 'API/confint_2group_diff.html#create_jackknife_indexes', 'dabest/_stats_tools/confint_2group_diff.py'), 'dabest._stats_tools.confint_2group_diff.create_repeated_indexes': ( 'API/confint_2group_diff.html#create_repeated_indexes', - 'dabest/_stats_tools/confint_2group_diff.py')}, - 'dabest._stats_tools.effsize': { 'dabest._stats_tools.effsize._compute_hedges_correction_factor': ( 'API/effsize.html#_compute_hedges_correction_factor', + 'dabest/_stats_tools/confint_2group_diff.py'), + 'dabest._stats_tools.confint_2group_diff.delta2_bootstrap_loop': ( 'API/confint_2group_diff.html#delta2_bootstrap_loop', + 'dabest/_stats_tools/confint_2group_diff.py')}, + 'dabest._stats_tools.effsize': { 'dabest._stats_tools.effsize._cliffs_delta_core': ( 'API/effsize.html#_cliffs_delta_core', + 'dabest/_stats_tools/effsize.py'), + 'dabest._stats_tools.effsize._compute_hedges_correction_factor': ( 'API/effsize.html#_compute_hedges_correction_factor', 'dabest/_stats_tools/effsize.py'), 'dabest._stats_tools.effsize._compute_standardizers': ( 'API/effsize.html#_compute_standardizers', 'dabest/_stats_tools/effsize.py'), + 'dabest._stats_tools.effsize._mann_whitney_u': ( 'API/effsize.html#_mann_whitney_u', + 'dabest/_stats_tools/effsize.py'), 'dabest._stats_tools.effsize.cliffs_delta': ( 'API/effsize.html#cliffs_delta', 'dabest/_stats_tools/effsize.py'), 'dabest._stats_tools.effsize.cohens_d': ( 'API/effsize.html#cohens_d', @@ -61,6 +69,8 @@ 'dabest/_stats_tools/effsize.py'), 'dabest._stats_tools.effsize.weighted_delta': ( 'API/effsize.html#weighted_delta', 'dabest/_stats_tools/effsize.py')}, + 'dabest._stats_tools.precompile': { 'dabest._stats_tools.precompile.precompile_all': ( 'API/precompile.html#precompile_all', + 'dabest/_stats_tools/precompile.py')}, 'dabest.forest_plot': { 'dabest.forest_plot.extract_plot_data': ( 'API/forest_plot.html#extract_plot_data', 'dabest/forest_plot.py'), 'dabest.forest_plot.forest_plot': ('API/forest_plot.html#forest_plot', 'dabest/forest_plot.py'), diff --git a/dabest/_stats_tools/confint_2group_diff.py b/dabest/_stats_tools/confint_2group_diff.py index c599e178..afdb44b2 100644 --- a/dabest/_stats_tools/confint_2group_diff.py +++ b/dabest/_stats_tools/confint_2group_diff.py @@ -3,9 +3,10 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/API/confint_2group_diff.ipynb. # %% auto 0 -__all__ = ['create_jackknife_indexes', 'create_repeated_indexes', 'compute_meandiff_jackknife', 'compute_bootstrapped_diff', - 'compute_delta2_bootstrapped_diff', 'compute_meandiff_bias_correction', 'compute_interval_limits', - 'calculate_group_var', 'calculate_weighted_delta'] +__all__ = ['create_jackknife_indexes', 'create_repeated_indexes', 'compute_meandiff_jackknife', 'bootstrap_indices', + 'compute_bootstrapped_diff', 'delta2_bootstrap_loop', 'compute_delta2_bootstrapped_diff', + 'compute_meandiff_bias_correction', 'compute_interval_limits', 'calculate_group_var', + 'calculate_weighted_delta'] # %% ../../nbs/API/confint_2group_diff.ipynb 4 import numpy as np @@ -13,11 +14,12 @@ from numpy import mean as npmean from numpy import sum as npsum from numpy.random import PCG64, RandomState -import pandas as pd +from numba import njit, prange from scipy.stats import norm from numpy import isnan # %% ../../nbs/API/confint_2group_diff.ipynb 5 +@njit(cache=True, parallel=True) def create_jackknife_indexes(data): """ Given an array-like, creates a jackknife bootstrap. @@ -34,18 +36,25 @@ def create_jackknife_indexes(data): Generator that yields all jackknife bootstrap samples. """ - index_range = arange(0, len(data)) - return (delete(index_range, i) for i in index_range) + n = len(data) + indexes = np.empty((n, n - 1), dtype=np.int64) + for i in prange(n): + indexes[i] = np.concatenate((np.arange(i), np.arange(i + 1, n))) + return indexes +@njit(cache=True, parallel=True) def create_repeated_indexes(data): """ Convenience function. Given an array-like with length N, returns a generator that yields N indexes [0, 1, ..., N]. """ - index_range = arange(0, len(data)) - return (index_range for i in index_range) + n = len(data) + indexes = np.empty((n, n), dtype=np.int64) # Pre-allocate the output array + for i in prange(n): + indexes[i, :] = np.arange(n) # Fill each row with the full index range + return indexes def _create_two_group_jackknife_indexes(x0, x1, is_paired): @@ -113,6 +122,20 @@ def _calc_accel(jack_dist): return numer / denom +@njit(cache=True) # parallelization must be turned off for random number generation +def bootstrap_indices(is_paired, x0_len, x1_len, resamples, random_seed): + np.random.seed(random_seed) + indices = np.empty((resamples, x0_len if is_paired else x0_len + x1_len), dtype=np.int64) + + for i in range(resamples): + if is_paired: + indices[i, :x0_len] = np.random.choice(x0_len, x0_len) + else: + indices[i, :x0_len] = np.random.choice(x0_len, x0_len) + indices[i, x0_len:x0_len+x1_len] = np.random.choice(x1_len, x1_len) + return indices + + def compute_bootstrapped_diff( x0, x1, is_paired, effect_size, resamples=5000, random_seed=12345 ): @@ -120,27 +143,60 @@ def compute_bootstrapped_diff( from . import effsize as __es - rng = RandomState(PCG64(random_seed)) - - out = np.repeat(np.nan, resamples) - x0_len = len(x0) - x1_len = len(x1) + x0_len, x1_len = len(x0), len(x1) + indices = bootstrap_indices(is_paired, x0_len, x1_len, resamples, random_seed) + out = np.empty(resamples, dtype=np.float64) - for i in range(int(resamples)): + for i in range(resamples): if is_paired: - if x0_len != x1_len: - raise ValueError("The two arrays do not have the same length.") - random_idx = rng.choice(x0_len, x0_len, replace=True) - x0_sample = x0[random_idx] - x1_sample = x1[random_idx] + x0_sample = x0[indices[i, :x0_len]] + x1_sample = x1[indices[i, :x0_len]] else: - x0_sample = rng.choice(x0, x0_len, replace=True) - x1_sample = rng.choice(x1, x1_len, replace=True) + x0_sample = x0[indices[i, :x0_len]] + x1_sample = x1[indices[i, x0_len:x0_len+x1_len]] out[i] = __es.two_group_difference(x0_sample, x1_sample, is_paired, effect_size) return out +@njit(cache=True) # parallelization must be turned off for random number generation +def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_paired): + np.random.seed(rng_seed) + out_delta_g = np.empty(resamples) + deltadelta = np.empty(resamples) + + n1, n2, n3, n4 = len(x1), len(x2), len(x3), len(x4) + if is_paired: + if n1 != n2 or n3 != n4: + raise ValueError("Each control group must have the same length as its corresponding test group in paired analysis.") + + + # Bootstrapping + for i in range(resamples): + # Paired or unpaired resampling + if is_paired: + indices_1 = np.random.choice(len(x1),len(x1)) + indices_2 = np.random.choice(len(x3),len(x3)) + x1_sample, x2_sample = x1[indices_1], x2[indices_1] + x3_sample, x4_sample = x3[indices_2], x4[indices_2] + else: + indices_1 = np.random.randint(0, len(x1), len(x1)) + indices_2 = np.random.randint(0, len(x2), len(x2)) + indices_3 = np.random.randint(0, len(x3), len(x3)) + indices_4 = np.random.randint(0, len(x4), len(x4)) + x1_sample, x2_sample = x1[indices_1], x2[indices_2] + x3_sample, x4_sample = x3[indices_3], x4[indices_4] + + # Calculating deltas + delta_1 = np.mean(x2_sample) - np.mean(x1_sample) + delta_2 = np.mean(x4_sample) - np.mean(x3_sample) + delta_delta = delta_2 - delta_1 + + deltadelta[i] = delta_delta + out_delta_g[i] = delta_delta / pooled_sd + + return out_delta_g, deltadelta + def compute_delta2_bootstrapped_diff( x1: np.ndarray, # Control group 1 @@ -158,8 +214,6 @@ def compute_delta2_bootstrapped_diff( """ - rng = RandomState(PCG64(random_seed)) - x1, x2, x3, x4 = map(np.asarray, [x1, x2, x3, x4]) # Calculating pooled sample standard deviation @@ -179,33 +233,7 @@ def compute_delta2_bootstrapped_diff( if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0: raise ValueError("Pooled sample standard deviation is NaN or zero.") - out_delta_g = np.empty(resamples) - deltadelta = np.empty(resamples) - - # Bootstrapping - for i in range(resamples): - # Paired or unpaired resampling - if is_paired: - if len(x1) != len(x2) or len(x3) != len(x4): - raise ValueError("Each control group must have the same length as its corresponding test group in paired analysis.") - indices_1 = rng.choice(len(x1), len(x1), replace=True) - indices_2 = rng.choice(len(x3), len(x3), replace=True) - - x1_sample, x2_sample = x1[indices_1], x2[indices_1] - x3_sample, x4_sample = x3[indices_2], x4[indices_2] - else: - x1_sample = rng.choice(x1, len(x1), replace=True) - x2_sample = rng.choice(x2, len(x2), replace=True) - x3_sample = rng.choice(x3, len(x3), replace=True) - x4_sample = rng.choice(x4, len(x4), replace=True) - - # Calculating deltas - delta_1 = np.mean(x2_sample) - np.mean(x1_sample) - delta_2 = np.mean(x4_sample) - np.mean(x3_sample) - delta_delta = delta_2 - delta_1 - - deltadelta[i] = delta_delta - out_delta_g[i] = delta_delta / pooled_sample_sd + out_delta_g, deltadelta = delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sample_sd, random_seed, is_paired) # Empirical delta_g calculation delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd @@ -242,6 +270,7 @@ def _compute_alpha_from_ci(ci): return (100.0 - ci) / 100.0 +@njit(cache=True) def _compute_quantile(z, bias, acceleration): numer = bias + z denom = 1 - (acceleration * numer) @@ -277,6 +306,7 @@ def compute_interval_limits(bias, acceleration, n_boots, ci=95): return low, high +@njit(cache=True) def calculate_group_var(control_var, control_N, test_var, test_N): return control_var / control_N + test_var / test_N @@ -288,6 +318,7 @@ def calculate_weighted_delta(group_var, differences): weight = 1 / group_var denom = np.sum(weight) - num = np.sum(weight[i] * differences[i] for i in range(0, len(weight))) - + num = 0.0 + for i in range(len(weight)): + num += weight[i] * differences[i] return num / denom diff --git a/dabest/_stats_tools/effsize.py b/dabest/_stats_tools/effsize.py index 402745cc..2711d021 100644 --- a/dabest/_stats_tools/effsize.py +++ b/dabest/_stats_tools/effsize.py @@ -5,10 +5,12 @@ # %% ../../nbs/API/effsize.ipynb 4 from __future__ import annotations import numpy as np +from numba import njit import warnings from scipy.special import gamma from scipy.stats import mannwhitneyu + # %% auto 0 __all__ = ['two_group_difference', 'func_difference', 'cohens_d', 'cohens_h', 'hedges_g', 'cliffs_delta', 'weighted_delta'] @@ -62,6 +64,10 @@ def two_group_difference(control:list|tuple|np.ndarray, #Accepts lists, tuples, """ + if ~isinstance(control, np.ndarray): + control = np.array(control) + if ~isinstance(test, np.ndarray): + test = np.array(test) if effect_size == "mean_diff": return func_difference(control, test, np.mean, is_paired) @@ -117,19 +123,11 @@ def func_difference(control:list|tuple|np.ndarray, # NaNs are automatically disc err = "The two arrays supplied do not have the same length." raise ValueError(err) - control_nan = np.where(np.isnan(control))[0] - test_nan = np.where(np.isnan(test))[0] + non_nan_mask = ~np.isnan(control) & ~np.isnan(test) + control_non_nan = control[non_nan_mask] + test_non_nan = test[non_nan_mask] - indexes_to_drop = np.unique(np.concatenate([control_nan, - test_nan])) - - good_indexes = [i for i in range(0, len(control)) - if i not in indexes_to_drop] - - control = control[good_indexes] - test = test[good_indexes] - - return func(test - control) + return func(test_non_nan - control_non_nan) control = control[~np.isnan(control)] @@ -138,6 +136,7 @@ def func_difference(control:list|tuple|np.ndarray, # NaNs are automatically disc # %% ../../nbs/API/effsize.ipynb 7 +@njit(cache=True) def cohens_d(control:list|tuple|np.ndarray, test:list|tuple|np.ndarray, is_paired:str=None # If not None, the paired Cohen's d is returned. @@ -182,12 +181,6 @@ def cohens_d(control:list|tuple|np.ndarray, - https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation """ - # Convert to numpy arrays for speed. - # NaNs are automatically dropped. - if ~isinstance(control, np.ndarray): - control = np.array(control) - if ~isinstance(test, np.ndarray): - test = np.array(test) control = control[~np.isnan(control)] test = test[~np.isnan(test)] @@ -218,6 +211,7 @@ def cohens_d(control:list|tuple|np.ndarray, return M / divisor # %% ../../nbs/API/effsize.ipynb 8 +# @njit(cache=True) # It uses np.seterr which is not supported by Numba def cohens_h(control:list|tuple|np.ndarray, test:list|tuple|np.ndarray )->float: @@ -240,10 +234,6 @@ def cohens_h(control:list|tuple|np.ndarray, # Convert to numpy arrays for speed. # NaNs are automatically dropped. # Aligned with cohens_d calculation. - if ~isinstance(control, np.ndarray): - control = np.array(control) - if ~isinstance(test, np.ndarray): - test = np.array(test) control = control[~np.isnan(control)] test = test[~np.isnan(test)] @@ -272,10 +262,6 @@ def hedges_g(control:list|tuple|np.ndarray, # Convert to numpy arrays for speed. # NaNs are automatically dropped. - if ~isinstance(control, np.ndarray): - control = np.array(control) - if ~isinstance(test, np.ndarray): - test = np.array(test) control = control[~np.isnan(control)] test = test[~np.isnan(test)] @@ -286,6 +272,29 @@ def hedges_g(control:list|tuple|np.ndarray, return correction_factor * d # %% ../../nbs/API/effsize.ipynb 10 +@njit(cache=True) +def _mann_whitney_u(x, y): + """Numba-optimized Mann-Whitney U calculation""" + n1, n2 = len(x), len(y) + combined = np.concatenate((x, y)) + + # Use numpy broadcasting for comparison + less_than = (combined.reshape(-1, 1) > combined).sum(axis=1) + equal_to = (combined.reshape(-1, 1) == combined).sum(axis=1) + + # Calculate ranks directly + ranks = less_than + (equal_to + 1) / 2 + + R1 = np.sum(ranks[:n1]) + U1 = R1 - (n1 * (n1 + 1)) / 2 + return U1 + +@njit(cache=True) +def _cliffs_delta_core(control, test): + """Numba-optimized Cliff's delta calculation""" + U = _mann_whitney_u(test, control) + return ((2 * U) / (len(control) * len(test))) - 1 + def cliffs_delta(control:list|tuple|np.ndarray, test:list|tuple|np.ndarray )->float: @@ -293,28 +302,13 @@ def cliffs_delta(control:list|tuple|np.ndarray, Computes Cliff's delta for 2 samples. See [here](https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data) """ - - # Convert to numpy arrays for speed. - # NaNs are automatically dropped. - if ~isinstance(control, np.ndarray): - control = np.array(control) - if ~isinstance(test, np.ndarray): - test = np.array(test) - c = control[~np.isnan(control)] t = test[~np.isnan(test)] - - control_n = len(c) - test_n = len(t) - - # Note the order of the control and test arrays. - U, _ = mannwhitneyu(t, c, alternative='two-sided') - cliffs_delta = ((2 * U) / (control_n * test_n)) - 1 - - return cliffs_delta + return _cliffs_delta_core(c, t) # %% ../../nbs/API/effsize.ipynb 11 +@njit(cache=True) def _compute_standardizers(control, test): """ Computes the pooled and average standard deviations for two datasets. @@ -348,9 +342,9 @@ def _compute_standardizers(control, test): control_n = len(control) test_n = len(test) - control_var = np.var(control, ddof=1) # use N-1 to compute the variance. - test_var = np.var(test, ddof=1) - + # ddof parameter is not supported by numba. + control_var = np.var(control)*control_n/(control_n-1) # use N-1 to compute the variance. + test_var = np.var(test)*test_n/(test_n-1) # For unpaired 2-groups standardized mean difference. pooled = np.sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) / @@ -379,6 +373,7 @@ def _compute_hedges_correction_factor(n1, """ df = n1 + n2 - 2 + # gamma function is not supported by numba. numer = gamma(df / 2) denom0 = gamma((df - 1) / 2) denom = np.sqrt(df / 2) * denom0 @@ -396,6 +391,7 @@ def _compute_hedges_correction_factor(n1, return out # %% ../../nbs/API/effsize.ipynb 13 +@njit(cache=True) def weighted_delta(difference, group_var): ''' Compute the weighted deltas where the weight is the inverse of the diff --git a/dabest/_stats_tools/precompile.py b/dabest/_stats_tools/precompile.py new file mode 100644 index 00000000..46cc2bc4 --- /dev/null +++ b/dabest/_stats_tools/precompile.py @@ -0,0 +1,53 @@ +"""A tool to pre-compile Numba functions for speeding up DABEST bootstrapping""" + +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/API/precompile.ipynb. + +# %% auto 0 +__all__ = ['precompile_all'] + +# %% ../../nbs/API/precompile.ipynb 4 +import numpy as np +from tqdm import tqdm +from . import effsize +from . import confint_2group_diff + +# %% ../../nbs/API/precompile.ipynb 5 +_NUMBA_COMPILED = False + +def precompile_all(): + """Pre-compile all numba functions with dummy data""" + global _NUMBA_COMPILED + + if _NUMBA_COMPILED: + return + + print("Pre-compiling numba functions for DABEST...") + + # Create dummy data + dummy_control = np.array([1.0, 2.0, 3.0]) + dummy_test = np.array([4.0, 5.0, 6.0]) + + funcs = [ + # effsize.py functions + (effsize.cohens_d, (dummy_control, dummy_test)), + (effsize._mann_whitney_u, (dummy_control, dummy_test)), + (effsize._cliffs_delta_core, (dummy_control, dummy_test)), + (effsize._compute_standardizers, (dummy_control, dummy_test)), + (effsize.weighted_delta, (np.array([1.0, 2.0]), np.array([0.1, 0.2]))), + + # confint_2group_diff.py functions + (confint_2group_diff.create_jackknife_indexes, (dummy_control,)), + (confint_2group_diff.create_repeated_indexes, (dummy_control,)), + (confint_2group_diff.bootstrap_indices, (True, 3, 3, 10, 12345)), + (confint_2group_diff.delta2_bootstrap_loop, + (dummy_control, dummy_test, dummy_control, dummy_test, 10, 1.0, 12345, False)), + (confint_2group_diff._compute_quantile, (0.5, 0.1, 0.1)), + (confint_2group_diff.calculate_group_var, (1.0, 3, 1.0, 3)) + ] + + for func, args in tqdm(funcs, desc="Compiling numba functions"): + func(*args) + + _NUMBA_COMPILED = True + + print("Numba compilation complete!") diff --git a/dabest/misc_tools.py b/dabest/misc_tools.py index 6836d336..90c4a6bc 100644 --- a/dabest/misc_tools.py +++ b/dabest/misc_tools.py @@ -762,8 +762,8 @@ def Gardner_Altman_Plot_Aesthetic_Adjustments(effect_size_type, plot_data, xvar, which_std = 1 else: which_std = 0 - temp_control = plot_data[plot_data[xvar] == current_control][yvar] - temp_test = plot_data[plot_data[xvar] == current_group][yvar] + temp_control = np.array(plot_data[plot_data[xvar] == current_control][yvar]) + temp_test = np.array(plot_data[plot_data[xvar] == current_group][yvar]) stds = _compute_standardizers(temp_control, temp_test) if is_paired: diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py index 72437f38..40d49a91 100644 --- a/dabest/plot_tools.py +++ b/dabest/plot_tools.py @@ -369,7 +369,9 @@ def single_sankey( ): """ Make a single Sankey diagram showing proportion flow from left to right + Original code from: https://github.com/anazalea/pySankey + Changes are added to normalize each diagram's height to be 1 """ diff --git a/nbs/API/bootstrap.ipynb b/nbs/API/bootstrap.ipynb index eb33a083..79de8c0e 100644 --- a/nbs/API/bootstrap.ipynb +++ b/nbs/API/bootstrap.ipynb @@ -114,7 +114,9 @@ " reps: int = 5000, # Number of bootstrap iterations to perform.\n", " ):\n", " # Turn to pandas series.\n", - " x1 = pd.Series(x1).dropna()\n", + " # x1 = pd.Series(x1).dropna()\n", + " x1 = x1[~np.isnan(x1)]\n", + "\n", " diff = False\n", "\n", " # Initialise stat_function\n", @@ -137,7 +139,9 @@ " if x2 is None:\n", " raise ValueError(\"Please specify x2.\")\n", " \n", - " x2 = pd.Series(x2).dropna()\n", + " # x2 = pd.Series(x2).dropna()\n", + " x2 = x1[~np.isnan(x2)]\n", + "\n", " if len(x1) != len(x2):\n", " raise ValueError(\"x1 and x2 are not the same length.\")\n", "\n", @@ -182,7 +186,8 @@ "\n", " elif x2 is not None and paired is None:\n", " diff = True\n", - " x2 = pd.Series(x2).dropna()\n", + " # x2 = pd.Series(x2).dropna()\n", + " x2 = x2[~np.isnan(x2)]\n", " # Generate statarrays for both arrays.\n", " ref_statarray = sns.algorithms.bootstrap(x1, **sns_bootstrap_kwargs)\n", " exp_statarray = sns.algorithms.bootstrap(x2, **sns_bootstrap_kwargs)\n", diff --git a/nbs/API/confint_2group_diff.ipynb b/nbs/API/confint_2group_diff.ipynb index dd6477aa..c080d452 100644 --- a/nbs/API/confint_2group_diff.ipynb +++ b/nbs/API/confint_2group_diff.ipynb @@ -60,7 +60,7 @@ "from numpy import mean as npmean\n", "from numpy import sum as npsum\n", "from numpy.random import PCG64, RandomState\n", - "import pandas as pd\n", + "from numba import njit, prange\n", "from scipy.stats import norm\n", "from numpy import isnan" ] @@ -73,6 +73,7 @@ "outputs": [], "source": [ "#| export\n", + "@njit(cache=True, parallel=True)\n", "def create_jackknife_indexes(data):\n", " \"\"\"\n", " Given an array-like, creates a jackknife bootstrap.\n", @@ -89,18 +90,25 @@ " Generator that yields all jackknife bootstrap samples.\n", " \"\"\"\n", "\n", - " index_range = arange(0, len(data))\n", - " return (delete(index_range, i) for i in index_range)\n", + " n = len(data)\n", + " indexes = np.empty((n, n - 1), dtype=np.int64)\n", + " for i in prange(n):\n", + " indexes[i] = np.concatenate((np.arange(i), np.arange(i + 1, n)))\n", + " return indexes\n", "\n", "\n", + "@njit(cache=True, parallel=True)\n", "def create_repeated_indexes(data):\n", " \"\"\"\n", " Convenience function. Given an array-like with length N,\n", " returns a generator that yields N indexes [0, 1, ..., N].\n", " \"\"\"\n", "\n", - " index_range = arange(0, len(data))\n", - " return (index_range for i in index_range)\n", + " n = len(data)\n", + " indexes = np.empty((n, n), dtype=np.int64) # Pre-allocate the output array\n", + " for i in prange(n):\n", + " indexes[i, :] = np.arange(n) # Fill each row with the full index range\n", + " return indexes\n", "\n", "\n", "def _create_two_group_jackknife_indexes(x0, x1, is_paired):\n", @@ -168,6 +176,20 @@ " return numer / denom\n", "\n", "\n", + "@njit(cache=True) # parallelization must be turned off for random number generation\n", + "def bootstrap_indices(is_paired, x0_len, x1_len, resamples, random_seed):\n", + " np.random.seed(random_seed)\n", + " indices = np.empty((resamples, x0_len if is_paired else x0_len + x1_len), dtype=np.int64)\n", + " \n", + " for i in range(resamples):\n", + " if is_paired:\n", + " indices[i, :x0_len] = np.random.choice(x0_len, x0_len)\n", + " else: \n", + " indices[i, :x0_len] = np.random.choice(x0_len, x0_len)\n", + " indices[i, x0_len:x0_len+x1_len] = np.random.choice(x1_len, x1_len)\n", + " return indices\n", + "\n", + "\n", "def compute_bootstrapped_diff(\n", " x0, x1, is_paired, effect_size, resamples=5000, random_seed=12345\n", "):\n", @@ -175,27 +197,60 @@ "\n", " from . import effsize as __es\n", "\n", - " rng = RandomState(PCG64(random_seed))\n", - "\n", - " out = np.repeat(np.nan, resamples)\n", - " x0_len = len(x0)\n", - " x1_len = len(x1)\n", + " x0_len, x1_len = len(x0), len(x1)\n", + " indices = bootstrap_indices(is_paired, x0_len, x1_len, resamples, random_seed)\n", + " out = np.empty(resamples, dtype=np.float64)\n", "\n", - " for i in range(int(resamples)):\n", + " for i in range(resamples):\n", " if is_paired:\n", - " if x0_len != x1_len:\n", - " raise ValueError(\"The two arrays do not have the same length.\")\n", - " random_idx = rng.choice(x0_len, x0_len, replace=True)\n", - " x0_sample = x0[random_idx]\n", - " x1_sample = x1[random_idx]\n", + " x0_sample = x0[indices[i, :x0_len]]\n", + " x1_sample = x1[indices[i, :x0_len]]\n", " else:\n", - " x0_sample = rng.choice(x0, x0_len, replace=True)\n", - " x1_sample = rng.choice(x1, x1_len, replace=True)\n", + " x0_sample = x0[indices[i, :x0_len]]\n", + " x1_sample = x1[indices[i, x0_len:x0_len+x1_len]]\n", "\n", " out[i] = __es.two_group_difference(x0_sample, x1_sample, is_paired, effect_size)\n", "\n", " return out\n", "\n", + "@njit(cache=True) # parallelization must be turned off for random number generation\n", + "def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_paired):\n", + " np.random.seed(rng_seed)\n", + " out_delta_g = np.empty(resamples)\n", + " deltadelta = np.empty(resamples)\n", + " \n", + " n1, n2, n3, n4 = len(x1), len(x2), len(x3), len(x4)\n", + " if is_paired:\n", + " if n1 != n2 or n3 != n4:\n", + " raise ValueError(\"Each control group must have the same length as its corresponding test group in paired analysis.\")\n", + " \n", + "\n", + " # Bootstrapping\n", + " for i in range(resamples):\n", + " # Paired or unpaired resampling\n", + " if is_paired:\n", + " indices_1 = np.random.choice(len(x1),len(x1))\n", + " indices_2 = np.random.choice(len(x3),len(x3))\n", + " x1_sample, x2_sample = x1[indices_1], x2[indices_1]\n", + " x3_sample, x4_sample = x3[indices_2], x4[indices_2]\n", + " else:\n", + " indices_1 = np.random.randint(0, len(x1), len(x1))\n", + " indices_2 = np.random.randint(0, len(x2), len(x2))\n", + " indices_3 = np.random.randint(0, len(x3), len(x3))\n", + " indices_4 = np.random.randint(0, len(x4), len(x4))\n", + " x1_sample, x2_sample = x1[indices_1], x2[indices_2]\n", + " x3_sample, x4_sample = x3[indices_3], x4[indices_4]\n", + "\n", + " # Calculating deltas\n", + " delta_1 = np.mean(x2_sample) - np.mean(x1_sample)\n", + " delta_2 = np.mean(x4_sample) - np.mean(x3_sample)\n", + " delta_delta = delta_2 - delta_1\n", + "\n", + " deltadelta[i] = delta_delta\n", + " out_delta_g[i] = delta_delta / pooled_sd\n", + "\n", + " return out_delta_g, deltadelta\n", + "\n", "\n", "def compute_delta2_bootstrapped_diff(\n", " x1: np.ndarray, # Control group 1\n", @@ -213,8 +268,6 @@ "\n", " \"\"\"\n", "\n", - " rng = RandomState(PCG64(random_seed))\n", - "\n", " x1, x2, x3, x4 = map(np.asarray, [x1, x2, x3, x4])\n", "\n", " # Calculating pooled sample standard deviation\n", @@ -234,33 +287,7 @@ " if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0:\n", " raise ValueError(\"Pooled sample standard deviation is NaN or zero.\")\n", "\n", - " out_delta_g = np.empty(resamples)\n", - " deltadelta = np.empty(resamples)\n", - "\n", - " # Bootstrapping\n", - " for i in range(resamples):\n", - " # Paired or unpaired resampling\n", - " if is_paired:\n", - " if len(x1) != len(x2) or len(x3) != len(x4):\n", - " raise ValueError(\"Each control group must have the same length as its corresponding test group in paired analysis.\")\n", - " indices_1 = rng.choice(len(x1), len(x1), replace=True)\n", - " indices_2 = rng.choice(len(x3), len(x3), replace=True)\n", - "\n", - " x1_sample, x2_sample = x1[indices_1], x2[indices_1]\n", - " x3_sample, x4_sample = x3[indices_2], x4[indices_2]\n", - " else:\n", - " x1_sample = rng.choice(x1, len(x1), replace=True)\n", - " x2_sample = rng.choice(x2, len(x2), replace=True)\n", - " x3_sample = rng.choice(x3, len(x3), replace=True)\n", - " x4_sample = rng.choice(x4, len(x4), replace=True)\n", - "\n", - " # Calculating deltas\n", - " delta_1 = np.mean(x2_sample) - np.mean(x1_sample)\n", - " delta_2 = np.mean(x4_sample) - np.mean(x3_sample)\n", - " delta_delta = delta_2 - delta_1\n", - "\n", - " deltadelta[i] = delta_delta\n", - " out_delta_g[i] = delta_delta / pooled_sample_sd\n", + " out_delta_g, deltadelta = delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sample_sd, random_seed, is_paired)\n", "\n", " # Empirical delta_g calculation\n", " delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd\n", @@ -297,6 +324,7 @@ " return (100.0 - ci) / 100.0\n", "\n", "\n", + "@njit(cache=True)\n", "def _compute_quantile(z, bias, acceleration):\n", " numer = bias + z\n", " denom = 1 - (acceleration * numer)\n", @@ -332,6 +360,7 @@ " return low, high\n", "\n", "\n", + "@njit(cache=True)\n", "def calculate_group_var(control_var, control_N, test_var, test_N):\n", " return control_var / control_N + test_var / test_N\n", "\n", @@ -343,18 +372,11 @@ "\n", " weight = 1 / group_var\n", " denom = np.sum(weight)\n", - " num = np.sum(weight[i] * differences[i] for i in range(0, len(weight)))\n", - "\n", + " num = 0.0\n", + " for i in range(len(weight)):\n", + " num += weight[i] * differences[i]\n", " return num / denom" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "87e0c164", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/nbs/API/dabest_object.ipynb b/nbs/API/dabest_object.ipynb index 35c8d5c3..75903f07 100644 --- a/nbs/API/dabest_object.ipynb +++ b/nbs/API/dabest_object.ipynb @@ -180,7 +180,7 @@ " # Determine the kind of estimation plot we need to produce.\n", " if all([isinstance(i, (str, int, float)) for i in idx]):\n", " # flatten out idx.\n", - " all_plot_groups = pd.unique(pd.Series([t for t in idx])).tolist()\n", + " all_plot_groups = pd.Series([t for t in idx]).unique().tolist()\n", " if len(idx) > len(all_plot_groups):\n", " err0 = \"`idx` contains duplicated groups. Please remove any duplicates and try again.\"\n", " raise ValueError(err0)\n", @@ -190,7 +190,7 @@ " self.__idx = (idx,)\n", "\n", " elif all([isinstance(i, (tuple, list)) for i in idx]):\n", - " all_plot_groups = pd.unique(pd.Series([tt for t in idx for tt in t])).tolist()\n", + " all_plot_groups = pd.Series([tt for t in idx for tt in t]).unique().tolist()\n", "\n", " actual_groups_given = sum([len(i) for i in idx])\n", "\n", diff --git a/nbs/API/delta_objects.ipynb b/nbs/API/delta_objects.ipynb index 358e45ad..3fb4aaef 100644 --- a/nbs/API/delta_objects.ipynb +++ b/nbs/API/delta_objects.ipynb @@ -528,13 +528,14 @@ " # compute the variances of each control group and each test group\n", " control_var=[]\n", " test_var=[]\n", + " grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}\n", " for j, current_tuple in enumerate(idx):\n", " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", + " control = grouped_data[cname]\n", " control_var.append(np.var(control, ddof=1))\n", "\n", " tname = current_tuple[1]\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", + " test = grouped_data[tname]\n", " test_var.append(np.var(test, ddof=1))\n", " self.__control_var = np.array(control_var)\n", " self.__test_var = np.array(test_var)\n", @@ -554,7 +555,7 @@ " self.__bootstraps)\n", "\n", " # Compute the weighted average mean difference based on the raw data\n", - " self.__difference = es.weighted_delta(self.__effsizedf[\"difference\"],\n", + " self.__difference = es.weighted_delta(np.array(self.__effsizedf[\"difference\"]),\n", " self.__group_var)\n", "\n", " sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta)\n", diff --git a/nbs/API/effsize.ipynb b/nbs/API/effsize.ipynb index 9d8dc5d2..5123e11d 100644 --- a/nbs/API/effsize.ipynb +++ b/nbs/API/effsize.ipynb @@ -56,9 +56,10 @@ "#| export\n", "from __future__ import annotations\n", "import numpy as np\n", + "from numba import njit\n", "import warnings\n", "from scipy.special import gamma\n", - "from scipy.stats import mannwhitneyu" + "from scipy.stats import mannwhitneyu\n" ] }, { @@ -118,6 +119,10 @@ "\n", " \"\"\"\n", "\n", + " if ~isinstance(control, np.ndarray):\n", + " control = np.array(control)\n", + " if ~isinstance(test, np.ndarray):\n", + " test = np.array(test)\n", "\n", " if effect_size == \"mean_diff\":\n", " return func_difference(control, test, np.mean, is_paired)\n", @@ -180,19 +185,11 @@ " err = \"The two arrays supplied do not have the same length.\"\n", " raise ValueError(err)\n", "\n", - " control_nan = np.where(np.isnan(control))[0]\n", - " test_nan = np.where(np.isnan(test))[0]\n", - "\n", - " indexes_to_drop = np.unique(np.concatenate([control_nan,\n", - " test_nan]))\n", + " non_nan_mask = ~np.isnan(control) & ~np.isnan(test)\n", + " control_non_nan = control[non_nan_mask]\n", + " test_non_nan = test[non_nan_mask]\n", "\n", - " good_indexes = [i for i in range(0, len(control))\n", - " if i not in indexes_to_drop]\n", - "\n", - " control = control[good_indexes]\n", - " test = test[good_indexes]\n", - "\n", - " return func(test - control)\n", + " return func(test_non_nan - control_non_nan)\n", "\n", " \n", " control = control[~np.isnan(control)]\n", @@ -208,6 +205,7 @@ "outputs": [], "source": [ "#| export\n", + "@njit(cache=True)\n", "def cohens_d(control:list|tuple|np.ndarray,\n", " test:list|tuple|np.ndarray,\n", " is_paired:str=None # If not None, the paired Cohen's d is returned.\n", @@ -252,12 +250,6 @@ " - https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation\n", " \"\"\"\n", "\n", - " # Convert to numpy arrays for speed.\n", - " # NaNs are automatically dropped.\n", - " if ~isinstance(control, np.ndarray):\n", - " control = np.array(control)\n", - " if ~isinstance(test, np.ndarray):\n", - " test = np.array(test)\n", " control = control[~np.isnan(control)]\n", " test = test[~np.isnan(test)]\n", "\n", @@ -296,6 +288,7 @@ "outputs": [], "source": [ "#| export\n", + "# @njit(cache=True) # It uses np.seterr which is not supported by Numba\n", "def cohens_h(control:list|tuple|np.ndarray, \n", " test:list|tuple|np.ndarray\n", " )->float:\n", @@ -318,10 +311,6 @@ " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", " # Aligned with cohens_d calculation.\n", - " if ~isinstance(control, np.ndarray):\n", - " control = np.array(control)\n", - " if ~isinstance(test, np.ndarray):\n", - " test = np.array(test)\n", " control = control[~np.isnan(control)]\n", " test = test[~np.isnan(test)]\n", "\n", @@ -357,10 +346,6 @@ "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", - " if ~isinstance(control, np.ndarray):\n", - " control = np.array(control)\n", - " if ~isinstance(test, np.ndarray):\n", - " test = np.array(test)\n", " control = control[~np.isnan(control)]\n", " test = test[~np.isnan(test)]\n", "\n", @@ -379,6 +364,29 @@ "outputs": [], "source": [ "#| export\n", + "@njit(cache=True)\n", + "def _mann_whitney_u(x, y):\n", + " \"\"\"Numba-optimized Mann-Whitney U calculation\"\"\"\n", + " n1, n2 = len(x), len(y)\n", + " combined = np.concatenate((x, y))\n", + " \n", + " # Use numpy broadcasting for comparison\n", + " less_than = (combined.reshape(-1, 1) > combined).sum(axis=1)\n", + " equal_to = (combined.reshape(-1, 1) == combined).sum(axis=1)\n", + " \n", + " # Calculate ranks directly\n", + " ranks = less_than + (equal_to + 1) / 2\n", + " \n", + " R1 = np.sum(ranks[:n1])\n", + " U1 = R1 - (n1 * (n1 + 1)) / 2\n", + " return U1\n", + "\n", + "@njit(cache=True)\n", + "def _cliffs_delta_core(control, test):\n", + " \"\"\"Numba-optimized Cliff's delta calculation\"\"\"\n", + " U = _mann_whitney_u(test, control)\n", + " return ((2 * U) / (len(control) * len(test))) - 1\n", + "\n", "def cliffs_delta(control:list|tuple|np.ndarray, \n", " test:list|tuple|np.ndarray\n", " )->float:\n", @@ -386,25 +394,9 @@ " Computes Cliff's delta for 2 samples.\n", " See [here](https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data)\n", " \"\"\"\n", - "\n", - " # Convert to numpy arrays for speed.\n", - " # NaNs are automatically dropped.\n", - " if ~isinstance(control, np.ndarray):\n", - " control = np.array(control)\n", - " if ~isinstance(test, np.ndarray):\n", - " test = np.array(test)\n", - "\n", " c = control[~np.isnan(control)]\n", " t = test[~np.isnan(test)]\n", - "\n", - " control_n = len(c)\n", - " test_n = len(t)\n", - "\n", - " # Note the order of the control and test arrays.\n", - " U, _ = mannwhitneyu(t, c, alternative='two-sided')\n", - " cliffs_delta = ((2 * U) / (control_n * test_n)) - 1\n", - "\n", - " return cliffs_delta\n" + " return _cliffs_delta_core(c, t)\n" ] }, { @@ -415,6 +407,7 @@ "outputs": [], "source": [ "#| export\n", + "@njit(cache=True)\n", "def _compute_standardizers(control, test):\n", " \"\"\"\n", " Computes the pooled and average standard deviations for two datasets.\n", @@ -448,9 +441,9 @@ " control_n = len(control)\n", " test_n = len(test)\n", "\n", - " control_var = np.var(control, ddof=1) # use N-1 to compute the variance.\n", - " test_var = np.var(test, ddof=1)\n", - "\n", + " # ddof parameter is not supported by numba.\n", + " control_var = np.var(control)*control_n/(control_n-1) # use N-1 to compute the variance.\n", + " test_var = np.var(test)*test_n/(test_n-1)\n", "\n", " # For unpaired 2-groups standardized mean difference.\n", " pooled = np.sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) /\n", @@ -487,6 +480,7 @@ " \"\"\"\n", "\n", " df = n1 + n2 - 2\n", + " # gamma function is not supported by numba.\n", " numer = gamma(df / 2)\n", " denom0 = gamma((df - 1) / 2)\n", " denom = np.sqrt(df / 2) * denom0\n", @@ -512,6 +506,7 @@ "outputs": [], "source": [ "#| export\n", + "@njit(cache=True)\n", "def weighted_delta(difference, group_var):\n", " '''\n", " Compute the weighted deltas where the weight is the inverse of the\n", diff --git a/nbs/API/effsize_objects.ipynb b/nbs/API/effsize_objects.ipynb index da633fda..cfdc7e92 100644 --- a/nbs/API/effsize_objects.ipynb +++ b/nbs/API/effsize_objects.ipynb @@ -62,6 +62,7 @@ "import pandas as pd\n", "import lqrt\n", "from scipy.stats import norm\n", + "import numpy as np\n", "from numpy import array, isnan, isinf, repeat, random, isin, abs, var\n", "from numpy import sort as npsort\n", "from numpy import nan as npnan\n", @@ -417,12 +418,17 @@ " # References:\n", " # https://en.wikipedia.org/wiki/McNemar%27s_test\n", "\n", - " df_temp = pd.DataFrame({\"control\": self.__control, \"test\": self.__test})\n", - " x1 = len(df_temp[(df_temp[\"control\"] == 0) & (df_temp[\"test\"] == 0)])\n", - " x2 = len(df_temp[(df_temp[\"control\"] == 0) & (df_temp[\"test\"] == 1)])\n", - " x3 = len(df_temp[(df_temp[\"control\"] == 1) & (df_temp[\"test\"] == 0)])\n", - " x4 = len(df_temp[(df_temp[\"control\"] == 1) & (df_temp[\"test\"] == 1)])\n", - " table = [[x1, x2], [x3, x4]]\n", + " # df_temp = pd.DataFrame({\"control\": self.__control, \"test\": self.__test})\n", + " # x1 = len(df_temp[(df_temp[\"control\"] == 0) & (df_temp[\"test\"] == 0)])\n", + " # x2 = len(df_temp[(df_temp[\"control\"] == 0) & (df_temp[\"test\"] == 1)])\n", + " # x3 = len(df_temp[(df_temp[\"control\"] == 1) & (df_temp[\"test\"] == 0)])\n", + " # x4 = len(df_temp[(df_temp[\"control\"] == 1) & (df_temp[\"test\"] == 1)])\n", + " # table = [[x1, x2], [x3, x4]]\n", + " x1 = np.sum((self.__control == 0) & (self.__test == 0))\n", + " x2 = np.sum((self.__control == 0) & (self.__test == 1))\n", + " x3 = np.sum((self.__control == 1) & (self.__test == 0))\n", + " x4 = np.sum((self.__control == 1) & (self.__test == 1))\n", + " table = np.array([[x1, x2], [x3, x4]])\n", " _mcnemar = mcnemar(table, exact=True, correction=True)\n", " self.__pvalue_mcnemar = _mcnemar.pvalue\n", " self.__statistic_mcnemar = _mcnemar.statistic\n", @@ -1020,18 +1026,19 @@ " out = []\n", " reprs = []\n", "\n", + " grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}\n", " if self.__delta2:\n", " mixed_data = []\n", " for j, current_tuple in enumerate(idx):\n", " if self.__is_paired != \"sequential\":\n", " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", + " control = grouped_data[cname]\n", "\n", " for ix, tname in enumerate(current_tuple[1:]):\n", " if self.__is_paired == \"sequential\":\n", " cname = current_tuple[ix]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", + " control = grouped_data[cname]\n", + " test = grouped_data[tname]\n", " mixed_data.append(control)\n", " mixed_data.append(test)\n", " bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(\n", @@ -1047,13 +1054,13 @@ " for j, current_tuple in enumerate(idx):\n", " if self.__is_paired != \"sequential\":\n", " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", + " control = grouped_data[cname]\n", "\n", " for ix, tname in enumerate(current_tuple[1:]):\n", " if self.__is_paired == \"sequential\":\n", " cname = current_tuple[ix]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", + " control = grouped_data[cname]\n", + " test = grouped_data[tname]\n", "\n", " result = TwoGroupsEffectSize(\n", " control,\n", @@ -1214,16 +1221,18 @@ "\n", " out = []\n", "\n", + " grouped_data = {name:group[yvar].copy() for name, group in dat.groupby(xvar)}\n", + "\n", " for j, current_tuple in enumerate(db_obj.idx):\n", " if self.__is_paired != \"sequential\":\n", " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", + " control = grouped_data[cname]\n", "\n", " for ix, tname in enumerate(current_tuple[1:]):\n", " if self.__is_paired == \"sequential\":\n", " cname = current_tuple[ix]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", + " control = grouped_data[cname]\n", + " test = grouped_data[tname]\n", "\n", " if self.__is_paired:\n", " # Refactored here in v0.3.0 for performance issues.\n", diff --git a/nbs/API/misc_tools.ipynb b/nbs/API/misc_tools.ipynb index 64eda8c4..499b9be1 100644 --- a/nbs/API/misc_tools.ipynb +++ b/nbs/API/misc_tools.ipynb @@ -815,8 +815,8 @@ " which_std = 1\n", " else:\n", " which_std = 0\n", - " temp_control = plot_data[plot_data[xvar] == current_control][yvar]\n", - " temp_test = plot_data[plot_data[xvar] == current_group][yvar]\n", + " temp_control = np.array(plot_data[plot_data[xvar] == current_control][yvar])\n", + " temp_test = np.array(plot_data[plot_data[xvar] == current_group][yvar])\n", "\n", " stds = _compute_standardizers(temp_control, temp_test)\n", " if is_paired:\n", diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb index 2c431029..c5c3b311 100644 --- a/nbs/API/plot_tools.ipynb +++ b/nbs/API/plot_tools.ipynb @@ -420,7 +420,9 @@ "):\n", " \"\"\"\n", " Make a single Sankey diagram showing proportion flow from left to right\n", + "\n", " Original code from: https://github.com/anazalea/pySankey\n", + " \n", " Changes are added to normalize each diagram's height to be 1\n", "\n", " \"\"\"\n", diff --git a/nbs/API/precompile.ipynb b/nbs/API/precompile.ipynb new file mode 100644 index 00000000..223c4ce9 --- /dev/null +++ b/nbs/API/precompile.ipynb @@ -0,0 +1,111 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# precompile\n", + "\n", + "> A tool to pre-compile Numba functions for speeding up DABEST bootstrapping\n", + "\n", + "- order: 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp _stats_tools/precompile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from __future__ import annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nbdev.showdoc import *\n", + "import nbdev\n", + "nbdev.nbdev_export()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "from dabest._stats_tools import effsize\n", + "from dabest._stats_tools import confint_2group_diff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "_NUMBA_COMPILED = False\n", + "\n", + "def precompile_all():\n", + " \"\"\"Pre-compile all numba functions with dummy data\"\"\"\n", + " global _NUMBA_COMPILED\n", + " \n", + " if _NUMBA_COMPILED:\n", + " return\n", + " \n", + " print(\"Pre-compiling numba functions for DABEST...\")\n", + " \n", + " # Create dummy data\n", + " dummy_control = np.array([1.0, 2.0, 3.0])\n", + " dummy_test = np.array([4.0, 5.0, 6.0])\n", + " \n", + " funcs = [\n", + " # effsize.py functions\n", + " (effsize.cohens_d, (dummy_control, dummy_test)),\n", + " (effsize._mann_whitney_u, (dummy_control, dummy_test)),\n", + " (effsize._cliffs_delta_core, (dummy_control, dummy_test)),\n", + " (effsize._compute_standardizers, (dummy_control, dummy_test)),\n", + " (effsize.weighted_delta, (np.array([1.0, 2.0]), np.array([0.1, 0.2]))),\n", + " \n", + " # confint_2group_diff.py functions\n", + " (confint_2group_diff.create_jackknife_indexes, (dummy_control,)),\n", + " (confint_2group_diff.create_repeated_indexes, (dummy_control,)),\n", + " (confint_2group_diff.bootstrap_indices, (True, 3, 3, 10, 12345)),\n", + " (confint_2group_diff.delta2_bootstrap_loop, \n", + " (dummy_control, dummy_test, dummy_control, dummy_test, 10, 1.0, 12345, False)),\n", + " (confint_2group_diff._compute_quantile, (0.5, 0.1, 0.1)),\n", + " (confint_2group_diff.calculate_group_var, (1.0, 3, 1.0, 3))\n", + " ]\n", + " \n", + " for func, args in tqdm(funcs, desc=\"Compiling numba functions\"):\n", + " func(*args)\n", + " \n", + " _NUMBA_COMPILED = True\n", + " \n", + " print(\"Numba compilation complete!\")" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png index d6374405..3abf1737 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_01_gardner_altman_unpaired_meandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png index 7dc4d313..ae655349 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_02_gardner_altman_unpaired_mediandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png b/nbs/tests/mpl_image_tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png index 689b26ce..4d63bbcc 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png and b/nbs/tests/mpl_image_tests/baseline_images/test_03_gardner_altman_unpaired_hedges_g.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png b/nbs/tests/mpl_image_tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png index 37c2cfd6..1f1372aa 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png and b/nbs/tests/mpl_image_tests/baseline_images/test_04_gardner_altman_paired_hedges_g.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_04_gardner_altman_paired_meandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_04_gardner_altman_paired_meandiff.png index 6d3f0f6d..ecf1b062 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_04_gardner_altman_paired_meandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_04_gardner_altman_paired_meandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png index 895782b9..9b248613 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_05_cummings_two_group_unpaired_meandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png index 2c172249..003cee58 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_06_cummings_two_group_paired_meandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_07_cummings_multi_group_unpaired.png b/nbs/tests/mpl_image_tests/baseline_images/test_07_cummings_multi_group_unpaired.png index 5dabd6ba..4c1e3287 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_07_cummings_multi_group_unpaired.png and b/nbs/tests/mpl_image_tests/baseline_images/test_07_cummings_multi_group_unpaired.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_08_cummings_multi_group_paired.png b/nbs/tests/mpl_image_tests/baseline_images/test_08_cummings_multi_group_paired.png index 5416a604..bf0299c1 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_08_cummings_multi_group_paired.png and b/nbs/tests/mpl_image_tests/baseline_images/test_08_cummings_multi_group_paired.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_09_cummings_shared_control.png b/nbs/tests/mpl_image_tests/baseline_images/test_09_cummings_shared_control.png index 42484152..8932487d 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_09_cummings_shared_control.png and b/nbs/tests/mpl_image_tests/baseline_images/test_09_cummings_shared_control.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_101_gardner_altman_unpaired_propdiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_101_gardner_altman_unpaired_propdiff.png index 79cb7092..d163eb47 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_101_gardner_altman_unpaired_propdiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_101_gardner_altman_unpaired_propdiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_103_cummings_two_group_unpaired_propdiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_103_cummings_two_group_unpaired_propdiff.png index 4fdd936a..7fd3564d 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_103_cummings_two_group_unpaired_propdiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_103_cummings_two_group_unpaired_propdiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_105_cummings_multi_group_unpaired_propdiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_105_cummings_multi_group_unpaired_propdiff.png index 392abb53..29e798bd 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_105_cummings_multi_group_unpaired_propdiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_105_cummings_multi_group_unpaired_propdiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_106_cummings_shared_control_propdiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_106_cummings_shared_control_propdiff.png index 59d7ab71..1c9607ef 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_106_cummings_shared_control_propdiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_106_cummings_shared_control_propdiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_107_cummings_multi_groups_propdiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_107_cummings_multi_groups_propdiff.png index 606257de..e8da299c 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_107_cummings_multi_groups_propdiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_107_cummings_multi_groups_propdiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_109_gardner_altman_ylabel.png b/nbs/tests/mpl_image_tests/baseline_images/test_109_gardner_altman_ylabel.png index 4641f875..252b4612 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_109_gardner_altman_ylabel.png and b/nbs/tests/mpl_image_tests/baseline_images/test_109_gardner_altman_ylabel.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_10_cummings_multi_groups.png b/nbs/tests/mpl_image_tests/baseline_images/test_10_cummings_multi_groups.png index 751a187f..2e595360 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_10_cummings_multi_groups.png and b/nbs/tests/mpl_image_tests/baseline_images/test_10_cummings_multi_groups.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_110_change_fig_size.png b/nbs/tests/mpl_image_tests/baseline_images/test_110_change_fig_size.png index 9d941ce2..52841a60 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_110_change_fig_size.png and b/nbs/tests/mpl_image_tests/baseline_images/test_110_change_fig_size.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_111_change_palette_b.png b/nbs/tests/mpl_image_tests/baseline_images/test_111_change_palette_b.png index b73a9128..f0db92d1 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_111_change_palette_b.png and b/nbs/tests/mpl_image_tests/baseline_images/test_111_change_palette_b.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_112_change_palette_c.png b/nbs/tests/mpl_image_tests/baseline_images/test_112_change_palette_c.png index bbedf6e9..aa95e180 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_112_change_palette_c.png and b/nbs/tests/mpl_image_tests/baseline_images/test_112_change_palette_c.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_113_desat.png b/nbs/tests/mpl_image_tests/baseline_images/test_113_desat.png index eb015140..3cb1a98e 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_113_desat.png and b/nbs/tests/mpl_image_tests/baseline_images/test_113_desat.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_114_change_ylims.png b/nbs/tests/mpl_image_tests/baseline_images/test_114_change_ylims.png index 9ccb46fe..56f87ef2 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_114_change_ylims.png and b/nbs/tests/mpl_image_tests/baseline_images/test_114_change_ylims.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_115_invert_ylim.png b/nbs/tests/mpl_image_tests/baseline_images/test_115_invert_ylim.png index 33a9c89a..c6c789a2 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_115_invert_ylim.png and b/nbs/tests/mpl_image_tests/baseline_images/test_115_invert_ylim.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_116_ticker_gardner_altman.png b/nbs/tests/mpl_image_tests/baseline_images/test_116_ticker_gardner_altman.png index fd5a79c4..72349492 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_116_ticker_gardner_altman.png and b/nbs/tests/mpl_image_tests/baseline_images/test_116_ticker_gardner_altman.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_117_err_color.png b/nbs/tests/mpl_image_tests/baseline_images/test_117_err_color.png index 01184f12..7752500d 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_117_err_color.png and b/nbs/tests/mpl_image_tests/baseline_images/test_117_err_color.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_118_cummings_two_group_unpaired_meandiff_bar_width.png b/nbs/tests/mpl_image_tests/baseline_images/test_118_cummings_two_group_unpaired_meandiff_bar_width.png index edad4e3c..a8302b4b 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_118_cummings_two_group_unpaired_meandiff_bar_width.png and b/nbs/tests/mpl_image_tests/baseline_images/test_118_cummings_two_group_unpaired_meandiff_bar_width.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_119_wide_df_nan.png b/nbs/tests/mpl_image_tests/baseline_images/test_119_wide_df_nan.png index cd00b86d..0fc0b11b 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_119_wide_df_nan.png and b/nbs/tests/mpl_image_tests/baseline_images/test_119_wide_df_nan.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_11_inset_plots.png b/nbs/tests/mpl_image_tests/baseline_images/test_11_inset_plots.png index 28a2bcd1..887e5283 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_11_inset_plots.png and b/nbs/tests/mpl_image_tests/baseline_images/test_11_inset_plots.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_120_long_df_nan.png b/nbs/tests/mpl_image_tests/baseline_images/test_120_long_df_nan.png index cd00b86d..0fc0b11b 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_120_long_df_nan.png and b/nbs/tests/mpl_image_tests/baseline_images/test_120_long_df_nan.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_121_cohens_h_gardner_altman.png b/nbs/tests/mpl_image_tests/baseline_images/test_121_cohens_h_gardner_altman.png index 7ae94529..44fe3297 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_121_cohens_h_gardner_altman.png and b/nbs/tests/mpl_image_tests/baseline_images/test_121_cohens_h_gardner_altman.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_122_cohens_h_cummings.png b/nbs/tests/mpl_image_tests/baseline_images/test_122_cohens_h_cummings.png index c6f347da..73db4fe4 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_122_cohens_h_cummings.png and b/nbs/tests/mpl_image_tests/baseline_images/test_122_cohens_h_cummings.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_123_sankey_gardner_altman.png b/nbs/tests/mpl_image_tests/baseline_images/test_123_sankey_gardner_altman.png index 52d6eee3..e7a11809 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_123_sankey_gardner_altman.png and b/nbs/tests/mpl_image_tests/baseline_images/test_123_sankey_gardner_altman.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_124_sankey_cummings.png b/nbs/tests/mpl_image_tests/baseline_images/test_124_sankey_cummings.png index b23dd51e..0aa2f092 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_124_sankey_cummings.png and b/nbs/tests/mpl_image_tests/baseline_images/test_124_sankey_cummings.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_125_sankey_2paired_groups.png b/nbs/tests/mpl_image_tests/baseline_images/test_125_sankey_2paired_groups.png index 1820a124..c2a17393 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_125_sankey_2paired_groups.png and b/nbs/tests/mpl_image_tests/baseline_images/test_125_sankey_2paired_groups.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_126_sankey_2sequential_groups.png b/nbs/tests/mpl_image_tests/baseline_images/test_126_sankey_2sequential_groups.png index 1820a124..c2a17393 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_126_sankey_2sequential_groups.png and b/nbs/tests/mpl_image_tests/baseline_images/test_126_sankey_2sequential_groups.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_127_sankey_multi_group_paired.png b/nbs/tests/mpl_image_tests/baseline_images/test_127_sankey_multi_group_paired.png index e4c7e8c2..b9d3169e 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_127_sankey_multi_group_paired.png and b/nbs/tests/mpl_image_tests/baseline_images/test_127_sankey_multi_group_paired.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_128_sankey_transparency.png b/nbs/tests/mpl_image_tests/baseline_images/test_128_sankey_transparency.png index 83c2c9ae..ae6bd630 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_128_sankey_transparency.png and b/nbs/tests/mpl_image_tests/baseline_images/test_128_sankey_transparency.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_12_gardner_altman_ylabel.png b/nbs/tests/mpl_image_tests/baseline_images/test_12_gardner_altman_ylabel.png index 33c4ebc6..914b8682 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_12_gardner_altman_ylabel.png and b/nbs/tests/mpl_image_tests/baseline_images/test_12_gardner_altman_ylabel.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_132_shared_control_sankey_off.png b/nbs/tests/mpl_image_tests/baseline_images/test_132_shared_control_sankey_off.png index 7a4aeec3..e60f656c 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_132_shared_control_sankey_off.png and b/nbs/tests/mpl_image_tests/baseline_images/test_132_shared_control_sankey_off.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_133_shared_control_flow_off.png b/nbs/tests/mpl_image_tests/baseline_images/test_133_shared_control_flow_off.png index 5c31d70c..ded4d4d2 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_133_shared_control_flow_off.png and b/nbs/tests/mpl_image_tests/baseline_images/test_133_shared_control_flow_off.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_134_separate_control_sankey_off.png b/nbs/tests/mpl_image_tests/baseline_images/test_134_separate_control_sankey_off.png index 9609f767..66426292 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_134_separate_control_sankey_off.png and b/nbs/tests/mpl_image_tests/baseline_images/test_134_separate_control_sankey_off.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_135_separate_control_flow_off.png b/nbs/tests/mpl_image_tests/baseline_images/test_135_separate_control_flow_off.png index 392cdfb6..e3eec96e 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_135_separate_control_flow_off.png and b/nbs/tests/mpl_image_tests/baseline_images/test_135_separate_control_flow_off.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_136_style_sheets.png b/nbs/tests/mpl_image_tests/baseline_images/test_136_style_sheets.png index 54e02859..fd7aa500 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_136_style_sheets.png and b/nbs/tests/mpl_image_tests/baseline_images/test_136_style_sheets.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_13_multi_2group_color.png b/nbs/tests/mpl_image_tests/baseline_images/test_13_multi_2group_color.png index 6ed27f79..032a4930 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_13_multi_2group_color.png and b/nbs/tests/mpl_image_tests/baseline_images/test_13_multi_2group_color.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_14_gardner_altman_paired_color.png b/nbs/tests/mpl_image_tests/baseline_images/test_14_gardner_altman_paired_color.png index 2b9aea2f..7f3b1a0b 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_14_gardner_altman_paired_color.png and b/nbs/tests/mpl_image_tests/baseline_images/test_14_gardner_altman_paired_color.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_15_change_palette_a.png b/nbs/tests/mpl_image_tests/baseline_images/test_15_change_palette_a.png index da2bc0d1..2eaf771d 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_15_change_palette_a.png and b/nbs/tests/mpl_image_tests/baseline_images/test_15_change_palette_a.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_16_change_palette_b.png b/nbs/tests/mpl_image_tests/baseline_images/test_16_change_palette_b.png index aa4fc957..02536dc2 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_16_change_palette_b.png and b/nbs/tests/mpl_image_tests/baseline_images/test_16_change_palette_b.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_17_change_palette_c.png b/nbs/tests/mpl_image_tests/baseline_images/test_17_change_palette_c.png index c448fc55..bb3c6d62 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_17_change_palette_c.png and b/nbs/tests/mpl_image_tests/baseline_images/test_17_change_palette_c.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_18_desat.png b/nbs/tests/mpl_image_tests/baseline_images/test_18_desat.png index fa571711..685c2d43 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_18_desat.png and b/nbs/tests/mpl_image_tests/baseline_images/test_18_desat.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_19_dot_sizes.png b/nbs/tests/mpl_image_tests/baseline_images/test_19_dot_sizes.png index 8eaf98df..39345266 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_19_dot_sizes.png and b/nbs/tests/mpl_image_tests/baseline_images/test_19_dot_sizes.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_201_forest_plot_no_colorpalette.png b/nbs/tests/mpl_image_tests/baseline_images/test_201_forest_plot_no_colorpalette.png index 4f394293..907e6146 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_201_forest_plot_no_colorpalette.png and b/nbs/tests/mpl_image_tests/baseline_images/test_201_forest_plot_no_colorpalette.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_202_forest_plot_with_colorpalette.png b/nbs/tests/mpl_image_tests/baseline_images/test_202_forest_plot_with_colorpalette.png index 50a85d7b..8bafdc53 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_202_forest_plot_with_colorpalette.png and b/nbs/tests/mpl_image_tests/baseline_images/test_202_forest_plot_with_colorpalette.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_203_horizontal_forest_plot_no_colorpalette.png b/nbs/tests/mpl_image_tests/baseline_images/test_203_horizontal_forest_plot_no_colorpalette.png index 756567a6..814c3d4a 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_203_horizontal_forest_plot_no_colorpalette.png and b/nbs/tests/mpl_image_tests/baseline_images/test_203_horizontal_forest_plot_no_colorpalette.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_204_horizontal_forest_plot_with_colorpalette.png b/nbs/tests/mpl_image_tests/baseline_images/test_204_horizontal_forest_plot_with_colorpalette.png index 5e457b96..f7f0533f 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_204_horizontal_forest_plot_with_colorpalette.png and b/nbs/tests/mpl_image_tests/baseline_images/test_204_horizontal_forest_plot_with_colorpalette.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_205_forest_mini_meta_horizontal.png b/nbs/tests/mpl_image_tests/baseline_images/test_205_forest_mini_meta_horizontal.png index 9f296a66..ba7970ec 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_205_forest_mini_meta_horizontal.png and b/nbs/tests/mpl_image_tests/baseline_images/test_205_forest_mini_meta_horizontal.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_206_forest_mini_meta.png b/nbs/tests/mpl_image_tests/baseline_images/test_206_forest_mini_meta.png index 864654a6..f003b4a4 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_206_forest_mini_meta.png and b/nbs/tests/mpl_image_tests/baseline_images/test_206_forest_mini_meta.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_207_gardner_altman_meandiff_empty_circle.png b/nbs/tests/mpl_image_tests/baseline_images/test_207_gardner_altman_meandiff_empty_circle.png index 3abb704e..a59c9d51 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_207_gardner_altman_meandiff_empty_circle.png and b/nbs/tests/mpl_image_tests/baseline_images/test_207_gardner_altman_meandiff_empty_circle.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_208_cummings_two_group_unpaired_meandiff_empty_circle.png b/nbs/tests/mpl_image_tests/baseline_images/test_208_cummings_two_group_unpaired_meandiff_empty_circle.png index 1cc00350..25e665cd 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_208_cummings_two_group_unpaired_meandiff_empty_circle.png and b/nbs/tests/mpl_image_tests/baseline_images/test_208_cummings_two_group_unpaired_meandiff_empty_circle.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_209_cummings_shared_control_meandiff_empty_circle.png b/nbs/tests/mpl_image_tests/baseline_images/test_209_cummings_shared_control_meandiff_empty_circle.png index 2e975cf8..aad3e270 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_209_cummings_shared_control_meandiff_empty_circle.png and b/nbs/tests/mpl_image_tests/baseline_images/test_209_cummings_shared_control_meandiff_empty_circle.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_20_change_ylims.png b/nbs/tests/mpl_image_tests/baseline_images/test_20_change_ylims.png index d43d7033..ed1adf17 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_20_change_ylims.png and b/nbs/tests/mpl_image_tests/baseline_images/test_20_change_ylims.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_210_cummings_multi_groups_meandiff_empty_circle.png b/nbs/tests/mpl_image_tests/baseline_images/test_210_cummings_multi_groups_meandiff_empty_circle.png index 24bf0b02..f763947b 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_210_cummings_multi_groups_meandiff_empty_circle.png and b/nbs/tests/mpl_image_tests/baseline_images/test_210_cummings_multi_groups_meandiff_empty_circle.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_211_cummings_multi_2_group_meandiff_empty_circle.png b/nbs/tests/mpl_image_tests/baseline_images/test_211_cummings_multi_2_group_meandiff_empty_circle.png index 76967000..09da92bf 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_211_cummings_multi_2_group_meandiff_empty_circle.png and b/nbs/tests/mpl_image_tests/baseline_images/test_211_cummings_multi_2_group_meandiff_empty_circle.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_212_cummings_unpaired_delta_delta_meandiff_empty_circle.png b/nbs/tests/mpl_image_tests/baseline_images/test_212_cummings_unpaired_delta_delta_meandiff_empty_circle.png index cd688658..c05965c3 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_212_cummings_unpaired_delta_delta_meandiff_empty_circle.png and b/nbs/tests/mpl_image_tests/baseline_images/test_212_cummings_unpaired_delta_delta_meandiff_empty_circle.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_213_cummings_unpaired_mini_meta_meandiff_empty_circle.png b/nbs/tests/mpl_image_tests/baseline_images/test_213_cummings_unpaired_mini_meta_meandiff_empty_circle.png index ae0484d8..4235aad6 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_213_cummings_unpaired_mini_meta_meandiff_empty_circle.png and b/nbs/tests/mpl_image_tests/baseline_images/test_213_cummings_unpaired_mini_meta_meandiff_empty_circle.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_214_change_idx_order_custom_palette_original.png b/nbs/tests/mpl_image_tests/baseline_images/test_214_change_idx_order_custom_palette_original.png index 976ab458..ffcc5b7e 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_214_change_idx_order_custom_palette_original.png and b/nbs/tests/mpl_image_tests/baseline_images/test_214_change_idx_order_custom_palette_original.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_215_change_idx_order_custom_palette_new.png b/nbs/tests/mpl_image_tests/baseline_images/test_215_change_idx_order_custom_palette_new.png index 65ff816a..9ce3bfc4 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_215_change_idx_order_custom_palette_new.png and b/nbs/tests/mpl_image_tests/baseline_images/test_215_change_idx_order_custom_palette_new.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_216_cummings_multi_groups_meandiff_show_baseline_ec.png b/nbs/tests/mpl_image_tests/baseline_images/test_216_cummings_multi_groups_meandiff_show_baseline_ec.png index b5824a18..d5c6c451 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_216_cummings_multi_groups_meandiff_show_baseline_ec.png and b/nbs/tests/mpl_image_tests/baseline_images/test_216_cummings_multi_groups_meandiff_show_baseline_ec.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_217_cummings_multi_2_group_meandiff_show_baseline_ec.png b/nbs/tests/mpl_image_tests/baseline_images/test_217_cummings_multi_2_group_meandiff_show_baseline_ec.png index b4bf63c4..5911ab83 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_217_cummings_multi_2_group_meandiff_show_baseline_ec.png and b/nbs/tests/mpl_image_tests/baseline_images/test_217_cummings_multi_2_group_meandiff_show_baseline_ec.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_21_invert_ylim.png b/nbs/tests/mpl_image_tests/baseline_images/test_21_invert_ylim.png index 2379d990..3b5c2a76 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_21_invert_ylim.png and b/nbs/tests/mpl_image_tests/baseline_images/test_21_invert_ylim.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_22_ticker_gardner_altman.png b/nbs/tests/mpl_image_tests/baseline_images/test_22_ticker_gardner_altman.png index 19b28ad0..19300870 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_22_ticker_gardner_altman.png and b/nbs/tests/mpl_image_tests/baseline_images/test_22_ticker_gardner_altman.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_23_ticker_cumming.png b/nbs/tests/mpl_image_tests/baseline_images/test_23_ticker_cumming.png index 0a9db591..8f4ef09f 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_23_ticker_cumming.png and b/nbs/tests/mpl_image_tests/baseline_images/test_23_ticker_cumming.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_24_wide_df_nan.png b/nbs/tests/mpl_image_tests/baseline_images/test_24_wide_df_nan.png index 1350981c..bbedf974 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_24_wide_df_nan.png and b/nbs/tests/mpl_image_tests/baseline_images/test_24_wide_df_nan.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_25_long_df_nan.png b/nbs/tests/mpl_image_tests/baseline_images/test_25_long_df_nan.png index 1350981c..bbedf974 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_25_long_df_nan.png and b/nbs/tests/mpl_image_tests/baseline_images/test_25_long_df_nan.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_26_slopegraph_kwargs.png b/nbs/tests/mpl_image_tests/baseline_images/test_26_slopegraph_kwargs.png index 87359f5f..cd91fb59 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_26_slopegraph_kwargs.png and b/nbs/tests/mpl_image_tests/baseline_images/test_26_slopegraph_kwargs.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_27_gardner_altman_reflines_kwargs.png b/nbs/tests/mpl_image_tests/baseline_images/test_27_gardner_altman_reflines_kwargs.png index 73336ca7..f9928420 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_27_gardner_altman_reflines_kwargs.png and b/nbs/tests/mpl_image_tests/baseline_images/test_27_gardner_altman_reflines_kwargs.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_28_unpaired_cumming_reflines_kwargs.png b/nbs/tests/mpl_image_tests/baseline_images/test_28_unpaired_cumming_reflines_kwargs.png index 038bbf35..337de71f 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_28_unpaired_cumming_reflines_kwargs.png and b/nbs/tests/mpl_image_tests/baseline_images/test_28_unpaired_cumming_reflines_kwargs.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_29_paired_cumming_slopegraph_reflines_kwargs.png b/nbs/tests/mpl_image_tests/baseline_images/test_29_paired_cumming_slopegraph_reflines_kwargs.png index f9177fa2..d25bdc9e 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_29_paired_cumming_slopegraph_reflines_kwargs.png and b/nbs/tests/mpl_image_tests/baseline_images/test_29_paired_cumming_slopegraph_reflines_kwargs.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_30_sequential_cumming_slopegraph.png b/nbs/tests/mpl_image_tests/baseline_images/test_30_sequential_cumming_slopegraph.png index b42f20e8..8acb151f 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_30_sequential_cumming_slopegraph.png and b/nbs/tests/mpl_image_tests/baseline_images/test_30_sequential_cumming_slopegraph.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_31_baseline_cumming_slopegraph.png b/nbs/tests/mpl_image_tests/baseline_images/test_31_baseline_cumming_slopegraph.png index 512e3038..99f98af6 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_31_baseline_cumming_slopegraph.png and b/nbs/tests/mpl_image_tests/baseline_images/test_31_baseline_cumming_slopegraph.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_47_cummings_unpaired_delta_delta_meandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_47_cummings_unpaired_delta_delta_meandiff.png index 300f43ca..cf2cf853 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_47_cummings_unpaired_delta_delta_meandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_47_cummings_unpaired_delta_delta_meandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_48_cummings_sequential_delta_delta_meandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_48_cummings_sequential_delta_delta_meandiff.png index a9384f01..b895b264 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_48_cummings_sequential_delta_delta_meandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_48_cummings_sequential_delta_delta_meandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_49_cummings_baseline_delta_delta_meandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_49_cummings_baseline_delta_delta_meandiff.png index a9384f01..b895b264 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_49_cummings_baseline_delta_delta_meandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_49_cummings_baseline_delta_delta_meandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_50_delta_plot_ylabel.png b/nbs/tests/mpl_image_tests/baseline_images/test_50_delta_plot_ylabel.png index f64a02db..7b6724d3 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_50_delta_plot_ylabel.png and b/nbs/tests/mpl_image_tests/baseline_images/test_50_delta_plot_ylabel.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_51_delta_plot_change_palette_a.png b/nbs/tests/mpl_image_tests/baseline_images/test_51_delta_plot_change_palette_a.png index 2bb76b44..fe0f671c 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_51_delta_plot_change_palette_a.png and b/nbs/tests/mpl_image_tests/baseline_images/test_51_delta_plot_change_palette_a.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_52_delta_specified.png b/nbs/tests/mpl_image_tests/baseline_images/test_52_delta_specified.png index 89b23a8d..08ce8953 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_52_delta_specified.png and b/nbs/tests/mpl_image_tests/baseline_images/test_52_delta_specified.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_53_delta_change_ylims.png b/nbs/tests/mpl_image_tests/baseline_images/test_53_delta_change_ylims.png index a188c8ad..6bee7f87 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_53_delta_change_ylims.png and b/nbs/tests/mpl_image_tests/baseline_images/test_53_delta_change_ylims.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_54_delta_invert_ylim.png b/nbs/tests/mpl_image_tests/baseline_images/test_54_delta_invert_ylim.png index ebccec26..da12c98e 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_54_delta_invert_ylim.png and b/nbs/tests/mpl_image_tests/baseline_images/test_54_delta_invert_ylim.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_55_delta_median_diff.png b/nbs/tests/mpl_image_tests/baseline_images/test_55_delta_median_diff.png index 0195fbad..f0c64745 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_55_delta_median_diff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_55_delta_median_diff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_56_delta_cohens_d.png b/nbs/tests/mpl_image_tests/baseline_images/test_56_delta_cohens_d.png index 37a1fb10..00ea66e0 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_56_delta_cohens_d.png and b/nbs/tests/mpl_image_tests/baseline_images/test_56_delta_cohens_d.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_57_delta_show_delta2.png b/nbs/tests/mpl_image_tests/baseline_images/test_57_delta_show_delta2.png index 6d88ef90..85b09805 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_57_delta_show_delta2.png and b/nbs/tests/mpl_image_tests/baseline_images/test_57_delta_show_delta2.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_58_delta_axes_invert_ylim.png b/nbs/tests/mpl_image_tests/baseline_images/test_58_delta_axes_invert_ylim.png index d15b1724..35b72020 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_58_delta_axes_invert_ylim.png and b/nbs/tests/mpl_image_tests/baseline_images/test_58_delta_axes_invert_ylim.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_59_delta_axes_invert_ylim_not_showing_delta2.png b/nbs/tests/mpl_image_tests/baseline_images/test_59_delta_axes_invert_ylim_not_showing_delta2.png index 6d88ef90..85b09805 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_59_delta_axes_invert_ylim_not_showing_delta2.png and b/nbs/tests/mpl_image_tests/baseline_images/test_59_delta_axes_invert_ylim_not_showing_delta2.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_60_cummings_unpaired_mini_meta_meandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_60_cummings_unpaired_mini_meta_meandiff.png index c3cd87bb..5843eea5 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_60_cummings_unpaired_mini_meta_meandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_60_cummings_unpaired_mini_meta_meandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_61_cummings_sequential_mini_meta_meandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_61_cummings_sequential_mini_meta_meandiff.png index 678c6462..0560f44f 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_61_cummings_sequential_mini_meta_meandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_61_cummings_sequential_mini_meta_meandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_62_cummings_baseline_mini_meta_meandiff.png b/nbs/tests/mpl_image_tests/baseline_images/test_62_cummings_baseline_mini_meta_meandiff.png index 678c6462..0560f44f 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_62_cummings_baseline_mini_meta_meandiff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_62_cummings_baseline_mini_meta_meandiff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_63_mini_meta_plot_ylabel.png b/nbs/tests/mpl_image_tests/baseline_images/test_63_mini_meta_plot_ylabel.png index f0a2ef78..e4c4e38c 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_63_mini_meta_plot_ylabel.png and b/nbs/tests/mpl_image_tests/baseline_images/test_63_mini_meta_plot_ylabel.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_64_mini_meta_plot_change_palette_a.png b/nbs/tests/mpl_image_tests/baseline_images/test_64_mini_meta_plot_change_palette_a.png index 7a7013b6..030ed983 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_64_mini_meta_plot_change_palette_a.png and b/nbs/tests/mpl_image_tests/baseline_images/test_64_mini_meta_plot_change_palette_a.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_65_mini_meta_dot_sizes.png b/nbs/tests/mpl_image_tests/baseline_images/test_65_mini_meta_dot_sizes.png index 38fa293c..a69768f7 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_65_mini_meta_dot_sizes.png and b/nbs/tests/mpl_image_tests/baseline_images/test_65_mini_meta_dot_sizes.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_66_mini_meta_change_ylims.png b/nbs/tests/mpl_image_tests/baseline_images/test_66_mini_meta_change_ylims.png index de122d50..9e317831 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_66_mini_meta_change_ylims.png and b/nbs/tests/mpl_image_tests/baseline_images/test_66_mini_meta_change_ylims.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_67_mini_meta_invert_ylim.png b/nbs/tests/mpl_image_tests/baseline_images/test_67_mini_meta_invert_ylim.png index afd67c8d..822a8511 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_67_mini_meta_invert_ylim.png and b/nbs/tests/mpl_image_tests/baseline_images/test_67_mini_meta_invert_ylim.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_68_mini_meta_median_diff.png b/nbs/tests/mpl_image_tests/baseline_images/test_68_mini_meta_median_diff.png index 9808f220..86c6ed2c 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_68_mini_meta_median_diff.png and b/nbs/tests/mpl_image_tests/baseline_images/test_68_mini_meta_median_diff.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_69_mini_meta_cohens_d.png b/nbs/tests/mpl_image_tests/baseline_images/test_69_mini_meta_cohens_d.png index f3c1d761..e5984237 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_69_mini_meta_cohens_d.png and b/nbs/tests/mpl_image_tests/baseline_images/test_69_mini_meta_cohens_d.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_70_mini_meta_not_show.png b/nbs/tests/mpl_image_tests/baseline_images/test_70_mini_meta_not_show.png index 3c881c1c..249d78a6 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_70_mini_meta_not_show.png and b/nbs/tests/mpl_image_tests/baseline_images/test_70_mini_meta_not_show.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_71_unpaired_delta_g.png b/nbs/tests/mpl_image_tests/baseline_images/test_71_unpaired_delta_g.png index 20c75b9c..98a099f1 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_71_unpaired_delta_g.png and b/nbs/tests/mpl_image_tests/baseline_images/test_71_unpaired_delta_g.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_72_sequential_delta_g.png b/nbs/tests/mpl_image_tests/baseline_images/test_72_sequential_delta_g.png index a9384f01..b895b264 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_72_sequential_delta_g.png and b/nbs/tests/mpl_image_tests/baseline_images/test_72_sequential_delta_g.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_73_baseline_delta_g.png b/nbs/tests/mpl_image_tests/baseline_images/test_73_baseline_delta_g.png index a9384f01..b895b264 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_73_baseline_delta_g.png and b/nbs/tests/mpl_image_tests/baseline_images/test_73_baseline_delta_g.png differ diff --git a/nbs/tests/mpl_image_tests/baseline_images/test_99_style_sheets.png b/nbs/tests/mpl_image_tests/baseline_images/test_99_style_sheets.png index 2be2ffe5..8428745c 100644 Binary files a/nbs/tests/mpl_image_tests/baseline_images/test_99_style_sheets.png and b/nbs/tests/mpl_image_tests/baseline_images/test_99_style_sheets.png differ diff --git a/nbs/tests/test_01_effsizes_pvals.ipynb b/nbs/tests/test_01_effsizes_pvals.ipynb index f2997a42..d499f31b 100644 --- a/nbs/tests/test_01_effsizes_pvals.ipynb +++ b/nbs/tests/test_01_effsizes_pvals.ipynb @@ -132,7 +132,7 @@ "metadata": {}, "outputs": [], "source": [ - "cohens_d = effsize.cohens_d(wellbeing.control, wellbeing.expt,\n", + "cohens_d = effsize.cohens_d(np.array(wellbeing.control), np.array(wellbeing.expt),\n", " is_paired=False)\n", "assert np.round(cohens_d, 2) == pytest.approx(0.47)" ] @@ -152,7 +152,7 @@ "metadata": {}, "outputs": [], "source": [ - "hedges_g = effsize.hedges_g(wellbeing.control, wellbeing.expt,\n", + "hedges_g = effsize.hedges_g(np.array(wellbeing.control), np.array(wellbeing.expt),\n", " is_paired=False)\n", "assert np.round(hedges_g, 2) == pytest.approx(0.45)" ] @@ -172,7 +172,7 @@ "metadata": {}, "outputs": [], "source": [ - "cohens_d = effsize.cohens_d(paired_wellbeing.pre, paired_wellbeing.post,\n", + "cohens_d = effsize.cohens_d(np.array(paired_wellbeing.pre), np.array(paired_wellbeing.post),\n", " is_paired=\"baseline\")\n", "assert np.round(cohens_d, 2) == pytest.approx(0.34)\n" ] @@ -192,7 +192,7 @@ "metadata": {}, "outputs": [], "source": [ - "hedges_g = effsize.hedges_g(paired_wellbeing.pre, paired_wellbeing.post,\n", + "hedges_g = effsize.hedges_g(np.array(paired_wellbeing.pre), np.array(paired_wellbeing.post),\n", " is_paired=\"baseline\")\n", "assert np.round(hedges_g, 2) == pytest.approx(0.33)" ] @@ -212,7 +212,7 @@ "metadata": {}, "outputs": [], "source": [ - "cohens_h = effsize.cohens_h(smoke.low, smoke.high)\n", + "cohens_h = effsize.cohens_h(np.array(smoke.low), np.array(smoke.high))\n", "assert np.round(cohens_h, 2) == pytest.approx(0.17)" ] }, @@ -231,10 +231,10 @@ "metadata": {}, "outputs": [], "source": [ - "likert_delta = effsize.cliffs_delta(likert_treatment, likert_control)\n", + "likert_delta = effsize.cliffs_delta(np.array(likert_treatment), np.array(likert_control))\n", "assert likert_delta == pytest.approx(-0.25)\n", "\n", - "scores_delta = effsize.cliffs_delta(b_scores, a_scores)\n", + "scores_delta = effsize.cliffs_delta(np.array(b_scores), np.array(a_scores))\n", "assert scores_delta == pytest.approx(0.65)" ] }, diff --git a/nbs/tests/test_02_edge_cases.ipynb b/nbs/tests/test_02_edge_cases.ipynb index 27821eee..42fb6377 100644 --- a/nbs/tests/test_02_edge_cases.ipynb +++ b/nbs/tests/test_02_edge_cases.ipynb @@ -49,7 +49,7 @@ "random_seed=12345\n", "\n", "# rng = RandomState(MT19937(random_seed))\n", - "rng = RandomState(PCG64(12345))\n", + "rng = RandomState(PCG64(random_seed))\n", "# rng = np.random.default_rng(seed=random_seed)\n", "\n", "df = pd.DataFrame(\n", @@ -63,19 +63,10 @@ " idx=['Group 1', 'Group 2'])\n", "\n", "md = test.mean_diff.results\n", - "\n", "assert md.difference[0] == pytest.approx(-0.0322, abs=1e-4)\n", - "assert md.bca_low[0] == pytest.approx(-0.2279, abs=1e-4)\n", - "assert md.bca_high[0] == pytest.approx(0.1613, abs=1e-4)" + "assert md.bca_low[0] == pytest.approx(-0.2268, abs=1e-4)\n", + "assert md.bca_high[0] == pytest.approx(0.1524, abs=1e-4)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "afc96b46", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/nbs/tests/test_08_mini_meta_pvals.ipynb b/nbs/tests/test_08_mini_meta_pvals.ipynb index 464d3524..dd3730be 100644 --- a/nbs/tests/test_08_mini_meta_pvals.ipynb +++ b/nbs/tests/test_08_mini_meta_pvals.ipynb @@ -110,10 +110,10 @@ "source": [ "difference = unpaired.mean_diff.mini_meta_delta.difference\n", "\n", - "np_means = [np.mean(rep1_yes)-np.mean(rep1_no), \n", - " np.mean(rep2_yes)-np.mean(rep2_no)]\n", - "np_var = [np.var(rep1_yes, ddof=1)/N+np.var(rep1_no, ddof=1)/N,\n", - " np.var(rep2_yes, ddof=1)/N+np.var(rep2_no, ddof=1)/N]\n", + "np_means = np.array([np.mean(rep1_yes)-np.mean(rep1_no), \n", + " np.mean(rep2_yes)-np.mean(rep2_no)])\n", + "np_var = np.array([np.var(rep1_yes, ddof=1)/N+np.var(rep1_no, ddof=1)/N,\n", + " np.var(rep2_yes, ddof=1)/N+np.var(rep2_no, ddof=1)/N])\n", "\n", "np_difference = effsize.weighted_delta(np_means, np_var)\n", "\n", diff --git a/nbs/tests/test_99_confidence_intervals.ipynb b/nbs/tests/test_99_confidence_intervals.ipynb index 2475793b..2926a5c7 100644 --- a/nbs/tests/test_99_confidence_intervals.ipynb +++ b/nbs/tests/test_99_confidence_intervals.ipynb @@ -55,8 +55,9 @@ " paired=\"baseline\", id_col=\"subject_id\")\n", "paired_mean_diff = ex_bp.mean_diff.results\n", "\n", - "assert pytest.approx(3.875) == paired_mean_diff.bca_low[0]\n", - "assert pytest.approx(9.5) == paired_mean_diff.bca_high[0]" + "\n", + "assert pytest.approx(3.625) == paired_mean_diff.bca_low[0]\n", + "assert pytest.approx(9.125) == paired_mean_diff.bca_high[0]" ] }, { @@ -198,14 +199,6 @@ "assert error_count_median_diff <= max_errors\n", "assert error_count_cliffs_delta <= max_errors\n" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9da1b76d", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/settings.ini b/settings.ini index a6b36da8..614453d7 100644 --- a/settings.ini +++ b/settings.ini @@ -2,7 +2,7 @@ ### Python library ### repo = DABEST-python lib_name = dabest -version = 2024.03.29 +version = 2024.03.30 min_python = 3.9 license = apache2 @@ -37,7 +37,7 @@ language = English status = 3 user = acclab -requirements = fastcore pandas~=2.1.4 numpy~=1.26 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.12 datetime statsmodels lqrt +requirements = fastcore pandas~=2.1.4 numpy~=1.26 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.12 datetime statsmodels lqrt numba tqdm dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1 ### Optional ###