From 1c852bdf359d0461298c94448548ad259d4c1759 Mon Sep 17 00:00:00 2001 From: cyberosa Date: Thu, 14 Dec 2023 17:23:46 +0100 Subject: [PATCH 01/10] delta objects notebook --- README.md | 3 +- dabest/_classes.py | 1230 ++++++----------------------------- dabest/_delta_objects.py | 825 +++++++++++++++++++++++ dabest/plot_tools.py | 22 - nbs/API/class.ipynb | 1046 +---------------------------- nbs/API/delta_objects.ipynb | 1055 ++++++++++++++++++++++++++++++ nbs/API/plot_tools.ipynb | 22 - 7 files changed, 2066 insertions(+), 2137 deletions(-) create mode 100644 dabest/_delta_objects.py create mode 100644 nbs/API/delta_objects.ipynb diff --git a/README.md b/README.md index 33d4fb21..a3f3bfb0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -DABEST-Python -================ +# DABEST-Python diff --git a/dabest/_classes.py b/dabest/_classes.py index d71e8733..43d40d03 100644 --- a/dabest/_classes.py +++ b/dabest/_classes.py @@ -1,7 +1,7 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/class.ipynb. # %% auto 0 -__all__ = ['Dabest', 'DeltaDelta', 'MiniMetaDelta', 'TwoGroupsEffectSize', 'EffectSizeDataFrame', 'PermutationTest'] +__all__ = ['Dabest', 'TwoGroupsEffectSize', 'EffectSizeDataFrame', 'PermutationTest'] # %% ../nbs/API/class.ipynb 4 import numpy as np @@ -44,8 +44,6 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, # Make a copy of the data, so we don't make alterations to it. data_in = data.copy() - # data_in.reset_index(inplace=True) - # data_in_index_name = data_in.index.name # Check if it is a valid mini_meta case @@ -141,11 +139,6 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, self.__x1_level = x1_level - # # Check if idx is specified - # if delta2 is False and not idx: - # err = '`idx` is not a column in `data`. Please check.' 
- # raise IndexError(err) - # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case if idx is None and x is not None and y is not None: @@ -210,15 +203,6 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, 'entered--{}.'.format(idx) raise ValueError(err) - # Having parsed the idx, check if it is a kosher paired plot, - # if so stated. - #if paired is True: - # all_idx_lengths = [len(t) for t in self.__idx] - # if (np.array(all_idx_lengths) != 2).any(): - # err1 = "`is_paired` is True, but some idx " - # err2 = "in {} does not consist only of two groups.".format(idx) - # raise ValueError(err1 + err2) - # Check if there is a typo on paired if paired is not None: if paired not in ("baseline", "sequential"): @@ -314,23 +298,12 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, categories=all_plot_groups, ordered=True) - # # The line below was added in v0.2.4, removed in v0.2.5. - # plot_data.dropna(inplace=True) self.__plot_data = plot_data self.__all_plot_groups = all_plot_groups - # Sanity check that all idxs are paired, if so desired. - #if paired is True: - # if id_col is None: - # err = "`id_col` must be specified if `is_paired` is set to True." - # raise IndexError(err) - # elif id_col not in plot_data.columns: - # err = "{} is not a column in `data`. 
".format(id_col) - # raise IndexError(err) - # Check if `id_col` is valid if paired: if id_col is None: @@ -414,1114 +387,278 @@ def __repr__(self): if self.__is_paired == 'sequential': for j, current_tuple in enumerate(self.__idx): for ix, test_name in enumerate(current_tuple[1:]): - control_name = current_tuple[ix] - comparisons.append("{} minus {}".format(test_name, control_name)) - else: - for j, current_tuple in enumerate(self.__idx): - control_name = current_tuple[0] - - for ix, test_name in enumerate(current_tuple[1:]): - comparisons.append("{} minus {}".format(test_name, control_name)) - - if self.__delta2 is True: - comparisons.append("{} minus {} (only for mean difference)".format(self.__experiment_label[1], self.__experiment_label[0])) - - if self.__mini_meta is True: - comparisons.append("weighted delta (only for mean difference)") - - for j, g in enumerate(comparisons): - out.append("{}. {}".format(j+1, g)) - - resamples_line1 = "\n{} resamples ".format(self.__resamples) - resamples_line2 = "will be used to generate the effect size bootstraps." - out.append(resamples_line1 + resamples_line2) - - return "\n".join(out) - - - # def __variable_name(self): - # return [k for k,v in locals().items() if v is self] - # - # @property - # def variable_name(self): - # return self.__variable_name() - - @property - def mean_diff(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for the mean difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()` - - """ - return self.__mean_diff - - - @property - def median_diff(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. 
- - """ - return self.__median_diff - - - @property - def cohens_d(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `d`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - - """ - return self.__cohens_d - - - @property - def cohens_h(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `h`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `directional` argument in `dabest.load()`. - - """ - return self.__cohens_h - - - @property - def hedges_g(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - - """ - return self.__hedges_g - - - @property - def cliffs_delta(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - - """ - return self.__cliffs_delta - - @property - def delta_g(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for deltas' g, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - """ - return self.__delta_g - - @property - def data(self): - """ - Returns the pandas DataFrame that was passed to `dabest.load()`. - When `delta2` is True, a new column is added to support the - function. The name of this new column is indicated by `x`. - """ - return self.__data - - - @property - def idx(self): - """ - Returns the order of categories that was passed to `dabest.load()`. 
- """ - return self.__idx - - - @property - def x1(self): - """ - Returns the first variable declared in x when it is a delta-delta - case; returns None otherwise. - """ - return self.__x1 - - - @property - def x1_level(self): - """ - Returns the levels of first variable declared in x when it is a - delta-delta case; returns None otherwise. - """ - return self.__x1_level - - - @property - def x2(self): - """ - Returns the second variable declared in x when it is a delta-delta - case; returns None otherwise. - """ - return self.__x2 - - - @property - def experiment(self): - """ - Returns the column name of experiment labels that was passed to - `dabest.load()` when it is a delta-delta case; returns None otherwise. - """ - return self.__experiment - - - @property - def experiment_label(self): - """ - Returns the experiment labels in order that was passed to `dabest.load()` - when it is a delta-delta case; returns None otherwise. - """ - return self.__experiment_label - - - @property - def delta2(self): - """ - Returns the boolean parameter indicating if this is a delta-delta - situation. - """ - return self.__delta2 - - - @property - def is_paired(self): - """ - Returns the type of repeated-measures experiment. - """ - return self.__is_paired - - - @property - def id_col(self): - """ - Returns the id column declared to `dabest.load()`. - """ - return self.__id_col - - - @property - def ci(self): - """ - The width of the desired confidence interval. - """ - return self.__ci - - - @property - def resamples(self): - """ - The number of resamples used to generate the bootstrap. - """ - return self.__resamples - - - @property - def random_seed(self): - """ - The number used to initialise the numpy random seed generator, ie. - `seed_value` from `numpy.random.seed(seed_value)` is returned. - """ - return self.__random_seed - - - @property - def x(self): - """ - Returns the x column that was passed to `dabest.load()`, if any. 
- When `delta2` is True, `x` returns the name of the new column created - for the delta-delta situation. To retrieve the 2 variables passed into - `x` when `delta2` is True, please call `x1` and `x2` instead. - """ - return self.__x - - - @property - def y(self): - """ - Returns the y column that was passed to `dabest.load()`, if any. - """ - return self.__y - - - @property - def _xvar(self): - """ - Returns the xvar in dabest.plot_data. - """ - return self.__xvar - - - @property - def _yvar(self): - """ - Returns the yvar in dabest.plot_data. - """ - return self.__yvar - - - @property - def _plot_data(self): - """ - Returns the pandas DataFrame used to produce the estimation stats/plots. - """ - return self.__plot_data - - - @property - def proportional(self): - """ - Returns the proportional parameter class. - """ - return self.__proportional - - - @property - def mini_meta(self): - """ - Returns the mini_meta boolean parameter. - """ - return self.__mini_meta - - - @property - def _all_plot_groups(self): - """ - Returns the all plot groups, as indicated via the `idx` keyword. - """ - return self.__all_plot_groups - -# %% ../nbs/API/class.ipynb 28 -class DeltaDelta(object): - """ - A class to compute and store the delta-delta statistics for experiments with a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, 1 and 2. The data is divided into two pairs of two groups, and a primary delta is first calculated as the mean difference between each of the pairs: - - - $$\Delta_{1} = \overline{X}_{A_{2}, B_{1}} - \overline{X}_{A_{1}, B_{1}}$$ - - $$\Delta_{2} = \overline{X}_{A_{2}, B_{2}} - \overline{X}_{A_{1}, B_{2}}$$ - - - where $\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\Delta$ is the mean difference between two samples. 
- - A delta-delta value is then calculated as the mean difference between the two primary deltas: - - - $$\Delta_{\Delta} = \Delta_{2} - \Delta_{1}$$ - - and a deltas' g value is calculated as the mean difference between the two primary deltas divided by - the standard deviation of the delta-delta value, which is calculated from a pooled variance of the 4 samples: - - $$\Delta_{g} = \frac{\Delta_{\Delta}}{s_{\Delta_{\Delta}}}$$ - - $$s_{\Delta_{\Delta}} = \sqrt{\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$ - - where $s$ is the standard deviation and $n$ is the sample size. - - - """ - - def __init__(self, effectsizedataframe, permutation_count,bootstraps_delta_delta, - ci=95): - - import numpy as np - from numpy import sort as npsort - from numpy import sqrt, isinf, isnan - from ._stats_tools import effsize as es - from ._stats_tools import confint_1group as ci1g - from ._stats_tools import confint_2group_diff as ci2g - - - from string import Template - import warnings - - self.__effsizedf = effectsizedataframe.results - self.__dabest_obj = effectsizedataframe.dabest_obj - self.__ci = ci - self.__resamples = effectsizedataframe.resamples - self.__effect_size = effectsizedataframe.effect_size - self.__alpha = ci2g._compute_alpha_from_ci(ci) - self.__permutation_count = permutation_count - self.__bootstraps = np.array(self.__effsizedf["bootstraps"]) - self.__control = self.__dabest_obj.experiment_label[0] - self.__test = self.__dabest_obj.experiment_label[1] - - - # Compute the bootstrap delta-delta or deltas' g and the true dela-delta based on the raw data - if self.__effect_size == "mean_diff": - self.__bootstraps_delta_delta = bootstraps_delta_delta[2] - self.__difference = self.__effsizedf["difference"][1] - self.__effsizedf["difference"][0] - 
else: - self.__bootstraps_delta_delta = bootstraps_delta_delta[0] - self.__difference = bootstraps_delta_delta[1] - - sorted_delta_delta = npsort(self.__bootstraps_delta_delta) - - self.__bias_correction = ci2g.compute_meandiff_bias_correction( - self.__bootstraps_delta_delta, self.__difference) - - self.__jackknives = np.array(ci1g.compute_1group_jackknife( - self.__bootstraps_delta_delta, - np.mean)) - - self.__acceleration_value = ci2g._calc_accel(self.__jackknives) - - # Compute BCa intervals. - bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( - self.__bias_correction, self.__acceleration_value, - self.__resamples, ci) - - self.__bca_interval_idx = (bca_idx_low, bca_idx_high) - - if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): - self.__bca_low = sorted_delta_delta[bca_idx_low] - self.__bca_high = sorted_delta_delta[bca_idx_high] - - err1 = "The $lim_type limit of the interval" - err2 = "was in the $loc 10 values." - err3 = "The result should be considered unstable." - err_temp = Template(" ".join([err1, err2, err3])) - - if bca_idx_low <= 10: - warnings.warn(err_temp.substitute(lim_type="lower", - loc="bottom"), - stacklevel=1) - - if bca_idx_high >= self.__resamples-9: - warnings.warn(err_temp.substitute(lim_type="upper", - loc="top"), - stacklevel=1) - - else: - err1 = "The $lim_type limit of the BCa interval cannot be computed." - err2 = "It is set to the effect size itself." - err3 = "All bootstrap values were likely all the same." - err_temp = Template(" ".join([err1, err2, err3])) - - if isnan(bca_idx_low): - self.__bca_low = self.__difference - warnings.warn(err_temp.substitute(lim_type="lower"), - stacklevel=0) - - if isnan(bca_idx_high): - self.__bca_high = self.__difference - warnings.warn(err_temp.substitute(lim_type="upper"), - stacklevel=0) - - # Compute percentile intervals. 
- pct_idx_low = int((self.__alpha/2) * self.__resamples) - pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples) - - self.__pct_interval_idx = (pct_idx_low, pct_idx_high) - self.__pct_low = sorted_delta_delta[pct_idx_low] - self.__pct_high = sorted_delta_delta[pct_idx_high] - - - - def __permutation_test(self): - """ - Perform a permutation test and obtain the permutation p-value - based on the permutation data. - """ - import numpy as np - self.__permutations = np.array(self.__effsizedf["permutations"]) - - THRESHOLD = np.abs(self.__difference) - - self.__permutations_delta_delta = np.array(self.__permutations[1]-self.__permutations[0]) - - count = sum(np.abs(self.__permutations_delta_delta)>THRESHOLD) - self.__pvalue_permutation = count/self.__permutation_count - - - - def __repr__(self, header=True, sigfig=3): - from .__init__ import __version__ - import datetime as dt - import numpy as np - - from .misc_tools import print_greeting - - first_line = {"control" : self.__control, - "test" : self.__test} - - if self.__effect_size == "mean_diff": - out1 = "The delta-delta between {control} and {test} ".format(**first_line) - else: - out1 = "The deltas' g between {control} and {test} ".format(**first_line) - - base_string_fmt = "{:." + str(sigfig) + "}" - if "." in str(self.__ci): - ci_width = base_string_fmt.format(self.__ci) - else: - ci_width = str(self.__ci) - - ci_out = {"es" : base_string_fmt.format(self.__difference), - "ci" : ci_width, - "bca_low" : base_string_fmt.format(self.__bca_low), - "bca_high" : base_string_fmt.format(self.__bca_high)} - - out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) - out = out1 + out2 - - if header is True: - out = print_greeting() + "\n" + "\n" + out - - - pval_rounded = base_string_fmt.format(self.pvalue_permutation) - - - p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) - p2 = "calculated for legacy purposes only. 
" - pvalue = p1 + p2 - - - bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) - bs2 = "the confidence interval is bias-corrected and accelerated." - bs = bs1 + bs2 - - pval_def1 = "Any p-value reported is the probability of observing the " + \ - "effect size (or greater),\nassuming the null hypothesis of " + \ - "zero difference is true." - pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ - "control and test labels were performed." - pval_def = pval_def1 + pval_def2 - - - return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) - - - def to_dict(self): - """ - Returns the attributes of the `DeltaDelta` object as a - dictionary. - """ - # Only get public (user-facing) attributes. - attrs = [a for a in dir(self) - if not a.startswith(("_", "to_dict"))] - out = {} - for a in attrs: - out[a] = getattr(self, a) - return out - - - @property - def ci(self): - """ - Returns the width of the confidence interval, in percent. - """ - return self.__ci - - - @property - def alpha(self): - """ - Returns the significance level of the statistical test as a float - between 0 and 1. - """ - return self.__alpha - - - @property - def bias_correction(self): - return self.__bias_correction - - - @property - def bootstraps(self): - ''' - Return the bootstrapped deltas from all the experiment groups. - ''' - return self.__bootstraps - - - @property - def jackknives(self): - return self.__jackknives - - - @property - def acceleration_value(self): - return self.__acceleration_value - - - @property - def bca_low(self): - """ - The bias-corrected and accelerated confidence interval lower limit. - """ - return self.__bca_low - - - @property - def bca_high(self): - """ - The bias-corrected and accelerated confidence interval upper limit. - """ - return self.__bca_high - - - @property - def bca_interval_idx(self): - return self.__bca_interval_idx - - - @property - def control(self): - ''' - Return the name of the control experiment group. 
- ''' - return self.__control - - - @property - def test(self): - ''' - Return the name of the test experiment group. - ''' - return self.__test - - - @property - def bootstraps_delta_delta(self): - ''' - Return the delta-delta values calculated from the bootstrapped - deltas. - ''' - return self.__bootstraps_delta_delta - - - @property - def difference(self): - ''' - Return the delta-delta value calculated based on the raw data. - ''' - return self.__difference - - - @property - def pct_interval_idx (self): - return self.__pct_interval_idx - - - @property - def pct_low(self): - """ - The percentile confidence interval lower limit. - """ - return self.__pct_low - - - @property - def pct_high(self): - """ - The percentile confidence interval lower limit. - """ - return self.__pct_high - - - @property - def pvalue_permutation(self): - try: - return self.__pvalue_permutation - except AttributeError: - self.__permutation_test() - return self.__pvalue_permutation - - - @property - def permutation_count(self): - """ - The number of permuations taken. - """ - return self.__permutation_count - - - @property - def permutations(self): - ''' - Return the mean differences of permutations obtained during - the permutation test for each experiment group. - ''' - try: - return self.__permutations - except AttributeError: - self.__permutation_test() - return self.__permutations - - - @property - def permutations_delta_delta(self): - ''' - Return the delta-delta values of permutations obtained - during the permutation test. - ''' - try: - return self.__permutations_delta_delta - except AttributeError: - self.__permutation_test() - return self.__permutations_delta_delta - - - -# %% ../nbs/API/class.ipynb 32 -class MiniMetaDelta(object): - """ - A class to compute and store the weighted delta. - A weighted delta is calculated if the argument ``mini_meta=True`` is passed during ``dabest.load()``. 
- - """ - - def __init__(self, effectsizedataframe, permutation_count, - ci=95): - - import numpy as np - from numpy import sort as npsort - from numpy import sqrt, isinf, isnan - from ._stats_tools import effsize as es - from ._stats_tools import confint_1group as ci1g - from ._stats_tools import confint_2group_diff as ci2g - - - from string import Template - import warnings - - self.__effsizedf = effectsizedataframe.results - self.__dabest_obj = effectsizedataframe.dabest_obj - self.__ci = ci - self.__resamples = effectsizedataframe.resamples - self.__alpha = ci2g._compute_alpha_from_ci(ci) - self.__permutation_count = permutation_count - self.__bootstraps = np.array(self.__effsizedf["bootstraps"]) - self.__control = np.array(self.__effsizedf["control"]) - self.__test = np.array(self.__effsizedf["test"]) - self.__control_N = np.array(self.__effsizedf["control_N"]) - self.__test_N = np.array(self.__effsizedf["test_N"]) - - - idx = self.__dabest_obj.idx - dat = self.__dabest_obj._plot_data - xvar = self.__dabest_obj._xvar - yvar = self.__dabest_obj._yvar - - # compute the variances of each control group and each test group - control_var=[] - test_var=[] - for j, current_tuple in enumerate(idx): - cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() - control_var.append(np.var(control, ddof=1)) - - tname = current_tuple[1] - test = dat[dat[xvar] == tname][yvar].copy() - test_var.append(np.var(test, ddof=1)) - self.__control_var = np.array(control_var) - self.__test_var = np.array(test_var) - - # Compute pooled group variances for each pair of experiment groups - # based on the raw data - self.__group_var = ci2g.calculate_group_var(self.__control_var, - self.__control_N, - self.__test_var, - self.__test_N) - - # Compute the weighted average mean differences of the bootstrap data - # using the pooled group variances of the raw data as the inverse of - # weights - self.__bootstraps_weighted_delta = ci2g.calculate_weighted_delta( - self.__group_var, 
- self.__bootstraps, - self.__resamples) - - # Compute the weighted average mean difference based on the raw data - self.__difference = es.weighted_delta(self.__effsizedf["difference"], - self.__group_var) - - sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta) - - - self.__bias_correction = ci2g.compute_meandiff_bias_correction( - self.__bootstraps_weighted_delta, self.__difference) - - self.__jackknives = np.array(ci1g.compute_1group_jackknife( - self.__bootstraps_weighted_delta, - np.mean)) - - self.__acceleration_value = ci2g._calc_accel(self.__jackknives) - - # Compute BCa intervals. - bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( - self.__bias_correction, self.__acceleration_value, - self.__resamples, ci) - - self.__bca_interval_idx = (bca_idx_low, bca_idx_high) - - if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): - self.__bca_low = sorted_weighted_deltas[bca_idx_low] - self.__bca_high = sorted_weighted_deltas[bca_idx_high] - - err1 = "The $lim_type limit of the interval" - err2 = "was in the $loc 10 values." - err3 = "The result should be considered unstable." - err_temp = Template(" ".join([err1, err2, err3])) - - if bca_idx_low <= 10: - warnings.warn(err_temp.substitute(lim_type="lower", - loc="bottom"), - stacklevel=1) - - if bca_idx_high >= self.__resamples-9: - warnings.warn(err_temp.substitute(lim_type="upper", - loc="top"), - stacklevel=1) - - else: - err1 = "The $lim_type limit of the BCa interval cannot be computed." - err2 = "It is set to the effect size itself." - err3 = "All bootstrap values were likely all the same." - err_temp = Template(" ".join([err1, err2, err3])) - - if isnan(bca_idx_low): - self.__bca_low = self.__difference - warnings.warn(err_temp.substitute(lim_type="lower"), - stacklevel=0) - - if isnan(bca_idx_high): - self.__bca_high = self.__difference - warnings.warn(err_temp.substitute(lim_type="upper"), - stacklevel=0) - - # Compute percentile intervals. 
- pct_idx_low = int((self.__alpha/2) * self.__resamples) - pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples) - - self.__pct_interval_idx = (pct_idx_low, pct_idx_high) - self.__pct_low = sorted_weighted_deltas[pct_idx_low] - self.__pct_high = sorted_weighted_deltas[pct_idx_high] - - - - def __permutation_test(self): - """ - Perform a permutation test and obtain the permutation p-value - based on the permutation data. - """ - import numpy as np - self.__permutations = np.array(self.__effsizedf["permutations"]) - self.__permutations_var = np.array(self.__effsizedf["permutations_var"]) - - THRESHOLD = np.abs(self.__difference) - - all_num = [] - all_denom = [] - - groups = len(self.__permutations) - for i in range(0, len(self.__permutations[0])): - weight = [1/self.__permutations_var[j][i] for j in range(0, groups)] - all_num.append(np.sum([weight[j]*self.__permutations[j][i] for j in range(0, groups)])) - all_denom.append(np.sum(weight)) - - output=[] - for i in range(0, len(all_num)): - output.append(all_num[i]/all_denom[i]) - - self.__permutations_weighted_delta = np.array(output) - - count = sum(np.abs(self.__permutations_weighted_delta)>THRESHOLD) - self.__pvalue_permutation = count/self.__permutation_count - - - - def __repr__(self, header=True, sigfig=3): - from .__init__ import __version__ - import datetime as dt - import numpy as np - - from .misc_tools import print_greeting - - is_paired = self.__dabest_obj.is_paired - - PAIRED_STATUS = {'baseline' : 'paired', - 'sequential' : 'paired', - 'None' : 'unpaired' - } - - first_line = {"paired_status": PAIRED_STATUS[str(is_paired)]} - - - out1 = "The weighted-average {paired_status} mean differences ".format(**first_line) - - base_string_fmt = "{:." + str(sigfig) + "}" - if "." 
in str(self.__ci): - ci_width = base_string_fmt.format(self.__ci) - else: - ci_width = str(self.__ci) - - ci_out = {"es" : base_string_fmt.format(self.__difference), - "ci" : ci_width, - "bca_low" : base_string_fmt.format(self.__bca_low), - "bca_high" : base_string_fmt.format(self.__bca_high)} - - out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) - out = out1 + out2 - - if header is True: - out = print_greeting() + "\n" + "\n" + out - + control_name = current_tuple[ix] + comparisons.append("{} minus {}".format(test_name, control_name)) + else: + for j, current_tuple in enumerate(self.__idx): + control_name = current_tuple[0] - pval_rounded = base_string_fmt.format(self.pvalue_permutation) + for ix, test_name in enumerate(current_tuple[1:]): + comparisons.append("{} minus {}".format(test_name, control_name)) + if self.__delta2 is True: + comparisons.append("{} minus {} (only for mean difference)".format(self.__experiment_label[1], self.__experiment_label[0])) - p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) - p2 = "calculated for legacy purposes only. " - pvalue = p1 + p2 - + if self.__mini_meta is True: + comparisons.append("weighted delta (only for mean difference)") - bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) - bs2 = "the confidence interval is bias-corrected and accelerated." - bs = bs1 + bs2 + for j, g in enumerate(comparisons): + out.append("{}. {}".format(j+1, g)) - pval_def1 = "Any p-value reported is the probability of observing the" + \ - "effect size (or greater),\nassuming the null hypothesis of" + \ - "zero difference is true." - pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ - "control and test labels were performed." - pval_def = pval_def1 + pval_def2 + resamples_line1 = "\n{} resamples ".format(self.__resamples) + resamples_line2 = "will be used to generate the effect size bootstraps." 
+ out.append(resamples_line1 + resamples_line2) + return "\n".join(out) - return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) + # def __variable_name(self): + # return [k for k,v in locals().items() if v is self] + # + # @property + # def variable_name(self): + # return self.__variable_name() + + @property + def mean_diff(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the mean difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()` - def to_dict(self): """ - Returns all attributes of the `dabest.MiniMetaDelta` object as a - dictionary. + return self.__mean_diff + + + @property + def median_diff(self): """ - # Only get public (user-facing) attributes. - attrs = [a for a in dir(self) - if not a.startswith(("_", "to_dict"))] - out = {} - for a in attrs: - out[a] = getattr(self, a) - return out - + Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - @property - def ci(self): """ - Returns the width of the confidence interval, in percent. + return self.__median_diff + + + @property + def cohens_d(self): """ - return self.__ci - + Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `d`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - @property - def alpha(self): """ - Returns the significance level of the statistical test as a float - between 0 and 1. + return self.__cohens_d + + + @property + def cohens_h(self): """ - return self.__alpha + Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `h`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `directional` argument in `dabest.load()`. 
+ """ + return self.__cohens_h - @property - def bias_correction(self): - return self.__bias_correction + @property + def hedges_g(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - @property - def bootstraps(self): - ''' - Return the bootstrapped differences from all the experiment groups. - ''' - return self.__bootstraps + """ + return self.__hedges_g + + + @property + def cliffs_delta(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. + """ + return self.__cliffs_delta @property - def jackknives(self): - return self.__jackknives - + def delta_g(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for deltas' g, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. + """ + return self.__delta_g @property - def acceleration_value(self): - return self.__acceleration_value + def data(self): + """ + Returns the pandas DataFrame that was passed to `dabest.load()`. + When `delta2` is True, a new column is added to support the + function. The name of this new column is indicated by `x`. + """ + return self.__data @property - def bca_low(self): + def idx(self): """ - The bias-corrected and accelerated confidence interval lower limit. + Returns the order of categories that was passed to `dabest.load()`. """ - return self.__bca_low - + return self.__idx + @property - def bca_high(self): + def x1(self): """ - The bias-corrected and accelerated confidence interval upper limit. + Returns the first variable declared in x when it is a delta-delta + case; returns None otherwise. 
""" - return self.__bca_high + return self.__x1 @property - def bca_interval_idx(self): - return self.__bca_interval_idx + def x1_level(self): + """ + Returns the levels of first variable declared in x when it is a + delta-delta case; returns None otherwise. + """ + return self.__x1_level @property - def control(self): - ''' - Return the names of the control groups from all the experiment - groups in order. - ''' - return self.__control + def x2(self): + """ + Returns the second variable declared in x when it is a delta-delta + case; returns None otherwise. + """ + return self.__x2 @property - def test(self): - ''' - Return the names of the test groups from all the experiment - groups in order. - ''' - return self.__test + def experiment(self): + """ + Returns the column name of experiment labels that was passed to + `dabest.load()` when it is a delta-delta case; returns None otherwise. + """ + return self.__experiment + @property - def control_N(self): - ''' - Return the sizes of the control groups from all the experiment - groups in order. - ''' - return self.__control_N + def experiment_label(self): + """ + Returns the experiment labels in order that was passed to `dabest.load()` + when it is a delta-delta case; returns None otherwise. + """ + return self.__experiment_label @property - def test_N(self): - ''' - Return the sizes of the test groups from all the experiment - groups in order. - ''' - return self.__test_N + def delta2(self): + """ + Returns the boolean parameter indicating if this is a delta-delta + situation. + """ + return self.__delta2 @property - def control_var(self): - ''' - Return the estimated population variances of the control groups - from all the experiment groups in order. Here the population - variance is estimated from the sample variance. - ''' - return self.__control_var + def is_paired(self): + """ + Returns the type of repeated-measures experiment. 
+ """ + return self.__is_paired @property - def test_var(self): - ''' - Return the estimated population variances of the control groups - from all the experiment groups in order. Here the population - variance is estimated from the sample variance. - ''' - return self.__test_var + def id_col(self): + """ + Returns the id column declared to `dabest.load()`. + """ + return self.__id_col + - @property - def group_var(self): - ''' - Return the pooled group variances of all the experiment groups - in order. - ''' - return self.__group_var + def ci(self): + """ + The width of the desired confidence interval. + """ + return self.__ci @property - def bootstraps_weighted_delta(self): - ''' - Return the weighted-average mean differences calculated from the bootstrapped - deltas and weights across the experiment groups, where the weights are - the inverse of the pooled group variances. - ''' - return self.__bootstraps_weighted_delta + def resamples(self): + """ + The number of resamples used to generate the bootstrap. + """ + return self.__resamples @property - def difference(self): - ''' - Return the weighted-average delta calculated from the raw data. - ''' - return self.__difference + def random_seed(self): + """ + The number used to initialise the numpy random seed generator, ie. + `seed_value` from `numpy.random.seed(seed_value)` is returned. + """ + return self.__random_seed @property - def pct_interval_idx (self): - return self.__pct_interval_idx + def x(self): + """ + Returns the x column that was passed to `dabest.load()`, if any. + When `delta2` is True, `x` returns the name of the new column created + for the delta-delta situation. To retrieve the 2 variables passed into + `x` when `delta2` is True, please call `x1` and `x2` instead. + """ + return self.__x @property - def pct_low(self): + def y(self): """ - The percentile confidence interval lower limit. + Returns the y column that was passed to `dabest.load()`, if any. 
""" - return self.__pct_low + return self.__y @property - def pct_high(self): + def _xvar(self): """ - The percentile confidence interval lower limit. + Returns the xvar in dabest.plot_data. """ - return self.__pct_high + return self.__xvar @property - def pvalue_permutation(self): - try: - return self.__pvalue_permutation - except AttributeError: - self.__permutation_test() - return self.__pvalue_permutation - + def _yvar(self): + """ + Returns the yvar in dabest.plot_data. + """ + return self.__yvar + @property - def permutation_count(self): + def _plot_data(self): """ - The number of permuations taken. + Returns the pandas DataFrame used to produce the estimation stats/plots. """ - return self.__permutation_count + return self.__plot_data @property - def permutations(self): - ''' - Return the mean differences of permutations obtained during - the permutation test for each experiment group. - ''' - try: - return self.__permutations - except AttributeError: - self.__permutation_test() - return self.__permutations - - - @property - def permutations_var(self): - ''' - Return the pooled group variances of permutations obtained during - the permutation test for each experiment group. - ''' - try: - return self.__permutations_var - except AttributeError: - self.__permutation_test() - return self.__permutations_var + def proportional(self): + """ + Returns the proportional parameter class. + """ + return self.__proportional @property - def permutations_weighted_delta(self): - ''' - Return the weighted-average deltas of permutations obtained - during the permutation test. - ''' - try: - return self.__permutations_weighted_delta - except AttributeError: - self.__permutation_test() - return self.__permutations_weighted_delta + def mini_meta(self): + """ + Returns the mini_meta boolean parameter. + """ + return self.__mini_meta + @property + def _all_plot_groups(self): + """ + Returns the all plot groups, as indicated via the `idx` keyword. 
+ """ + return self.__all_plot_groups -# %% ../nbs/API/class.ipynb 37 +# %% ../nbs/API/class.ipynb 28 class TwoGroupsEffectSize(object): """ @@ -2209,7 +1346,7 @@ def proportional_difference(self): return npnan -# %% ../nbs/API/class.ipynb 41 +# %% ../nbs/API/class.ipynb 32 class EffectSizeDataFrame(object): """A class that generates and stores the results of bootstrapped effect sizes for several comparisons.""" @@ -2246,6 +1383,7 @@ def __pre_calc(self): import pandas as pd from .misc_tools import print_greeting, get_varname from ._stats_tools import confint_2group_diff as ci2g + from ._delta_objects import MiniMetaDelta, DeltaDelta idx = self.__dabest_obj.idx dat = self.__dabest_obj._plot_data @@ -2869,7 +2007,7 @@ def delta_delta(self): -# %% ../nbs/API/class.ipynb 59 +# %% ../nbs/API/class.ipynb 50 class PermutationTest: """ A class to compute and report permutation tests. diff --git a/dabest/_delta_objects.py b/dabest/_delta_objects.py new file mode 100644 index 00000000..06e1f9a1 --- /dev/null +++ b/dabest/_delta_objects.py @@ -0,0 +1,825 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/delta_objects.ipynb. + +# %% auto 0 +__all__ = ['DeltaDelta', 'MiniMetaDelta'] + +# %% ../nbs/API/delta_objects.ipynb 5 +from scipy.stats import norm +import pandas as pd +from scipy.stats import randint +import numpy as np +from numpy import sort as npsort +from numpy import sqrt, isinf, isnan +from string import Template +import warnings +import datetime as dt + +# %% ../nbs/API/delta_objects.ipynb 6 +class DeltaDelta(object): + """ + A class to compute and store the delta-delta statistics for experiments with a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, 1 and 2. 
The data is divided into two pairs of two groups, and a primary delta is first calculated as the mean difference between each of the pairs: + + + $$\Delta_{1} = \overline{X}_{A_{2}, B_{1}} - \overline{X}_{A_{1}, B_{1}}$$ + + $$\Delta_{2} = \overline{X}_{A_{2}, B_{2}} - \overline{X}_{A_{1}, B_{2}}$$ + + + where $\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\Delta$ is the mean difference between two samples. + + A delta-delta value is then calculated as the mean difference between the two primary deltas: + + + $$\Delta_{\Delta} = \Delta_{2} - \Delta_{1}$$ + + and a deltas' g value is calculated as the mean difference between the two primary deltas divided by + the standard deviation of the delta-delta value, which is calculated from a pooled variance of the 4 samples: + + $$\Delta_{g} = \frac{\Delta_{\Delta}}{s_{\Delta_{\Delta}}}$$ + + $$s_{\Delta_{\Delta}} = \sqrt{\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$ + + where $s$ is the standard deviation and $n$ is the sample size. 
+ + + """ + + def __init__(self, effectsizedataframe, permutation_count,bootstraps_delta_delta, + ci=95): + from ._stats_tools import effsize as es + from ._stats_tools import confint_1group as ci1g + from ._stats_tools import confint_2group_diff as ci2g + + self.__effsizedf = effectsizedataframe.results + self.__dabest_obj = effectsizedataframe.dabest_obj + self.__ci = ci + self.__resamples = effectsizedataframe.resamples + self.__effect_size = effectsizedataframe.effect_size + self.__alpha = ci2g._compute_alpha_from_ci(ci) + self.__permutation_count = permutation_count + self.__bootstraps = np.array(self.__effsizedf["bootstraps"]) + self.__control = self.__dabest_obj.experiment_label[0] + self.__test = self.__dabest_obj.experiment_label[1] + + + # Compute the bootstrap delta-delta or deltas' g and the true dela-delta based on the raw data + if self.__effect_size == "mean_diff": + self.__bootstraps_delta_delta = bootstraps_delta_delta[2] + self.__difference = self.__effsizedf["difference"][1] - self.__effsizedf["difference"][0] + else: + self.__bootstraps_delta_delta = bootstraps_delta_delta[0] + self.__difference = bootstraps_delta_delta[1] + + sorted_delta_delta = npsort(self.__bootstraps_delta_delta) + + self.__bias_correction = ci2g.compute_meandiff_bias_correction( + self.__bootstraps_delta_delta, self.__difference) + + self.__jackknives = np.array(ci1g.compute_1group_jackknife( + self.__bootstraps_delta_delta, + np.mean)) + + self.__acceleration_value = ci2g._calc_accel(self.__jackknives) + + # Compute BCa intervals. + bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( + self.__bias_correction, self.__acceleration_value, + self.__resamples, ci) + + self.__bca_interval_idx = (bca_idx_low, bca_idx_high) + + if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): + self.__bca_low = sorted_delta_delta[bca_idx_low] + self.__bca_high = sorted_delta_delta[bca_idx_high] + + err1 = "The $lim_type limit of the interval" + err2 = "was in the $loc 10 values." 
+ err3 = "The result should be considered unstable." + err_temp = Template(" ".join([err1, err2, err3])) + + if bca_idx_low <= 10: + warnings.warn(err_temp.substitute(lim_type="lower", + loc="bottom"), + stacklevel=1) + + if bca_idx_high >= self.__resamples-9: + warnings.warn(err_temp.substitute(lim_type="upper", + loc="top"), + stacklevel=1) + + else: + err1 = "The $lim_type limit of the BCa interval cannot be computed." + err2 = "It is set to the effect size itself." + err3 = "All bootstrap values were likely all the same." + err_temp = Template(" ".join([err1, err2, err3])) + + if isnan(bca_idx_low): + self.__bca_low = self.__difference + warnings.warn(err_temp.substitute(lim_type="lower"), + stacklevel=0) + + if isnan(bca_idx_high): + self.__bca_high = self.__difference + warnings.warn(err_temp.substitute(lim_type="upper"), + stacklevel=0) + + # Compute percentile intervals. + pct_idx_low = int((self.__alpha/2) * self.__resamples) + pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples) + + self.__pct_interval_idx = (pct_idx_low, pct_idx_high) + self.__pct_low = sorted_delta_delta[pct_idx_low] + self.__pct_high = sorted_delta_delta[pct_idx_high] + + + + def __permutation_test(self): + """ + Perform a permutation test and obtain the permutation p-value + based on the permutation data. 
+ """ + self.__permutations = np.array(self.__effsizedf["permutations"]) + + THRESHOLD = np.abs(self.__difference) + + self.__permutations_delta_delta = np.array(self.__permutations[1]-self.__permutations[0]) + + count = sum(np.abs(self.__permutations_delta_delta)>THRESHOLD) + self.__pvalue_permutation = count/self.__permutation_count + + + + def __repr__(self, header=True, sigfig=3): + from .misc_tools import print_greeting + + first_line = {"control" : self.__control, + "test" : self.__test} + + if self.__effect_size == "mean_diff": + out1 = "The delta-delta between {control} and {test} ".format(**first_line) + else: + out1 = "The deltas' g between {control} and {test} ".format(**first_line) + + base_string_fmt = "{:." + str(sigfig) + "}" + if "." in str(self.__ci): + ci_width = base_string_fmt.format(self.__ci) + else: + ci_width = str(self.__ci) + + ci_out = {"es" : base_string_fmt.format(self.__difference), + "ci" : ci_width, + "bca_low" : base_string_fmt.format(self.__bca_low), + "bca_high" : base_string_fmt.format(self.__bca_high)} + + out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) + out = out1 + out2 + + if header is True: + out = print_greeting() + "\n" + "\n" + out + + + pval_rounded = base_string_fmt.format(self.pvalue_permutation) + + + p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) + p2 = "calculated for legacy purposes only. " + pvalue = p1 + p2 + + + bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) + bs2 = "the confidence interval is bias-corrected and accelerated." + bs = bs1 + bs2 + + pval_def1 = "Any p-value reported is the probability of observing the " + \ + "effect size (or greater),\nassuming the null hypothesis of " + \ + "zero difference is true." + pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ + "control and test labels were performed." 
+ pval_def = pval_def1 + pval_def2 + + + return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) + + + def to_dict(self): + """ + Returns the attributes of the `DeltaDelta` object as a + dictionary. + """ + # Only get public (user-facing) attributes. + attrs = [a for a in dir(self) + if not a.startswith(("_", "to_dict"))] + out = {} + for a in attrs: + out[a] = getattr(self, a) + return out + + + @property + def ci(self): + """ + Returns the width of the confidence interval, in percent. + """ + return self.__ci + + + @property + def alpha(self): + """ + Returns the significance level of the statistical test as a float + between 0 and 1. + """ + return self.__alpha + + + @property + def bias_correction(self): + return self.__bias_correction + + + @property + def bootstraps(self): + ''' + Return the bootstrapped deltas from all the experiment groups. + ''' + return self.__bootstraps + + + @property + def jackknives(self): + return self.__jackknives + + + @property + def acceleration_value(self): + return self.__acceleration_value + + + @property + def bca_low(self): + """ + The bias-corrected and accelerated confidence interval lower limit. + """ + return self.__bca_low + + + @property + def bca_high(self): + """ + The bias-corrected and accelerated confidence interval upper limit. + """ + return self.__bca_high + + + @property + def bca_interval_idx(self): + return self.__bca_interval_idx + + + @property + def control(self): + ''' + Return the name of the control experiment group. + ''' + return self.__control + + + @property + def test(self): + ''' + Return the name of the test experiment group. + ''' + return self.__test + + + @property + def bootstraps_delta_delta(self): + ''' + Return the delta-delta values calculated from the bootstrapped + deltas. + ''' + return self.__bootstraps_delta_delta + + + @property + def difference(self): + ''' + Return the delta-delta value calculated based on the raw data. 
+        '''
+        return self.__difference
+
+
+    @property
+    def pct_interval_idx (self):
+        return self.__pct_interval_idx
+
+
+    @property
+    def pct_low(self):
+        """
+        The percentile confidence interval lower limit.
+        """
+        return self.__pct_low
+
+
+    @property
+    def pct_high(self):
+        """
+        The percentile confidence interval upper limit.
+        """
+        return self.__pct_high
+
+
+    @property
+    def pvalue_permutation(self):
+        try:
+            return self.__pvalue_permutation
+        except AttributeError:
+            self.__permutation_test()
+            return self.__pvalue_permutation
+
+
+    @property
+    def permutation_count(self):
+        """
+        The number of permutations taken.
+        """
+        return self.__permutation_count
+
+
+    @property
+    def permutations(self):
+        '''
+        Return the mean differences of permutations obtained during
+        the permutation test for each experiment group.
+        '''
+        try:
+            return self.__permutations
+        except AttributeError:
+            self.__permutation_test()
+            return self.__permutations
+
+
+    @property
+    def permutations_delta_delta(self):
+        '''
+        Return the delta-delta values of permutations obtained
+        during the permutation test.
+        '''
+        try:
+            return self.__permutations_delta_delta
+        except AttributeError:
+            self.__permutation_test()
+            return self.__permutations_delta_delta
+
+
+
+# %% ../nbs/API/delta_objects.ipynb 10
+class MiniMetaDelta(object):
+    """
+    A class to compute and store the weighted delta.
+    A weighted delta is calculated if the argument ``mini_meta=True`` is passed during ``dabest.load()``.
+ + """ + + def __init__(self, effectsizedataframe, permutation_count, + ci=95): + from ._stats_tools import effsize as es + from ._stats_tools import confint_1group as ci1g + from ._stats_tools import confint_2group_diff as ci2g + + self.__effsizedf = effectsizedataframe.results + self.__dabest_obj = effectsizedataframe.dabest_obj + self.__ci = ci + self.__resamples = effectsizedataframe.resamples + self.__alpha = ci2g._compute_alpha_from_ci(ci) + self.__permutation_count = permutation_count + self.__bootstraps = np.array(self.__effsizedf["bootstraps"]) + self.__control = np.array(self.__effsizedf["control"]) + self.__test = np.array(self.__effsizedf["test"]) + self.__control_N = np.array(self.__effsizedf["control_N"]) + self.__test_N = np.array(self.__effsizedf["test_N"]) + + + idx = self.__dabest_obj.idx + dat = self.__dabest_obj._plot_data + xvar = self.__dabest_obj._xvar + yvar = self.__dabest_obj._yvar + + # compute the variances of each control group and each test group + control_var=[] + test_var=[] + for j, current_tuple in enumerate(idx): + cname = current_tuple[0] + control = dat[dat[xvar] == cname][yvar].copy() + control_var.append(np.var(control, ddof=1)) + + tname = current_tuple[1] + test = dat[dat[xvar] == tname][yvar].copy() + test_var.append(np.var(test, ddof=1)) + self.__control_var = np.array(control_var) + self.__test_var = np.array(test_var) + + # Compute pooled group variances for each pair of experiment groups + # based on the raw data + self.__group_var = ci2g.calculate_group_var(self.__control_var, + self.__control_N, + self.__test_var, + self.__test_N) + + # Compute the weighted average mean differences of the bootstrap data + # using the pooled group variances of the raw data as the inverse of + # weights + self.__bootstraps_weighted_delta = ci2g.calculate_weighted_delta( + self.__group_var, + self.__bootstraps, + self.__resamples) + + # Compute the weighted average mean difference based on the raw data + self.__difference = 
es.weighted_delta(self.__effsizedf["difference"], + self.__group_var) + + sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta) + + + self.__bias_correction = ci2g.compute_meandiff_bias_correction( + self.__bootstraps_weighted_delta, self.__difference) + + self.__jackknives = np.array(ci1g.compute_1group_jackknife( + self.__bootstraps_weighted_delta, + np.mean)) + + self.__acceleration_value = ci2g._calc_accel(self.__jackknives) + + # Compute BCa intervals. + bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( + self.__bias_correction, self.__acceleration_value, + self.__resamples, ci) + + self.__bca_interval_idx = (bca_idx_low, bca_idx_high) + + if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): + self.__bca_low = sorted_weighted_deltas[bca_idx_low] + self.__bca_high = sorted_weighted_deltas[bca_idx_high] + + err1 = "The $lim_type limit of the interval" + err2 = "was in the $loc 10 values." + err3 = "The result should be considered unstable." + err_temp = Template(" ".join([err1, err2, err3])) + + if bca_idx_low <= 10: + warnings.warn(err_temp.substitute(lim_type="lower", + loc="bottom"), + stacklevel=1) + + if bca_idx_high >= self.__resamples-9: + warnings.warn(err_temp.substitute(lim_type="upper", + loc="top"), + stacklevel=1) + + else: + err1 = "The $lim_type limit of the BCa interval cannot be computed." + err2 = "It is set to the effect size itself." + err3 = "All bootstrap values were likely all the same." + err_temp = Template(" ".join([err1, err2, err3])) + + if isnan(bca_idx_low): + self.__bca_low = self.__difference + warnings.warn(err_temp.substitute(lim_type="lower"), + stacklevel=0) + + if isnan(bca_idx_high): + self.__bca_high = self.__difference + warnings.warn(err_temp.substitute(lim_type="upper"), + stacklevel=0) + + # Compute percentile intervals. 
+ pct_idx_low = int((self.__alpha/2) * self.__resamples) + pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples) + + self.__pct_interval_idx = (pct_idx_low, pct_idx_high) + self.__pct_low = sorted_weighted_deltas[pct_idx_low] + self.__pct_high = sorted_weighted_deltas[pct_idx_high] + + + + def __permutation_test(self): + """ + Perform a permutation test and obtain the permutation p-value + based on the permutation data. + """ + self.__permutations = np.array(self.__effsizedf["permutations"]) + self.__permutations_var = np.array(self.__effsizedf["permutations_var"]) + + THRESHOLD = np.abs(self.__difference) + + all_num = [] + all_denom = [] + + groups = len(self.__permutations) + for i in range(0, len(self.__permutations[0])): + weight = [1/self.__permutations_var[j][i] for j in range(0, groups)] + all_num.append(np.sum([weight[j]*self.__permutations[j][i] for j in range(0, groups)])) + all_denom.append(np.sum(weight)) + + output=[] + for i in range(0, len(all_num)): + output.append(all_num[i]/all_denom[i]) + + self.__permutations_weighted_delta = np.array(output) + + count = sum(np.abs(self.__permutations_weighted_delta)>THRESHOLD) + self.__pvalue_permutation = count/self.__permutation_count + + + + def __repr__(self, header=True, sigfig=3): + from .misc_tools import print_greeting + + is_paired = self.__dabest_obj.is_paired + + PAIRED_STATUS = {'baseline' : 'paired', + 'sequential' : 'paired', + 'None' : 'unpaired' + } + + first_line = {"paired_status": PAIRED_STATUS[str(is_paired)]} + + + out1 = "The weighted-average {paired_status} mean differences ".format(**first_line) + + base_string_fmt = "{:." + str(sigfig) + "}" + if "." 
in str(self.__ci):
+            ci_width = base_string_fmt.format(self.__ci)
+        else:
+            ci_width = str(self.__ci)
+
+        ci_out = {"es" : base_string_fmt.format(self.__difference),
+                  "ci" : ci_width,
+                  "bca_low" : base_string_fmt.format(self.__bca_low),
+                  "bca_high" : base_string_fmt.format(self.__bca_high)}
+
+        out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out)
+        out = out1 + out2
+
+        if header is True:
+            out = print_greeting() + "\n" + "\n" + out
+
+
+        pval_rounded = base_string_fmt.format(self.pvalue_permutation)
+
+
+        p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded)
+        p2 = "calculated for legacy purposes only. "
+        pvalue = p1 + p2
+
+
+        bs1 = "{} bootstrap samples were taken; ".format(self.__resamples)
+        bs2 = "the confidence interval is bias-corrected and accelerated."
+        bs = bs1 + bs2
+
+        pval_def1 = "Any p-value reported is the probability of observing the " + \
+                    "effect size (or greater),\nassuming the null hypothesis of " + \
+                    "zero difference is true."
+        pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \
+                    "control and test labels were performed."
+        pval_def = pval_def1 + pval_def2
+
+
+        return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def)
+
+
+    def to_dict(self):
+        """
+        Returns all attributes of the `dabest.MiniMetaDelta` object as a
+        dictionary.
+        """
+        # Only get public (user-facing) attributes.
+        attrs = [a for a in dir(self)
+                 if not a.startswith(("_", "to_dict"))]
+        out = {}
+        for a in attrs:
+            out[a] = getattr(self, a)
+        return out
+
+
+    @property
+    def ci(self):
+        """
+        Returns the width of the confidence interval, in percent.
+        """
+        return self.__ci
+
+
+    @property
+    def alpha(self):
+        """
+        Returns the significance level of the statistical test as a float
+        between 0 and 1.
+        """
+        return self.__alpha
+
+
+    @property
+    def bias_correction(self):
+        return self.__bias_correction
+
+
+    @property
+    def bootstraps(self):
+        '''
+        Return the bootstrapped differences from all the experiment groups.
+        '''
+        return self.__bootstraps
+
+
+    @property
+    def jackknives(self):
+        return self.__jackknives
+
+
+    @property
+    def acceleration_value(self):
+        return self.__acceleration_value
+
+
+    @property
+    def bca_low(self):
+        """
+        The bias-corrected and accelerated confidence interval lower limit.
+        """
+        return self.__bca_low
+
+
+    @property
+    def bca_high(self):
+        """
+        The bias-corrected and accelerated confidence interval upper limit.
+        """
+        return self.__bca_high
+
+
+    @property
+    def bca_interval_idx(self):
+        return self.__bca_interval_idx
+
+
+    @property
+    def control(self):
+        '''
+        Return the names of the control groups from all the experiment
+        groups in order.
+        '''
+        return self.__control
+
+
+    @property
+    def test(self):
+        '''
+        Return the names of the test groups from all the experiment
+        groups in order.
+        '''
+        return self.__test
+
+    @property
+    def control_N(self):
+        '''
+        Return the sizes of the control groups from all the experiment
+        groups in order.
+        '''
+        return self.__control_N
+
+
+    @property
+    def test_N(self):
+        '''
+        Return the sizes of the test groups from all the experiment
+        groups in order.
+        '''
+        return self.__test_N
+
+
+    @property
+    def control_var(self):
+        '''
+        Return the estimated population variances of the control groups
+        from all the experiment groups in order. Here the population
+        variance is estimated from the sample variance.
+        '''
+        return self.__control_var
+
+
+    @property
+    def test_var(self):
+        '''
+        Return the estimated population variances of the test groups
+        from all the experiment groups in order. Here the population
+        variance is estimated from the sample variance.
+        '''
+        return self.__test_var
+
+
+    @property
+    def group_var(self):
+        '''
+        Return the pooled group variances of all the experiment groups
+        in order.
+        '''
+        return self.__group_var
+
+
+    @property
+    def bootstraps_weighted_delta(self):
+        '''
+        Return the weighted-average mean differences calculated from the bootstrapped
+        deltas and weights across the experiment groups, where the weights are
+        the inverse of the pooled group variances.
+        '''
+        return self.__bootstraps_weighted_delta
+
+
+    @property
+    def difference(self):
+        '''
+        Return the weighted-average delta calculated from the raw data.
+        '''
+        return self.__difference
+
+
+    @property
+    def pct_interval_idx (self):
+        return self.__pct_interval_idx
+
+
+    @property
+    def pct_low(self):
+        """
+        The percentile confidence interval lower limit.
+        """
+        return self.__pct_low
+
+
+    @property
+    def pct_high(self):
+        """
+        The percentile confidence interval upper limit.
+        """
+        return self.__pct_high
+
+
+    @property
+    def pvalue_permutation(self):
+        try:
+            return self.__pvalue_permutation
+        except AttributeError:
+            self.__permutation_test()
+            return self.__pvalue_permutation
+
+
+    @property
+    def permutation_count(self):
+        """
+        The number of permutations taken.
+        """
+        return self.__permutation_count
+
+
+    @property
+    def permutations(self):
+        '''
+        Return the mean differences of permutations obtained during
+        the permutation test for each experiment group.
+        '''
+        try:
+            return self.__permutations
+        except AttributeError:
+            self.__permutation_test()
+            return self.__permutations
+
+
+    @property
+    def permutations_var(self):
+        '''
+        Return the pooled group variances of permutations obtained during
+        the permutation test for each experiment group.
+ ''' + try: + return self.__permutations_var + except AttributeError: + self.__permutation_test() + return self.__permutations_var + + + @property + def permutations_weighted_delta(self): + ''' + Return the weighted-average deltas of permutations obtained + during the permutation test. + ''' + try: + return self.__permutations_weighted_delta + except AttributeError: + self.__permutation_test() + return self.__permutations_weighted_delta + + diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py index 44a07f8e..a297cc00 100644 --- a/dabest/plot_tools.py +++ b/dabest/plot_tools.py @@ -41,28 +41,6 @@ def halfviolin(v, half='right', fill_color='k', alpha=1, b.set_linewidth(line_width) - -# def align_yaxis(ax1, v1, ax2, v2): -# """adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1""" -# # Taken from -# # http://stackoverflow.com/questions/7630778/ -# # matplotlib-align-origin-of-right-axis-with-specific-left-axis-value -# _, y1 = ax1.transData.transform((0, v1)) -# _, y2 = ax2.transData.transform((0, v2)) -# inv = ax2.transData.inverted() -# _, dy = inv.transform((0, 0)) - inv.transform((0, y1-y2)) -# miny, maxy = ax2.get_ylim() -# ax2.set_ylim(miny+dy, maxy+dy) -# -# -# -# def rotate_ticks(axes, angle=45, alignment='right'): -# for tick in axes.get_xticklabels(): -# tick.set_rotation(angle) -# tick.set_horizontalalignment(alignment) - - - def get_swarm_spans(coll): """ Given a matplotlib Collection, will obtain the x and y spans diff --git a/nbs/API/class.ipynb b/nbs/API/class.ipynb index ff6d2398..e4cba0a9 100644 --- a/nbs/API/class.ipynb +++ b/nbs/API/class.ipynb @@ -114,8 +114,6 @@ "\n", " # Make a copy of the data, so we don't make alterations to it.\n", " data_in = data.copy()\n", - " # data_in.reset_index(inplace=True)\n", - " # data_in_index_name = data_in.index.name\n", "\n", "\n", " # Check if it is a valid mini_meta case\n", @@ -211,11 +209,6 @@ " self.__x1_level = x1_level\n", "\n", "\n", - " # # Check if idx is specified\n", - " # if delta2 is 
False and not idx:\n", - " # err = '`idx` is not a column in `data`. Please check.'\n", - " # raise IndexError(err)\n", - "\n", "\n", " # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case\n", " if idx is None and x is not None and y is not None:\n", @@ -280,15 +273,6 @@ " 'entered--{}.'.format(idx)\n", " raise ValueError(err)\n", "\n", - " # Having parsed the idx, check if it is a kosher paired plot,\n", - " # if so stated.\n", - " #if paired is True:\n", - " # all_idx_lengths = [len(t) for t in self.__idx]\n", - " # if (np.array(all_idx_lengths) != 2).any():\n", - " # err1 = \"`is_paired` is True, but some idx \"\n", - " # err2 = \"in {} does not consist only of two groups.\".format(idx)\n", - " # raise ValueError(err1 + err2)\n", - "\n", " # Check if there is a typo on paired\n", " if paired is not None:\n", " if paired not in (\"baseline\", \"sequential\"):\n", @@ -384,23 +368,12 @@ " categories=all_plot_groups,\n", " ordered=True)\n", " \n", - " # # The line below was added in v0.2.4, removed in v0.2.5.\n", - " # plot_data.dropna(inplace=True)\n", " \n", " self.__plot_data = plot_data\n", " \n", " self.__all_plot_groups = all_plot_groups\n", "\n", "\n", - " # Sanity check that all idxs are paired, if so desired.\n", - " #if paired is True:\n", - " # if id_col is None:\n", - " # err = \"`id_col` must be specified if `is_paired` is set to True.\"\n", - " # raise IndexError(err)\n", - " # elif id_col not in plot_data.columns:\n", - " # err = \"{} is not a column in `data`. 
\".format(id_col)\n", - " # raise IndexError(err)\n", - "\n", " # Check if `id_col` is valid\n", " if paired:\n", " if id_col is None:\n", @@ -1330,1024 +1303,6 @@ "$\\Delta_{g} = \\frac{\\Delta_{\\Delta}}{s_{\\Delta_{\\Delta}}}$" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "87f50106", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class DeltaDelta(object):\n", - " \"\"\"\n", - " A class to compute and store the delta-delta statistics for experiments with a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, 1 and 2. The data is divided into two pairs of two groups, and a primary delta is first calculated as the mean difference between each of the pairs:\n", - "\n", - "\n", - " $$\\Delta_{1} = \\overline{X}_{A_{2}, B_{1}} - \\overline{X}_{A_{1}, B_{1}}$$\n", - "\n", - " $$\\Delta_{2} = \\overline{X}_{A_{2}, B_{2}} - \\overline{X}_{A_{1}, B_{2}}$$\n", - "\n", - "\n", - " where $\\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\\Delta$ is the mean difference between two samples. 
\n", - "\n", - " A delta-delta value is then calculated as the mean difference between the two primary deltas:\n", - "\n", - "\n", - " $$\\Delta_{\\Delta} = \\Delta_{2} - \\Delta_{1}$$\n", - " \n", - " and a deltas' g value is calculated as the mean difference between the two primary deltas divided by\n", - " the standard deviation of the delta-delta value, which is calculated from a pooled variance of the 4 samples:\n", - " \n", - " $$\\Delta_{g} = \\frac{\\Delta_{\\Delta}}{s_{\\Delta_{\\Delta}}}$$\n", - "\n", - " $$s_{\\Delta_{\\Delta}} = \\sqrt{\\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$\n", - "\n", - " where $s$ is the standard deviation and $n$ is the sample size.\n", - "\n", - "\n", - " \"\"\"\n", - " \n", - " def __init__(self, effectsizedataframe, permutation_count,bootstraps_delta_delta,\n", - " ci=95):\n", - "\n", - " import numpy as np\n", - " from numpy import sort as npsort\n", - " from numpy import sqrt, isinf, isnan\n", - " from ._stats_tools import effsize as es\n", - " from ._stats_tools import confint_1group as ci1g\n", - " from ._stats_tools import confint_2group_diff as ci2g\n", - "\n", - "\n", - " from string import Template\n", - " import warnings\n", - " \n", - " self.__effsizedf = effectsizedataframe.results\n", - " self.__dabest_obj = effectsizedataframe.dabest_obj\n", - " self.__ci = ci\n", - " self.__resamples = effectsizedataframe.resamples\n", - " self.__effect_size = effectsizedataframe.effect_size\n", - " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", - " self.__permutation_count = permutation_count\n", - " self.__bootstraps = np.array(self.__effsizedf[\"bootstraps\"])\n", - " self.__control = self.__dabest_obj.experiment_label[0]\n", - " self.__test = self.__dabest_obj.experiment_label[1]\n", - "\n", - "\n", - " 
# Compute the bootstrap delta-delta or deltas' g and the true dela-delta based on the raw data\n", - " if self.__effect_size == \"mean_diff\":\n", - " self.__bootstraps_delta_delta = bootstraps_delta_delta[2]\n", - " self.__difference = self.__effsizedf[\"difference\"][1] - self.__effsizedf[\"difference\"][0]\n", - " else:\n", - " self.__bootstraps_delta_delta = bootstraps_delta_delta[0]\n", - " self.__difference = bootstraps_delta_delta[1]\n", - " \n", - " sorted_delta_delta = npsort(self.__bootstraps_delta_delta)\n", - "\n", - " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", - " self.__bootstraps_delta_delta, self.__difference)\n", - " \n", - " self.__jackknives = np.array(ci1g.compute_1group_jackknife(\n", - " self.__bootstraps_delta_delta, \n", - " np.mean))\n", - "\n", - " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", - "\n", - " # Compute BCa intervals.\n", - " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", - " self.__bias_correction, self.__acceleration_value,\n", - " self.__resamples, ci)\n", - " \n", - " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", - "\n", - " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", - " self.__bca_low = sorted_delta_delta[bca_idx_low]\n", - " self.__bca_high = sorted_delta_delta[bca_idx_high]\n", - "\n", - " err1 = \"The $lim_type limit of the interval\"\n", - " err2 = \"was in the $loc 10 values.\"\n", - " err3 = \"The result should be considered unstable.\"\n", - " err_temp = Template(\" \".join([err1, err2, err3]))\n", - "\n", - " if bca_idx_low <= 10:\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", - " loc=\"bottom\"),\n", - " stacklevel=1)\n", - "\n", - " if bca_idx_high >= self.__resamples-9:\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", - " loc=\"top\"),\n", - " stacklevel=1)\n", - "\n", - " else:\n", - " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", - " err2 = \"It is set to the 
effect size itself.\"\n", - " err3 = \"All bootstrap values were likely all the same.\"\n", - " err_temp = Template(\" \".join([err1, err2, err3]))\n", - "\n", - " if isnan(bca_idx_low):\n", - " self.__bca_low = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", - " stacklevel=0)\n", - "\n", - " if isnan(bca_idx_high):\n", - " self.__bca_high = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", - " stacklevel=0)\n", - "\n", - " # Compute percentile intervals.\n", - " pct_idx_low = int((self.__alpha/2) * self.__resamples)\n", - " pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples)\n", - "\n", - " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", - " self.__pct_low = sorted_delta_delta[pct_idx_low]\n", - " self.__pct_high = sorted_delta_delta[pct_idx_high]\n", - " \n", - " \n", - "\n", - " def __permutation_test(self):\n", - " \"\"\"\n", - " Perform a permutation test and obtain the permutation p-value\n", - " based on the permutation data.\n", - " \"\"\"\n", - " import numpy as np\n", - " self.__permutations = np.array(self.__effsizedf[\"permutations\"])\n", - "\n", - " THRESHOLD = np.abs(self.__difference)\n", - "\n", - " self.__permutations_delta_delta = np.array(self.__permutations[1]-self.__permutations[0])\n", - "\n", - " count = sum(np.abs(self.__permutations_delta_delta)>THRESHOLD)\n", - " self.__pvalue_permutation = count/self.__permutation_count\n", - "\n", - "\n", - "\n", - " def __repr__(self, header=True, sigfig=3):\n", - " from .__init__ import __version__\n", - " import datetime as dt\n", - " import numpy as np\n", - "\n", - " from .misc_tools import print_greeting\n", - "\n", - " first_line = {\"control\" : self.__control,\n", - " \"test\" : self.__test}\n", - " \n", - " if self.__effect_size == \"mean_diff\":\n", - " out1 = \"The delta-delta between {control} and {test} \".format(**first_line)\n", - " else:\n", - " out1 = \"The deltas' g between {control} and {test} 
\".format(**first_line)\n", - " \n", - " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", - " if \".\" in str(self.__ci):\n", - " ci_width = base_string_fmt.format(self.__ci)\n", - " else:\n", - " ci_width = str(self.__ci)\n", - " \n", - " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", - " \"ci\" : ci_width,\n", - " \"bca_low\" : base_string_fmt.format(self.__bca_low),\n", - " \"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", - " \n", - " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", - " out = out1 + out2\n", - "\n", - " if header is True:\n", - " out = print_greeting() + \"\\n\" + \"\\n\" + out\n", - "\n", - "\n", - " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", - "\n", - " \n", - " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(pval_rounded)\n", - " p2 = \"calculated for legacy purposes only. \"\n", - " pvalue = p1 + p2\n", - "\n", - "\n", - " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", - " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", - " bs = bs1 + bs2\n", - "\n", - " pval_def1 = \"Any p-value reported is the probability of observing the \" + \\\n", - " \"effect size (or greater),\\nassuming the null hypothesis of \" + \\\n", - " \"zero difference is true.\"\n", - " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", - " \"control and test labels were performed.\"\n", - " pval_def = pval_def1 + pval_def2\n", - "\n", - "\n", - " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", - "\n", - "\n", - " def to_dict(self):\n", - " \"\"\"\n", - " Returns the attributes of the `DeltaDelta` object as a\n", - " dictionary.\n", - " \"\"\"\n", - " # Only get public (user-facing) attributes.\n", - " attrs = [a for a in dir(self)\n", - " if not a.startswith((\"_\", \"to_dict\"))]\n", - " out = {}\n", - " for a in attrs:\n", - " out[a] = getattr(self, a)\n", - " return out\n", - 
"\n", - "\n", - " @property\n", - " def ci(self):\n", - " \"\"\"\n", - " Returns the width of the confidence interval, in percent.\n", - " \"\"\"\n", - " return self.__ci\n", - "\n", - "\n", - " @property\n", - " def alpha(self):\n", - " \"\"\"\n", - " Returns the significance level of the statistical test as a float\n", - " between 0 and 1.\n", - " \"\"\"\n", - " return self.__alpha\n", - "\n", - "\n", - " @property\n", - " def bias_correction(self):\n", - " return self.__bias_correction\n", - "\n", - "\n", - " @property\n", - " def bootstraps(self):\n", - " '''\n", - " Return the bootstrapped deltas from all the experiment groups.\n", - " '''\n", - " return self.__bootstraps\n", - "\n", - "\n", - " @property\n", - " def jackknives(self):\n", - " return self.__jackknives\n", - "\n", - "\n", - " @property\n", - " def acceleration_value(self):\n", - " return self.__acceleration_value\n", - "\n", - "\n", - " @property\n", - " def bca_low(self):\n", - " \"\"\"\n", - " The bias-corrected and accelerated confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__bca_low\n", - "\n", - "\n", - " @property\n", - " def bca_high(self):\n", - " \"\"\"\n", - " The bias-corrected and accelerated confidence interval upper limit.\n", - " \"\"\"\n", - " return self.__bca_high\n", - "\n", - "\n", - " @property\n", - " def bca_interval_idx(self):\n", - " return self.__bca_interval_idx\n", - "\n", - "\n", - " @property\n", - " def control(self):\n", - " '''\n", - " Return the name of the control experiment group.\n", - " '''\n", - " return self.__control\n", - "\n", - "\n", - " @property\n", - " def test(self):\n", - " '''\n", - " Return the name of the test experiment group.\n", - " '''\n", - " return self.__test\n", - "\n", - "\n", - " @property\n", - " def bootstraps_delta_delta(self):\n", - " '''\n", - " Return the delta-delta values calculated from the bootstrapped \n", - " deltas.\n", - " '''\n", - " return self.__bootstraps_delta_delta\n", - "\n", - "\n", - " 
@property\n", - " def difference(self):\n", - " '''\n", - " Return the delta-delta value calculated based on the raw data.\n", - " '''\n", - " return self.__difference\n", - "\n", - "\n", - " @property\n", - " def pct_interval_idx (self):\n", - " return self.__pct_interval_idx \n", - "\n", - "\n", - " @property\n", - " def pct_low(self):\n", - " \"\"\"\n", - " The percentile confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__pct_low\n", - "\n", - "\n", - " @property\n", - " def pct_high(self):\n", - " \"\"\"\n", - " The percentile confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__pct_high\n", - "\n", - "\n", - " @property\n", - " def pvalue_permutation(self):\n", - " try:\n", - " return self.__pvalue_permutation\n", - " except AttributeError:\n", - " self.__permutation_test()\n", - " return self.__pvalue_permutation\n", - " \n", - "\n", - " @property\n", - " def permutation_count(self):\n", - " \"\"\"\n", - " The number of permuations taken.\n", - " \"\"\"\n", - " return self.__permutation_count\n", - "\n", - " \n", - " @property\n", - " def permutations(self):\n", - " '''\n", - " Return the mean differences of permutations obtained during\n", - " the permutation test for each experiment group.\n", - " '''\n", - " try:\n", - " return self.__permutations\n", - " except AttributeError:\n", - " self.__permutation_test()\n", - " return self.__permutations\n", - "\n", - " \n", - " @property\n", - " def permutations_delta_delta(self):\n", - " '''\n", - " Return the delta-delta values of permutations obtained \n", - " during the permutation test.\n", - " '''\n", - " try:\n", - " return self.__permutations_delta_delta\n", - " except AttributeError:\n", - " self.__permutation_test()\n", - " return self.__permutations_delta_delta\n", - "\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c6a7192f", - "metadata": {}, - "source": [ - "\n", - "\n", - "and the standard deviation of the delta-delta value is calculated 
from a pooled variance of the 4 samples:\n", - "\n", - "\n", - "$$s_{\\Delta_{\\Delta}} = \\sqrt{\\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$\n", - "\n", - "where $s$ is the standard deviation and $n$ is the sample size." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a5905b79", - "metadata": {}, - "source": [ - "#### Example: delta-delta" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "088f734b", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0UAAAIaCAYAAADvKOYjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAACRpUlEQVR4nOzdd3xT9f4/8NdJ2qZ779JJoVBoKVBAZhmyRQREULwM/eJVcV0FwauI/mS411VRuQoOEFGGslQEyihSRlsoo4zSUigt3SMdaZuc3x9cIqGDpk16muT1fDz6kJzP55zziulp8s75nM8RRFEUQUREREREZKFkUgcgIiIiIiKSEosiIiIiIiKyaCyKiIiIiIjIorEoIiIiIiIii8aiiIiIiIiILBqLIiIiIiIismgsioiIiIiIyKKxKCIiIiIiIovGooiIiIiIiCyaRRVFOTk5eO2115CTkyN1FCIiIiIiaicsrih6/fXXWRQREREREZGWRRVFREREREREt2NRREREREREFo1FEVE7oa6pRlXRNdSpKqWOQkRERGRRrKQOQGTp1DXVyNz9Na6f3A1NbTUEuTU8IwcjbOT/wdreRep4RERERGaPRRGRhERRxJkfX0Pp5dS/l6lrkZ+6BxXXLyHmkQ8gs7KRMCERERGR+WNRRCShkktJOgXRrSrzMnFh24dQlRZAU1cD56Du8O8zAbauPm2ckoiIiMi8sSgiklDRxaNNtuef2qf9tzLnAq6n/I5uD74B5w5djB2NiIiIyGJwogUiKYmiXt3Vqkpc2PahcbIQERERWSgWRUQScgvvo/c6VQVXUJ6dZoQ0RERERJaJRRGRhNw69oZzYDe916upKDVCGiIiIiLLxKKISEKCICBy+mvw7TUWMmvFjWVyK8gVDk2sJIODV3AbJSQiIiIyf5xogUhiVgp7hI97CiEjHkFNeSFsHFxRcO4vXNz2UYP93Tv3g62bbxunJCIiIjJfJnOm6LXXXoMgCDo/XbpwBi4yH1YKe9h7BsLKzgm+MaMQ0H8KIOgeos6B3dD5nmclSkhERESWrCIvE1n71+HSH18ia/86VORlGnV/s2fPhiAIePzxx+u1zZs3D4IgYPbs2QbZl0mdKerWrRv+/PNP7WMrK5OKT6SX0BGPwK/3PShMS4C6VgWX4O5wCeoudSwiIiKyMFVF13D+1/dRfvUsIMggCDKIogZZ+9fCqUNXdL73edi5+xtl34GBgVi/fj0++OAD2NnZAQCqq6uxbt06BAUFGW
w/JlVVWFlZwdeXw4bIcti6eiPgrklSxyAiIiILVVV0DSe+/hfqVJU3FogaiKJG216efQ4nvv4XejzygVEKo169eiE9PR2bNm3CjBkzAACbNm1CUFAQQkNDDbYfkxk+BwAXLlyAv78/wsLCMGPGDGRlZTXZX6VSoaysTPujVCrbKCkRERERkek7/+v7NwqiWwohHaIGdapKnP/1faNleOSRR7B69Wrt46+//hpz5swx6D5Mpijq168f1qxZg99++w0rV65ERkYGBg8ejPLy8kbXWbFiBVxcXLQ/cXFxbZiYiIiIiMh0VeRl3hgy11hBdJOoQfnVs0a7xujhhx/GwYMHcfnyZVy+fBkJCQl4+OGHDboPkxk+N3bsWO2/o6Oj0a9fPwQHB2PDhg149NFHG1znpZdewvPPP699nJKSwsKIiIiIiKgZCtMO3Zj06U5FEQAIMhSmHYKDd4jBc3h5eWH8+PFYs2YNRFHE+PHj4enpadB9mExRdDtXV1d07twZFy9ebLSPQqGAQqHQPnZ0dGyLaEREREREJq+uWqmdVOFOBEGGumrjXaryyCOP4KmnngIAfPrppwbfvskMn7udUqlEeno6/Pz8pI5CRERERGR2rGwdm1UQAYAoamBla7wTEGPGjEFNTQ1qa2sxevRog2/fZIqi+fPnY9++fcjMzMShQ4cwadIkyOVyPPjgg1JHIyIiIiIyOx5dBjRv6BwAiJob/Y1ELpfj7NmzOHPmDORyucG3bzLD565evYoHH3wQhYWF8PLywqBBg3D48GF4eXlJHY2IiIiIyOw4eIfAqUNXlGefa7o4EmRwCogwyvVEt3J2djbatgVRFEWjbb2dSUpKQu/evXH8+HH06tVL6jhEWjXKIuSd3ANVWT7s3APgHTUMVnZOUsciIiIiC6dzn6KGCiNBBiuFvdHuU9RWTOZMEZG5yjsVjwtbP4CortMuuxz/LbpM+TfcOrJ4JyIiIunYufujxyMf4Pyv79+YnluQ/T35gqiBU0AEOt/7vEkXRACLIiJJVRVl48Kv70PUqHWWq2uqcPbnZQjoPwUl6cegrq2BS1A3+PedaPJ/dIiIiMi02Ln7o8fsd1GRl4nCtEOoq1bCytYRHl0GGH3IXFthUUQkodyknfUKops0tdW4sn+t9nFlXgbyTu5G5PTX4RLUra0iEhEREQG4cY2RuRRBtzOZ2eeIzFFlYbZe/dU1Vbi4/WMjpSEiIiKyTCyKiCSkcPLQe52qwqsou5pmhDRERERElolFEZGEfGJadvOxuqoyAychIiIislwsiogk5OTfCcFD/6HfSoIM9l4hRslDREREZIk40QKRxAIHTYdraAxyk3+HqqwAdu7+sHZ0R1b8tw3294joD1tX7zZOSURERGS+WBQRtQNOAV3gFNBFZ5lYV4Orh37SmZ3OJaQHOt3zbFvHIyIiIjJrHD5H1E4FD/0HYp/6GmGjH0fI8DnoMec9RD28HFa2DlJHIyIiIjKqzz//HE5OTqir+/vm9kqlEtbW1hg6dKhO3/j4eAiCgPT09Bbvj2eKiNoxhbMn/PtMkDoGERERETKuFeDgyYtQVqngaKfAoOhwhPp7GmVfw4YNg1KpxLFjx3DXXXcBAA4cOABfX18kJiaiuroatra2AIC9e/ciKCgIHTt2bPH+WBQRWbjY2Fjk5ubC19cXx44dkzoOERERtTPZ+SV4e93vOJOZA5lMgEwQoBFFfPv7YXQL9cOCB0cjwMvVoPuMiIiAn58f4uPjtUVRfHw8Jk6ciD179uDw4cPaM0bx8fEYNmxYq/bH4XNEFi43NxfZ2dnIzc2VOgoRERG1M9n5JXj6wx+QlnXjc4JGI6JOrYFGIwIAzl7OxdMf/oDs/BKD73vYsGHYu3ev9vHevXsxdOhQxMXFaZdXVVUhMTGRRRERERERERnH2+t+R0V1jbYIup1GI6Kiugbv/PCHwfc9bNgwJCQkoK6uDuXl5UhOTkZcXByGDBmC+Ph4AMBff/
0FlUrFooiIiIiIiAwv41oBzmTmNFoQ3aTRiDidcQ0Z1woMuv+hQ4eioqICR48exYEDB9C5c2d4eXkhLi5Oe11RfHw8wsLCEBQU1Kp98ZoiIiIiIiKq5+DJi5DJhDsWRQAgkwk4mHrRoBMvhIeHo0OHDti7dy+Ki4sRFxcHAPD390dgYCAOHTqEvXv3Yvjw4a3eF88UERERERFRPcoqFWSC0Ky+MkGAslJl8AzDhg1DfHw84uPjdabiHjJkCHbu3IkjR460eugcwKKIiIiIiIga4GingEa881kiANCIIhztFQbPMGzYMBw8eBApKSnaM0UAEBcXhy+++AI1NTUsiojMjbqmCtWledDU1UodhYiIiCzcoOjwZg2dA25cVzQoOtzgGYYNG4aqqiqEh4fDx8dHuzwuLg7l5eXaqbtbi9cUEbUDNRUlyPjzKxScOQBRXQsrW0f49ByN4Lh/QGZlLXU8IiIiskCh/p6IDPFDWlZuk8WRTCaga7AfQv0MfyPXkJAQiA2crQoODm5weUvxTBGRxNQ1VUj9bhHyU/dAVN84Q1RXrUT2XxuRtmmFxOmIiIjIkr340Gg42NpAJmv42iKZTICDrQ0WPDiqjZMZFosiIonlndyNqoIrDbYVnU9E2dWzbZyIiIiI6IYAL1f857kH0TX4xhA1mUyAlVymLZK6BvvhP889iAAvVwlTth6HzxFJrPB8YpPtRecT4dyhaxulISIiItIV4OWKD595ABnXCnAw9SKUlSo42iswKDrcKEPmpMCiiEhqdxgPqyrLR8mlZDj6d4KVrWMbhSIiIiLSFervadD7ELUnLIqIJOYWHouSjORG2/NPxSP/VDxk1gr49R6PkOGzIcjkbZiQiIiIyLzxmiIiifn0GAmFq88d+2lqVcg+vAkZu1e3QSoiIiIiy8GiiEhiVrYOiP7HW3DvfBcg/O+QbOLu0bnHd6C2qryN0hERERGZPw6fI2oHFC5eiHxgMWoqSlBbUYLkL59qtK+mToXyq2fh3qlvGyYkIiIiMl8siojaERsHV9g4uEJmbQNNrarRfjIrRRumIiIiIjJvHD5H1A55dhnUaJu1gyucg7q1YRoiIiIi88aiiKgdChz8IKzsnRtoERAy4hHI5DzJS0RERGQoLIqI2iE7dz/0mP0evLoPhSC3BgA4d4hE5LRX4RM9QuJ0REREROaFXzcTtUMadS3Ks89Bo66Fa1gvuIbGwKfH3bBS2EsdjYiIiMjssCgiamfqVJU4ve4VlGef0y4rvpCIa0e2IOofb8LWxVvCdERERETmh8PniNqZy/Hf6RREN6lKriN9xycSJCIiIiIybyyKiNoRjboOeSd3N9penJ4EVWl+GyYiIiIiMn8siojaEXVNFdSqiiZ6iFCVFzS5jRplEWrKiwwbjIiIiMiM8ZoionbESmEPawdX1FaUNNxBEHAl4UfYOLrDu/twuAR31zYVXTyGrH3fQZlzEQDg4NsRwXEPw71T3zZITkRERGS6eKaIqB0RZHL4xoxuvIMoovjCUVxP/h2p3y3Ehe0fAwCKLh7FmR9f1xZEAFCRm44zG95A4bm/mtynr68vAgIC4Ovra5DnQERERGRqeKaIqJ0JHPIglHkZKL5w5I59ryf/DpfgaGT/9TMgaup3EDW4HP8dPCL6N7qNY8eOtSYuERERkcljUUTUzsjk1ug2bQlKL6ei8NxfqCrMRnF644XLtSO/ouJ6RqPtlfmXUV2cC1s3ngkiIiIiagiHzxG1Uy7BUQgb9Rjcwns32a/mDhMvAIAoioaKRURERGR2TLYoevPNNyEIAp577jmpoxDdUV11Ba4d/RXnf3kfl/74UnvtT52qEjnHd+DSrv8iO3ELaitLddarrSiFIGv6hK6dRwfYewY12d7UWaLY2Fh06NABsbGxejwjIiIiIvNhksPnjh49ii+++ALR0dFSRyG6I2XOBZz64VXUVZZpl1078gs8ugxESUaKzhTcl/d+g073Pg8nv0649M
cXKLp47H/XCgkAGj7b49d7HAABaRtXNNgnaMhDEASh0Xy5ubnIzs5u4bMjIiIiMn0md6ZIqVRixowZWLVqFdzc3KSOQ9QkUaPG2Z+X6xRENxWmJdS7J5Gmrgbnt7yDE2vmo+jCkVsmT2i4IPLvOxHOQd1RXZILl9AYWNk5a9ts3fzR+b4F8OoWZ7DnQ0RERGSOTO5M0bx58zB+/HjcfffdWLp0qdRxiJpUnH4cqtI8vdYRNWrUVhQ32u7euR9s3fzg3X0YVGX5OPafOdDU1fzdQWaFkBFzENB3YpNniIiIiIjoBpMqitavX4+kpCQcPXq0Wf1VKhVUKpX2sVKpNFY0ogZVF+cafJtO/hEIHDQNqvJCnFgzH6K6VreDpg6Xd6+GZ8QA2Lp6G3z/RERERObGZIbPXblyBc8++yzWrl0LW1vbZq2zYsUKuLi4aH/i4jiMiNqWwghFSUVeJq6f+BPZhzfXL4j+R9TU4XrK7wbfNxEREZE5EkQTmat3y5YtmDRpEuRyuXaZWq2GIAiQyWRQqVQ6bUD9M0UpKSmIi4vD8ePH0atXrzbLbmxqjQaJpzOQlpULB1sFhvXqDG835zuvSEYnatQ4+p9HmjVttqF5Rg5Gl8mL7tivQ4cOyM7ORkBAAK5evdoGyYiIiIjaF5MZPjdixAikpqbqLJszZw66dOmChQsX1iuIAEChUEChUGgfOzo6Gj1nW7teVIZ/f7kFWdeLtMu+3pGAmaPvwoxR/SRMRgAgyOToMuUlnF7/KtTVupMqOAd2Q9mVM7h9EoWAfpNRkHZQ72uRbqdw9mzV+kRERESWwmSKIicnJ3Tv3l1nmYODAzw8POottyT/b812nYIIADQaEWt2/oVQP08MiOooUTK6yblDF8Q+uQrXU3ZBmXsRVraO8I4aDufASChz05F7fAeUuemorSxDXbUS+Wf2wTU0BrUVpSjJSIaoUQOC7JaZ6JpDgE+PUUZ7TkSWTlWaj8ILiRDVtXANiYGDT6jUkYiIqBVMpiii+s5kXsP5K9cbbd9yIIVFUTthbe+CDgPur7fc0bcjPLsNQV7qHu0McmpVBfJO7oaNozt6zPkAMhsFklb+U4+9CQgd+X+w92r8hq5E1HIZf36F7MQtOl9UuHe+CxGTFkBu3bxrXomIqH0x6aIoPj5e6giSyswtukN7YRsloda49NvnulNq/0+NsgjZiZsQcd8CyKxtoamtbnQbHl0GQlOngq2LD3x6joajL4thImPIObYN2Yc31VtedP4wLv3+JTrd84wEqYiIqLVMuiiydO5O9k23Ozu0URJqKeX1S6gsyGq0veDsQXj3GAnnoG4oST/eYB9bN390mfIS70lE1AayE39ptC0vdQ9Chs+Ctb1LGyYiIiJDMJkpuam+Pl1C4OHSeOEzum9kG6ahllCrKptsF9V1OL325UYLIkFujY5jnmBBRNQG6lSVqC6+1mi7qK5FZf6VNkxERESGwqLIhMnlMrz40GjY2tQ/4denSzAmDIyWIBXpw8E7FDJrxZ073kJuYwdrB1d4Rg5Bj9nvwq2j+UwvT9Seya0Vdzxereyd2igNEREZEosiE9ercxC+WPAwpgzthYggHwR4uqKDlytq6tTYdigVVaqGb+5J7YOVrQN8e43Vax1Ro0avxz9Hl8kL4egXbqRkRHQ7QSaHV7fGbwLu6BcOB6/gNkxERESGwqLIDPh7umJM3264XlSG7IISXM0vwYmLV/Hppng89/GPKK9s/AJ9kl7I8Dnw7TkGgqz+vbYaoqmrQfnVs0ZORUQNCR76D9i6+ddbLlc4oOPYeRIkIiIiQ+BEC2bigw1/okRZVW/5pWsF+GbnX3hqyjAJUlFzyORWCB//NAIHP4jSyychyK1x/tcPINapGl/HSr8hd0RkGDaO7ujxyPvIObYdhecOoa5KCYWLNzy6DoSDd4jU8YiIqIV4psgMXMkrwpnMnEbbdx07C7VGnxt/khQUzp7wjhoOr8jB8Ioc1Gg/a0
c3OAd1a8NkRHQrazsneEcNBwCoSq+jLCsVGb9/jiMfzULh+cMSpyMiopZgUWQGisqansGssroGqpq6NkpDhhA0+MFGpvUVEDr8EcjkPMlLJBVR1ODMj6+hIjddZ3ldVRnSNq5ARf5liZIREVFLsSgyA4HebpDLGn8pfdycYKewbsNE1Fq2bn6Inv0evKKGQ2ZlA0CAc2A3RE5/Dd7Rw6WOR2TRitOPo7KRwkdU1yHn6NY2TkRERK3Fr5vNgLuzA4bEdMLepHMNtk8cHMP72JggO3c/REx8AZj4AkRRA0HgdxhE7YHy2oWm23OabiciovaHRZGZePb+4SgsVeJkerbO8rF3dceUON7HxtQJggwadS0KzhxESUYyBJkVPCLuglt4LIslojZmZevYqnYiImp/WBSZCQc7Bd57aipOpl9F0vkrsJbLMCg6HMG+HlJHIwOoURbh1Pcvo7IgS7vsesrvcAnpgchpSyDX8wawRNRynpGDkbH7K4jqhq/V9IriEFciIlPDosjMRHfsgOiOHaSOQQZ2ccenOgXRTaWZJ5C1fy1CRzwiQSoiy2Tj6IbQEY/i0h9f1Gtz6xgL7+5D2z4UERG1CosionZOVVaAogtHGm2/nrILIcNmNfvmr0TUev5974W9VxCuHf0VFdczYe3gAp8ed8MnZjSPRSIiE8SiiKidU5XmAWLj95mqqypDnaoS1nZObZiKiFxDY+AaGiN1DCIiMgAWRUTtQGnWaVw/sQs15YWw9wyEb6+xsPcMBAAoXLwBQdZoYWRl5wQrhX1bxiUiIiIyKyyKiCSWuecbXD20Qfu45FISco5tQ6cJ/4JXtyGoLrkOR79OUF5reMp1nx4jWzVcx9fXV+e/RERERJaGRRGRhEovp+oURDeJGjUubP0AGX9+jdqKov8tFQCIOv2cg6IQFDejVRmOHTvWqvWJiIiITJ3RiqJr167B39/fWJsnMgu5KX802iZq1LcURMDNgsjOowOcAiLgEdEf7p368qJuIolUFmQh59h2VORdho2DC7x73A338D5SxyIiohYwWlHUrVs3fPrpp3jooYeMtQsik1dTXqj3OqrSfETPfpcTKxBJqODMAZzb8i5Ezd/3Kio4exA+MaPQ6Z5nJUxGREQtITPWhpctW4Z//vOfmDp1KoqKiu68ApEFsnMP0HsdTZ0K5VfPGiENETVHXXUFzm/9UKcguul6yh8oPPeXBKmIiKg1jFYUPfnkkzh58iQKCwsRGRmJrVu3GmtXRCbLr/e4GzPL6UlmpTBCGiJqjoIz+6GprW60valhsURE1D4ZdaKF0NBQ7NmzB5988gkmT56Mrl27wspKd5dJSUnGjEDUrjn4hCJ83FNI3/kpRI1au1yQW0FU1/8WGgCsHd3gHNStrSIS0W1qlMVNt7dgWCwREUnL6LPPXb58GZs2bYKbmxsmTpxYrygisnS+PUfDLTwWeSd3a+9T5NQhEqfWvYy6yrLbegsIHf4IZHIeR0RSsfvfPcQaY+8Z1EZJiIjIUIz6yWrVqlV44YUXcPfdd+P06dPw8vIy5u6ITJbCyQOBAx/QWdZj9nvI2r8OhWkJ0NTVwDmwGzoMnMrZrYgk5hHRHzZOnqgpL2igVYBf7D1tnomIiFrHaEXRmDFjcOTIEXzyySeYOXOmsXZDZLbs3P0Rcd98iOILgKjh1NtmoLqmFgdOXERRWQWCfNzRNzIEcpnRLu0kI5HJrRA57VWcWf8aapR/TyQkyOQIG/1POHfoImE6IiJqCaMVRWq1GidPnkSHDh2MtQsiiyAIAiCwIDJ1f526hLfX/Q5llUq7zNfdGf/v/+5FqJ+nhMmoJRx9OyL2qa9QcOYgKvIyYO3gCu/uw2Dj5C51NCIiagFBFEVR6hBtJSkpCb1798bx48fRq1cvqeO0StL5LGxNOIlrBSXwdnPGuLu6o3/3MKljkYGJooiK65egqauBg08o5Na2UkeiFriaX4zH3v4etXXqem1ero5Y8/Js2P
B6SyIiIsnwXdgErf0jEWt2/n0fjEvXCnD49CVMHtITT0yKkzAZGVLRhSPI2PVfVBVlAwDktg7w7zMRQUMeunH2iEzG1oMnGyyIACC/RIn9KRdwd2zXNk5FREREN3Ewu4nJul6kUxDdatP+ZJy6dM1o+46NjUWHDh0QGxtrtH3QDaVZp3H2p6XagggA1NUVuHJgHS7HfythMmqJ9Gv5TbdnN91ORERExsUzRSZm19EzTbav23UEHQM8YSWXY3CPcIT5G27Gv9zcXGRnZ9+5I7Xa1YQfde5bdKtrR35Fh/73w8rWoY1TUUs5O9g12e5yh3aipsTGxiI3Nxe+vr44duyY1HGIiEwSiyITU6KsarL9aFomjqZlAgC+/yMRI/t0xfzpoyCTcbiVKSnJSGm0TVNbjbKrZzg1twkZ1acrDpy40GCbTCZgeG/OVkYtxy+siIhaj8PnTExHPc/87Dp6Fj/tPW6kNGQsgqzp7ytkd2in9qVfZChGNnLN0GMTBsPbzamNExEREdGtWBSZmJF9usLJXr8ZyH45eAIWNMmgWfCI6N9om7W9C5yDurdhGmotQRCw4KFR+Pc/xqJX5yAE+7hjcHQ43p03BVOGmvZMmEREROaAXzebGAc7BZY9NhGvfb0NRWUVzVonv6QcVapa2NvaGDkdGUrg4OkoSj8KdXX91zh46D8gs7KWIBW1hiAIGNYrAsN6RUgdhYiIiG7DosgEdQ32w/eLH0FC6kVkF5TCWibDqm0HG+3vYGsDhQ1falNi7xmI6Fnv4HL8dyg6nwiIGjj4dkTggKnwjBwsdTwiIiIis8JPyibK2kqOoT3//sb50OlLOJ3R8HTcI/tEQi7jSElT4+AVjMipr0BTVwONug5WCnupIxERERGZJX5SNhPPT7sb7k71PzSHB3hh1ti7JEhEhiKzsmFBRERERGREPFNkJoJ83PHFiw9j+1+nkHw+C1ZyGQZHd8LdsV05dI6IiIiIqAn8tGxGXB3tMWNkX8wY2bdeW51ajb1J57EnKQ2V1TXoGuyLewf1gL+na9sHJTIzJy5ewfkreXCyt8Xg6HA42CmkjkRERER6YFFkYtQaDY6cyUROYQl83V3QLzIUcnnToyBr6uqweNWvSDqfpV12JjMH2/9Kxf/7v4no2SnQ2LGJzFJBiRKvfvUrLlzN0y77dNNezJs8DGP6dZMwGREREemDRZEJOZd1Hf9vzTbkFZdrl3m6OGLx7HGIDPFvdL1fD57UKYhuqq6pw9trf8f3ix+5Y2FFRPW9tnqrTkEE3DiuPvjxT3TwckX3sIBmbUcURahq62BrozvVesa1AlRUqxDi5wFHO/3uT0ZERETNx6LIRFRUqfDvLzejrKJaZ3lBqRL//uIXTBwcjUOpl6CsUqFLsC+mxPVC97AbhdLvR043ut2CUiWOn7+Mvl1DjZqfyNycunQN57KuN9imEUVs2pd8x6KoolqF735PxB9HTqO8UgVvNydMGBiNLkG++GzzPmTkFAAAbG2sMKZfdzx272BYW8kN/lyo/RBFEaWZJ6Eqy4Otmx9ceKNmIqI2YTJF0cqVK7Fy5UpkZmYCALp164ZXX30VY8eOlTZYG/nj6Jl6BdFNFdUqrNt1VPv44MmLOJSajgUPjcLdsV1RXFbZ5LaL7tBORPVduNpwQXTT8fNZ+McbX0MQBPSLDMX9Q3vBx91Z215TV4dFKzch7ZbCKq+4HF9tS4BMEKARRe3y6po6bDmQgkpVDRY8OMrwT8ZClF1NQ3H6cQgyGdw794OjT5ikeVRlBajMvwxre2c4+nWCMucC0ja9jeriv2+vYO8VjC5TXoK9J4c5ExEZk8kURR06dMCbb76JTp06QRRFfPPNN5g4cSKSk5PRrZv5j92/cCXvzp1uoRFFfLopHoOiwxHs646T6dmN9g3182htPCKL4+Jg12R7ZXUNKqtrAABbDqRgb9I5vPfU/Qj2vXG87Tl+TqcgutWtBdGt/jx6FjNH36VTXNGdqW
urkfbzChSnH9Muy9r3Pby6xaHTvf9CScYJKK+dh9zWAV6Rg2Hj6A4AqK0sRXVJHmwc3aBw9jRYnjpVJS5u/w8Kzh4ERA0AwM6jA2qURVCrdL+kqsy/jFNrX0HvJ7+A3JpDKImIjMVkiqIJEyboPF62bBlWrlyJw4cPW0RR5GSv/5uhskqFLftT0KmDd6NFUddgX0QE+bY2HpHF6d+9I+xtbbSFz52UVlThi1/2Y/k/JwEAEk5e1HufGlFE8oUrnMRBTxm7vtIpiG7KP70PxRkpqKss1S7L/PNrBA6ejqrCbBScPQBRXQdAgFvH3ug4dh5sXb1bnSdt4wqUXErSWVZVeLXR/jXlBcg/tQ++PUe3et9ERNQwkymKbqVWq/HTTz+hoqIC/fv3b7SfSqWCSqXSPlYqlW0Rzyjuju2KTfuT9V7vq+0JjbaF+Hlg8ezxrYlFZLHsFNZ4dupwvLX2d2g0DZ/Zud2xtMt4ZdUvKK+sxvXishbtl9cU6aeuugJ5qbsbb7+lIAIAUVOHrH3f39ZLRHH6MaR+twg9H/tE52bKpZdTUZGXCWsHV7h36gu5df3p2FXlhSg8mwB1TRWs7JzrFUTNUZ6dxqKIiMiITKooSk1NRf/+/VFdXQ1HR0ds3rwZkZGRjfZfsWIFXn/99TZMaDydAr0xZWgvbIzX/830VnY21hg/IAox4YHo0zUEMplgoIRElmd4ry4I8HTF5v0pOH/lOmysrZCend9ofxFA4pmMFu9PYW2FPl1CGm2PjY1Fbm4ufH19cexY/TMjlqi6JBeaWtWdOzaDqvQ68k78Cf++96K6JA9nf3oDFdcvadut7JzQ6Z5n4RHx95d1WQd+wJUDP0DUqFu1b/kthRgRERmeSc3DHBERgZSUFCQmJuKJJ57ArFmzcObMmUb7v/TSSygtLdX+7Nu3rw3TGt7jE4fg1Tn3oFfnIPh5uCCmUyBmjr4LMqH5hU1VTS28XJ3Qr1soCyIiA4gI8sWih8fg65dm4ePnpt3xWqPmsGpkivwH7+4DZ4fGh9Lm5uYiOzsbubm5rc5gLqwdXAEY7m9d8aXjEEUNzqxfolMQAUBdVTnSNr2JivzLAID8MweQte/7VhdEAODdfVirt0FERI0zqTNFNjY2CA8PBwD07t0bR48exUcffYQvvviiwf4KhQIKxd9DGRwdHdskpzENjg7H4OhwnWW+Hs74dFM8Kpp5bcPpjGuYHNfTGPGILJqNlRUmDu6Bb3873KL1ZTIBA7p3xIyRffFzfBL2n7iA2jo1gnzcMXVYb15L1AIKJw+4hvVs0ZC1hgiCDMXpx1FZUP/ebwAgquuQc3Qrwsc9hWuJWwyyT9/e4+DoF37njkRE1GImVRTdTqPR6FwzZOpq69TYm3QO8SnnUV1Ti+iwANwzMBqeLk0XcyP7RGJQdCfsP3Eel64VIONaAZIvXGm0v63CutE2ImqdGSP7oVRZha2HTjb7WqNRfSMxMrYrArxc4eXqBABY9PAYzH9wJGpq1bC3tTFmZLPXccyTSP1uIWrKC3UbBJl29rfmsnZ0R/6p+Cb7KHMuAAAqrus3VNLOIxC+vcYiL3U3VGX5sHPzh2/vcfCJHqHXdoiISH8mUxS99NJLGDt2LIKCglBeXo5169YhPj4ev//+u9TRDKK6phb//nILUm+ZJS41PRu/JpzAin9Ohlwm4Ni5y5DLZOjfPQwdvNy0/URRxI97jmHzvmRUqu58tmhoz84tyujr66vzXyKqTyYT8NSUYZg2IhbH0i5DEASkpmfjj6OND/WNCgtATKf696GxksthJefECq1l5+6HnnP/g9yk31CcfhwQZPCI6AcbJ0+c/+U9iOpanf5W9s6oq2x4Iozryb/dcX9Wtje+yLK2d4aqrPFrzBQu3lCV5kFu6wDvqBEIGvwgrO2dEdBvoh7Pjn+biYgMwWSKory8PMycORM5OTlwcXFBdHQ0fv/9d4wcOVLqaAbx057jOgXRTeWVKiz47GdUqf
5+01619QDG3RWFZ+4fDplMwHe/H8baPxKbtZ/B0eGIjQhuUUZeuE3UfF6uTugc6IO/Tl+CrY0VZDKhwTNHro72GNYzQoKElsXa3gWBg6YhcNA0neUO3sHIOboN5dfOw8rWAV7dh8Kj6yBcTdiA3OTfbhRHep5R8owcjMJzf8HBt2OjRZHC1Qex8/4LiCIEWesKX/5tJiJqPZMpir766iupIxjVb0dON9p2a0EEAKIIbP8rFX4eLrh3UA9s3Nf4VN0KaysIAuDt5ozx/aMwcXAPCMKND2fHzmUip7AMfh7OiI3gTHREhqJWa/DWut+xN+mcznJBECDecmNWNyd7vPF/90JhYzJ/is2OvWcQOo59st7ykGGzEBz3MEoyT+L0uleavz3vUGTsXg11deO3gJBZK9DpnucgCDJDzgFBREStwHfidqKorELvdX45mIIuQb5N3jxSVVuHwT3CkZlTiP0nLsDWxgqh/p5Y/u1O5Bb9PTzE190Zi2ePR+dAnxblJ6K/ffdHYr2CCLgx1DUqLADdwvwR5O2OuJhOsLHmn+H2SpDJUV3U8I2vb5JZKWDt4AprBxc4deiKnCO/4sbk67qs7JygcPaCc2Ak/PveCzv3ACOlJiKiluC7cTsR5OOOS9cK9Fonv0SJWvWdp3o9cOLi//5VjNMZ12All6FOrTsUJLeoDP/+YgvWvDwLjnYNT/nLe6AQ3VltnRrbEk422n7hah6Wzp3IyRNMhNym6fsDCbL/Da0TNSjNSEFDBREA1FUp0WPOe0Yphvi3mYio9UzqPkXm7L7BMXqvY2MtR/dQf/i4Oem13u0F0U2lFVX440jjF4PzHihEd1ZcXonSiqpG26trapFbVNqGiag13Dv3g8xa0Wi7uqYKqrJ8KHMuovJ/9ydqmIjyq/XPHhoC/zYTEbUei6J2Yuxd3TFpSAxuvw+rrU3j02c72Cqw4LOfEeLnAUGPG7g25VzWdYNsh8hSOdkrYN3EjHEyQYCrY9NnH6j9sLJ1QMiIRwyyLbmi9Tf2JSIi4+DwuXbkyUlDMWFgNPalXEC1qhZRHQPg6eqIhZ9tavCb5+LyShSXVwK4cdYo0MsN6dcKYCWXIcDLDZdzC+utcyeO9g0PnSOi5rFT2GBwj3DsaeCaIgCQy2V48LX/ItDbDRMHx+CeAVEG+1KDjMM/9h7Yuvog+/BmKHMuQhBkqKsu12sbVnbOcOvY20gJiYiotVgUtTOB3u54eFQ/nWWfz5+BXxJO4HjaZRSXV6KgtP6sRjW1ahSVV6JzBy+cv5qPrBYURAAwoneXFq1HRH977N4hSMu6jmsFJfXaautuXAd4+XoRPv55DzJyCvDM/cPbOCHpyz28D9zD+wAAsvavQ9b+tc1fWZAhbNRjkFnxOjIiovaKw+dMgKerIx4dPxCfvfAQbKwbH5ZTXF6J81dv3BOj4Ut9mzZhYDQiQ/xamJKIbvJwccBnzz+Ix+4djKiOAQjwcm2079aEk8jMadmXGCQNR/+mb4Dt4NsRClefG2eHOvVF1MPL4d65H/JS9yLn+A5UXM9oo6RERNRcPFNkYm4Ol2spe4UNnp06HBCAbYdSkVtYBl8PZ9wzIArDe/EsEZGhONgpMHVYb0wd1huvrPoF2fkljfbdl3IeIX792y4ctYiqvBBVhdlQuHrD3iu44YkVBBlqKkqgrlLC3isQnl0HobIgC6fXvwZNbbW2m2tYL3SZvBBWto5t+AyIiKgxLIpMTIivB85e1m+GIQHAI/cMhLuTAwZFh2unAmYRRNQ2VLV1rWonadVVlePijk9QkHboxvTbABx9O8HeKwiV+VnafoJMDlGjRm35jTN/ypyLuPDr+w1us+RSEs7/8h4ipy0x/hMgIqI74vA5EzNpSE+91xFxY8rvUX0jeW8UIglEhzV9b5rojryRZ3sliiJOr38NBWcPagsiAFDmXkBtVTkipy1Bx7FPwrf3PRA1d75v3K2KLhxFVWHTN4clIqK2waLIxAzrFY
EZo/pBJtOdrer2x7fqFurf5NTeRGRc4wdEwdmh4ZkdOwZ4oW/X0HrLK6pUSDyTgaNpmVDV8EySVEouJaE8O63BtlplMSryMuHXezxUJS25R5CI8pwLrQtIREQGweFzJmj22P4Y268b9p+4gCpVDbqF+uN81nV8veNQvb4yQcCMUX0lSElEN7k7O+CtJybj7bV/ICOnQLu8d0QQFs4YrfOlhiiK+Oa3w9gYn4TqmloAN+59NHN0f9w3JKato1u8kswTTbdnpMA3ZjTqVBUt2r6VrUOL1iMiIsNiUWSifNydMXXY3/e86B0RDJlMhg17j6Gs4sbFvL7uzpg7YTD6dAmRKCUR3RQe4I0vX3wY57Kuo7BMiSBvd3TwdqvX74c/j2LtH4k6y8orVfh0czwc7BQY2adrW0UmADJ502fZy66cQeIHDwGC/gMvrB1c4Rqq/5BoIiIyPBZFZmTaiFjEdgnGrmNnobCSY8LAHvB05cxGRMakqqlDysUrqFOr0T00AC6Odk32jwjyAeDTYFtNbR027ktqdN0fdx9lUdTGPLoMwJWD6xttF9W1//uHptE+gpUNxLoa3WUyOTqOfRIyOd+GiYjaA/41NhM1tXV4a+3v2H/i7/HpG/YexwPDYzFn3AAJkxGZr60JJ7F6xyGUV944O2ttJceEAdF4bOJgyGX6nzm4nFukPdPbYPv1IpQoK+HqaN9gu6+vr85/qfUcfTvCK2o48lP3tGh9l5Ae6DjmSRSmHUT+6f1Qqyrh1KErAu6aBKc73O+IiIjaDosiM/HZ5n06BREA1Kk1WLfrCLxdnTB+QJREyYjM076U8/j4Z90PyrV1amzanwxrKzn+b8KgZm3nan4xftpzHEfOZkKtafxsA3DjGkEbq8b/bB87dqxZ+yT9dJ7wHBy8gpFzbBtUZfmQWdlAc9uZn1vZeQUjaPCDsPcKgoNXMADAftB0BA6a3laRiYhITyyKzEBZRRX+OHqm0faN+5JYFBEZ2Po/jzba9svBE7CxkuNaYSncnR0wqk8kQvw86vW7eDUP8z/9GRXVjX/AvlVsl2BOqy8BQSZHhwH3I6D/FGhqq5GduAVZ+75vtL/cWgGvyMFtmJCIiFqLRZEZyMgpRG1d4/fHuJJXjCpVDewU/DBFZAhVqlpczM5vtL26phbf3TJZwk97j+OR8QMwYWA09iadR1FZBYJ83LE14WSzCyJ7Wxs8Mn5gq7NTywmCALmNHdw69m6yKHIL69WGqYiIyBBYFJkBZ/uG739yk8LaSmfITVlFFfYmn0dxeSVCfT0wMLojrORyY8ckMhvWVjJYyWWoUzc93O1WX28/hO9/T0RNE19g3M5KLoNcJsOAqI6YMbIvgn3rn226VWxsLHJzc+Hr68uhdEbk5N8ZbuF9UHyx/tlCawdX+MWOlyAVERG1BosiMxDq74mOAV5Ib+Sb60HR4RAhAgB2HT2DD3/ajZravz+Yebk6YdnciQj192yTvESmzkoux6DocMQnn9drPX0KIgD4+qVZ8PNwaXb/3NxcZGdn67UP0k9VYTYqC6+gw4CpUDh5IC91j/b6Ilt3f8ht7HHq+5fh4NsR/n0nwsm/k8SJiYioOVgUmYln7h+Olz7fjEqV7lAcmSBg9/E0HDqVjj5dQnAw9SI0GlGnT35JOV5e9Qu+fWU2zxgRNdOssf2RfP4KSiuqjLJ9DxcHeLs6GWXbpL8aZRHO//I+SjKStcscfEIR+dAbEAQBmbvXoPzq39d2VhZkIf/0PkRMfAFe3YdKkJiIiPSh/5yx1C5Fhvjh0xcexISB0QjwcoWDnQIAoBFvFEBVqlrsP3GhXkF0U35JORJS09ssL5Gp6+Dlhv88Nx1j+nWDg60NFNZWCPMz3NnWKXG9IJfzT3R7IIoanFr3qk5BBAAV1zOQ9vMylF85q1MQ/b2iBhd3fgp1jXEK55t8fX0REBDAqdiJiFqBZ4rMSAcvNzxz/3BcupaPf76zVu/1M3IKERfTeDvvgU
Kky8/TBS9MH4kXpo8EAFRUq/DQa1/VO2N7JzJB0H6BYS2XY1JcDO4fyov124uiC0dQmZfRYFtdZRmuHdvW6LpqVSUKz/0F76jhxorH68eIiAyARZEZSjjZsjM+bk4N3xDyJr7xEjXNwVaBl2eNxf9bvR2q2rpmrSOXyfDJ89ORca0QgnBj2u3Gbs5K0ijLOtVke11lWZPttXdoJyIi6bEoMlFnL+dgz/FzqKhWoWuwL+6O7aqdcrvuDjeAbIiNtRzDevLu6kSt1bdrKNa8PBu/JZ7G5dxCuDnZY3TfbthyIAW/JZ7W6SsIwLzJQxEe4I3wAG+JEtOdyKwUTbZb2TqgRqlqtN3BJ8zQkYiIyMBYFJmgDzfsxva/UrWPdx09i+//OII3H5+EUD9P9OociHW7jjR7ezKZgGfvHwFnBztjxCWyOJ4ujnh4VD/UqdX4cc9xvLLqFxSUKuFgawM3J3vYKWwQ4uuBCYOi0TXYT+q4dAeekYNx5eD6RtudOnRF4blDgFj/mk0H345wDYk2ZjwiIjIAFkUmZtfRMzoF0U1FZRV4Y812TB/RBykXr8DdyR5F5ZX1+rk52uHRCYOQkJqO4rIKhPh5YuKgHgjvwG+piQxt6Tc7dCYwqaiuQUV1DWI6BeKFB0dCLuNECqbAwTsEvr3GIjdpZ4PthWkJ//uXAODvwsjBJxSRUxcbPyAREbUaiyITszXhZKNtV/KK8c4Pf+gsu/UtOqpjAJ6bOgJBPu4Y3beb8UISEU5cvNrojI4pF67gr1OXMCg6vI1TUUt1HDsPDj6hyDm6DZWFVyHIZBDVt183JkKQWyOg70S4hvWES0gPCIIgSV4iItIPiyITk1NYqld/EcDIPl3x8Kh+8Pd0NUomIqrvwIkLd2xnUWQ6BEGAX+/x8Os9HiWZJ3Dq+3832E9U10KjqYNraEzbBiQiolZhUWRifNydUaLU754XR89exvzpo4yUiMhy1dTVYc/xczh48iJq69To2SkQ4/p3h7ODHWrr1HdYt+l2ar/Ksk63qp2IiNofFkUmZnz/KJzLuq7XOiXKSlTX1MLe1sZIqYgsT0W1CotWbkLaLcdj0vksbD6QgnfnTUFMp0DsONz4VM49OwVq/63RiDh3JRc1tXXo1MGHx2o7J7Oxbbrduul2IiJqf1gUmZgx/bohNT0bu46dbfY6ro52sLWxNmIqIsvz3e+JOgXRTUVlFXhv/Z94d94UBP/hjsvXi+r18XV3xt2xXQEAB05exJe/7Edu0Y172dgrbDBxcA/MHjsAMhmvR2mPPLsOQubu1YDY8O0PvLoNaeNERETUWiyKTIwgCHhxxmiM7heJ3cfPoaJKhS7BvjhyNgMpF642uM7Yu7rzwxWRAWk0Iv440vgQqdMZ1/Dqf38FBMDFwQ6lFX8PeXVxsIOjnQIrt+xD50BvfLIpHhrN3zOWVapq8MOfRyGKIh69Z5BRnwe1jK2LNwIHTMWVhB/rtTkFRMA7eoQEqYiIqDVYFJmoHuGB6BH+9/CboT07Y8FnG5GdX6LTr2enQDw8ql8bpyMybzV1dSivbPxmnQBwNO2yzmMfNydcLy5HaUUVSiuqcDE7v97NXG+15cAJTB/RBw52Td84lKQRPGwm7DwDce3IL6jIy4C1vSt8etyNDgPuh9yarxkRkalhUWQmvFyd8MX8h7E3+RySz2fBykqOQVHh6BcZyrNERAZma2MNbzcn5BWXN3ud63r0BYDqmlqcuZyDPl1C9ExHbcU7ahi8o4ZJHYOIiAyARZEZUdhYYUy/bhjTj/cgIjK2ewf2wH+3HTTqPmys5EbdPhEREd3A26kTEbXA/cN6YUTvLkbbvruTPbqF+htt+0RERPQ3nikiImoBuUyGRQ+PwZS4Xjhw8gJq69SoU2uw5UBKq7ctCMCj9wyClZxnioiIiNoCiyIiolboFOiNToHeAIAqVQ3ik8/pdYNlJ3tbxIR3wF+nL6FOrUHXYF88eHdf9O8eZqzIREREdB
sWRUREBmKnsMHSufdhyde/orC0QrvcwdYG4/pH4deDJ6CqrdMud3Oyxxv/dy8ignyhVmug1mhgY93yP8u+vr46/yUiIqLmEURRFO/czTwkJSWhd+/eOH78OHr16iV1HCIyU7V1ahxKTcfV/GJ4ujpiSI/OsFNYo6yiGnuS0lBYVoEgb3fExXRqVRFEREREhsF3YyIiA7O2kiOuZ+d6y50dbHHvwB5Q1dbBTmEtQTIiIiJqiMkURStWrMCmTZuQlpYGOzs7DBgwAG+99RYiIiKkjkZEdEcVVSp8+9th/HH0DJRVKni7OeHegT1w/7BekMs4ESgREZGUTOadeN++fZg3bx4OHz6MXbt2oba2FqNGjUJFRcWdVyYiklBNbR1eXLkJm/YnQ1mlAgDkFZfjv9sO4t0fdkmcjoiIiEzmTNFvv/2m83jNmjXw9vbG8ePHMWTIEIlSERHd2e7jaTh/5XqDbX8eO4spQ3siPMC7jVMRERHRTSZzpuh2paWlAAB3d3eJkxARNe3AyYtNt59oup2IiIiMy2TOFN1Ko9Hgueeew8CBA9G9e/dG+6lUKqhUKu1jpVLZFvGIiHTU1alb1U5ERETGZZJniubNm4dTp05h/fr1TfZbsWIFXFxctD9xcXFtlJCI6G89OwfeoT2ojZIQERFRQ0yuKHrqqaewbds27N27Fx06dGiy70svvYTS0lLtz759+9ooJRHR38bdFQV3J/sG27oG+6J3BIsiIiIiKZlMUSSKIp566ils3rwZe/bsQWho6B3XUSgUcHZ21v44Ojq2QVIiIl0ujnZ4Z9796Bbqr10mkwkY3KMTls69D4IgSJiOiIiITOaaonnz5mHdunX45Zdf4OTkhNzcXACAi4sL7OzsJE5HRNS0IB93fPjMA7iaV4zCMiUCvNzg6cIvaoiIiNoDQRRFUeoQzdHYN6mrV6/G7Nmzm7WNpKQk9O7dG8ePH0evXr0MmI6IiIiIiEyVyZwpMpHajYiIiIiITIzJXFNERERERERkDCyKiIiIiIjIopnM8DkiIlNXqqzC7uNpKCqrQJCPO+JiOkNhwz/DREREUuO7MRFRG9hzPA3v/bgLNbVq7bJVWw/ijf+7F12CfSVMRkRERBw+R0RkZJk5hXh73R86BREAlCgrsfi/v6C6plaiZERERASwKCIiMrpfE05ArdE02FairEJ88vk2TkRERES3YlFERGRkWdeLmmzPzC1soyRERETUEBZFRERG5uZk32S7h7NDGyUhIiKihrAoIiIysjH9ujXaZiWXYUTvLm2YhoiIiG7HooiIyMh6RwTjvsEx9ZbLBAHPTh0Bd54pIiIikhSn5CYiagPzJg9F38gQ/Hb4NAr/d5+iewdGI7yDt9TRiIiILB6LIiKiNtKnSwj6dAmROgYRERHdhsPniIiIiIjIorEoIiIiIiIii8aiiIiIiIiILBqvKTJTOTk5yMnJkToGGYifnx/8/PykjkEGwuPT/PAYJSIybRZVFPn5+WHJkiVm/8alUqnw4IMPYt++fVJHIQOJi4vD77//DoVCIXUUaiUen+aJxygRkWkTRFEUpQ5BhlVWVgYXFxfs27cPjo6OUsehVlIqlYiLi0NpaSmcnZ2ljkOtxOPT/PAYJSIyfRZ1psjSxMTE8A3aDJSVlUkdgYyAx6f54DFKRGT6ONECERERERFZNBZFRERERERk0VgUmSGFQoElS5bwgl8zwdfTvPD1ND98TYmITB8nWiAiIiIiIovGM0VERERERGTRWBQREREREZFFY1FEREREREQWjUWRCYqPj4cgCCgpKWmzfc6ePRv33Xdfm+3PkgiCgC1btrTZ/tasWQNXV9c22x8RERFRe8eiyEA+//xzODk5oa6uTrtMqVTC2toaQ4cO1el7s6hJT09vcFuvvfYaBEGAIAiwsrJCSEgI/vWvf0GpVBrzKVADZs+eDUEQ8Pjjj9drmzdvHgRBwOzZsxtd/+ZrffPHx8cHU6ZMwaVLl4yYml
rKmMexp6cnhgwZgg8//BAqlcqYT4NuY8jjWCaTwcXFBT179sSLL76InJwcIyYnIqK2wqLIQIYNGwalUoljx45plx04cAC+vr5ITExEdXW1dvnevXsRFBSEjh07Nrq9bt26IScnB5mZmXjrrbfw5Zdf4oUXXjDqc6CGBQYGYv369aiqqtIuq66uxrp16xAUFNSsbZw7dw7Xrl3DTz/9hNOnT2PChAlQq9XGikwtZKzjOCsrC3v37sXUqVOxYsUKDBgwAOXl5Y2uV1NTY5gnRFqGPI6PHj2KhQsX4s8//0T37t2Rmpra6Dp8LYmITAOLIgOJiIiAn58f4uPjtcvi4+MxceJEhIaG4vDhwzrLhw0b1uT2rKys4Ovriw4dOmDatGmYMWMGfv311wb7FhYW4sEHH0RAQADs7e0RFRWFH374QaePRqPB22+/jfDwcCgUCgQFBWHZsmXa9itXruCBBx6Aq6sr3N3dMXHiRGRmZtbb1+uvvw4vLy84Ozvj8ccf13nDV6lUeOaZZ+Dt7Q1bW1sMGjQIR48ebfJ5moJevXohMDAQmzZt0i7btGkTgoKC0LNnz2Ztw9vbG35+fhgyZAheffVVnDlzBhcvXmyw78KFC9G5c2fY29sjLCwMixcvRm1trU6frVu3ok+fPrC1tYWnpycmTZqkbVOpVJg/fz4CAgLg4OCAfv366fxe3rRlyxZ06tQJtra2GD16NK5cuaLTvnLlSnTs2BE2NjaIiIjAd99916znasqMdRz7+/sjKioKTz/9NPbt24dTp07hrbfe0vYLCQnBG2+8gZkzZ8LZ2RmPPfZYg8NkU1JSIAiCzrG5atUqBAYGwt7eHpMmTcL777/P4ZENMNRx7Ovri86dO2P69OlISEiAl5cXnnjiCW2fm0ONly1bBn9/f0RERABoeJisq6sr1qxZo3186NAhxMTEwNbWFrGxsdiyZQsEQUBKSkqLnzcRETUPiyIDGjZsGPbu3at9vHfvXgwdOhRxcXHa5VVVVUhMTLzjh6nb2dnZNfqNY3V1NXr37o3t27fj1KlTeOyxx/CPf/wDR44c0fZ56aWX8Oabb2Lx4sU4c+YM1q1bBx8fHwBAbW0tRo8eDScnJxw4cAAJCQlwdHTEmDFjdPa5e/dunD17FvHx8fjhhx+wadMmvP7669r2F198ERs3bsQ333yDpKQkhIeHY/To0SgqKtLrubZHjzzyCFavXq19/PXXX2POnDkt2padnR2Axr9BdnJywpo1a3DmzBl89NFHWLVqFT744ANt+/bt2zFp0iSMGzcOycnJ2L17N/r27attf+qpp/DXX39h/fr1OHnyJKZOnYoxY8bgwoUL2j6VlZVYtmwZvv32WyQkJKCkpATTp0/Xtm/evBnPPvssXnjhBZw6dQr//Oc/MWfOHJ3fb3NlzOMYALp06YKxY8fqfDgHgHfffRc9evRAcnIyFi9e3KxtJSQk4PHHH8ezzz6LlJQUjBw5UufLDtJlyOMYuHEsP/7440hISEBeXp52+e7du3Hu3Dns2rUL27Zta9a2ysrKMGHCBERFRSEpKQlvvPEGFi5c2OJsRESkJ5EMZtWqVaKDg4NYW1srlpWViVZWVmJeXp64bt06cciQIaIoiuLu3btFAOLly5cb3c6SJUvEHj16aB8fO3ZM9PT0FO+//35RFEVx7969IgCxuLi40W2MHz9efOGFF0RRFMWysjJRoVCIq1atarDvd999J0ZERIgajUa7TKVSiXZ2duLvv/8uiqIozpo1S3R3dxcrKiq0fVauXCk6OjqKarVaVCqVorW1tbh27Vpte01Njejv7y++/fbbjeZs72bNmiVOnDhRzMvLExUKhZiZmSlmZmaKtra2Yn5+vjhx4kRx1qxZja5/+2t17do1ccCAAWJAQICoUqlEURRFAOLmzZsb3cY777wj9u7dW/u4f//+4owZMxrse/nyZVEul4vZ2dk6y0eMGCG+9NJLoiiK4urVq0UA4uHDh7XtZ8
+eFQGIiYmJoiiK4oABA8S5c+fqbGPq1KniuHHjGs1pLox1HN9q4cKFop2dnfZxcHCweN999+n0aeg4T05OFgGIGRkZoiiK4rRp08Tx48frrDdjxgzRxcWl+U/YAhj6OL7Vzp07dY6dWbNmiT4+Ptrj+6aGjnMXFxdx9erVoije+Hvq4eEhVlVVadtXrVolAhCTk5Nb8rSJiEgPVpJUYmZq6NChqKiowNGjR1FcXIzOnTvDy8sLcXFxmDNnDqqrqxEfH4+wsLA7jmFPTU2Fo6Mj1Go1ampqMH78eHzyyScN9lWr1Vi+fDk2bNiA7Oxs1NTUQKVSwd7eHgBw9uxZqFQqjBgxosH1T5w4gYsXL8LJyUlneXV1tc5F5D169NBuEwD69+8PpVKJK1euoLS0FLW1tRg4cKC23draGn379sXZs2eb/h9nAry8vDB+/HisWbMGoihi/Pjx8PT0bPb6HTp0gCiKqKysRI8ePbBx40bY2Ng02PfHH3/Exx9/jPT0dCiVStTV1cHZ2VnbnpKSgrlz5za4bmpqKtRqNTp37qyzXKVSwcPDQ/vYysoKffr00T7u0qULXF1dcfbsWe1r9thjj+lsY+DAgfjoo4+a/ZxNlSGP48aIoghBEHSWxcbG6r2dc+fO6QydBIC+ffs2++yEpWntcdwQURQBQOf1jIqKavT4bsy5c+cQHR0NW1tb7bJbzwATEZFxsSgyoPDwcHTo0AF79+5FcXEx4uLiAAD+/v4IDAzEoUOHsHfvXgwfPvyO24qIiMCvv/4KKysr+Pv7N/kG+8477+Cjjz7Chx9+iKioKDg4OOC5557TDs+6OVyrMUqlEr1798batWvrtXl5ed0xq6V45JFH8NRTTwEAPv30U73WPXDgAJydneHt7V2v+LzVX3/9hRkzZuD111/H6NGj4eLigvXr1+O9997T9mnq9VQqlZDL5Th+/DjkcrlOm6Ojo16ZLZUhj+PGnD17FqGhoTrLHBwcdB7LZDdGN9/80A2g3rVlpL/WHMcNufmlT0hIiHbZ7a8lcKNouvW1BPh6EhG1J7ymyMCGDRuG+Ph4xMfH60zhO2TIEOzcuRNHjhxp1nUINjY2CA8PR0hIyB2/cUxISMDEiRPx8MMPo0ePHggLC8P58+e17Z06dYKdnR12797d4Pq9evXChQsX4O3tjfDwcJ0fFxcXbb8TJ07ozNx0+PBhODo6IjAwUHtBfkJCgra9trYWR48eRWRk5B2frym4eY3VzWuw9BEaGoqOHTs2WRABNy60Dg4Oxssvv4zY2Fh06tQJly9f1ukTHR3d6GvZs2dPqNVq5OXl1XstfX19tf3q6up0Zlg7d+4cSkpK0LVrVwBA165ddV5L4Mbvmbm8lndiqOO4IWlpafjtt98wZcqUJvvd/ELi1imfb7/gPiIiot5kJuYwuYkxteY4vl1VVRW+/PJLDBky5I5fIHl5eem8lhcuXEBlZaX2cUREBFJTU3Wma+drSUTUdlgUGdiwYcNw8OBBpKSkaL9hBoC4uDh88cUXqKmpafGHqcZ06tQJu3btwqFDh3D27Fn885//xPXr17Xttra2WLhwIV588UV8++23SE9Px+HDh/HVV18BAGbMmAFPT09MnDgRBw4cQEZGBuLj4/HMM8/g6tWr2u3U1NTg0UcfxZkzZ7Bjxw4sWbIETz31FGQyGRwcHPDEE09gwYIF+O2333DmzBnMnTsXlZWVePTRRw36fKUil8tx9uxZnDlzpt5ZGEPp1KkTsrKysH79eqSnp+Pjjz/G5s2bdfosWbIEP/zwA5YsWYKzZ88iNTVVO5NZ586dMWPGDMycORObNm1CRkYGjhw5ghUrVmD79u3abVhbW+Ppp59GYmIijh8/jtmzZ+Ouu+7SDtdZsGAB1qxZg5UrV+LChQt4//33sWnTJsyfP98oz7u9MdRxXFdXh9zcXFy7dg2pqan4z3/+g7i4OMTExGDBggVNrhseHo7AwEC89tpruHDhArZv36
5zxhAAnn76aezYsQPvv/8+Lly4gC+++AI7d+6sNzSP/taa4zgvLw+5ubm4cOEC1q9fj4EDB6KgoAArV66847rDhw/HJ598guTkZBw7dgyPP/44rK2tte0PPfQQNBoNHnvsMZw9exa///473n33XQDg60lE1BYkvJ7JLGVkZIgAxC5duugsz8zMFAGIERERd9xGUxdoi2L9i34LCwvFiRMnio6OjqK3t7f4yiuviDNnzhQnTpyoXUetVotLly4Vg4ODRWtrazEoKEhcvny5tj0nJ0ecOXOm6OnpKSoUCjEsLEycO3euWFpaKori3xcqv/rqq6KHh4fo6Ogozp07V6yurtZuo6qqSnz66ae12xg4cKB45MiRZvxfa79uPu/GtOYC7Ztw2wXYCxYs0P4/njZtmvjBBx/Uu3B+48aNYkxMjGhjYyN6enqKkydP1rbV1NSIr776qhgSEiJaW1uLfn5+4qRJk8STJ0+KonhjogUXFxdx48aNYlhYmKhQKMS777673qQBn332mRgWFiZaW1uLnTt3Fr/99ttGn4O5MdRxDEAEIMrlctHd3V0cNGiQ+MEHH+gcN6J4Y6KFDz74oN42Dh48KEZFRYm2trbi4MGDxZ9++klnogVRFMUvv/xSDAgIEO3s7MT77rtPXLp0qejr69ui522uDHUcAxAFQRCdnJzEHj16iAsWLBBzcnKata/s7Gxx1KhRooODg9ipUydxx44dOhMtiKIoJiQkiNHR0aKNjY3Yu3dvcd26dSIAMS0tTc9nTERE+hJE8bZBzkREZLLmzp2LtLQ0HDhwQOoo1Epr167FnDlzUFpaesdrQ4mIqHU40QIRkQl79913MXLkSDg4OGDnzp345ptv8Nlnn0kdi1rg22+/RVhYGAICAnDixAksXLgQDzzwAAsiIqI2wKKIiMiEHTlyBG+//TbKy8sRFhaGjz/+GP/3f/8ndSxqgdzcXLz66qvIzc2Fn58fpk6dypvxEhG1EQ6fIyIiIiIii8bZ54iIiIiIyKKxKCIiIiIiIovGokhCs2fPhiAIePPNN3WWb9myxaj3pSgqKsLTTz+NiIgI2NnZISgoCM888wxKS0t1+mVlZWH8+PGwt7eHt7c3FixYgLq6OqPlMnV8Pc0LX0/zw9eUiIgaw6JIYra2tnjrrbdQXFzcZvu8du0arl27hnfffRenTp3CmjVr8Ntvv+ncZFWtVmP8+PGoqanBoUOH8M0332DNmjV49dVX2yynKeLraV74epofvqZERNQgaW+TZNlmzZol3nPPPWKXLl3EBQsWaJdv3rxZbOuXZsOGDaKNjY1YW1sriqIo7tixQ5TJZGJubq62z8qVK0VnZ2dRpVK1aTZTwdfTvPD1ND98TYmIqDE8UyQxuVyO5cuX4z//+Q+uXr3a7PXGjh0LR0fHRn+6deumV47S0lI4OzvDyurGLO1//fUXoqKi4OPjo+0zevRolJWV4fTp03pt25Lw9TQvfD3ND19TIiJqCO9T1A5MmjQJMTExWLJkCb766qtmrfPf//4XVVVVjbZbW1s3e/8FBQV444038Nhjj2mX5ebm6rw5A9A+zs3Nbfa2LRFfT/PC19P88DUlIqLbsShqJ9566y0MHz4c8+fPb1b/gIAAg+y3rKwM48ePR2RkJF577TWDbJP4epobvp7mh68pERHdisPn2okhQ4Zg9OjReOmll5rV3xBDOcrLyzFmzBg4OTlh8+bNOt90+vr64vr16zr9bz729fXV45lZJr6e5oWvp/nha0pERLfimaJ25M0330RMTAwiIiLu2Le1QznKysowevRoKBQK/Prrr7C1tdVp79+/P5YtW4a8vDx4e3sDAHbt2gVnZ2dERkY249kQX0/zwtfT/PA1JSKim1gUtSNRUVGYMWMGPv744zv2bc1QjrKyMowaNQqVlZX4/vvvUVZWhrKyMgCAl5cX5HI5Ro0ahcjISPzjH//A22+/jdzcXLzyyiuYN28eFApFi/dtSfh6mh
e+nuaHrykREWlJPf2dJZs1a5Y4ceJEnWUZGRmijY2NUaeH3bt3rwigwZ+MjAxtv8zMTHHs2LGinZ2d6OnpKb7wwgva6WOpPr6e5oWvp/nha0pERI0RRFEU26b8IiIiIiIian840QIREREREVk0FkVERERERGTRWBQREREREZFFY1FEREREREQWjUURERERERFZNBZFRERERERk0VgUERERERGRRWNRREREREREFo1FERERERERWTQWRUREREREZNFYFBERERERkUVjUURERERERBaNRREREREREVk0FkVERERERGTRWBQREREREZFFY1FEREREREQWjUURERERERFZNBZFRERERERk0VgUERERERGRRWNRREREREREFo1FERERERERWTQWRUREREREZNEsqijKycnBa6+9hpycHKmjEBEREREZBD/jtp7FFUWvv/46f2GIiIiIyGzwM27rWVRRREREREREdDsWRUREREREZNFYFBERERERkUVjUURERERERBaNRREREREREVk0FkVERERERGTRWBQREREREZFFY1FEZAKqq6uljkBERERktlgUEZmA/Px8qSMQERERmS0WRUQmoKamBjU1NVLHICIiIjJLLIqITERZWZnUEYiIiIjMEosiIhNRXFwsdQQiIiIis8SiiMhE8LoiIiIiIuNgUURkIi5fvgxRFKWOQURERGR2WBQRmYjy8nJkZWVJHYOIiIjI7LAoIjIhx44d49kiIiIiIgNjUURkQgoLC3Hu3DmpYxARERGZFRZFRO1cbGwsBg4ciGXLlgEAEhMTUV5eLnEqIiIiIvPBooioncvNzcX169e19ylSqVT4448/oFKpJE5GREREZB5YFBGZoMLCQuzYsQNVVVVSRyEiIiIyeSyKiExUfn4+fvnlFxQWFkodhYiIiMiksSgiMmFlZWXYsmULTp48CY1GI3UcIiIiIpPEoojIxKnVahw+fBibN29Gdna21HGIiIiITA6LIiIzUVhYiO3bt2P79u3Iy8uTOg4RERGRybCSOgARGVZ2djays7MRHByM2NhYeHh4SB2JiIiIqF1jUURkpi5fvozLly8jNDQUPXv2hKenp9SRiIiIiNolFkVEZi4jIwMZGRnw8/ND165dERISAisrHvpEREREN/GTEVE7lpWVhcrKSgBATU0NioqK4O7u3qJt5eTkICcnB9bW1ggJCUF4eDgCAgIgk/HSQiIiIrJsLIqI2qEjR47gjTfewPbt2yGKIgCgsrIS//73vxEVFYXx48cjJCSkRduura3FhQsXcOHCBdjZ2SEiIgLdunWDg4ODAZ8BERERkelgUUTUzmzatAnTpk2DKIragugmURRx6tQpnDp1CnPnzkWvXr1ata+qqiqkpKTg1KlT6NevH7p169aq7RERERGZIo6bIWpHjhw5gmnTpkGtVkOtVjfYR6PRQKPRYNWqVcjMzDTIfuvq6pCQkID09HSDbI+IiIjIlLAoImpHli5d2uAZosbs2LHDoPs/ePAgysvLDbpNIiIiovaORRFRO5GVlYVt27Y1eobodhqNBidPnkRRUZHBMqhUKmzduhXFxcUG2yYRERFRe8eiiKid2L17d7PPEN0kiiLS0tIMmkOpVOKXX35Bfn6+QbdLRERE1F6xKCJqJ8rLy/WeHlsQBFRXVxs8S01NDVJSUgy+XSIiIqL2iEURUTvh5OQEjUaj1zqiKMLW1tbgWaytrREVFWXw7RIRERG1R5ySm6idGDFiBARB0GsInSAI6NKli0FzeHt7Y9iwYXBxcTHodomIiIjaK54pImongoKCcM8990Aulzerv0wmQ3R0NNzd3Q2yf7lcjr59++Lee+9lQUREREQWhUURUTuyePFiCIIAQRCa1X/cuHEG2a+vry8mT56MmJgYva9rIiIiIjJ1/PRD1I706dMHP/74I+RyeaNnjGQyGWQyGR577DGEhIS0an+urq64++67MWHCBLi5ubVqW0RERESmitcUEb
UzkydPxqFDh/DGG29g27ZtOtcYCYKAqKgojBs3rlUFkbu7O3r27ImwsLBmn5UiIiIiMlcsiojaoT59+uDXX39FVlYWYmJiUFxcDHt7eyxevLhV1xB5eXmhV69eCAoKYjFERERE9D8mNXxu//79mDBhAvz9/SEIArZs2SJ1JCKjCgoKgr29PQDAxsamxQWRi4sLRo4cifvuuw/BwcEsiIiIiCwcP1frMqmiqKKiAj169MCnn34qdRQik2BtbY277roL999/P0JDQ1kMEREREQB+rr5di4bPpaenY/Xq1UhPT8dHH30Eb29v7Ny5E0FBQejWrZuhM2qNHTsWY8eONdr2icyFIAjo3Lkz+vTpoz3TRERERHQTP1fr0vtM0b59+xAVFYXExERs2rQJSqUSAHDixAksWbLE4AFbQ6VSoaysTPtzMyuROQsNDcX999+PuLg4FkREREQWRKlU6nz2ValUUkcyGXoXRYsWLcLSpUuxa9cu2NjYaJcPHz4chw8fNmi41lqxYgVcXFy0P3FxcVJHIjIKQRAQHh6OqVOnYuTIkZxem4iIyALFxcXpfPZdsWKF1JFMht7D51JTU7Fu3bp6y729vVFQUGCQUIby0ksv4fnnn9c+TklJYWFEZic4OBh9+/ZlIURERGTh9u3bh5iYGO1jhUIhXRgTo3dR5OrqipycHISGhuosT05ORkBAgMGCGYJCodD5ZXB0dJQwDZFh2draYvDgwfWORSIiIrJMjo6OcHZ2ljqGSdJ7+Nz06dOxcOFC5ObmQhAEaDQaJCQkYP78+Zg5c6YxMhLRbTw8PDB58mQWREREREQGoPeZouXLl2PevHkIDAyEWq1GZGQk1Go1HnroIbzyyivGyKilVCpx8eJF7eOMjAykpKTA3d0dQUFBRt03kVR8fX1RV1enPevp5eWF8ePH61zTR0RERKQPfq7WJYiiKLZkxStXriA1NRVKpRI9e/ZEp06dDJ2tnvj4eAwbNqze8lmzZmHNmjV3XD8pKQm9e/fG8ePH0atXLyMkJDKO9PR07N69G9bW1pg6dSqHghIREZFWSz7jtvZztblp0X2KACAwMBCBgYGGzHJHQ4cORQtrOCKzEBERwYKIiIiIWo2fq3XpfU3RlClT8NZbb9Vb/vbbb2Pq1KkGCUVEDeM1RERERESGp3dRtH//fowbN67e8rFjx2L//v0GCUVE9dnY2MDHx0fqGERERERmR++iSKlUNniBt7W1NcrKygwSiojq69ChA2QyvQ9ZIiIiIroDvT9hRUVF4ccff6y3fP369YiMjDRIKCKqLzg4WOoIRERERGZJ74kWFi9ejMmTJyM9PR3Dhw8HAOzevRs//PADfvrpJ4MHJKIbOHSOiIiIyDj0LoomTJiALVu2YPny5fj5559hZ2eH6Oho/Pnnn4iLizNGRiKLJwgCZ50jIiIik1ZdXY2amhqdZc7OzhKl0dWiKbnHjx+P8ePHGzoLETXC1taW1xMRERGRyamsrMSLL76IDRs2oLCwsF67Wq2WIFV9Lf6UVVNTg6tXryIrK0vnh4gMz9bWVuoIRNSEuro6qSMQEbVLCxYswJ49e7By5UooFAr897//xeuvvw5/f398++23UsfT0vtM0YULF/DII4/g0KFDOstFUYQgCO2m2iMyJ+3l1DIRNayyspLHKRFRA7Zu3Ypvv/0WQ4cOxZw5czB48GCEh4cjODgYa9euxYwZM6SOCKAFRdHs2bNhZWWFbdu2wc/PD4IgGCMXEd3CyqpFI12JqI0olUoWRUREDSgqKkJYWBiAG1/yFhUVAQAGDRqEJ554QspoOvT+pJWSkoLjx4+jS5cuxshDRERkcgoKCuDv7y91DCKidicsLAwZGRkICgpCly5dsGHDBvTt2xdbt26Fq6ur1PG09L6mKDIyEgUFBcbIQkREZJKuXLkCURSljkFE1O7MmTMHJ06cAAAsWrQIn376KWxtbfGvf/0LCxYskDjd3/Q+U/TWW2/hxRdfxP
LlyxEVFQVra2uddg4fICIiS6NUKpGZmYnQ0FCpoxARtSv/+te/tP++++67kZaWhuPHjyM8PBzR0dESJtOld1F09913AwBGjBihs5wTLRARkSVLTk5GSEgIr7UlIrrFt99+i2nTpkGhUAAAgoODERwcjJqaGnz77beYOXOmxAlv0Lso2rt3rzFyEBERmbSCggJcu3YNAQEBUkchImo35syZgzFjxsDb21tneXl5OebMmWO6RVFcXJwxchAREZmk2NhYZGRkwNHRER4eHrjvvvsgl8uljkVE1C7cHE12u6tXr8LFxUWCRA1r0Ty/Bw4cwBdffIFLly7hp59+QkBAAL777juEhoZi0KBBhs5IRETUbuXm5qKoqAgajQaFhYVISEjA4MGDOYyOiCxaz549IQgCBEHAiBEjdG4volarkZGRgTFjxkiYUJfeRdHGjRvxj3/8AzNmzEBSUhJUKhUAoLS0FMuXL8eOHTsMHpKIiMhUpKWloaqqCgMHDoSjo6PUcYiIJHHfffcBuHE7n9GjR+v8PbSxsUFISAimTJkiUbr69C6Kli5dis8//xwzZ87E+vXrtcsHDhyIpUuXGjQcERGRKbp8+TKuXr2KTp06oXv37nB3d5c6EhFRm1qyZAkAICQkBNOmTYOtra3EiZqmd1F07tw5DBkypN5yFxcXlJSUGCITERGRyVOr1UhLS0NaWhr8/PwQExODDh06cFgdEVmUWbNmSR2hWfQuinx9fXHx4kWEhIToLD948CDCwsIMlYuIiMhs5OTkICcnBwEBARgyZAicnJykjkREZDRubm7N/gKoqKjIyGmaR++iaO7cuXj22Wfx9ddfQxAEXLt2DX/99Rfmz5+PxYsXGyMjERGRWcjOzsbGjRvRv39/dO7cmWeNiMgsffjhh1JH0JveRdGiRYug0WgwYsQIVFZWYsiQIVAoFJg/fz6efvppY2QkIiIyGzU1Ndi3bx9SU1PRvXt3dOzYEdbW1lLHIiIyGFMZMncrvYoitVqNhIQEzJs3DwsWLMDFixehVCoRGRnJGXaIiMjiZGVlobKyEsCNYqeoqKjZkyoUFRVh//79OHToEIKCghAaGooOHTpo7/pORGQu0tPTsXr1aqSnp+Ojjz6Ct7c3du7ciaCgIHTr1k3qeAAAmT6d5XI5Ro0aheLiYtjY2CAyMhJ9+/ZlQURERBblyJEjmDBhAkJCQlBcXAwAqKysxL///W98+umnyMzMbPa26urqcOnSJezevRvfffcdtm7dipSUFBQVFUEURSM9AyKitrFv3z5ERUUhMTERmzZtglKpBACcOHFCO0Nde6D38Lnu3bvj0qVLCA0NNUYeIiKidm3Tpk2YNm0aRFGsV7SIoohTp07h1KlTmDt3Lnr16qXXtjUajXZShiNHjsDR0REhISGIiIiAh4eHIZ8GEVGbWLRoEZYuXYrnn39eZ5KZ4cOH45NPPpEwmS69zhQBN+5TNH/+fGzbtg05OTkoKyvT+SEiIjJXR44cwbRp06BWq6FWqxvso9FooNFosGrVKr3OGDVEqVTi1KlT2LhxI3777TftDdOJiExFamoqJk2aVG+5t7c3CgoKJEjUML2LonHjxuHEiRO499570aFDB7i5ucHNzQ2urq5wc3MzRkYiIqJ2YenSpQ2eIWrMjh07DLbvrKwsHDhwwGDbIyJqC66ursjJyam3PDk5GQEBARIkapjew+f27t1rjBxERETtWlZWFrZt29bsgkij0eDkyZN6Tb5wJ5cuXcK5c+cQERFhkO0RERnb9OnTsXDhQvz0008QBAEajQYJCQmYP38+Zs6cKXU8Lb2Lori4OGPkICIiatd2796t98QHoigiLS0NAwYMMFiO/fv3QyaToVOnTgbbJhGRsSxfvhzz5s1DYGAg1Go1IiMjoVar8dBDD+GVV16ROp6W3sPnAODAgQN4+OGHMWDAAGRnZwMAvvvuOxw8eNCg4YiIiNqL8vJyyGT6vW0KgoDq6mqD5hBFEf
v379dOBU5E1J7Z2Nhg1apVSE9Px7Zt2/D9998jLS0N3333HeRyudTxtPQuijZu3IjRo0fDzs4OSUlJ2os+S0tLsXz5coMHJCIiag+cnJyg0Wj0WkcURdja2hopERGR6QgKCsK4cePwwAMPtMsz3XoPn1u6dCk+//xzzJw5E+vXr9cuHzhwIJYuXWrQcERERO3FiBEjIAiCXkPoBEFAly5dDJrD2toaw4cPh729vUG3S0RkKM8//3yz+77//vtGTNJ8ehdF586dw5AhQ+otd3FxQUlJiSEyERERtTtBQUG45557sGPHjkan476VTCZDVFSUwSZZAAAvLy8MGzYMrq6uBtsmEZGhJScn6zxOSkpCXV2ddpKY8+fPQy6Xo3fv3lLEa5DeRZGvry8uXryIkJAQneUHDx5EWFiYoXIRERG1O4sXL8bOnTubfcZo3LhxBtmvlZUVevXqhejoaL2vayIiamu3zlb9/vvvw8nJCd9884329j3FxcWYM2cOBg8eLFXEevT+yzp37lw8++yzSExMhCAIuHbtGtauXYv58+fjiSeeMEZGIiKidqFPnz748ccfIZfLG71AWCaTQSaT4bHHHqv3BaK+BEFAREQEpk2bhpiYGBZERGRy3nvvPaxYsULnfqZubm5YunQp3nvvPQmT6dL7TNGiRYug0WgwYsQIVFZWYsiQIVAoFJg/fz6efvppY2QkIiJqNyZPnoxDhw7hjTfeqHffIkEQEBUVhXHjxrWqILKxsUHnzp0RFRUFJycnA6QmIpJGWVkZ8vPz6y3Pz89HeXm5BIka1qyi6OTJk+jevTtkMhkEQcDLL7+MBQsW4OLFi1AqlYiMjISjo6OxsxIREbULffr0wa+//oqsrCzExMSguLgY9vb2WLx4cYuvIRIEAf7+/ujUqRNCQ0NhbW1t4NRERG1v0qRJmDNnDt577z307dsXAJCYmIgFCxZg8uTJEqf7W7OKop49eyInJwfe3t4ICwvD0aNH4eHhgcjISGPnIyIiareCgoJgb2+P4uJi2NjYtKggcnd3R+fOndGxY0c4ODgYISURkXQ+//xzzJ8/Hw899BBqa2sB3LhO8tFHH8U777wjcbq/NasocnV1RUZGBry9vZGZman3fRqIiIjob1ZWVujYsSMiIyPh6ekJQRCkjkREZBT29vb47LPP8M477yA9PR0A2uWXQM0qiqZMmYK4uDj4+flBEATExsY2eoHppUuXDBqQiIjIXDg6OiIyMhJdu3aFQqGQOg4RUZtxcHBAdHS01DEa1ayi6Msvv8TkyZNx8eJFPPPMM5g7dy4v/CQiImomb29vREVFITQ0lDPIERG1Q82eaGHUqFEYM2YMjh8/jmeffZZFERER0R14e3ujb9++2pEWRETUPuk90cK+fftQU1Nj7FxEREQmSy6Xo3///ujatSuLISIiE9Csc/g3J1oAIPlEC59++ilCQkJga2uLfv364ciRI5JlISIi8vX1hbu7O5ydnQHcGDc/ceJEREZGsiAionaNn6v/ZlITLfz44494/vnn8fnnn6Nfv3748MMPMXr0aJw7dw7e3t5G2y8REVFjjh07hh9//BGlpaVwdHTEhAkTOMSciNo9KT5XOzs7IyUlBWFhYUbZfmuY1EQL77//PubOnYs5c+YAuDHv+fbt2/H1119j0aJFbZ6HiIjoJrlcjjFjxrAgIiKTIMXnalEUjbJdQ2hWUQQAY8aMAQDJJlqoqanB8ePH8dJLL2mXyWQy3H333fjrr78aXEelUkGlUmkfK5VKAEBdXZ325lFEREStVVdXh+7du8PJyYnvL0TU5urq6gDc+KxbVlamXa5QKBqc/r8ln6vNXbOLoptWr15tjBx3VFBQALVaDR8fH53lPj4+SEtLa3CdFStW4PXXX6+3vF+/fkbJSEREREQklbi4OJ3HS5YswWuvvVavX0s+VxvCww8/rL3+sr1pVlE0efJkrFmzBs7Ozpg8eXKTfTdt2mSQYIbw0ksv4f
nnn9c+TklJQVxcHBITE9GzZ08JkxERkTk5fPgw7rrrLqljEJGFSk5ORr9+/bBv3z7ExMRol7e3m0SvXLlS6giNalZR5OLiop1Bx8XFxaiBGuPp6Qm5XI7r16/rLL9+/Tp8fX0bXOf2U4aOjo4AACsrK1hbWxsvLBERWRRfX1++rxCRZKysbnykd3R0bNaZmJZ8rjZ3zSqKbh0yJ9XwORsbG/Tu3Ru7d+/GfffdBwDQaDTYvXs3nnrqKUkyERERAX9/6UZEZAr4ubo+va8pktLzzz+PWbNmITY2Fn379sWHH36IiooK7awZREREUrC1tZU6AhGRXvi5WleziqKePXs2+wZ0SUlJrQrUlGnTpiE/Px+vvvoqcnNzERMTg99++63eRWJERERtiUUREZkafq7W1ayi6OZpNQCorq7GZ599hsjISPTv3x/AjQtMT58+jSeffNIoIW/11FNPWexpPSIiap9sbGykjkBEpLe2/lxdXV2NmpoanWXtZTa6ZhVFS5Ys0f77//7v//DMM8/gjTfeqNfnypUrhk1HRERkAmQymdQRiIjapcrKSrz44ovYsGEDCgsL67Wr1WoJUtWn91/xn376CTNnzqy3/OGHH8bGjRsNEoqIiIiIiEzfggULsGfPHqxcuRIKhQL//e9/8frrr8Pf3x/ffvut1PG09C6K7OzskJCQUG95QkICx1QTEREREZHW1q1b8dlnn2HKlCmwsrLC4MGD8corr2D58uVYu3at1PG09J597rnnnsMTTzyBpKQk9O3bFwCQmJiIr7/+GosXLzZ4QCIiovZOFMVmT0hERGRJioqKEBYWBuDG9UNFRUUAgEGDBuGJJ56QMpoOvYuiRYsWISwsDB999BG+//57AEDXrl2xevVqPPDAAwYPSERE1N6Joih1BCKidiksLAwZGRkICgpCly5dsGHDBvTt2xdbt26Fq6ur1PG0WnSfogceeIAFEBER0f/I5XKpIxARtUtz5szBiRMnEBcXh0WLFmHChAn45JNPUFtbi/fff1/qeFomdfNWIiIiIiIyHf/617+0/7777ruRlpaG48ePIzw8HNHR0RIm08U5RImIiIjIbNXW1kodwaJ9++23UKlU2sfBwcGYPHkyunTpYtqzzxERERERmYrbbxZKbWvOnDkoLS2tt7y8vBxz5syRIFHDWBQRERERkdniRCjSamx2zqtXr8LFxUWCRA3jNUVEREREZLZYFEmjZ8+eEAQBgiBgxIgRsLL6u+xQq9XIyMjAmDFjJEyoS++iSK1WY82aNdi9ezfy8vKg0Wh02vfs2WOwcERERERErcFriqRx3333AQBSUlIwevRoODo6attsbGwQEhKCKVOmSJSuPr2LomeffRZr1qzB+PHj0b17d96sjoiIiIjarVsv8qe2s2TJEgBASEgIpk2bBltbW4kTNU3vomj9+vXYsGEDxo0bZ4w8REREREQGU11d3eh1LWR8s2bNkjpCs+hdFNnY2CA8PNwYWYiIiIiIDEqtVkOlUrX7MxXmxM3NrdlFaFFRkZHTNI/eRdELL7yAjz76CJ988gkrbiIiIiJq98rKylgUtaEPP/xQ6gh607soOnjwIPbu3YudO3eiW7dusLa21mnftGmTwcIREREREbVWQUEBvL29pY5hMUxlyNyt9C6KXF1dMWnSJGNkISIiIiIyuKysLERGRkodw2Klp6dj9erVSE9Px0cffQRvb2/s3LkTQUFB6Natm9TxALSgKFq9erUxchARERERGcWVK1dQWlrarm4Wain27duHsWPHYuDAgdi/fz+WLVsGb29vnDhxAl999RV+/vlnqSMCAGRSByAiIiIiMiZRFJGYmCh1DIu0aNEiLF26FLt27YKNjY12+fDhw3H48GEJk+nS+0wRAPz888/YsGEDsrKyUFNTo9OWlJRkkGBERERERIaSmZmJS5cuISwsTOooFiU1NRXr1q2rt9zb2xsFBQUSJGqY3meKPv74Y8yZMwc+Pj5ITk
5G37594eHhgUuXLmHs2LHGyEhEREREpLfY2FgMGjQIy5YtAwAcOHAAFRUVEqeyLK6ursjJyam3PDk5GQEBARIkapjeRdFnn32GL7/8Ev/5z39gY2ODF198Ebt27cIzzzyD0tJSY2QkIiIiItJbbm4url+/jrKyMgCASqXC7t27odFoJE5mOaZPn46FCxciNzcXgiBAo9EgISEB8+fPx8yZM6WOp6V3UZSVlYUBAwYAAOzs7FBeXg4A+Mc//oEffvjBsOmIiIiIiAwoNzcXBw8ehCiKUkexCMuXL0eXLl0QGBgIpVKJyMhIDBkyBAMGDMArr7widTwtvYsiX19f7Z1ng4KCtBdIZWRk8JeLiIiIiNq9tLQ0HDt2jJ9d24CNjQ1WrVqF9PR0bNu2Dd9//z3S0tLw3XffQS6XSx1PS++JFoYPH45ff/0VPXv2xJw5c/Cvf/0LP//8M44dO4bJkycbIyMRERERkUElJydDEAT07t0bgiBIHcfsBQUFISgoSOoYjdK7KPryyy+14zDnzZsHDw8PHDp0CPfeey/++c9/GjwgEREREZExJCUlQa1Wo2/fviyMDOj5559vdt/333/fiEmaT++iSCaTQSb7e9Td9OnTMX36dIOGIiIiIiJqCydOnEB1dTUGDRrUroZzmbLk5GSdx0lJSairq0NERAQA4Pz585DL5ejdu7cU8RrUovsUHThwAF988QXS09Px888/IyAgAN999x1CQ0MxaNAgQ2ckIiIiIjKac+fOobi4GMOHD4ezs7PUcUze3r17tf9+//334eTkhG+++QZubm4AgOLiYsyZMweDBw+WKmI9ek+0sHHjRowePRp2dnZITk6GSqUCAJSWlmL58uUGD0hEREREZGx5eXn4+eefcfLkSU7ZbUDvvfceVqxYoS2IAMDNzQ1Lly7Fe++9J2EyXXoXRUuXLsXnn3+OVatWwdraWrt84MCBSEpKMmg4IiIiIqK2UldXh8OHD2Pz5s24fv261HHMQllZGfLz8+stz8/P197apz3Quyg6d+4chgwZUm+5i4sLSkpKDJGJiIiIiEgyhYWF+OWXX7B//37tqChqmUmTJmHOnDnYtGkTrl69iqtXr2Ljxo149NFH29XM1XpfU+Tr64uLFy8iJCREZ/nBgwcRFhZmqFxERERERJJKS0vD5cuXMWDAAISFhXGGuhb4/PPPMX/+fDz00EOora0FAFhZWeHRRx/FO++8I3G6v+l9pmju3Ll49tlnkZiYCEEQcO3aNaxduxbz58/HE088YYyMRERERESSqKqqwu7du/HHH3+goqJC6jgmx97eHp999hkKCwuRnJyM5ORkFBUV4bPPPoODg4PU8bT0PlO0aNEiaDQajBgxApWVlRgyZAgUCgXmz5+Pp59+2hgZiYiIiIgkdfnyZeTm5mLAgAEIDw/nWSM9OTg4IDo6WuoYjdK7KBIEAS+//DIWLFiAixcvQqlUIjIyEo6OjsbIR0RERESkt6ysLFRWVgIAampqUFRUBHd391ZtU6VSYe/evcjMzNSeGCDzoPfwuZtsbGwQGRmJvn37siAiIiIionbhyJEjmDBhAkJCQlBcXAwAqKysxL///W98+umnyMzMbPU+MjIysHnzZu32yfQ1+0zRI4880qx+X3/9dYvDEBERERG11KZNmzBt2jSIoghRFHXaRFHEqVOncOrUKcydOxe9evVq1b7Kysrw66+/Yty4cfDy8mrVtkh6zT5TtGbNGuzduxclJSUoLi5u9IeIiIiIqK0dOXIE06ZNg1qthlqtbrCPRqOBRqPBqlWrDHLGSKVSYceOHfwMbAaafaboiSeewA8//ICMjAzMmTMHDz/8cKvHZRIRERERGcLSpUsbPEPUmB07duDJJ59s9X5vFkb33nsvnJycWr09kkazzxR9+umnyMnJwYsvvoitW7ciMDAQDzzwAH7//fdm//IRERERERlaVlYWtm3b1ugZottpNBqcPHkSRUVFBtl/RUUFtm
3bhrKyMoNsj9qeXhMtKBQKPPjgg9i1axfOnDmDbt264cknn0RISAiUSqWxMhIRERERNWr37t16f0kviiLS0tIMlqG8vBxbt25FSUmJwbZJbafFs8/JZDIIggBRFJtdlRMRERERGVp5eTlkMv0+1gqCgOrqaoPmqKiowI4dO6BSqQy6XTI+vX57VCoVfvjhB4wcORKdO3dGamoqPvnkE2RlZRl9Wu5ly5ZhwIABsLe3h6urq1H3RURERESmw8nJCRqNRq91RFGEra2twbMolUqcPHnS4NuViqV8Bm92UfTkk0/Cz88Pb775Ju655x5cuXIFP/30E8aNG6d3Zd4SNTU1mDp1Kp544gmj74uIiIiITMeIESMgCIJe6wiCgC5duhgljznNRmcpn8GbPfvc559/jqCgIISFhWHfvn3Yt29fg/02bdpksHC3ev311wHcmBqciIiIiOimoKAg3HPPPdixY0ezLuuQyWSIiooy2kzKPj4+RtmuFCzlM3izi6KZM2fqXYETEREREbWFxYsXY+fOndpr3u9k3LhxRskRHByM7t27G2XbZDzNLopMsTpUqVQ6F7pxhjwiIiIi89SnTx/8+OOPmDZtWqMTgd285OOxxx5DSEiIQfevUCgQGxuLyMhIyU4kKJVKnWnBFQoFFAqFJFlMjfEvBmrCokWLIAhCkz+tmSpxxYoVcHFx0f7ExcUZMD0RERERtSeTJ0/GoUOHMG7cuHqFiSAIiIqKwsKFC9GzZ0+D7dPa2hq9evXCgw8+iG7dukk6siouLk7ns++KFSsa7Gfsz+CmSBAlvPNqfn4+CgsLm+wTFhYGGxsb7eM1a9bgueeea9Yc8LefKUpJSUFcXByOHz+OXr16tTg3EREREbVvWVlZiImJQXFxMezt7bF48WKDXkMkk8nQrVs39OzZ0yiz2OkjKSkJvXv3xr59+xATE6Nd3tiZImN/BjdFzR4+ZwxeXl7w8vIy2vZv/0Uw9rThRERERNQ+BAUFwd7eHsXFxbCxsTFoQeTj44MhQ4bAzc3NYNs0BEdHRzg7O9+xn7E/g5siSYsifWRlZaGoqAhZWVlQq9VISUkBAISHh7PYISIiIiKjs7a2Rp8+fSQfJteWLOUzuMkURa+++iq++eYb7eObY0H37t2LoUOHSpSKiIiIiCxBWFgY7rrrLrMqBJrDUj6Dm0xRtGbNGpOcAY+IiIiITJePjw/69esHX19fqaNIwlI+g5tMUURERERE1FZcXFzQt29fhISEWMxQOUvGooiIiIiI6H+sra0RGxuLbt26ae9rROaPRREREREREW7MWDd48GA4ODhIHYXaGIsiIiIiIrJoMpkMd911l0XNKke6WBQRERERkcVSKBQYNWoU/Pz8pI5CEmJRREREREQWycHBAePGjWt3N2GltseiiIiIiIgsjqOjIyZMmAAnJyepo1A7wCk1iIiIiMii2NnZYfz48SyISItFERERERFZDJlMhpEjR8LFxUXqKNSOcPgcEREREZklX19f1NXVQaFQaJf1798fvr6+Eqai9ohFERERERGZpWPHjuHixYvYs2cPACA0NBSRkZESp6L2iMPniIiIiMjs2djYYNCgQbwPETWIRRERERERmb3IyEjY2dlJHYPaKRZFRERERGT2OnfuLHUEasdYFBERERGRWXN1dYWrq6vUMagdY1FERERERGatQ4cOUkegdo5FERERERGZNR8fH6kjUDvHooiIiIiIzJq7u7vUEaidY1FERERERGZLEAQ4OztLHYPaORZFRERERGS2bG1tIZfLpY5B7RyLIiIiIiIyW7a2tlJHIBPAooiIiIiIzBbPElFzsCgiIiIiIrMlCILUEcgEsCgiIiIiIrMlk/HjLt0Zf0uIiIiIyGxx+Bw1B4siIiIiIjJbLIqoOVgUEREREZHZ4vA5ag7+lhARERGR2eJEC9QcLIqIiIiIyGxx+Bw1B4siIiIiIjJbPFNEzcGiiIiIiIiILBqLIiIiIiIism
gsioiIiIiIyKKxKCIiIiIiIovGooiIiIiIiCwaiyIiIiIiIrJoVlIHIOPIyclBTk6O1DHIQPz8/ODn5yd1DDIQHp/mh8eoeeExal54fFJzWFRR5OfnhyVLlpj9gaFSqfDggw9i3759UkchA4mLi8Pvv/8OhUIhdRRqJR6f5onHqPngMWp+LOH4tJTPuMYkiKIoSh2CDKusrAwuLi7Yt28fHB0dpY5DraRUKhEXF4fS0lI4OztLHYdaicen+eExal54jJoXHp/UXBZ1psjSxMTE8A+AGSgrK5M6AhkBj0/zwWPUPPEYNQ88Pqm5ONECERERERFZNBZFRERERERk0VgUmSGFQoElS5aY9QWFloSvp3nh62l++JqaF76e5oWvJzUXJ1ogIiIiIiKLxjNFRERERERk0VgUERERERGRRWNRREREREREFo1FEZHEBEHAli1b2mx/a9asgaura5vtj4iIaOjQoXjuueea1bet36dee+01xMTENLt/ZmYmBEFASkqK0TJR22NRRNSE2bNnQxAEPP744/Xa5s2bB0EQMHv27EbXj4+PhyAI2h8fHx9MmTIFly5dMmJqIrqVIY9jmUwGFxcX9OzZEy+++CJycnKMmJyIAP2LlrZ2829ESUmJ1FGoFVgUEd1BYGAg1q9fj6qqKu2y6upqrFu3DkFBQc3axrlz53Dt2jX89NNPOH36NCZMmAC1Wm2syER0G0Mex0ePHsXChQvx559/onv37khNTW10nZqamlZnJyIi42NRRHQHvXr1QmBgIDZt2qRdtmnTJgQFBaFnz57N2oa3tzf8/PwwZMgQvPrqqzhz5gwuXrzYYN+FCxeic+fOsLe3R1hYGBYvXoza2lqdPlu3bkWfPn1ga2sLT09PTJo0SdumUqkwf/58BAQEwMHBAf369UN8fHy9/WzZsgWdOnWCra0tRo8ejStXrui0r1y5Eh07doSNjQ0iIiLw3XffNeu5ErVHhjqOfX190blzZ0yfPh0JCQnw8vLCE088oe0ze/Zs3HfffVi2bBn8/f0REREBoOFhsq6urlizZo328aFDhxATEwNbW1vExsZiy5YtHKJDJqmiogIzZ86Eo6Mj/Pz88N577+m0N/d9CrgxlO7111/HiRMntGdsbx4377//PqKiouDg4IDAwEA8+eSTUCqVd8z35ptvwsfHB05OTnj00UdRXV1dr89///tfdO3aFba2tujSpQs+++yzBreVmZmJYcOGAQDc3Nx0zjz/9ttvGDRoEFxdXeHh4YF77rkH6enpd8xH0mBRRNQMjzzyCFavXq19/PXXX2POnDkt2padnR2Axr9BdnJywpo1a3DmzBl89NFHWLVqFT744ANt+/bt2zFp0iSMGzcOycnJ2L17N/r27attf+qpp/DXX39h/fr1OHnyJKZOnYoxY8bgwoUL2j6VlZVYtmwZvv32WyQkJKCkpATTp0/Xtm/evBnPPvssXnjhBZw6dQr//Oc/MWfOHOzdu7dFz5moPTDkcQzcOJYff/xxJCQkIC8vT7t89+7dOHfuHHbt2oVt27Y1a1tlZWWYMGECoqKikJSUhDfeeAMLFy5scTYiKS1YsAD79u3DL7/8gj/++APx8fFISkrStjfnfeqmadOm4YUXXkC3bt2Qk5ODnJwcTJs2DQAgk8nw8ccf4/Tp0/jmm2+wZ88evPjii01m27BhA1577TUsX74cx44dg5+fX72CZ+3atXj11VexbNkynD17FsuXL8fixYvxzTff1NteYGAgNm7cCODG2eScnBx89NFHAG4Uh88//zyOHTuG3bt3QyaTYdKkSdBoNPr9D6W2IRJRo2bNmiVOnDhRzMvLExUKhZiZmSlmZmaKtra2Yn5+vjhx4kRx1qxZja6/d+9eEYBYXFwsiqIoXrt2TRwwYIAYEBAgqlQqURRFEYC4efPmRrfxzjvviL1799Y+7t+/vzhjxowG+16+fFmUy+Vidna2zvIRI0aIL730kiiKorh69WoRgH
j48GFt+9mzZ0UAYmJioiiKojhgwABx7ty5OtuYOnWqOG7cuEZzErVXhj6Ob7Vz506dY2fWrFmij4+P9vi+qaHj3MXFRVy9erUoiqK4cuVK0cPDQ6yqqtK2r1q1SgQgJicnt+RpE0mivLxctLGxETds2KBdVlhYKNrZ2YnPPvtss9+nXFxctG1LliwRe/Toccd9//TTT6KHh0eTffr37y8++eSTOsv69euns/2OHTuK69at0+nzxhtviP379xdFURQzMjJ0js2m/kbcKj8/XwQgpqam3vG5UNuzkqgWIzIpXl5eGD9+PNasWQNRFDF+/Hh4eno2e/0OHTpAFEVUVlaiR48e2LhxI2xsbBrs++OPP+Ljjz9Geno6lEol6urq4OzsrG1PSUnB3LlzG1w3NTUVarUanTt31lmuUqng4eGhfWxlZYU+ffpoH3fp0gWurq44e/Ys+vbti7Nnz+Kxxx7T2cbAgQO1334RmaLWHscNEUURwI3hcTdFRUU1enw35ty5c4iOjoatra122a1ngIlMRXp6OmpqatCvXz/tMnd3d+1Q0ua+TzXHn3/+iRUrViAtLQ1lZWWoq6tDdXU1KisrYW9vD0dHR23fhx9+GJ9//jnOnj1bb9KV/v37a0dCVFRUID09HY8++qjOe21dXR1cXFz0ynfhwgW8+uqrSExMREFBgfYMUVZWFrp3767Xtsj4WBQRNdMjjzyCp556CgDw6aef6rXugQMH4OzsDG9vbzg5OTXa76+//sKMGTPw+uuvY/To0XBxccH69et1xmPfHH7XEKVSCblcjuPHj0Mul+u03frmQGSpWnMcN+Ts2bMAgJCQEO0yBweHev0EQdAWUDfdfq0gkSUw1PtUZmYm7rnnHjzxxBNYtmwZ3N3dcfDgQTz66KOoqamBvb29zvV4t365eKd8ALBq1Sqdwg5Avbx3MmHCBAQHB2PVqlXw9/eHRqNB9+7dOQFLO8WiiKiZxowZg5qaGgiCgNGjR+u1bmhoaLPuuXDo0CEEBwfj5Zdf1i67fPmyTp/o6Gjs3r27wWshevbsCbVajby8PAwePLjR/dTV1eHYsWPab6LPnTuHkpISdO3aFQDQtWtXJCQkYNasWdp1EhISEBkZecfnQNSeteY4vl1VVRW+/PJLDBkyBF5eXk329fLy0pm++8KFC6isrNQ+joiIwPfffw+VSgWFQgEAOHr0aKvyEUmhY8eOsLa2RmJionZmx+LiYpw/fx5xcXHNfp+6lY2NTb0ZW48fPw6NRoP33nsPMtmNS+Q3bNig0yc8PLzetrp27YrExETMnDlTu+zw4cPaf/v4+MDf3x+XLl3CjBkzmp0PgE7GwsJCnDt3DqtWrdI+z4MHDzZreyQNFkVEzSSXy7XfCuv7bVFzderUCVlZWVi/fj369OmD7du3Y/PmzTp9lixZghEjRqBjx46YPn066urqsGPHDu2sdTNmzMDMmTPx3nvvoWfPnsjPz8fu3bsRHR2N8ePHAwCsra3x9NNP4+OPP4aVlRWeeuop3HXXXdoiacGCBXjggQfQs2dP3H333di6dSs2bdqEP//80yjPm6ittOY4zsvLQ3V1NcrLy3H8+HG8/fbbKCgo0JnRrjHDhw/HJ598gv79+0OtVmPhwoWwtrbWtj/00EN4+eWX8dhjj2HRokXIysrCu+++C0B3aB5Re+fo6IhHH30UCxYsgIeHB7y9vfHyyy9rC5fmvk/dKiQkBBkZGUhJSUGHDh3g5OSE8PBw1NbW4j//+Q8mTJiAhIQEfP7553fM9+yzz2L27NmIjY3FwIEDsXbtWpw+fRphYWHaPq+//jqeeeYZuLi4YMyYMVCpVDh27BiKi4vx/PPP19tmcHAwBEHAtm3bMG7cONjZ2cHNzQ0eHh748ssv4efnh6ysLCxa9P/bu/uYpq64D+DfastLWxS0iEVRLFR8CdSpM5kuBUTFxSXgCxqj+DKjk0RQ55DHRBHd3JQ55rLF6ciSiUGSJfKHUd
AY1k6G8S0K6kTnfJ/iTNQpqNAKv+ePPdzHzoJso4L2+0lMuOeee+45TX7S3+Wcc//nP3yy5HEduqKJqJNrXqDdkv+yQLsZ/rYAOzMzU3r27Cl6vV5mzJghX3zxhcuCUxGR3bt3y7Bhw8THx0cMBoNMmTJFOedwOCQ7O1vCw8NFo9GI0WiUyZMny+nTp0Xk/xew7t69W0wmk/j6+sq4cePk2rVrLvfYunWrmEwm0Wg0MnDgQCkoKGhxDESdWXvFMQBRqVQSEBAgFotFMjMzpaampk33unnzpkyYMEF0Op2YzWYpKSlx2WhBRKSiokJiYmLEx8dHRowYIbt27RIAcv78+X84YqKOVVtbK7NnzxatVishISGSm5srsbGxsnTpUhFp+++pZvX19TJ16lQJDAwUAErc5OXlidFoFH9/f0lMTJSCgoI2bXiwYcMGMRgMotfrZe7cubJy5crnNnIoLCxUfs8GBQWJ1WqV4uJiEXl+owURkfXr10vv3r1FpVIp/58cPHhQBg8eLL6+vhITEyN2u/2FmytRx1GJ/G2SMxEREXW4wsJCzJ8/Hw8ePGh1LSEREf13nD5HRETUCRQUFMBkMqFPnz6oqqpCVlYWpk+fzoSIiOglYFJERETUCdy+fRvZ2dm4ffs2jEYjUlJSsGHDho7uFhGRV+D0OSIiIiIi8mpdOroDREREREREHYlJEVEnYLfboVKp8Oeff3Z0V4jIDcYoEdHrjdPniDoBh8OBe/fuISQkhO8kIeqEGKNERK83JkVEREREROTVOH2OyAPi4uKQnp6OZcuWISgoCCEhIcjPz8ejR48wf/585W3cpaWlAJ6fmvP9998jMDAQBw4cwODBg6HX6zFx4kTU1NS43GPZsmUu901OTsa8efOU461bt8JsNsPPzw8hISGYNm2ap4dO9EpgjBIR0bOYFBF5yI4dO2AwGHDs2DGkp6cjLS0NKSkpGD16NE6ePIkJEyYgNTUVjx8/dnv948ePsXnzZuzcuROHDh3C9evX8eGHH7b5/idOnEBGRgbWr1+PCxcuYP/+/bBare01PKJXHmOUiIiaMSki8hCLxYLVq1fDbDZj1apV8PPzg8FgwMKFC2E2m5GdnY27d+/i9OnTbq93Op3Ytm0bRo4cieHDh2PJkiUoKytr8/2vX78OnU6Hd999F/3798cbb7yBjIyM9hoe0SuPMUpERM2YFBF5SExMjPJz165d0bNnT0RHRytlISEhAIA7d+64vV6r1SIiIkI5NhqNLdZ1Z/z48ejfvz9MJhNSU1NRWFjY4hNvIm/EGCUiomZMiog8RKPRuByrVCqXsuYdrJqamtp8/bP7onTp0gV/3yfF6XQqPwcEBODkyZMoKiqC0WhEdnY2LBYLtxQm+j+MUSIiasakiOgVFRwc7LKou7GxEWfPnnWpo1arMW7cOOTm5uL06dO4evUqfvzxx5fdVSKvxBglInp1qDu6A0T074wdOxYffPAB9u3bh4iICOTl5bk8Yd67dy8uX74Mq9WKoKAglJSUoKmpCVFRUR3XaSIvwhglInp1MCkiekW99957qKqqwpw5c6BWq7F8+XLEx8cr5wMDA1FcXIycnBzU19fDbDajqKgIQ4cO7cBeE3kPxigR0auDL28lIiIiIiKvxjVFRERERETk1ZgUERERERGRV2NSREREREREXo1JEREREREReTUmRUQdzG63Q6VSvdQXNs6bNw/Jyckv7X5EREREnRmTIiI3tm3bhoCAADx9+lQpq6urg0ajQVxcnEvd5qTm0qVLbtvKycmBSqWCSqWCWq1GeHg4li9fjrq6Ok8OgYjg2Vg2GAywWq3YsmULGhoaPDkMIiLyMCZFRG7Ex8ejrq4OJ06cUMrKy8vRu3dvHD16FPX19Uq5zWZDv379EBER0WJ7Q4cORU1NDa5evYpNmzbh22+/xYoVKzw6BiLyXCxfv34dNpsNKSkp+PTTTzF69GjU1ta2eJ3D4WifARERkUcwKSJyIy
oqCkajEXa7XSmz2+1ISkrCgAEDcOTIEZfyZ1/I6I5arUbv3r3Rt29fzJgxA7NmzcKePXvc1r179y5mzpyJPn36QKvVIjo6GkVFRS51mpqakJubi8jISPj6+qJfv37YsGGDcv7GjRuYPn06AgMD0aNHDyQlJeHq1avP3WvdunUIDg5Gt27dsHjxYpcvbg0NDcjIyECvXr3g5+eHt99+G8ePH291nESdjadiOTQ0FNHR0UhPT8dPP/2Es2fPYtOmTUq98PBwfPTRR5gzZw66deuGRYsWuZ0qW1lZCZVK5RKf+fn5CAsLg1arxeTJk5GXl4fAwMD/+lEQEVErmBQRtSA+Ph42m005ttlsiIuLQ2xsrFL+5MkTHD169IVfpP7O39+/xSfH9fX1GDFiBPbt24ezZ89i0aJFSE1NxbFjx5Q6q1atwsaNG7FmzRqcO3cOu3btQkhICADA6XQiMTERAQEBKC8vR0VFBfR6PSZOnOhyz7KyMlRXV8Nut6OoqAjFxcVYt26dcn7lypXYvXs3duzYgZMnTyIyMhKJiYm4d+/ePxorUUfzZCwDwKBBg/DOO++guLjYpXzz5s2wWCw4deoU1qxZ06a2KioqsHjxYixduhSVlZUYP368ywMPIiLyECEit/Lz80Wn04nT6ZSHDx+KWq2WO3fuyK5du8RqtYqISFlZmQCQa9eutdjO2rVrxWKxKMcnTpwQg8Eg06ZNExERm80mAOT+/fsttjFp0iRZsWKFiIg8fPhQfH19JT8/323dnTt3SlRUlDQ1NSllDQ0N4u/vLwcOHBARkblz50qPHj3k0aNHSp1vvvlG9Hq9NDY2Sl1dnWg0GiksLFTOOxwOCQ0Nldzc3Bb7SdQZeSqWn5WVlSX+/v7Kcf/+/SU5OdmljrtYP3XqlACQK1euiIjIjBkzZNKkSS7XzZo1S7p37972ARMR0T+m7tCMjKgTi4uLw6NHj3D8+HHcv38fAwcORHBwMGJjYzF//nzU19fDbrfDZDKhX79+rbZ15swZ6PV6NDY2wuFwYNKkSfj666/d1m1sbMQnn3yCH374ATdv3oTD4UBDQwO0Wi0AoLq6Gg0NDUhISHB7fVVVFX777TcEBAS4lNfX17ssILdYLEqbAPDWW2+hrq4ON27cwIMHD+B0OjFmzBjlvEajwahRo1BdXd36B0fUybRnLLdERKBSqVzKRo4c+Y/buXDhAiZPnuxSNmrUKOzdu/df9YuIiNqGSRFRCyIjI9G3b1/YbDbcv38fsbGxAIDQ0FCEhYXh8OHDsNlsGDt27AvbioqKwp49e6BWqxEaGgofH58W63722Wf48ssvsWXLFkRHR0On02HZsmXK1Dd/f/9W71VXV4cRI0agsLDwuXPBwcEv7CvR66Y9Y7kl1dXVGDBggEuZTqdzOe7S5a8Z6yKilDmdzn99TyIiaj9cU0TUivj4eNjtdtjtdpfte61WK0pLS3Hs2LE2rUHw8fFBZGQkwsPDW02IgL/WFCQlJWH27NmwWCwwmUz49ddflfNmsxn+/v4oKytze/3w4cNx8eJF9OrVC5GRkS7/unfvrtSrqqrCkydPlOMjR45Ar9cjLCwMERER8PHxQUVFhXLe6XTi+PHjGDJkyAvHS9TZtFcsu3P+/Hns378fU6dObbVe80OJmpoapayystKlTlRU1HMbmnCDEyIiz2NSRNSK+Ph4/Pzzz6isrFSeLgNAbGwstm/fDofD8a+/SLXEbDbj4MGDOHz4MKqrq/H+++/jjz/+UM77+fkhKysLK1euREFBAS5duoQjR47gu+++AwDMmjULBoMBSUlJKC8vx5UrV2C325GRkYHff/9dacfhcGDBggU4d+4cSkpKsHbtWixZsgRdunSBTqdDWloaMjMzsX//fpw7dw4LFy7E48ePsWDBgnYdL9HL0F6x/PTpU9y+fRu3bt3CmTNn8NVXXyE2NhbDhg1DZmZmq9dGRkYiLCwMOTk5uHjxIvbt24fPP//cpU56ej
pKSkqQl5eHixcvYvv27SgtLX1uah4REbUvTp8jakV8fDyePHmCQYMGKbu7AX99kaqtrVW2+21Pq1evxuXLl5GYmAitVotFixYhOTkZDx48UOqsWbMGarUa2dnZuHXrFoxGIxYvXgwA0Gq1OHToELKysjBlyhTU1taiT58+SEhIQLdu3ZQ2EhISYDabYbVa0dDQgJkzZyInJ0c5v3HjRjQ1NSE1NRW1tbUYOXIkDhw4gKCgoHYdL9HL0F6x/Msvv8BoNKJr167o3r07hgwZglWrViEtLQ2+vr6tXqvRaFBUVIS0tDTExMTgzTffxMcff4yUlBSlzpgxY7Bt2zasW7cOq1evRmJiIpYvX97iGkQiImofKnl2cjMRERF1KgsXLsT58+dRXl7e0V0hInpt8S9FREREncjmzZsxfvx46HQ6lJaWYseOHdi6dWtHd4uI6LXGvxQRERF1ItOnT4fdbkdtbS1MJhPS09OV6bFEROQZTIqIiIiIiMircfc5IiIiIiLyakyKiIiIiIjIqzEpIiIiIiIir8akiIiIiIiIvBqTIiIiIiIi8mpMioiIiIiIyKsxKSIiIiIiIq/GpIiIiIiIiLwakyIiIiIiIvJq/wvnnQ3fkVlEQgAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "np.random.seed(9999) # Fix the seed so the results are replicable.\n", - "N = 20\n", - "# Create samples\n", - "y = norm.rvs(loc=3, scale=0.4, size=N*4)\n", - "y[N:2*N] = y[N:2*N]+1\n", - "y[2*N:3*N] = y[2*N:3*N]-0.5\n", - "# Add a `Treatment` column\n", - "t1 = np.repeat('Placebo', N*2).tolist()\n", - "t2 = np.repeat('Drug', N*2).tolist()\n", - "treatment = t1 + t2 \n", - "# Add a `Rep` column as the first variable for the 2 replicates of experiments done\n", - "rep = []\n", - "for i in range(N*2):\n", - " rep.append('Rep1')\n", - " rep.append('Rep2')\n", - "# Add a `Genotype` column as the second variable\n", - "wt = np.repeat('W', N).tolist()\n", - "mt = np.repeat('M', N).tolist()\n", - "wt2 = np.repeat('W', N).tolist()\n", - "mt2 = np.repeat('M', N).tolist()\n", - "genotype = wt + mt + wt2 + mt2\n", - "# Add an `id` column for paired data plotting.\n", - "id = list(range(0, N*2))\n", - "id_col = id + id \n", - "# Combine all columns into a DataFrame.\n", - "df_delta2 = pd.DataFrame({'ID' : id_col,\n", - " 'Rep' : rep,\n", - " 'Genotype' : genotype, \n", - " 'Treatment': treatment,\n", - " 'Y' : y\n", - " })\n", - "unpaired_delta2 = dabest.load(data = df_delta2, x = [\"Genotype\", \"Genotype\"], y = \"Y\", delta2 = True, experiment = \"Treatment\")\n", - "unpaired_delta2.mean_diff.plot();" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "24c4b036", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class MiniMetaDelta(object):\n", - " \"\"\"\n", - " A class to compute and store the weighted delta.\n", - " A weighted delta is calculated if the argument ``mini_meta=True`` is passed during ``dabest.load()``.\n", - " \n", - " \"\"\"\n", - "\n", - " def __init__(self, effectsizedataframe, permutation_count,\n", - " ci=95):\n", - "\n", - " import numpy as np\n", - " from numpy import sort as npsort\n", - " from numpy import sqrt, isinf, 
isnan\n", - " from ._stats_tools import effsize as es\n", - " from ._stats_tools import confint_1group as ci1g\n", - " from ._stats_tools import confint_2group_diff as ci2g\n", - "\n", - "\n", - " from string import Template\n", - " import warnings\n", - " \n", - " self.__effsizedf = effectsizedataframe.results\n", - " self.__dabest_obj = effectsizedataframe.dabest_obj\n", - " self.__ci = ci\n", - " self.__resamples = effectsizedataframe.resamples\n", - " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", - " self.__permutation_count = permutation_count\n", - " self.__bootstraps = np.array(self.__effsizedf[\"bootstraps\"])\n", - " self.__control = np.array(self.__effsizedf[\"control\"])\n", - " self.__test = np.array(self.__effsizedf[\"test\"])\n", - " self.__control_N = np.array(self.__effsizedf[\"control_N\"])\n", - " self.__test_N = np.array(self.__effsizedf[\"test_N\"])\n", - "\n", - "\n", - " idx = self.__dabest_obj.idx\n", - " dat = self.__dabest_obj._plot_data\n", - " xvar = self.__dabest_obj._xvar\n", - " yvar = self.__dabest_obj._yvar\n", - "\n", - " # compute the variances of each control group and each test group\n", - " control_var=[]\n", - " test_var=[]\n", - " for j, current_tuple in enumerate(idx):\n", - " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " control_var.append(np.var(control, ddof=1))\n", - "\n", - " tname = current_tuple[1]\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", - " test_var.append(np.var(test, ddof=1))\n", - " self.__control_var = np.array(control_var)\n", - " self.__test_var = np.array(test_var)\n", - "\n", - " # Compute pooled group variances for each pair of experiment groups\n", - " # based on the raw data\n", - " self.__group_var = ci2g.calculate_group_var(self.__control_var, \n", - " self.__control_N,\n", - " self.__test_var, \n", - " self.__test_N)\n", - "\n", - " # Compute the weighted average mean differences of the bootstrap data\n", - " # using the pooled group 
variances of the raw data as the inverse of \n", - " # weights\n", - " self.__bootstraps_weighted_delta = ci2g.calculate_weighted_delta(\n", - " self.__group_var, \n", - " self.__bootstraps, \n", - " self.__resamples)\n", - "\n", - " # Compute the weighted average mean difference based on the raw data\n", - " self.__difference = es.weighted_delta(self.__effsizedf[\"difference\"],\n", - " self.__group_var)\n", - "\n", - " sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta)\n", - "\n", - "\n", - " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", - " self.__bootstraps_weighted_delta, self.__difference)\n", - " \n", - " self.__jackknives = np.array(ci1g.compute_1group_jackknife(\n", - " self.__bootstraps_weighted_delta, \n", - " np.mean))\n", - "\n", - " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", - "\n", - " # Compute BCa intervals.\n", - " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", - " self.__bias_correction, self.__acceleration_value,\n", - " self.__resamples, ci)\n", - " \n", - " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", - "\n", - " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", - " self.__bca_low = sorted_weighted_deltas[bca_idx_low]\n", - " self.__bca_high = sorted_weighted_deltas[bca_idx_high]\n", - "\n", - " err1 = \"The $lim_type limit of the interval\"\n", - " err2 = \"was in the $loc 10 values.\"\n", - " err3 = \"The result should be considered unstable.\"\n", - " err_temp = Template(\" \".join([err1, err2, err3]))\n", - "\n", - " if bca_idx_low <= 10:\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", - " loc=\"bottom\"),\n", - " stacklevel=1)\n", - "\n", - " if bca_idx_high >= self.__resamples-9:\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", - " loc=\"top\"),\n", - " stacklevel=1)\n", - "\n", - " else:\n", - " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", - " err2 = \"It is set to the effect size 
itself.\"\n", - " err3 = \"All bootstrap values were likely all the same.\"\n", - " err_temp = Template(\" \".join([err1, err2, err3]))\n", - "\n", - " if isnan(bca_idx_low):\n", - " self.__bca_low = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", - " stacklevel=0)\n", - "\n", - " if isnan(bca_idx_high):\n", - " self.__bca_high = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", - " stacklevel=0)\n", - "\n", - " # Compute percentile intervals.\n", - " pct_idx_low = int((self.__alpha/2) * self.__resamples)\n", - " pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples)\n", - "\n", - " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", - " self.__pct_low = sorted_weighted_deltas[pct_idx_low]\n", - " self.__pct_high = sorted_weighted_deltas[pct_idx_high]\n", - " \n", - " \n", - "\n", - " def __permutation_test(self):\n", - " \"\"\"\n", - " Perform a permutation test and obtain the permutation p-value\n", - " based on the permutation data.\n", - " \"\"\"\n", - " import numpy as np\n", - " self.__permutations = np.array(self.__effsizedf[\"permutations\"])\n", - " self.__permutations_var = np.array(self.__effsizedf[\"permutations_var\"])\n", - "\n", - " THRESHOLD = np.abs(self.__difference)\n", - "\n", - " all_num = []\n", - " all_denom = []\n", - "\n", - " groups = len(self.__permutations)\n", - " for i in range(0, len(self.__permutations[0])):\n", - " weight = [1/self.__permutations_var[j][i] for j in range(0, groups)]\n", - " all_num.append(np.sum([weight[j]*self.__permutations[j][i] for j in range(0, groups)]))\n", - " all_denom.append(np.sum(weight))\n", - " \n", - " output=[]\n", - " for i in range(0, len(all_num)):\n", - " output.append(all_num[i]/all_denom[i])\n", - " \n", - " self.__permutations_weighted_delta = np.array(output)\n", - "\n", - " count = sum(np.abs(self.__permutations_weighted_delta)>THRESHOLD)\n", - " self.__pvalue_permutation = count/self.__permutation_count\n", - 
"\n", - "\n", - "\n", - " def __repr__(self, header=True, sigfig=3):\n", - " from .__init__ import __version__\n", - " import datetime as dt\n", - " import numpy as np\n", - "\n", - " from .misc_tools import print_greeting\n", - " \n", - " is_paired = self.__dabest_obj.is_paired\n", - "\n", - " PAIRED_STATUS = {'baseline' : 'paired', \n", - " 'sequential' : 'paired',\n", - " 'None' : 'unpaired'\n", - " }\n", - "\n", - " first_line = {\"paired_status\": PAIRED_STATUS[str(is_paired)]}\n", - " \n", - "\n", - " out1 = \"The weighted-average {paired_status} mean differences \".format(**first_line)\n", - " \n", - " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", - " if \".\" in str(self.__ci):\n", - " ci_width = base_string_fmt.format(self.__ci)\n", - " else:\n", - " ci_width = str(self.__ci)\n", - " \n", - " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", - " \"ci\" : ci_width,\n", - " \"bca_low\" : base_string_fmt.format(self.__bca_low),\n", - " \"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", - " \n", - " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", - " out = out1 + out2\n", - "\n", - " if header is True:\n", - " out = print_greeting() + \"\\n\" + \"\\n\" + out\n", - "\n", - "\n", - " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", - "\n", - " \n", - " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(pval_rounded)\n", - " p2 = \"calculated for legacy purposes only. 
\"\n", - " pvalue = p1 + p2\n", - "\n", - "\n", - " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", - " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", - " bs = bs1 + bs2\n", - "\n", - " pval_def1 = \"Any p-value reported is the probability of observing the\" + \\\n", - " \"effect size (or greater),\\nassuming the null hypothesis of\" + \\\n", - " \"zero difference is true.\"\n", - " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", - " \"control and test labels were performed.\"\n", - " pval_def = pval_def1 + pval_def2\n", - "\n", - "\n", - " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", - "\n", - "\n", - " def to_dict(self):\n", - " \"\"\"\n", - " Returns all attributes of the `dabest.MiniMetaDelta` object as a\n", - " dictionary.\n", - " \"\"\"\n", - " # Only get public (user-facing) attributes.\n", - " attrs = [a for a in dir(self)\n", - " if not a.startswith((\"_\", \"to_dict\"))]\n", - " out = {}\n", - " for a in attrs:\n", - " out[a] = getattr(self, a)\n", - " return out\n", - "\n", - "\n", - " @property\n", - " def ci(self):\n", - " \"\"\"\n", - " Returns the width of the confidence interval, in percent.\n", - " \"\"\"\n", - " return self.__ci\n", - "\n", - "\n", - " @property\n", - " def alpha(self):\n", - " \"\"\"\n", - " Returns the significance level of the statistical test as a float\n", - " between 0 and 1.\n", - " \"\"\"\n", - " return self.__alpha\n", - "\n", - "\n", - " @property\n", - " def bias_correction(self):\n", - " return self.__bias_correction\n", - "\n", - "\n", - " @property\n", - " def bootstraps(self):\n", - " '''\n", - " Return the bootstrapped differences from all the experiment groups.\n", - " '''\n", - " return self.__bootstraps\n", - "\n", - "\n", - " @property\n", - " def jackknives(self):\n", - " return self.__jackknives\n", - "\n", - "\n", - " @property\n", - " def acceleration_value(self):\n", - " return self.__acceleration_value\n", - 
"\n", - "\n", - " @property\n", - " def bca_low(self):\n", - " \"\"\"\n", - " The bias-corrected and accelerated confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__bca_low\n", - "\n", - "\n", - " @property\n", - " def bca_high(self):\n", - " \"\"\"\n", - " The bias-corrected and accelerated confidence interval upper limit.\n", - " \"\"\"\n", - " return self.__bca_high\n", - "\n", - "\n", - " @property\n", - " def bca_interval_idx(self):\n", - " return self.__bca_interval_idx\n", - "\n", - "\n", - " @property\n", - " def control(self):\n", - " '''\n", - " Return the names of the control groups from all the experiment \n", - " groups in order.\n", - " '''\n", - " return self.__control\n", - "\n", - "\n", - " @property\n", - " def test(self):\n", - " '''\n", - " Return the names of the test groups from all the experiment \n", - " groups in order.\n", - " '''\n", - " return self.__test\n", - " \n", - " @property\n", - " def control_N(self):\n", - " '''\n", - " Return the sizes of the control groups from all the experiment \n", - " groups in order.\n", - " '''\n", - " return self.__control_N\n", - "\n", - "\n", - " @property\n", - " def test_N(self):\n", - " '''\n", - " Return the sizes of the test groups from all the experiment \n", - " groups in order.\n", - " '''\n", - " return self.__test_N\n", - "\n", - "\n", - " @property\n", - " def control_var(self):\n", - " '''\n", - " Return the estimated population variances of the control groups \n", - " from all the experiment groups in order. Here the population \n", - " variance is estimated from the sample variance. \n", - " '''\n", - " return self.__control_var\n", - "\n", - "\n", - " @property\n", - " def test_var(self):\n", - " '''\n", - " Return the estimated population variances of the control groups \n", - " from all the experiment groups in order. Here the population \n", - " variance is estimated from the sample variance. 
\n", - " '''\n", - " return self.__test_var\n", - "\n", - " \n", - " @property\n", - " def group_var(self):\n", - " '''\n", - " Return the pooled group variances of all the experiment groups \n", - " in order. \n", - " '''\n", - " return self.__group_var\n", - "\n", - "\n", - " @property\n", - " def bootstraps_weighted_delta(self):\n", - " '''\n", - " Return the weighted-average mean differences calculated from the bootstrapped \n", - " deltas and weights across the experiment groups, where the weights are \n", - " the inverse of the pooled group variances.\n", - " '''\n", - " return self.__bootstraps_weighted_delta\n", - "\n", - "\n", - " @property\n", - " def difference(self):\n", - " '''\n", - " Return the weighted-average delta calculated from the raw data.\n", - " '''\n", - " return self.__difference\n", - "\n", - "\n", - " @property\n", - " def pct_interval_idx (self):\n", - " return self.__pct_interval_idx \n", - "\n", - "\n", - " @property\n", - " def pct_low(self):\n", - " \"\"\"\n", - " The percentile confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__pct_low\n", - "\n", - "\n", - " @property\n", - " def pct_high(self):\n", - " \"\"\"\n", - " The percentile confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__pct_high\n", - "\n", - "\n", - " @property\n", - " def pvalue_permutation(self):\n", - " try:\n", - " return self.__pvalue_permutation\n", - " except AttributeError:\n", - " self.__permutation_test()\n", - " return self.__pvalue_permutation\n", - " \n", - "\n", - " @property\n", - " def permutation_count(self):\n", - " \"\"\"\n", - " The number of permuations taken.\n", - " \"\"\"\n", - " return self.__permutation_count\n", - "\n", - " \n", - " @property\n", - " def permutations(self):\n", - " '''\n", - " Return the mean differences of permutations obtained during\n", - " the permutation test for each experiment group.\n", - " '''\n", - " try:\n", - " return self.__permutations\n", - " except AttributeError:\n", - 
" self.__permutation_test()\n", - " return self.__permutations\n", - "\n", - "\n", - " @property\n", - " def permutations_var(self):\n", - " '''\n", - " Return the pooled group variances of permutations obtained during\n", - " the permutation test for each experiment group.\n", - " '''\n", - " try:\n", - " return self.__permutations_var\n", - " except AttributeError:\n", - " self.__permutation_test()\n", - " return self.__permutations_var\n", - "\n", - " \n", - " @property\n", - " def permutations_weighted_delta(self):\n", - " '''\n", - " Return the weighted-average deltas of permutations obtained \n", - " during the permutation test.\n", - " '''\n", - " try:\n", - " return self.__permutations_weighted_delta\n", - " except AttributeError:\n", - " self.__permutation_test()\n", - " return self.__permutations_weighted_delta\n", - "\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "ae5bac56", - "metadata": {}, - "source": [ - "The weighted delta is calcuated as follows:\n", - "\n", - "$$\\theta_{\\text{weighted}} = \\frac{\\Sigma\\hat{\\theta_{i}}w_{i}}{{\\Sigma}w_{i}}$$\n", - "\n", - "where:\n", - "\n", - "$$\\hat{\\theta_{i}} = \\text{Mean difference for replicate }i$$\n", - "\n", - "\n", - "$$w_{i} = \\text{Weight for replicate }i = \\frac{1}{s_{i}^2} $$\n", - "\n", - "$$s_{i}^2 = \\text{Pooled variance for replicate }i = \\frac{(n_{test}-1)s_{test}^2+(n_{control}-1)s_{control}^2}{n_{test}+n_{control}-2}$$\n", - "\n", - "$$n = \\text{sample size and }s^2 = \\text{variance for control/test.}$$\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "dc1239ee", - "metadata": {}, - "source": [ - "#### Example: mini-meta-delta" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e144ed50", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DABEST v2023.03.29\n", - "==================\n", - " \n", - "Good afternoon!\n", - "The current time is Tue Apr 18 14:47:44 2023.\n", - "\n", - "The 
weighted-average unpaired mean differences is 0.0336 [95%CI -0.137, 0.228].\n", - "The p-value of the two-sided permutation t-test is 0.736, calculated for legacy purposes only. \n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing theeffect size (or greater),\n", - "assuming the null hypothesis ofzero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Ns = 20\n", - "c1 = norm.rvs(loc=3, scale=0.4, size=Ns)\n", - "c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", - "c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", - "t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)\n", - "t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)\n", - "t3 = norm.rvs(loc=3, scale=0.75, size=Ns)\n", - "my_df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1,\n", - " 'Control 2' : c2, 'Test 2' : t2,\n", - " 'Control 3' : c3, 'Test 3' : t3})\n", - "my_dabest_object = dabest.load(my_df, idx=((\"Control 1\", \"Test 1\"), (\"Control 2\", \"Test 2\"), (\"Control 3\", \"Test 3\")), mini_meta=True)\n", - "my_dabest_object.mean_diff.mini_meta_delta" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "669285cb", - "metadata": {}, - "source": [ - "As of version 2023.02.14, weighted delta can only be calculated for mean difference, and not for standardized measures such as Cohen's *d*.\n", - "\n", - "Details about the calculated weighted delta are accessed as attributes of the ``mini_meta_delta`` class. 
See the `minimetadelta` for details on usage.\n", - "\n", - "Refer to Chapter 10 of the Cochrane handbook for further information on meta-analysis: \n", - "https://training.cochrane.org/handbook/current/chapter-10\n", - "\t\t" - ] - }, { "cell_type": "code", "execution_count": null, @@ -3185,6 +2140,7 @@ " import pandas as pd\n", " from .misc_tools import print_greeting, get_varname\n", " from ._stats_tools import confint_2group_diff as ci2g\n", + " from ._delta_objects import MiniMetaDelta, DeltaDelta\n", "\n", " idx = self.__dabest_obj.idx\n", " dat = self.__dabest_obj._plot_data\n", diff --git a/nbs/API/delta_objects.ipynb b/nbs/API/delta_objects.ipynb new file mode 100644 index 00000000..4dff4c67 --- /dev/null +++ b/nbs/API/delta_objects.ipynb @@ -0,0 +1,1055 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Delta objects\n", + "\n", + "> The different types of delta used for the computations.\n", + "\n", + "- order: 9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp _delta_objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from __future__ import annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nbdev.showdoc import *\n", + "import nbdev\n", + "nbdev.nbdev_export()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "import dabest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "from scipy.stats import norm\n", + "import pandas as pd\n", + "from scipy.stats import randint\n", + "import numpy as np\n", + "from numpy import sort as npsort\n", + "from numpy import sqrt, isinf, isnan\n", + "from string 
import Template\n", + "import warnings\n", + "import datetime as dt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#| export\n", + "class DeltaDelta(object):\n", + " \"\"\"\n", + " A class to compute and store the delta-delta statistics for experiments with a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, 1 and 2. The data is divided into two pairs of two groups, and a primary delta is first calculated as the mean difference between each of the pairs:\n", + "\n", + "\n", + " $$\\Delta_{1} = \\overline{X}_{A_{2}, B_{1}} - \\overline{X}_{A_{1}, B_{1}}$$\n", + "\n", + " $$\\Delta_{2} = \\overline{X}_{A_{2}, B_{2}} - \\overline{X}_{A_{1}, B_{2}}$$\n", + "\n", + "\n", + " where $\\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\\Delta$ is the mean difference between two samples. \n", + "\n", + " A delta-delta value is then calculated as the mean difference between the two primary deltas:\n", + "\n", + "\n", + " $$\\Delta_{\\Delta} = \\Delta_{2} - \\Delta_{1}$$\n", + " \n", + " and a deltas' g value is calculated as the mean difference between the two primary deltas divided by\n", + " the standard deviation of the delta-delta value, which is calculated from a pooled variance of the 4 samples:\n", + " \n", + " $$\\Delta_{g} = \\frac{\\Delta_{\\Delta}}{s_{\\Delta_{\\Delta}}}$$\n", + "\n", + " $$s_{\\Delta_{\\Delta}} = \\sqrt{\\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$\n", + "\n", + " where $s$ is the standard deviation and $n$ is the sample size.\n", + "\n", + "\n", + " \"\"\"\n", + " \n", + " def __init__(self, effectsizedataframe, permutation_count,bootstraps_delta_delta,\n", + " ci=95):\n", + " from 
._stats_tools import effsize as es\n", + " from ._stats_tools import confint_1group as ci1g\n", + " from ._stats_tools import confint_2group_diff as ci2g\n", + " \n", + " self.__effsizedf = effectsizedataframe.results\n", + " self.__dabest_obj = effectsizedataframe.dabest_obj\n", + " self.__ci = ci\n", + " self.__resamples = effectsizedataframe.resamples\n", + " self.__effect_size = effectsizedataframe.effect_size\n", + " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", + " self.__permutation_count = permutation_count\n", + " self.__bootstraps = np.array(self.__effsizedf[\"bootstraps\"])\n", + " self.__control = self.__dabest_obj.experiment_label[0]\n", + " self.__test = self.__dabest_obj.experiment_label[1]\n", + "\n", + "\n", + " # Compute the bootstrap delta-delta or deltas' g and the true dela-delta based on the raw data\n", + " if self.__effect_size == \"mean_diff\":\n", + " self.__bootstraps_delta_delta = bootstraps_delta_delta[2]\n", + " self.__difference = self.__effsizedf[\"difference\"][1] - self.__effsizedf[\"difference\"][0]\n", + " else:\n", + " self.__bootstraps_delta_delta = bootstraps_delta_delta[0]\n", + " self.__difference = bootstraps_delta_delta[1]\n", + " \n", + " sorted_delta_delta = npsort(self.__bootstraps_delta_delta)\n", + "\n", + " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", + " self.__bootstraps_delta_delta, self.__difference)\n", + " \n", + " self.__jackknives = np.array(ci1g.compute_1group_jackknife(\n", + " self.__bootstraps_delta_delta, \n", + " np.mean))\n", + "\n", + " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", + "\n", + " # Compute BCa intervals.\n", + " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", + " self.__bias_correction, self.__acceleration_value,\n", + " self.__resamples, ci)\n", + " \n", + " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", + "\n", + " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", + " self.__bca_low = 
sorted_delta_delta[bca_idx_low]\n", + " self.__bca_high = sorted_delta_delta[bca_idx_high]\n", + "\n", + " err1 = \"The $lim_type limit of the interval\"\n", + " err2 = \"was in the $loc 10 values.\"\n", + " err3 = \"The result should be considered unstable.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if bca_idx_low <= 10:\n", + " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", + " loc=\"bottom\"),\n", + " stacklevel=1)\n", + "\n", + " if bca_idx_high >= self.__resamples-9:\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", + " loc=\"top\"),\n", + " stacklevel=1)\n", + "\n", + " else:\n", + " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", + " err2 = \"It is set to the effect size itself.\"\n", + " err3 = \"All bootstrap values were likely all the same.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if isnan(bca_idx_low):\n", + " self.__bca_low = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", + " stacklevel=0)\n", + "\n", + " if isnan(bca_idx_high):\n", + " self.__bca_high = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", + " stacklevel=0)\n", + "\n", + " # Compute percentile intervals.\n", + " pct_idx_low = int((self.__alpha/2) * self.__resamples)\n", + " pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples)\n", + "\n", + " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", + " self.__pct_low = sorted_delta_delta[pct_idx_low]\n", + " self.__pct_high = sorted_delta_delta[pct_idx_high]\n", + " \n", + " \n", + "\n", + " def __permutation_test(self):\n", + " \"\"\"\n", + " Perform a permutation test and obtain the permutation p-value\n", + " based on the permutation data.\n", + " \"\"\"\n", + " self.__permutations = np.array(self.__effsizedf[\"permutations\"])\n", + "\n", + " THRESHOLD = np.abs(self.__difference)\n", + "\n", + " self.__permutations_delta_delta = 
np.array(self.__permutations[1]-self.__permutations[0])\n", + "\n", + " count = sum(np.abs(self.__permutations_delta_delta)>THRESHOLD)\n", + " self.__pvalue_permutation = count/self.__permutation_count\n", + "\n", + "\n", + "\n", + " def __repr__(self, header=True, sigfig=3):\n", + " from .misc_tools import print_greeting\n", + " \n", + " first_line = {\"control\" : self.__control,\n", + " \"test\" : self.__test}\n", + " \n", + " if self.__effect_size == \"mean_diff\":\n", + " out1 = \"The delta-delta between {control} and {test} \".format(**first_line)\n", + " else:\n", + " out1 = \"The deltas' g between {control} and {test} \".format(**first_line)\n", + " \n", + " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", + " if \".\" in str(self.__ci):\n", + " ci_width = base_string_fmt.format(self.__ci)\n", + " else:\n", + " ci_width = str(self.__ci)\n", + " \n", + " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", + " \"ci\" : ci_width,\n", + " \"bca_low\" : base_string_fmt.format(self.__bca_low),\n", + " \"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", + " \n", + " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", + " out = out1 + out2\n", + "\n", + " if header is True:\n", + " out = print_greeting() + \"\\n\" + \"\\n\" + out\n", + "\n", + "\n", + " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", + "\n", + " \n", + " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(pval_rounded)\n", + " p2 = \"calculated for legacy purposes only. 
\"\n", + " pvalue = p1 + p2\n", + "\n", + "\n", + " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", + " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", + " bs = bs1 + bs2\n", + "\n", + " pval_def1 = \"Any p-value reported is the probability of observing the \" + \\\n", + " \"effect size (or greater),\\nassuming the null hypothesis of \" + \\\n", + " \"zero difference is true.\"\n", + " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", + " \"control and test labels were performed.\"\n", + " pval_def = pval_def1 + pval_def2\n", + "\n", + "\n", + " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", + "\n", + "\n", + " def to_dict(self):\n", + " \"\"\"\n", + " Returns the attributes of the `DeltaDelta` object as a\n", + " dictionary.\n", + " \"\"\"\n", + " # Only get public (user-facing) attributes.\n", + " attrs = [a for a in dir(self)\n", + " if not a.startswith((\"_\", \"to_dict\"))]\n", + " out = {}\n", + " for a in attrs:\n", + " out[a] = getattr(self, a)\n", + " return out\n", + "\n", + "\n", + " @property\n", + " def ci(self):\n", + " \"\"\"\n", + " Returns the width of the confidence interval, in percent.\n", + " \"\"\"\n", + " return self.__ci\n", + "\n", + "\n", + " @property\n", + " def alpha(self):\n", + " \"\"\"\n", + " Returns the significance level of the statistical test as a float\n", + " between 0 and 1.\n", + " \"\"\"\n", + " return self.__alpha\n", + "\n", + "\n", + " @property\n", + " def bias_correction(self):\n", + " return self.__bias_correction\n", + "\n", + "\n", + " @property\n", + " def bootstraps(self):\n", + " '''\n", + " Return the bootstrapped deltas from all the experiment groups.\n", + " '''\n", + " return self.__bootstraps\n", + "\n", + "\n", + " @property\n", + " def jackknives(self):\n", + " return self.__jackknives\n", + "\n", + "\n", + " @property\n", + " def acceleration_value(self):\n", + " return self.__acceleration_value\n", + "\n", + 
"\n", + " @property\n", + " def bca_low(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__bca_low\n", + "\n", + "\n", + " @property\n", + " def bca_high(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval upper limit.\n", + " \"\"\"\n", + " return self.__bca_high\n", + "\n", + "\n", + " @property\n", + " def bca_interval_idx(self):\n", + " return self.__bca_interval_idx\n", + "\n", + "\n", + " @property\n", + " def control(self):\n", + " '''\n", + " Return the name of the control experiment group.\n", + " '''\n", + " return self.__control\n", + "\n", + "\n", + " @property\n", + " def test(self):\n", + " '''\n", + " Return the name of the test experiment group.\n", + " '''\n", + " return self.__test\n", + "\n", + "\n", + " @property\n", + " def bootstraps_delta_delta(self):\n", + " '''\n", + " Return the delta-delta values calculated from the bootstrapped \n", + " deltas.\n", + " '''\n", + " return self.__bootstraps_delta_delta\n", + "\n", + "\n", + " @property\n", + " def difference(self):\n", + " '''\n", + " Return the delta-delta value calculated based on the raw data.\n", + " '''\n", + " return self.__difference\n", + "\n", + "\n", + " @property\n", + " def pct_interval_idx (self):\n", + " return self.__pct_interval_idx \n", + "\n", + "\n", + " @property\n", + " def pct_low(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_low\n", + "\n", + "\n", + " @property\n", + " def pct_high(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_high\n", + "\n", + "\n", + " @property\n", + " def pvalue_permutation(self):\n", + " try:\n", + " return self.__pvalue_permutation\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__pvalue_permutation\n", + " \n", + "\n", + " @property\n", + " def 
permutation_count(self):\n", + " \"\"\"\n", + " The number of permuations taken.\n", + " \"\"\"\n", + " return self.__permutation_count\n", + "\n", + " \n", + " @property\n", + " def permutations(self):\n", + " '''\n", + " Return the mean differences of permutations obtained during\n", + " the permutation test for each experiment group.\n", + " '''\n", + " try:\n", + " return self.__permutations\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__permutations\n", + "\n", + " \n", + " @property\n", + " def permutations_delta_delta(self):\n", + " '''\n", + " Return the delta-delta values of permutations obtained \n", + " during the permutation test.\n", + " '''\n", + " try:\n", + " return self.__permutations_delta_delta\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__permutations_delta_delta\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "and the standard deviation of the delta-delta value is calculated from a pooled variance of the 4 samples:\n", + "\n", + "\n", + "$$s_{\\Delta_{\\Delta}} = \\sqrt{\\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$\n", + "\n", + "where $s$ is the standard deviation and $n$ is the sample size." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example: delta-delta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0UAAAIaCAYAAADvKOYjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAACRpUlEQVR4nOzdd3xT9f4/8NdJ2qZ779JJoVBoKVBAZhmyRQREULwM/eJVcV0FwauI/mS411VRuQoOEFGGslQEyihSRlsoo4zSUigt3SMdaZuc3x9cIqGDpk16muT1fDz6kJzP55zziulp8s75nM8RRFEUQUREREREZKFkUgcgIiIiIiKSEosiIiIiIiKyaCyKiIiIiIjIorEoIiIiIiIii8aiiIiIiIiILBqLIiIiIiIismgsioiIiIiIyKKxKCIiIiIiIovGooiIiIiIiCyaRRVFOTk5eO2115CTkyN1FCIiIiIiaicsrih6/fXXWRQREREREZGWRRVFREREREREt2NRREREREREFo1FEVE7oa6pRlXRNdSpKqWOQkRERGRRrKQOQGTp1DXVyNz9Na6f3A1NbTUEuTU8IwcjbOT/wdreRep4RERERGaPRRGRhERRxJkfX0Pp5dS/l6lrkZ+6BxXXLyHmkQ8gs7KRMCERERGR+WNRRCShkktJOgXRrSrzMnFh24dQlRZAU1cD56Du8O8zAbauPm2ckoiIiMi8sSgiklDRxaNNtuef2qf9tzLnAq6n/I5uD74B5w5djB2NiIiIyGJwogUiKYmiXt3Vqkpc2PahcbIQERERWSgWRUQScgvvo/c6VQVXUJ6dZoQ0RERERJaJRRGRhNw69oZzYDe916upKDVCGiIiIiLLxKKISEKCICBy+mvw7TUWMmvFjWVyK8gVDk2sJIODV3AbJSQiIiIyf5xogUhiVgp7hI97CiEjHkFNeSFsHFxRcO4vXNz2UYP93Tv3g62bbxunJCIiIjJfJnOm6LXXXoMgCDo/XbpwBi4yH1YKe9h7BsLKzgm+MaMQ0H8KIOgeos6B3dD5nmclSkhERESWrCIvE1n71+HSH18ia/86VORlGnV/s2fPhiAIePzxx+u1zZs3D4IgYPbs2QbZl0mdKerWrRv+/PNP7WMrK5OKT6SX0BGPwK/3PShMS4C6VgWX4O5wCeoudSwiIiKyMFVF13D+1/dRfvUsIMggCDKIogZZ+9fCqUNXdL73edi5+xtl34GBgVi/fj0++OAD2NnZAQCqq6uxbt06BAUFGWw/JlVVWFlZwdeXw4bIcti6eiPgrklSxyAiIiILVVV0DSe+/hfqVJU3FogaiKJG216efQ4nvv4XejzygVEKo169eiE9PR2bNm3CjBkzAACbNm1CUFAQQkNDDbYfkxk+BwAXLlyAv78/wsLCMGPGDGRlZTXZX6VSoaysTPujVCrbKCkRERERkek7/+v7NwqiWwohHaIGdapKnP/1faNleOSRR7B69Wrt46+//hpz5swx6D5Mpijq168f1qxZg99++w0rV65ERkYGBg8ejPLy8kbXWbFiBVxcXLQ/cXFxbZiYiIiIiMh0VeRl3hgy11hBdJOoQfnVs0a7xujhhx/GwYMHcfnyZVy+fBkJCQl4+OGHDboPkxk+N3bsWO2/o6Oj0a9fPwQHB2PDhg149NFHG1znpZdewvPPP699nJKSwsKIiIiIiKgZCtMO3Zj06U5FEQAIMhSmHYKDd4jBc3h5eWH8+PFYs2YNRFHE+PHj4enpadB9mExRdDtXV1
d07twZFy9ebLSPQqGAQqHQPnZ0dGyLaEREREREJq+uWqmdVOFOBEGGumrjXaryyCOP4KmnngIAfPrppwbfvskMn7udUqlEeno6/Pz8pI5CRERERGR2rGwdm1UQAYAoamBla7wTEGPGjEFNTQ1qa2sxevRog2/fZIqi+fPnY9++fcjMzMShQ4cwadIkyOVyPPjgg1JHIyIiIiIyOx5dBjRv6BwAiJob/Y1ELpfj7NmzOHPmDORyucG3bzLD565evYoHH3wQhYWF8PLywqBBg3D48GF4eXlJHY2IiIiIyOw4eIfAqUNXlGefa7o4EmRwCogwyvVEt3J2djbatgVRFEWjbb2dSUpKQu/evXH8+HH06tVL6jhEWjXKIuSd3ANVWT7s3APgHTUMVnZOUsciIiIiC6dzn6KGCiNBBiuFvdHuU9RWTOZMEZG5yjsVjwtbP4CortMuuxz/LbpM+TfcOrJ4JyIiIunYufujxyMf4Pyv79+YnluQ/T35gqiBU0AEOt/7vEkXRACLIiJJVRVl48Kv70PUqHWWq2uqcPbnZQjoPwUl6cegrq2BS1A3+PedaPJ/dIiIiMi02Ln7o8fsd1GRl4nCtEOoq1bCytYRHl0GGH3IXFthUUQkodyknfUKops0tdW4sn+t9nFlXgbyTu5G5PTX4RLUra0iEhEREQG4cY2RuRRBtzOZ2eeIzFFlYbZe/dU1Vbi4/WMjpSEiIiKyTCyKiCSkcPLQe52qwqsou5pmhDRERERElolFEZGEfGJadvOxuqoyAychIiIislwsiogk5OTfCcFD/6HfSoIM9l4hRslDREREZIk40QKRxAIHTYdraAxyk3+HqqwAdu7+sHZ0R1b8tw3294joD1tX7zZOSURERGS+WBQRtQNOAV3gFNBFZ5lYV4Orh37SmZ3OJaQHOt3zbFvHIyIiIjJrHD5H1E4FD/0HYp/6GmGjH0fI8DnoMec9RD28HFa2DlJHIyIiIjKqzz//HE5OTqir+/vm9kqlEtbW1hg6dKhO3/j4eAiCgPT09Bbvj2eKiNoxhbMn/PtMkDoGERERETKuFeDgyYtQVqngaKfAoOhwhPp7GmVfw4YNg1KpxLFjx3DXXXcBAA4cOABfX18kJiaiuroatra2AIC9e/ciKCgIHTt2bPH+WBQRWbjY2Fjk5ubC19cXx44dkzoOERERtTPZ+SV4e93vOJOZA5lMgEwQoBFFfPv7YXQL9cOCB0cjwMvVoPuMiIiAn58f4uPjtUVRfHw8Jk6ciD179uDw4cPaM0bx8fEYNmxYq/bH4XNEFi43NxfZ2dnIzc2VOgoRERG1M9n5JXj6wx+QlnXjc4JGI6JOrYFGIwIAzl7OxdMf/oDs/BKD73vYsGHYu3ev9vHevXsxdOhQxMXFaZdXVVUhMTGRRRERERERERnH2+t+R0V1jbYIup1GI6Kiugbv/PCHwfc9bNgwJCQkoK6uDuXl5UhOTkZcXByGDBmC+Ph4AMBff/0FlUrFooiIiIiIiAwv41oBzmTmNFoQ3aTRiDidcQ0Z1woMuv+hQ4eioqICR48exYEDB9C5c2d4eXkhLi5Oe11RfHw8wsLCEBQU1Kp98ZoiIiIiIiKq5+DJi5DJhDsWRQAgkwk4mHrRoBMvhIeHo0OHDti7dy+Ki4sRFxcHAPD390dgYCAOHTqEvXv3Yvjw4a3eF88UERERERFRPcoqFWSC0Ky+MkGAslJl8AzDhg1DfHw84uPjdabiHjJkCHbu3IkjR460eugcwKKIiIiIiIga4GingEa881kiANCIIhztFQbPMGzYMBw8eBApKSnaM0UAEBcXhy+++AI1NTUsiojMjbqmCtWledDU1UodhYiIiCzcoOjwZg2dA25cVzQoOtzgGYYNG4aqqiqEh4fDx8dHuzwuLg7l5eXaqbtbi9cUEbUDNRUlyPjzKxScOQBRXQsrW0f49ByN4Lh/QGZlLXU8IiIiskCh/p6IDPFDWlZuk8WRTCaga7AfQv
0MfyPXkJAQiA2crQoODm5weUvxTBGRxNQ1VUj9bhHyU/dAVN84Q1RXrUT2XxuRtmmFxOmIiIjIkr340Gg42NpAJmv42iKZTICDrQ0WPDiqjZMZFosiIonlndyNqoIrDbYVnU9E2dWzbZyIiIiI6IYAL1f857kH0TX4xhA1mUyAlVymLZK6BvvhP889iAAvVwlTth6HzxFJrPB8YpPtRecT4dyhaxulISIiItIV4OWKD595ABnXCnAw9SKUlSo42iswKDrcKEPmpMCiiEhqdxgPqyrLR8mlZDj6d4KVrWMbhSIiIiLSFervadD7ELUnLIqIJOYWHouSjORG2/NPxSP/VDxk1gr49R6PkOGzIcjkbZiQiIiIyLzxmiIiifn0GAmFq88d+2lqVcg+vAkZu1e3QSoiIiIiy8GiiEhiVrYOiP7HW3DvfBcg/O+QbOLu0bnHd6C2qryN0hERERGZPw6fI2oHFC5eiHxgMWoqSlBbUYLkL59qtK+mToXyq2fh3qlvGyYkIiIiMl8siojaERsHV9g4uEJmbQNNrarRfjIrRRumIiIiIjJvHD5H1A55dhnUaJu1gyucg7q1YRoiIiIi88aiiKgdChz8IKzsnRtoERAy4hHI5DzJS0RERGQoLIqI2iE7dz/0mP0evLoPhSC3BgA4d4hE5LRX4RM9QuJ0REREROaFXzcTtUMadS3Ks89Bo66Fa1gvuIbGwKfH3bBS2EsdjYiIiMjssCgiamfqVJU4ve4VlGef0y4rvpCIa0e2IOofb8LWxVvCdERERETmh8PniNqZy/Hf6RREN6lKriN9xycSJCIiIiIybyyKiNoRjboOeSd3N9penJ4EVWl+GyYiIiIiMn8siojaEXVNFdSqiiZ6iFCVFzS5jRplEWrKiwwbjIiIiMiM8ZoionbESmEPawdX1FaUNNxBEHAl4UfYOLrDu/twuAR31zYVXTyGrH3fQZlzEQDg4NsRwXEPw71T3zZITkRERGS6eKaIqB0RZHL4xoxuvIMoovjCUVxP/h2p3y3Ehe0fAwCKLh7FmR9f1xZEAFCRm44zG95A4bm/mtynr68vAgIC4Ovra5DnQERERGRqeKaIqJ0JHPIglHkZKL5w5I59ryf/DpfgaGT/9TMgaup3EDW4HP8dPCL6N7qNY8eOtSYuERERkcljUUTUzsjk1ug2bQlKL6ei8NxfqCrMRnF644XLtSO/ouJ6RqPtlfmXUV2cC1s3ngkiIiIiagiHzxG1Uy7BUQgb9Rjcwns32a/mDhMvAIAoioaKRURERGR2TLYoevPNNyEIAp577jmpoxDdUV11Ba4d/RXnf3kfl/74UnvtT52qEjnHd+DSrv8iO3ELaitLddarrSiFIGv6hK6dRwfYewY12d7UWaLY2Fh06NABsbGxejwjIiIiIvNhksPnjh49ii+++ALR0dFSRyG6I2XOBZz64VXUVZZpl1078gs8ugxESUaKzhTcl/d+g073Pg8nv0649McXKLp47H/XCgkAGj7b49d7HAABaRtXNNgnaMhDEASh0Xy5ubnIzs5u4bMjIiIiMn0md6ZIqVRixowZWLVqFdzc3KSOQ9QkUaPG2Z+X6xRENxWmJdS7J5Gmrgbnt7yDE2vmo+jCkVsmT2i4IPLvOxHOQd1RXZILl9AYWNk5a9ts3fzR+b4F8OoWZ7DnQ0RERGSOTO5M0bx58zB+/HjcfffdWLp0qdRxiJpUnH4cqtI8vdYRNWrUVhQ32u7euR9s3fzg3X0YVGX5OPafOdDU1fzdQWaFkBFzENB3YpNniIiIiIjoBpMqitavX4+kpCQcPXq0Wf1VKhVUKpX2sVKpNFY0ogZVF+cafJtO/hEIHDQNqvJCnFgzH6K6VreDpg6Xd6+GZ8QA2Lp6G3z/RERERObGZIbPXblyBc8++yzWrl0LW1vbZq2zYsUKuLi4aH/i4jiMiNqWwghFSUVeJq6f+BPZhzfXL4j+R9TU4XrK7wbfNxEREZE5EkQTma
t3y5YtmDRpEuRyuXaZWq2GIAiQyWRQqVQ6bUD9M0UpKSmIi4vD8ePH0atXrzbLbmxqjQaJpzOQlpULB1sFhvXqDG835zuvSEYnatQ4+p9HmjVttqF5Rg5Gl8mL7tivQ4cOyM7ORkBAAK5evdoGyYiIiIjaF5MZPjdixAikpqbqLJszZw66dOmChQsX1iuIAEChUEChUGgfOzo6Gj1nW7teVIZ/f7kFWdeLtMu+3pGAmaPvwoxR/SRMRgAgyOToMuUlnF7/KtTVupMqOAd2Q9mVM7h9EoWAfpNRkHZQ72uRbqdw9mzV+kRERESWwmSKIicnJ3Tv3l1nmYODAzw8POottyT/b812nYIIADQaEWt2/oVQP08MiOooUTK6yblDF8Q+uQrXU3ZBmXsRVraO8I4aDufASChz05F7fAeUuemorSxDXbUS+Wf2wTU0BrUVpSjJSIaoUQOC7JaZ6JpDgE+PUUZ7TkSWTlWaj8ILiRDVtXANiYGDT6jUkYiIqBVMpiii+s5kXsP5K9cbbd9yIIVFUTthbe+CDgPur7fc0bcjPLsNQV7qHu0McmpVBfJO7oaNozt6zPkAMhsFklb+U4+9CQgd+X+w92r8hq5E1HIZf36F7MQtOl9UuHe+CxGTFkBu3bxrXomIqH0x6aIoPj5e6giSyswtukN7YRsloda49NvnulNq/0+NsgjZiZsQcd8CyKxtoamtbnQbHl0GQlOngq2LD3x6joajL4thImPIObYN2Yc31VtedP4wLv3+JTrd84wEqYiIqLVMuiiydO5O9k23Ozu0URJqKeX1S6gsyGq0veDsQXj3GAnnoG4oST/eYB9bN390mfIS70lE1AayE39ptC0vdQ9Chs+Ctb1LGyYiIiJDMJkpuam+Pl1C4OHSeOEzum9kG6ahllCrKptsF9V1OL325UYLIkFujY5jnmBBRNQG6lSVqC6+1mi7qK5FZf6VNkxERESGwqLIhMnlMrz40GjY2tQ/4denSzAmDIyWIBXpw8E7FDJrxZ073kJuYwdrB1d4Rg5Bj9nvwq2j+UwvT9Seya0Vdzxereyd2igNEREZEosiE9ercxC+WPAwpgzthYggHwR4uqKDlytq6tTYdigVVaqGb+5J7YOVrQN8e43Vax1Ro0avxz9Hl8kL4egXbqRkRHQ7QSaHV7fGbwLu6BcOB6/gNkxERESGwqLIDPh7umJM3264XlSG7IISXM0vwYmLV/Hppng89/GPKK9s/AJ9kl7I8Dnw7TkGgqz+vbYaoqmrQfnVs0ZORUQNCR76D9i6+ddbLlc4oOPYeRIkIiIiQ+BEC2bigw1/okRZVW/5pWsF+GbnX3hqyjAJUlFzyORWCB//NAIHP4jSyychyK1x/tcPINapGl/HSr8hd0RkGDaO7ujxyPvIObYdhecOoa5KCYWLNzy6DoSDd4jU8YiIqIV4psgMXMkrwpnMnEbbdx07C7VGnxt/khQUzp7wjhoOr8jB8Ioc1Gg/a0c3OAd1a8NkRHQrazsneEcNBwCoSq+jLCsVGb9/jiMfzULh+cMSpyMiopZgUWQGisqansGssroGqpq6NkpDhhA0+MFGpvUVEDr8EcjkPMlLJBVR1ODMj6+hIjddZ3ldVRnSNq5ARf5liZIREVFLsSgyA4HebpDLGn8pfdycYKewbsNE1Fq2bn6Inv0evKKGQ2ZlA0CAc2A3RE5/Dd7Rw6WOR2TRitOPo7KRwkdU1yHn6NY2TkRERK3Fr5vNgLuzA4bEdMLepHMNtk8cHMP72JggO3c/REx8AZj4AkRRA0HgdxhE7YHy2oWm23OabiciovaHRZGZePb+4SgsVeJkerbO8rF3dceUON7HxtQJggwadS0KzhxESUYyBJkVPCLuglt4LIslojZmZevYqnYiImp/WBSZCQc7Bd57aipOpl9F0vkrsJbLMCg6HMG+HlJHIwOoURbh1Pcvo7IgS7vsesrvcAnpgchpSyDX8wawRNRynpGDkbH7K4jqhq
/V9IriEFciIlPDosjMRHfsgOiOHaSOQQZ2ccenOgXRTaWZJ5C1fy1CRzwiQSoiy2Tj6IbQEY/i0h9f1Gtz6xgL7+5D2z4UERG1CosionZOVVaAogtHGm2/nrILIcNmNfvmr0TUev5974W9VxCuHf0VFdczYe3gAp8ed8MnZjSPRSIiE8SiiKidU5XmAWLj95mqqypDnaoS1nZObZiKiFxDY+AaGiN1DCIiMgAWRUTtQGnWaVw/sQs15YWw9wyEb6+xsPcMBAAoXLwBQdZoYWRl5wQrhX1bxiUiIiIyKyyKiCSWuecbXD20Qfu45FISco5tQ6cJ/4JXtyGoLrkOR79OUF5reMp1nx4jWzVcx9fXV+e/RERERJaGRRGRhEovp+oURDeJGjUubP0AGX9+jdqKov8tFQCIOv2cg6IQFDejVRmOHTvWqvWJiIiITJ3RiqJr167B39/fWJsnMgu5KX802iZq1LcURMDNgsjOowOcAiLgEdEf7p368qJuIolUFmQh59h2VORdho2DC7x73A338D5SxyIiohYwWlHUrVs3fPrpp3jooYeMtQsik1dTXqj3OqrSfETPfpcTKxBJqODMAZzb8i5Ezd/3Kio4exA+MaPQ6Z5nJUxGREQtITPWhpctW4Z//vOfmDp1KoqKiu68ApEFsnMP0HsdTZ0K5VfPGiENETVHXXUFzm/9UKcguul6yh8oPPeXBKmIiKg1jFYUPfnkkzh58iQKCwsRGRmJrVu3GmtXRCbLr/e4GzPL6UlmpTBCGiJqjoIz+6GprW60valhsURE1D4ZdaKF0NBQ7NmzB5988gkmT56Mrl27wspKd5dJSUnGjEDUrjn4hCJ83FNI3/kpRI1au1yQW0FU1/8WGgCsHd3gHNStrSIS0W1qlMVNt7dgWCwREUnL6LPPXb58GZs2bYKbmxsmTpxYrygisnS+PUfDLTwWeSd3a+9T5NQhEqfWvYy6yrLbegsIHf4IZHIeR0RSsfvfPcQaY+8Z1EZJiIjIUIz6yWrVqlV44YUXcPfdd+P06dPw8vIy5u6ITJbCyQOBAx/QWdZj9nvI2r8OhWkJ0NTVwDmwGzoMnMrZrYgk5hHRHzZOnqgpL2igVYBf7D1tnomIiFrHaEXRmDFjcOTIEXzyySeYOXOmsXZDZLbs3P0Rcd98iOILgKjh1NtmoLqmFgdOXERRWQWCfNzRNzIEcpnRLu0kI5HJrRA57VWcWf8aapR/TyQkyOQIG/1POHfoImE6IiJqCaMVRWq1GidPnkSHDh2MtQsiiyAIAiCwIDJ1f526hLfX/Q5llUq7zNfdGf/v/+5FqJ+nhMmoJRx9OyL2qa9QcOYgKvIyYO3gCu/uw2Dj5C51NCIiagFBFEVR6hBtJSkpCb1798bx48fRq1cvqeO0StL5LGxNOIlrBSXwdnPGuLu6o3/3MKljkYGJooiK65egqauBg08o5Na2UkeiFriaX4zH3v4etXXqem1ero5Y8/Js2PB6SyIiIsnwXdgErf0jEWt2/n0fjEvXCnD49CVMHtITT0yKkzAZGVLRhSPI2PVfVBVlAwDktg7w7zMRQUMeunH2iEzG1oMnGyyIACC/RIn9KRdwd2zXNk5FREREN3Ewu4nJul6kUxDdatP+ZJy6dM1o+46NjUWHDh0QGxtrtH3QDaVZp3H2p6XagggA1NUVuHJgHS7HfythMmqJ9Gv5TbdnN91ORERExsUzRSZm19EzTbav23UEHQM8YSWXY3CPcIT5G27Gv9zcXGRnZ9+5I7Xa1YQfde5bdKtrR35Fh/73w8rWoY1TUUs5O9g12e5yh3aipsTGxiI3Nxe+vr44duyY1HGIiEwSiyITU6KsarL9aFomjqZlAgC+/yMRI/t0xfzpoyCTcbiVKSnJSGm0TVNbjbKrZzg1twkZ1acrDpy40GCbTCZgeG/OVkYtxy+siIhaj8PnTExHPc/87Dp6Fj/tPW6kNGQsgqzp7ytkd2in9qVfZChGNnLN0G
MTBsPbzamNExEREdGtWBSZmJF9usLJXr8ZyH45eAIWNMmgWfCI6N9om7W9C5yDurdhGmotQRCw4KFR+Pc/xqJX5yAE+7hjcHQ43p03BVOGmvZMmEREROaAXzebGAc7BZY9NhGvfb0NRWUVzVonv6QcVapa2NvaGDkdGUrg4OkoSj8KdXX91zh46D8gs7KWIBW1hiAIGNYrAsN6RUgdhYiIiG7DosgEdQ32w/eLH0FC6kVkF5TCWibDqm0HG+3vYGsDhQ1falNi7xmI6Fnv4HL8dyg6nwiIGjj4dkTggKnwjBwsdTwiIiIis8JPyibK2kqOoT3//sb50OlLOJ3R8HTcI/tEQi7jSElT4+AVjMipr0BTVwONug5WCnupIxERERGZJX5SNhPPT7sb7k71PzSHB3hh1ti7JEhEhiKzsmFBRERERGREPFNkJoJ83PHFiw9j+1+nkHw+C1ZyGQZHd8LdsV05dI6IiIiIqAn8tGxGXB3tMWNkX8wY2bdeW51ajb1J57EnKQ2V1TXoGuyLewf1gL+na9sHJTIzJy5ewfkreXCyt8Xg6HA42CmkjkRERER6YFFkYtQaDY6cyUROYQl83V3QLzIUcnnToyBr6uqweNWvSDqfpV12JjMH2/9Kxf/7v4no2SnQ2LGJzFJBiRKvfvUrLlzN0y77dNNezJs8DGP6dZMwGREREemDRZEJOZd1Hf9vzTbkFZdrl3m6OGLx7HGIDPFvdL1fD57UKYhuqq6pw9trf8f3ix+5Y2FFRPW9tnqrTkEE3DiuPvjxT3TwckX3sIBmbUcURahq62BrozvVesa1AlRUqxDi5wFHO/3uT0ZERETNx6LIRFRUqfDvLzejrKJaZ3lBqRL//uIXTBwcjUOpl6CsUqFLsC+mxPVC97AbhdLvR043ut2CUiWOn7+Mvl1DjZqfyNycunQN57KuN9imEUVs2pd8x6KoolqF735PxB9HTqO8UgVvNydMGBiNLkG++GzzPmTkFAAAbG2sMKZfdzx272BYW8kN/lyo/RBFEaWZJ6Eqy4Otmx9ceKNmIqI2YTJF0cqVK7Fy5UpkZmYCALp164ZXX30VY8eOlTZYG/nj6Jl6BdFNFdUqrNt1VPv44MmLOJSajgUPjcLdsV1RXFbZ5LaL7tBORPVduNpwQXTT8fNZ+McbX0MQBPSLDMX9Q3vBx91Z215TV4dFKzch7ZbCKq+4HF9tS4BMEKARRe3y6po6bDmQgkpVDRY8OMrwT8ZClF1NQ3H6cQgyGdw794OjT5ikeVRlBajMvwxre2c4+nWCMucC0ja9jeriv2+vYO8VjC5TXoK9J4c5ExEZk8kURR06dMCbb76JTp06QRRFfPPNN5g4cSKSk5PRrZv5j92/cCXvzp1uoRFFfLopHoOiwxHs646T6dmN9g3182htPCKL4+Jg12R7ZXUNKqtrAABbDqRgb9I5vPfU/Qj2vXG87Tl+TqcgutWtBdGt/jx6FjNH36VTXNGdqWurkfbzChSnH9Muy9r3Pby6xaHTvf9CScYJKK+dh9zWAV6Rg2Hj6A4AqK0sRXVJHmwc3aBw9jRYnjpVJS5u/w8Kzh4ERA0AwM6jA2qURVCrdL+kqsy/jFNrX0HvJ7+A3JpDKImIjMVkiqIJEyboPF62bBlWrlyJw4cPW0RR5GSv/5uhskqFLftT0KmDd6NFUddgX0QE+bY2HpHF6d+9I+xtbbSFz52UVlThi1/2Y/k/JwEAEk5e1HufGlFE8oUrnMRBTxm7vtIpiG7KP70PxRkpqKss1S7L/PNrBA6ejqrCbBScPQBRXQdAgFvH3ug4dh5sXb1bnSdt4wqUXErSWVZVeLXR/jXlBcg/tQ++PUe3et9ERNQwkymKbqVWq/HTTz+hoqIC/fv3b7SfSqWCSqXSPlYqlW0Rzyjuju2KTfuT9V7vq+0JjbaF+Hlg8ezxrYlFZLHsFNZ4dupwvLX2d2g0DZ/Zud2xtMt4ZdUvKK+sxvXishbtl9cU6a
euugJ5qbsbb7+lIAIAUVOHrH3f39ZLRHH6MaR+twg9H/tE52bKpZdTUZGXCWsHV7h36gu5df3p2FXlhSg8mwB1TRWs7JzrFUTNUZ6dxqKIiMiITKooSk1NRf/+/VFdXQ1HR0ds3rwZkZGRjfZfsWIFXn/99TZMaDydAr0xZWgvbIzX/830VnY21hg/IAox4YHo0zUEMplgoIRElmd4ry4I8HTF5v0pOH/lOmysrZCend9ofxFA4pmMFu9PYW2FPl1CGm2PjY1Fbm4ufH19cexY/TMjlqi6JBeaWtWdOzaDqvQ68k78Cf++96K6JA9nf3oDFdcvadut7JzQ6Z5n4RHx95d1WQd+wJUDP0DUqFu1b/kthRgRERmeSc3DHBERgZSUFCQmJuKJJ57ArFmzcObMmUb7v/TSSygtLdX+7Nu3rw3TGt7jE4fg1Tn3oFfnIPh5uCCmUyBmjr4LMqH5hU1VTS28XJ3Qr1soCyIiA4gI8sWih8fg65dm4ePnpt3xWqPmsGpkivwH7+4DZ4fGh9Lm5uYiOzsbubm5rc5gLqwdXAEY7m9d8aXjEEUNzqxfolMQAUBdVTnSNr2JivzLAID8MweQte/7VhdEAODdfVirt0FERI0zqTNFNjY2CA8PBwD07t0bR48exUcffYQvvviiwf4KhQIKxd9DGRwdHdskpzENjg7H4OhwnWW+Hs74dFM8Kpp5bcPpjGuYHNfTGPGILJqNlRUmDu6Bb3873KL1ZTIBA7p3xIyRffFzfBL2n7iA2jo1gnzcMXVYb15L1AIKJw+4hvVs0ZC1hgiCDMXpx1FZUP/ebwAgquuQc3Qrwsc9hWuJWwyyT9/e4+DoF37njkRE1GImVRTdTqPR6FwzZOpq69TYm3QO8SnnUV1Ti+iwANwzMBqeLk0XcyP7RGJQdCfsP3Eel64VIONaAZIvXGm0v63CutE2ImqdGSP7oVRZha2HTjb7WqNRfSMxMrYrArxc4eXqBABY9PAYzH9wJGpq1bC3tTFmZLPXccyTSP1uIWrKC3UbBJl29rfmsnZ0R/6p+Cb7KHMuAAAqrus3VNLOIxC+vcYiL3U3VGX5sHPzh2/vcfCJHqHXdoiISH8mUxS99NJLGDt2LIKCglBeXo5169YhPj4ev//+u9TRDKK6phb//nILUm+ZJS41PRu/JpzAin9Ohlwm4Ni5y5DLZOjfPQwdvNy0/URRxI97jmHzvmRUqu58tmhoz84tyujr66vzXyKqTyYT8NSUYZg2IhbH0i5DEASkpmfjj6OND/WNCgtATKf696GxksthJefECq1l5+6HnnP/g9yk31CcfhwQZPCI6AcbJ0+c/+U9iOpanf5W9s6oq2x4Iozryb/dcX9Wtje+yLK2d4aqrPFrzBQu3lCV5kFu6wDvqBEIGvwgrO2dEdBvoh7Pjn+biYgMwWSKory8PMycORM5OTlwcXFBdHQ0fv/9d4wcOVLqaAbx057jOgXRTeWVKiz47GdUqf5+01619QDG3RWFZ+4fDplMwHe/H8baPxKbtZ/B0eGIjQhuUUZeuE3UfF6uTugc6IO/Tl+CrY0VZDKhwTNHro72GNYzQoKElsXa3gWBg6YhcNA0neUO3sHIOboN5dfOw8rWAV7dh8Kj6yBcTdiA3OTfbhRHep5R8owcjMJzf8HBt2OjRZHC1Qex8/4LiCIEWesKX/5tJiJqPZMpir766iupIxjVb0dON9p2a0EEAKIIbP8rFX4eLrh3UA9s3Nf4VN0KaysIAuDt5ozx/aMwcXAPCMKND2fHzmUip7AMfh7OiI3gTHREhqJWa/DWut+xN+mcznJBECDecmNWNyd7vPF/90JhYzJ/is2OvWcQOo59st7ykGGzEBz3MEoyT+L0uleavz3vUGTsXg11deO3gJBZK9DpnucgCDJDzgFBREStwHfidqKorELvdX45mIIuQb5N3jxSVVuHwT3CkZlTiP0nLsDWxgqh/p5Y/u1O5Bb9PTzE190Zi2ePR+
dAnxblJ6K/ffdHYr2CCLgx1DUqLADdwvwR5O2OuJhOsLHmn+H2SpDJUV3U8I2vb5JZKWDt4AprBxc4deiKnCO/4sbk67qs7JygcPaCc2Ak/PveCzv3ACOlJiKiluC7cTsR5OOOS9cK9Fonv0SJWvWdp3o9cOLi//5VjNMZ12All6FOrTsUJLeoDP/+YgvWvDwLjnYNT/nLe6AQ3VltnRrbEk422n7hah6Wzp3IyRNMhNym6fsDCbL/Da0TNSjNSEFDBREA1FUp0WPOe0Yphvi3mYio9UzqPkXm7L7BMXqvY2MtR/dQf/i4Oem13u0F0U2lFVX440jjF4PzHihEd1ZcXonSiqpG26trapFbVNqGiag13Dv3g8xa0Wi7uqYKqrJ8KHMuovJ/9ydqmIjyq/XPHhoC/zYTEbUei6J2Yuxd3TFpSAxuvw+rrU3j02c72Cqw4LOfEeLnAUGPG7g25VzWdYNsh8hSOdkrYN3EjHEyQYCrY9NnH6j9sLJ1QMiIRwyyLbmi9Tf2JSIi4+DwuXbkyUlDMWFgNPalXEC1qhZRHQPg6eqIhZ9tavCb5+LyShSXVwK4cdYo0MsN6dcKYCWXIcDLDZdzC+utcyeO9g0PnSOi5rFT2GBwj3DsaeCaIgCQy2V48LX/ItDbDRMHx+CeAVEG+1KDjMM/9h7Yuvog+/BmKHMuQhBkqKsu12sbVnbOcOvY20gJiYiotVgUtTOB3u54eFQ/nWWfz5+BXxJO4HjaZRSXV6KgtP6sRjW1ahSVV6JzBy+cv5qPrBYURAAwoneXFq1HRH977N4hSMu6jmsFJfXaautuXAd4+XoRPv55DzJyCvDM/cPbOCHpyz28D9zD+wAAsvavQ9b+tc1fWZAhbNRjkFnxOjIiovaKw+dMgKerIx4dPxCfvfAQbKwbH5ZTXF6J81dv3BOj4Ut9mzZhYDQiQ/xamJKIbvJwccBnzz+Ix+4djKiOAQjwcm2079aEk8jMadmXGCQNR/+mb4Dt4NsRClefG2eHOvVF1MPL4d65H/JS9yLn+A5UXM9oo6RERNRcPFNkYm4Ol2spe4UNnp06HBCAbYdSkVtYBl8PZ9wzIArDe/EsEZGhONgpMHVYb0wd1huvrPoF2fkljfbdl3IeIX792y4ctYiqvBBVhdlQuHrD3iu44YkVBBlqKkqgrlLC3isQnl0HobIgC6fXvwZNbbW2m2tYL3SZvBBWto5t+AyIiKgxLIpMTIivB85e1m+GIQHAI/cMhLuTAwZFh2unAmYRRNQ2VLV1rWonadVVlePijk9QkHboxvTbABx9O8HeKwiV+VnafoJMDlGjRm35jTN/ypyLuPDr+w1us+RSEs7/8h4ipy0x/hMgIqI74vA5EzNpSE+91xFxY8rvUX0jeW8UIglEhzV9b5rojryRZ3sliiJOr38NBWcPagsiAFDmXkBtVTkipy1Bx7FPwrf3PRA1d75v3K2KLhxFVWHTN4clIqK2waLIxAzrFYEZo/pBJtOdrer2x7fqFurf5NTeRGRc4wdEwdmh4ZkdOwZ4oW/X0HrLK6pUSDyTgaNpmVDV8EySVEouJaE8O63BtlplMSryMuHXezxUJS25R5CI8pwLrQtIREQGweFzJmj22P4Y268b9p+4gCpVDbqF+uN81nV8veNQvb4yQcCMUX0lSElEN7k7O+CtJybj7bV/ICOnQLu8d0QQFs4YrfOlhiiK+Oa3w9gYn4TqmloAN+59NHN0f9w3JKato1u8kswTTbdnpMA3ZjTqVBUt2r6VrUOL1iMiIsNiUWSifNydMXXY3/e86B0RDJlMhg17j6Gs4sbFvL7uzpg7YTD6dAmRKCUR3RQe4I0vX3wY57Kuo7BMiSBvd3TwdqvX74c/j2LtH4k6y8orVfh0czwc7BQY2adrW0UmADJ502fZy66cQeIHDwGC/gMvrB1c4Rqq/5BoIiIyPBZFZmTaiFjEdgnGrmNnobCSY8LAHvB05cxGRMakqqlDysUrqF
Or0T00AC6Odk32jwjyAeDTYFtNbR027ktqdN0fdx9lUdTGPLoMwJWD6xttF9W1//uHptE+gpUNxLoa3WUyOTqOfRIyOd+GiYjaA/41NhM1tXV4a+3v2H/i7/HpG/YexwPDYzFn3AAJkxGZr60JJ7F6xyGUV944O2ttJceEAdF4bOJgyGX6nzm4nFukPdPbYPv1IpQoK+HqaN9gu6+vr85/qfUcfTvCK2o48lP3tGh9l5Ae6DjmSRSmHUT+6f1Qqyrh1KErAu6aBKc73O+IiIjaDosiM/HZ5n06BREA1Kk1WLfrCLxdnTB+QJREyYjM076U8/j4Z90PyrV1amzanwxrKzn+b8KgZm3nan4xftpzHEfOZkKtafxsA3DjGkEbq8b/bB87dqxZ+yT9dJ7wHBy8gpFzbBtUZfmQWdlAc9uZn1vZeQUjaPCDsPcKgoNXMADAftB0BA6a3laRiYhITyyKzEBZRRX+OHqm0faN+5JYFBEZ2Po/jzba9svBE7CxkuNaYSncnR0wqk8kQvw86vW7eDUP8z/9GRXVjX/AvlVsl2BOqy8BQSZHhwH3I6D/FGhqq5GduAVZ+75vtL/cWgGvyMFtmJCIiFqLRZEZyMgpRG1d4/fHuJJXjCpVDewU/DBFZAhVqlpczM5vtL26phbf3TJZwk97j+OR8QMwYWA09iadR1FZBYJ83LE14WSzCyJ7Wxs8Mn5gq7NTywmCALmNHdw69m6yKHIL69WGqYiIyBBYFJkBZ/uG739yk8LaSmfITVlFFfYmn0dxeSVCfT0wMLojrORyY8ckMhvWVjJYyWWoUzc93O1WX28/hO9/T0RNE19g3M5KLoNcJsOAqI6YMbIvgn3rn226VWxsLHJzc+Hr68uhdEbk5N8ZbuF9UHyx/tlCawdX+MWOlyAVERG1BosiMxDq74mOAV5Ib+Sb60HR4RAhAgB2HT2DD3/ajZravz+Yebk6YdnciQj192yTvESmzkoux6DocMQnn9drPX0KIgD4+qVZ8PNwaXb/3NxcZGdn67UP0k9VYTYqC6+gw4CpUDh5IC91j/b6Ilt3f8ht7HHq+5fh4NsR/n0nwsm/k8SJiYioOVgUmYln7h+Olz7fjEqV7lAcmSBg9/E0HDqVjj5dQnAw9SI0GlGnT35JOV5e9Qu+fWU2zxgRNdOssf2RfP4KSiuqjLJ9DxcHeLs6GWXbpL8aZRHO//I+SjKStcscfEIR+dAbEAQBmbvXoPzq39d2VhZkIf/0PkRMfAFe3YdKkJiIiPSh/5yx1C5Fhvjh0xcexISB0QjwcoWDnQIAoBFvFEBVqlrsP3GhXkF0U35JORJS09ssL5Gp6+Dlhv88Nx1j+nWDg60NFNZWCPMz3NnWKXG9IJfzT3R7IIoanFr3qk5BBAAV1zOQ9vMylF85q1MQ/b2iBhd3fgp1jXEK55t8fX0REBDAqdiJiFqBZ4rMSAcvNzxz/3BcupaPf76zVu/1M3IKERfTeDvvgUKky8/TBS9MH4kXpo8EAFRUq/DQa1/VO2N7JzJB0H6BYS2XY1JcDO4fyov124uiC0dQmZfRYFtdZRmuHdvW6LpqVSUKz/0F76jhxorH68eIiAyARZEZSjjZsjM+bk4N3xDyJr7xEjXNwVaBl2eNxf9bvR2q2rpmrSOXyfDJ89ORca0QgnBj2u3Gbs5K0ijLOtVke11lWZPttXdoJyIi6bEoMlFnL+dgz/FzqKhWoWuwL+6O7aqdcrvuDjeAbIiNtRzDevLu6kSt1bdrKNa8PBu/JZ7G5dxCuDnZY3TfbthyIAW/JZ7W6SsIwLzJQxEe4I3wAG+JEtOdyKwUTbZb2TqgRqlqtN3BJ8zQkYiIyMBYFJmgDzfsxva/UrWPdx09i+//OII3H5+EUD9P9OociHW7jjR7ezKZgGfvHwFnBztjxCWyOJ4ujnh4VD/UqdX4cc9xvLLqFxSUKuFgawM3J3vYKWwQ4uuBCYOi0TXYT+q4dAeekYNx5eD6Rt
udOnRF4blDgFj/mk0H345wDYk2ZjwiIjIAFkUmZtfRMzoF0U1FZRV4Y812TB/RBykXr8DdyR5F5ZX1+rk52uHRCYOQkJqO4rIKhPh5YuKgHgjvwG+piQxt6Tc7dCYwqaiuQUV1DWI6BeKFB0dCLuNECqbAwTsEvr3GIjdpZ4PthWkJ//uXAODvwsjBJxSRUxcbPyAREbUaiyITszXhZKNtV/KK8c4Pf+gsu/UtOqpjAJ6bOgJBPu4Y3beb8UISEU5cvNrojI4pF67gr1OXMCg6vI1TUUt1HDsPDj6hyDm6DZWFVyHIZBDVt183JkKQWyOg70S4hvWES0gPCIIgSV4iItIPiyITk1NYqld/EcDIPl3x8Kh+8Pd0NUomIqrvwIkLd2xnUWQ6BEGAX+/x8Os9HiWZJ3Dq+3832E9U10KjqYNraEzbBiQiolZhUWRifNydUaLU754XR89exvzpo4yUiMhy1dTVYc/xczh48iJq69To2SkQ4/p3h7ODHWrr1HdYt+l2ar/Ksk63qp2IiNofFkUmZnz/KJzLuq7XOiXKSlTX1MLe1sZIqYgsT0W1CotWbkLaLcdj0vksbD6QgnfnTUFMp0DsONz4VM49OwVq/63RiDh3JRc1tXXo1MGHx2o7J7Oxbbrduul2IiJqf1gUmZgx/bohNT0bu46dbfY6ro52sLWxNmIqIsvz3e+JOgXRTUVlFXhv/Z94d94UBP/hjsvXi+r18XV3xt2xXQEAB05exJe/7Edu0Y172dgrbDBxcA/MHjsAMhmvR2mPPLsOQubu1YDY8O0PvLoNaeNERETUWiyKTIwgCHhxxmiM7heJ3cfPoaJKhS7BvjhyNgMpF642uM7Yu7rzwxWRAWk0Iv440vgQqdMZ1/Dqf38FBMDFwQ6lFX8PeXVxsIOjnQIrt+xD50BvfLIpHhrN3zOWVapq8MOfRyGKIh69Z5BRnwe1jK2LNwIHTMWVhB/rtTkFRMA7eoQEqYiIqDVYFJmoHuGB6BH+9/CboT07Y8FnG5GdX6LTr2enQDw8ql8bpyMybzV1dSivbPxmnQBwNO2yzmMfNydcLy5HaUUVSiuqcDE7v97NXG+15cAJTB/RBw52Td84lKQRPGwm7DwDce3IL6jIy4C1vSt8etyNDgPuh9yarxkRkalhUWQmvFyd8MX8h7E3+RySz2fBykqOQVHh6BcZyrNERAZma2MNbzcn5BWXN3ud63r0BYDqmlqcuZyDPl1C9ExHbcU7ahi8o4ZJHYOIiAyARZEZUdhYYUy/bhjTj/cgIjK2ewf2wH+3HTTqPmys5EbdPhEREd3A26kTEbXA/cN6YUTvLkbbvruTPbqF+htt+0RERPQ3nikiImoBuUyGRQ+PwZS4Xjhw8gJq69SoU2uw5UBKq7ctCMCj9wyClZxnioiIiNoCiyIiolboFOiNToHeAIAqVQ3ik8/pdYNlJ3tbxIR3wF+nL6FOrUHXYF88eHdf9O8eZqzIREREdBsWRUREBmKnsMHSufdhyde/orC0QrvcwdYG4/pH4deDJ6CqrdMud3Oyxxv/dy8ignyhVmug1mhgY93yP8u+vr46/yUiIqLmEURRFO/czTwkJSWhd+/eOH78OHr16iV1HCIyU7V1ahxKTcfV/GJ4ujpiSI/OsFNYo6yiGnuS0lBYVoEgb3fExXRqVRFEREREhsF3YyIiA7O2kiOuZ+d6y50dbHHvwB5Q1dbBTmEtQTIiIiJqiMkURStWrMCmTZuQlpYGOzs7DBgwAG+99RYiIiKkjkZEdEcVVSp8+9th/HH0DJRVKni7OeHegT1w/7BekMs4ESgREZGUTOadeN++fZg3bx4OHz6MXbt2oba2FqNGjUJFRcWdVyYiklBNbR1eXLkJm/YnQ1mlAgDkFZfjv9sO4t0fdkmcjoiIiEzmTNFvv/2m83jNmjXw9vbG8ePHMWTIEIlSERHd2e7jaTh/5XqDbX8eO4spQ3siPMC7jVMRERHRTSZzpuh2pa
WlAAB3d3eJkxARNe3AyYtNt59oup2IiIiMy2TOFN1Ko9Hgueeew8CBA9G9e/dG+6lUKqhUKu1jpVLZFvGIiHTU1alb1U5ERETGZZJniubNm4dTp05h/fr1TfZbsWIFXFxctD9xcXFtlJCI6G89OwfeoT2ojZIQERFRQ0yuKHrqqaewbds27N27Fx06dGiy70svvYTS0lLtz759+9ooJRHR38bdFQV3J/sG27oG+6J3BIsiIiIiKZlMUSSKIp566ils3rwZe/bsQWho6B3XUSgUcHZ21v44Ojq2QVIiIl0ujnZ4Z9796Bbqr10mkwkY3KMTls69D4IgSJiOiIiITOaaonnz5mHdunX45Zdf4OTkhNzcXACAi4sL7OzsJE5HRNS0IB93fPjMA7iaV4zCMiUCvNzg6cIvaoiIiNoDQRRFUeoQzdHYN6mrV6/G7Nmzm7WNpKQk9O7dG8ePH0evXr0MmI6IiIiIiEyVyZwpMpHajYiIiIiITIzJXFNERERERERkDCyKiIiIiIjIopnM8DkiIlNXqqzC7uNpKCqrQJCPO+JiOkNhwz/DREREUuO7MRFRG9hzPA3v/bgLNbVq7bJVWw/ijf+7F12CfSVMRkRERBw+R0RkZJk5hXh73R86BREAlCgrsfi/v6C6plaiZERERASwKCIiMrpfE05ArdE02FairEJ88vk2TkRERES3YlFERGRkWdeLmmzPzC1soyRERETUEBZFRERG5uZk32S7h7NDGyUhIiKihrAoIiIysjH9ujXaZiWXYUTvLm2YhoiIiG7HooiIyMh6RwTjvsEx9ZbLBAHPTh0Bd54pIiIikhSn5CYiagPzJg9F38gQ/Hb4NAr/d5+iewdGI7yDt9TRiIiILB6LIiKiNtKnSwj6dAmROgYRERHdhsPniIiIiIjIorEoIiIiIiIii8aiiIiIiIiILBqvKTJTOTk5yMnJkToGGYifnx/8/PykjkEGwuPT/PAYJSIybRZVFPn5+WHJkiVm/8alUqnw4IMPYt++fVJHIQOJi4vD77//DoVCIXUUaiUen+aJxygRkWkTRFEUpQ5BhlVWVgYXFxfs27cPjo6OUsehVlIqlYiLi0NpaSmcnZ2ljkOtxOPT/PAYJSIyfRZ1psjSxMTE8A3aDJSVlUkdgYyAx6f54DFKRGT6ONECERERERFZNBZFRERERERk0VgUmSGFQoElS5bwgl8zwdfTvPD1ND98TYmITB8nWiAiIiIiIovGM0VERERERGTRWBQREREREZFFY1FEREREREQWjUWRCYqPj4cgCCgpKWmzfc6ePRv33Xdfm+3PkgiCgC1btrTZ/tasWQNXV9c22x8RERFRe8eiyEA+//xzODk5oa6uTrtMqVTC2toaQ4cO1el7s6hJT09vcFuvvfYaBEGAIAiwsrJCSEgI/vWvf0GpVBrzKVADZs+eDUEQ8Pjjj9drmzdvHgRBwOzZsxtd/+ZrffPHx8cHU6ZMwaVLl4yYmlrKmMexp6cnhgwZgg8//BAqlcqYT4NuY8jjWCaTwcXFBT179sSLL76InJwcIyYnIqK2wqLIQIYNGwalUoljx45plx04cAC+vr5ITExEdXW1dvnevXsRFBSEjh07Nrq9bt26IScnB5mZmXjrrbfw5Zdf4oUXXjDqc6CGBQYGYv369aiqqtIuq66uxrp16xAUFNSsbZw7dw7Xrl3DTz/9hNOnT2PChAlQq9XGikwtZKzjOCsrC3v37sXUqVOxYsUKDBgwAOXl5Y2uV1NTY5gnRFqGPI6PHj2KhQsX4s8//0T37t2Rmpra6Dp8LYmITAOLIgOJiIiAn58f4uPjtcvi4+MxceJEhIaG4vDhwzrLhw0b1uT2rKys4Ovriw4dOmDatGmYMWMGfv311wb7FhYW4sEHH0RAQADs7e0RFRWFH374QaePRqPB22+/jfDwcCgUCgQFBWHZsmXa9itXruCBBx6Aq6sr3N3dMXHiRGRmZtbb1+uvvw4vLy84Oz
vj8ccf13nDV6lUeOaZZ+Dt7Q1bW1sMGjQIR48ebfJ5moJevXohMDAQmzZt0i7btGkTgoKC0LNnz2Ztw9vbG35+fhgyZAheffVVnDlzBhcvXmyw78KFC9G5c2fY29sjLCwMixcvRm1trU6frVu3ok+fPrC1tYWnpycmTZqkbVOpVJg/fz4CAgLg4OCAfv366fxe3rRlyxZ06tQJtra2GD16NK5cuaLTvnLlSnTs2BE2NjaIiIjAd99916znasqMdRz7+/sjKioKTz/9NPbt24dTp07hrbfe0vYLCQnBG2+8gZkzZ8LZ2RmPPfZYg8NkU1JSIAiCzrG5atUqBAYGwt7eHpMmTcL777/P4ZENMNRx7Ovri86dO2P69OlISEiAl5cXnnjiCW2fm0ONly1bBn9/f0RERABoeJisq6sr1qxZo3186NAhxMTEwNbWFrGxsdiyZQsEQUBKSkqLnzcRETUPiyIDGjZsGPbu3at9vHfvXgwdOhRxcXHa5VVVVUhMTLzjh6nb2dnZNfqNY3V1NXr37o3t27fj1KlTeOyxx/CPf/wDR44c0fZ56aWX8Oabb2Lx4sU4c+YM1q1bBx8fHwBAbW0tRo8eDScnJxw4cAAJCQlwdHTEmDFjdPa5e/dunD17FvHx8fjhhx+wadMmvP7669r2F198ERs3bsQ333yDpKQkhIeHY/To0SgqKtLrubZHjzzyCFavXq19/PXXX2POnDkt2padnR2Axr9BdnJywpo1a3DmzBl89NFHWLVqFT744ANt+/bt2zFp0iSMGzcOycnJ2L17N/r27attf+qpp/DXX39h/fr1OHnyJKZOnYoxY8bgwoUL2j6VlZVYtmwZvv32WyQkJKCkpATTp0/Xtm/evBnPPvssXnjhBZw6dQr//Oc/MWfOHJ3fb3NlzOMYALp06YKxY8fqfDgHgHfffRc9evRAcnIyFi9e3KxtJSQk4PHHH8ezzz6LlJQUjBw5UufLDtJlyOMYuHEsP/7440hISEBeXp52+e7du3Hu3Dns2rUL27Zta9a2ysrKMGHCBERFRSEpKQlvvPEGFi5c2OJsRESkJ5EMZtWqVaKDg4NYW1srlpWViVZWVmJeXp64bt06cciQIaIoiuLu3btFAOLly5cb3c6SJUvEHj16aB8fO3ZM9PT0FO+//35RFEVx7969IgCxuLi40W2MHz9efOGFF0RRFMWysjJRoVCIq1atarDvd999J0ZERIgajUa7TKVSiXZ2duLvv/8uiqIozpo1S3R3dxcrKiq0fVauXCk6OjqKarVaVCqVorW1tbh27Vpte01Njejv7y++/fbbjeZs72bNmiVOnDhRzMvLExUKhZiZmSlmZmaKtra2Yn5+vjhx4kRx1qxZja5/+2t17do1ccCAAWJAQICoUqlEURRFAOLmzZsb3cY777wj9u7dW/u4f//+4owZMxrse/nyZVEul4vZ2dk6y0eMGCG+9NJLoiiK4urVq0UA4uHDh7XtZ8+eFQGIiYmJoiiK4oABA8S5c+fqbGPq1KniuHHjGs1pLox1HN9q4cKFop2dnfZxcHCweN999+n0aeg4T05OFgGIGRkZoiiK4rRp08Tx48frrDdjxgzRxcWl+U/YAhj6OL7Vzp07dY6dWbNmiT4+Ptrj+6aGjnMXFxdx9erVoije+Hvq4eEhVlVVadtXrVolAhCTk5Nb8rSJiEgPVpJUYmZq6NChqKiowNGjR1FcXIzOnTvDy8sLcXFxmDNnDqqrqxEfH4+wsLA7jmFPTU2Fo6Mj1Go1ampqMH78eHzyyScN9lWr1Vi+fDk2bNiA7Oxs1NTUQKVSwd7eHgBw9uxZqFQqjBgxosH1T5w4gYsXL8LJyUlneXV1tc5F5D169NBuEwD69+8PpVKJK1euoLS0FLW1tRg4cKC23draGn379sXZs2eb/h9nAry8vDB+/HisWbMGoihi/Pjx8PT0bPb6HTp0gCiKqKysRI8ePbBx40bY2Ng02PfHH3/Exx
9/jPT0dCiVStTV1cHZ2VnbnpKSgrlz5za4bmpqKtRqNTp37qyzXKVSwcPDQ/vYysoKffr00T7u0qULXF1dcfbsWe1r9thjj+lsY+DAgfjoo4+a/ZxNlSGP48aIoghBEHSWxcbG6r2dc+fO6QydBIC+ffs2++yEpWntcdwQURQBQOf1jIqKavT4bsy5c+cQHR0NW1tb7bJbzwATEZFxsSgyoPDwcHTo0AF79+5FcXEx4uLiAAD+/v4IDAzEoUOHsHfvXgwfPvyO24qIiMCvv/4KKysr+Pv7N/kG+8477+Cjjz7Chx9+iKioKDg4OOC5557TDs+6OVyrMUqlEr1798batWvrtXl5ed0xq6V45JFH8NRTTwEAPv30U73WPXDgAJydneHt7V2v+LzVX3/9hRkzZuD111/H6NGj4eLigvXr1+O9997T9mnq9VQqlZDL5Th+/DjkcrlOm6Ojo16ZLZUhj+PGnD17FqGhoTrLHBwcdB7LZDdGN9/80A2g3rVlpL/WHMcNufmlT0hIiHbZ7a8lcKNouvW1BPh6EhG1J7ymyMCGDRuG+Ph4xMfH60zhO2TIEOzcuRNHjhxp1nUINjY2CA8PR0hIyB2/cUxISMDEiRPx8MMPo0ePHggLC8P58+e17Z06dYKdnR12797d4Pq9evXChQsX4O3tjfDwcJ0fFxcXbb8TJ07ozNx0+PBhODo6IjAwUHtBfkJCgra9trYWR48eRWRk5B2frym4eY3VzWuw9BEaGoqOHTs2WRABNy60Dg4Oxssvv4zY2Fh06tQJly9f1ukTHR3d6GvZs2dPqNVq5OXl1XstfX19tf3q6up0Zlg7d+4cSkpK0LVrVwBA165ddV5L4Mbvmbm8lndiqOO4IWlpafjtt98wZcqUJvvd/ELi1imfb7/gPiIiot5kJuYwuYkxteY4vl1VVRW+/PJLDBky5I5fIHl5eem8lhcuXEBlZaX2cUREBFJTU3Wma+drSUTUdlgUGdiwYcNw8OBBpKSkaL9hBoC4uDh88cUXqKmpafGHqcZ06tQJu3btwqFDh3D27Fn885//xPXr17Xttra2WLhwIV588UV8++23SE9Px+HDh/HVV18BAGbMmAFPT09MnDgRBw4cQEZGBuLj4/HMM8/g6tWr2u3U1NTg0UcfxZkzZ7Bjxw4sWbIETz31FGQyGRwcHPDEE09gwYIF+O2333DmzBnMnTsXlZWVePTRRw36fKUil8tx9uxZnDlzpt5ZGEPp1KkTsrKysH79eqSnp+Pjjz/G5s2bdfosWbIEP/zwA5YsWYKzZ88iNTVVO5NZ586dMWPGDMycORObNm1CRkYGjhw5ghUrVmD79u3abVhbW+Ppp59GYmIijh8/jtmzZ+Ouu+7SDtdZsGAB1qxZg5UrV+LChQt4//33sWnTJsyfP98oz7u9MdRxXFdXh9zcXFy7dg2pqan4z3/+g7i4OMTExGDBggVNrhseHo7AwEC89tpruHDhArZv365zxhAAnn76aezYsQPvv/8+Lly4gC+++AI7d+6sNzSP/taa4zgvLw+5ubm4cOEC1q9fj4EDB6KgoAArV66847rDhw/HJ598guTkZBw7dgyPP/44rK2tte0PPfQQNBoNHnvsMZw9exa///473n33XQDg60lE1BYkvJ7JLGVkZIgAxC5duugsz8zMFAGIERERd9xGUxdoi2L9i34LCwvFiRMnio6OjqK3t7f4yiuviDNnzhQnTpyoXUetVotLly4Vg4ODRWtrazEoKEhcvny5tj0nJ0ecOXOm6OnpKSoUCjEsLEycO3euWFpaKori3xcqv/rqq6KHh4fo6Ogozp07V6yurtZuo6qqSnz66ae12xg4cKB45MiRZvxfa79uPu/GtOYC7Ztw2wXYCxYs0P4/njZtmvjBBx/Uu3B+48aNYkxMjGhjYyN6enqKkydP1rbV1NSIr776qhgSEiJaW1uLfn5+4qRJk8STJ0+KonhjogUXFxdx48aNYlhYmK
hQKMS777673qQBn332mRgWFiZaW1uLnTt3Fr/99ttGn4O5MdRxDEAEIMrlctHd3V0cNGiQ+MEHH+gcN6J4Y6KFDz74oN42Dh48KEZFRYm2trbi4MGDxZ9++klnogVRFMUvv/xSDAgIEO3s7MT77rtPXLp0qejr69ui522uDHUcAxAFQRCdnJzEHj16iAsWLBBzcnKata/s7Gxx1KhRooODg9ipUydxx44dOhMtiKIoJiQkiNHR0aKNjY3Yu3dvcd26dSIAMS0tTc9nTERE+hJE8bZBzkREZLLmzp2LtLQ0HDhwQOoo1Epr167FnDlzUFpaesdrQ4mIqHU40QIRkQl79913MXLkSDg4OGDnzp345ptv8Nlnn0kdi1rg22+/RVhYGAICAnDixAksXLgQDzzwAAsiIqI2wKKIiMiEHTlyBG+//TbKy8sRFhaGjz/+GP/3f/8ndSxqgdzcXLz66qvIzc2Fn58fpk6dypvxEhG1EQ6fIyIiIiIii8bZ54iIiIiIyKKxKCIiIiIiIovGokhCs2fPhiAIePPNN3WWb9myxaj3pSgqKsLTTz+NiIgI2NnZISgoCM888wxKS0t1+mVlZWH8+PGwt7eHt7c3FixYgLq6OqPlMnV8Pc0LX0/zw9eUiIgaw6JIYra2tnjrrbdQXFzcZvu8du0arl27hnfffRenTp3CmjVr8Ntvv+ncZFWtVmP8+PGoqanBoUOH8M0332DNmjV49dVX2yynKeLraV74epofvqZERNQgaW+TZNlmzZol3nPPPWKXLl3EBQsWaJdv3rxZbOuXZsOGDaKNjY1YW1sriqIo7tixQ5TJZGJubq62z8qVK0VnZ2dRpVK1aTZTwdfTvPD1ND98TYmIqDE8UyQxuVyO5cuX4z//+Q+uXr3a7PXGjh0LR0fHRn+6deumV47S0lI4OzvDyurGLO1//fUXoqKi4OPjo+0zevRolJWV4fTp03pt25Lw9TQvfD3ND19TIiJqCO9T1A5MmjQJMTExWLJkCb766qtmrfPf//4XVVVVjbZbW1s3e/8FBQV444038Nhjj2mX5ebm6rw5A9A+zs3Nbfa2LRFfT/PC19P88DUlIqLbsShqJ9566y0MHz4c8+fPb1b/gIAAg+y3rKwM48ePR2RkJF577TWDbJP4epobvp7mh68pERHdisPn2okhQ4Zg9OjReOmll5rV3xBDOcrLyzFmzBg4OTlh8+bNOt90+vr64vr16zr9bz729fXV45lZJr6e5oWvp/nha0pERLfimaJ25M0330RMTAwiIiLu2Le1QznKysowevRoKBQK/Prrr7C1tdVp79+/P5YtW4a8vDx4e3sDAHbt2gVnZ2dERkY249kQX0/zwtfT/PA1JSKim1gUtSNRUVGYMWMGPv744zv2bc1QjrKyMowaNQqVlZX4/vvvUVZWhrKyMgCAl5cX5HI5Ro0ahcjISPzjH//A22+/jdzcXLzyyiuYN28eFApFi/dtSfh6mhe+nuaHrykREWlJPf2dJZs1a5Y4ceJEnWUZGRmijY2NUaeH3bt3rwigwZ+MjAxtv8zMTHHs2LGinZ2d6OnpKb7wwgva6WOpPr6e5oWvp/nha0pERI0RRFEU26b8IiIiIiIian840QIREREREVk0FkVERERERGTRWBQREREREZFFY1FEREREREQWjUURERERERFZNBZFRERERERk0VgUERERERGRRWNRREREREREFo1FERERERERWTQWRUREREREZNFYFBERERERkUVjUURERERERBaNRREREREREVk0FkVERERERGTRWBQREREREZFFY1FEREREREQWjUURERERERFZNBZFRERERERk0VgUERERERGRRWNRREREREREFo1FERERERERWTQWRUREREREZNEsqijKycnBa6+9hpycHKmjEBEREREZBD/jtp7FFUWvv/46f2GIiIiIyGzwM27rWVRRREREREREdDsWRUREREREZNFYFBERERERkU
VjUURERERERBaNRREREREREVk0FkVERERERGTRWBQREREREZFFY1FEZAKqq6uljkBERERktlgUEZmA/Px8qSMQERERmS0WRUQmoKamBjU1NVLHICIiIjJLLIqITERZWZnUEYiIiIjMEosiIhNRXFwsdQQiIiIis8SiiMhE8LoiIiIiIuNgUURkIi5fvgxRFKWOQURERGR2WBQRmYjy8nJkZWVJHYOIiIjI7LAoIjIhx44d49kiIiIiIgNjUURkQgoLC3Hu3DmpYxARERGZFRZFRO1cbGwsBg4ciGXLlgEAEhMTUV5eLnEqIiIiIvPBooioncvNzcX169e19ylSqVT4448/oFKpJE5GREREZB5YFBGZoMLCQuzYsQNVVVVSRyEiIiIyeSyKiExUfn4+fvnlFxQWFkodhYiIiMiksSgiMmFlZWXYsmULTp48CY1GI3UcIiIiIpPEoojIxKnVahw+fBibN29Gdna21HGIiIiITA6LIiIzUVhYiO3bt2P79u3Iy8uTOg4RERGRybCSOgARGVZ2djays7MRHByM2NhYeHh4SB2JiIiIqF1jUURkpi5fvozLly8jNDQUPXv2hKenp9SRiIiIiNolFkVEZi4jIwMZGRnw8/ND165dERISAisrHvpEREREN/GTEVE7lpWVhcrKSgBATU0NioqK4O7u3qJt5eTkICcnB9bW1ggJCUF4eDgCAgIgk/HSQiIiIrJsLIqI2qEjR47gjTfewPbt2yGKIgCgsrIS//73vxEVFYXx48cjJCSkRduura3FhQsXcOHCBdjZ2SEiIgLdunWDg4ODAZ8BERERkelgUUTUzmzatAnTpk2DKIragugmURRx6tQpnDp1CnPnzkWvXr1ata+qqiqkpKTg1KlT6NevH7p169aq7RERERGZIo6bIWpHjhw5gmnTpkGtVkOtVjfYR6PRQKPRYNWqVcjMzDTIfuvq6pCQkID09HSDbI+IiIjIlLAoImpHli5d2uAZosbs2LHDoPs/ePAgysvLDbpNIiIiovaORRFRO5GVlYVt27Y1eobodhqNBidPnkRRUZHBMqhUKmzduhXFxcUG2yYRERFRe8eiiKid2L17d7PPEN0kiiLS0tIMmkOpVOKXX35Bfn6+QbdLRERE1F6xKCJqJ8rLy/WeHlsQBFRXVxs8S01NDVJSUgy+XSIiIqL2iEURUTvh5OQEjUaj1zqiKMLW1tbgWaytrREVFWXw7RIRERG1R5ySm6idGDFiBARB0GsInSAI6NKli0FzeHt7Y9iwYXBxcTHodomIiIjaK54pImongoKCcM8990Aulzerv0wmQ3R0NNzd3Q2yf7lcjr59++Lee+9lQUREREQWhUURUTuyePFiCIIAQRCa1X/cuHEG2a+vry8mT56MmJgYva9rIiIiIjJ1/PRD1I706dMHP/74I+RyeaNnjGQyGWQyGR577DGEhIS0an+urq64++67MWHCBLi5ubVqW0RERESmitcUEbUzkydPxqFDh/DGG29g27ZtOtcYCYKAqKgojBs3rlUFkbu7O3r27ImwsLBmn5UiIiIiMlcsiojaoT59+uDXX39FVlYWYmJiUFxcDHt7eyxevLhV1xB5eXmhV69eCAoKYjFERERE9D8mNXxu//79mDBhAvz9/SEIArZs2SJ1JCKjCgoKgr29PQDAxsamxQWRi4sLRo4cifvuuw/BwcEsiIiIiCwcP1frMqmiqKKiAj169MCnn34qdRQik2BtbY277roL999/P0JDQ1kMEREREQB+rr5di4bPpaenY/Xq1UhPT8dHH30Eb29v7Ny5E0FBQejWrZuhM2qNHTsWY8eONdr2icyFIAjo3Lkz+vTpoz3TRERERHQTP1fr0vtM0b59+xAVFYXExERs2rQJSqUSAHDixAksWbLE4AFbQ6VSoaysTPtzMyuROQsNDcX999+PuLg4FkREREQWRKlU6nz2ValUUkcyGXoXRYsWLcLSpUuxa9cu2NjYaJcPHz
4chw8fNmi41lqxYgVcXFy0P3FxcVJHIjIKQRAQHh6OqVOnYuTIkZxem4iIyALFxcXpfPZdsWKF1JFMht7D51JTU7Fu3bp6y729vVFQUGCQUIby0ksv4fnnn9c+TklJYWFEZic4OBh9+/ZlIURERGTh9u3bh5iYGO1jhUIhXRgTo3dR5OrqipycHISGhuosT05ORkBAgMGCGYJCodD5ZXB0dJQwDZFh2draYvDgwfWORSIiIrJMjo6OcHZ2ljqGSdJ7+Nz06dOxcOFC5ObmQhAEaDQaJCQkYP78+Zg5c6YxMhLRbTw8PDB58mQWREREREQGoPeZouXLl2PevHkIDAyEWq1GZGQk1Go1HnroIbzyyivGyKilVCpx8eJF7eOMjAykpKTA3d0dQUFBRt03kVR8fX1RV1enPevp5eWF8ePH61zTR0RERKQPfq7WJYiiKLZkxStXriA1NRVKpRI9e/ZEp06dDJ2tnvj4eAwbNqze8lmzZmHNmjV3XD8pKQm9e/fG8ePH0atXLyMkJDKO9PR07N69G9bW1pg6dSqHghIREZFWSz7jtvZztblp0X2KACAwMBCBgYGGzHJHQ4cORQtrOCKzEBERwYKIiIiIWo2fq3XpfU3RlClT8NZbb9Vb/vbbb2Pq1KkGCUVEDeM1RERERESGp3dRtH//fowbN67e8rFjx2L//v0GCUVE9dnY2MDHx0fqGERERERmR++iSKlUNniBt7W1NcrKygwSiojq69ChA2QyvQ9ZIiIiIroDvT9hRUVF4ccff6y3fP369YiMjDRIKCKqLzg4WOoIRERERGZJ74kWFi9ejMmTJyM9PR3Dhw8HAOzevRs//PADfvrpJ4MHJKIbOHSOiIiIyDj0LoomTJiALVu2YPny5fj5559hZ2eH6Oho/Pnnn4iLizNGRiKLJwgCZ50jIiIik1ZdXY2amhqdZc7OzhKl0dWiKbnHjx+P8ePHGzoLETXC1taW1xMRERGRyamsrMSLL76IDRs2oLCwsF67Wq2WIFV9Lf6UVVNTg6tXryIrK0vnh4gMz9bWVuoIRNSEuro6qSMQEbVLCxYswJ49e7By5UooFAr897//xeuvvw5/f398++23UsfT0vtM0YULF/DII4/g0KFDOstFUYQgCO2m2iMyJ+3l1DIRNayyspLHKRFRA7Zu3Ypvv/0WQ4cOxZw5czB48GCEh4cjODgYa9euxYwZM6SOCKAFRdHs2bNhZWWFbdu2wc/PD4IgGCMXEd3CyqpFI12JqI0olUoWRUREDSgqKkJYWBiAG1/yFhUVAQAGDRqEJ554QspoOvT+pJWSkoLjx4+jS5cuxshDRERkcgoKCuDv7y91DCKidicsLAwZGRkICgpCly5dsGHDBvTt2xdbt26Fq6ur1PG09L6mKDIyEgUFBcbIQkREZJKuXLkCURSljkFE1O7MmTMHJ06cAAAsWrQIn376KWxtbfGvf/0LCxYskDjd3/Q+U/TWW2/hxRdfxPLlyxEVFQVra2uddg4fICIiS6NUKpGZmYnQ0FCpoxARtSv/+te/tP++++67kZaWhuPHjyM8PBzR0dESJtOld1F09913AwBGjBihs5wTLRARkSVLTk5GSEgIr7UlIrrFt99+i2nTpkGhUAAAgoODERwcjJqaGnz77beYOXOmxAlv0Lso2rt3rzFyEBERmbSCggJcu3YNAQEBUkchImo35syZgzFjxsDb21tneXl5OebMmWO6RVFcXJwxchAREZmk2NhYZGRkwNHRER4eHrjvvvsgl8uljkVE1C7cHE12u6tXr8LFxUWCRA1r0Ty/Bw4cwBdffIFLly7hp59+QkBAAL777juEhoZi0KBBhs5IRETUbuXm5qKoqAgajQaFhYVISEjA4MGDOYyOiCxaz549IQgCBEHAiBEjdG4volarkZGRgTFjxkiYUJfeRdHGjRvxj3/8AzNmzEBSUhJUKhUAoLS0FMuXL8eOHTsMHpKIiMhUpKWloaqqCgMHDo
Sjo6PUcYiIJHHfffcBuHE7n9GjR+v8PbSxsUFISAimTJkiUbr69C6Kli5dis8//xwzZ87E+vXrtcsHDhyIpUuXGjQcERGRKbp8+TKuXr2KTp06oXv37nB3d5c6EhFRm1qyZAkAICQkBNOmTYOtra3EiZqmd1F07tw5DBkypN5yFxcXlJSUGCITERGRyVOr1UhLS0NaWhr8/PwQExODDh06cFgdEVmUWbNmSR2hWfQuinx9fXHx4kWEhIToLD948CDCwsIMlYuIiMhs5OTkICcnBwEBARgyZAicnJykjkREZDRubm7N/gKoqKjIyGmaR++iaO7cuXj22Wfx9ddfQxAEXLt2DX/99Rfmz5+PxYsXGyMjERGRWcjOzsbGjRvRv39/dO7cmWeNiMgsffjhh1JH0JveRdGiRYug0WgwYsQIVFZWYsiQIVAoFJg/fz6efvppY2QkIiIyGzU1Ndi3bx9SU1PRvXt3dOzYEdbW1lLHIiIyGFMZMncrvYoitVqNhIQEzJs3DwsWLMDFixehVCoRGRnJGXaIiMjiZGVlobKyEsCNYqeoqKjZkyoUFRVh//79OHToEIKCghAaGooOHTpo7/pORGQu0tPTsXr1aqSnp+Ojjz6Ct7c3du7ciaCgIHTr1k3qeAAAmT6d5XI5Ro0aheLiYtjY2CAyMhJ9+/ZlQURERBblyJEjmDBhAkJCQlBcXAwAqKysxL///W98+umnyMzMbPa26urqcOnSJezevRvfffcdtm7dipSUFBQVFUEURSM9AyKitrFv3z5ERUUhMTERmzZtglKpBACcOHFCO0Nde6D38Lnu3bvj0qVLCA0NNUYeIiKidm3Tpk2YNm0aRFGsV7SIoohTp07h1KlTmDt3Lnr16qXXtjUajXZShiNHjsDR0REhISGIiIiAh4eHIZ8GEVGbWLRoEZYuXYrnn39eZ5KZ4cOH45NPPpEwmS69zhQBN+5TNH/+fGzbtg05OTkoKyvT+SEiIjJXR44cwbRp06BWq6FWqxvso9FooNFosGrVKr3OGDVEqVTi1KlT2LhxI3777TftDdOJiExFamoqJk2aVG+5t7c3CgoKJEjUML2LonHjxuHEiRO499570aFDB7i5ucHNzQ2urq5wc3MzRkYiIqJ2YenSpQ2eIWrMjh07DLbvrKwsHDhwwGDbIyJqC66ursjJyam3PDk5GQEBARIkapjew+f27t1rjBxERETtWlZWFrZt29bsgkij0eDkyZN6Tb5wJ5cuXcK5c+cQERFhkO0RERnb9OnTsXDhQvz0008QBAEajQYJCQmYP38+Zs6cKXU8Lb2Lori4OGPkICIiatd2796t98QHoigiLS0NAwYMMFiO/fv3QyaToVOnTgbbJhGRsSxfvhzz5s1DYGAg1Go1IiMjoVar8dBDD+GVV16ROp6W3sPnAODAgQN4+OGHMWDAAGRnZwMAvvvuOxw8eNCg4YiIiNqL8vJyyGT6vW0KgoDq6mqD5hBFEfv379dOBU5E1J7Z2Nhg1apVSE9Px7Zt2/D9998jLS0N3333HeRyudTxtPQuijZu3IjRo0fDzs4OSUlJ2os+S0tLsXz5coMHJCIiag+cnJyg0Wj0WkcURdja2hopERGR6QgKCsK4cePwwAMPtMsz3XoPn1u6dCk+//xzzJw5E+vXr9cuHzhwIJYuXWrQcERERO3FiBEjIAiCXkPoBEFAly5dDJrD2toaw4cPh729vUG3S0RkKM8//3yz+77//vtGTNJ8ehdF586dw5AhQ+otd3FxQUlJiSEyERERtTtBQUG45557sGPHjkan476VTCZDVFSUwSZZAAAvLy8MGzYMrq6uBtsmEZGhJScn6zxOSkpCXV2ddpKY8+fPQy6Xo3fv3lLEa5DeRZGvry8uXryIkJAQneUHDx5EWFiYoXIRERG1O4sXL8bOnTubfcZo3LhxBtmvlZUVevXqhejoaL2vayIiamu3zlb9/vvvw8nJCd9884329j3FxcWYM2
cOBg8eLFXEevT+yzp37lw8++yzSExMhCAIuHbtGtauXYv58+fjiSeeMEZGIiKidqFPnz748ccfIZfLG71AWCaTQSaT4bHHHqv3BaK+BEFAREQEpk2bhpiYGBZERGRy3nvvPaxYsULnfqZubm5YunQp3nvvPQmT6dL7TNGiRYug0WgwYsQIVFZWYsiQIVAoFJg/fz6efvppY2QkIiJqNyZPnoxDhw7hjTfeqHffIkEQEBUVhXHjxrWqILKxsUHnzp0RFRUFJycnA6QmIpJGWVkZ8vPz6y3Pz89HeXm5BIka1qyi6OTJk+jevTtkMhkEQcDLL7+MBQsW4OLFi1AqlYiMjISjo6OxsxIREbULffr0wa+//oqsrCzExMSguLgY9vb2WLx4cYuvIRIEAf7+/ujUqRNCQ0NhbW1t4NRERG1v0qRJmDNnDt577z307dsXAJCYmIgFCxZg8uTJEqf7W7OKop49eyInJwfe3t4ICwvD0aNH4eHhgcjISGPnIyIiareCgoJgb2+P4uJi2NjYtKggcnd3R+fOndGxY0c4ODgYISURkXQ+//xzzJ8/Hw899BBqa2sB3LhO8tFHH8U777wjcbq/NasocnV1RUZGBry9vZGZman3fRqIiIjob1ZWVujYsSMiIyPh6ekJQRCkjkREZBT29vb47LPP8M477yA9PR0A2uWXQM0qiqZMmYK4uDj4+flBEATExsY2eoHppUuXDBqQiIjIXDg6OiIyMhJdu3aFQqGQOg4RUZtxcHBAdHS01DEa1ayi6Msvv8TkyZNx8eJFPPPMM5g7dy4v/CQiImomb29vREVFITQ0lDPIERG1Q82eaGHUqFEYM2YMjh8/jmeffZZFERER0R14e3ujb9++2pEWRETUPuk90cK+fftQU1Nj7FxEREQmSy6Xo3///ujatSuLISIiE9Csc/g3J1oAIPlEC59++ilCQkJga2uLfv364ciRI5JlISIi8vX1hbu7O5ydnQHcGDc/ceJEREZGsiAionaNn6v/ZlITLfz44494/vnn8fnnn6Nfv3748MMPMXr0aJw7dw7e3t5G2y8REVFjjh07hh9//BGlpaVwdHTEhAkTOMSciNo9KT5XOzs7IyUlBWFhYUbZfmuY1EQL77//PubOnYs5c+YAuDHv+fbt2/H1119j0aJFbZ6HiIjoJrlcjjFjxrAgIiKTIMXnalEUjbJdQ2hWUQQAY8aMAQDJJlqoqanB8ePH8dJLL2mXyWQy3H333fjrr78aXEelUkGlUmkfK5VKAEBdXZ325lFEREStVVdXh+7du8PJyYnvL0TU5urq6gDc+KxbVlamXa5QKBqc/r8ln6vNXbOLoptWr15tjBx3VFBQALVaDR8fH53lPj4+SEtLa3CdFStW4PXXX6+3vF+/fkbJSEREREQklbi4OJ3HS5YswWuvvVavX0s+VxvCww8/rL3+sr1pVlE0efJkrFmzBs7Ozpg8eXKTfTdt2mSQYIbw0ksv4fnnn9c+TklJQVxcHBITE9GzZ08JkxERkTk5fPgw7rrrLqljEJGFSk5ORr9+/bBv3z7ExMRol7e3m0SvXLlS6giNalZR5OLiop1Bx8XFxaiBGuPp6Qm5XI7r16/rLL9+/Tp8fX0bXOf2U4aOjo4AACsrK1hbWxsvLBERWRRfX1++rxCRZKysbnykd3R0bNaZmJZ8rjZ3zSqKbh0yJ9XwORsbG/Tu3Ru7d+/GfffdBwDQaDTYvXs3nnrqKUkyERERAX9/6UZEZAr4ubo+va8pktLzzz+PWbNmITY2Fn379sWHH36IiooK7awZREREUrC1tZU6AhGRXvi5WleziqKePXs2+wZ0SUlJrQrUlGnTpiE/Px+vvvoqcnNzERMTg99++63eRWJERERtiUUREZkafq7W1ayi6OZpNQCorq7GZ599hsjISPTv3x/AjQtMT58+jSeffNIoIW/11FNPWexpPSIiap9sbGykjkBEpLe2/lxdXV2NmpoanWXtZT
a6ZhVFS5Ys0f77//7v//DMM8/gjTfeqNfnypUrhk1HRERkAmQymdQRiIjapcrKSrz44ovYsGEDCgsL67Wr1WoJUtWn91/xn376CTNnzqy3/OGHH8bGjRsNEoqIiIiIiEzfggULsGfPHqxcuRIKhQL//e9/8frrr8Pf3x/ffvut1PG09C6K7OzskJCQUG95QkICx1QTEREREZHW1q1b8dlnn2HKlCmwsrLC4MGD8corr2D58uVYu3at1PG09J597rnnnsMTTzyBpKQk9O3bFwCQmJiIr7/+GosXLzZ4QCIiovZOFMVmT0hERGRJioqKEBYWBuDG9UNFRUUAgEGDBuGJJ56QMpoOvYuiRYsWISwsDB999BG+//57AEDXrl2xevVqPPDAAwYPSERE1N6Joih1BCKidiksLAwZGRkICgpCly5dsGHDBvTt2xdbt26Fq6ur1PG0WnSfogceeIAFEBER0f/I5XKpIxARtUtz5szBiRMnEBcXh0WLFmHChAn45JNPUFtbi/fff1/qeFomdfNWIiIiIiIyHf/617+0/7777ruRlpaG48ePIzw8HNHR0RIm08U5RImIiIjIbNXW1kodwaJ9++23UKlU2sfBwcGYPHkyunTpYtqzzxERERERmYrbbxZKbWvOnDkoLS2tt7y8vBxz5syRIFHDWBQRERERkdniRCjSamx2zqtXr8LFxUWCRA3jNUVEREREZLZYFEmjZ8+eEAQBgiBgxIgRsLL6u+xQq9XIyMjAmDFjJEyoS++iSK1WY82aNdi9ezfy8vKg0Wh02vfs2WOwcERERERErcFriqRx3333AQBSUlIwevRoODo6attsbGwQEhKCKVOmSJSuPr2LomeffRZr1qzB+PHj0b17d96sjoiIiIjarVsv8qe2s2TJEgBASEgIpk2bBltbW4kTNU3vomj9+vXYsGEDxo0bZ4w8REREREQGU11d3eh1LWR8s2bNkjpCs+hdFNnY2CA8PNwYWYiIiIiIDEqtVkOlUrX7MxXmxM3NrdlFaFFRkZHTNI/eRdELL7yAjz76CJ988gkrbiIiIiJq98rKylgUtaEPP/xQ6gh607soOnjwIPbu3YudO3eiW7dusLa21mnftGmTwcIREREREbVWQUEBvL29pY5hMUxlyNyt9C6KXF1dMWnSJGNkISIiIiIyuKysLERGRkodw2Klp6dj9erVSE9Px0cffQRvb2/s3LkTQUFB6Natm9TxALSgKFq9erUxchARERERGcWVK1dQWlrarm4Wain27duHsWPHYuDAgdi/fz+WLVsGb29vnDhxAl999RV+/vlnqSMCAGRSByAiIiIiMiZRFJGYmCh1DIu0aNEiLF26FLt27YKNjY12+fDhw3H48GEJk+nS+0wRAPz888/YsGEDsrKyUFNTo9OWlJRkkGBERERERIaSmZmJS5cuISwsTOooFiU1NRXr1q2rt9zb2xsFBQUSJGqY3meKPv74Y8yZMwc+Pj5ITk5G37594eHhgUuXLmHs2LHGyEhEREREpLfY2FgMGjQIy5YtAwAcOHAAFRUVEqeyLK6ursjJyam3PDk5GQEBARIkapjeRdFnn32GL7/8Ev/5z39gY2ODF198Ebt27cIzzzyD0tJSY2QkIiIiItJbbm4url+/jrKyMgCASqXC7t27odFoJE5mOaZPn46FCxciNzcXgiBAo9EgISEB8+fPx8yZM6WOp6V3UZSVlYUBAwYAAOzs7FBeXg4A+Mc//oEffvjBsOmIiIiIiAwoNzcXBw8ehCiKUkexCMuXL0eXLl0QGBgIpVKJyMhIDBkyBAMGDMArr7widTwtvYsiX19f7Z1ng4KCtBdIZWRk8JeLiIiIiNq9tLQ0HDt2jJ9d24CNjQ1WrVqF9PR0bNu2Dd9//z3S0tLw3XffQS6XSx1PS++JFoYPH45ff/0VPXv2xJw5c/Cvf/0LP//8M44dO4bJkycbIyMRERERkUElJydDEAT07t0bgiBIHcfsBQ
UFISgoSOoYjdK7KPryyy+14zDnzZsHDw8PHDp0CPfeey/++c9/GjwgEREREZExJCUlQa1Wo2/fviyMDOj5559vdt/333/fiEmaT++iSCaTQSb7e9Td9OnTMX36dIOGIiIiIiJqCydOnEB1dTUGDRrUroZzmbLk5GSdx0lJSairq0NERAQA4Pz585DL5ejdu7cU8RrUovsUHThwAF988QXS09Px888/IyAgAN999x1CQ0MxaNAgQ2ckIiIiIjKac+fOobi4GMOHD4ezs7PUcUze3r17tf9+//334eTkhG+++QZubm4AgOLiYsyZMweDBw+WKmI9ek+0sHHjRowePRp2dnZITk6GSqUCAJSWlmL58uUGD0hEREREZGx5eXn4+eefcfLkSU7ZbUDvvfceVqxYoS2IAMDNzQ1Lly7Fe++9J2EyXXoXRUuXLsXnn3+OVatWwdraWrt84MCBSEpKMmg4IiIiIqK2UldXh8OHD2Pz5s24fv261HHMQllZGfLz8+stz8/P197apz3Quyg6d+4chgwZUm+5i4sLSkpKDJGJiIiIiEgyhYWF+OWXX7B//37tqChqmUmTJmHOnDnYtGkTrl69iqtXr2Ljxo149NFH29XM1XpfU+Tr64uLFy8iJCREZ/nBgwcRFhZmqFxERERERJJKS0vD5cuXMWDAAISFhXGGuhb4/PPPMX/+fDz00EOora0FAFhZWeHRRx/FO++8I3G6v+l9pmju3Ll49tlnkZiYCEEQcO3aNaxduxbz58/HE088YYyMRERERESSqKqqwu7du/HHH3+goqJC6jgmx97eHp999hkKCwuRnJyM5ORkFBUV4bPPPoODg4PU8bT0PlO0aNEiaDQajBgxApWVlRgyZAgUCgXmz5+Pp59+2hgZiYiIiIgkdfnyZeTm5mLAgAEIDw/nWSM9OTg4IDo6WuoYjdK7KBIEAS+//DIWLFiAixcvQqlUIjIyEo6OjsbIR0RERESkt6ysLFRWVgIAampqUFRUBHd391ZtU6VSYe/evcjMzNSeGCDzoPfwuZtsbGwQGRmJvn37siAiIiIionbhyJEjmDBhAkJCQlBcXAwAqKysxL///W98+umnyMzMbPU+MjIysHnzZu32yfQ1+0zRI4880qx+X3/9dYvDEBERERG11KZNmzBt2jSIoghRFHXaRFHEqVOncOrUKcydOxe9evVq1b7Kysrw66+/Yty4cfDy8mrVtkh6zT5TtGbNGuzduxclJSUoLi5u9IeIiIiIqK0dOXIE06ZNg1qthlqtbrCPRqOBRqPBqlWrDHLGSKVSYceOHfwMbAaafaboiSeewA8//ICMjAzMmTMHDz/8cKvHZRIRERERGcLSpUsbPEPUmB07duDJJ59s9X5vFkb33nsvnJycWr09kkazzxR9+umnyMnJwYsvvoitW7ciMDAQDzzwAH7//fdm//IRERERERlaVlYWtm3b1ugZottpNBqcPHkSRUVFBtl/RUUFtm3bhrKyMoNsj9qeXhMtKBQKPPjgg9i1axfOnDmDbt264cknn0RISAiUSqWxMhIRERERNWr37t16f0kviiLS0tIMlqG8vBxbt25FSUmJwbZJbafFs8/JZDIIggBRFJtdlRMRERERGVp5eTlkMv0+1gqCgOrqaoPmqKiowI4dO6BSqQy6XTI+vX57VCoVfvjhB4wcORKdO3dGamoqPvnkE2RlZRl9Wu5ly5ZhwIABsLe3h6urq1H3RURERESmw8nJCRqNRq91RFGEra2twbMolUqcPHnS4NuViqV8Bm92UfTkk0/Cz88Pb775Ju655x5cuXIFP/30E8aNG6d3Zd4SNTU1mDp1Kp544gmj74uIiIiITMeIESMgCIJe6wiCgC5duhgljznNRmcpn8GbPfvc559/jqCgIISFhWHfvn3Yt29fg/02bdpksHC3ev311wHcmBqciIiIiOimoKAg3HPPPdixY0ezLuuQyWSIiooy2kzKPj4+RtmuFCzlM3
izi6KZM2fqXYETEREREbWFxYsXY+fOndpr3u9k3LhxRskRHByM7t27G2XbZDzNLopMsTpUqVQ6F7pxhjwiIiIi89SnTx/8+OOPmDZtWqMTgd285OOxxx5DSEiIQfevUCgQGxuLyMhIyU4kKJVKnWnBFQoFFAqFJFlMjfEvBmrCokWLIAhCkz+tmSpxxYoVcHFx0f7ExcUZMD0RERERtSeTJ0/GoUOHMG7cuHqFiSAIiIqKwsKFC9GzZ0+D7dPa2hq9evXCgw8+iG7dukk6siouLk7ns++KFSsa7Gfsz+CmSBAlvPNqfn4+CgsLm+wTFhYGGxsb7eM1a9bgueeea9Yc8LefKUpJSUFcXByOHz+OXr16tTg3EREREbVvWVlZiImJQXFxMezt7bF48WKDXkMkk8nQrVs39OzZ0yiz2OkjKSkJvXv3xr59+xATE6Nd3tiZImN/BjdFzR4+ZwxeXl7w8vIy2vZv/0Uw9rThRERERNQ+BAUFwd7eHsXFxbCxsTFoQeTj44MhQ4bAzc3NYNs0BEdHRzg7O9+xn7E/g5siSYsifWRlZaGoqAhZWVlQq9VISUkBAISHh7PYISIiIiKjs7a2Rp8+fSQfJteWLOUzuMkURa+++iq++eYb7eObY0H37t2LoUOHSpSKiIiIiCxBWFgY7rrrLrMqBJrDUj6Dm0xRtGbNGpOcAY+IiIiITJePjw/69esHX19fqaNIwlI+g5tMUURERERE1FZcXFzQt29fhISEWMxQOUvGooiIiIiI6H+sra0RGxuLbt26ae9rROaPRREREREREW7MWDd48GA4ODhIHYXaGIsiIiIiIrJoMpkMd911l0XNKke6WBQRERERkcVSKBQYNWoU/Pz8pI5CEmJRREREREQWycHBAePGjWt3N2GltseiiIiIiIgsjqOjIyZMmAAnJyepo1A7wCk1iIiIiMii2NnZYfz48SyISItFERERERFZDJlMhpEjR8LFxUXqKNSOcPgcEREREZklX19f1NXVQaFQaJf1798fvr6+Eqai9ohFERERERGZpWPHjuHixYvYs2cPACA0NBSRkZESp6L2iMPniIiIiMjs2djYYNCgQbwPETWIRRERERERmb3IyEjY2dlJHYPaKRZFRERERGT2OnfuLHUEasdYFBERERGRWXN1dYWrq6vUMagdY1FERERERGatQ4cOUkegdo5FERERERGZNR8fH6kjUDvHooiIiIiIzJq7u7vUEaidY1FERERERGZLEAQ4OztLHYPaORZFRERERGS2bG1tIZfLpY5B7RyLIiIiIiIyW7a2tlJHIBPAooiIiIiIzBbPElFzsCgiIiIiIrMlCILUEcgEsCgiIiIiIrMlk/HjLt0Zf0uIiIiIyGxx+Bw1B4siIiIiIjJbLIqoOVgUEREREZHZ4vA5ag7+lhARERGR2eJEC9QcLIqIiIiIyGxx+Bw1B4siIiIiIjJbPFNEzcGiiIiIiIiILBqLIiIiIiIismgsioiIiIiIyKKxKCIiIiIiIovGooiIiIiIiCwaiyIiIiIiIrJoVlIHIOPIyclBTk6O1DHIQPz8/ODn5yd1DDIQHp/mh8eoeeExal54fFJzWFRR5OfnhyVLlpj9gaFSqfDggw9i3759UkchA4mLi8Pvv/8OhUIhdRRqJR6f5onHqPngMWp+LOH4tJTPuMYkiKIoSh2CDKusrAwuLi7Yt28fHB0dpY5DraRUKhEXF4fS0lI4OztLHYdaicen+eExal54jJoXHp/UXBZ1psjSxMTE8A+AGSgrK5M6AhkBj0/zwWPUPPEYNQ88Pqm5ONECERERERFZNBZFRERERERk0VgUmSGFQoElS5aY9QWFloSvp3nh62l++JqaF76e5oWvJzUXJ1ogIiIiIiKLxjNFRERERERk0VgUERERERGRRWNRREREREREFo1FEZHEBEHAli1b2mx/a9asgaura5vtj4iIaOjQoXjuueea1bet36dee+01xMTENLt/Zm
YmBEFASkqK0TJR22NRRNSE2bNnQxAEPP744/Xa5s2bB0EQMHv27EbXj4+PhyAI2h8fHx9MmTIFly5dMmJqIrqVIY9jmUwGFxcX9OzZEy+++CJycnKMmJyIAP2LlrZ2829ESUmJ1FGoFVgUEd1BYGAg1q9fj6qqKu2y6upqrFu3DkFBQc3axrlz53Dt2jX89NNPOH36NCZMmAC1Wm2syER0G0Mex0ePHsXChQvx559/onv37khNTW10nZqamlZnJyIi42NRRHQHvXr1QmBgIDZt2qRdtmnTJgQFBaFnz57N2oa3tzf8/PwwZMgQvPrqqzhz5gwuXrzYYN+FCxeic+fOsLe3R1hYGBYvXoza2lqdPlu3bkWfPn1ga2sLT09PTJo0SdumUqkwf/58BAQEwMHBAf369UN8fHy9/WzZsgWdOnWCra0tRo8ejStXrui0r1y5Eh07doSNjQ0iIiLw3XffNeu5ErVHhjqOfX190blzZ0yfPh0JCQnw8vLCE088oe0ze/Zs3HfffVi2bBn8/f0REREBoOFhsq6urlizZo328aFDhxATEwNbW1vExsZiy5YtHKJDJqmiogIzZ86Eo6Mj/Pz88N577+m0N/d9CrgxlO7111/HiRMntGdsbx4377//PqKiouDg4IDAwEA8+eSTUCqVd8z35ptvwsfHB05OTnj00UdRXV1dr89///tfdO3aFba2tujSpQs+++yzBreVmZmJYcOGAQDc3Nx0zjz/9ttvGDRoEFxdXeHh4YF77rkH6enpd8xH0mBRRNQMjzzyCFavXq19/PXXX2POnDkt2padnR2Axr9BdnJywpo1a3DmzBl89NFHWLVqFT744ANt+/bt2zFp0iSMGzcOycnJ2L17N/r27attf+qpp/DXX39h/fr1OHnyJKZOnYoxY8bgwoUL2j6VlZVYtmwZvv32WyQkJKCkpATTp0/Xtm/evBnPPvssXnjhBZw6dQr//Oc/MWfOHOzdu7dFz5moPTDkcQzcOJYff/xxJCQkIC8vT7t89+7dOHfuHHbt2oVt27Y1a1tlZWWYMGECoqKikJSUhDfeeAMLFy5scTYiKS1YsAD79u3DL7/8gj/++APx8fFISkrStjfnfeqmadOm4YUXXkC3bt2Qk5ODnJwcTJs2DQAgk8nw8ccf4/Tp0/jmm2+wZ88evPjii01m27BhA1577TUsX74cx44dg5+fX72CZ+3atXj11VexbNkynD17FsuXL8fixYvxzTff1NteYGAgNm7cCODG2eScnBx89NFHAG4Uh88//zyOHTuG3bt3QyaTYdKkSdBoNPr9D6W2IRJRo2bNmiVOnDhRzMvLExUKhZiZmSlmZmaKtra2Yn5+vjhx4kRx1qxZja6/d+9eEYBYXFwsiqIoXrt2TRwwYIAYEBAgqlQqURRFEYC4efPmRrfxzjvviL1799Y+7t+/vzhjxowG+16+fFmUy+Vidna2zvIRI0aIL730kiiKorh69WoRgHj48GFt+9mzZ0UAYmJioiiKojhgwABx7ty5OtuYOnWqOG7cuEZzErVXhj6Ob7Vz506dY2fWrFmij4+P9vi+qaHj3MXFRVy9erUoiqK4cuVK0cPDQ6yqqtK2r1q1SgQgJicnt+RpE0mivLxctLGxETds2KBdVlhYKNrZ2YnPPvtss9+nXFxctG1LliwRe/Toccd9//TTT6KHh0eTffr37y8++eSTOsv69euns/2OHTuK69at0+nzxhtviP379xdFURQzMjJ0js2m/kbcKj8/XwQgpqam3vG5UNuzkqgWIzIpXl5eGD9+PNasWQNRFDF+/Hh4eno2e/0OHTpAFEVUVlaiR48e2LhxI2xsbBrs++OPP+Ljjz9Geno6lEol6urq4OzsrG1PSUnB3LlzG1w3NTUVarUanTt31lmuUqng4eGhfWxlZYU+ffpoH3fp0gWurq44e/Ys+vbti7Nnz+Kxxx7T2cbAgQO1334RmaLWHscNEUURwI3hcTdFRU
U1enw35ty5c4iOjoatra122a1ngIlMRXp6OmpqatCvXz/tMnd3d+1Q0ua+TzXHn3/+iRUrViAtLQ1lZWWoq6tDdXU1KisrYW9vD0dHR23fhx9+GJ9//jnOnj1bb9KV/v37a0dCVFRUID09HY8++qjOe21dXR1cXFz0ynfhwgW8+uqrSExMREFBgfYMUVZWFrp3767Xtsj4WBQRNdMjjzyCp556CgDw6aef6rXugQMH4OzsDG9vbzg5OTXa76+//sKMGTPw+uuvY/To0XBxccH69et1xmPfHH7XEKVSCblcjuPHj0Mul+u03frmQGSpWnMcN+Ts2bMAgJCQEO0yBweHev0EQdAWUDfdfq0gkSUw1PtUZmYm7rnnHjzxxBNYtmwZ3N3dcfDgQTz66KOoqamBvb29zvV4t365eKd8ALBq1Sqdwg5Avbx3MmHCBAQHB2PVqlXw9/eHRqNB9+7dOQFLO8WiiKiZxowZg5qaGgiCgNGjR+u1bmhoaLPuuXDo0CEEBwfj5Zdf1i67fPmyTp/o6Gjs3r27wWshevbsCbVajby8PAwePLjR/dTV1eHYsWPab6LPnTuHkpISdO3aFQDQtWtXJCQkYNasWdp1EhISEBkZecfnQNSeteY4vl1VVRW+/PJLDBkyBF5eXk329fLy0pm++8KFC6isrNQ+joiIwPfffw+VSgWFQgEAOHr0aKvyEUmhY8eOsLa2RmJionZmx+LiYpw/fx5xcXHNfp+6lY2NTb0ZW48fPw6NRoP33nsPMtmNS+Q3bNig0yc8PLzetrp27YrExETMnDlTu+zw4cPaf/v4+MDf3x+XLl3CjBkzmp0PgE7GwsJCnDt3DqtWrdI+z4MHDzZreyQNFkVEzSSXy7XfCuv7bVFzderUCVlZWVi/fj369OmD7du3Y/PmzTp9lixZghEjRqBjx46YPn066urqsGPHDu2sdTNmzMDMmTPx3nvvoWfPnsjPz8fu3bsRHR2N8ePHAwCsra3x9NNP4+OPP4aVlRWeeuop3HXXXdoiacGCBXjggQfQs2dP3H333di6dSs2bdqEP//80yjPm6ittOY4zsvLQ3V1NcrLy3H8+HG8/fbbKCgo0JnRrjHDhw/HJ598gv79+0OtVmPhwoWwtrbWtj/00EN4+eWX8dhjj2HRokXIysrCu+++C0B3aB5Re+fo6IhHH30UCxYsgIeHB7y9vfHyyy9rC5fmvk/dKiQkBBkZGUhJSUGHDh3g5OSE8PBw1NbW4j//+Q8mTJiAhIQEfP7553fM9+yzz2L27NmIjY3FwIEDsXbtWpw+fRphYWHaPq+//jqeeeYZuLi4YMyYMVCpVDh27BiKi4vx/PPP19tmcHAwBEHAtm3bMG7cONjZ2cHNzQ0eHh748ssv4efnh6ysLCxa9P/bu/uYpq64D+DfastLWxS0iEVRLFR8CdSpM5kuBUTFxSXgCxqj+DKjk0RQ55DHRBHd3JQ55rLF6ciSiUGSJfKHUdAY1k6G8S0K6kTnfJ/iTNQpqNAKv+ePPdzHzoJso4L2+0lMuOeee+45TX7S3+Wcc//nP3yy5HEduqKJqJNrXqDdkv+yQLsZ/rYAOzMzU3r27Cl6vV5mzJghX3zxhcuCUxGR3bt3y7Bhw8THx0cMBoNMmTJFOedwOCQ7O1vCw8NFo9GI0WiUyZMny+nTp0Xk/xew7t69W0wmk/j6+sq4cePk2rVrLvfYunWrmEwm0Wg0MnDgQCkoKGhxDESdWXvFMQBRqVQSEBAgFotFMjMzpaampk33unnzpkyYMEF0Op2YzWYpKSlx2WhBRKSiokJiYmLEx8dHRowYIbt27RIAcv78+X84YqKOVVtbK7NnzxatVishISGSm5srsbGxsnTpUhFp+++pZvX19TJ16lQJDAwUAErc5OXlidFoFH9/f0lMTJSCgoI2bXiwYcMGMRgMotfrZe7cubJy5crnNnIoLCxUfs8GBQWJ1WqV4uJiEXl+owURkfXr10vv3r
1FpVIp/58cPHhQBg8eLL6+vhITEyN2u/2FmytRx1GJ/G2SMxEREXW4wsJCzJ8/Hw8ePGh1LSEREf13nD5HRETUCRQUFMBkMqFPnz6oqqpCVlYWpk+fzoSIiOglYFJERETUCdy+fRvZ2dm4ffs2jEYjUlJSsGHDho7uFhGRV+D0OSIiIiIi8mpdOroDREREREREHYlJEVEnYLfboVKp8Oeff3Z0V4jIDcYoEdHrjdPniDoBh8OBe/fuISQkhO8kIeqEGKNERK83JkVEREREROTVOH2OyAPi4uKQnp6OZcuWISgoCCEhIcjPz8ejR48wf/585W3cpaWlAJ6fmvP9998jMDAQBw4cwODBg6HX6zFx4kTU1NS43GPZsmUu901OTsa8efOU461bt8JsNsPPzw8hISGYNm2ap4dO9EpgjBIR0bOYFBF5yI4dO2AwGHDs2DGkp6cjLS0NKSkpGD16NE6ePIkJEyYgNTUVjx8/dnv948ePsXnzZuzcuROHDh3C9evX8eGHH7b5/idOnEBGRgbWr1+PCxcuYP/+/bBare01PKJXHmOUiIiaMSki8hCLxYLVq1fDbDZj1apV8PPzg8FgwMKFC2E2m5GdnY27d+/i9OnTbq93Op3Ytm0bRo4cieHDh2PJkiUoKytr8/2vX78OnU6Hd999F/3798cbb7yBjIyM9hoe0SuPMUpERM2YFBF5SExMjPJz165d0bNnT0RHRytlISEhAIA7d+64vV6r1SIiIkI5NhqNLdZ1Z/z48ejfvz9MJhNSU1NRWFjY4hNvIm/EGCUiomZMiog8RKPRuByrVCqXsuYdrJqamtp8/bP7onTp0gV/3yfF6XQqPwcEBODkyZMoKiqC0WhEdnY2LBYLtxQm+j+MUSIiasakiOgVFRwc7LKou7GxEWfPnnWpo1arMW7cOOTm5uL06dO4evUqfvzxx5fdVSKvxBglInp1qDu6A0T074wdOxYffPAB9u3bh4iICOTl5bk8Yd67dy8uX74Mq9WKoKAglJSUoKmpCVFRUR3XaSIvwhglInp1MCkiekW99957qKqqwpw5c6BWq7F8+XLEx8cr5wMDA1FcXIycnBzU19fDbDajqKgIQ4cO7cBeE3kPxigR0auDL28lIiIiIiKvxjVFRERERETk1ZgUERERERGRV2NSREREREREXo1JEREREREReTUmRUQdzG63Q6VSvdQXNs6bNw/Jyckv7X5EREREnRmTIiI3tm3bhoCAADx9+lQpq6urg0ajQVxcnEvd5qTm0qVLbtvKycmBSqWCSqWCWq1GeHg4li9fjrq6Ok8OgYjg2Vg2GAywWq3YsmULGhoaPDkMIiLyMCZFRG7Ex8ejrq4OJ06cUMrKy8vRu3dvHD16FPX19Uq5zWZDv379EBER0WJ7Q4cORU1NDa5evYpNmzbh22+/xYoVKzw6BiLyXCxfv34dNpsNKSkp+PTTTzF69GjU1ta2eJ3D4WifARERkUcwKSJyIyoqCkajEXa7XSmz2+1ISkrCgAEDcOTIEZfyZ1/I6I5arUbv3r3Rt29fzJgxA7NmzcKePXvc1r179y5mzpyJPn36QKvVIjo6GkVFRS51mpqakJubi8jISPj6+qJfv37YsGGDcv7GjRuYPn06AgMD0aNHDyQlJeHq1avP3WvdunUIDg5Gt27dsHjxYpcvbg0NDcjIyECvXr3g5+eHt99+G8ePH291nESdjadiOTQ0FNHR0UhPT8dPP/2Es2fPYtOmTUq98PBwfPTRR5gzZw66deuGRYsWuZ0qW1lZCZVK5RKf+fn5CAsLg1arxeTJk5GXl4fAwMD/+lEQEVErmBQRtSA+Ph42m005ttlsiIuLQ2xsrFL+5MkTHD169IVfpP7O39+/xSfH9fX1GDFiBPbt24ezZ89i0aJFSE1NxbFjx5Q6q1atwsaNG7FmzRqcO3cOu3btQkhICADA6XQiMTERAQEBKC8vR0VFBfR6PSZOnOhyz7KyMlRXV8Nut6
OoqAjFxcVYt26dcn7lypXYvXs3duzYgZMnTyIyMhKJiYm4d+/ePxorUUfzZCwDwKBBg/DOO++guLjYpXzz5s2wWCw4deoU1qxZ06a2KioqsHjxYixduhSVlZUYP368ywMPIiLyECEit/Lz80Wn04nT6ZSHDx+KWq2WO3fuyK5du8RqtYqISFlZmQCQa9eutdjO2rVrxWKxKMcnTpwQg8Eg06ZNExERm80mAOT+/fsttjFp0iRZsWKFiIg8fPhQfH19JT8/323dnTt3SlRUlDQ1NSllDQ0N4u/vLwcOHBARkblz50qPHj3k0aNHSp1vvvlG9Hq9NDY2Sl1dnWg0GiksLFTOOxwOCQ0Nldzc3Bb7SdQZeSqWn5WVlSX+/v7Kcf/+/SU5OdmljrtYP3XqlACQK1euiIjIjBkzZNKkSS7XzZo1S7p37972ARMR0T+m7tCMjKgTi4uLw6NHj3D8+HHcv38fAwcORHBwMGJjYzF//nzU19fDbrfDZDKhX79+rbZ15swZ6PV6NDY2wuFwYNKkSfj666/d1m1sbMQnn3yCH374ATdv3oTD4UBDQwO0Wi0AoLq6Gg0NDUhISHB7fVVVFX777TcEBAS4lNfX17ssILdYLEqbAPDWW2+hrq4ON27cwIMHD+B0OjFmzBjlvEajwahRo1BdXd36B0fUybRnLLdERKBSqVzKRo4c+Y/buXDhAiZPnuxSNmrUKOzdu/df9YuIiNqGSRFRCyIjI9G3b1/YbDbcv38fsbGxAIDQ0FCEhYXh8OHDsNlsGDt27AvbioqKwp49e6BWqxEaGgofH58W63722Wf48ssvsWXLFkRHR0On02HZsmXK1Dd/f/9W71VXV4cRI0agsLDwuXPBwcEv7CvR66Y9Y7kl1dXVGDBggEuZTqdzOe7S5a8Z6yKilDmdzn99TyIiaj9cU0TUivj4eNjtdtjtdpfte61WK0pLS3Hs2LE2rUHw8fFBZGQkwsPDW02IgL/WFCQlJWH27NmwWCwwmUz49ddflfNmsxn+/v4oKytze/3w4cNx8eJF9OrVC5GRkS7/unfvrtSrqqrCkydPlOMjR45Ar9cjLCwMERER8PHxQUVFhXLe6XTi+PHjGDJkyAvHS9TZtFcsu3P+/Hns378fU6dObbVe80OJmpoapayystKlTlRU1HMbmnCDEyIiz2NSRNSK+Ph4/Pzzz6isrFSeLgNAbGwstm/fDofD8a+/SLXEbDbj4MGDOHz4MKqrq/H+++/jjz/+UM77+fkhKysLK1euREFBAS5duoQjR47gu+++AwDMmjULBoMBSUlJKC8vx5UrV2C325GRkYHff/9dacfhcGDBggU4d+4cSkpKsHbtWixZsgRdunSBTqdDWloaMjMzsX//fpw7dw4LFy7E48ePsWDBgnYdL9HL0F6x/PTpU9y+fRu3bt3CmTNn8NVXXyE2NhbDhg1DZmZmq9dGRkYiLCwMOTk5uHjxIvbt24fPP//cpU56ejpKSkqQl5eHixcvYvv27SgtLX1uah4REbUvTp8jakV8fDyePHmCQYMGKbu7AX99kaqtrVW2+21Pq1evxuXLl5GYmAitVotFixYhOTkZDx48UOqsWbMGarUa2dnZuHXrFoxGIxYvXgwA0Gq1OHToELKysjBlyhTU1taiT58+SEhIQLdu3ZQ2EhISYDabYbVa0dDQgJkzZyInJ0c5v3HjRjQ1NSE1NRW1tbUYOXIkDhw4gKCgoHYdL9HL0F6x/Msvv8BoNKJr167o3r07hgwZglWrViEtLQ2+vr6tXqvRaFBUVIS0tDTExMTgzTffxMcff4yUlBSlzpgxY7Bt2zasW7cOq1evRmJiIpYvX97iGkQiImofKnl2cjMRERF1KgsXLsT58+dRXl7e0V0hInpt8S9FREREncjmzZsxfvx46HQ6lJaWYseOHdi6dWtHd4uI6LXGvxQRERF1ItOnT4fdbkdtbS1MJhPS09OV6bFEROQZTIqIiIiIiMircf
c5IiIiIiLyakyKiIiIiIjIqzEpIiIiIiIir8akiIiIiIiIvBqTIiIiIiIi8mpMioiIiIiIyKsxKSIiIiIiIq/GpIiIiIiIiLwakyIiIiIiIvJq/wvnnQ3fkVlEQgAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "np.random.seed(9999) # Fix the seed so the results are replicable.\n", + "N = 20\n", + "# Create samples\n", + "y = norm.rvs(loc=3, scale=0.4, size=N*4)\n", + "y[N:2*N] = y[N:2*N]+1\n", + "y[2*N:3*N] = y[2*N:3*N]-0.5\n", + "# Add a `Treatment` column\n", + "t1 = np.repeat('Placebo', N*2).tolist()\n", + "t2 = np.repeat('Drug', N*2).tolist()\n", + "treatment = t1 + t2 \n", + "# Add a `Rep` column as the first variable for the 2 replicates of experiments done\n", + "rep = []\n", + "for i in range(N*2):\n", + " rep.append('Rep1')\n", + " rep.append('Rep2')\n", + "# Add a `Genotype` column as the second variable\n", + "wt = np.repeat('W', N).tolist()\n", + "mt = np.repeat('M', N).tolist()\n", + "wt2 = np.repeat('W', N).tolist()\n", + "mt2 = np.repeat('M', N).tolist()\n", + "genotype = wt + mt + wt2 + mt2\n", + "# Add an `id` column for paired data plotting.\n", + "id = list(range(0, N*2))\n", + "id_col = id + id \n", + "# Combine all columns into a DataFrame.\n", + "df_delta2 = pd.DataFrame({'ID' : id_col,\n", + " 'Rep' : rep,\n", + " 'Genotype' : genotype, \n", + " 'Treatment': treatment,\n", + " 'Y' : y\n", + " })\n", + "unpaired_delta2 = dabest.load(data = df_delta2, x = [\"Genotype\", \"Genotype\"], y = \"Y\", delta2 = True, experiment = \"Treatment\")\n", + "unpaired_delta2.mean_diff.plot();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class MiniMetaDelta(object):\n", + " \"\"\"\n", + " A class to compute and store the weighted delta.\n", + " A weighted delta is calculated if the argument ``mini_meta=True`` is passed during ``dabest.load()``.\n", + " \n", + " \"\"\"\n", + "\n", + " def __init__(self, effectsizedataframe, permutation_count,\n", + " ci=95):\n", + " from ._stats_tools import effsize as es\n", + " from ._stats_tools import confint_1group as ci1g\n", + " from ._stats_tools import 
confint_2group_diff as ci2g\n", + " \n", + " self.__effsizedf = effectsizedataframe.results\n", + " self.__dabest_obj = effectsizedataframe.dabest_obj\n", + " self.__ci = ci\n", + " self.__resamples = effectsizedataframe.resamples\n", + " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", + " self.__permutation_count = permutation_count\n", + " self.__bootstraps = np.array(self.__effsizedf[\"bootstraps\"])\n", + " self.__control = np.array(self.__effsizedf[\"control\"])\n", + " self.__test = np.array(self.__effsizedf[\"test\"])\n", + " self.__control_N = np.array(self.__effsizedf[\"control_N\"])\n", + " self.__test_N = np.array(self.__effsizedf[\"test_N\"])\n", + "\n", + "\n", + " idx = self.__dabest_obj.idx\n", + " dat = self.__dabest_obj._plot_data\n", + " xvar = self.__dabest_obj._xvar\n", + " yvar = self.__dabest_obj._yvar\n", + "\n", + " # compute the variances of each control group and each test group\n", + " control_var=[]\n", + " test_var=[]\n", + " for j, current_tuple in enumerate(idx):\n", + " cname = current_tuple[0]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + " control_var.append(np.var(control, ddof=1))\n", + "\n", + " tname = current_tuple[1]\n", + " test = dat[dat[xvar] == tname][yvar].copy()\n", + " test_var.append(np.var(test, ddof=1))\n", + " self.__control_var = np.array(control_var)\n", + " self.__test_var = np.array(test_var)\n", + "\n", + " # Compute pooled group variances for each pair of experiment groups\n", + " # based on the raw data\n", + " self.__group_var = ci2g.calculate_group_var(self.__control_var, \n", + " self.__control_N,\n", + " self.__test_var, \n", + " self.__test_N)\n", + "\n", + " # Compute the weighted average mean differences of the bootstrap data\n", + " # using the pooled group variances of the raw data as the inverse of \n", + " # weights\n", + " self.__bootstraps_weighted_delta = ci2g.calculate_weighted_delta(\n", + " self.__group_var, \n", + " self.__bootstraps, \n", + " self.__resamples)\n", + "\n", 
+ " # Compute the weighted average mean difference based on the raw data\n", + " self.__difference = es.weighted_delta(self.__effsizedf[\"difference\"],\n", + " self.__group_var)\n", + "\n", + " sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta)\n", + "\n", + "\n", + " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", + " self.__bootstraps_weighted_delta, self.__difference)\n", + " \n", + " self.__jackknives = np.array(ci1g.compute_1group_jackknife(\n", + " self.__bootstraps_weighted_delta, \n", + " np.mean))\n", + "\n", + " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", + "\n", + " # Compute BCa intervals.\n", + " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", + " self.__bias_correction, self.__acceleration_value,\n", + " self.__resamples, ci)\n", + " \n", + " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", + "\n", + " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", + " self.__bca_low = sorted_weighted_deltas[bca_idx_low]\n", + " self.__bca_high = sorted_weighted_deltas[bca_idx_high]\n", + "\n", + " err1 = \"The $lim_type limit of the interval\"\n", + " err2 = \"was in the $loc 10 values.\"\n", + " err3 = \"The result should be considered unstable.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if bca_idx_low <= 10:\n", + " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", + " loc=\"bottom\"),\n", + " stacklevel=1)\n", + "\n", + " if bca_idx_high >= self.__resamples-9:\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", + " loc=\"top\"),\n", + " stacklevel=1)\n", + "\n", + " else:\n", + " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", + " err2 = \"It is set to the effect size itself.\"\n", + " err3 = \"All bootstrap values were likely all the same.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if isnan(bca_idx_low):\n", + " self.__bca_low = self.__difference\n", + " 
warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", + " stacklevel=0)\n", + "\n", + " if isnan(bca_idx_high):\n", + " self.__bca_high = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", + " stacklevel=0)\n", + "\n", + " # Compute percentile intervals.\n", + " pct_idx_low = int((self.__alpha/2) * self.__resamples)\n", + " pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples)\n", + "\n", + " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", + " self.__pct_low = sorted_weighted_deltas[pct_idx_low]\n", + " self.__pct_high = sorted_weighted_deltas[pct_idx_high]\n", + " \n", + " \n", + "\n", + " def __permutation_test(self):\n", + " \"\"\"\n", + " Perform a permutation test and obtain the permutation p-value\n", + " based on the permutation data.\n", + " \"\"\"\n", + " self.__permutations = np.array(self.__effsizedf[\"permutations\"])\n", + " self.__permutations_var = np.array(self.__effsizedf[\"permutations_var\"])\n", + "\n", + " THRESHOLD = np.abs(self.__difference)\n", + "\n", + " all_num = []\n", + " all_denom = []\n", + "\n", + " groups = len(self.__permutations)\n", + " for i in range(0, len(self.__permutations[0])):\n", + " weight = [1/self.__permutations_var[j][i] for j in range(0, groups)]\n", + " all_num.append(np.sum([weight[j]*self.__permutations[j][i] for j in range(0, groups)]))\n", + " all_denom.append(np.sum(weight))\n", + " \n", + " output=[]\n", + " for i in range(0, len(all_num)):\n", + " output.append(all_num[i]/all_denom[i])\n", + " \n", + " self.__permutations_weighted_delta = np.array(output)\n", + "\n", + " count = sum(np.abs(self.__permutations_weighted_delta)>THRESHOLD)\n", + " self.__pvalue_permutation = count/self.__permutation_count\n", + "\n", + "\n", + "\n", + " def __repr__(self, header=True, sigfig=3):\n", + " from .misc_tools import print_greeting\n", + " \n", + " is_paired = self.__dabest_obj.is_paired\n", + "\n", + " PAIRED_STATUS = {'baseline' : 'paired', \n", + " 'sequential' : 
'paired',\n", + " 'None' : 'unpaired'\n", + " }\n", + "\n", + " first_line = {\"paired_status\": PAIRED_STATUS[str(is_paired)]}\n", + " \n", + "\n", + " out1 = \"The weighted-average {paired_status} mean differences \".format(**first_line)\n", + " \n", + " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", + " if \".\" in str(self.__ci):\n", + " ci_width = base_string_fmt.format(self.__ci)\n", + " else:\n", + " ci_width = str(self.__ci)\n", + " \n", + " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", + " \"ci\" : ci_width,\n", + " \"bca_low\" : base_string_fmt.format(self.__bca_low),\n", + " \"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", + " \n", + " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", + " out = out1 + out2\n", + "\n", + " if header is True:\n", + " out = print_greeting() + \"\\n\" + \"\\n\" + out\n", + "\n", + "\n", + " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", + "\n", + " \n", + " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(pval_rounded)\n", + " p2 = \"calculated for legacy purposes only. 
\"\n", + " pvalue = p1 + p2\n", + "\n", + "\n", + " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", + " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", + " bs = bs1 + bs2\n", + "\n", + " pval_def1 = \"Any p-value reported is the probability of observing the\" + \\\n", + " \"effect size (or greater),\\nassuming the null hypothesis of\" + \\\n", + " \"zero difference is true.\"\n", + " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", + " \"control and test labels were performed.\"\n", + " pval_def = pval_def1 + pval_def2\n", + "\n", + "\n", + " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", + "\n", + "\n", + " def to_dict(self):\n", + " \"\"\"\n", + " Returns all attributes of the `dabest.MiniMetaDelta` object as a\n", + " dictionary.\n", + " \"\"\"\n", + " # Only get public (user-facing) attributes.\n", + " attrs = [a for a in dir(self)\n", + " if not a.startswith((\"_\", \"to_dict\"))]\n", + " out = {}\n", + " for a in attrs:\n", + " out[a] = getattr(self, a)\n", + " return out\n", + "\n", + "\n", + " @property\n", + " def ci(self):\n", + " \"\"\"\n", + " Returns the width of the confidence interval, in percent.\n", + " \"\"\"\n", + " return self.__ci\n", + "\n", + "\n", + " @property\n", + " def alpha(self):\n", + " \"\"\"\n", + " Returns the significance level of the statistical test as a float\n", + " between 0 and 1.\n", + " \"\"\"\n", + " return self.__alpha\n", + "\n", + "\n", + " @property\n", + " def bias_correction(self):\n", + " return self.__bias_correction\n", + "\n", + "\n", + " @property\n", + " def bootstraps(self):\n", + " '''\n", + " Return the bootstrapped differences from all the experiment groups.\n", + " '''\n", + " return self.__bootstraps\n", + "\n", + "\n", + " @property\n", + " def jackknives(self):\n", + " return self.__jackknives\n", + "\n", + "\n", + " @property\n", + " def acceleration_value(self):\n", + " return self.__acceleration_value\n", + 
"\n", + "\n", + " @property\n", + " def bca_low(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__bca_low\n", + "\n", + "\n", + " @property\n", + " def bca_high(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval upper limit.\n", + " \"\"\"\n", + " return self.__bca_high\n", + "\n", + "\n", + " @property\n", + " def bca_interval_idx(self):\n", + " return self.__bca_interval_idx\n", + "\n", + "\n", + " @property\n", + " def control(self):\n", + " '''\n", + " Return the names of the control groups from all the experiment \n", + " groups in order.\n", + " '''\n", + " return self.__control\n", + "\n", + "\n", + " @property\n", + " def test(self):\n", + " '''\n", + " Return the names of the test groups from all the experiment \n", + " groups in order.\n", + " '''\n", + " return self.__test\n", + " \n", + " @property\n", + " def control_N(self):\n", + " '''\n", + " Return the sizes of the control groups from all the experiment \n", + " groups in order.\n", + " '''\n", + " return self.__control_N\n", + "\n", + "\n", + " @property\n", + " def test_N(self):\n", + " '''\n", + " Return the sizes of the test groups from all the experiment \n", + " groups in order.\n", + " '''\n", + " return self.__test_N\n", + "\n", + "\n", + " @property\n", + " def control_var(self):\n", + " '''\n", + " Return the estimated population variances of the control groups \n", + " from all the experiment groups in order. Here the population \n", + " variance is estimated from the sample variance. \n", + " '''\n", + " return self.__control_var\n", + "\n", + "\n", + " @property\n", + " def test_var(self):\n", + " '''\n", + " Return the estimated population variances of the control groups \n", + " from all the experiment groups in order. Here the population \n", + " variance is estimated from the sample variance. 
\n", + " '''\n", + " return self.__test_var\n", + "\n", + " \n", + " @property\n", + " def group_var(self):\n", + " '''\n", + " Return the pooled group variances of all the experiment groups \n", + " in order. \n", + " '''\n", + " return self.__group_var\n", + "\n", + "\n", + " @property\n", + " def bootstraps_weighted_delta(self):\n", + " '''\n", + " Return the weighted-average mean differences calculated from the bootstrapped \n", + " deltas and weights across the experiment groups, where the weights are \n", + " the inverse of the pooled group variances.\n", + " '''\n", + " return self.__bootstraps_weighted_delta\n", + "\n", + "\n", + " @property\n", + " def difference(self):\n", + " '''\n", + " Return the weighted-average delta calculated from the raw data.\n", + " '''\n", + " return self.__difference\n", + "\n", + "\n", + " @property\n", + " def pct_interval_idx (self):\n", + " return self.__pct_interval_idx \n", + "\n", + "\n", + " @property\n", + " def pct_low(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_low\n", + "\n", + "\n", + " @property\n", + " def pct_high(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_high\n", + "\n", + "\n", + " @property\n", + " def pvalue_permutation(self):\n", + " try:\n", + " return self.__pvalue_permutation\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__pvalue_permutation\n", + " \n", + "\n", + " @property\n", + " def permutation_count(self):\n", + " \"\"\"\n", + " The number of permuations taken.\n", + " \"\"\"\n", + " return self.__permutation_count\n", + "\n", + " \n", + " @property\n", + " def permutations(self):\n", + " '''\n", + " Return the mean differences of permutations obtained during\n", + " the permutation test for each experiment group.\n", + " '''\n", + " try:\n", + " return self.__permutations\n", + " except AttributeError:\n", + 
" self.__permutation_test()\n", + " return self.__permutations\n", + "\n", + "\n", + " @property\n", + " def permutations_var(self):\n", + " '''\n", + " Return the pooled group variances of permutations obtained during\n", + " the permutation test for each experiment group.\n", + " '''\n", + " try:\n", + " return self.__permutations_var\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__permutations_var\n", + "\n", + " \n", + " @property\n", + " def permutations_weighted_delta(self):\n", + " '''\n", + " Return the weighted-average deltas of permutations obtained \n", + " during the permutation test.\n", + " '''\n", + " try:\n", + " return self.__permutations_weighted_delta\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__permutations_weighted_delta\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The weighted delta is calcuated as follows:\n", + "\n", + "$$\\theta_{\\text{weighted}} = \\frac{\\Sigma\\hat{\\theta_{i}}w_{i}}{{\\Sigma}w_{i}}$$\n", + "\n", + "where:\n", + "\n", + "$$\\hat{\\theta_{i}} = \\text{Mean difference for replicate }i$$\n", + "\n", + "\n", + "$$w_{i} = \\text{Weight for replicate }i = \\frac{1}{s_{i}^2} $$\n", + "\n", + "$$s_{i}^2 = \\text{Pooled variance for replicate }i = \\frac{(n_{test}-1)s_{test}^2+(n_{control}-1)s_{control}^2}{n_{test}+n_{control}-2}$$\n", + "\n", + "$$n = \\text{sample size and }s^2 = \\text{variance for control/test.}$$\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example: mini-meta-delta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DABEST v2023.03.29\n", + "==================\n", + " \n", + "Good afternoon!\n", + "The current time is Tue Apr 18 14:47:44 2023.\n", + "\n", + "The weighted-average unpaired mean differences is 0.0336 [95%CI -0.137, 0.228].\n", + "The p-value of the 
two-sided permutation t-test is 0.736, calculated for legacy purposes only. \n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing theeffect size (or greater),\n", + "assuming the null hypothesis ofzero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "Ns = 20\n", + "c1 = norm.rvs(loc=3, scale=0.4, size=Ns)\n", + "c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", + "c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", + "t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)\n", + "t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)\n", + "t3 = norm.rvs(loc=3, scale=0.75, size=Ns)\n", + "my_df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1,\n", + " 'Control 2' : c2, 'Test 2' : t2,\n", + " 'Control 3' : c3, 'Test 3' : t3})\n", + "my_dabest_object = dabest.load(my_df, idx=((\"Control 1\", \"Test 1\"), (\"Control 2\", \"Test 2\"), (\"Control 3\", \"Test 3\")), mini_meta=True)\n", + "my_dabest_object.mean_diff.mini_meta_delta" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As of version 2023.02.14, weighted delta can only be calculated for mean difference, and not for standardized measures such as Cohen's *d*.\n", + "\n", + "Details about the calculated weighted delta are accessed as attributes of the ``mini_meta_delta`` class. 
See the `minimetadelta` for details on usage.\n", + "\n", + "Refer to Chapter 10 of the Cochrane handbook for further information on meta-analysis: \n", + "https://training.cochrane.org/handbook/current/chapter-10\n", + "\t\t" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb index 4f000c36..7ad4e9c6 100644 --- a/nbs/API/plot_tools.ipynb +++ b/nbs/API/plot_tools.ipynb @@ -97,28 +97,6 @@ " b.set_linewidth(line_width)\n", "\n", "\n", - "\n", - "# def align_yaxis(ax1, v1, ax2, v2):\n", - "# \"\"\"adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1\"\"\"\n", - "# # Taken from\n", - "# # http://stackoverflow.com/questions/7630778/\n", - "# # matplotlib-align-origin-of-right-axis-with-specific-left-axis-value\n", - "# _, y1 = ax1.transData.transform((0, v1))\n", - "# _, y2 = ax2.transData.transform((0, v2))\n", - "# inv = ax2.transData.inverted()\n", - "# _, dy = inv.transform((0, 0)) - inv.transform((0, y1-y2))\n", - "# miny, maxy = ax2.get_ylim()\n", - "# ax2.set_ylim(miny+dy, maxy+dy)\n", - "#\n", - "#\n", - "#\n", - "# def rotate_ticks(axes, angle=45, alignment='right'):\n", - "# for tick in axes.get_xticklabels():\n", - "# tick.set_rotation(angle)\n", - "# tick.set_horizontalalignment(alignment)\n", - "\n", - "\n", - "\n", "def get_swarm_spans(coll):\n", " \"\"\"\n", " Given a matplotlib Collection, will obtain the x and y spans\n", From 4e4913b822421287ae021cb4aaaa7aff67e7aca0 Mon Sep 17 00:00:00 2001 From: cyberosa Date: Thu, 14 Dec 2023 20:34:27 +0100 Subject: [PATCH 02/10] Organizing imports in class. 
Reduced load of numpy library --- dabest/_classes.py | 138 +++++++++++++------------------------ nbs/API/class.ipynb | 162 ++++++++++++++++---------------------------- 2 files changed, 104 insertions(+), 196 deletions(-) diff --git a/dabest/_classes.py b/dabest/_classes.py index 43d40d03..94031680 100644 --- a/dabest/_classes.py +++ b/dabest/_classes.py @@ -4,10 +4,22 @@ __all__ = ['Dabest', 'TwoGroupsEffectSize', 'EffectSizeDataFrame', 'PermutationTest'] # %% ../nbs/API/class.ipynb 4 -import numpy as np -from scipy.stats import norm +# Import standard data science libraries +from numpy import array, isnan, isinf, repeat, random, issubdtype, number, isin, abs, var +from numpy import sort as npsort +import lqrt import pandas as pd +import seaborn as sns +import scipy.stats as spstats +from scipy.stats import norm from scipy.stats import randint +import datetime as dt +import statsmodels +from statsmodels.stats.contingency_tables import mcnemar +from string import Template +import warnings +from numpy import nan as npnan +from numpy.random import PCG64, RandomState # %% ../nbs/API/class.ipynb 6 class Dabest(object): @@ -26,11 +38,6 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, use `dabest.load()` to parse your DataFrame prior to analysis. """ - # Import standard data science libraries. 
- import numpy as np - import pandas as pd - import seaborn as sns - self.__delta2 = delta2 self.__experiment = experiment self.__ci = ci @@ -65,7 +72,7 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, raise ValueError(err0 + err1) elif all([isinstance(i, (tuple, list)) for i in idx]): all_idx_lengths = [len(t) for t in idx] - if (np.array(all_idx_lengths) != 2).any(): + if (array(all_idx_lengths) != 2).any(): err1 = "`mini_meta` is True, but some idx " err2 = "in {} does not consist only of two groups.".format(idx) raise ValueError(err1 + err2) @@ -74,10 +81,11 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, # Check if this is a 2x2 ANOVA case and x & y are valid columns # Create experiment_label and x1_level - if delta2 is True: - if proportional is True: + if delta2: + if proportional: err0 = '`proportional` and `delta` cannot be True at the same time.' raise ValueError(err0) + # idx should not be specified if idx: err0 = '`idx` should not be specified when `delta2` is True.'.format(len(x)) @@ -87,17 +95,18 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, if len(x) != 2: err0 = '`delta2` is True but the number of variables indicated by `x` is {}.'.format(len(x)) raise ValueError(err0) - else: - for i in x: - if i not in data_in.columns: - err = '{0} is not a column in `data`. Please check.'.format(i) - raise IndexError(err) + + for i in x: + if i not in data_in.columns: + err = '{0} is not a column in `data`. Please check.'.format(i) + raise IndexError(err) # Check if y is valid if not y: err0 = '`delta2` is True but `y` is not indicated.' raise ValueError(err0) - elif y not in data_in.columns: + + if y not in data_in.columns: err = '{0} is not a column in `data`. Please check.'.format(y) raise IndexError(err) @@ -111,11 +120,11 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, if len(experiment_label) != 2: err0 = '`experiment_label` does not have a length of 2.' 
raise ValueError(err0) - else: - for i in experiment_label: - if i not in data_in[experiment].unique(): - err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment) - raise IndexError(err) + + for i in experiment_label: + if i not in data_in[experiment].unique(): + err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment) + raise IndexError(err) else: experiment_label = data_in[experiment].unique() @@ -231,7 +240,7 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, raise IndexError(err) # check y is numeric. - if not np.issubdtype(data_in[y].dtype, np.number): + if not issubdtype(data_in[y].dtype, number): err = '{0} is a column in `data`, but it is not numeric.'.format(y) raise ValueError(err) @@ -350,17 +359,8 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, def __repr__(self): from .__init__ import __version__ - import datetime as dt - import numpy as np - from .misc_tools import print_greeting - # Removed due to the deprecation of is_paired - #if self.__is_paired: - # es = "Paired e" - #else: - # es = "E" - greeting_header = print_greeting() RM_STATUS = {'baseline' : 'for repeated measures against baseline \n', @@ -411,13 +411,6 @@ def __repr__(self): return "\n".join(out) - - # def __variable_name(self): - # return [k for k,v in locals().items() if v is self] - # - # @property - # def variable_name(self): - # return self.__variable_name() @property def mean_diff(self): @@ -721,20 +714,6 @@ def __init__(self, control, test, effect_size, resamples=5000, permutation_count=5000, random_seed=12345): - - - import numpy as np - from numpy import array, isnan, isinf - from numpy import sort as npsort - from numpy.random import choice, seed - - import scipy.stats as spstats - - # import statsmodels.stats.power as power - import statsmodels - - from string import Template - import warnings from ._stats_tools import effsize as es from ._stats_tools import confint_2group_diff as 
ci2g @@ -763,7 +742,7 @@ def __init__(self, control, test, effect_size, err1 = "`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined." raise ValueError(err1) - if proportional==True and (np.isin(control, [0, 1]).all() == False or np.isin(test, [0, 1]).all() == False): + if proportional==True and (isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False): err1 = "`proportional` is True; Only accept binary data consisting of 0 and 1." raise ValueError(err1) @@ -894,8 +873,7 @@ def __init__(self, control, test, effect_size, # for binary paired data, use McNemar's test # References: # https://en.wikipedia.org/wiki/McNemar%27s_test - from statsmodels.stats.contingency_tables import mcnemar - import pandas as pd + df_temp = pd.DataFrame({'control': control, 'test': test}) x1 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 0)]) x2 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 1)]) @@ -921,7 +899,6 @@ def __init__(self, control, test, effect_size, kruskal = spstats.kruskal(control, test, nan_policy='omit') self.__pvalue_kruskal = kruskal.pvalue self.__statistic_kruskal = kruskal.statistic - # self.__power = np.nan else: # for mean difference, Cohen's d, and Hedges' g. 
# Welch's t-test, assumes normality of distributions, @@ -1172,7 +1149,6 @@ def pct_high(self): @property def pvalue_brunner_munzel(self): - from numpy import nan as npnan try: return self.__pvalue_brunner_munzel except AttributeError: @@ -1180,7 +1156,6 @@ def pvalue_brunner_munzel(self): @property def statistic_brunner_munzel(self): - from numpy import nan as npnan try: return self.__statistic_brunner_munzel except AttributeError: @@ -1190,7 +1165,6 @@ def statistic_brunner_munzel(self): @property def pvalue_wilcoxon(self): - from numpy import nan as npnan try: return self.__pvalue_wilcoxon except AttributeError: @@ -1198,7 +1172,6 @@ def pvalue_wilcoxon(self): @property def statistic_wilcoxon(self): - from numpy import nan as npnan try: return self.__statistic_wilcoxon except AttributeError: @@ -1206,7 +1179,6 @@ def statistic_wilcoxon(self): @property def pvalue_mcnemar(self): - from numpy import nan as npnan try: return self.__pvalue_mcnemar except AttributeError: @@ -1214,7 +1186,6 @@ def pvalue_mcnemar(self): @property def statistic_mcnemar(self): - from numpy import nan as npnan try: return self.__statistic_mcnemar except AttributeError: @@ -1224,7 +1195,6 @@ def statistic_mcnemar(self): @property def pvalue_paired_students_t(self): - from numpy import nan as npnan try: return self.__pvalue_paired_students_t except AttributeError: @@ -1232,7 +1202,6 @@ def pvalue_paired_students_t(self): @property def statistic_paired_students_t(self): - from numpy import nan as npnan try: return self.__statistic_paired_students_t except AttributeError: @@ -1242,7 +1211,6 @@ def statistic_paired_students_t(self): @property def pvalue_kruskal(self): - from numpy import nan as npnan try: return self.__pvalue_kruskal except AttributeError: @@ -1250,7 +1218,6 @@ def pvalue_kruskal(self): @property def statistic_kruskal(self): - from numpy import nan as npnan try: return self.__statistic_kruskal except AttributeError: @@ -1260,7 +1227,6 @@ def statistic_kruskal(self): @property 
def pvalue_welch(self): - from numpy import nan as npnan try: return self.__pvalue_welch except AttributeError: @@ -1268,7 +1234,6 @@ def pvalue_welch(self): @property def statistic_welch(self): - from numpy import nan as npnan try: return self.__statistic_welch except AttributeError: @@ -1278,7 +1243,6 @@ def statistic_welch(self): @property def pvalue_students_t(self): - from numpy import nan as npnan try: return self.__pvalue_students_t except AttributeError: @@ -1286,7 +1250,6 @@ def pvalue_students_t(self): @property def statistic_students_t(self): - from numpy import nan as npnan try: return self.__statistic_students_t except AttributeError: @@ -1296,7 +1259,6 @@ def statistic_students_t(self): @property def pvalue_mann_whitney(self): - from numpy import nan as npnan try: return self.__pvalue_mann_whitney except AttributeError: @@ -1306,7 +1268,6 @@ def pvalue_mann_whitney(self): @property def statistic_mann_whitney(self): - from numpy import nan as npnan try: return self.__statistic_mann_whitney except AttributeError: @@ -1339,7 +1300,6 @@ def permutations_var(self): @property def proportional_difference(self): - from numpy import nan as npnan try: return self.__proportional_difference except AttributeError: @@ -1380,7 +1340,6 @@ def __init__(self, dabest, effect_size, def __pre_calc(self): - import pandas as pd from .misc_tools import print_greeting, get_varname from ._stats_tools import confint_2group_diff as ci2g from ._delta_objects import MiniMetaDelta, DeltaDelta @@ -1550,8 +1509,6 @@ def __repr__(self): def __calc_lqrt(self): - import lqrt - import pandas as pd rnd_seed = self.__random_seed db_obj = self.__dabest_obj @@ -2039,16 +1996,13 @@ class PermutationTest: """ - def __init__(self, control:np.array, - test:np.array, # These should be numerical iterables. + def __init__(self, control: array, + test: array, # These should be numerical iterables. 
effect_size:str, # Any one of the following are accepted inputs: 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta' is_paired:str=None, permutation_count:int=5000, # The number of permutations (reshuffles) to perform. random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable. **kwargs): - - import numpy as np - from numpy.random import PCG64, RandomState from ._stats_tools.effsize import two_group_difference from ._stats_tools.confint_2group_diff import calculate_group_var @@ -2060,20 +2014,20 @@ def __init__(self, control:np.array, raise ValueError("The two arrays do not have the same length.") # Initialise random number generator. - # rng = np.random.default_rng(seed=random_seed) + # rng = random.default_rng(seed=random_seed) rng = RandomState(PCG64(random_seed)) # Set required constants and variables - control = np.array(control) - test = np.array(test) + control = array(control) + test = array(test) control_sample = control.copy() test_sample = test.copy() - BAG = np.array([*control, *test]) + BAG = array([*control, *test]) CONTROL_LEN = int(len(control)) EXTREME_COUNT = 0. - THRESHOLD = np.abs(two_group_difference(control, test, + THRESHOLD = abs(two_group_difference(control, test, is_paired, effect_size)) self.__permutations = [] self.__permutations_var = [] @@ -2103,18 +2057,18 @@ def __init__(self, control:np.array, es = two_group_difference(control_sample, test_sample, False, effect_size) - var = calculate_group_var(np.var(control_sample, ddof=1), + group_var = calculate_group_var(var(control_sample, ddof=1), CONTROL_LEN, - np.var(test_sample, ddof=1), + var(test_sample, ddof=1), len(test_sample)) self.__permutations.append(es) - self.__permutations_var.append(var) + self.__permutations_var.append(group_var) - if np.abs(es) > THRESHOLD: + if abs(es) > THRESHOLD: EXTREME_COUNT += 1. 
- self.__permutations = np.array(self.__permutations) - self.__permutations_var = np.array(self.__permutations_var) + self.__permutations = array(self.__permutations) + self.__permutations_var = array(self.__permutations_var) self.pvalue = EXTREME_COUNT / permutation_count diff --git a/nbs/API/class.ipynb b/nbs/API/class.ipynb index e4cba0a9..00b7c6f0 100644 --- a/nbs/API/class.ipynb +++ b/nbs/API/class.ipynb @@ -55,10 +55,22 @@ "outputs": [], "source": [ "#| export\n", - "import numpy as np\n", - "from scipy.stats import norm\n", + "# Import standard data science libraries\n", + "from numpy import array, isnan, isinf, repeat, random, issubdtype, number, isin, abs, var\n", + "from numpy import sort as npsort\n", + "import lqrt\n", "import pandas as pd\n", - "from scipy.stats import randint" + "import seaborn as sns\n", + "import scipy.stats as spstats\n", + "from scipy.stats import norm\n", + "from scipy.stats import randint\n", + "import datetime as dt\n", + "import statsmodels\n", + "from statsmodels.stats.contingency_tables import mcnemar\n", + "from string import Template\n", + "import warnings\n", + "from numpy import nan as npnan\n", + "from numpy.random import PCG64, RandomState" ] }, { @@ -96,11 +108,6 @@ " use `dabest.load()` to parse your DataFrame prior to analysis.\n", " \"\"\"\n", "\n", - " # Import standard data science libraries.\n", - " import numpy as np\n", - " import pandas as pd\n", - " import seaborn as sns\n", - "\n", " self.__delta2 = delta2\n", " self.__experiment = experiment\n", " self.__ci = ci\n", @@ -135,7 +142,7 @@ " raise ValueError(err0 + err1)\n", " elif all([isinstance(i, (tuple, list)) for i in idx]):\n", " all_idx_lengths = [len(t) for t in idx]\n", - " if (np.array(all_idx_lengths) != 2).any():\n", + " if (array(all_idx_lengths) != 2).any():\n", " err1 = \"`mini_meta` is True, but some idx \"\n", " err2 = \"in {} does not consist only of two groups.\".format(idx)\n", " raise ValueError(err1 + err2)\n", @@ -144,10 +151,11 @@ 
"\n", " # Check if this is a 2x2 ANOVA case and x & y are valid columns\n", " # Create experiment_label and x1_level\n", - " if delta2 is True:\n", - " if proportional is True:\n", + " if delta2:\n", + " if proportional:\n", " err0 = '`proportional` and `delta` cannot be True at the same time.'\n", " raise ValueError(err0)\n", + " \n", " # idx should not be specified\n", " if idx:\n", " err0 = '`idx` should not be specified when `delta2` is True.'.format(len(x))\n", @@ -157,17 +165,18 @@ " if len(x) != 2:\n", " err0 = '`delta2` is True but the number of variables indicated by `x` is {}.'.format(len(x))\n", " raise ValueError(err0)\n", - " else:\n", - " for i in x:\n", - " if i not in data_in.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(i)\n", - " raise IndexError(err)\n", + " \n", + " for i in x:\n", + " if i not in data_in.columns:\n", + " err = '{0} is not a column in `data`. Please check.'.format(i)\n", + " raise IndexError(err)\n", "\n", " # Check if y is valid\n", " if not y:\n", " err0 = '`delta2` is True but `y` is not indicated.'\n", " raise ValueError(err0)\n", - " elif y not in data_in.columns:\n", + " \n", + " if y not in data_in.columns:\n", " err = '{0} is not a column in `data`. Please check.'.format(y)\n", " raise IndexError(err)\n", "\n", @@ -181,11 +190,11 @@ " if len(experiment_label) != 2:\n", " err0 = '`experiment_label` does not have a length of 2.'\n", " raise ValueError(err0)\n", - " else: \n", - " for i in experiment_label:\n", - " if i not in data_in[experiment].unique():\n", - " err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment)\n", - " raise IndexError(err)\n", + " \n", + " for i in experiment_label:\n", + " if i not in data_in[experiment].unique():\n", + " err = '{0} is not an element in the column `{1}` of `data`. 
Please check.'.format(i, experiment)\n", + " raise IndexError(err)\n", " else:\n", " experiment_label = data_in[experiment].unique()\n", "\n", @@ -301,7 +310,7 @@ " raise IndexError(err)\n", "\n", " # check y is numeric.\n", - " if not np.issubdtype(data_in[y].dtype, np.number):\n", + " if not issubdtype(data_in[y].dtype, number):\n", " err = '{0} is a column in `data`, but it is not numeric.'.format(y)\n", " raise ValueError(err)\n", "\n", @@ -420,17 +429,8 @@ "\n", " def __repr__(self):\n", " from .__init__ import __version__\n", - " import datetime as dt\n", - " import numpy as np\n", - "\n", " from .misc_tools import print_greeting\n", "\n", - " # Removed due to the deprecation of is_paired\n", - " #if self.__is_paired:\n", - " # es = \"Paired e\"\n", - " #else:\n", - " # es = \"E\"\n", - "\n", " greeting_header = print_greeting()\n", "\n", " RM_STATUS = {'baseline' : 'for repeated measures against baseline \\n', \n", @@ -481,13 +481,6 @@ "\n", " return \"\\n\".join(out)\n", "\n", - "\n", - " # def __variable_name(self):\n", - " # return [k for k,v in locals().items() if v is self]\n", - " #\n", - " # @property\n", - " # def variable_name(self):\n", - " # return self.__variable_name()\n", " \n", " @property\n", " def mean_diff(self):\n", @@ -1242,22 +1235,22 @@ } ], "source": [ - "np.random.seed(12345) # Fix the seed so the results are replicable.\n", + "random.seed(12345) # Fix the seed so the results are replicable.\n", "N=20\n", "y = norm.rvs(loc=3, scale=0.4, size=N*4)\n", "y[N:2*N] = y[N:2*N]+1\n", "y[2*N:3*N] = y[2*N:3*N]-0.5\n", - "t1 = np.repeat('Placebo', N*2).tolist()\n", - "t2 = np.repeat('Drug', N*2).tolist()\n", + "t1 = repeat('Placebo', N*2).tolist()\n", + "t2 = repeat('Drug', N*2).tolist()\n", "treatment = t1 + t2\n", "rep = []\n", "for i in range(N*2):\n", " rep.append('Rep1')\n", " rep.append('Rep2')\n", - "wt = np.repeat('W', N).tolist()\n", - "mt = np.repeat('M', N).tolist()\n", - "wt2 = np.repeat('W', N).tolist()\n", - "mt2 = np.repeat('M', 
N).tolist()\n", + "wt = repeat('W', N).tolist()\n", + "mt = repeat('M', N).tolist()\n", + "wt2 = repeat('W', N).tolist()\n", + "mt2 = repeat('M', N).tolist()\n", "genotype = wt + mt + wt2 + mt2\n", "id = list(range(0, N*2))\n", "id_col = id + id\n", @@ -1373,20 +1366,6 @@ " resamples=5000, \n", " permutation_count=5000, \n", " random_seed=12345):\n", - "\n", - " \n", - " import numpy as np\n", - " from numpy import array, isnan, isinf\n", - " from numpy import sort as npsort\n", - " from numpy.random import choice, seed\n", - "\n", - " import scipy.stats as spstats\n", - "\n", - " # import statsmodels.stats.power as power\n", - " import statsmodels\n", - "\n", - " from string import Template\n", - " import warnings\n", " \n", " from ._stats_tools import effsize as es\n", " from ._stats_tools import confint_2group_diff as ci2g\n", @@ -1415,7 +1394,7 @@ " err1 = \"`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined.\"\n", " raise ValueError(err1)\n", "\n", - " if proportional==True and (np.isin(control, [0, 1]).all() == False or np.isin(test, [0, 1]).all() == False):\n", + " if proportional==True and (isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False):\n", " err1 = \"`proportional` is True; Only accept binary data consisting of 0 and 1.\"\n", " raise ValueError(err1)\n", "\n", @@ -1546,8 +1525,7 @@ " # for binary paired data, use McNemar's test\n", " # References:\n", " # https://en.wikipedia.org/wiki/McNemar%27s_test\n", - " from statsmodels.stats.contingency_tables import mcnemar\n", - " import pandas as pd\n", + "\n", " df_temp = pd.DataFrame({'control': control, 'test': test})\n", " x1 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 0)])\n", " x2 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 1)])\n", @@ -1573,7 +1551,6 @@ " kruskal = spstats.kruskal(control, test, nan_policy='omit')\n", " self.__pvalue_kruskal = kruskal.pvalue\n", " self.__statistic_kruskal = 
kruskal.statistic\n", - " # self.__power = np.nan\n", "\n", " else: # for mean difference, Cohen's d, and Hedges' g.\n", " # Welch's t-test, assumes normality of distributions,\n", @@ -1824,7 +1801,6 @@ "\n", " @property\n", " def pvalue_brunner_munzel(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__pvalue_brunner_munzel\n", " except AttributeError:\n", @@ -1832,7 +1808,6 @@ "\n", " @property\n", " def statistic_brunner_munzel(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__statistic_brunner_munzel\n", " except AttributeError:\n", @@ -1842,7 +1817,6 @@ "\n", " @property\n", " def pvalue_wilcoxon(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__pvalue_wilcoxon\n", " except AttributeError:\n", @@ -1850,7 +1824,6 @@ "\n", " @property\n", " def statistic_wilcoxon(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__statistic_wilcoxon\n", " except AttributeError:\n", @@ -1858,7 +1831,6 @@ "\n", " @property\n", " def pvalue_mcnemar(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__pvalue_mcnemar\n", " except AttributeError:\n", @@ -1866,7 +1838,6 @@ "\n", " @property\n", " def statistic_mcnemar(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__statistic_mcnemar\n", " except AttributeError:\n", @@ -1876,7 +1847,6 @@ "\n", " @property\n", " def pvalue_paired_students_t(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__pvalue_paired_students_t\n", " except AttributeError:\n", @@ -1884,7 +1854,6 @@ "\n", " @property\n", " def statistic_paired_students_t(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__statistic_paired_students_t\n", " except AttributeError:\n", @@ -1894,7 +1863,6 @@ "\n", " @property\n", " def pvalue_kruskal(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__pvalue_kruskal\n", " except AttributeError:\n", @@ -1902,7 
+1870,6 @@ "\n", " @property\n", " def statistic_kruskal(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__statistic_kruskal\n", " except AttributeError:\n", @@ -1912,7 +1879,6 @@ "\n", " @property\n", " def pvalue_welch(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__pvalue_welch\n", " except AttributeError:\n", @@ -1920,7 +1886,6 @@ "\n", " @property\n", " def statistic_welch(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__statistic_welch\n", " except AttributeError:\n", @@ -1930,7 +1895,6 @@ "\n", " @property\n", " def pvalue_students_t(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__pvalue_students_t\n", " except AttributeError:\n", @@ -1938,7 +1902,6 @@ "\n", " @property\n", " def statistic_students_t(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__statistic_students_t\n", " except AttributeError:\n", @@ -1948,7 +1911,6 @@ "\n", " @property\n", " def pvalue_mann_whitney(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__pvalue_mann_whitney\n", " except AttributeError:\n", @@ -1958,7 +1920,6 @@ "\n", " @property\n", " def statistic_mann_whitney(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__statistic_mann_whitney\n", " except AttributeError:\n", @@ -1991,7 +1952,6 @@ "\n", " @property\n", " def proportional_difference(self):\n", - " from numpy import nan as npnan\n", " try:\n", " return self.__proportional_difference\n", " except AttributeError:\n", @@ -2031,7 +1991,7 @@ } ], "source": [ - "np.random.seed(12345)\n", + "random.seed(12345)\n", "control = norm.rvs(loc=0, size=30)\n", "test = norm.rvs(loc=0.5, size=30)\n", "effsize = dabest.TwoGroupsEffectSize(control, test, \"mean_diff\")\n", @@ -2137,7 +2097,6 @@ "\n", "\n", " def __pre_calc(self):\n", - " import pandas as pd\n", " from .misc_tools import print_greeting, get_varname\n", " from ._stats_tools import 
confint_2group_diff as ci2g\n", " from ._delta_objects import MiniMetaDelta, DeltaDelta\n", @@ -2307,8 +2266,6 @@ " \n", " \n", " def __calc_lqrt(self):\n", - " import lqrt\n", - " import pandas as pd\n", " \n", " rnd_seed = self.__random_seed\n", " db_obj = self.__dabest_obj\n", @@ -2782,7 +2739,7 @@ "metadata": {}, "outputs": [], "source": [ - "np.random.seed(9999) # Fix the seed so the results are replicable.\n", + "random.seed(9999) # Fix the seed so the results are replicable.\n", "# pop_size = 10000 # Size of each population.\n", "Ns = 20 # The number of samples taken from each population\n", "\n", @@ -2800,8 +2757,8 @@ "\n", "\n", "# Add a `gender` column for coloring the data.\n", - "females = np.repeat('Female', Ns/2).tolist()\n", - "males = np.repeat('Male', Ns/2).tolist()\n", + "females = repeat('Female', Ns/2).tolist()\n", + "males = repeat('Male', Ns/2).tolist()\n", "gender = females + males\n", "\n", "# Add an `id` column for paired data plotting.\n", @@ -3024,16 +2981,13 @@ " \n", " \"\"\"\n", " \n", - " def __init__(self, control:np.array,\n", - " test:np.array, # These should be numerical iterables.\n", + " def __init__(self, control: array,\n", + " test: array, # These should be numerical iterables.\n", " effect_size:str, # Any one of the following are accepted inputs: 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'\n", " is_paired:str=None,\n", " permutation_count:int=5000, # The number of permutations (reshuffles) to perform.\n", " random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. 
This ensures that the generated permutations are replicable.\n", " **kwargs):\n", - " \n", - " import numpy as np\n", - " from numpy.random import PCG64, RandomState\n", " from ._stats_tools.effsize import two_group_difference\n", " from ._stats_tools.confint_2group_diff import calculate_group_var\n", " \n", @@ -3045,20 +2999,20 @@ " raise ValueError(\"The two arrays do not have the same length.\")\n", "\n", " # Initialise random number generator.\n", - " # rng = np.random.default_rng(seed=random_seed)\n", + " # rng = random.default_rng(seed=random_seed)\n", " rng = RandomState(PCG64(random_seed))\n", "\n", " # Set required constants and variables\n", - " control = np.array(control)\n", - " test = np.array(test)\n", + " control = array(control)\n", + " test = array(test)\n", "\n", " control_sample = control.copy()\n", " test_sample = test.copy()\n", "\n", - " BAG = np.array([*control, *test])\n", + " BAG = array([*control, *test])\n", " CONTROL_LEN = int(len(control))\n", " EXTREME_COUNT = 0.\n", - " THRESHOLD = np.abs(two_group_difference(control, test, \n", + " THRESHOLD = abs(two_group_difference(control, test, \n", " is_paired, effect_size))\n", " self.__permutations = []\n", " self.__permutations_var = []\n", @@ -3088,18 +3042,18 @@ " es = two_group_difference(control_sample, test_sample, \n", " False, effect_size)\n", " \n", - " var = calculate_group_var(np.var(control_sample, ddof=1), \n", + " group_var = calculate_group_var(var(control_sample, ddof=1), \n", " CONTROL_LEN, \n", - " np.var(test_sample, ddof=1), \n", + " var(test_sample, ddof=1), \n", " len(test_sample))\n", " self.__permutations.append(es)\n", - " self.__permutations_var.append(var)\n", + " self.__permutations_var.append(group_var)\n", "\n", - " if np.abs(es) > THRESHOLD:\n", + " if abs(es) > THRESHOLD:\n", " EXTREME_COUNT += 1.\n", "\n", - " self.__permutations = np.array(self.__permutations)\n", - " self.__permutations_var = np.array(self.__permutations_var)\n", + " self.__permutations = 
array(self.__permutations)\n", + " self.__permutations_var = array(self.__permutations_var)\n", "\n", " self.pvalue = EXTREME_COUNT / permutation_count\n", "\n", From a424c83a87123c458c224ebe105974e5d35e2e2c Mon Sep 17 00:00:00 2001 From: cyberosa Date: Fri, 15 Dec 2023 13:09:55 +0100 Subject: [PATCH 03/10] More fixing and ordering of imports. Cleaning and small changes --- dabest/_bootstrap_tools.py | 28 ++------ dabest/_stats_tools/confint_1group.py | 15 ++-- dabest/_stats_tools/confint_2group_diff.py | 45 +++--------- nbs/API/bootstrap.ipynb | 30 +++----- nbs/API/class.ipynb | 1 - nbs/API/confint_1group.ipynb | 17 +++-- nbs/API/confint_2group_diff.ipynb | 47 +++---------- nbs/API/effsize.ipynb | 82 ++++++++-------------- 8 files changed, 77 insertions(+), 188 deletions(-) diff --git a/dabest/_bootstrap_tools.py b/dabest/_bootstrap_tools.py index d04a46c8..45375310 100644 --- a/dabest/_bootstrap_tools.py +++ b/dabest/_bootstrap_tools.py @@ -5,6 +5,12 @@ # %% ../nbs/API/bootstrap.ipynb 3 import numpy as np +import pandas as pd +import seaborn as sns +from scipy.stats import norm +from scipy.stats import ttest_1samp, ttest_ind, ttest_rel +from scipy.stats import mannwhitneyu, wilcoxon, norm +import warnings # %% ../nbs/API/bootstrap.ipynb 4 class bootstrap: @@ -58,22 +64,12 @@ def __init__(self, reps:int=5000 # Number of bootstrap iterations to perform. ): - import numpy as np - import pandas as pd - import seaborn as sns - - from scipy.stats import norm - from numpy.random import randint - from scipy.stats import ttest_1samp, ttest_ind, ttest_rel - from scipy.stats import mannwhitneyu, wilcoxon, norm - import warnings - # Turn to pandas series. x1 = pd.Series(x1).dropna() diff = False # Initialise statfunction - if statfunction == None: + if statfunction is None: statfunction = np.mean # Compute two-sided alphas. 
@@ -198,7 +194,6 @@ def __init__(self, } def __repr__(self): - import numpy as np if 'mean' in self.statistic: stat = 'mean' @@ -228,7 +223,6 @@ def jackknife_indexes(data): For a given set of data Y, the jackknife sample J[i] is defined as the data set Y with the ith data point deleted. """ - import numpy as np base = np.arange(0,len(data)) return (np.delete(base,i) for i in base) @@ -238,14 +232,6 @@ def bca(data, alphas, statarray, statfunction, ostat, reps): Subroutine called to calculate the BCa statistics. Borrowed heavily from scikits.bootstrap code. ''' - import warnings - - import numpy as np - import pandas as pd - import seaborn as sns - - from scipy.stats import norm - from numpy.random import randint # The bias correction value. z0 = norm.ppf( ( 1.0*np.sum(statarray < ostat, axis = 0) ) / reps ) diff --git a/dabest/_stats_tools/confint_1group.py b/dabest/_stats_tools/confint_1group.py index 29d82f74..88c9ec70 100644 --- a/dabest/_stats_tools/confint_1group.py +++ b/dabest/_stats_tools/confint_1group.py @@ -6,23 +6,23 @@ # %% ../../nbs/API/confint_1group.ipynb 4 import numpy as np +from numpy.random import PCG64, RandomState +from scipy.stats import norm +from numpy import sort as npsort # %% ../../nbs/API/confint_1group.ipynb 5 def create_bootstrap_indexes(array, resamples=5000, random_seed=12345): """Given an array-like, returns a generator of bootstrap indexes to be used for resampling. """ - import numpy as np - from numpy.random import PCG64, RandomState + rng = RandomState(PCG64(random_seed)) indexes = range(0, len(array)) out = (rng.choice(indexes, len(indexes), replace=True) for i in range(0, resamples)) - - # Reset RNG - # rng = RandomState(MT19937()) + return out @@ -49,7 +49,6 @@ def compute_1group_bootstraps(x, func, resamples=5000, random_seed=12345, *args, **kwargs): """Bootstraps func(x), with the number of specified resamples.""" - import numpy as np # Create bootstrap indexes. 
boot_indexes = create_bootstrap_indexes(x, resamples=resamples, @@ -64,7 +63,7 @@ def compute_1group_bootstraps(x, func, resamples=5000, random_seed=12345, def compute_1group_bias_correction(x, bootstraps, func, *args, **kwargs): - from scipy.stats import norm + metric = func(x, *args, **kwargs) prop_boots_less_than_metric = sum(bootstraps < metric) / len(bootstraps) @@ -101,7 +100,7 @@ def summary_ci_1group(x:np.array,# An numerical iterable. """ from . import confint_2group_diff as ci2g - from numpy import sort as npsort + boots = compute_1group_bootstraps(x, func, resamples=resamples, random_seed=random_seed, diff --git a/dabest/_stats_tools/confint_2group_diff.py b/dabest/_stats_tools/confint_2group_diff.py index fe482fd4..9dfd41bb 100644 --- a/dabest/_stats_tools/confint_2group_diff.py +++ b/dabest/_stats_tools/confint_2group_diff.py @@ -7,6 +7,13 @@ # %% ../../nbs/API/confint_2group_diff.ipynb 4 import numpy as np +from numpy import arange, delete, errstate +from numpy import mean as npmean +from numpy import sum as npsum +from numpy.random import PCG64, RandomState +import pandas as pd +from scipy.stats import norm +from numpy import isnan # %% ../../nbs/API/confint_2group_diff.ipynb 5 def create_jackknife_indexes(data): @@ -24,7 +31,6 @@ def create_jackknife_indexes(data): ------- Generator that yields all jackknife bootstrap samples. """ - from numpy import arange, delete index_range = arange(0, len(data)) return (delete(index_range, i) for i in index_range) @@ -36,7 +42,6 @@ def create_repeated_indexes(data): Convenience function. Given an array-like with length N, returns a generator that yields N indexes [0, 1, ..., N]. 
""" - from numpy import arange index_range = arange(0, len(data)) return (index_range for i in index_range) @@ -92,9 +97,6 @@ def compute_meandiff_jackknife(x0, x1, is_paired, effect_size): def _calc_accel(jack_dist): - from numpy import mean as npmean - from numpy import sum as npsum - from numpy import errstate jack_mean = npmean(jack_dist) @@ -111,10 +113,7 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size, """Bootstraps the effect_size for 2 groups.""" from . import effsize as __es - import numpy as np - from numpy.random import PCG64, RandomState - - # rng = RandomState(default_rng(random_seed)) + rng = RandomState(PCG64(random_seed)) out = np.repeat(np.nan, resamples) @@ -135,23 +134,6 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size, out[i] = __es.two_group_difference(x0_sample, x1_sample, is_paired, effect_size) - - # check whether there are any infinities in the bootstrap, - # which likely indicates the sample sizes are too small as - # the computation of Cohen's d and Hedges' g necessitated - # a division by zero. - # Added in v0.2.6. - - # num_infinities = len(out[np.isinf(out)]) - # print(num_infinities) - # if num_infinities > 0: - # warn_msg = "There are {} bootstraps that are not defined. "\ - # "This is likely due to smaple sample sizes. "\ - # "The values in a bootstrap for a group will be more likely "\ - # "to be all equal, with a resulting variance of zero. "\ - # "The computation of Cohen's d and Hedges' g will therefore "\ - # "involved a division by zero. " - # warnings.warn(warn_msg.format(num_infinities), category="UserWarning") return out @@ -168,10 +150,6 @@ def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1 """ - import numpy as np - import pandas as pd - from numpy.random import PCG64, RandomState - rng = RandomState(PCG64(random_seed)) x1_len = len(x1) x2_len = len(x2) @@ -243,10 +221,8 @@ def compute_meandiff_bias_correction(bootstraps, #An numerical iterable, compris and effect size. 
""" - from scipy.stats import norm - from numpy import array - B = array(bootstraps) + B = np.array(bootstraps) prop_less_than_es = sum(B < effsize) / len(B) return norm.ppf(prop_less_than_es) @@ -275,8 +251,6 @@ def compute_interval_limits(bias, acceleration, n_boots, ci=95): Supply the bias, acceleration factor, and number of bootstraps. """ - from scipy.stats import norm - from numpy import isnan, nan alpha = _compute_alpha_from_ci(ci) @@ -307,7 +281,6 @@ def calculate_weighted_delta(group_var, differences, resamples): ''' Compute the weighted deltas. ''' - import numpy as np weight = 1/group_var denom = np.sum(weight) diff --git a/nbs/API/bootstrap.ipynb b/nbs/API/bootstrap.ipynb index fe9d2c48..503f0f74 100644 --- a/nbs/API/bootstrap.ipynb +++ b/nbs/API/bootstrap.ipynb @@ -43,7 +43,13 @@ "outputs": [], "source": [ "#|export\n", - "import numpy as np" + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from scipy.stats import norm\n", + "from scipy.stats import ttest_1samp, ttest_ind, ttest_rel\n", + "from scipy.stats import mannwhitneyu, wilcoxon, norm\n", + "import warnings" ] }, { @@ -105,22 +111,12 @@ " reps:int=5000 # Number of bootstrap iterations to perform.\n", " ):\n", "\n", - " import numpy as np\n", - " import pandas as pd\n", - " import seaborn as sns\n", - "\n", - " from scipy.stats import norm\n", - " from numpy.random import randint\n", - " from scipy.stats import ttest_1samp, ttest_ind, ttest_rel\n", - " from scipy.stats import mannwhitneyu, wilcoxon, norm\n", - " import warnings\n", - "\n", " # Turn to pandas series.\n", " x1 = pd.Series(x1).dropna()\n", " diff = False\n", "\n", " # Initialise statfunction\n", - " if statfunction == None:\n", + " if statfunction is None:\n", " statfunction = np.mean\n", "\n", " # Compute two-sided alphas.\n", @@ -245,7 +241,6 @@ " }\n", "\n", " def __repr__(self):\n", - " import numpy as np\n", "\n", " if 'mean' in self.statistic:\n", " stat = 'mean'\n", @@ -283,7 +278,6 @@ " 
For a given set of data Y, the jackknife sample J[i] is defined as the\n", " data set Y with the ith data point deleted.\n", " \"\"\"\n", - " import numpy as np\n", "\n", " base = np.arange(0,len(data))\n", " return (np.delete(base,i) for i in base)\n", @@ -293,14 +287,6 @@ " Subroutine called to calculate the BCa statistics.\n", " Borrowed heavily from scikits.bootstrap code.\n", " '''\n", - " import warnings\n", - "\n", - " import numpy as np\n", - " import pandas as pd\n", - " import seaborn as sns\n", - "\n", - " from scipy.stats import norm\n", - " from numpy.random import randint\n", "\n", " # The bias correction value.\n", " z0 = norm.ppf( ( 1.0*np.sum(statarray < ostat, axis = 0) ) / reps )\n", diff --git a/nbs/API/class.ipynb b/nbs/API/class.ipynb index 00b7c6f0..32e9a973 100644 --- a/nbs/API/class.ipynb +++ b/nbs/API/class.ipynb @@ -65,7 +65,6 @@ "from scipy.stats import norm\n", "from scipy.stats import randint\n", "import datetime as dt\n", - "import statsmodels\n", "from statsmodels.stats.contingency_tables import mcnemar\n", "from string import Template\n", "import warnings\n", diff --git a/nbs/API/confint_1group.ipynb b/nbs/API/confint_1group.ipynb index 1e547098..3b3c9acd 100644 --- a/nbs/API/confint_1group.ipynb +++ b/nbs/API/confint_1group.ipynb @@ -54,7 +54,10 @@ "outputs": [], "source": [ "#|export\n", - "import numpy as np" + "import numpy as np\n", + "from numpy.random import PCG64, RandomState\n", + "from scipy.stats import norm\n", + "from numpy import sort as npsort" ] }, { @@ -69,17 +72,14 @@ " \"\"\"Given an array-like, returns a generator of bootstrap indexes\n", " to be used for resampling.\n", " \"\"\"\n", - " import numpy as np\n", - " from numpy.random import PCG64, RandomState\n", + "\n", " rng = RandomState(PCG64(random_seed))\n", " \n", " indexes = range(0, len(array))\n", "\n", " out = (rng.choice(indexes, len(indexes), replace=True)\n", " for i in range(0, resamples))\n", - " \n", - " # Reset RNG\n", - " # rng = 
RandomState(MT19937())\n", + "\n", " return out\n", "\n", "\n", @@ -106,7 +106,6 @@ " *args, **kwargs):\n", " \"\"\"Bootstraps func(x), with the number of specified resamples.\"\"\"\n", "\n", - " import numpy as np\n", " \n", " # Create bootstrap indexes.\n", " boot_indexes = create_bootstrap_indexes(x, resamples=resamples,\n", @@ -121,7 +120,7 @@ "\n", "\n", "def compute_1group_bias_correction(x, bootstraps, func, *args, **kwargs):\n", - " from scipy.stats import norm\n", + "\n", " metric = func(x, *args, **kwargs)\n", " prop_boots_less_than_metric = sum(bootstraps < metric) / len(bootstraps)\n", "\n", @@ -158,7 +157,7 @@ "\n", " \"\"\"\n", " from . import confint_2group_diff as ci2g\n", - " from numpy import sort as npsort\n", + "\n", "\n", " boots = compute_1group_bootstraps(x, func, resamples=resamples,\n", " random_seed=random_seed,\n", diff --git a/nbs/API/confint_2group_diff.ipynb b/nbs/API/confint_2group_diff.ipynb index c2285f8f..93c3417a 100644 --- a/nbs/API/confint_2group_diff.ipynb +++ b/nbs/API/confint_2group_diff.ipynb @@ -55,7 +55,14 @@ "outputs": [], "source": [ "#|export\n", - "import numpy as np" + "import numpy as np\n", + "from numpy import arange, delete, errstate\n", + "from numpy import mean as npmean\n", + "from numpy import sum as npsum\n", + "from numpy.random import PCG64, RandomState\n", + "import pandas as pd\n", + "from scipy.stats import norm\n", + "from numpy import isnan" ] }, { @@ -81,7 +88,6 @@ " -------\n", " Generator that yields all jackknife bootstrap samples.\n", " \"\"\"\n", - " from numpy import arange, delete\n", "\n", " index_range = arange(0, len(data))\n", " return (delete(index_range, i) for i in index_range)\n", @@ -93,7 +99,6 @@ " Convenience function. 
Given an array-like with length N,\n", " returns a generator that yields N indexes [0, 1, ..., N].\n", " \"\"\"\n", - " from numpy import arange\n", "\n", " index_range = arange(0, len(data))\n", " return (index_range for i in index_range)\n", @@ -149,9 +154,6 @@ "\n", "\n", "def _calc_accel(jack_dist):\n", - " from numpy import mean as npmean\n", - " from numpy import sum as npsum\n", - " from numpy import errstate\n", "\n", " jack_mean = npmean(jack_dist)\n", "\n", @@ -168,10 +170,7 @@ " \"\"\"Bootstraps the effect_size for 2 groups.\"\"\"\n", " \n", " from . import effsize as __es\n", - " import numpy as np\n", - " from numpy.random import PCG64, RandomState\n", - " \n", - " # rng = RandomState(default_rng(random_seed))\n", + "\n", " rng = RandomState(PCG64(random_seed))\n", "\n", " out = np.repeat(np.nan, resamples)\n", @@ -192,23 +191,6 @@ " \n", " out[i] = __es.two_group_difference(x0_sample, x1_sample,\n", " is_paired, effect_size)\n", - " \n", - " # check whether there are any infinities in the bootstrap,\n", - " # which likely indicates the sample sizes are too small as\n", - " # the computation of Cohen's d and Hedges' g necessitated \n", - " # a division by zero.\n", - " # Added in v0.2.6.\n", - " \n", - " # num_infinities = len(out[np.isinf(out)])\n", - " # print(num_infinities)\n", - " # if num_infinities > 0:\n", - " # warn_msg = \"There are {} bootstraps that are not defined. \"\\\n", - " # \"This is likely due to smaple sample sizes. \"\\\n", - " # \"The values in a bootstrap for a group will be more likely \"\\\n", - " # \"to be all equal, with a resulting variance of zero. \"\\\n", - " # \"The computation of Cohen's d and Hedges' g will therefore \"\\\n", - " # \"involved a division by zero. 
\"\n", - " # warnings.warn(warn_msg.format(num_infinities), category=\"UserWarning\")\n", " \n", " return out\n", "\n", @@ -225,10 +207,6 @@ " \n", " \"\"\"\n", "\n", - " import numpy as np\n", - " import pandas as pd\n", - " from numpy.random import PCG64, RandomState\n", - "\n", " rng = RandomState(PCG64(random_seed))\n", " x1_len = len(x1)\n", " x2_len = len(x2)\n", @@ -300,10 +278,8 @@ " and effect size.\n", "\n", " \"\"\"\n", - " from scipy.stats import norm\n", - " from numpy import array\n", "\n", - " B = array(bootstraps)\n", + " B = np.array(bootstraps)\n", " prop_less_than_es = sum(B < effsize) / len(B)\n", "\n", " return norm.ppf(prop_less_than_es)\n", @@ -332,8 +308,6 @@ "\n", " Supply the bias, acceleration factor, and number of bootstraps.\n", " \"\"\"\n", - " from scipy.stats import norm\n", - " from numpy import isnan, nan\n", "\n", " alpha = _compute_alpha_from_ci(ci)\n", "\n", @@ -364,7 +338,6 @@ " '''\n", " Compute the weighted deltas.\n", " '''\n", - " import numpy as np\n", "\n", " weight = 1/group_var\n", " denom = np.sum(weight)\n", diff --git a/nbs/API/effsize.ipynb b/nbs/API/effsize.ipynb index 45a854e3..ca4c7385 100644 --- a/nbs/API/effsize.ipynb +++ b/nbs/API/effsize.ipynb @@ -55,7 +55,11 @@ "source": [ "#|export\n", "from __future__ import annotations\n", - "import numpy as np" + "import numpy as np\n", + "import warnings\n", + "import pandas as pd\n", + "from scipy.special import gamma\n", + "from scipy.stats import mannwhitneyu" ] }, { @@ -114,8 +118,7 @@ " median of `test`.\n", "\n", " \"\"\"\n", - " import numpy as np\n", - " import warnings\n", + "\n", "\n", " if effect_size == \"mean_diff\":\n", " return func_difference(control, test, np.mean, is_paired)\n", @@ -165,13 +168,12 @@ " Applies func to `control` and `test`, and then returns the difference.\n", " \n", " \"\"\"\n", - " import numpy as np\n", "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", - " if control.__class__ != np.ndarray:\n", 
+ " if ~isinstance(control, np.ndarray):\n", " control = np.array(control)\n", - " if test.__class__ != np.ndarray:\n", + " if ~isinstance(test, np.ndarray):\n", " test = np.array(test)\n", "\n", " if is_paired:\n", @@ -250,13 +252,12 @@ " - https://en.wikipedia.org/wiki/Bessel%27s_correction\n", " - https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation\n", " \"\"\"\n", - " import numpy as np\n", "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", - " if control.__class__ != np.ndarray:\n", + " if ~isinstance(control, np.ndarray):\n", " control = np.array(control)\n", - " if test.__class__ != np.ndarray:\n", + " if ~isinstance(test, np.ndarray):\n", " test = np.array(test)\n", " control = control[~np.isnan(control)]\n", " test = test[~np.isnan(test)]\n", @@ -306,9 +307,7 @@ " and a dict for mapping the 0s and 1s to the actual labels, e.g.{1: \"Smoker\", 0: \"Non-smoker\"}\n", " '''\n", "\n", - " import numpy as np\n", " np.seterr(divide='ignore', invalid='ignore')\n", - " import pandas as pd\n", "\n", " # Check whether dataframe contains only 0s and 1s.\n", " if np.isin(control, [0, 1]).all() == False or np.isin(test, [0, 1]).all() == False:\n", @@ -317,10 +316,10 @@ " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", " # Aligned with cohens_d calculation.\n", - " if control.__class__ != np.ndarray:\n", + " if ~isinstance(control, np.ndarray):\n", " control = np.array(control)\n", - " if test.__class__ != np.ndarray:\n", - " test = np.array(test)\n", + " if ~isinstance(test, np.ndarray):\n", + " test = np.array(test)\n", " control = control[~np.isnan(control)]\n", " test = test[~np.isnan(test)]\n", "\n", @@ -353,13 +352,12 @@ " See [here](https://en.wikipedia.org/wiki/Effect_size#Hedges'_g)\n", "\n", " \"\"\"\n", - " import numpy as np\n", "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", - " if control.__class__ != 
np.ndarray:\n", + " if ~isinstance(control, np.ndarray):\n", " control = np.array(control)\n", - " if test.__class__ != np.ndarray:\n", + " if ~isinstance(test, np.ndarray):\n", " test = np.array(test)\n", " control = control[~np.isnan(control)]\n", " test = test[~np.isnan(test)]\n", @@ -386,14 +384,13 @@ " Computes Cliff's delta for 2 samples.\n", " See [here](https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data)\n", " \"\"\"\n", - " import numpy as np\n", - " from scipy.stats import mannwhitneyu\n", + "\n", "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", - " if control.__class__ != np.ndarray:\n", + " if ~isinstance(control, np.ndarray):\n", " control = np.array(control)\n", - " if test.__class__ != np.ndarray:\n", + " if ~isinstance(test, np.ndarray):\n", " test = np.array(test)\n", "\n", " c = control[~np.isnan(control)]\n", @@ -406,17 +403,6 @@ " U, _ = mannwhitneyu(t, c, alternative='two-sided')\n", " cliffs_delta = ((2 * U) / (control_n * test_n)) - 1\n", "\n", - " # more = 0\n", - " # less = 0\n", - " #\n", - " # for i, c in enumerate(control):\n", - " # for j, t in enumerate(test):\n", - " # if t > c:\n", - " # more += 1\n", - " # elif t < c:\n", - " # less += 1\n", - " #\n", - " # cliffs_delta = (more - less) / (control_n * test_n)\n", "\n", " return cliffs_delta\n" ] @@ -430,37 +416,30 @@ "source": [ "#|export\n", "def _compute_standardizers(control, test):\n", - " from numpy import mean, var, sqrt, nan\n", + " # TODO missing docstring\n", " # For calculation of correlation; not currently used.\n", " # from scipy.stats import pearsonr\n", "\n", " control_n = len(control)\n", " test_n = len(test)\n", "\n", - " control_mean = mean(control)\n", - " test_mean = mean(test)\n", + " control_mean = np.mean(control)\n", + " test_mean = np.mean(test)\n", "\n", " control_var = var(control, ddof=1) # use N-1 to compute the variance.\n", " test_var = var(test, ddof=1)\n", "\n", - " control_std = 
sqrt(control_var)\n", - " test_std = sqrt(test_var)\n", + " control_std = np.sqrt(control_var)\n", + " test_std = np.sqrt(test_var)\n", "\n", " # For unpaired 2-groups standardized mean difference.\n", - " pooled = sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) /\n", + " pooled = np.sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) /\n", " (control_n + test_n - 2)\n", " )\n", "\n", " # For paired standardized mean difference.\n", - " average = sqrt((control_var + test_var) / 2)\n", - "\n", - " # if len(control) == len(test):\n", - " # corr = pearsonr(control, test)[0]\n", - " # std_diff = sqrt(control_var + test_var - (2 * corr * control_std * test_std))\n", - " # std_diff_corrected = std_diff / (sqrt(2 * (1 - corr)))\n", - " # return pooled, average, std_diff_corrected\n", - " #\n", - " # else:\n", + " average = np.sqrt((control_var + test_var) / 2)\n", + "\n", " return pooled, average # indent if you implement above code chunk." ] }, @@ -487,16 +466,12 @@ " ISBN 0-12-336380-2.\n", " \"\"\"\n", "\n", - " from scipy.special import gamma\n", - " from numpy import sqrt, isinf\n", - " import warnings\n", - "\n", " df = n1 + n2 - 2\n", " numer = gamma(df / 2)\n", " denom0 = gamma((df - 1) / 2)\n", - " denom = sqrt(df / 2) * denom0\n", + " denom = np.sqrt(df / 2) * denom0\n", "\n", - " if isinf(numer) or isinf(denom):\n", + " if np.isinf(numer) or np.isinf(denom):\n", " # occurs when df is too large.\n", " # Apply Hedges and Olkin's approximation.\n", " df_sum = n1 + n2\n", @@ -522,7 +497,6 @@ " Compute the weighted deltas where the weight is the inverse of the\n", " pooled group difference.\n", " '''\n", - " import numpy as np\n", "\n", " weight = np.true_divide(1, group_var)\n", " return np.sum(difference*weight)/np.sum(weight)" From 7ad8b7a1cca522a78a5ba3bd439898c21a260e58 Mon Sep 17 00:00:00 2001 From: cyberosa Date: Fri, 15 Dec 2023 16:27:11 +0100 Subject: [PATCH 04/10] effsize objects in separate notebook. 
Cleaning and small changes --- dabest/__init__.py | 2 +- dabest/_classes.py | 1511 +------------ dabest/_delta_objects.py | 3 +- dabest/_effsize_objects.py | 1439 +++++++++++++ dabest/_stats_tools/effsize.py | 80 +- nbs/API/class.ipynb | 1895 +---------------- nbs/API/delta_objects.ipynb | 3 +- nbs/API/effsize.ipynb | 10 +- nbs/API/effsize_objects.ipynb | 1861 ++++++++++++++++ nbs/tests/test_01_effsizes_pvals.ipynb | 3 +- nbs/tests/test_03_plotting.py | 2 +- ..._04_repeated_measures_effsizes_pvals.ipynb | 3 +- .../test_06_delta-delta_effsize_pvals.ipynb | 3 +- nbs/tests/test_08_mini_meta_pvals.ipynb | 3 +- 14 files changed, 3391 insertions(+), 3427 deletions(-) create mode 100644 dabest/_effsize_objects.py create mode 100644 nbs/API/effsize_objects.ipynb diff --git a/dabest/__init__.py b/dabest/__init__.py index 2e46392d..4c99e500 100644 --- a/dabest/__init__.py +++ b/dabest/__init__.py @@ -1,5 +1,5 @@ from ._api import load, prop_dataset from ._stats_tools import effsize as effsize -from ._classes import TwoGroupsEffectSize, PermutationTest +from ._effsize_objects import TwoGroupsEffectSize, PermutationTest __version__ = "2023.03.29" diff --git a/dabest/_classes.py b/dabest/_classes.py index 94031680..8e89dff9 100644 --- a/dabest/_classes.py +++ b/dabest/_classes.py @@ -1,25 +1,18 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/class.ipynb. 
# %% auto 0 -__all__ = ['Dabest', 'TwoGroupsEffectSize', 'EffectSizeDataFrame', 'PermutationTest'] +__all__ = ['Dabest'] # %% ../nbs/API/class.ipynb 4 # Import standard data science libraries -from numpy import array, isnan, isinf, repeat, random, issubdtype, number, isin, abs, var -from numpy import sort as npsort -import lqrt +from numpy import array, repeat, random, issubdtype, number import pandas as pd import seaborn as sns -import scipy.stats as spstats from scipy.stats import norm from scipy.stats import randint import datetime as dt -import statsmodels -from statsmodels.stats.contingency_tables import mcnemar from string import Template import warnings -from numpy import nan as npnan -from numpy.random import PCG64, RandomState # %% ../nbs/API/class.ipynb 6 class Dabest(object): @@ -37,6 +30,7 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, statistics. You should not be calling this class directly; instead, use `dabest.load()` to parse your DataFrame prior to analysis. """ + from ._effsize_objects import EffectSizeDataFrame self.__delta2 = delta2 self.__experiment = experiment @@ -50,17 +44,17 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, self.__mini_meta = mini_meta # Make a copy of the data, so we don't make alterations to it. + # TODO is this really needed? data_in = data.copy() # Check if it is a valid mini_meta case - if mini_meta is True: - + if mini_meta: # Only mini_meta calculation but not proportional and delta-delta function - if proportional is True: + if proportional: err0 = '`proportional` and `mini_meta` cannot be True at the same time.' raise ValueError(err0) - elif delta2 is True: + elif delta2: err0 = '`delta` and `mini_meta` cannot be True at the same time.' raise ValueError(err0) @@ -70,7 +64,7 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, err0 = '`mini_meta` is True, but `idx` ({})'.format(idx) err1 = 'does not contain exactly 2 columns.' 
raise ValueError(err0 + err1) - elif all([isinstance(i, (tuple, list)) for i in idx]): + if all([isinstance(i, (tuple, list)) for i in idx]): all_idx_lengths = [len(t) for t in idx] if (array(all_idx_lengths) != 2).any(): err1 = "`mini_meta` is True, but some idx " @@ -140,17 +134,17 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, raise IndexError(err) else: - x1_level = data_in[x[0]].unique() - elif experiment is not None: + x1_level = data_in[x[0]].unique() + # TODO what if experiment is None? + elif experiment: experiment_label = data_in[experiment].unique() x1_level = data_in[x[0]].unique() self.__experiment_label = experiment_label self.__x1_level = x1_level - # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case - if idx is None and x is not None and y is not None: + if x and y and idx is None: # Add a length check for unique values in the first element in list x, # if the length is greater than 2, force delta2 to be False # Should be removed if delta2 for situations other than 2x2 is supported @@ -213,23 +207,22 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, raise ValueError(err) # Check if there is a typo on paired - if paired is not None: - if paired not in ("baseline", "sequential"): - err = '{} assigned for `paired` is not valid.'.format(paired) - raise ValueError(err) + if paired and paired not in ("baseline", "sequential"): + err = '{} assigned for `paired` is not valid.'.format(paired) + raise ValueError(err) # Determine the type of data: wide or long. - if x is None and y is not None: + if y and x is None: err = 'You have only specified `y`. Please also specify `x`.' raise ValueError(err) - elif y is None and x is not None: + if x and y is None: err = 'You have only specified `x`. Please also specify `y`.' raise ValueError(err) # Identify the type of data that was passed in. - elif x is not None and y is not None: + if x and y: # Assume we have a long dataset. 
# check both x and y are column names in data. if x not in data_in.columns: @@ -292,12 +285,12 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, # remove any NA rows. plot_data.dropna(axis=0, how='any', subset=[self.__yvar], inplace=True) - + # TODO these comments should not be in the code but on the release notes of the package version # Lines 131 to 140 added in v0.2.3. # Fixes a bug that jammed up when the xvar column was already # a pandas Categorical. Now we check for this and act appropriately. if isinstance(plot_data[self.__xvar].dtype, - pd.CategoricalDtype) is True: + pd.CategoricalDtype): plot_data[self.__xvar].cat.remove_unused_categories(inplace=True) plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, ordered=True, @@ -307,9 +300,9 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, categories=all_plot_groups, ordered=True) - + # TODO Move all the plot_data logic to the function returning self.__plot_data self.__plot_data = plot_data - + # TODO Move all the all_plot_groups logic to the function returning self.__all_plot_groups self.__all_plot_groups = all_plot_groups @@ -318,7 +311,8 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, if id_col is None: err = "`id_col` must be specified if `paired` is assigned with a not NoneType value." raise IndexError(err) - elif id_col not in plot_data.columns: + + if id_col not in plot_data.columns: err = "{} is not a column in `data`. 
".format(id_col) raise IndexError(err) @@ -360,7 +354,7 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, def __repr__(self): from .__init__ import __version__ from .misc_tools import print_greeting - + greeting_header = print_greeting() RM_STATUS = {'baseline' : 'for repeated measures against baseline \n', @@ -396,10 +390,10 @@ def __repr__(self): for ix, test_name in enumerate(current_tuple[1:]): comparisons.append("{} minus {}".format(test_name, control_name)) - if self.__delta2 is True: + if self.__delta2: comparisons.append("{} minus {} (only for mean difference)".format(self.__experiment_label[1], self.__experiment_label[0])) - if self.__mini_meta is True: + if self.__mini_meta: comparisons.append("weighted delta (only for mean difference)") for j, g in enumerate(comparisons): @@ -650,1454 +644,3 @@ def _all_plot_groups(self): Returns the all plot groups, as indicated via the `idx` keyword. """ return self.__all_plot_groups - -# %% ../nbs/API/class.ipynb 28 -class TwoGroupsEffectSize(object): - - """ - A class to compute and store the results of bootstrapped - mean differences between two groups. - - Compute the effect size between two groups. - - Parameters - ---------- - control : array-like - test : array-like - These should be numerical iterables. - effect_size : string. - Any one of the following are accepted inputs: - 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta' - is_paired : string, default None - resamples : int, default 5000 - The number of bootstrap resamples to be taken for the calculation - of the confidence interval limits. - permutation_count : int, default 5000 - The number of permutations (reshuffles) to perform for the - computation of the permutation p-value - ci : float, default 95 - The confidence interval width. The default of 95 produces 95% - confidence intervals. - random_seed : int, default 12345 - `random_seed` is used to seed the random number generator during - bootstrap resampling. 
This ensures that the confidence intervals - reported are replicable. - - Returns - ------- - A :py:class:`TwoGroupEffectSize` object: - `difference` : float - The effect size of the difference between the control and the test. - `effect_size` : string - The type of effect size reported. - `is_paired` : string - The type of repeated-measures experiment. - `ci` : float - Returns the width of the confidence interval, in percent. - `alpha` : float - Returns the significance level of the statistical test as a float between 0 and 1. - `resamples` : int - The number of resamples performed during the bootstrap procedure. - `bootstraps` : numpy ndarray - The generated bootstraps of the effect size. - `random_seed` : int - The number used to initialise the numpy random seed generator, ie.`seed_value` from `numpy.random.seed(seed_value)` is returned. - `bca_low, bca_high` : float - The bias-corrected and accelerated confidence interval lower limit and upper limits, respectively. - `pct_low, pct_high` : float - The percentile confidence interval lower limit and upper limits, respectively. - """ - - def __init__(self, control, test, effect_size, - proportional=False, - is_paired=None, ci=95, - resamples=5000, - permutation_count=5000, - random_seed=12345): - - from ._stats_tools import effsize as es - from ._stats_tools import confint_2group_diff as ci2g - - - self.__EFFECT_SIZE_DICT = {"mean_diff" : "mean difference", - "median_diff" : "median difference", - "cohens_d" : "Cohen's d", - "cohens_h" : "Cohen's h", - "hedges_g" : "Hedges' g", - "cliffs_delta" : "Cliff's delta", - "delta_g" : "deltas' g"} - - - kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()] - if effect_size not in kosher_es: - err1 = "The effect size '{}'".format(effect_size) - err2 = "is not one of {}".format(kosher_es) - raise ValueError(" ".join([err1, err2])) - - if effect_size == "cliffs_delta" and is_paired: - err1 = "`paired` is not None; therefore Cliff's delta is not defined." 
- raise ValueError(err1) - - if proportional==True and effect_size not in ['mean_diff','cohens_h']: - err1 = "`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined." - raise ValueError(err1) - - if proportional==True and (isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False): - err1 = "`proportional` is True; Only accept binary data consisting of 0 and 1." - raise ValueError(err1) - - # Convert to numpy arrays for speed. - # NaNs are automatically dropped. - control = array(control) - test = array(test) - control = control[~isnan(control)] - test = test[~isnan(test)] - - self.__effect_size = effect_size - self.__control = control - self.__test = test - self.__is_paired = is_paired - self.__resamples = resamples - self.__permutation_count = permutation_count - self.__random_seed = random_seed - self.__ci = ci - self.__alpha = ci2g._compute_alpha_from_ci(ci) - - self.__difference = es.two_group_difference( - control, test, is_paired, effect_size) - - self.__jackknives = ci2g.compute_meandiff_jackknife( - control, test, is_paired, effect_size) - - self.__acceleration_value = ci2g._calc_accel(self.__jackknives) - - bootstraps = ci2g.compute_bootstrapped_diff( - control, test, is_paired, effect_size, - resamples, random_seed) - self.__bootstraps = bootstraps - - sorted_bootstraps = npsort(self.__bootstraps) - # Added in v0.2.6. - # Raises a UserWarning if there are any infiinities in the bootstraps. - num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)]) - - if num_infinities > 0: - warn_msg = "There are {} bootstrap(s) that are not defined. "\ - "This is likely due to smaple sample sizes. "\ - "The values in a bootstrap for a group will be more likely "\ - "to be all equal, with a resulting variance of zero. "\ - "The computation of Cohen's d and Hedges' g thus "\ - "involved a division by zero. 
" - warnings.warn(warn_msg.format(num_infinities), - category=UserWarning) - - self.__bias_correction = ci2g.compute_meandiff_bias_correction( - self.__bootstraps, self.__difference) - - # Compute BCa intervals. - bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( - self.__bias_correction, self.__acceleration_value, - self.__resamples, ci) - - self.__bca_interval_idx = (bca_idx_low, bca_idx_high) - - if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): - self.__bca_low = sorted_bootstraps[bca_idx_low] - self.__bca_high = sorted_bootstraps[bca_idx_high] - - err1 = "The $lim_type limit of the interval" - err2 = "was in the $loc 10 values." - err3 = "The result should be considered unstable." - err_temp = Template(" ".join([err1, err2, err3])) - - if bca_idx_low <= 10: - warnings.warn(err_temp.substitute(lim_type="lower", - loc="bottom"), - stacklevel=1) - - if bca_idx_high >= resamples-9: - warnings.warn(err_temp.substitute(lim_type="upper", - loc="top"), - stacklevel=1) - - else: - err1 = "The $lim_type limit of the BCa interval cannot be computed." - err2 = "It is set to the effect size itself." - err3 = "All bootstrap values were likely all the same." - err_temp = Template(" ".join([err1, err2, err3])) - - if isnan(bca_idx_low): - self.__bca_low = self.__difference - warnings.warn(err_temp.substitute(lim_type="lower"), - stacklevel=0) - - if isnan(bca_idx_high): - self.__bca_high = self.__difference - warnings.warn(err_temp.substitute(lim_type="upper"), - stacklevel=0) - - # Compute percentile intervals. - pct_idx_low = int((self.__alpha/2) * resamples) - pct_idx_high = int((1-(self.__alpha/2)) * resamples) - - self.__pct_interval_idx = (pct_idx_low, pct_idx_high) - self.__pct_low = sorted_bootstraps[pct_idx_low] - self.__pct_high = sorted_bootstraps[pct_idx_high] - - # Perform statistical tests. 
- - self.__PermutationTest_result = PermutationTest(control, test, - effect_size, - is_paired, - permutation_count) - - if is_paired and proportional is False: - # Wilcoxon, a non-parametric version of the paired T-test. - wilcoxon = spstats.wilcoxon(control, test) - self.__pvalue_wilcoxon = wilcoxon.pvalue - self.__statistic_wilcoxon = wilcoxon.statistic - - - if effect_size != "median_diff": - # Paired Student's t-test. - paired_t = spstats.ttest_rel(control, test, nan_policy='omit') - self.__pvalue_paired_students_t = paired_t.pvalue - self.__statistic_paired_students_t = paired_t.statistic - - standardized_es = es.cohens_d(control, test, is_paired) - # self.__power = power.tt_solve_power(standardized_es, - # len(control), - # alpha=self.__alpha) - - elif is_paired and proportional is True: - # for binary paired data, use McNemar's test - # References: - # https://en.wikipedia.org/wiki/McNemar%27s_test - - df_temp = pd.DataFrame({'control': control, 'test': test}) - x1 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 0)]) - x2 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 1)]) - x3 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 0)]) - x4 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 1)]) - table = [[x1,x2],[x3,x4]] - _mcnemar = mcnemar(table, exact=True, correction=True) - self.__pvalue_mcnemar = _mcnemar.pvalue - self.__statistic_mcnemar = _mcnemar.statistic - - elif effect_size == "cliffs_delta": - # Let's go with Brunner-Munzel! - brunner_munzel = spstats.brunnermunzel(control, test, - nan_policy='omit') - self.__pvalue_brunner_munzel = brunner_munzel.pvalue - self.__statistic_brunner_munzel = brunner_munzel.statistic - - - elif effect_size == "median_diff": - # According to scipy's documentation of the function, - # "The Kruskal-Wallis H-test tests the null hypothesis - # that the population median of all of the groups are equal." 
- kruskal = spstats.kruskal(control, test, nan_policy='omit') - self.__pvalue_kruskal = kruskal.pvalue - self.__statistic_kruskal = kruskal.statistic - - else: # for mean difference, Cohen's d, and Hedges' g. - # Welch's t-test, assumes normality of distributions, - # but does not assume equal variances. - welch = spstats.ttest_ind(control, test, equal_var=False, - nan_policy='omit') - self.__pvalue_welch = welch.pvalue - self.__statistic_welch = welch.statistic - - # Student's t-test, assumes normality of distributions, - # as well as assumption of equal variances. - students_t = spstats.ttest_ind(control, test, equal_var=True, - nan_policy='omit') - self.__pvalue_students_t = students_t.pvalue - self.__statistic_students_t = students_t.statistic - - # Mann-Whitney test: Non parametric, - # does not assume normality of distributions - try: - mann_whitney = spstats.mannwhitneyu(control, test, - alternative='two-sided') - self.__pvalue_mann_whitney = mann_whitney.pvalue - self.__statistic_mann_whitney = mann_whitney.statistic - except ValueError: - # Occurs when the control and test are exactly identical - # in terms of rank (eg. all zeros.) - pass - - - - standardized_es = es.cohens_d(control, test, is_paired = None) - - # The Cohen's h calculation is for binary categorical data - try: - self.__proportional_difference = es.cohens_h(control, test) - except ValueError: - # Occur only when the data consists not only 0's and 1's. - pass - # self.__power = power.tt_ind_solve_power(standardized_es, - # len(control), - # alpha=self.__alpha, - # ratio=len(test)/len(control) - # ) - - - - - - - def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3): - - # # Deprecated in v0.3.0; permutation p-values will be reported by default. 
- # UNPAIRED_ES_TO_TEST = {"mean_diff" : "Mann-Whitney", - # "median_diff" : "Kruskal", - # "cohens_d" : "Mann-Whitney", - # "hedges_g" : "Mann-Whitney", - # "cliffs_delta" : "Brunner-Munzel"} - # - # TEST_TO_PVAL_ATTR = {"Mann-Whitney" : "pvalue_mann_whitney", - # "Kruskal" : "pvalue_kruskal", - # "Brunner-Munzel" : "pvalue_brunner_munzel", - # "Wilcoxon" : "pvalue_wilcoxon"} - - RM_STATUS = {'baseline' : 'for repeated measures against baseline \n', - 'sequential': 'for the sequential design of repeated-measures experiment \n', - 'None' : '' - } - - PAIRED_STATUS = {'baseline' : 'paired', - 'sequential' : 'paired', - 'None' : 'unpaired' - } - - first_line = {"rm_status" : RM_STATUS[str(self.__is_paired)], - "es" : self.__EFFECT_SIZE_DICT[self.__effect_size], - "paired_status": PAIRED_STATUS[str(self.__is_paired)]} - - - out1 = "The {paired_status} {es} {rm_status}".format(**first_line) - - base_string_fmt = "{:." + str(sigfig) + "}" - if "." in str(self.__ci): - ci_width = base_string_fmt.format(self.__ci) - else: - ci_width = str(self.__ci) - - ci_out = {"es" : base_string_fmt.format(self.__difference), - "ci" : ci_width, - "bca_low" : base_string_fmt.format(self.__bca_low), - "bca_high" : base_string_fmt.format(self.__bca_high)} - - out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) - out = out1 + out2 - - # # Deprecated in v0.3.0; permutation p-values will be reported by default. - # if self.__is_paired: - # stats_test = "Wilcoxon" - # else: - # stats_test = UNPAIRED_ES_TO_TEST[self.__effect_size] - - - # pval_rounded = base_string_fmt.format(getattr(self, - # TEST_TO_PVAL_ATTR[stats_test]) - # ) - - pval_rounded = base_string_fmt.format(self.pvalue_permutation) - - # # Deprecated in v0.3.0; permutation p-values will be reported by default. 
- # pvalue = "The two-sided p-value of the {} test is {}.".format(stats_test, - # pval_rounded) - - # pvalue = "The two-sided p-value of the {} test is {}.".format(stats_test, - # pval_rounded) - - - p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) - p2 = "calculated for legacy purposes only. " - pvalue = p1 + p2 - - bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) - bs2 = "the confidence interval is bias-corrected and accelerated." - bs = bs1 + bs2 - - pval_def1 = "Any p-value reported is the probability of observing the" + \ - "effect size (or greater),\nassuming the null hypothesis of" + \ - "zero difference is true." - pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ - "control and test labels were performed." - pval_def = pval_def1 + pval_def2 - - if show_resample_count and define_pval: - return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) - elif show_resample_count is False and define_pval is True: - return "{}\n{}\n\n{}".format(out, pvalue, pval_def) - elif show_resample_count is True and define_pval is False: - return "{}\n{}\n\n{}".format(out, pvalue, bs) - else: - return "{}\n{}".format(out, pvalue) - - - - def to_dict(self): - """ - Returns the attributes of the `dabest.TwoGroupEffectSize` object as a - dictionary. - """ - # Only get public (user-facing) attributes. - attrs = [a for a in dir(self) - if not a.startswith(("_", "to_dict"))] - out = {} - for a in attrs: - out[a] = getattr(self, a) - return out - - - @property - def difference(self): - """ - Returns the difference between the control and the test. - """ - return self.__difference - - @property - def effect_size(self): - """ - Returns the type of effect size reported. - """ - return self.__EFFECT_SIZE_DICT[self.__effect_size] - - @property - def is_paired(self): - return self.__is_paired - - @property - def ci(self): - """ - Returns the width of the confidence interval, in percent. 
- """ - return self.__ci - - @property - def alpha(self): - """ - Returns the significance level of the statistical test as a float - between 0 and 1. - """ - return self.__alpha - - @property - def resamples(self): - """ - The number of resamples performed during the bootstrap procedure. - """ - return self.__resamples - - @property - def bootstraps(self): - """ - The generated bootstraps of the effect size. - """ - return self.__bootstraps - - @property - def random_seed(self): - """ - The number used to initialise the numpy random seed generator, ie. - `seed_value` from `numpy.random.seed(seed_value)` is returned. - """ - return self.__random_seed - - @property - def bca_interval_idx(self): - return self.__bca_interval_idx - - @property - def bca_low(self): - """ - The bias-corrected and accelerated confidence interval lower limit. - """ - return self.__bca_low - - @property - def bca_high(self): - """ - The bias-corrected and accelerated confidence interval upper limit. - """ - return self.__bca_high - - @property - def pct_interval_idx(self): - return self.__pct_interval_idx - - @property - def pct_low(self): - """ - The percentile confidence interval lower limit. - """ - return self.__pct_low - - @property - def pct_high(self): - """ - The percentile confidence interval lower limit. 
- """ - return self.__pct_high - - - - @property - def pvalue_brunner_munzel(self): - try: - return self.__pvalue_brunner_munzel - except AttributeError: - return npnan - - @property - def statistic_brunner_munzel(self): - try: - return self.__statistic_brunner_munzel - except AttributeError: - return npnan - - - - @property - def pvalue_wilcoxon(self): - try: - return self.__pvalue_wilcoxon - except AttributeError: - return npnan - - @property - def statistic_wilcoxon(self): - try: - return self.__statistic_wilcoxon - except AttributeError: - return npnan - - @property - def pvalue_mcnemar(self): - try: - return self.__pvalue_mcnemar - except AttributeError: - return npnan - - @property - def statistic_mcnemar(self): - try: - return self.__statistic_mcnemar - except AttributeError: - return npnan - - - - @property - def pvalue_paired_students_t(self): - try: - return self.__pvalue_paired_students_t - except AttributeError: - return npnan - - @property - def statistic_paired_students_t(self): - try: - return self.__statistic_paired_students_t - except AttributeError: - return npnan - - - - @property - def pvalue_kruskal(self): - try: - return self.__pvalue_kruskal - except AttributeError: - return npnan - - @property - def statistic_kruskal(self): - try: - return self.__statistic_kruskal - except AttributeError: - return npnan - - - - @property - def pvalue_welch(self): - try: - return self.__pvalue_welch - except AttributeError: - return npnan - - @property - def statistic_welch(self): - try: - return self.__statistic_welch - except AttributeError: - return npnan - - - - @property - def pvalue_students_t(self): - try: - return self.__pvalue_students_t - except AttributeError: - return npnan - - @property - def statistic_students_t(self): - try: - return self.__statistic_students_t - except AttributeError: - return npnan - - - - @property - def pvalue_mann_whitney(self): - try: - return self.__pvalue_mann_whitney - except AttributeError: - return npnan - - - - 
@property - def statistic_mann_whitney(self): - try: - return self.__statistic_mann_whitney - except AttributeError: - return npnan - - # Introduced in v0.3.0. - @property - def pvalue_permutation(self): - return self.__PermutationTest_result.pvalue - - # - # - @property - def permutation_count(self): - """ - The number of permuations taken. - """ - return self.__PermutationTest_result.permutation_count - - - @property - def permutations(self): - return self.__PermutationTest_result.permutations - - - @property - def permutations_var(self): - return self.__PermutationTest_result.permutations_var - - - @property - def proportional_difference(self): - try: - return self.__proportional_difference - except AttributeError: - return npnan - - -# %% ../nbs/API/class.ipynb 32 -class EffectSizeDataFrame(object): - """A class that generates and stores the results of bootstrapped effect - sizes for several comparisons.""" - - def __init__(self, dabest, effect_size, - is_paired, ci=95, proportional=False, - resamples=5000, - permutation_count=5000, - random_seed=12345, - x1_level=None, x2=None, - delta2=False, experiment_label=None, - mini_meta=False): - """ - Parses the data from a Dabest object, enabling plotting and printing - capability for the effect size of interest. 
- """ - - self.__dabest_obj = dabest - self.__effect_size = effect_size - self.__is_paired = is_paired - self.__ci = ci - self.__resamples = resamples - self.__permutation_count = permutation_count - self.__random_seed = random_seed - self.__proportional = proportional - self.__x1_level = x1_level - self.__experiment_label = experiment_label - self.__x2 = x2 - self.__delta2 = delta2 - self.__mini_meta = mini_meta - - - def __pre_calc(self): - from .misc_tools import print_greeting, get_varname - from ._stats_tools import confint_2group_diff as ci2g - from ._delta_objects import MiniMetaDelta, DeltaDelta - - idx = self.__dabest_obj.idx - dat = self.__dabest_obj._plot_data - xvar = self.__dabest_obj._xvar - yvar = self.__dabest_obj._yvar - - out = [] - reprs = [] - - if self.__delta2==True: - mixed_data = [] - for j, current_tuple in enumerate(idx): - if self.__is_paired != "sequential": - cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() - - for ix, tname in enumerate(current_tuple[1:]): - if self.__is_paired == "sequential": - cname = current_tuple[ix] - control = dat[dat[xvar] == cname][yvar].copy() - test = dat[dat[xvar] == tname][yvar].copy() - mixed_data.append(control) - mixed_data.append(test) - bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(mixed_data[0], mixed_data[1], mixed_data[2], mixed_data[3], - self.__is_paired, self.__resamples, self.__random_seed) - - - for j, current_tuple in enumerate(idx): - if self.__is_paired!="sequential": - cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() - - for ix, tname in enumerate(current_tuple[1:]): - if self.__is_paired == "sequential": - cname = current_tuple[ix] - control = dat[dat[xvar] == cname][yvar].copy() - test = dat[dat[xvar] == tname][yvar].copy() - - result = TwoGroupsEffectSize(control, test, - self.__effect_size, - self.__proportional, - self.__is_paired, - self.__ci, - self.__resamples, - self.__permutation_count, - self.__random_seed) - 
r_dict = result.to_dict() - r_dict["control"] = cname - r_dict["test"] = tname - r_dict["control_N"] = int(len(control)) - r_dict["test_N"] = int(len(test)) - out.append(r_dict) - if j == len(idx)-1 and ix == len(current_tuple)-2: - if self.__delta2 and self.__effect_size in ["mean_diff","delta_g"]: - resamp_count = False - def_pval = False - elif self.__mini_meta and self.__effect_size == "mean_diff": - resamp_count = False - def_pval = False - else: - resamp_count = True - def_pval = True - else: - resamp_count = False - def_pval = False - - text_repr = result.__repr__(show_resample_count=resamp_count, - define_pval=def_pval) - - to_replace = "between {} and {} is".format(cname, tname) - text_repr = text_repr.replace("is", to_replace, 1) - - reprs.append(text_repr) - - - self.__for_print = "\n\n".join(reprs) - - out_ = pd.DataFrame(out) - - columns_in_order = ['control', 'test', 'control_N', 'test_N', - 'effect_size', 'is_paired', - 'difference', 'ci', - - 'bca_low', 'bca_high', 'bca_interval_idx', - 'pct_low', 'pct_high', 'pct_interval_idx', - - 'bootstraps', 'resamples', 'random_seed', - - 'permutations', 'pvalue_permutation', 'permutation_count', 'permutations_var', - - 'pvalue_welch', - 'statistic_welch', - - 'pvalue_students_t', - 'statistic_students_t', - - 'pvalue_mann_whitney', - 'statistic_mann_whitney', - - 'pvalue_brunner_munzel', - 'statistic_brunner_munzel', - - 'pvalue_wilcoxon', - 'statistic_wilcoxon', - - 'pvalue_mcnemar', - 'statistic_mcnemar', - - 'pvalue_paired_students_t', - 'statistic_paired_students_t', - - 'pvalue_kruskal', - 'statistic_kruskal', - 'proportional_difference' - ] - self.__results = out_.reindex(columns=columns_in_order) - self.__results.dropna(axis="columns", how="all", inplace=True) - - # Add the is_paired column back when is_paired is None - if self.is_paired is None: - self.__results.insert(5, 'is_paired', self.__results.apply(lambda _: None, axis=1)) - - # Create and compute the delta-delta statistics - if self.__delta2 
is True: - self.__delta_delta = DeltaDelta(self, - self.__permutation_count, - bootstraps_delta_delta, - self.__ci) - reprs.append(self.__delta_delta.__repr__(header=False)) - elif self.__delta2 is True and self.__effect_size not in ["mean_diff", "delta_g"]: - self.__delta_delta = "Delta-delta is not supported for {}.".format(self.__effect_size) - else: - self.__delta_delta = "`delta2` is False; delta-delta is therefore not calculated." - - # Create and compute the weighted average statistics - if self.__mini_meta is True and self.__effect_size == "mean_diff": - self.__mini_meta_delta = MiniMetaDelta(self, - self.__permutation_count, - self.__ci) - reprs.append(self.__mini_meta_delta.__repr__(header=False)) - elif self.__mini_meta is True and self.__effect_size != "mean_diff": - self.__mini_meta_delta = "Weighted delta is not supported for {}.".format(self.__effect_size) - else: - self.__mini_meta_delta = "`mini_meta` is False; weighted delta is therefore not calculated." - - - varname = get_varname(self.__dabest_obj) - lastline = "To get the results of all valid statistical tests, " +\ - "use `{}.{}.statistical_tests`".format(varname, self.__effect_size) - reprs.append(lastline) - - reprs.insert(0, print_greeting()) - - self.__for_print = "\n\n".join(reprs) - - - def __repr__(self): - try: - return self.__for_print - except AttributeError: - self.__pre_calc() - return self.__for_print - - - - def __calc_lqrt(self): - - rnd_seed = self.__random_seed - db_obj = self.__dabest_obj - dat = db_obj._plot_data - xvar = db_obj._xvar - yvar = db_obj._yvar - delta2 = self.__delta2 - - - out = [] - - for j, current_tuple in enumerate(db_obj.idx): - if self.__is_paired != "sequential": - cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() - - for ix, tname in enumerate(current_tuple[1:]): - if self.__is_paired == "sequential": - cname = current_tuple[ix] - control = dat[dat[xvar] == cname][yvar].copy() - test = dat[dat[xvar] == tname][yvar].copy() - - if 
self.__is_paired: - # Refactored here in v0.3.0 for performance issues. - lqrt_result = lqrt.lqrtest_rel(control, test, - random_state=rnd_seed) - - out.append({"control": cname, "test": tname, - "control_N": int(len(control)), - "test_N": int(len(test)), - "pvalue_paired_lqrt": lqrt_result.pvalue, - "statistic_paired_lqrt": lqrt_result.statistic - }) - - else: - # Likelihood Q-Ratio test: - lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, - random_state=rnd_seed, - equal_var=True) - - - lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, - random_state=rnd_seed, - equal_var=False) - - out.append({"control": cname, "test": tname, - "control_N": int(len(control)), - "test_N": int(len(test)), - - "pvalue_lqrt_equal_var" : lqrt_equal_var_result.pvalue, - "statistic_lqrt_equal_var" : lqrt_equal_var_result.statistic, - "pvalue_lqrt_unequal_var" : lqrt_unequal_var_result.pvalue, - "statistic_lqrt_unequal_var" : lqrt_unequal_var_result.statistic, - }) - self.__lqrt_results = pd.DataFrame(out) - - - def plot(self, color_col=None, - - raw_marker_size=6, es_marker_size=9, - - swarm_label=None, contrast_label=None, delta2_label=None, - swarm_ylim=None, contrast_ylim=None, delta2_ylim=None, - - custom_palette=None, swarm_desat=0.5, halfviolin_desat=1, - halfviolin_alpha=0.8, - - face_color = None, - #bar plot - bar_label=None, bar_desat=0.5, bar_width = 0.5,bar_ylim = None, - # error bar of proportion plot - ci=None, ci_type='bca', err_color=None, - - float_contrast=True, - show_pairs=True, - show_delta2=True, - show_mini_meta=True, - group_summaries=None, - group_summaries_offset=0.1, - - fig_size=None, - dpi=100, - ax=None, - - contrast_show_es = False, - es_sf = 2, - es_fontsize = 10, - - contrast_show_deltas = True, - - gridkey_rows=None, - gridkey_merge_pairs = False, - gridkey_show_Ns = True, - gridkey_show_es = True, - - swarmplot_kwargs=None, - barplot_kwargs=None, - violinplot_kwargs=None, - slopegraph_kwargs=None, - sankey_kwargs=None, - 
reflines_kwargs=None, - group_summary_kwargs=None, - legend_kwargs=None, - title=None, fontsize_title = 16, - fontsize_rawxlabel = 12,fontsize_rawylabel = 12,fontsize_contrastxlabel = 12, fontsize_contrastylabel = 12, - fontsize_delta2label = 12): - - """ - Creates an estimation plot for the effect size of interest. - - - Parameters - ---------- - color_col : string, default None - Column to be used for colors. - raw_marker_size : float, default 6 - The diameter (in points) of the marker dots plotted in the - swarmplot. - es_marker_size : float, default 9 - The size (in points) of the effect size points on the difference - axes. - swarm_label, contrast_label, delta2_label : strings, default None - Set labels for the y-axis of the swarmplot and the contrast plot, - respectively. If `swarm_label` is not specified, it defaults to - "value", unless a column name was passed to `y`. If - `contrast_label` is not specified, it defaults to the effect size - being plotted. If `delta2_label` is not specifed, it defaults to - "delta - delta" - swarm_ylim, contrast_ylim, delta2_ylim : tuples, default None - The desired y-limits of the raw data (swarmplot) axes, the - difference axes and the delta-delta axes respectively, as a tuple. - These will be autoscaled to sensible values if they are not - specified. The delta2 axes and contrast axes should have the same - limits for y. When `show_delta2` is True, if both of the `contrast_ylim` - and `delta2_ylim` are not None, then they must be specified with the - same values; when `show_delta2` is True and only one of them is specified, - then the other will automatically be assigned with the same value. - Specifying `delta2_ylim` does not have any effect when `show_delta2` is - False. - custom_palette : dict, list, or matplotlib color palette, default None - This keyword accepts a dictionary with {'group':'color'} pairings, - a list of RGB colors, or a specified matplotlib palette. This - palette will be used to color the swarmplot. 
If `color_col` is not - specified, then each group will be colored in sequence according - to the default palette currently used by matplotlib. - Please take a look at the seaborn commands `color_palette` - and `cubehelix_palette` to generate a custom palette. Both - these functions generate a list of RGB colors. - See: - https://seaborn.pydata.org/generated/seaborn.color_palette.html - https://seaborn.pydata.org/generated/seaborn.cubehelix_palette.html - The named colors of matplotlib can be found here: - https://matplotlib.org/examples/color/named_colors.html - swarm_desat : float, default 1 - Decreases the saturation of the colors in the swarmplot by the - desired proportion. Uses `seaborn.desaturate()` to acheive this. - halfviolin_desat : float, default 0.5 - Decreases the saturation of the colors of the half-violin bootstrap - curves by the desired proportion. Uses `seaborn.desaturate()` to - acheive this. - halfviolin_alpha : float, default 0.8 - The alpha (transparency) level of the half-violin bootstrap curves. - float_contrast : boolean, default True - Whether or not to display the halfviolin bootstrapped difference - distribution alongside the raw data. - show_pairs : boolean, default True - If the data is paired, whether or not to show the raw data as a - swarmplot, or as slopegraph, with a line joining each pair of - observations. - show_delta2, show_mini_meta : boolean, default True - If delta-delta or mini-meta delta is calculated, whether or not to - show the delta-delta plot or mini-meta plot. - group_summaries : ['mean_sd', 'median_quartiles', 'None'], default None. - Plots the summary statistics for each group. If 'mean_sd', then - the mean and standard deviation of each group is plotted as a - notched line beside each group. If 'median_quantiles', then the - median and 25th and 75th percentiles of each group is plotted - instead. If 'None', the summaries are not shown. 
- group_summaries_offset : float, default 0.1 - If group summaries are displayed, they will be offset from the raw - data swarmplot groups by this value. - fig_size : tuple, default None - The desired dimensions of the figure as a (length, width) tuple. - dpi : int, default 100 - The dots per inch of the resulting figure. - ax : matplotlib.Axes, default None - Provide an existing Axes for the plots to be created. If no Axes is - specified, a new matplotlib Figure will be created. - gridkey_rows : list, default None - Provide a list of row labels for the gridkey. The supplied idx is - checked against the row labels to determine whether the corresponding - cell should be populated or not. - swarmplot_kwargs : dict, default None - Pass any keyword arguments accepted by the seaborn `swarmplot` - command here, as a dict. If None, the following keywords are - passed to sns.swarmplot : {'size':`raw_marker_size`}. - violinplot_kwargs : dict, default None - Pass any keyword arguments accepted by the matplotlib ` - pyplot.violinplot` command here, as a dict. If None, the following - keywords are passed to violinplot : {'widths':0.5, 'vert':True, - 'showextrema':False, 'showmedians':False}. - slopegraph_kwargs : dict, default None - This will change the appearance of the lines used to join each pair - of observations when `show_pairs=True`. Pass any keyword arguments - accepted by matplotlib `plot()` function here, as a dict. - If None, the following keywords are - passed to plot() : {'linewidth':1, 'alpha':0.5}. - sankey_kwargs: dict, default None - Whis will change the appearance of the sankey diagram used to depict - paired proportional data when `show_pairs=True` and `proportional=True`. - Pass any keyword arguments accepted by plot_tools.sankeydiag() function - here, as a dict. 
If None, the following keywords are passed to sankey diagram: - {"width": 0.5, "align": "center", "alpha": 0.4, "bar_width": 0.1, "rightColor": False} - reflines_kwargs : dict, default None - This will change the appearance of the zero reference lines. Pass - any keyword arguments accepted by the matplotlib Axes `hlines` - command here, as a dict. If None, the following keywords are - passed to Axes.hlines : {'linestyle':'solid', 'linewidth':0.75, - 'zorder':2, 'color' : default y-tick color}. - group_summary_kwargs : dict, default None - Pass any keyword arguments accepted by the matplotlib.lines.Line2D - command here, as a dict. This will change the appearance of the - vertical summary lines for each group, if `group_summaries` is not - 'None'. If None, the following keywords are passed to - matplotlib.lines.Line2D : {'lw':2, 'alpha':1, 'zorder':3}. - legend_kwargs : dict, default None - Pass any keyword arguments accepted by the matplotlib Axes - `legend` command here, as a dict. If None, the following keywords - are passed to matplotlib.Axes.legend : {'loc':'upper left', - 'frameon':False}. - title : string, default None - Title for the plot. If None, no title will be displayed. Pass any - keyword arguments accepted by the matplotlib.pyplot.suptitle `t` command here, - as a string. - fontsize_title : float or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}, default 'large' - Font size for the plot title. If a float, the fontsize in points. The - string values denote sizes relative to the default font size. Pass any keyword arguments accepted - by the matplotlib.pyplot.suptitle `fontsize` command here, as a string. - fontsize_rawxlabel : float, default 12 - Font size for the raw axes xlabel. - fontsize_rawylabel : float, default 12 - Font size for the raw axes ylabel. - fontsize_contrastxlabel : float, default 12 - Font size for the contrast axes xlabel. 
- fontsize_contrastylabel : float, default 12 - Font size for the contrast axes ylabel. - fontsize_delta2label : float, default 12 - Font size for the delta-delta axes ylabel. - - - Returns - ------- - A :class:`matplotlib.figure.Figure` with 2 Axes, if ``ax = None``. - - The first axes (accessible with ``FigName.axes[0]``) contains the rawdata swarmplot; the second axes (accessible with ``FigName.axes[1]``) has the bootstrap distributions and effect sizes (with confidence intervals) plotted on it. - - If ``ax`` is specified, the rawdata swarmplot is accessed at ``ax`` - itself, while the effect size axes is accessed at ``ax.contrast_axes``. - See the last example below. - - - - """ - - from .plotter import EffectSizeDataFramePlotter - - if hasattr(self, "results") is False: - self.__pre_calc() - - if self.__delta2: - color_col = self.__x2 - - # if self.__proportional: - # raw_marker_size = 0.01 - - # Modification incurred due to update of Seaborn - ci = ('ci', ci) if ci is not None else None - - all_kwargs = locals() - del all_kwargs["self"] - - out = EffectSizeDataFramePlotter(self, **all_kwargs) - - return out - - - @property - def proportional(self): - """ - Returns the proportional parameter - class. - """ - return self.__proportional - - @property - def results(self): - """Prints all pairwise comparisons nicely.""" - try: - return self.__results - except AttributeError: - self.__pre_calc() - return self.__results - - - - @property - def statistical_tests(self): - results_df = self.results - - # Select only the statistics and p-values. 
- stats_columns = [c for c in results_df.columns - if c.startswith("statistic") or c.startswith("pvalue")] - - default_cols = ['control', 'test', 'control_N', 'test_N', - 'effect_size', 'is_paired', - 'difference', 'ci', 'bca_low', 'bca_high'] - - cols_of_interest = default_cols + stats_columns - - return results_df[cols_of_interest] - - - @property - def _for_print(self): - return self.__for_print - - @property - def _plot_data(self): - return self.__dabest_obj._plot_data - - @property - def idx(self): - return self.__dabest_obj.idx - - @property - def xvar(self): - return self.__dabest_obj._xvar - - @property - def yvar(self): - return self.__dabest_obj._yvar - - @property - def is_paired(self): - return self.__is_paired - - @property - def ci(self): - """ - The width of the confidence interval being produced, in percent. - """ - return self.__ci - - @property - def x1_level(self): - return self.__x1_level - - - @property - def x2(self): - return self.__x2 - - - @property - def experiment_label(self): - return self.__experiment_label - - - @property - def delta2(self): - return self.__delta2 - - - @property - def resamples(self): - """ - The number of resamples (with replacement) during bootstrap resampling." - """ - return self.__resamples - - @property - def random_seed(self): - """ - The seed used by `numpy.seed()` for bootstrap resampling. - """ - return self.__random_seed - - @property - def effect_size(self): - """The type of effect size being computed.""" - return self.__effect_size - - @property - def dabest_obj(self): - """ - Returns the `dabest` object that invoked the current EffectSizeDataFrame - class. - """ - return self.__dabest_obj - - @property - def proportional(self): - """ - Returns the proportional parameter - class. - """ - return self.__proportional - - @property - def lqrt(self): - """Returns all pairwise Lq-Likelihood Ratio Type test results - as a pandas DataFrame. 
- - For more information on LqRT tests, see https://arxiv.org/abs/1911.11922 - """ - try: - return self.__lqrt_results - except AttributeError: - self.__calc_lqrt() - return self.__lqrt_results - - - @property - def mini_meta(self): - """ - Returns the mini_meta boolean parameter. - """ - return self.__mini_meta - - - @property - def mini_meta_delta(self): - """ - Returns the mini_meta results. - """ - try: - return self.__mini_meta_delta - except AttributeError: - self.__pre_calc() - return self.__mini_meta_delta - - - @property - def delta_delta(self): - """ - Returns the mini_meta results. - """ - try: - return self.__delta_delta - except AttributeError: - self.__pre_calc() - return self.__delta_delta - - - -# %% ../nbs/API/class.ipynb 50 -class PermutationTest: - """ - A class to compute and report permutation tests. - - Parameters - ---------- - control : array-like - test : array-like - These should be numerical iterables. - effect_size : string. - Any one of the following are accepted inputs: - 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', 'delta_g" or 'cliffs_delta' - is_paired : string, default None - permutation_count : int, default 10000 - The number of permutations (reshuffles) to perform. - random_seed : int, default 12345 - `random_seed` is used to seed the random number generator during - bootstrap resampling. This ensures that the generated permutations - are replicable. - - Returns - ------- - A :py:class:`PermutationTest` object: - `difference`:float - The effect size of the difference between the control and the test. - `effect_size`:string - The type of effect size reported. - - - """ - - def __init__(self, control: array, - test: array, # These should be numerical iterables. - effect_size:str, # Any one of the following are accepted inputs: 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta' - is_paired:str=None, - permutation_count:int=5000, # The number of permutations (reshuffles) to perform. 
- random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable. - **kwargs): - from ._stats_tools.effsize import two_group_difference - from ._stats_tools.confint_2group_diff import calculate_group_var - - - self.__permutation_count = permutation_count - - # Run Sanity Check. - if is_paired and len(control) != len(test): - raise ValueError("The two arrays do not have the same length.") - - # Initialise random number generator. - # rng = random.default_rng(seed=random_seed) - rng = RandomState(PCG64(random_seed)) - - # Set required constants and variables - control = array(control) - test = array(test) - - control_sample = control.copy() - test_sample = test.copy() - - BAG = array([*control, *test]) - CONTROL_LEN = int(len(control)) - EXTREME_COUNT = 0. - THRESHOLD = abs(two_group_difference(control, test, - is_paired, effect_size)) - self.__permutations = [] - self.__permutations_var = [] - - for i in range(int(permutation_count)): - - if is_paired: - # Select which control-test pairs to swap. - random_idx = rng.choice(CONTROL_LEN, - rng.randint(0, CONTROL_LEN+1), - replace=False) - - # Perform swap. - for i in random_idx: - _placeholder = control_sample[i] - control_sample[i] = test_sample[i] - test_sample[i] = _placeholder - - else: - # Shuffle the bag and assign to control and test groups. - # NB. rng.shuffle didn't produce replicable results... - shuffled = rng.permutation(BAG) - control_sample = shuffled[:CONTROL_LEN] - test_sample = shuffled[CONTROL_LEN:] - - - es = two_group_difference(control_sample, test_sample, - False, effect_size) - - group_var = calculate_group_var(var(control_sample, ddof=1), - CONTROL_LEN, - var(test_sample, ddof=1), - len(test_sample)) - self.__permutations.append(es) - self.__permutations_var.append(group_var) - - if abs(es) > THRESHOLD: - EXTREME_COUNT += 1. 
- - self.__permutations = array(self.__permutations) - self.__permutations_var = array(self.__permutations_var) - - self.pvalue = EXTREME_COUNT / permutation_count - - - def __repr__(self): - return("{} permutations were taken. The p-value is {}.".format(self.permutation_count, - self.pvalue)) - - - @property - def permutation_count(self): - """ - The number of permuations taken. - """ - return self.__permutation_count - - - @property - def permutations(self): - """ - The effect sizes of all the permutations in a list. - """ - return self.__permutations - - - @property - def permutations_var(self): - """ - The experiment group variance of all the permutations in a list. - """ - return self.__permutations_var - diff --git a/dabest/_delta_objects.py b/dabest/_delta_objects.py index 06e1f9a1..c8324332 100644 --- a/dabest/_delta_objects.py +++ b/dabest/_delta_objects.py @@ -6,10 +6,9 @@ # %% ../nbs/API/delta_objects.ipynb 5 from scipy.stats import norm import pandas as pd -from scipy.stats import randint import numpy as np from numpy import sort as npsort -from numpy import sqrt, isinf, isnan +from numpy import isnan from string import Template import warnings import datetime as dt diff --git a/dabest/_effsize_objects.py b/dabest/_effsize_objects.py new file mode 100644 index 00000000..2695de83 --- /dev/null +++ b/dabest/_effsize_objects.py @@ -0,0 +1,1439 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/effsize_objects.ipynb. 
+ +# %% auto 0 +__all__ = ['TwoGroupsEffectSize', 'EffectSizeDataFrame', 'PermutationTest'] + +# %% ../nbs/API/effsize_objects.ipynb 5 +import pandas as pd +import lqrt +from scipy.stats import norm +from numpy import array, isnan, isinf, repeat, random, isin, abs, var +from numpy import sort as npsort +from numpy import nan as npnan +from numpy.random import PCG64, RandomState +from statsmodels.stats.contingency_tables import mcnemar +import warnings +from string import Template +import scipy.stats as spstats + +# %% ../nbs/API/effsize_objects.ipynb 6 +class TwoGroupsEffectSize(object): + + """ + A class to compute and store the results of bootstrapped + mean differences between two groups. + + Compute the effect size between two groups. + + Parameters + ---------- + control : array-like + test : array-like + These should be numerical iterables. + effect_size : string. + Any one of the following are accepted inputs: + 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta' + is_paired : string, default None + resamples : int, default 5000 + The number of bootstrap resamples to be taken for the calculation + of the confidence interval limits. + permutation_count : int, default 5000 + The number of permutations (reshuffles) to perform for the + computation of the permutation p-value + ci : float, default 95 + The confidence interval width. The default of 95 produces 95% + confidence intervals. + random_seed : int, default 12345 + `random_seed` is used to seed the random number generator during + bootstrap resampling. This ensures that the confidence intervals + reported are replicable. + + Returns + ------- + A :py:class:`TwoGroupEffectSize` object: + `difference` : float + The effect size of the difference between the control and the test. + `effect_size` : string + The type of effect size reported. + `is_paired` : string + The type of repeated-measures experiment. + `ci` : float + Returns the width of the confidence interval, in percent. 
+ `alpha` : float + Returns the significance level of the statistical test as a float between 0 and 1. + `resamples` : int + The number of resamples performed during the bootstrap procedure. + `bootstraps` : numpy ndarray + The generated bootstraps of the effect size. + `random_seed` : int + The number used to initialise the numpy random seed generator, ie.`seed_value` from `numpy.random.seed(seed_value)` is returned. + `bca_low, bca_high` : float + The bias-corrected and accelerated confidence interval lower limit and upper limits, respectively. + `pct_low, pct_high` : float + The percentile confidence interval lower limit and upper limits, respectively. + """ + + def __init__(self, control, test, effect_size, + proportional=False, + is_paired=None, ci=95, + resamples=5000, + permutation_count=5000, + random_seed=12345): + + from ._stats_tools import effsize as es + from ._stats_tools import confint_2group_diff as ci2g + + + self.__EFFECT_SIZE_DICT = {"mean_diff" : "mean difference", + "median_diff" : "median difference", + "cohens_d" : "Cohen's d", + "cohens_h" : "Cohen's h", + "hedges_g" : "Hedges' g", + "cliffs_delta" : "Cliff's delta", + "delta_g" : "deltas' g"} + + + kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()] + if effect_size not in kosher_es: + err1 = "The effect size '{}'".format(effect_size) + err2 = "is not one of {}".format(kosher_es) + raise ValueError(" ".join([err1, err2])) + + if effect_size == "cliffs_delta" and is_paired: + err1 = "`paired` is not None; therefore Cliff's delta is not defined." + raise ValueError(err1) + + if proportional==True and effect_size not in ['mean_diff','cohens_h']: + err1 = "`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined." + raise ValueError(err1) + + if proportional==True and (isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False): + err1 = "`proportional` is True; Only accept binary data consisting of 0 and 1." 
+ raise ValueError(err1) + + # Convert to numpy arrays for speed. + # NaNs are automatically dropped. + control = array(control) + test = array(test) + control = control[~isnan(control)] + test = test[~isnan(test)] + + self.__effect_size = effect_size + # TODO refactor this + self.__control = control + self.__test = test + self.__is_paired = is_paired + self.__resamples = resamples + self.__permutation_count = permutation_count + self.__random_seed = random_seed + self.__ci = ci + self.__alpha = ci2g._compute_alpha_from_ci(ci) + + self.__difference = es.two_group_difference( + control, test, is_paired, effect_size) + + self.__jackknives = ci2g.compute_meandiff_jackknife( + control, test, is_paired, effect_size) + + self.__acceleration_value = ci2g._calc_accel(self.__jackknives) + + bootstraps = ci2g.compute_bootstrapped_diff( + control, test, is_paired, effect_size, + resamples, random_seed) + self.__bootstraps = bootstraps + + sorted_bootstraps = npsort(self.__bootstraps) + # Added in v0.2.6. + # Raises a UserWarning if there are any infiinities in the bootstraps. + num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)]) + + if num_infinities > 0: + warn_msg = "There are {} bootstrap(s) that are not defined. "\ + "This is likely due to smaple sample sizes. "\ + "The values in a bootstrap for a group will be more likely "\ + "to be all equal, with a resulting variance of zero. "\ + "The computation of Cohen's d and Hedges' g thus "\ + "involved a division by zero. " + warnings.warn(warn_msg.format(num_infinities), + category=UserWarning) + + self.__bias_correction = ci2g.compute_meandiff_bias_correction( + self.__bootstraps, self.__difference) + + # Compute BCa intervals. 
+ bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( + self.__bias_correction, self.__acceleration_value, + self.__resamples, ci) + + self.__bca_interval_idx = (bca_idx_low, bca_idx_high) + + if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): + self.__bca_low = sorted_bootstraps[bca_idx_low] + self.__bca_high = sorted_bootstraps[bca_idx_high] + + err1 = "The $lim_type limit of the interval" + err2 = "was in the $loc 10 values." + err3 = "The result should be considered unstable." + err_temp = Template(" ".join([err1, err2, err3])) + + if bca_idx_low <= 10: + warnings.warn(err_temp.substitute(lim_type="lower", + loc="bottom"), + stacklevel=1) + + if bca_idx_high >= resamples-9: + warnings.warn(err_temp.substitute(lim_type="upper", + loc="top"), + stacklevel=1) + + else: + # TODO improve error handling, separate file + err1 = "The $lim_type limit of the BCa interval cannot be computed." + err2 = "It is set to the effect size itself." + err3 = "All bootstrap values were likely all the same." + err_temp = Template(" ".join([err1, err2, err3])) + + if isnan(bca_idx_low): + self.__bca_low = self.__difference + warnings.warn(err_temp.substitute(lim_type="lower"), + stacklevel=0) + + if isnan(bca_idx_high): + self.__bca_high = self.__difference + warnings.warn(err_temp.substitute(lim_type="upper"), + stacklevel=0) + + # Compute percentile intervals. + pct_idx_low = int((self.__alpha/2) * resamples) + pct_idx_high = int((1-(self.__alpha/2)) * resamples) + + self.__pct_interval_idx = (pct_idx_low, pct_idx_high) + self.__pct_low = sorted_bootstraps[pct_idx_low] + self.__pct_high = sorted_bootstraps[pct_idx_high] + + # Perform statistical tests. + self.__PermutationTest_result = PermutationTest(control, test, + effect_size, + is_paired, + permutation_count) + + if is_paired and proportional is False: + # Wilcoxon, a non-parametric version of the paired T-test. 
+ wilcoxon = spstats.wilcoxon(control, test) + self.__pvalue_wilcoxon = wilcoxon.pvalue + self.__statistic_wilcoxon = wilcoxon.statistic + + + if effect_size != "median_diff": + # Paired Student's t-test. + paired_t = spstats.ttest_rel(control, test, nan_policy='omit') + self.__pvalue_paired_students_t = paired_t.pvalue + self.__statistic_paired_students_t = paired_t.statistic + # TODO dead code + standardized_es = es.cohens_d(control, test, is_paired) + + elif is_paired and proportional: + # for binary paired data, use McNemar's test + # References: + # https://en.wikipedia.org/wiki/McNemar%27s_test + + df_temp = pd.DataFrame({'control': control, 'test': test}) + x1 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 0)]) + x2 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 1)]) + x3 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 0)]) + x4 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 1)]) + table = [[x1,x2],[x3,x4]] + _mcnemar = mcnemar(table, exact=True, correction=True) + self.__pvalue_mcnemar = _mcnemar.pvalue + self.__statistic_mcnemar = _mcnemar.statistic + + elif effect_size == "cliffs_delta": + # Let's go with Brunner-Munzel! + brunner_munzel = spstats.brunnermunzel(control, test, + nan_policy='omit') + self.__pvalue_brunner_munzel = brunner_munzel.pvalue + self.__statistic_brunner_munzel = brunner_munzel.statistic + + + elif effect_size == "median_diff": + # According to scipy's documentation of the function, + # "The Kruskal-Wallis H-test tests the null hypothesis + # that the population median of all of the groups are equal." + kruskal = spstats.kruskal(control, test, nan_policy='omit') + self.__pvalue_kruskal = kruskal.pvalue + self.__statistic_kruskal = kruskal.statistic + + else: # for mean difference, Cohen's d, and Hedges' g. + # Welch's t-test, assumes normality of distributions, + # but does not assume equal variances. 
+ welch = spstats.ttest_ind(control, test, equal_var=False, + nan_policy='omit') + self.__pvalue_welch = welch.pvalue + self.__statistic_welch = welch.statistic + + # Student's t-test, assumes normality of distributions, + # as well as assumption of equal variances. + students_t = spstats.ttest_ind(control, test, equal_var=True, + nan_policy='omit') + self.__pvalue_students_t = students_t.pvalue + self.__statistic_students_t = students_t.statistic + + # Mann-Whitney test: Non parametric, + # does not assume normality of distributions + try: + mann_whitney = spstats.mannwhitneyu(control, test, + alternative='two-sided') + self.__pvalue_mann_whitney = mann_whitney.pvalue + self.__statistic_mann_whitney = mann_whitney.statistic + except ValueError: + # TODO At least print some warning? + # Occurs when the control and test are exactly identical + # in terms of rank (eg. all zeros.) + pass + + + standardized_es = es.cohens_d(control, test, is_paired = None) + + # The Cohen's h calculation is for binary categorical data + try: + self.__proportional_difference = es.cohens_h(control, test) + except ValueError: + # TODO At least print some warning? + # Occur only when the data consists not only 0's and 1's. + pass + + + + + + + def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3): + + RM_STATUS = {'baseline' : 'for repeated measures against baseline \n', + 'sequential': 'for the sequential design of repeated-measures experiment \n', + 'None' : '' + } + + PAIRED_STATUS = {'baseline' : 'paired', + 'sequential' : 'paired', + 'None' : 'unpaired' + } + + first_line = {"rm_status" : RM_STATUS[str(self.__is_paired)], + "es" : self.__EFFECT_SIZE_DICT[self.__effect_size], + "paired_status": PAIRED_STATUS[str(self.__is_paired)]} + + + out1 = "The {paired_status} {es} {rm_status}".format(**first_line) + + base_string_fmt = "{:." + str(sigfig) + "}" + if "." 
in str(self.__ci): + ci_width = base_string_fmt.format(self.__ci) + else: + ci_width = str(self.__ci) + + ci_out = {"es" : base_string_fmt.format(self.__difference), + "ci" : ci_width, + "bca_low" : base_string_fmt.format(self.__bca_low), + "bca_high" : base_string_fmt.format(self.__bca_high)} + + out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) + out = out1 + out2 + + pval_rounded = base_string_fmt.format(self.pvalue_permutation) + + p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) + p2 = "calculated for legacy purposes only. " + pvalue = p1 + p2 + + bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) + bs2 = "the confidence interval is bias-corrected and accelerated." + bs = bs1 + bs2 + + pval_def1 = "Any p-value reported is the probability of observing the" + \ + "effect size (or greater),\nassuming the null hypothesis of" + \ + "zero difference is true." + pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ + "control and test labels were performed." + pval_def = pval_def1 + pval_def2 + + if show_resample_count and define_pval: + return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) + elif ~show_resample_count and define_pval: + return "{}\n{}\n\n{}".format(out, pvalue, pval_def) + elif show_resample_count and ~define_pval: + return "{}\n{}\n\n{}".format(out, pvalue, bs) + else: + return "{}\n{}".format(out, pvalue) + + + + def to_dict(self): + """ + Returns the attributes of the `dabest.TwoGroupEffectSize` object as a + dictionary. + """ + # Only get public (user-facing) attributes. + attrs = [a for a in dir(self) + if not a.startswith(("_", "to_dict"))] + out = {} + for a in attrs: + out[a] = getattr(self, a) + return out + + + @property + def difference(self): + """ + Returns the difference between the control and the test. + """ + return self.__difference + + @property + def effect_size(self): + """ + Returns the type of effect size reported. 
+        """
+        return self.__EFFECT_SIZE_DICT[self.__effect_size]
+
+    @property
+    def is_paired(self):
+        return self.__is_paired
+
+    @property
+    def ci(self):
+        """
+        Returns the width of the confidence interval, in percent.
+        """
+        return self.__ci
+
+    @property
+    def alpha(self):
+        """
+        Returns the significance level of the statistical test as a float
+        between 0 and 1.
+        """
+        return self.__alpha
+
+    @property
+    def resamples(self):
+        """
+        The number of resamples performed during the bootstrap procedure.
+        """
+        return self.__resamples
+
+    @property
+    def bootstraps(self):
+        """
+        The generated bootstraps of the effect size.
+        """
+        return self.__bootstraps
+
+    @property
+    def random_seed(self):
+        """
+        The number used to initialise the numpy random seed generator, ie.
+        `seed_value` from `numpy.random.seed(seed_value)` is returned.
+        """
+        return self.__random_seed
+
+    @property
+    def bca_interval_idx(self):
+        return self.__bca_interval_idx
+
+    @property
+    def bca_low(self):
+        """
+        The bias-corrected and accelerated confidence interval lower limit.
+        """
+        return self.__bca_low
+
+    @property
+    def bca_high(self):
+        """
+        The bias-corrected and accelerated confidence interval upper limit.
+        """
+        return self.__bca_high
+
+    @property
+    def pct_interval_idx(self):
+        return self.__pct_interval_idx
+
+    @property
+    def pct_low(self):
+        """
+        The percentile confidence interval lower limit.
+        """
+        return self.__pct_low
+
+    @property
+    def pct_high(self):
+        """
+        The percentile confidence interval upper limit.
+ """ + return self.__pct_high + + + + @property + def pvalue_brunner_munzel(self): + try: + return self.__pvalue_brunner_munzel + except AttributeError: + return npnan + + @property + def statistic_brunner_munzel(self): + try: + return self.__statistic_brunner_munzel + except AttributeError: + return npnan + + + + @property + def pvalue_wilcoxon(self): + try: + return self.__pvalue_wilcoxon + except AttributeError: + return npnan + + @property + def statistic_wilcoxon(self): + try: + return self.__statistic_wilcoxon + except AttributeError: + return npnan + + @property + def pvalue_mcnemar(self): + try: + return self.__pvalue_mcnemar + except AttributeError: + return npnan + + @property + def statistic_mcnemar(self): + try: + return self.__statistic_mcnemar + except AttributeError: + return npnan + + + + @property + def pvalue_paired_students_t(self): + # TODO Missing docstring + try: + return self.__pvalue_paired_students_t + except AttributeError: + return npnan + + @property + def statistic_paired_students_t(self): + # TODO Missing docstring + try: + return self.__statistic_paired_students_t + except AttributeError: + return npnan + + + + @property + def pvalue_kruskal(self): + # TODO Missing docstring + try: + return self.__pvalue_kruskal + except AttributeError: + return npnan + + @property + def statistic_kruskal(self): + # TODO Missing docstring + try: + return self.__statistic_kruskal + except AttributeError: + return npnan + + + @property + def pvalue_welch(self): + # TODO Missing docstring + try: + return self.__pvalue_welch + except AttributeError: + return npnan + + @property + def statistic_welch(self): + # TODO Missing docstring + try: + return self.__statistic_welch + except AttributeError: + return npnan + + + + @property + def pvalue_students_t(self): + # TODO Missing docstring + try: + return self.__pvalue_students_t + except AttributeError: + return npnan + + @property + def statistic_students_t(self): + # TODO Missing docstring + try: + return 
self.__statistic_students_t
+        except AttributeError:
+            return npnan
+
+
+
+    @property
+    def pvalue_mann_whitney(self):
+        # TODO Missing docstring
+        try:
+            return self.__pvalue_mann_whitney
+        except AttributeError:
+            return npnan
+
+
+
+    @property
+    def statistic_mann_whitney(self):
+        # TODO Missing docstring
+        try:
+            return self.__statistic_mann_whitney
+        except AttributeError:
+            return npnan
+
+    @property
+    def pvalue_permutation(self):
+        # TODO Missing docstring
+        return self.__PermutationTest_result.pvalue
+
+
+    @property
+    def permutation_count(self):
+        """
+        The number of permutations taken.
+        """
+        return self.__PermutationTest_result.permutation_count
+
+
+    @property
+    def permutations(self):
+        return self.__PermutationTest_result.permutations
+
+
+    @property
+    def permutations_var(self):
+        return self.__PermutationTest_result.permutations_var
+
+
+    @property
+    def proportional_difference(self):
+        try:
+            return self.__proportional_difference
+        except AttributeError:
+            return npnan
+
+
+# %% ../nbs/API/effsize_objects.ipynb 10
+class EffectSizeDataFrame(object):
+    """A class that generates and stores the results of bootstrapped effect
+    sizes for several comparisons."""
+
+    def __init__(self, dabest, effect_size,
+                 is_paired, ci=95, proportional=False,
+                 resamples=5000,
+                 permutation_count=5000,
+                 random_seed=12345,
+                 x1_level=None, x2=None,
+                 delta2=False, experiment_label=None,
+                 mini_meta=False):
+        """
+        Parses the data from a Dabest object, enabling plotting and printing
+        capability for the effect size of interest.
+ """ + + self.__dabest_obj = dabest + self.__effect_size = effect_size + self.__is_paired = is_paired + self.__ci = ci + self.__resamples = resamples + self.__permutation_count = permutation_count + self.__random_seed = random_seed + self.__proportional = proportional + self.__x1_level = x1_level + self.__experiment_label = experiment_label + self.__x2 = x2 + self.__delta2 = delta2 + self.__mini_meta = mini_meta + + + def __pre_calc(self): + from .misc_tools import print_greeting, get_varname + from ._stats_tools import confint_2group_diff as ci2g + from ._delta_objects import MiniMetaDelta, DeltaDelta + + idx = self.__dabest_obj.idx + dat = self.__dabest_obj._plot_data + xvar = self.__dabest_obj._xvar + yvar = self.__dabest_obj._yvar + + out = [] + reprs = [] + + if self.__delta2==True: + mixed_data = [] + for j, current_tuple in enumerate(idx): + if self.__is_paired != "sequential": + cname = current_tuple[0] + control = dat[dat[xvar] == cname][yvar].copy() + + for ix, tname in enumerate(current_tuple[1:]): + if self.__is_paired == "sequential": + cname = current_tuple[ix] + control = dat[dat[xvar] == cname][yvar].copy() + test = dat[dat[xvar] == tname][yvar].copy() + mixed_data.append(control) + mixed_data.append(test) + bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(mixed_data[0], mixed_data[1], mixed_data[2], mixed_data[3], + self.__is_paired, self.__resamples, self.__random_seed) + + + for j, current_tuple in enumerate(idx): + if self.__is_paired!="sequential": + cname = current_tuple[0] + control = dat[dat[xvar] == cname][yvar].copy() + + for ix, tname in enumerate(current_tuple[1:]): + if self.__is_paired == "sequential": + cname = current_tuple[ix] + control = dat[dat[xvar] == cname][yvar].copy() + test = dat[dat[xvar] == tname][yvar].copy() + + result = TwoGroupsEffectSize(control, test, + self.__effect_size, + self.__proportional, + self.__is_paired, + self.__ci, + self.__resamples, + self.__permutation_count, + self.__random_seed) + 
r_dict = result.to_dict() + r_dict["control"] = cname + r_dict["test"] = tname + r_dict["control_N"] = int(len(control)) + r_dict["test_N"] = int(len(test)) + out.append(r_dict) + if j == len(idx)-1 and ix == len(current_tuple)-2: + if self.__delta2 and self.__effect_size in ["mean_diff","delta_g"]: + resamp_count = False + def_pval = False + elif self.__mini_meta and self.__effect_size == "mean_diff": + resamp_count = False + def_pval = False + else: + resamp_count = True + def_pval = True + else: + resamp_count = False + def_pval = False + + text_repr = result.__repr__(show_resample_count=resamp_count, + define_pval=def_pval) + + to_replace = "between {} and {} is".format(cname, tname) + text_repr = text_repr.replace("is", to_replace, 1) + + reprs.append(text_repr) + + + self.__for_print = "\n\n".join(reprs) + + out_ = pd.DataFrame(out) + + columns_in_order = ['control', 'test', 'control_N', 'test_N', + 'effect_size', 'is_paired', + 'difference', 'ci', + + 'bca_low', 'bca_high', 'bca_interval_idx', + 'pct_low', 'pct_high', 'pct_interval_idx', + + 'bootstraps', 'resamples', 'random_seed', + + 'permutations', 'pvalue_permutation', 'permutation_count', 'permutations_var', + + 'pvalue_welch', + 'statistic_welch', + + 'pvalue_students_t', + 'statistic_students_t', + + 'pvalue_mann_whitney', + 'statistic_mann_whitney', + + 'pvalue_brunner_munzel', + 'statistic_brunner_munzel', + + 'pvalue_wilcoxon', + 'statistic_wilcoxon', + + 'pvalue_mcnemar', + 'statistic_mcnemar', + + 'pvalue_paired_students_t', + 'statistic_paired_students_t', + + 'pvalue_kruskal', + 'statistic_kruskal', + 'proportional_difference' + ] + self.__results = out_.reindex(columns=columns_in_order) + self.__results.dropna(axis="columns", how="all", inplace=True) + + # Add the is_paired column back when is_paired is None + if self.is_paired is None: + self.__results.insert(5, 'is_paired', self.__results.apply(lambda _: None, axis=1)) + + # Create and compute the delta-delta statistics + if self.__delta2: 
+ self.__delta_delta = DeltaDelta(self, + self.__permutation_count, + bootstraps_delta_delta, + self.__ci) + reprs.append(self.__delta_delta.__repr__(header=False)) + elif self.__delta2 and self.__effect_size not in ["mean_diff", "delta_g"]: + self.__delta_delta = "Delta-delta is not supported for {}.".format(self.__effect_size) + else: + self.__delta_delta = "`delta2` is False; delta-delta is therefore not calculated." + + # Create and compute the weighted average statistics + if self.__mini_meta and self.__effect_size == "mean_diff": + self.__mini_meta_delta = MiniMetaDelta(self, + self.__permutation_count, + self.__ci) + reprs.append(self.__mini_meta_delta.__repr__(header=False)) + elif self.__mini_meta and self.__effect_size != "mean_diff": + self.__mini_meta_delta = "Weighted delta is not supported for {}.".format(self.__effect_size) + else: + self.__mini_meta_delta = "`mini_meta` is False; weighted delta is therefore not calculated." + + + varname = get_varname(self.__dabest_obj) + lastline = "To get the results of all valid statistical tests, " +\ + "use `{}.{}.statistical_tests`".format(varname, self.__effect_size) + reprs.append(lastline) + + reprs.insert(0, print_greeting()) + + self.__for_print = "\n\n".join(reprs) + + + def __repr__(self): + try: + return self.__for_print + except AttributeError: + self.__pre_calc() + return self.__for_print + + + + def __calc_lqrt(self): + + rnd_seed = self.__random_seed + db_obj = self.__dabest_obj + dat = db_obj._plot_data + xvar = db_obj._xvar + yvar = db_obj._yvar + delta2 = self.__delta2 + + + out = [] + + for j, current_tuple in enumerate(db_obj.idx): + if self.__is_paired != "sequential": + cname = current_tuple[0] + control = dat[dat[xvar] == cname][yvar].copy() + + for ix, tname in enumerate(current_tuple[1:]): + if self.__is_paired == "sequential": + cname = current_tuple[ix] + control = dat[dat[xvar] == cname][yvar].copy() + test = dat[dat[xvar] == tname][yvar].copy() + + if self.__is_paired: + # Refactored 
here in v0.3.0 for performance issues. + lqrt_result = lqrt.lqrtest_rel(control, test, + random_state=rnd_seed) + + out.append({"control": cname, "test": tname, + "control_N": int(len(control)), + "test_N": int(len(test)), + "pvalue_paired_lqrt": lqrt_result.pvalue, + "statistic_paired_lqrt": lqrt_result.statistic + }) + + else: + # Likelihood Q-Ratio test: + lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, + random_state=rnd_seed, + equal_var=True) + + + lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, + random_state=rnd_seed, + equal_var=False) + + out.append({"control": cname, "test": tname, + "control_N": int(len(control)), + "test_N": int(len(test)), + + "pvalue_lqrt_equal_var" : lqrt_equal_var_result.pvalue, + "statistic_lqrt_equal_var" : lqrt_equal_var_result.statistic, + "pvalue_lqrt_unequal_var" : lqrt_unequal_var_result.pvalue, + "statistic_lqrt_unequal_var" : lqrt_unequal_var_result.statistic, + }) + self.__lqrt_results = pd.DataFrame(out) + + + def plot(self, color_col=None, + + raw_marker_size=6, es_marker_size=9, + + swarm_label=None, contrast_label=None, delta2_label=None, + swarm_ylim=None, contrast_ylim=None, delta2_ylim=None, + + custom_palette=None, swarm_desat=0.5, halfviolin_desat=1, + halfviolin_alpha=0.8, + + face_color = None, + #bar plot + bar_label=None, bar_desat=0.5, bar_width = 0.5,bar_ylim = None, + # error bar of proportion plot + ci=None, ci_type='bca', err_color=None, + + float_contrast=True, + show_pairs=True, + show_delta2=True, + show_mini_meta=True, + group_summaries=None, + group_summaries_offset=0.1, + + fig_size=None, + dpi=100, + ax=None, + + contrast_show_es = False, + es_sf = 2, + es_fontsize = 10, + + contrast_show_deltas = True, + + gridkey_rows=None, + gridkey_merge_pairs = False, + gridkey_show_Ns = True, + gridkey_show_es = True, + + swarmplot_kwargs=None, + barplot_kwargs=None, + violinplot_kwargs=None, + slopegraph_kwargs=None, + sankey_kwargs=None, + reflines_kwargs=None, + 
group_summary_kwargs=None, + legend_kwargs=None, + title=None, fontsize_title = 16, + fontsize_rawxlabel = 12,fontsize_rawylabel = 12,fontsize_contrastxlabel = 12, fontsize_contrastylabel = 12, + fontsize_delta2label = 12): + + """ + Creates an estimation plot for the effect size of interest. + + + Parameters + ---------- + color_col : string, default None + Column to be used for colors. + raw_marker_size : float, default 6 + The diameter (in points) of the marker dots plotted in the + swarmplot. + es_marker_size : float, default 9 + The size (in points) of the effect size points on the difference + axes. + swarm_label, contrast_label, delta2_label : strings, default None + Set labels for the y-axis of the swarmplot and the contrast plot, + respectively. If `swarm_label` is not specified, it defaults to + "value", unless a column name was passed to `y`. If + `contrast_label` is not specified, it defaults to the effect size + being plotted. If `delta2_label` is not specifed, it defaults to + "delta - delta" + swarm_ylim, contrast_ylim, delta2_ylim : tuples, default None + The desired y-limits of the raw data (swarmplot) axes, the + difference axes and the delta-delta axes respectively, as a tuple. + These will be autoscaled to sensible values if they are not + specified. The delta2 axes and contrast axes should have the same + limits for y. When `show_delta2` is True, if both of the `contrast_ylim` + and `delta2_ylim` are not None, then they must be specified with the + same values; when `show_delta2` is True and only one of them is specified, + then the other will automatically be assigned with the same value. + Specifying `delta2_ylim` does not have any effect when `show_delta2` is + False. + custom_palette : dict, list, or matplotlib color palette, default None + This keyword accepts a dictionary with {'group':'color'} pairings, + a list of RGB colors, or a specified matplotlib palette. This + palette will be used to color the swarmplot. 
If `color_col` is not + specified, then each group will be colored in sequence according + to the default palette currently used by matplotlib. + Please take a look at the seaborn commands `color_palette` + and `cubehelix_palette` to generate a custom palette. Both + these functions generate a list of RGB colors. + See: + https://seaborn.pydata.org/generated/seaborn.color_palette.html + https://seaborn.pydata.org/generated/seaborn.cubehelix_palette.html + The named colors of matplotlib can be found here: + https://matplotlib.org/examples/color/named_colors.html + swarm_desat : float, default 1 + Decreases the saturation of the colors in the swarmplot by the + desired proportion. Uses `seaborn.desaturate()` to acheive this. + halfviolin_desat : float, default 0.5 + Decreases the saturation of the colors of the half-violin bootstrap + curves by the desired proportion. Uses `seaborn.desaturate()` to + acheive this. + halfviolin_alpha : float, default 0.8 + The alpha (transparency) level of the half-violin bootstrap curves. + float_contrast : boolean, default True + Whether or not to display the halfviolin bootstrapped difference + distribution alongside the raw data. + show_pairs : boolean, default True + If the data is paired, whether or not to show the raw data as a + swarmplot, or as slopegraph, with a line joining each pair of + observations. + show_delta2, show_mini_meta : boolean, default True + If delta-delta or mini-meta delta is calculated, whether or not to + show the delta-delta plot or mini-meta plot. + group_summaries : ['mean_sd', 'median_quartiles', 'None'], default None. + Plots the summary statistics for each group. If 'mean_sd', then + the mean and standard deviation of each group is plotted as a + notched line beside each group. If 'median_quantiles', then the + median and 25th and 75th percentiles of each group is plotted + instead. If 'None', the summaries are not shown. 
+ group_summaries_offset : float, default 0.1 + If group summaries are displayed, they will be offset from the raw + data swarmplot groups by this value. + fig_size : tuple, default None + The desired dimensions of the figure as a (length, width) tuple. + dpi : int, default 100 + The dots per inch of the resulting figure. + ax : matplotlib.Axes, default None + Provide an existing Axes for the plots to be created. If no Axes is + specified, a new matplotlib Figure will be created. + gridkey_rows : list, default None + Provide a list of row labels for the gridkey. The supplied idx is + checked against the row labels to determine whether the corresponding + cell should be populated or not. + swarmplot_kwargs : dict, default None + Pass any keyword arguments accepted by the seaborn `swarmplot` + command here, as a dict. If None, the following keywords are + passed to sns.swarmplot : {'size':`raw_marker_size`}. + violinplot_kwargs : dict, default None + Pass any keyword arguments accepted by the matplotlib ` + pyplot.violinplot` command here, as a dict. If None, the following + keywords are passed to violinplot : {'widths':0.5, 'vert':True, + 'showextrema':False, 'showmedians':False}. + slopegraph_kwargs : dict, default None + This will change the appearance of the lines used to join each pair + of observations when `show_pairs=True`. Pass any keyword arguments + accepted by matplotlib `plot()` function here, as a dict. + If None, the following keywords are + passed to plot() : {'linewidth':1, 'alpha':0.5}. + sankey_kwargs: dict, default None + Whis will change the appearance of the sankey diagram used to depict + paired proportional data when `show_pairs=True` and `proportional=True`. + Pass any keyword arguments accepted by plot_tools.sankeydiag() function + here, as a dict. 
If None, the following keywords are passed to sankey diagram: + {"width": 0.5, "align": "center", "alpha": 0.4, "bar_width": 0.1, "rightColor": False} + reflines_kwargs : dict, default None + This will change the appearance of the zero reference lines. Pass + any keyword arguments accepted by the matplotlib Axes `hlines` + command here, as a dict. If None, the following keywords are + passed to Axes.hlines : {'linestyle':'solid', 'linewidth':0.75, + 'zorder':2, 'color' : default y-tick color}. + group_summary_kwargs : dict, default None + Pass any keyword arguments accepted by the matplotlib.lines.Line2D + command here, as a dict. This will change the appearance of the + vertical summary lines for each group, if `group_summaries` is not + 'None'. If None, the following keywords are passed to + matplotlib.lines.Line2D : {'lw':2, 'alpha':1, 'zorder':3}. + legend_kwargs : dict, default None + Pass any keyword arguments accepted by the matplotlib Axes + `legend` command here, as a dict. If None, the following keywords + are passed to matplotlib.Axes.legend : {'loc':'upper left', + 'frameon':False}. + title : string, default None + Title for the plot. If None, no title will be displayed. Pass any + keyword arguments accepted by the matplotlib.pyplot.suptitle `t` command here, + as a string. + fontsize_title : float or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}, default 'large' + Font size for the plot title. If a float, the fontsize in points. The + string values denote sizes relative to the default font size. Pass any keyword arguments accepted + by the matplotlib.pyplot.suptitle `fontsize` command here, as a string. + fontsize_rawxlabel : float, default 12 + Font size for the raw axes xlabel. + fontsize_rawylabel : float, default 12 + Font size for the raw axes ylabel. + fontsize_contrastxlabel : float, default 12 + Font size for the contrast axes xlabel. 
+ fontsize_contrastylabel : float, default 12 + Font size for the contrast axes ylabel. + fontsize_delta2label : float, default 12 + Font size for the delta-delta axes ylabel. + + + Returns + ------- + A :class:`matplotlib.figure.Figure` with 2 Axes, if ``ax = None``. + + The first axes (accessible with ``FigName.axes[0]``) contains the rawdata swarmplot; the second axes (accessible with ``FigName.axes[1]``) has the bootstrap distributions and effect sizes (with confidence intervals) plotted on it. + + If ``ax`` is specified, the rawdata swarmplot is accessed at ``ax`` + itself, while the effect size axes is accessed at ``ax.contrast_axes``. + See the last example below. + + + + """ + + from .plotter import EffectSizeDataFramePlotter + + if hasattr(self, "results") is False: + self.__pre_calc() + + if self.__delta2: + color_col = self.__x2 + + # if self.__proportional: + # raw_marker_size = 0.01 + + # Modification incurred due to update of Seaborn + ci = ('ci', ci) if ci is not None else None + + all_kwargs = locals() + del all_kwargs["self"] + + out = EffectSizeDataFramePlotter(self, **all_kwargs) + + return out + + + @property + def proportional(self): + """ + Returns the proportional parameter + class. + """ + return self.__proportional + + @property + def results(self): + """Prints all pairwise comparisons nicely.""" + try: + return self.__results + except AttributeError: + self.__pre_calc() + return self.__results + + + + @property + def statistical_tests(self): + results_df = self.results + + # Select only the statistics and p-values. 
+ stats_columns = [c for c in results_df.columns + if c.startswith("statistic") or c.startswith("pvalue")] + + default_cols = ['control', 'test', 'control_N', 'test_N', + 'effect_size', 'is_paired', + 'difference', 'ci', 'bca_low', 'bca_high'] + + cols_of_interest = default_cols + stats_columns + + return results_df[cols_of_interest] + + + @property + def _for_print(self): + return self.__for_print + + @property + def _plot_data(self): + return self.__dabest_obj._plot_data + + @property + def idx(self): + return self.__dabest_obj.idx + + @property + def xvar(self): + return self.__dabest_obj._xvar + + @property + def yvar(self): + return self.__dabest_obj._yvar + + @property + def is_paired(self): + return self.__is_paired + + @property + def ci(self): + """ + The width of the confidence interval being produced, in percent. + """ + return self.__ci + + @property + def x1_level(self): + return self.__x1_level + + + @property + def x2(self): + return self.__x2 + + + @property + def experiment_label(self): + return self.__experiment_label + + + @property + def delta2(self): + return self.__delta2 + + + @property + def resamples(self): + """ + The number of resamples (with replacement) during bootstrap resampling." + """ + return self.__resamples + + @property + def random_seed(self): + """ + The seed used by `numpy.seed()` for bootstrap resampling. + """ + return self.__random_seed + + @property + def effect_size(self): + """The type of effect size being computed.""" + return self.__effect_size + + @property + def dabest_obj(self): + """ + Returns the `dabest` object that invoked the current EffectSizeDataFrame + class. + """ + return self.__dabest_obj + + @property + def proportional(self): + """ + Returns the proportional parameter + class. + """ + return self.__proportional + + @property + def lqrt(self): + """Returns all pairwise Lq-Likelihood Ratio Type test results + as a pandas DataFrame. 
+ + For more information on LqRT tests, see https://arxiv.org/abs/1911.11922 + """ + try: + return self.__lqrt_results + except AttributeError: + self.__calc_lqrt() + return self.__lqrt_results + + + @property + def mini_meta(self): + """ + Returns the mini_meta boolean parameter. + """ + return self.__mini_meta + + + @property + def mini_meta_delta(self): + """ + Returns the mini_meta results. + """ + try: + return self.__mini_meta_delta + except AttributeError: + self.__pre_calc() + return self.__mini_meta_delta + + + @property + def delta_delta(self): + """ + Returns the mini_meta results. + """ + try: + return self.__delta_delta + except AttributeError: + self.__pre_calc() + return self.__delta_delta + + + +# %% ../nbs/API/effsize_objects.ipynb 29 +class PermutationTest: + """ + A class to compute and report permutation tests. + + Parameters + ---------- + control : array-like + test : array-like + These should be numerical iterables. + effect_size : string. + Any one of the following are accepted inputs: + 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', 'delta_g" or 'cliffs_delta' + is_paired : string, default None + permutation_count : int, default 10000 + The number of permutations (reshuffles) to perform. + random_seed : int, default 12345 + `random_seed` is used to seed the random number generator during + bootstrap resampling. This ensures that the generated permutations + are replicable. + + Returns + ------- + A :py:class:`PermutationTest` object: + `difference`:float + The effect size of the difference between the control and the test. + `effect_size`:string + The type of effect size reported. + + + """ + + def __init__(self, control: array, + test: array, # These should be numerical iterables. + effect_size:str, # Any one of the following are accepted inputs: 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta' + is_paired:str=None, + permutation_count:int=5000, # The number of permutations (reshuffles) to perform. 
+ random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable. + **kwargs): + from ._stats_tools.effsize import two_group_difference + from ._stats_tools.confint_2group_diff import calculate_group_var + + + self.__permutation_count = permutation_count + + # Run Sanity Check. + if is_paired and len(control) != len(test): + raise ValueError("The two arrays do not have the same length.") + + # Initialise random number generator. + # rng = random.default_rng(seed=random_seed) + rng = RandomState(PCG64(random_seed)) + + # Set required constants and variables + control = array(control) + test = array(test) + + control_sample = control.copy() + test_sample = test.copy() + + BAG = array([*control, *test]) + CONTROL_LEN = int(len(control)) + EXTREME_COUNT = 0. + THRESHOLD = abs(two_group_difference(control, test, + is_paired, effect_size)) + self.__permutations = [] + self.__permutations_var = [] + + for i in range(int(permutation_count)): + + if is_paired: + # Select which control-test pairs to swap. + random_idx = rng.choice(CONTROL_LEN, + rng.randint(0, CONTROL_LEN+1), + replace=False) + + # Perform swap. + for i in random_idx: + _placeholder = control_sample[i] + control_sample[i] = test_sample[i] + test_sample[i] = _placeholder + + else: + # Shuffle the bag and assign to control and test groups. + # NB. rng.shuffle didn't produce replicable results... + shuffled = rng.permutation(BAG) + control_sample = shuffled[:CONTROL_LEN] + test_sample = shuffled[CONTROL_LEN:] + + + es = two_group_difference(control_sample, test_sample, + False, effect_size) + + group_var = calculate_group_var(var(control_sample, ddof=1), + CONTROL_LEN, + var(test_sample, ddof=1), + len(test_sample)) + self.__permutations.append(es) + self.__permutations_var.append(group_var) + + if abs(es) > THRESHOLD: + EXTREME_COUNT += 1. 
+ + self.__permutations = array(self.__permutations) + self.__permutations_var = array(self.__permutations_var) + + self.pvalue = EXTREME_COUNT / permutation_count + + + def __repr__(self): + return("{} permutations were taken. The p-value is {}.".format(self.permutation_count, + self.pvalue)) + + + @property + def permutation_count(self): + """ + The number of permuations taken. + """ + return self.__permutation_count + + + @property + def permutations(self): + """ + The effect sizes of all the permutations in a list. + """ + return self.__permutations + + + @property + def permutations_var(self): + """ + The experiment group variance of all the permutations in a list. + """ + return self.__permutations_var + diff --git a/dabest/_stats_tools/effsize.py b/dabest/_stats_tools/effsize.py index cf9e81b4..b5d0a6ee 100644 --- a/dabest/_stats_tools/effsize.py +++ b/dabest/_stats_tools/effsize.py @@ -3,6 +3,9 @@ # %% ../../nbs/API/effsize.ipynb 4 from __future__ import annotations import numpy as np +import warnings +from scipy.special import gamma +from scipy.stats import mannwhitneyu # %% auto 0 __all__ = ['two_group_difference', 'func_difference', 'cohens_d', 'cohens_h', 'hedges_g', 'cliffs_delta', 'weighted_delta'] @@ -56,8 +59,7 @@ def two_group_difference(control:list|tuple|np.ndarray, #Accepts lists, tuples, median of `test`. """ - import numpy as np - import warnings + if effect_size == "mean_diff": return func_difference(control, test, np.mean, is_paired) @@ -100,13 +102,12 @@ def func_difference(control:list|tuple|np.ndarray, # NaNs are automatically disc Applies func to `control` and `test`, and then returns the difference. """ - import numpy as np # Convert to numpy arrays for speed. # NaNs are automatically dropped. 
- if control.__class__ != np.ndarray: + if ~isinstance(control, np.ndarray): control = np.array(control) - if test.__class__ != np.ndarray: + if ~isinstance(test, np.ndarray): test = np.array(test) if is_paired: @@ -178,13 +179,12 @@ def cohens_d(control:list|tuple|np.ndarray, - https://en.wikipedia.org/wiki/Bessel%27s_correction - https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation """ - import numpy as np # Convert to numpy arrays for speed. # NaNs are automatically dropped. - if control.__class__ != np.ndarray: + if ~isinstance(control, np.ndarray): control = np.array(control) - if test.__class__ != np.ndarray: + if ~isinstance(test, np.ndarray): test = np.array(test) control = control[~np.isnan(control)] test = test[~np.isnan(test)] @@ -226,9 +226,7 @@ def cohens_h(control:list|tuple|np.ndarray, and a dict for mapping the 0s and 1s to the actual labels, e.g.{1: "Smoker", 0: "Non-smoker"} ''' - import numpy as np np.seterr(divide='ignore', invalid='ignore') - import pandas as pd # Check whether dataframe contains only 0s and 1s. if np.isin(control, [0, 1]).all() == False or np.isin(test, [0, 1]).all() == False: @@ -237,10 +235,10 @@ def cohens_h(control:list|tuple|np.ndarray, # Convert to numpy arrays for speed. # NaNs are automatically dropped. # Aligned with cohens_d calculation. - if control.__class__ != np.ndarray: + if ~isinstance(control, np.ndarray): control = np.array(control) - if test.__class__ != np.ndarray: - test = np.array(test) + if ~isinstance(test, np.ndarray): + test = np.array(test) control = control[~np.isnan(control)] test = test[~np.isnan(test)] @@ -266,13 +264,12 @@ def hedges_g(control:list|tuple|np.ndarray, See [here](https://en.wikipedia.org/wiki/Effect_size#Hedges'_g) """ - import numpy as np # Convert to numpy arrays for speed. # NaNs are automatically dropped. 
- if control.__class__ != np.ndarray: + if ~isinstance(control, np.ndarray): control = np.array(control) - if test.__class__ != np.ndarray: + if ~isinstance(test, np.ndarray): test = np.array(test) control = control[~np.isnan(control)] test = test[~np.isnan(test)] @@ -291,14 +288,13 @@ def cliffs_delta(control:list|tuple|np.ndarray, Computes Cliff's delta for 2 samples. See [here](https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data) """ - import numpy as np - from scipy.stats import mannwhitneyu + # Convert to numpy arrays for speed. # NaNs are automatically dropped. - if control.__class__ != np.ndarray: + if ~isinstance(control, np.ndarray): control = np.array(control) - if test.__class__ != np.ndarray: + if ~isinstance(test, np.ndarray): test = np.array(test) c = control[~np.isnan(control)] @@ -311,54 +307,31 @@ def cliffs_delta(control:list|tuple|np.ndarray, U, _ = mannwhitneyu(t, c, alternative='two-sided') cliffs_delta = ((2 * U) / (control_n * test_n)) - 1 - # more = 0 - # less = 0 - # - # for i, c in enumerate(control): - # for j, t in enumerate(test): - # if t > c: - # more += 1 - # elif t < c: - # less += 1 - # - # cliffs_delta = (more - less) / (control_n * test_n) return cliffs_delta # %% ../../nbs/API/effsize.ipynb 11 def _compute_standardizers(control, test): - from numpy import mean, var, sqrt, nan + # TODO missing docstring # For calculation of correlation; not currently used. # from scipy.stats import pearsonr control_n = len(control) test_n = len(test) - control_mean = mean(control) - test_mean = mean(test) + control_var = np.var(control, ddof=1) # use N-1 to compute the variance. + test_var = np.var(test, ddof=1) - control_var = var(control, ddof=1) # use N-1 to compute the variance. - test_var = var(test, ddof=1) - - control_std = sqrt(control_var) - test_std = sqrt(test_var) # For unpaired 2-groups standardized mean difference. 
- pooled = sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) / + pooled = np.sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) / (control_n + test_n - 2) ) # For paired standardized mean difference. - average = sqrt((control_var + test_var) / 2) - - # if len(control) == len(test): - # corr = pearsonr(control, test)[0] - # std_diff = sqrt(control_var + test_var - (2 * corr * control_std * test_std)) - # std_diff_corrected = std_diff / (sqrt(2 * (1 - corr))) - # return pooled, average, std_diff_corrected - # - # else: + average = np.sqrt((control_var + test_var) / 2) + return pooled, average # indent if you implement above code chunk. # %% ../../nbs/API/effsize.ipynb 12 @@ -377,16 +350,12 @@ def _compute_hedges_correction_factor(n1, ISBN 0-12-336380-2. """ - from scipy.special import gamma - from numpy import sqrt, isinf - import warnings - df = n1 + n2 - 2 numer = gamma(df / 2) denom0 = gamma((df - 1) / 2) - denom = sqrt(df / 2) * denom0 + denom = np.sqrt(df / 2) * denom0 - if isinf(numer) or isinf(denom): + if np.isinf(numer) or np.isinf(denom): # occurs when df is too large. # Apply Hedges and Olkin's approximation. df_sum = n1 + n2 @@ -404,7 +373,6 @@ def weighted_delta(difference, group_var): Compute the weighted deltas where the weight is the inverse of the pooled group difference. 
''' - import numpy as np weight = np.true_divide(1, group_var) return np.sum(difference*weight)/np.sum(weight) diff --git a/nbs/API/class.ipynb b/nbs/API/class.ipynb index 32e9a973..313f2680 100644 --- a/nbs/API/class.ipynb +++ b/nbs/API/class.ipynb @@ -56,20 +56,14 @@ "source": [ "#| export\n", "# Import standard data science libraries\n", - "from numpy import array, isnan, isinf, repeat, random, issubdtype, number, isin, abs, var\n", - "from numpy import sort as npsort\n", - "import lqrt\n", + "from numpy import array, repeat, random, issubdtype, number\n", "import pandas as pd\n", "import seaborn as sns\n", - "import scipy.stats as spstats\n", "from scipy.stats import norm\n", "from scipy.stats import randint\n", "import datetime as dt\n", - "from statsmodels.stats.contingency_tables import mcnemar\n", "from string import Template\n", - "import warnings\n", - "from numpy import nan as npnan\n", - "from numpy.random import PCG64, RandomState" + "import warnings" ] }, { @@ -106,6 +100,7 @@ " statistics. 
You should not be calling this class directly; instead,\n", " use `dabest.load()` to parse your DataFrame prior to analysis.\n", " \"\"\"\n", + " from ._effsize_objects import EffectSizeDataFrame\n", "\n", " self.__delta2 = delta2\n", " self.__experiment = experiment\n", @@ -119,17 +114,17 @@ " self.__mini_meta = mini_meta \n", "\n", " # Make a copy of the data, so we don't make alterations to it.\n", + " # TODO is this really needed?\n", " data_in = data.copy()\n", "\n", "\n", " # Check if it is a valid mini_meta case\n", - " if mini_meta is True:\n", - "\n", + " if mini_meta:\n", " # Only mini_meta calculation but not proportional and delta-delta function\n", - " if proportional is True:\n", + " if proportional:\n", " err0 = '`proportional` and `mini_meta` cannot be True at the same time.'\n", " raise ValueError(err0)\n", - " elif delta2 is True:\n", + " elif delta2:\n", " err0 = '`delta` and `mini_meta` cannot be True at the same time.'\n", " raise ValueError(err0)\n", " \n", @@ -139,7 +134,7 @@ " err0 = '`mini_meta` is True, but `idx` ({})'.format(idx) \n", " err1 = 'does not contain exactly 2 columns.'\n", " raise ValueError(err0 + err1)\n", - " elif all([isinstance(i, (tuple, list)) for i in idx]):\n", + " if all([isinstance(i, (tuple, list)) for i in idx]):\n", " all_idx_lengths = [len(t) for t in idx]\n", " if (array(all_idx_lengths) != 2).any():\n", " err1 = \"`mini_meta` is True, but some idx \"\n", @@ -209,17 +204,17 @@ " raise IndexError(err)\n", "\n", " else:\n", - " x1_level = data_in[x[0]].unique() \n", - " elif experiment is not None:\n", + " x1_level = data_in[x[0]].unique()\n", + " # TODO what if experiment is None? 
\n", + " elif experiment:\n", " experiment_label = data_in[experiment].unique()\n", " x1_level = data_in[x[0]].unique() \n", " self.__experiment_label = experiment_label\n", " self.__x1_level = x1_level\n", "\n", "\n", - "\n", " # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case\n", - " if idx is None and x is not None and y is not None:\n", + " if x and y and idx is None:\n", " # Add a length check for unique values in the first element in list x, \n", " # if the length is greater than 2, force delta2 to be False\n", " # Should be removed if delta2 for situations other than 2x2 is supported\n", @@ -282,23 +277,22 @@ " raise ValueError(err)\n", "\n", " # Check if there is a typo on paired\n", - " if paired is not None:\n", - " if paired not in (\"baseline\", \"sequential\"):\n", - " err = '{} assigned for `paired` is not valid.'.format(paired)\n", - " raise ValueError(err)\n", + " if paired and paired not in (\"baseline\", \"sequential\"):\n", + " err = '{} assigned for `paired` is not valid.'.format(paired)\n", + " raise ValueError(err)\n", "\n", "\n", " # Determine the type of data: wide or long.\n", - " if x is None and y is not None:\n", + " if y and x is None:\n", " err = 'You have only specified `y`. Please also specify `x`.'\n", " raise ValueError(err)\n", "\n", - " elif y is None and x is not None:\n", + " if x and y is None:\n", " err = 'You have only specified `x`. 
Please also specify `y`.'\n", " raise ValueError(err)\n", "\n", " # Identify the type of data that was passed in.\n", - " elif x is not None and y is not None:\n", + " if x and y:\n", " # Assume we have a long dataset.\n", " # check both x and y are column names in data.\n", " if x not in data_in.columns:\n", @@ -361,12 +355,12 @@ " # remove any NA rows.\n", " plot_data.dropna(axis=0, how='any', subset=[self.__yvar], inplace=True)\n", "\n", - " \n", + " # TODO these comments should not be in the code but on the release notes of the package version\n", " # Lines 131 to 140 added in v0.2.3.\n", " # Fixes a bug that jammed up when the xvar column was already \n", " # a pandas Categorical. Now we check for this and act appropriately.\n", " if isinstance(plot_data[self.__xvar].dtype, \n", - " pd.CategoricalDtype) is True:\n", + " pd.CategoricalDtype):\n", " plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)\n", " plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, \n", " ordered=True, \n", @@ -376,9 +370,9 @@ " categories=all_plot_groups,\n", " ordered=True)\n", " \n", - " \n", + " # TODO Move all the plot_data logic to the function returning self.__plot_data\n", " self.__plot_data = plot_data\n", - " \n", + " # TODO Move all the all_plot_groups logic to the function returning self.__all_plot_groups\n", " self.__all_plot_groups = all_plot_groups\n", "\n", "\n", @@ -387,7 +381,8 @@ " if id_col is None:\n", " err = \"`id_col` must be specified if `paired` is assigned with a not NoneType value.\"\n", " raise IndexError(err)\n", - " elif id_col not in plot_data.columns:\n", + " \n", + " if id_col not in plot_data.columns:\n", " err = \"{} is not a column in `data`. 
\".format(id_col)\n", " raise IndexError(err)\n", "\n", @@ -429,7 +424,7 @@ " def __repr__(self):\n", " from .__init__ import __version__\n", " from .misc_tools import print_greeting\n", - "\n", + " \n", " greeting_header = print_greeting()\n", "\n", " RM_STATUS = {'baseline' : 'for repeated measures against baseline \\n', \n", @@ -465,10 +460,10 @@ " for ix, test_name in enumerate(current_tuple[1:]):\n", " comparisons.append(\"{} minus {}\".format(test_name, control_name))\n", "\n", - " if self.__delta2 is True:\n", + " if self.__delta2:\n", " comparisons.append(\"{} minus {} (only for mean difference)\".format(self.__experiment_label[1], self.__experiment_label[0]))\n", " \n", - " if self.__mini_meta is True:\n", + " if self.__mini_meta:\n", " comparisons.append(\"weighted delta (only for mean difference)\")\n", "\n", " for j, g in enumerate(comparisons):\n", @@ -1295,1842 +1290,6 @@ "$\\Delta_{g} = \\frac{\\Delta_{\\Delta}}{s_{\\Delta_{\\Delta}}}$" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "6017e0d4", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class TwoGroupsEffectSize(object):\n", - "\n", - " \"\"\"\n", - " A class to compute and store the results of bootstrapped\n", - " mean differences between two groups.\n", - " \n", - " Compute the effect size between two groups.\n", - "\n", - " Parameters\n", - " ----------\n", - " control : array-like\n", - " test : array-like\n", - " These should be numerical iterables.\n", - " effect_size : string.\n", - " Any one of the following are accepted inputs:\n", - " 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'\n", - " is_paired : string, default None\n", - " resamples : int, default 5000\n", - " The number of bootstrap resamples to be taken for the calculation\n", - " of the confidence interval limits.\n", - " permutation_count : int, default 5000\n", - " The number of permutations (reshuffles) to perform for the \n", - " computation of the permutation 
p-value\n", - " ci : float, default 95\n", - " The confidence interval width. The default of 95 produces 95%\n", - " confidence intervals.\n", - " random_seed : int, default 12345\n", - " `random_seed` is used to seed the random number generator during\n", - " bootstrap resampling. This ensures that the confidence intervals\n", - " reported are replicable.\n", - "\n", - " Returns\n", - " -------\n", - " A :py:class:`TwoGroupEffectSize` object:\n", - " `difference` : float\n", - " The effect size of the difference between the control and the test.\n", - " `effect_size` : string\n", - " The type of effect size reported.\n", - " `is_paired` : string\n", - " The type of repeated-measures experiment.\n", - " `ci` : float\n", - " Returns the width of the confidence interval, in percent.\n", - " `alpha` : float\n", - " Returns the significance level of the statistical test as a float between 0 and 1.\n", - " `resamples` : int\n", - " The number of resamples performed during the bootstrap procedure.\n", - " `bootstraps` : numpy ndarray\n", - " The generated bootstraps of the effect size.\n", - " `random_seed` : int\n", - " The number used to initialise the numpy random seed generator, ie.`seed_value` from `numpy.random.seed(seed_value)` is returned.\n", - " `bca_low, bca_high` : float\n", - " The bias-corrected and accelerated confidence interval lower limit and upper limits, respectively.\n", - " `pct_low, pct_high` : float\n", - " The percentile confidence interval lower limit and upper limits, respectively.\n", - " \"\"\"\n", - "\n", - " def __init__(self, control, test, effect_size,\n", - " proportional=False,\n", - " is_paired=None, ci=95,\n", - " resamples=5000, \n", - " permutation_count=5000, \n", - " random_seed=12345):\n", - " \n", - " from ._stats_tools import effsize as es\n", - " from ._stats_tools import confint_2group_diff as ci2g\n", - "\n", - "\n", - " self.__EFFECT_SIZE_DICT = {\"mean_diff\" : \"mean difference\",\n", - " \"median_diff\" : \"median 
difference\",\n", - " \"cohens_d\" : \"Cohen's d\",\n", - " \"cohens_h\" : \"Cohen's h\",\n", - " \"hedges_g\" : \"Hedges' g\",\n", - " \"cliffs_delta\" : \"Cliff's delta\",\n", - " \"delta_g\" : \"deltas' g\"}\n", - "\n", - "\n", - " kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()]\n", - " if effect_size not in kosher_es:\n", - " err1 = \"The effect size '{}'\".format(effect_size)\n", - " err2 = \"is not one of {}\".format(kosher_es)\n", - " raise ValueError(\" \".join([err1, err2]))\n", - "\n", - " if effect_size == \"cliffs_delta\" and is_paired:\n", - " err1 = \"`paired` is not None; therefore Cliff's delta is not defined.\"\n", - " raise ValueError(err1)\n", - "\n", - " if proportional==True and effect_size not in ['mean_diff','cohens_h']:\n", - " err1 = \"`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined.\"\n", - " raise ValueError(err1)\n", - "\n", - " if proportional==True and (isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False):\n", - " err1 = \"`proportional` is True; Only accept binary data consisting of 0 and 1.\"\n", - " raise ValueError(err1)\n", - "\n", - " # Convert to numpy arrays for speed.\n", - " # NaNs are automatically dropped.\n", - " control = array(control)\n", - " test = array(test)\n", - " control = control[~isnan(control)]\n", - " test = test[~isnan(test)]\n", - "\n", - " self.__effect_size = effect_size\n", - " self.__control = control\n", - " self.__test = test\n", - " self.__is_paired = is_paired\n", - " self.__resamples = resamples\n", - " self.__permutation_count = permutation_count\n", - " self.__random_seed = random_seed\n", - " self.__ci = ci\n", - " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", - "\n", - " self.__difference = es.two_group_difference(\n", - " control, test, is_paired, effect_size)\n", - " \n", - " self.__jackknives = ci2g.compute_meandiff_jackknife(\n", - " control, test, is_paired, effect_size)\n", - "\n", - " 
self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", - "\n", - " bootstraps = ci2g.compute_bootstrapped_diff(\n", - " control, test, is_paired, effect_size,\n", - " resamples, random_seed)\n", - " self.__bootstraps = bootstraps\n", - " \n", - " sorted_bootstraps = npsort(self.__bootstraps)\n", - " # Added in v0.2.6.\n", - " # Raises a UserWarning if there are any infiinities in the bootstraps.\n", - " num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)])\n", - " \n", - " if num_infinities > 0:\n", - " warn_msg = \"There are {} bootstrap(s) that are not defined. \"\\\n", - " \"This is likely due to smaple sample sizes. \"\\\n", - " \"The values in a bootstrap for a group will be more likely \"\\\n", - " \"to be all equal, with a resulting variance of zero. \"\\\n", - " \"The computation of Cohen's d and Hedges' g thus \"\\\n", - " \"involved a division by zero. \"\n", - " warnings.warn(warn_msg.format(num_infinities), \n", - " category=UserWarning)\n", - "\n", - " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", - " self.__bootstraps, self.__difference)\n", - "\n", - " # Compute BCa intervals.\n", - " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", - " self.__bias_correction, self.__acceleration_value,\n", - " self.__resamples, ci)\n", - "\n", - " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", - "\n", - " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", - " self.__bca_low = sorted_bootstraps[bca_idx_low]\n", - " self.__bca_high = sorted_bootstraps[bca_idx_high]\n", - "\n", - " err1 = \"The $lim_type limit of the interval\"\n", - " err2 = \"was in the $loc 10 values.\"\n", - " err3 = \"The result should be considered unstable.\"\n", - " err_temp = Template(\" \".join([err1, err2, err3]))\n", - "\n", - " if bca_idx_low <= 10:\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", - " loc=\"bottom\"),\n", - " stacklevel=1)\n", - "\n", - " if bca_idx_high >= resamples-9:\n", - " 
warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", - " loc=\"top\"),\n", - " stacklevel=1)\n", - "\n", - " else:\n", - " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", - " err2 = \"It is set to the effect size itself.\"\n", - " err3 = \"All bootstrap values were likely all the same.\"\n", - " err_temp = Template(\" \".join([err1, err2, err3]))\n", - "\n", - " if isnan(bca_idx_low):\n", - " self.__bca_low = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", - " stacklevel=0)\n", - "\n", - " if isnan(bca_idx_high):\n", - " self.__bca_high = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", - " stacklevel=0)\n", - "\n", - " # Compute percentile intervals.\n", - " pct_idx_low = int((self.__alpha/2) * resamples)\n", - " pct_idx_high = int((1-(self.__alpha/2)) * resamples)\n", - "\n", - " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", - " self.__pct_low = sorted_bootstraps[pct_idx_low]\n", - " self.__pct_high = sorted_bootstraps[pct_idx_high]\n", - "\n", - " # Perform statistical tests.\n", - " \n", - " self.__PermutationTest_result = PermutationTest(control, test, \n", - " effect_size, \n", - " is_paired,\n", - " permutation_count)\n", - " \n", - " if is_paired and proportional is False:\n", - " # Wilcoxon, a non-parametric version of the paired T-test.\n", - " wilcoxon = spstats.wilcoxon(control, test)\n", - " self.__pvalue_wilcoxon = wilcoxon.pvalue\n", - " self.__statistic_wilcoxon = wilcoxon.statistic\n", - " \n", - " \n", - " if effect_size != \"median_diff\":\n", - " # Paired Student's t-test.\n", - " paired_t = spstats.ttest_rel(control, test, nan_policy='omit')\n", - " self.__pvalue_paired_students_t = paired_t.pvalue\n", - " self.__statistic_paired_students_t = paired_t.statistic\n", - "\n", - " standardized_es = es.cohens_d(control, test, is_paired)\n", - " # self.__power = power.tt_solve_power(standardized_es,\n", - " # len(control),\n", - " # 
alpha=self.__alpha)\n", - "\n", - " elif is_paired and proportional is True:\n", - " # for binary paired data, use McNemar's test\n", - " # References:\n", - " # https://en.wikipedia.org/wiki/McNemar%27s_test\n", - "\n", - " df_temp = pd.DataFrame({'control': control, 'test': test})\n", - " x1 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 0)])\n", - " x2 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 1)])\n", - " x3 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 0)])\n", - " x4 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 1)])\n", - " table = [[x1,x2],[x3,x4]]\n", - " _mcnemar = mcnemar(table, exact=True, correction=True)\n", - " self.__pvalue_mcnemar = _mcnemar.pvalue\n", - " self.__statistic_mcnemar = _mcnemar.statistic\n", - "\n", - " elif effect_size == \"cliffs_delta\":\n", - " # Let's go with Brunner-Munzel!\n", - " brunner_munzel = spstats.brunnermunzel(control, test,\n", - " nan_policy='omit')\n", - " self.__pvalue_brunner_munzel = brunner_munzel.pvalue\n", - " self.__statistic_brunner_munzel = brunner_munzel.statistic\n", - "\n", - "\n", - " elif effect_size == \"median_diff\":\n", - " # According to scipy's documentation of the function,\n", - " # \"The Kruskal-Wallis H-test tests the null hypothesis\n", - " # that the population median of all of the groups are equal.\"\n", - " kruskal = spstats.kruskal(control, test, nan_policy='omit')\n", - " self.__pvalue_kruskal = kruskal.pvalue\n", - " self.__statistic_kruskal = kruskal.statistic\n", - "\n", - " else: # for mean difference, Cohen's d, and Hedges' g.\n", - " # Welch's t-test, assumes normality of distributions,\n", - " # but does not assume equal variances.\n", - " welch = spstats.ttest_ind(control, test, equal_var=False,\n", - " nan_policy='omit')\n", - " self.__pvalue_welch = welch.pvalue\n", - " self.__statistic_welch = welch.statistic\n", - "\n", - " # Student's t-test, assumes normality of distributions,\n", - " # as well as assumption of 
equal variances.\n", - " students_t = spstats.ttest_ind(control, test, equal_var=True,\n", - " nan_policy='omit')\n", - " self.__pvalue_students_t = students_t.pvalue\n", - " self.__statistic_students_t = students_t.statistic\n", - "\n", - " # Mann-Whitney test: Non parametric,\n", - " # does not assume normality of distributions\n", - " try:\n", - " mann_whitney = spstats.mannwhitneyu(control, test, \n", - " alternative='two-sided')\n", - " self.__pvalue_mann_whitney = mann_whitney.pvalue\n", - " self.__statistic_mann_whitney = mann_whitney.statistic\n", - " except ValueError:\n", - " # Occurs when the control and test are exactly identical\n", - " # in terms of rank (eg. all zeros.)\n", - " pass\n", - " \n", - " \n", - "\n", - " standardized_es = es.cohens_d(control, test, is_paired = None)\n", - " \n", - " # The Cohen's h calculation is for binary categorical data\n", - " try:\n", - " self.__proportional_difference = es.cohens_h(control, test)\n", - " except ValueError:\n", - " # Occur only when the data consists not only 0's and 1's.\n", - " pass\n", - " # self.__power = power.tt_ind_solve_power(standardized_es,\n", - " # len(control),\n", - " # alpha=self.__alpha,\n", - " # ratio=len(test)/len(control)\n", - " # )\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - " def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):\n", - " \n", - " # # Deprecated in v0.3.0; permutation p-values will be reported by default.\n", - " # UNPAIRED_ES_TO_TEST = {\"mean_diff\" : \"Mann-Whitney\",\n", - " # \"median_diff\" : \"Kruskal\",\n", - " # \"cohens_d\" : \"Mann-Whitney\",\n", - " # \"hedges_g\" : \"Mann-Whitney\",\n", - " # \"cliffs_delta\" : \"Brunner-Munzel\"}\n", - " # \n", - " # TEST_TO_PVAL_ATTR = {\"Mann-Whitney\" : \"pvalue_mann_whitney\",\n", - " # \"Kruskal\" : \"pvalue_kruskal\",\n", - " # \"Brunner-Munzel\" : \"pvalue_brunner_munzel\",\n", - " # \"Wilcoxon\" : \"pvalue_wilcoxon\"}\n", - " \n", - " RM_STATUS = {'baseline' : 'for repeated 
measures against baseline \\n', \n", - " 'sequential': 'for the sequential design of repeated-measures experiment \\n',\n", - " 'None' : ''\n", - " }\n", - "\n", - " PAIRED_STATUS = {'baseline' : 'paired', \n", - " 'sequential' : 'paired',\n", - " 'None' : 'unpaired'\n", - " }\n", - "\n", - " first_line = {\"rm_status\" : RM_STATUS[str(self.__is_paired)],\n", - " \"es\" : self.__EFFECT_SIZE_DICT[self.__effect_size],\n", - " \"paired_status\": PAIRED_STATUS[str(self.__is_paired)]}\n", - " \n", - "\n", - " out1 = \"The {paired_status} {es} {rm_status}\".format(**first_line)\n", - " \n", - " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", - " if \".\" in str(self.__ci):\n", - " ci_width = base_string_fmt.format(self.__ci)\n", - " else:\n", - " ci_width = str(self.__ci)\n", - " \n", - " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", - " \"ci\" : ci_width,\n", - " \"bca_low\" : base_string_fmt.format(self.__bca_low),\n", - " \"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", - " \n", - " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", - " out = out1 + out2\n", - " \n", - " # # Deprecated in v0.3.0; permutation p-values will be reported by default.\n", - " # if self.__is_paired:\n", - " # stats_test = \"Wilcoxon\"\n", - " # else:\n", - " # stats_test = UNPAIRED_ES_TO_TEST[self.__effect_size]\n", - " \n", - " \n", - " # pval_rounded = base_string_fmt.format(getattr(self,\n", - " # TEST_TO_PVAL_ATTR[stats_test])\n", - " # )\n", - " \n", - " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", - " \n", - " # # Deprecated in v0.3.0; permutation p-values will be reported by default.\n", - " # pvalue = \"The two-sided p-value of the {} test is {}.\".format(stats_test,\n", - " # pval_rounded)\n", - " \n", - " # pvalue = \"The two-sided p-value of the {} test is {}.\".format(stats_test,\n", - " # pval_rounded)\n", - " \n", - " \n", - " p1 = \"The p-value of the two-sided permutation t-test is {}, 
\".format(pval_rounded)\n", - " p2 = \"calculated for legacy purposes only. \"\n", - " pvalue = p1 + p2\n", - " \n", - " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", - " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", - " bs = bs1 + bs2\n", - "\n", - " pval_def1 = \"Any p-value reported is the probability of observing the\" + \\\n", - " \"effect size (or greater),\\nassuming the null hypothesis of\" + \\\n", - " \"zero difference is true.\"\n", - " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", - " \"control and test labels were performed.\"\n", - " pval_def = pval_def1 + pval_def2\n", - "\n", - " if show_resample_count and define_pval:\n", - " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", - " elif show_resample_count is False and define_pval is True:\n", - " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, pval_def)\n", - " elif show_resample_count is True and define_pval is False:\n", - " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, bs)\n", - " else:\n", - " return \"{}\\n{}\".format(out, pvalue)\n", - "\n", - "\n", - "\n", - " def to_dict(self):\n", - " \"\"\"\n", - " Returns the attributes of the `dabest.TwoGroupEffectSize` object as a\n", - " dictionary.\n", - " \"\"\"\n", - " # Only get public (user-facing) attributes.\n", - " attrs = [a for a in dir(self)\n", - " if not a.startswith((\"_\", \"to_dict\"))]\n", - " out = {}\n", - " for a in attrs:\n", - " out[a] = getattr(self, a)\n", - " return out\n", - "\n", - "\n", - " @property\n", - " def difference(self):\n", - " \"\"\"\n", - " Returns the difference between the control and the test.\n", - " \"\"\"\n", - " return self.__difference\n", - "\n", - " @property\n", - " def effect_size(self):\n", - " \"\"\"\n", - " Returns the type of effect size reported.\n", - " \"\"\"\n", - " return self.__EFFECT_SIZE_DICT[self.__effect_size]\n", - "\n", - " @property\n", - " def is_paired(self):\n", - " return 
self.__is_paired\n", - "\n", - " @property\n", - " def ci(self):\n", - " \"\"\"\n", - " Returns the width of the confidence interval, in percent.\n", - " \"\"\"\n", - " return self.__ci\n", - "\n", - " @property\n", - " def alpha(self):\n", - " \"\"\"\n", - " Returns the significance level of the statistical test as a float\n", - " between 0 and 1.\n", - " \"\"\"\n", - " return self.__alpha\n", - "\n", - " @property\n", - " def resamples(self):\n", - " \"\"\"\n", - " The number of resamples performed during the bootstrap procedure.\n", - " \"\"\"\n", - " return self.__resamples\n", - "\n", - " @property\n", - " def bootstraps(self):\n", - " \"\"\"\n", - " The generated bootstraps of the effect size.\n", - " \"\"\"\n", - " return self.__bootstraps\n", - "\n", - " @property\n", - " def random_seed(self):\n", - " \"\"\"\n", - " The number used to initialise the numpy random seed generator, ie.\n", - " `seed_value` from `numpy.random.seed(seed_value)` is returned.\n", - " \"\"\"\n", - " return self.__random_seed\n", - "\n", - " @property\n", - " def bca_interval_idx(self):\n", - " return self.__bca_interval_idx\n", - "\n", - " @property\n", - " def bca_low(self):\n", - " \"\"\"\n", - " The bias-corrected and accelerated confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__bca_low\n", - "\n", - " @property\n", - " def bca_high(self):\n", - " \"\"\"\n", - " The bias-corrected and accelerated confidence interval upper limit.\n", - " \"\"\"\n", - " return self.__bca_high\n", - "\n", - " @property\n", - " def pct_interval_idx(self):\n", - " return self.__pct_interval_idx\n", - "\n", - " @property\n", - " def pct_low(self):\n", - " \"\"\"\n", - " The percentile confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__pct_low\n", - "\n", - " @property\n", - " def pct_high(self):\n", - " \"\"\"\n", - " The percentile confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__pct_high\n", - "\n", - "\n", - "\n", - " @property\n", - " def 
pvalue_brunner_munzel(self):\n", - " try:\n", - " return self.__pvalue_brunner_munzel\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_brunner_munzel(self):\n", - " try:\n", - " return self.__statistic_brunner_munzel\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_wilcoxon(self):\n", - " try:\n", - " return self.__pvalue_wilcoxon\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_wilcoxon(self):\n", - " try:\n", - " return self.__statistic_wilcoxon\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def pvalue_mcnemar(self):\n", - " try:\n", - " return self.__pvalue_mcnemar\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_mcnemar(self):\n", - " try:\n", - " return self.__statistic_mcnemar\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_paired_students_t(self):\n", - " try:\n", - " return self.__pvalue_paired_students_t\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_paired_students_t(self):\n", - " try:\n", - " return self.__statistic_paired_students_t\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_kruskal(self):\n", - " try:\n", - " return self.__pvalue_kruskal\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_kruskal(self):\n", - " try:\n", - " return self.__statistic_kruskal\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_welch(self):\n", - " try:\n", - " return self.__pvalue_welch\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_welch(self):\n", - " try:\n", - " return 
self.__statistic_welch\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_students_t(self):\n", - " try:\n", - " return self.__pvalue_students_t\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_students_t(self):\n", - " try:\n", - " return self.__statistic_students_t\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_mann_whitney(self):\n", - " try:\n", - " return self.__pvalue_mann_whitney\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def statistic_mann_whitney(self):\n", - " try:\n", - " return self.__statistic_mann_whitney\n", - " except AttributeError:\n", - " return npnan\n", - " \n", - " # Introduced in v0.3.0.\n", - " @property\n", - " def pvalue_permutation(self):\n", - " return self.__PermutationTest_result.pvalue\n", - " \n", - " # \n", - " # \n", - " @property\n", - " def permutation_count(self):\n", - " \"\"\"\n", - " The number of permuations taken.\n", - " \"\"\"\n", - " return self.__PermutationTest_result.permutation_count\n", - "\n", - " \n", - " @property\n", - " def permutations(self):\n", - " return self.__PermutationTest_result.permutations\n", - "\n", - " \n", - " @property\n", - " def permutations_var(self):\n", - " return self.__PermutationTest_result.permutations_var\n", - "\n", - "\n", - " @property\n", - " def proportional_difference(self):\n", - " try:\n", - " return self.__proportional_difference\n", - " except AttributeError:\n", - " return npnan\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d72ccb04", - "metadata": {}, - "source": [ - "#### Example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5d8a7a87", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "The unpaired mean difference is -0.253 [95%CI -0.78, 0.25].\n", - "The p-value of 
the two-sided permutation t-test is 0.348, calculated for legacy purposes only. \n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing theeffect size (or greater),\n", - "assuming the null hypothesis ofzero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "random.seed(12345)\n", - "control = norm.rvs(loc=0, size=30)\n", - "test = norm.rvs(loc=0.5, size=30)\n", - "effsize = dabest.TwoGroupsEffectSize(control, test, \"mean_diff\")\n", - "effsize" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72a4c93e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'alpha': 0.05,\n", - " 'bca_high': 0.24951887238295106,\n", - " 'bca_interval_idx': (125, 4875),\n", - " 'bca_low': -0.7801782111071534,\n", - " 'bootstraps': array([-0.3649424 , -0.45018155, -0.56034412, ..., -0.49805581,\n", - " -0.25334475, -0.55206229]),\n", - " 'ci': 95,\n", - " 'difference': -0.25315417702752846,\n", - " 'effect_size': 'mean difference',\n", - " 'is_paired': None,\n", - " 'pct_high': 0.24951887238295106,\n", - " 'pct_interval_idx': (125, 4875),\n", - " 'pct_low': -0.7801782111071534,\n", - " 'permutation_count': 5000,\n", - " 'permutations': array([ 0.17221029, 0.03112419, -0.13911387, ..., -0.38007941,\n", - " 0.30261507, -0.09073054]),\n", - " 'permutations_var': array([0.07201642, 0.07251104, 0.07219407, ..., 0.07003705, 0.07094885,\n", - " 0.07238581]),\n", - " 'proportional_difference': nan,\n", - " 'pvalue_brunner_munzel': nan,\n", - " 'pvalue_kruskal': nan,\n", - " 'pvalue_mann_whitney': 0.5201446121616038,\n", - " 'pvalue_mcnemar': nan,\n", - " 'pvalue_paired_students_t': nan,\n", - " 'pvalue_permutation': 0.3484,\n", - " 'pvalue_students_t': 
0.34743913903372836,\n", - " 'pvalue_welch': 0.3474493875548964,\n", - " 'pvalue_wilcoxon': nan,\n", - " 'random_seed': 12345,\n", - " 'resamples': 5000,\n", - " 'statistic_brunner_munzel': nan,\n", - " 'statistic_kruskal': nan,\n", - " 'statistic_mann_whitney': 494.0,\n", - " 'statistic_mcnemar': nan,\n", - " 'statistic_paired_students_t': nan,\n", - " 'statistic_students_t': 0.9472545159069105,\n", - " 'statistic_welch': 0.9472545159069105,\n", - " 'statistic_wilcoxon': nan}" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "effsize.to_dict() " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eb366b18", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class EffectSizeDataFrame(object):\n", - " \"\"\"A class that generates and stores the results of bootstrapped effect\n", - " sizes for several comparisons.\"\"\"\n", - "\n", - " def __init__(self, dabest, effect_size,\n", - " is_paired, ci=95, proportional=False,\n", - " resamples=5000, \n", - " permutation_count=5000,\n", - " random_seed=12345, \n", - " x1_level=None, x2=None, \n", - " delta2=False, experiment_label=None,\n", - " mini_meta=False):\n", - " \"\"\"\n", - " Parses the data from a Dabest object, enabling plotting and printing\n", - " capability for the effect size of interest.\n", - " \"\"\"\n", - "\n", - " self.__dabest_obj = dabest\n", - " self.__effect_size = effect_size\n", - " self.__is_paired = is_paired\n", - " self.__ci = ci\n", - " self.__resamples = resamples\n", - " self.__permutation_count = permutation_count\n", - " self.__random_seed = random_seed\n", - " self.__proportional = proportional\n", - " self.__x1_level = x1_level\n", - " self.__experiment_label = experiment_label \n", - " self.__x2 = x2\n", - " self.__delta2 = delta2 \n", - " self.__mini_meta = mini_meta\n", - "\n", - "\n", - " def __pre_calc(self):\n", - " from .misc_tools import print_greeting, get_varname\n", - " from 
._stats_tools import confint_2group_diff as ci2g\n", - " from ._delta_objects import MiniMetaDelta, DeltaDelta\n", - "\n", - " idx = self.__dabest_obj.idx\n", - " dat = self.__dabest_obj._plot_data\n", - " xvar = self.__dabest_obj._xvar\n", - " yvar = self.__dabest_obj._yvar\n", - "\n", - " out = []\n", - " reprs = []\n", - " \n", - " if self.__delta2==True:\n", - " mixed_data = []\n", - " for j, current_tuple in enumerate(idx):\n", - " if self.__is_paired != \"sequential\":\n", - " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - "\n", - " for ix, tname in enumerate(current_tuple[1:]):\n", - " if self.__is_paired == \"sequential\":\n", - " cname = current_tuple[ix]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", - " mixed_data.append(control)\n", - " mixed_data.append(test)\n", - " bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(mixed_data[0], mixed_data[1], mixed_data[2], mixed_data[3],\n", - " self.__is_paired, self.__resamples, self.__random_seed)\n", - "\n", - "\n", - " for j, current_tuple in enumerate(idx):\n", - " if self.__is_paired!=\"sequential\":\n", - " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - "\n", - " for ix, tname in enumerate(current_tuple[1:]):\n", - " if self.__is_paired == \"sequential\":\n", - " cname = current_tuple[ix]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", - "\n", - " result = TwoGroupsEffectSize(control, test,\n", - " self.__effect_size,\n", - " self.__proportional,\n", - " self.__is_paired,\n", - " self.__ci,\n", - " self.__resamples,\n", - " self.__permutation_count,\n", - " self.__random_seed)\n", - " r_dict = result.to_dict()\n", - " r_dict[\"control\"] = cname\n", - " r_dict[\"test\"] = tname\n", - " r_dict[\"control_N\"] = int(len(control))\n", - " r_dict[\"test_N\"] = int(len(test))\n", - " 
out.append(r_dict)\n", - " if j == len(idx)-1 and ix == len(current_tuple)-2:\n", - " if self.__delta2 and self.__effect_size in [\"mean_diff\",\"delta_g\"]:\n", - " resamp_count = False\n", - " def_pval = False\n", - " elif self.__mini_meta and self.__effect_size == \"mean_diff\":\n", - " resamp_count = False\n", - " def_pval = False\n", - " else:\n", - " resamp_count = True\n", - " def_pval = True\n", - " else:\n", - " resamp_count = False\n", - " def_pval = False\n", - "\n", - " text_repr = result.__repr__(show_resample_count=resamp_count,\n", - " define_pval=def_pval)\n", - "\n", - " to_replace = \"between {} and {} is\".format(cname, tname)\n", - " text_repr = text_repr.replace(\"is\", to_replace, 1)\n", - "\n", - " reprs.append(text_repr)\n", - "\n", - "\n", - " self.__for_print = \"\\n\\n\".join(reprs)\n", - "\n", - " out_ = pd.DataFrame(out)\n", - "\n", - " columns_in_order = ['control', 'test', 'control_N', 'test_N',\n", - " 'effect_size', 'is_paired',\n", - " 'difference', 'ci',\n", - "\n", - " 'bca_low', 'bca_high', 'bca_interval_idx',\n", - " 'pct_low', 'pct_high', 'pct_interval_idx',\n", - " \n", - " 'bootstraps', 'resamples', 'random_seed',\n", - " \n", - " 'permutations', 'pvalue_permutation', 'permutation_count', 'permutations_var',\n", - " \n", - " 'pvalue_welch',\n", - " 'statistic_welch',\n", - "\n", - " 'pvalue_students_t',\n", - " 'statistic_students_t',\n", - "\n", - " 'pvalue_mann_whitney',\n", - " 'statistic_mann_whitney',\n", - "\n", - " 'pvalue_brunner_munzel',\n", - " 'statistic_brunner_munzel',\n", - "\n", - " 'pvalue_wilcoxon',\n", - " 'statistic_wilcoxon',\n", - "\n", - " 'pvalue_mcnemar',\n", - " 'statistic_mcnemar',\n", - "\n", - " 'pvalue_paired_students_t',\n", - " 'statistic_paired_students_t',\n", - "\n", - " 'pvalue_kruskal',\n", - " 'statistic_kruskal',\n", - " 'proportional_difference'\n", - " ]\n", - " self.__results = out_.reindex(columns=columns_in_order)\n", - " self.__results.dropna(axis=\"columns\", how=\"all\", 
inplace=True)\n", - " \n", - " # Add the is_paired column back when is_paired is None\n", - " if self.is_paired is None:\n", - " self.__results.insert(5, 'is_paired', self.__results.apply(lambda _: None, axis=1))\n", - " \n", - " # Create and compute the delta-delta statistics\n", - " if self.__delta2 is True:\n", - " self.__delta_delta = DeltaDelta(self,\n", - " self.__permutation_count,\n", - " bootstraps_delta_delta,\n", - " self.__ci)\n", - " reprs.append(self.__delta_delta.__repr__(header=False))\n", - " elif self.__delta2 is True and self.__effect_size not in [\"mean_diff\", \"delta_g\"]:\n", - " self.__delta_delta = \"Delta-delta is not supported for {}.\".format(self.__effect_size)\n", - " else:\n", - " self.__delta_delta = \"`delta2` is False; delta-delta is therefore not calculated.\"\n", - "\n", - " # Create and compute the weighted average statistics\n", - " if self.__mini_meta is True and self.__effect_size == \"mean_diff\":\n", - " self.__mini_meta_delta = MiniMetaDelta(self,\n", - " self.__permutation_count,\n", - " self.__ci)\n", - " reprs.append(self.__mini_meta_delta.__repr__(header=False))\n", - " elif self.__mini_meta is True and self.__effect_size != \"mean_diff\":\n", - " self.__mini_meta_delta = \"Weighted delta is not supported for {}.\".format(self.__effect_size)\n", - " else:\n", - " self.__mini_meta_delta = \"`mini_meta` is False; weighted delta is therefore not calculated.\"\n", - " \n", - " \n", - " varname = get_varname(self.__dabest_obj)\n", - " lastline = \"To get the results of all valid statistical tests, \" +\\\n", - " \"use `{}.{}.statistical_tests`\".format(varname, self.__effect_size)\n", - " reprs.append(lastline)\n", - "\n", - " reprs.insert(0, print_greeting())\n", - "\n", - " self.__for_print = \"\\n\\n\".join(reprs)\n", - "\n", - "\n", - " def __repr__(self):\n", - " try:\n", - " return self.__for_print\n", - " except AttributeError:\n", - " self.__pre_calc()\n", - " return self.__for_print\n", - " \n", - " \n", - " \n", - 
" def __calc_lqrt(self):\n", - " \n", - " rnd_seed = self.__random_seed\n", - " db_obj = self.__dabest_obj\n", - " dat = db_obj._plot_data\n", - " xvar = db_obj._xvar\n", - " yvar = db_obj._yvar\n", - " delta2 = self.__delta2\n", - " \n", - "\n", - " out = []\n", - "\n", - " for j, current_tuple in enumerate(db_obj.idx):\n", - " if self.__is_paired != \"sequential\":\n", - " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - "\n", - " for ix, tname in enumerate(current_tuple[1:]):\n", - " if self.__is_paired == \"sequential\":\n", - " cname = current_tuple[ix]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", - " \n", - " if self.__is_paired: \n", - " # Refactored here in v0.3.0 for performance issues.\n", - " lqrt_result = lqrt.lqrtest_rel(control, test, \n", - " random_state=rnd_seed)\n", - " \n", - " out.append({\"control\": cname, \"test\": tname, \n", - " \"control_N\": int(len(control)), \n", - " \"test_N\": int(len(test)),\n", - " \"pvalue_paired_lqrt\": lqrt_result.pvalue,\n", - " \"statistic_paired_lqrt\": lqrt_result.statistic\n", - " })\n", - "\n", - " else:\n", - " # Likelihood Q-Ratio test:\n", - " lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, \n", - " random_state=rnd_seed,\n", - " equal_var=True)\n", - " \n", - " \n", - " lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, \n", - " random_state=rnd_seed,\n", - " equal_var=False)\n", - " \n", - " out.append({\"control\": cname, \"test\": tname, \n", - " \"control_N\": int(len(control)), \n", - " \"test_N\": int(len(test)),\n", - " \n", - " \"pvalue_lqrt_equal_var\" : lqrt_equal_var_result.pvalue,\n", - " \"statistic_lqrt_equal_var\" : lqrt_equal_var_result.statistic,\n", - " \"pvalue_lqrt_unequal_var\" : lqrt_unequal_var_result.pvalue,\n", - " \"statistic_lqrt_unequal_var\" : lqrt_unequal_var_result.statistic,\n", - " }) \n", - " self.__lqrt_results = pd.DataFrame(out)\n", - "\n", - "\n", - 
" def plot(self, color_col=None,\n", - "\n", - " raw_marker_size=6, es_marker_size=9,\n", - "\n", - " swarm_label=None, contrast_label=None, delta2_label=None,\n", - " swarm_ylim=None, contrast_ylim=None, delta2_ylim=None,\n", - "\n", - " custom_palette=None, swarm_desat=0.5, halfviolin_desat=1,\n", - " halfviolin_alpha=0.8, \n", - "\n", - " face_color = None,\n", - " #bar plot\n", - " bar_label=None, bar_desat=0.5, bar_width = 0.5,bar_ylim = None,\n", - " # error bar of proportion plot\n", - " ci=None, ci_type='bca', err_color=None,\n", - "\n", - " float_contrast=True,\n", - " show_pairs=True,\n", - " show_delta2=True,\n", - " show_mini_meta=True,\n", - " group_summaries=None,\n", - " group_summaries_offset=0.1,\n", - "\n", - " fig_size=None,\n", - " dpi=100,\n", - " ax=None,\n", - " \n", - " contrast_show_es = False,\n", - " es_sf = 2,\n", - " es_fontsize = 10,\n", - " \n", - " contrast_show_deltas = True,\n", - " \n", - " gridkey_rows=None,\n", - " gridkey_merge_pairs = False,\n", - " gridkey_show_Ns = True,\n", - " gridkey_show_es = True,\n", - "\n", - " swarmplot_kwargs=None,\n", - " barplot_kwargs=None,\n", - " violinplot_kwargs=None,\n", - " slopegraph_kwargs=None,\n", - " sankey_kwargs=None,\n", - " reflines_kwargs=None,\n", - " group_summary_kwargs=None,\n", - " legend_kwargs=None,\n", - " title=None, fontsize_title = 16,\n", - " fontsize_rawxlabel = 12,fontsize_rawylabel = 12,fontsize_contrastxlabel = 12, fontsize_contrastylabel = 12,\n", - " fontsize_delta2label = 12):\n", - "\n", - " \"\"\"\n", - " Creates an estimation plot for the effect size of interest.\n", - " \n", - "\n", - " Parameters\n", - " ----------\n", - " color_col : string, default None\n", - " Column to be used for colors.\n", - " raw_marker_size : float, default 6\n", - " The diameter (in points) of the marker dots plotted in the\n", - " swarmplot.\n", - " es_marker_size : float, default 9\n", - " The size (in points) of the effect size points on the difference\n", - " axes.\n", - " 
swarm_label, contrast_label, delta2_label : strings, default None\n", - " Set labels for the y-axis of the swarmplot and the contrast plot,\n", - " respectively. If `swarm_label` is not specified, it defaults to\n", - " \"value\", unless a column name was passed to `y`. If\n", - " `contrast_label` is not specified, it defaults to the effect size\n", - " being plotted. If `delta2_label` is not specifed, it defaults to \n", - " \"delta - delta\"\n", - " swarm_ylim, contrast_ylim, delta2_ylim : tuples, default None\n", - " The desired y-limits of the raw data (swarmplot) axes, the\n", - " difference axes and the delta-delta axes respectively, as a tuple. \n", - " These will be autoscaled to sensible values if they are not \n", - " specified. The delta2 axes and contrast axes should have the same \n", - " limits for y. When `show_delta2` is True, if both of the `contrast_ylim`\n", - " and `delta2_ylim` are not None, then they must be specified with the \n", - " same values; when `show_delta2` is True and only one of them is specified,\n", - " then the other will automatically be assigned with the same value.\n", - " Specifying `delta2_ylim` does not have any effect when `show_delta2` is\n", - " False. \n", - " custom_palette : dict, list, or matplotlib color palette, default None\n", - " This keyword accepts a dictionary with {'group':'color'} pairings,\n", - " a list of RGB colors, or a specified matplotlib palette. This\n", - " palette will be used to color the swarmplot. If `color_col` is not\n", - " specified, then each group will be colored in sequence according\n", - " to the default palette currently used by matplotlib.\n", - " Please take a look at the seaborn commands `color_palette`\n", - " and `cubehelix_palette` to generate a custom palette. 
Both\n", - " these functions generate a list of RGB colors.\n", - " See:\n", - " https://seaborn.pydata.org/generated/seaborn.color_palette.html\n", - " https://seaborn.pydata.org/generated/seaborn.cubehelix_palette.html\n", - " The named colors of matplotlib can be found here:\n", - " https://matplotlib.org/examples/color/named_colors.html\n", - " swarm_desat : float, default 1\n", - " Decreases the saturation of the colors in the swarmplot by the\n", - " desired proportion. Uses `seaborn.desaturate()` to acheive this.\n", - " halfviolin_desat : float, default 0.5\n", - " Decreases the saturation of the colors of the half-violin bootstrap\n", - " curves by the desired proportion. Uses `seaborn.desaturate()` to\n", - " acheive this.\n", - " halfviolin_alpha : float, default 0.8\n", - " The alpha (transparency) level of the half-violin bootstrap curves. \n", - " float_contrast : boolean, default True\n", - " Whether or not to display the halfviolin bootstrapped difference\n", - " distribution alongside the raw data.\n", - " show_pairs : boolean, default True\n", - " If the data is paired, whether or not to show the raw data as a\n", - " swarmplot, or as slopegraph, with a line joining each pair of\n", - " observations.\n", - " show_delta2, show_mini_meta : boolean, default True\n", - " If delta-delta or mini-meta delta is calculated, whether or not to \n", - " show the delta-delta plot or mini-meta plot.\n", - " group_summaries : ['mean_sd', 'median_quartiles', 'None'], default None.\n", - " Plots the summary statistics for each group. If 'mean_sd', then\n", - " the mean and standard deviation of each group is plotted as a\n", - " notched line beside each group. If 'median_quantiles', then the\n", - " median and 25th and 75th percentiles of each group is plotted\n", - " instead. 
If 'None', the summaries are not shown.\n", - " group_summaries_offset : float, default 0.1\n", - " If group summaries are displayed, they will be offset from the raw\n", - " data swarmplot groups by this value. \n", - " fig_size : tuple, default None\n", - " The desired dimensions of the figure as a (length, width) tuple.\n", - " dpi : int, default 100\n", - " The dots per inch of the resulting figure.\n", - " ax : matplotlib.Axes, default None\n", - " Provide an existing Axes for the plots to be created. If no Axes is\n", - " specified, a new matplotlib Figure will be created.\n", - " gridkey_rows : list, default None\n", - " Provide a list of row labels for the gridkey. The supplied idx is\n", - " checked against the row labels to determine whether the corresponding\n", - " cell should be populated or not.\n", - " swarmplot_kwargs : dict, default None\n", - " Pass any keyword arguments accepted by the seaborn `swarmplot`\n", - " command here, as a dict. If None, the following keywords are\n", - " passed to sns.swarmplot : {'size':`raw_marker_size`}.\n", - " violinplot_kwargs : dict, default None\n", - " Pass any keyword arguments accepted by the matplotlib `\n", - " pyplot.violinplot` command here, as a dict. If None, the following\n", - " keywords are passed to violinplot : {'widths':0.5, 'vert':True,\n", - " 'showextrema':False, 'showmedians':False}.\n", - " slopegraph_kwargs : dict, default None\n", - " This will change the appearance of the lines used to join each pair\n", - " of observations when `show_pairs=True`. Pass any keyword arguments\n", - " accepted by matplotlib `plot()` function here, as a dict.\n", - " If None, the following keywords are\n", - " passed to plot() : {'linewidth':1, 'alpha':0.5}.\n", - " sankey_kwargs: dict, default None\n", - " Whis will change the appearance of the sankey diagram used to depict\n", - " paired proportional data when `show_pairs=True` and `proportional=True`. 
\n", - " Pass any keyword arguments accepted by plot_tools.sankeydiag() function\n", - " here, as a dict. If None, the following keywords are passed to sankey diagram:\n", - " {\"width\": 0.5, \"align\": \"center\", \"alpha\": 0.4, \"bar_width\": 0.1, \"rightColor\": False}\n", - " reflines_kwargs : dict, default None\n", - " This will change the appearance of the zero reference lines. Pass\n", - " any keyword arguments accepted by the matplotlib Axes `hlines`\n", - " command here, as a dict. If None, the following keywords are\n", - " passed to Axes.hlines : {'linestyle':'solid', 'linewidth':0.75,\n", - " 'zorder':2, 'color' : default y-tick color}.\n", - " group_summary_kwargs : dict, default None\n", - " Pass any keyword arguments accepted by the matplotlib.lines.Line2D\n", - " command here, as a dict. This will change the appearance of the\n", - " vertical summary lines for each group, if `group_summaries` is not\n", - " 'None'. If None, the following keywords are passed to\n", - " matplotlib.lines.Line2D : {'lw':2, 'alpha':1, 'zorder':3}.\n", - " legend_kwargs : dict, default None\n", - " Pass any keyword arguments accepted by the matplotlib Axes\n", - " `legend` command here, as a dict. If None, the following keywords\n", - " are passed to matplotlib.Axes.legend : {'loc':'upper left',\n", - " 'frameon':False}.\n", - " title : string, default None\n", - " Title for the plot. If None, no title will be displayed. Pass any\n", - " keyword arguments accepted by the matplotlib.pyplot.suptitle `t` command here,\n", - " as a string.\n", - " fontsize_title : float or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}, default 'large'\n", - " Font size for the plot title. If a float, the fontsize in points. The\n", - " string values denote sizes relative to the default font size. 
Pass any keyword arguments accepted\n", - " by the matplotlib.pyplot.suptitle `fontsize` command here, as a string.\n", - " fontsize_rawxlabel : float, default 12\n", - " Font size for the raw axes xlabel.\n", - " fontsize_rawylabel : float, default 12\n", - " Font size for the raw axes ylabel.\n", - " fontsize_contrastxlabel : float, default 12\n", - " Font size for the contrast axes xlabel.\n", - " fontsize_contrastylabel : float, default 12\n", - " Font size for the contrast axes ylabel.\n", - " fontsize_delta2label : float, default 12\n", - " Font size for the delta-delta axes ylabel.\n", - "\n", - "\n", - " Returns\n", - " -------\n", - " A :class:`matplotlib.figure.Figure` with 2 Axes, if ``ax = None``.\n", - " \n", - " The first axes (accessible with ``FigName.axes[0]``) contains the rawdata swarmplot; the second axes (accessible with ``FigName.axes[1]``) has the bootstrap distributions and effect sizes (with confidence intervals) plotted on it.\n", - " \n", - " If ``ax`` is specified, the rawdata swarmplot is accessed at ``ax`` \n", - " itself, while the effect size axes is accessed at ``ax.contrast_axes``.\n", - " See the last example below.\n", - " \n", - "\n", - "\n", - " \"\"\"\n", - "\n", - " from .plotter import EffectSizeDataFramePlotter\n", - "\n", - " if hasattr(self, \"results\") is False:\n", - " self.__pre_calc()\n", - "\n", - " if self.__delta2:\n", - " color_col = self.__x2\n", - "\n", - " # if self.__proportional:\n", - " # raw_marker_size = 0.01\n", - "\n", - " # Modification incurred due to update of Seaborn\n", - " ci = ('ci', ci) if ci is not None else None\n", - " \n", - " all_kwargs = locals()\n", - " del all_kwargs[\"self\"]\n", - "\n", - " out = EffectSizeDataFramePlotter(self, **all_kwargs)\n", - "\n", - " return out\n", - "\n", - "\n", - " @property\n", - " def proportional(self):\n", - " \"\"\"\n", - " Returns the proportional parameter\n", - " class.\n", - " \"\"\"\n", - " return self.__proportional\n", - "\n", - " @property\n", - 
" def results(self):\n", - " \"\"\"Prints all pairwise comparisons nicely.\"\"\"\n", - " try:\n", - " return self.__results\n", - " except AttributeError:\n", - " self.__pre_calc()\n", - " return self.__results\n", - "\n", - "\n", - "\n", - " @property\n", - " def statistical_tests(self):\n", - " results_df = self.results\n", - "\n", - " # Select only the statistics and p-values.\n", - " stats_columns = [c for c in results_df.columns\n", - " if c.startswith(\"statistic\") or c.startswith(\"pvalue\")]\n", - "\n", - " default_cols = ['control', 'test', 'control_N', 'test_N',\n", - " 'effect_size', 'is_paired',\n", - " 'difference', 'ci', 'bca_low', 'bca_high']\n", - "\n", - " cols_of_interest = default_cols + stats_columns\n", - "\n", - " return results_df[cols_of_interest]\n", - "\n", - "\n", - " @property\n", - " def _for_print(self):\n", - " return self.__for_print\n", - "\n", - " @property\n", - " def _plot_data(self):\n", - " return self.__dabest_obj._plot_data\n", - "\n", - " @property\n", - " def idx(self):\n", - " return self.__dabest_obj.idx\n", - "\n", - " @property\n", - " def xvar(self):\n", - " return self.__dabest_obj._xvar\n", - "\n", - " @property\n", - " def yvar(self):\n", - " return self.__dabest_obj._yvar\n", - "\n", - " @property\n", - " def is_paired(self):\n", - " return self.__is_paired\n", - "\n", - " @property\n", - " def ci(self):\n", - " \"\"\"\n", - " The width of the confidence interval being produced, in percent.\n", - " \"\"\"\n", - " return self.__ci\n", - "\n", - " @property\n", - " def x1_level(self):\n", - " return self.__x1_level\n", - "\n", - "\n", - " @property\n", - " def x2(self):\n", - " return self.__x2\n", - "\n", - "\n", - " @property\n", - " def experiment_label(self):\n", - " return self.__experiment_label\n", - " \n", - "\n", - " @property\n", - " def delta2(self):\n", - " return self.__delta2\n", - " \n", - "\n", - " @property\n", - " def resamples(self):\n", - " \"\"\"\n", - " The number of resamples (with 
replacement) during bootstrap resampling.\"\n", - " \"\"\"\n", - " return self.__resamples\n", - "\n", - " @property\n", - " def random_seed(self):\n", - " \"\"\"\n", - " The seed used by `numpy.seed()` for bootstrap resampling.\n", - " \"\"\"\n", - " return self.__random_seed\n", - "\n", - " @property\n", - " def effect_size(self):\n", - " \"\"\"The type of effect size being computed.\"\"\"\n", - " return self.__effect_size\n", - "\n", - " @property\n", - " def dabest_obj(self):\n", - " \"\"\"\n", - " Returns the `dabest` object that invoked the current EffectSizeDataFrame\n", - " class.\n", - " \"\"\"\n", - " return self.__dabest_obj\n", - "\n", - " @property\n", - " def proportional(self):\n", - " \"\"\"\n", - " Returns the proportional parameter\n", - " class.\n", - " \"\"\"\n", - " return self.__proportional\n", - " \n", - " @property\n", - " def lqrt(self):\n", - " \"\"\"Returns all pairwise Lq-Likelihood Ratio Type test results \n", - " as a pandas DataFrame.\n", - " \n", - " For more information on LqRT tests, see https://arxiv.org/abs/1911.11922\n", - " \"\"\"\n", - " try:\n", - " return self.__lqrt_results\n", - " except AttributeError:\n", - " self.__calc_lqrt()\n", - " return self.__lqrt_results\n", - " \n", - " \n", - " @property\n", - " def mini_meta(self):\n", - " \"\"\"\n", - " Returns the mini_meta boolean parameter.\n", - " \"\"\"\n", - " return self.__mini_meta\n", - "\n", - " \n", - " @property\n", - " def mini_meta_delta(self):\n", - " \"\"\"\n", - " Returns the mini_meta results.\n", - " \"\"\"\n", - " try:\n", - " return self.__mini_meta_delta\n", - " except AttributeError:\n", - " self.__pre_calc()\n", - " return self.__mini_meta_delta\n", - "\n", - " \n", - " @property\n", - " def delta_delta(self):\n", - " \"\"\"\n", - " Returns the mini_meta results.\n", - " \"\"\"\n", - " try:\n", - " return self.__delta_delta\n", - " except AttributeError:\n", - " self.__pre_calc()\n", - " return self.__delta_delta\n", - "\n" - ] - }, - { - 
"attachments": {}, - "cell_type": "markdown", - "id": "0e1b8353", - "metadata": {}, - "source": [ - "#### Example: plot\n", - "\n", - "Create a Gardner-Altman estimation plot for the mean difference." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a151b86", - "metadata": {}, - "outputs": [], - "source": [ - "random.seed(9999) # Fix the seed so the results are replicable.\n", - "# pop_size = 10000 # Size of each population.\n", - "Ns = 20 # The number of samples taken from each population\n", - "\n", - "# Create samples\n", - "c1 = norm.rvs(loc=3, scale=0.4, size=Ns)\n", - "c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", - "c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", - "\n", - "t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)\n", - "t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)\n", - "t3 = norm.rvs(loc=3, scale=0.75, size=Ns)\n", - "t4 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", - "t5 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", - "t6 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", - "\n", - "\n", - "# Add a `gender` column for coloring the data.\n", - "females = repeat('Female', Ns/2).tolist()\n", - "males = repeat('Male', Ns/2).tolist()\n", - "gender = females + males\n", - "\n", - "# Add an `id` column for paired data plotting.\n", - "id_col = pd.Series(range(1, Ns+1))\n", - "\n", - "# Combine samples and gender into a DataFrame.\n", - "df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1,\n", - " 'Control 2' : c2, 'Test 2' : t2,\n", - " 'Control 3' : c3, 'Test 3' : t3,\n", - " 'Test 4' : t4, 'Test 5' : t5, 'Test 6' : t6,\n", - " 'Gender' : gender, 'ID' : id_col\n", - " })\n", - "my_data = dabest.load(df, idx=(\"Control 1\", \"Test 1\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "91d15864", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeoAAAGGCAYAAAC0W8IbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABYW0lEQVR4nO3deVhUZfsH8O/MAMO+ryKLuICIbJoKmrvikkv6M1vVMnrfUrNMS99yz6U9ezO3NLLULDUzc8lIMHcUSVQkRRQVEJUdZJs5vz94nZxgEIZhzjB8P9c1V87znPOcm0a555zznOeWCIIggIiIiAySVOwAiIiISDMmaiIiIgPGRE1ERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERERAaMiZqIiMiAMVETEREZsBaXqLOysrBgwQJkZWWJHQoRUYvG38f10yIT9cKFC/kXg4hIZPx9XD8tLlETERE1J0zUREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQEzETsAImpapXeu4+bxH5F/NQlSmSmc/CPQqtsomFk7iB0aEdUDEzWRESu8fgHnNs+FsrJM1Xbj6A+4fS4OwZM+hNzWWcToiKg+eOmbyIhd3vuFWpK+r7zwNq7FfSNCRETUUEzUREaqJOcqSnPSNfbfuXAIglKhx4iISBtM1ERGqupecZ39yqoKKKsq9BQNEWmLiZrISFm6+kBqYqax38KpNWRmFnqMiIi0wURNZKRMLWzgGjJQY79njzF6jIaItMVETWTE/Aa9BJdOfQFIVG0SmQm8Hn0K7mFRosVFRPVnMIl6+fLlkEgkeO211zRuExMTA4lEovYyNzfXX5BEzYzUxBT+j89Cl1fWou3QqWj/2Gvo9upG+PR5VuzQiKieDOI56oSEBKxZswbBwcEP3dbW1hapqamq9xKJpI6tiQgALBxbwcKxldhhEJEWRD+jLi4uxjPPPIN169bBweHhKyVJJBK4u7urXm5ubnqIkoiISByiJ+opU6Zg+PDhGDhQ86SXBxUXF8PHxwdeXl4YNWoUzp8/X+f25eXlKCwsVL2Ki+t+ZIWIiMiQiHrp+7vvvkNiYiISEhLqtb2/vz82bNiA4OBgFBQU4MMPP0RkZCTOnz+P1q1b17rPsmXLsHDhQl2GTUREpDeinVFfv34d06dPx6ZNm+o9ISwiIgITJkxAaGgo+vTpgx07dsDFxQVr1qzRuM+cOXNQUFCgesXHx+vqRyAiImpyop1Rnz59Gjk5OQgPD1e1KRQKHDp0CJ9//jnKy8shk8nqHMPU1BRhYWG4fPmyxm3kcjnkcrnqvbW1deODJyIi0hPREvWAAQOQnJys1vb8888jICAAb7311kOTNFCd2JOTkzFs2LCmCpOIiEhUoiVqGxsbBAUFqbVZWVnByclJ1T5hwgR4enpi2bJlAIBFixahR48eaNeuHfLz8/HBBx/g2rVrePHFF/UePxERkT4YxHPUmmRkZEAq/fs2el5eHqKjo5GdnQ0HBwd06dIFR48eRWBgoIhREhERNR2DStRxcXF1vv/kk0/wySef6C8gIiIikYn+HDURERFpxkRNRERkwJioiYio2Th06BBGjBiBVq1aQSKRYOfOnXVuHxcXV6OYk0QiQXZ2tn4C1gEmaiIiajZKSkoQEhKClStXNmi/1NRUZGVlqV6urq5NFKHuGdRkMiJqGkpFFUpvX4NUZgpLF2+xwyHS2tChQzF06NAG7+fq6gp7e3vdB6QHTNRERi7z5C5cP/o9KovzAAAWTq3hO+AFOHXoLnJkRNWKi4tRWFioev/PFSV1ITQ0FOXl5QgKCsKCBQvQs2dPnY7flHjpm8iI3Tz+I678ukaVpAHg3t0bSPnhXeRfOSNiZER/69OnD+zs7FSv+4tc6YKHhwdWr16N7du3Y/v27fDy8kLfvn2RmJios2M0NZ5RExkpZVUlrh/9vvZOQYmMw9/B3i9Mv0ER1SI+Ph6
hoaGq97o8m/b394e/v7/qfWRkJNLS0vDJJ5/gm2++0dlxmhITNZGRKs66hKrSQo39hRnnoKgsg8y0ftXriJqKtbU1bG1t9Xa8bt264fDhw3o7XmPx0jeRkZJIH/LPWyKFRMJfAdTyJCUlwcPDQ+ww6o1n1ERGytqjPcxsnFBRdLfWfge/cEhNzPQcFVHjFBcXq5U2Tk9PR1JSEhwdHeHt7Y05c+bg5s2b2LhxIwDg008/RZs2bdCpUyeUlZXhyy+/xO+//45ff/1VrB+hwZioiYyURCqDT98JuPRzzfXxpSZm8O79tAhRETXOqVOn0K9fP9X7GTNmAAAmTpyImJgYZGVlISMjQ9VfUVGBN954Azdv3oSlpSWCg4Px22+/qY1h6CSCIAhiB6FPiYmJ6NKlC06fPo3w8HCxwyFqcndSDiPjj+9QmpMOALDzDYFP3wmwbR0gcmTU0vH3cf3wjJrIyDl37AXnjr1QWVIAiUwGE3NrsUMiogZgoiZqIUyt7MQOgYi0wCmfREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQFjoiYiIjJgTNREREQGjImaiIjIgDFRExERGTAmaiIiIgPGRE1ERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERERAaMiZqIiMiAMVETEREZMCZqIiIiA8ZETUREZMCYqImIiAwYEzUREZEBY6ImIiIyYEzUREREBoyJmoiIyIAxURMRERkwE7EDIKKmVZaXhZsnd6Hg6p+QyEzg5B8Bj66PwdTCRuzQiKgemKiJjFhR5l84t+kdKMpLVG0l2WnIORuL4Invw8zaUcToiKg+eOmbyIhd3vO5WpK+rywvC9fivhUhIiJqKCZqIiNVejsDJdlpGvtvn4+DoFToMSIi0gYTNZGRqiwtrLNfWVkOZVWFnqIhIm0xURMZKUsXL0hkphr7zR1bQWZmoceIiEgbTNRERsrU0g6unftp7PfsNkqP0RCRtpioiYyYX9S/4Ogfod4okcKzxxh4dH1MnKCIqEH4eBaREZOZmiNw3DsoybmK/PQzkMhM4dShB+S2zmKHRkT1xERN1AJYufrCytVX7DCISAsGc+l7+fLlkEgkeO211+rc7ocffkBAQADMzc3RuXNn7NmzRz8BEhERicAgEnVCQgLWrFmD4ODgOrc7evQonnrqKUyePBlnzpzB6NGjMXr0aJw7d05PkRIREemX6Im6uLgYzzzzDNatWwcHB4c6t12xYgWGDBmCWbNmoWPHjli8eDHCw8Px+eef6ylaIiIi/RI9UU+ZMgXDhw/HwIEDH7rtsWPHamwXFRWFY8eONVV4REREohJ1Mtl3332HxMREJCQk1Gv77OxsuLm5qbW5ubkhOztb4z7l5eUoLy9XvS8uLtYuWCIiIhGIlqivX7+O6dOn48CBAzA3N2+y4yxbtgwLFy5ssvGJiIiakmiXvk+fPo2cnByEh4fDxMQEJiYmiI+Px2effQYTExMoFDWLBbi7u+PWrVtqbbdu3YK7u7vG48yZMwcFBQWqV3x8vM5/FiIioqYi2hn1gAEDkJycrNb2/PPPIyAgAG+99RZkMlmNfSIiIhAbG6v2CNeBAwcQERFRY9v75HI55HK56r21tXXjgyciItIT0RK1jY0NgoKC1NqsrKzg5OSkap8wYQI8PT2xbNkyAMD06dPRp08ffPTRRxg+fDi+++47nDp1CmvXrtV7/ERERPog+qzvumRkZCArK0v1PjIyEps3b8batWsREhKCbdu2YefOnTUSPhERkbEwqCVE4+Li6nwPAOPGjcO4ceP0ExAREZHIDPqMmoiIqKVjoiYiIjJgTNRERNRsHDp0CCNGjECrVq0gkUiwc+fOh+4TFxeH8PBwyOVytGvXDjExMU0epy4Z1D1qajo3b+dj1+E/kXr9Fmws5OjfJQC9Q9tDJuV3NSJqPkpKShASEoIXXngBY8aMeej26enpGD58OP79739
j06ZNiI2NxYsvvggPDw9ERUXpIeLGY6JuARIuXsWCDT+jovLvRWSOX0jH74mpWPD8Y5DJmKyJqHkYOnQohg4dWu/tV69ejTZt2uCjjz4CAHTs2BGHDx/GJ5980mwSNX9DG7nKKgXe3/SrWpK+7/j5K9hznCVCich4GUMxJyZqI3fiQjryi0s19v968rweoyEiqqm4uBiFhYWq14OFlBpLUzGnwsJC3Lt3T2fHaUpM1EYur6ikzv67hZqTOBGRPvTp0wd2dnaq1/3VKKka71EbOR93pzr7fd0d9RQJEVHt4uPjERoaqnr/YH2GxtJUzMnW1hYWFhY6O05TYqI2csFtW8OvlTOuZN6ptX/Uo6H6DYiMXtL66agozoOZtQNCJ68QOxzjVFEKmFmKHYXOWFtbw9bWtknGjoiIwJ49e9TaHlbMydDw0ncLsOD5EfB0sVdrk0oleGF4JLoHthEnKDJaFcV5qCi6i4riPLFDMWKC2AGIpri4GElJSUhKSgJQ/fhVUlISMjIyAFSXNp4wYYJq+3//+9+4cuUK3nzzTVy8eBFffPEFvv/+e7z++utNEl9aWhreeecdPPXUU8jJyQEA7N27F+fPaz8fiGfULYCHsx3WvzUBx85fQer1W7A2l6NfuD9cHWzEDo2IqEFOnTqFfv36qd7PmDEDADBx4kTExMQgKytLlbQBoE2bNvjll1/w+uuvY8WKFWjdujW+/PLLJnk0Kz4+HkOHDkXPnj1x6NAhLFmyBK6urvjzzz+xfv16bNu2TatxmahbCJlMil7B7dAruJ3YoRARaa1v374QBM1XFGpbdaxv3744c+ZME0ZVbfbs2Xj33XcxY8YM2Nj8fSLUv39/fP7551qPy0vfRETNjaAUOwKqRXJyMh5//PEa7a6urrhzp/Z5QvXBRE1E1NwwURske3t7ZGVl1Wg/c+YMPD09tR6XiZqIqLlR1lxpkMT35JNP4q233kJ2djYkEgmUSiWOHDmCmTNnqk1waygmaiKi5qaqHKjjPi2JY+nSpQgICICXlxeKi4sRGBiI3r17IzIyEu+8847W43IyGRFRc6OsAhSVgImZ2JHQA8zMzLBu3TrMmzcPycnJKC4uRlhYGNq3b9+ocZmoiYiao4piwIQrCxoiLy8veHl56Ww8XvomImqOygrEjoD+YezYsXjvvfdqtL///vsYN26c1uMyURMRNUdl+WJHQP9w6NAhDBs2rEb70KFDcejQIa3H5aXvFkIQBJxKvYa/Mm7B2tIcvUPaw8HGeNYKJmpxSnPFjoD+obi4GGZmNecNmJqaorCwUOtxmahbgJy8Iryz7iekZ/39wP2anYcQPbIXHu8dJmJkRKS10rtiR0D/0LlzZ2zduhXz5s1Ta//uu+8QGBio9bhM1C3Awq92qyVpAKhUKPDFj/HwdnNEF38fkSIjIq0V33r4NqRXc+fOxZgxY5CWlob+/fsDAGJjY7Flyxb88MMPWo/LRG3kzl3JxF/XNf+D3nkoiYmaqDkqrLkCFolrxIgR2LlzJ5YuXYpt27bBwsICwcHB+O2339CnTx+tx2WiNnLpWbfr7E/TUKeaiAxc4U2xI6BaDB8+HMOHD9fpmEzURs7Oqu4JY/bWFnqKhIh0qvAmoFQCUj68Y2gqKiqQk5MDpVJ9TXZvb2+txmOiNnI9OrWBrZU5CkvKau0f/Ij2ExyISESKyupkba+7hTWocS5duoQXXngBR48eVWsXBAESiQQKhXZrtDNRGzkzUxO88eQgvBuzB5X/+EsS3sEbwyM7ixQZETVa7hUmagMyadIkmJiYYPfu3fDw8IBEItHJuEzULUBkUFt88cbT2PnHGaRm3IK1hRwDunbEwK4BMJHJxA6PiLR15y/AT/tJSqRbSUlJOH36NAICAnQ6LhN1C+Hr4YTXnhgodhhEpEs5KWJHQA8IDAzEnTu6n6DLWQhERM1VzgVAUSV2FPQ/7733Ht58803ExcXh7t27KCwsVHtpi2fURETNVeW
96mTtESx2JARg4MDqq5YDBgxQa+dkMiKiluz6CSZqA3Hw4MEmGZeJmoioObt2FOgWLXYUBDRq9bG68B41EVEz0rVrV7Tu1A1dlyZWN+ReAfKvixsUqfzxxx949tlnERkZiZs3q1eP++abb3D48GGtx2SiJiJqRrKzs3EzMxvZhRV/N16JEy0e+tv27dsRFRUFCwsLJCYmory8HABQUFCApUuXaj0uEzURUXP31z5AEMSOosV79913sXr1aqxbtw6mpqaq9p49eyIxMVHrcZmoiYiau4IbwE3tEwHpRmpqKnr37l2j3c7ODvn5+VqPy0RNRGQMkjaJHUGL5+7ujsuXL9doP3z4MPz8/LQel4maiMgY3DwNXE8QO4oWLTo6GtOnT8eJEycgkUiQmZmJTZs2YebMmXj55Ze1HpePZxERGYujnwFj1wMmZmJH0iLNnj0bSqUSAwYMQGlpKXr37g25XI6ZM2di2rRpWo/LM2oiImORnwGc+UbsKFokhUKBP/74A1OmTEFubi7OnTuH48eP4/bt21i8eHGjxuYZNRGRMUnaBPg+Crh0EDuSFkUmk2Hw4MFISUmBvb09AgMDdTY2z6iJiIyJUgHELQUUlWJH0uIEBQXhypUrOh+XiZqIyNjkpgOJG8WOosV59913MXPmTOzevRtZWVmsnkVERHVI2gS06QM4txM7khZj2LBhAICRI0dCIpGo2lk9i4iIalIqgPjlwOjVgIy/6vXB4KpnXb58GWlpaejduzcsLCxU3xiIiMhA3LkEJMYAj7wodiQtgsFUz7p79y4GDhyIDh06YNiwYcjKygIATJ48GW+88YbOAyQiokY48y1w47TYUbQYBlE96/XXX4eJiQkyMjJgaWmpah8/fjz27dundSBERNQEBAGIXQgUZokdidEzmOpZv/76K9577z20bt1arb19+/a4du2a1oEQUbXS2xlI3fkBjr3/fzj63lhc3L4MxdlpYodFzVlZAbB/DlBeLHYkRs1gqmeVlJSonUnfl5ubC7lc3qCxVq1aheDgYNja2sLW1hYRERHYu3evxu1jYmIgkUjUXubm5g39EYgMVnF2Gv78agZun4uDouIelJVluJNyGGdjZqEg47zY4VFzlpsO/DYfUFSJHYnRMpjqWY8++ig2bvz7+TyJRAKlUon3338f/fr1a9BYrVu3xvLly3H69GmcOnUK/fv3x6hRo3D+vOZfSLa2tsjKylK9eBZPxuRq7AYoKu7VaFdWlSP9ty/1GouyqgK3zx/CzZM/If/KGQisd9z83TgFHP6YtaubSFNVz2rwrO/3338fAwYMwKlTp1BRUYE333wT58+fR25uLo4cOdKgsUaMGKH2fsmSJVi1ahWOHz+OTp061bqPRCKBu7t7Q8MmMniVpYXIT/9TY39x5l8oy78Fc3u3eo9ZUZKP7MR9KMw4B6mpGZwDe8O5Yy9IH/K4Tu6lk/hr18eoulekarNw9kLgE3Nh4ehZ7+OTAbr4C2DvDYQ8KXYkRud+9awNGzaoqmcdO3YMM2fOxNy5c7Uet8GJOigoCH/99Rc+//xz2NjYoLi4GGPGjMGUKVPg4eGhdSAKhQI//PADSkpKEBERoXG74uJi+Pj4QKlUIjw8HEuXLtWY1ImaE2VlOYC6z3QUlWX1Hq/41hWc2/Q2qkr/XhEp968TuHVmHwKfXAiZae23qu7dvYmUbUsh/GMJynt3ruP85nno8spaSKSyesdBBujEGsDRD/DqJnYkRqWpqmdp9Ry1nZ0d3n77ba0P+qDk5GRERESgrKwM1tbW+PHHHzUuZu7v748NGzYgODgYBQUF+PDDDxEZGYnz58/XmNx2X3l5uWrmHVCd6IkMkZmtE+T2bijPv1Vrv6mVPWRyK2Sf2Q+lohL2PsGwdPHWON5fP32slqTvK7iWjJvHtsO799O17pd5aneNJH1fWX427qYeh3PHnvX4ichgCUogdhEwZh1gq/0JFgFnz55FUFAQpFIpJBIJ3n7
7bcyaNQuXL19GcXExAgMDYW1t3ahjNDhRHzp0qM7+2m6k18Xf3x9JSUkoKCjAtm3bMHHiRMTHx9earCMiItTOtiMjI9GxY0esWbNGYxmxZcuWYeHChQ2KiUgMEokUrSP+D2l7V9bab+Xqi9MrJ0N4YDKQU0AkOox6AzJT9UmVRZmXUJqTrvFYt5J+hXfvp1F6OwO3z8ejqrwUtq0D4BTQEyVZNe+xPag46zITtTEoL6qeXDbyc9avboSwsDBkZWXB1dUVfn5+SEhIgJOTk06rZzU4Ufft27dG24MrkjV0LVMzMzO0a1e9Fm2XLl2QkJCAFStWYM2aNQ/d19TUFGFhYbXevL9vzpw5mDFjhup9UlJSk60eQ9RYHl2GoaqsGDeOfK+aVCY1lcPeNwS5l07W2P7uxaNIk1uhw4jX1Noriu/WeZzyoru4cuBLZJ74UdWWlbAL5g4ekNu51rmviUXjzg7IgNxOBY5+BvSeKXYkzZa9vT3S09Ph6uqKq1evQqlU6vwYDU7UeXl5au8rKytx5swZzJ07F0uWLGl0QEqlUu1SdV0UCgWSk5NVC6HXRi6Xqz021thLEERNzavnE/Do+hgKriUDghJ2Pp1x9us3NW5/O/kgfPtNhJm1g6rNwqn2W0H3mVk7qCXp+8rysuqcESyRyuDSiV90jUrKz4BbEOA/ROxImqWxY8eiT58+8PDwgEQiQdeuXSGT1T6HQ9sSmA1O1HZ2djXaBg0aBDMzM8yYMQOnT9d/qbo5c+Zg6NCh8Pb2RlFRETZv3oy4uDjs378fADBhwgR4enpi2bJlAIBFixahR48eaNeuHfLz8/HBBx/g2rVrePFFrmNLxsVEbgmnDt0BAEpFJUpva34MUVBWoTTnqlqitnRqDfs2ochPT6p1H0kds77L8rNh59O5+ovCP/j0mwi5rXM9fwpqNv74CHBuDzi1FTuSZmft2rUYM2YMLl++jFdffRXR0dGwsbHR6TF0VlLFzc0NqampDdonJycHEyZMQFZWFuzs7BAcHIz9+/dj0KBBAICMjAxIpX8/6p2Xl4fo6GhkZ2fDwcEBXbp0wdGjR3V6L4DI0EikJpCZWdT6fPV9tV2O7jDyDZzbMhelOVfV2t3DhuB2St3rDrsED4BL5/7I+fM3VBTnwtLFG626joC9X5hWPwMZOEUFcGAeMPZLwNRC7GialbNnz2Lw4MEYMmQITp8+jenTp4ufqM+ePav2XhAEZGVlYfny5QgNDW3QWOvXr6+zPy4uTu39J598gk8++aRBxyBq7iQSCVyC+iI7sfZV+yydvaFUVOH6ke8hNTGFk39PmNu7wszGEWEvfobcSyeRl5aIynuFsPfpDLfQwSi8eRGlZZqfgDC3dYF9m1C4hw5uqh+LDE3BDeD4KuDRGQ/fllQenEwWHx+PiooKnR+jwYk6NDQUEomkxipFPXr0wIYNG3QWGBH9zbv3M8hP/xNleZlq7VJTOSQmpjgb8/dkoPTfNsCzx+NoM+AFQCJB4fXzyDn7G5RVFbibchjX4r+FnW9IjTPt+8zt3WHnG9yUPw4Zqgs/Af7DANcAsSNpNgxyMll6uvojH1KpFC4uLlxzm6gJmVk7IOT5j5B1ajfupByGsqoC9r4hKCvIQf6Vfyz2Lyhx89h2WDh6oiwvGzePq08aq7pXhLsph2Ht3g7F2epPTMjMrdBh9CxIJA1eXZiMRcI6YPhHYkfRbBjkZDIfHx+tDkREjWNqaQvv3k+rFiopL7iNhM9f0Lj9zRM/oqLwjsZ+paBEwP+9jdvn46AovwcbzwB4hA+FmY2jzmMn3cjIyEBpaSkAoLRCiYzcMng76vgk6cYpoOAmYGe4S8WuXLkSH3zwAbKzsxESEoL//ve/6Nat9lXWYmJi8Pzzz6u1yeVylJXVf5W/uhjMZLLPPvus3gO++uqrWgdDRPVXeuda9QpTGty7c73u/W9dgYNfOJwDInUdGunYyZMnsXjxYvzyyy+q2455pVXwffs
kHuvsiLnDfPCIrw6Tw/UTgN0Y3Y2nQ1u3bsWMGTOwevVqdO/eHZ9++imioqKQmpoKV9fa1wCwtbVVm+z84NofujBkSPWjbaJOJqvvBC6JRMJETaQnppY1H5V8kExuCUV5qcZ+idQEEg2X6Mhw7NixA+PHj4cgCDXmBgkCsOdcLvaey8PW6I4YE6ajR+dyUnQzThP4+OOPER0drTpLXr16NX755Rds2LABs2fPrnUffRVz+uqrr5pk3Hol6n/elyYi8Vl7tIeli4/GZ6zdQgYh93ICynIza+138u8Bqcy01r7GuP8894PPdZN2Tp48ifHjx0OhUGgsM6pQAhIIGL8uBUffDNXNmXVB3VdjxFJRUYHTp09jzpw5qjapVIqBAwfi2LFjGvdrymJOY8aMQUxMDGxtbTFmTN1XIXbs2KHVMXT2HDUR6V/7x17Fuc1za5w5W7q2gfejT8HeLwwp378LQVml1m9iYQvvvs81SUyhk1c0ybgt0bvvvlvrmfQ/CQAECHh3zzX89EpQ4w9cWPuXu6ZSXFyMwsK/C8j8c0XJ++7cuQOFQgE3N/VSr25ubrh48WKtY2tTzKkh7OzsVJfSa1sQTBe0StQ3btzArl27kJGRUeOZsY8//lgngRHRw9l4BiAs+nNkJvyMgmtnITUxg3PHXnAPi4LMzAKO7R5B5+eW4frRH5B/5QykMhM4BfSEV6/xsHBsJXb4VIeMjAzs3r37oUn6PoUS+Dk5VzcTzMoKgLJCwNy2cePU0z/rL8yfPx8LFizQydjaFHNqiAcvd4t66ftBsbGxGDlyJPz8/HDx4kUEBQXh6tWrEAQB4eHhTREjEdXB3N4NfoM0L6Nr6xWITuPn6zGi5k+hUDTJ87ANsX///non6fsEAfj1Qh4mRrg9fOOHyU4BPJv2d3pVVfWVnvj4eLUFs2o7mwYAZ2dnyGQy3LqlXgr21q1b9b4HXZ9iToamwYl6zpw5mDlzJhYuXAgbGxts374drq6ueOaZZ1Qz34iImrPFixc32/K40d9eQvS3l3QwUg8djFE/1tbWsLV9+Nm7mZkZunTpgtjYWIwePRpAdSGn2NhYTJ06tV7Hqk8xp4YICwur9yzyxMTEh29UiwYn6pSUFGzZsqV6ZxMT3Lt3D9bW1li0aBFGjRqFl19+WatAiIgMxdy5c/H222+LGkNMTAxeeumlBu+37tn2ujmjdukIjK69NrqunDlzBt27d2/QPjNmzMDEiRPRtWtXdOvWDZ9++ilKSkpUs8D1Xczp/hcGACgrK8MXX3yBwMBA1eX248eP4/z583jllVe0PkaDE7WVlZXqvrSHhwfS0tJUs+fu3NG8uAIRia8s/xZKbl2BiYUtbL0Cdf48qbGQyWQaV5fSl6ioqFqXa66LRAIMDnSAqUwHK8vlXQKU5YC86UoDm5g0fJrU+PHjcfv2bcybNw/Z2dkIDQ3Fvn37VBPM9F3Maf78v28rvfjii3j11Vdr3PueP38+rl/Xfia9RGjgTZDRo0dj+PDhiI6OxsyZM/HTTz9h0qRJ2LFjBxwcHPDbb79pHYw+JCYmokuXLjh9+jTvqVOLUVVWjEu7V+Bu6nHVIinmDq3Qbvg02HNdb4M1cuRI7NmzBwqF4qHbyqTA8CBH3cz6vu/x1YBrR92N9w/G9vvYzs4Op06dQvv27dXaL126hK5du6KgoECrcRv8tevjjz9WXapYuHAhBgwYgK1bt8LX1/eh1bCISBwp25bi7sWjaiuZleVl4sLWBSi9e0Onx0paPx0nV0xA0vrpOh23JZo7dy4kEslDr3xIAEggwTvDdLzEs0L3laCMmYWFBY4cOVKj/ciRI42qh9Hg6w5Lly7Fs88+C6D6Mvjq1au1PjgRNb2im6kouPpnrX3KynJknvwJ7YZO0dnxKorzUFF0V2fjtWSPPPIItm7dqlqZrLYza5m0Okl/H91Rt8uIAoCt4a73bYhee+01vPzyy0hMTFStPX7ixAls2LABc+fO1Xr
cBifq27dvY8iQIXBxccGTTz6JZ599FiEhIVoHQE2jorIKPxw8jX0nzuNuQQm83BwwqlcIhkV0Fjs00rOC6+fr7C/MqLufxDVmzBgcPXoUixcvrvFctURSfbn7HV2v9Q0ArUIBKx0tSdpCzJ49G35+flixYgW+/fZbAEDHjh3x1Vdf4YknntB63AYn6p9++gl5eXn44YcfsHnzZnz88ccICAjAM888g6effhq+vr5aB0O6UaVQ4O21O5F0+e9Lmlcy7+CT72ORlnkH08b2a5LjvvLRZuQVlcLBxhJfvPF0kxyDGk5mWvszqfdJH9JP4nvkkUdUi0yFhoYiLy8PDpYmSHonXPfVs+4Lm9A04xq5J554olFJuTZaTQ10cHDASy+9hLi4OFy7dg2TJk3CN998g3bt2uk0ONLOoaRLakn6QbsO/4lr2U1zWTKvqBR3CoqRV6S5EATpR9HNi7hxdBsyE3bBppU/JFLN38mdAx/VY2TUGN7e3rC0tAQAWJpJmy5J+/QEWndpmrGpwRq11ndlZSVOnTqFEydO4OrVqzXWXyVxxCX9VXf/mb8wcWhEndtQ81RVVoKUbUvU7klLpDLYtA6o9RK3lVsbuIdxoSJ6gIk50JNVEA2JVmfUBw8eRHR0NNzc3DBp0iTY2tpi9+7duHFDt7NHSTtl5ZV191fU3U/N16VfVtSYOCYoFSjMOA/38CGwbtUBEqkMplb28IwYi87PLYeJ3FKkaMkgdZkI2DR9SUiqvwafUXt6eiI3NxdDhgzB2rVrMWLECI3rspI4Ovt54swlzQ/XB/mxGIMxKivIwd2Lmkv9Fd1MRVj053qMiJodBx+g8zixo6B/aHCiXrBgAcaNGwd7e/smCId0YXhkZ/z4RxKKSstq9Pl6OKFHJz8RoqKmVppzVe056X8quVVdPIerkZFGvWYATVCjnBqnwYk6Ojq6KeIgHXK0tcJ7/x6D9zbtw7Vbuar2IL9WGNM7DBnZuWjTio9dGBtTy7qLGphY2jBJk2adx1U/kkVaUygUiImJQWxsLHJycmpUYPv999+1GrdRk8nIcLX3csWXsyfgwtUs3MotwNFz6Th2Lg2LYn4BAPi1csa0sf15GdyI2HgGwMKpNe5pWGnMtfMAPUdEzYZLANCt4QVASN306dMRExOD4cOHIygoSGdfjJmojVygrwd2Hf4TcWdS1dqvZN7BnDU/YuWMp+Dt5ihSdKRr7R+bjvNb5kFRcU+t3dLVF66dB6CiKBdmNvy86QGWjsDgdwETM7Ejafa+++47fP/99zoroXkfE7WRu3k7H78nXqy1r6yiEtviEjFj/EA9R0VNxdYrEKEvfobMhJ9RcC0ZMlMzyO3dUZSZiqQvq+v12ngGwKffRBbjIMDMChj6AWDtInYkRsHMzKxJ1hPRQS00MmRJl66jrvpoZ/7K0F8wpBcWjq3QNupfCH/pc7iFDMKd8/Eoz8tW9RfdvIjzW+ai8PoFEaMk0cnMgKilgDMXqtKVN954AytWrGhQadL64Bm1kTMxqfu7mInINXep6SgVVbh2aFOtfYKiChmHNiPomXf1HBUZBIkUGLSQk8d07PDhwzh48CD27t2LTp06wdRUfQb9jh07tBqXidrIdQ9sA1OZDJUa6tn2Cm6r54hIX4qzLqOyOE9jf356EpRVFZDy3mTL03cO4BMpdhRGx97eHo8//rjOx2WiNnL21pZ4cmBXfLP/RI0+VwcbjOkTJkJUpB8Pu/wm6PwSHTUDEVOADoPFjsIoffXVV00yLhN1CzBhSASc7W2w7eBpXM/Jg5mpDH1CO2DS0Eg42FiJHR41EWuPdjC1skdlSX6t/Xa+IQ+trEVGpvM4IFi3lZ2o6TFRtxDDegRhWI8glJZVwMxUxnvTLYBUZgqvR5/ClX2ravRJpDJ4P8pSpC1K2/5Aj1fEjsLobdu2Dd9//z0yMjJQUVGh1peYmKjVmJz13cJYmpsxSbcgrbo+hnbDX4Xc7u/KdlbubRH45EL
Y+QSJGBnplWeX6vvSUv7Kb0qfffYZnn/+ebi5ueHMmTPo1q0bnJyccOXKFQwdOlTrcXlGTWTk3MOi4BY6CGW5WZDITGBuz3K0LYpLABc00ZMvvvgCa9euxVNPPYWYmBi8+eab8PPzw7x585Cbm/vwATTg1yuiFkAikcLCyZNJuqVx9AOGfQCYsZSpPmRkZCAysno2vYWFBYqKigAAzz33HLZs2aL1uEzURETGyLYVMOxDwLzuYi2kO+7u7qozZ29vbxw/fhwAkJ6e3qgnLJioiYiMjdym+kzayknsSFqU/v37Y9euXQCA559/Hq+//joGDRqE8ePHN+r5at6jJiIyJvdXHbNrLXYkLc7atWtVpS2nTJkCJycnHD16FCNHjsS//vUvrcdloiYiMiahT1fP8ia9k0qlkD4ws/7JJ5/Ek08+2fhxGz0CEREZBrvWQJdJYkfRov3xxx949tlnERERgZs3bwIAvvnmGxw+fFjrMZmoiQgAuJyoMejxCiAzffh21CS2b9+OqKgoWFhY4MyZMygvLwcAFBQUYOnSpVqPy0RN1IJVFOchbd8qHPvwCRxZMgJJ61/D7fOHxA6LtOESwEIbInv33XexevVqrFu3Tq1yVs+ePbVelQzgPWqiZk9QKpB76STuXDwKQVEJO58QuHbuB5mZeZ37VZYW4OzXb6IsL1PVVpx1Cak/vofywttoHTG2qUMnXQp9GpBIxI6iRUtNTUXv3r1rtNvZ2SE/P1/rcZmojUxpWQViT1/Etey7cLC1wqCuAXB14HOUxkpZVYELWxciPz1J1Xbnwh+4cewHdH52OcztXTXum3lyl1qSflBG/Ca4h0XBxNxa1yFTI7m7uwOCAu4mxX832rYCfB8VLygCUP3ZXL58Gb6+vmrthw8fhp+fn9bjMlEbkfPpmZj75S4UlZap2jbuO4Ypj/fFyF4h4gVGTeb64a1qSfq+8vxbuLT7U3R+dikEQYncSwkouPonJDITOPlHwLZ1R9xJ0Ty5RVlVjty/TsI1uH8TRk/aOHXqFJB/Hdj67N+NgaO5jrcBiI6OxvTp07FhwwZIJBJkZmbi2LFjmDlzJubOnav1uEzURqK8ogoLNvyslqQBQKkU8PmOg/D3doO/t7tI0VFTEAQB2Wf2aewvuPonijL/QtrelSjOuqxqv3lsO5w79oKysrzO8ZVVdfeTgZCaAP5DxI6CAMyePRtKpRIDBgxAaWkpevfuDblcjpkzZ2LatGlaj8uvYEYiLikV+cX3au0TBGDXkbMa9714LRs/HzmL+KS/UF5R1VQhko4pq8o11pq+L/3Al2pJ+r47KYdhYmFTx54S2PkENy5A0g+fCMDcTuwoCIBEIsHbb7+N3NxcnDt3DsePH8ft27exePHiRo3LM2ojceN2ft39OXk12nILS7Ao5hecT//7PqWNpRzT/28A+oR10HWIpGMyU3OYWTuiolhTVR4JCq9f0Lh/edFdSGSmEBSVNfqcAiJh4eSpo0ipSfn1EzsC+gczMzMEBgbqbDwmaiPhbFf3pJ/a+hds+Bkp17LV2opKy7Hs231wd7KDvzcrLRk69/ChyDi0qdY+61btUZz5l8Z9q0oLEDDuHVw98CXK8qv/HkikMrh06ou2w15pknhJxyRSwKub2FG0eC+88EK9ttuwYYNW4zNRG4n+4f748uc/UKbh0vXQHkFq789dyayRpO9TKJX48dAZzH6W970MXeueT6A4+zJy/zqh1m7h1BptBr2E5K9natzXxNwaTh26w6lDDxTdSEFVeQms3Pwgt2Ehh2bDxb+6AAeJKiYmBj4+PggLC2uShYOYqI2EjaU5Zj0VhWXf7kWVQqnW1yu4HfYeP4dv9h+Hp4s9RvQMRmpG7Un6vtSMW00ZLumIVGaCwCfmIT/9T9y5eBjKqkrYtwmFc8eekMpMYefTGQXXkmvd1zVkICSS6mkqtl66u0xHeuTWSewICMDLL7+MLVu2ID09Hc8//zyeffZZODo66mx
8Jmoj0ju0Pfw8nbH7aHL1c9Q2lqisVCAu6e/LnxeuZuG3UynoF+5f51jWFmZNHS7pkH2bENi3qfkIXrvhryL5mzmoKLqj1m7j6Q/v3s/oKzxqKs6cS2IIVq5ciY8//hg7duzAhg0bMGfOHAwfPhyTJ0/G4MGDIWnkQjSizvpetWoVgoODYWtrC1tbW0RERGDv3r117vPDDz8gICAA5ubm6Ny5M/bs2aOnaJuH1i4O+Peo3lj2r8cxrEeQWpK+TxCAg6dTYWaq+XvagC4dmzJM0hMLx1YI/9dKtBn0IhzadYWjfwQ6jHwDnZ97DyZyS7HDo8ay9xE7AvofuVyOp556CgcOHMCFCxfQqVMnvPLKK/D19UVxcfHDB6iDqIm6devWWL58OU6fPo1Tp06hf//+GDVqFM6fP1/r9kePHsVTTz2FyZMn48yZMxg9ejRGjx6Nc+fO6Tny5mH/Sc0zfgUA4R28al1xsFObVjXuaVPzZWJuDc/uj6PTkwsROO4duAb3h9SEhRuMAmtOGySpVAqJRAJBEKBQKBo/ng5i0tqIESMwbNgwtG/fHh06dMCSJUtgbW2N48eP17r9ihUrMGTIEMyaNQsdO3bE4sWLER4ejs8//1zPkTcPuYUldfa72Nvgg1fGIjLIDy72NvBr5YyXRj6K9/49BnIz3hUhMmjmtoCcS7waivLycmzZsgWDBg1Chw4dkJycjM8//xwZGRmwtm7c52Qwv40VCgV++OEHlJSUICIiotZtjh07hhkzZqi1RUVFYefOnXqIsPnxdXfCyZSrGvt93J0Q0s4LIe289BcUEemGTSuxI6D/eeWVV/Ddd9/By8sLL7zwArZs2QJnZ2edjS96ok5OTkZERATKyspgbW2NH3/8UeOD4tnZ2XBzU3+2183NDdnZmmcwl5eXq2qCAmj0vYLmZHhkZ/x4KAmVtVx6sbGUY2DXABGiIjFUlZWg6GYKJFJT2HoHQsqaxc2fDdc5MBSrV6+Gt7c3/Pz8EB8fj/j4+Fq327Fjh1bji56o/f39kZSUhIKCAmzbtg0TJ05EfHy8zlZ1WbZsGRYuXKiTsZqbVs72+M+EoXhv036UVfy9+pSdlQUWvDACVuZyEaMjfRAEJa4d3IjMhF2qtb1Nrezh03cC3MOiRI6OGsWaa/cbigkTJjR6ZnddRE/UZmZmaNeuHQCgS5cuSEhIwIoVK7BmzZoa27q7u+PWLfXne2/dulVd9k2DOXPmqF0uT0pKQp8+fXQUveHrFdwOoe1b42BiKv66ngOlIKBLB2+0b625/CEZj4z4Tbhx9Ae1tsqSfFz+5TOYWFjDOaCnSJFRo1nz37ChiImJadLxRU/U/6RUKtUuVT8oIiICsbGxeO2111RtBw4c0HhPG6ieMi+X/33m2Nib+s2RUing8Nk0JP6VAQD49eQF2FjKMXVsP/QP5+VvY6WoKENmwi6N/TeO/MBE3ZxZuYgdAemJqIl6zpw5GDp0KLy9vVFUVITNmzcjLi4O+/fvB1B9OcHT0xPLli0DAEyfPh19+vTBRx99hOHDh+O7777DqVOnsHbtWjF/DIO3+Os9SLp0Xa2tqLQc723aD3dHWwT6clKKMSrOToOivFRzf9YlKCrKIDMz12NUpDOWXOq1pRD18aycnBxMmDAB/v7+GDBgABISErB//34MGjQIAJCRkYGsrCzV9pGRkdi8eTPWrl2LkJAQbNu2DTt37kRQEJ/51eTS9ZwaSfo+pVLA9rgzeo6I9EVmWvfqchKpDBKZTE/RkM6xtGWLIeoZ9fr16+vsj4uLq9E2btw4jBs3rokiMj4XH7Km98P6qfmycm8HcwcPlOVl1drv2KEHZ383Z2ZWYkdAeiLqGTU1PauHrNnNmd/GSyKRoM2gF6tLIf6DzNwKPn2eFSEq0hlTC7EjID1hojZyEZ38YCnXnKz7d6m7OAc1b04deqDzs0th7xcOSKSQmpjBpVNfhEz6CJYu3mKHR40h45fsloKJ2shZyM3
w8uN9al3Tu31rV4zqVbPiEhkXO5/OCHp6MXr+ZxciZ/8I/8dnwdKZq9E1a1IZIG25v75XrlwJX19fmJubo3v37jh58mSd2zf3Yk4t95NuQYZ074QPXhmLiCA/ONpawdvNEc8Pi8RHU/8PFnWcbTeUg40lnO2s4WDDqkyGqCkXZCA9k7TcSYBbt27FjBkzMH/+fCQmJiIkJARRUVHIycmpdXtjKOYkEQRBEDsIfUpMTESXLl1w+vRphIeHix0OkdE5uWICKoruwszGCd2mbxQ7HONUcgew0t1a0mLR5vdx9+7d8cgjj6iKMSmVSnh5eWHatGmYPXt2je3Hjx+PkpIS7N69W9XWo0cPhIaGYvXq1br5QZoYz6iJiJqbWiYItgQVFRU4ffo0Bg4cqGqTSqUYOHAgjh07Vus+x44dU9seqC7mpGl7Q2RwK5OR+O6VVyD29EVcybwDB2tLDHykIzyc+MwmtRwKhQJKpVLsMDSrqgIqKx++nYGrqqoCUF0sqbCwUNX+zxUl77tz5w4UCkWtxZkuXrxY6zG0KeZkaJioW6DcwhIcO3cFlVUKhHXwgo/73yscpWZk4+21P6Gg5J6q7dtfTyB65KP4v768VUAtw+LFi1tsMR8x/LP+wvz587FgwQJxgjFATNQtTMzeY9gam4Aqxd9nC48Gt8NbzwyBVCrBvPU/qyVpAFAKAtb8dAj+Xm7o3NZT3yET6d3cuXPx9ttvix2GZuVFgNxG7Cga7cyZM+jevTvi4+MRGhqqaq/tbBoAnJ2dIZPJGlScSZtiToaGidoI3cjJQ+zpiyi+V44OXm7oE9YeZiYm2Hv8HDb9eqLG9n+cvQxryziEtfdCbmGJxnF3HfmTiZpaBJlMBpkhL68qyAHT5r+qnIlJdQqytraGra3tQ7c3MzNDly5dEBsbi9GjRwOonkwWGxuLqVOn1rqPNsWcDA0TtZH5as9RbPntJB6cyx+z9yiW/etxbItL1LjfbwkpsLGsewGFGzl5ugqTjJiZtYPaf6kJtNDJZAAwY8YMTJw4EV27dkW3bt3w6aefoqSkBM8//zwA4yzmxERtRA6fvYzNB2o++J+TV4T5G36uM9FWKhR42IN6zvZ1X2p75aPNyCsqhYONJb544+l6xUzGJ3TyCrFDaAFa7jPx48ePx+3btzFv3jxkZ2cjNDQU+/btU00Yy8jIgPSBxWDuF3N655138J///Aft27dvdsWcmKiNyM9Hzmrsu5GTB1MTGSqrFBq36dm5LX45lozSsopa+4f1qPsvdl5RKe4UFNcvWCLSXgs+owaAqVOnarzUbYzFnFr2p21krj/k0nQHL1eNfX6tnNGpTSvMeXYITE1q3psb1SsEEUF+jY6RiHSghSfqloZn1EbE2c4Kt/OLNPYP69EZeUWlyLxToNZuITfFhCE9ELP3GNJu3kaPTm1gIpOiuLQc9taWiOoeiJB2XBuayGBwOdgWhYnaiAzpEYSUa7U/xO9kZwV/HzeM698F59IykXbzNioVCoS194K/txuWf7sPZRVVavuM6hWCqWP76SN0aoR7uZm4cWwb8i6fAgA4tOuK1hFjYeHIGfrGi4m6JWGiNiJR3QKRmHoN8UmX1NrNzUzh7miL6Pe+UU0Yc7SxxKvj+qOrvy+eWvhljSQNAD8d/hNhHbzQs3M7fYRPWii+dQXJ38yGouzvx+pundmPOymH0fm55bB2q75dUVGUi4KMZEhkJrBvEwYTOQunNGs8o25RmKiNiEwqxdsThmFg13TVc9T+Xm44l56JPy/fUNs2t6gU7369B08P6oai0jKNY+45do6J2oClH/hSLUnfpygrQfqBLxH09GKk7V+NW2f2Q1BWTySUmVnAu88z8Oz+uL7DJV1hom5RmKiNjEQiQY9OfujRqfpM6krmbWyq5ZEtAKhSKHH47OU6x8up4543iauiOA8FVzXP9C+4ehZp+1YhO3GvWrui4h7SD3wJU0t7uHbmrQ0iQ8epg0bufHpWnf0
Pe5zK09leh9GQLikq7gGo6+F3AbfOxmrsvXFsu85jIj1pWdWJWzwmaiNnaW5WZ7+DtSWc7aw19o/sGaLrkEhH5HauMK1j9S8Tc2sIVbU/Ew8ApTnp/0v2RGTImKiNXI9ObWBupnlN4H5dArDghcdgZ2Wh1i6RAJOGRiDc37upQyQtSWUmaPXISI39Lg+5rC2RmUAia/7rRRMZO96jNjIVVVXIySuCjYU57KwtYGUuR/SIXvjv9oM1tvX1cMLoXiGwspBj4zvP47dTKbiSeRu2VhYY9EhHeLk6ivATUEO0jhyHqntFyEzYBUFRPXNf8r8E7jvgBeRfOYN7d2/Uuq9zQE9IZfwV0CwJAieUtSD8V2okFAolNu4/jp+PnEVRaRmkEgm6BbbBy6N7Y2SvELg52mJ7XCJSr9+CtYUcA7oE4In+XWBlUV2Iw9LcDCN78TJ3cyORSNBm4GR4RoxF/pXqoiv2fuEws7IHALSN+jfOb10IQVGptp+plT28+zyn73BJZ3iPuiVhojYSH209gAMJKar3SkHA8fNXcOnGLax642l0D2yD7oFtRIyQmpKZlT1cO/ev0W7vF4aQSR/gxrHtyL9yBhKZCZwDesIz8v9gbqd5SVkycIISgAGX4SSdYqI2AtdzcvHbqZRa++4WlOCnw2cxaWjzqb1KumXt0R4BY2aLHQbpEmd9tyicTGYETly4Wue/2xPnrwAABEFAYUkZKiprrkJGRESGiWfULYAA4Ocjf2Jb3Blk3smHqUyGXiHtMHl4T7g52oodHhE1lJS/ulsSnlEbge6BvnX2m5uZ4rNtB5F5Jx8AUKlQ4GBiKl777HvcLai5/CQRGTgpf3W3JPy0jYCXqyMGdu1Ya5+DjSVSNVTUulNQjB2HEpsyNCIiaiQmaiMx88lBeGrgI7CxrH7cSiqRoEdgGzzeOwxVSqXG/Y4kp+krRCIi0gJvdBgJmUyKF4b3xLODuyMnrwjWlnLYW1vil6PJde6nUGhO4kREJD4maiNjZmqC1q5/r/8c3sEbEonmpzm6cIlQIiKDxkvfRs7D2Q4Du9R+/9pCboqxfcP1HBERETUEz6hbgNfHD4S1pRx7j59HWUX1UpLtW7ti6th+XM+7BagqK8atPw8gPz0JEpkpnP0j4dzpUUhZkIOoWWCibgFMTWR45fG+mDgkAtdu5cLaQg5vNyboluBebhaSv5mNiqI7qrbc1GPIOr0bnZ5+FyZySxGjI6L64KXvFsTKQo5AXw8m6Rbk8i8r1JL0fUU3U5FxaLMIERFRQzFRExmpe7lZKLimedZ/zp8HIAic9U9k6JioiYxURdHdOvuryoqhqCjTUzREpC0maiIjZe7gDkg0/xM3s3GCzMxCjxERkTaYqImMlNzWGU7+PTT2e3QZBolEoseIiEgbTNRERqzdsKmw9mhXo925Yy+0jhwnQkRE1FB8PIvIiJla2iHkhU+QeykB+elJkMpM4BTQE7atA8QOjYjqiYmayMhJJFI4degOpw7dxQ6FiLTAS99EREQGjImaiIjIgDFRExERGTDeo24h7uQX45djyUi9fgs2FnL07xKAbh19+XgOEZGBY6JuAc6m3cDcdbtQWl6havs9MRX9wv0x+5khkEqZrImIDBUvfRs5hUKJpd/sVUvS9x1MTMWvCRdEiIqIiOqLidrInUy5irsFJRr79x4/p8doiIiooZiojdydgqI6+2/nF+spEiIi0oaoiXrZsmV45JFHYGNjA1dXV4wePRqpqal17hMTEwOJRKL2Mjc311PEzY+ni0Od/a1d7PUTCBERaUXURB0fH48pU6bg+PHjOHDgACorKzF48GCUlGi+VAsAtra2yMrKUr2uXbump4ibn7D2XvBy1ZysR/YK0WM0RETUUKLO+t63b5/a+5iYGLi6uuL06dPo3bu3xv0kEgnc3d2bOjyjIJFIMP+FEZizekeNy9xPDnwEvYJrFmwgIiLDYVCPZxUUFAAAHB0d69yuuLgYPj4+UCq
VCA8Px9KlS9GpUyd9hNgs+bg5IuY/kxB35i+156i93er+/0xEROIzmEStVCrx2muvoWfPnggKCtK4nb+/PzZs2IDg4GAUFBTgww8/RGRkJM6fP4/WrVvX2L68vBzl5eWq98XFLXPylJmpCQZ3C8TgboFih0JERA1gMIl6ypQpOHfuHA4fPlzndhEREYiIiFC9j4yMRMeOHbFmzRosXry4xvbLli3DwoULdR4vERGRPhjE41lTp07F7t27cfDgwVrPiutiamqKsLAwXL58udb+OXPmoKCgQPWKj4/XRchGqbDkHnIL657IR0RE+iXqGbUgCJg2bRp+/PFHxMXFoU2bNg0eQ6FQIDk5GcOGDau1Xy6XQy6Xq95bW1trHa+xunA1E+t3H8HZtJsAAF93JzwzuBv6hvmLHBkREYmaqKdMmYLNmzfjp59+go2NDbKzswEAdnZ2sLCwAABMmDABnp6eWLZsGQBg0aJF6NGjB9q1a4f8/Hx88MEHuHbtGl588UXRfo7m7OK1bMz6YjsqKhWqtqvZd7Fk416UVVRhSHdO0iMiEpOoiXrVqlUAgL59+6q1f/XVV5g0aRIAICMjA1Lp31fo8/LyEB0djezsbDg4OKBLly44evQoAgM5SUobG/cdU0vSD/p67zEM6toRMln97pA42Fiq/ZeIiBpP9EvfDxMXF6f2/pNPPsEnn3zSRBEZr9zCEuw9fg6p12/B2kKOAV0C0LmtJ06lal4s5k5BMVKv30Kgr0e9jvHFG0/rKlwiIvofg5n1TU3nfHom3l67EyVlf1fQOpCQgv7h/njYdyWl8uFfpoiIqOkYxKxvajoKhRJLNu5RS9L3/Z6YitZ1LC9qb22BDt6uTRkeERE9BBO1kUu4eLXOClkyqRRSqaTWvicHPAIzE150IaLmJzc3F8888wxsbW1hb2+PyZMnP3TBq759+9Yo+vTvf/9bTxFrxkRt5G7n113msrSsAosmj4Svu5OqzcnOClPG9MXYvuFNHR4RUZN45plncP78eRw4cAC7d+/GoUOH8NJLLz10v+joaLWiT++//74eoq0bT5eMXCtn+zr7PV3s0T2wDboHtkHGrVxUKRTwcXOq90xvIiJDk5KSgn379iEhIQFdu3YFAPz3v//FsGHD8OGHH6JVq1Ya97W0tDS4ok/8bWzkwjt4w7OOmtMjIoNVf/Z2c4RfKxcmaSJq1o4dOwZ7e3tVkgaAgQMHQiqV4sSJE3Xuu2nTJjg7OyMoKAhz5sxBaWlpU4f7UDyjNnISiQTzn38Ms1f/WGN50P/rG47eoe1FioyIqFpxcTEKCwtV7/+5omRDZWdnw9VVfSKsiYkJHB0dVQtr1ebpp5+Gj48PWrVqhbNnz+Ktt95CamoqduzYoXUsusBE3QK08XBGzH8m4eCZVPyVkQ0rCzkGdumINq2cxQ6NiAh9+vRRez9//nwsWLCgxnazZ8/Ge++9V+dYKSkpWsfx4D3szp07w8PDAwMGDEBaWhratm2r9biNxUTdQljITTGsRxCG9dBcQpSISAzx8fEIDQ1Vvdd0Nv3GG2+oVq3UxM/PD+7u7sjJyVFrr6qqQm5uboPuP3fv3h0AcPnyZSZq0h2FUonLN3KgFAS0b+0KE5lM7JCIiOpkbW0NW1vbh27n4uICFxeXh24XERGB/Px8nD59Gl26dAEA/P7771AqlarkWx9JSUkAAA+P+q3O2FSYqI3IrycvIGbvUdVz0w42lnh6UDeMfjRU3MCIiPSoY8eOGDJkCKKjo7F69WpUVlZi6tSpePLJJ1Uzvm/evIkBAwZg48aN6NatG9LS0rB582YMGzYMTk5OOHv2LF5//XX07t0bwcHBDzli02KiNhLxZ/7CB1t+VWvLKyrFyh1xkEmlGNFT3L9oRET6tGnTJkydOhUDBgyAVCrF2LFj8dlnn6n6KysrkZqaqprVbWZmht9++w2ffvopSkpK4OXlhbFjx+Kdd94R60dQYaI2Et/+qvmRgy2/ncS
wiCDIpHzsiohaBkdHR2zevFljv6+vr1phKC8vL8THx+sjtAbjb24jcLegBFez72rsv51fjIxbuXqMiIiIdIVn1EbApB4LlEglEhxKuoTU69mwsTBHv3B/uDk+fPIGERGJi4naCNhZWyCoTSucS8+std/TxR7z1u9C5p0CVdtXe45iwtAIPDOom77CJCIiLfDSt5F4YXhPmJrUfBRLKpWgqkqplqQBQCkIiNlzFCfOp+srRCIi0gITtZHo3NYTH7wyFuEdvCH5X9XKzn6eePGxXriVV6hxv52Hk/QTIBERaYWXvo1Ipzat8N7LY1BWUQlBEGAhN8PPR/6sc59r2ZxkRkRkyJiojZC5manqzw42VnVu62Rr2dThEBFRI/DSt5HrHtgGDjaak3FU9056jIaIiBqKidrImZrI8ObTUZCb1rx4Ehnkh6HdWaSDiMiQ8dJ3C9A1wAdr33wWPx85i9Trt2BtIceALgHoFdyOq5URERk4JuoWopWzPf41qrfYYRARUQPxdIqIiMiAMVETEREZMCZqIiIiA8ZETUREZMCYqImIiAwYEzUREZEBY6ImIiIyYEzUREREBqzFLniSkpIidghE9D8eHh7w8PAQO4wGy8rKQlZWlthhNFv8PVw/LS5Re3h4oE+fPnj22WfFDoWI/mf+/PlYsGCB2GE02Jo1a7Bw4UKxw2jW+vTp0yy/pOmTRBAEQewg9K0lfwsuLi5Gnz59EB8fD2tra7HDIT0y5M+eZ9T1Z8ifozaa62evTy0yUbdkhYWFsLOzQ0FBAWxtbcUOh/SIn71x4OfY8nAyGRERkQFjoiYiIjJgTNQtjFwux/z58yGXy8UOhfSMn71x4OfY8vAeNRERkQHjGTUREZEBY6ImIiIyYEzUREREBoyJmhokLi4OEokE+fn5YodCRNQiMFGLKDs7G9OmTYOfnx/kcjm8vLwwYsQIxMbG6vQ4ffv2xWuvvabTMeuydu1a9O3bF7a2tkzqjSSRSOp8NWbZTYlEgp07dz50uyVLliAyMhKWlpawt7fX+ngtGT9HaowWt9a3obh69Sp69uwJe3t7fPDBB+jcuTMqKyuxf/9+TJkyBRcvXtRrPIIgQKFQwMSk8X8lSktLMWTIEAwZMgRz5szRQXQt14PLU27duhXz5s1Damqqqk0fS0hWVFRg3LhxiIiIwPr165v8eMaInyM1ikCiGDp0qODp6SkUFxfX6MvLy1P9+dq1a8LIkSMFKysrwcbGRhg3bpyQnZ2t6p8/f74QEhIibNy4UfDx8RFsbW2F8ePHC4WFhYIgCMLEiRMFAGqv9PR04eDBgwIAYc+ePUJ4eLhgamoqHDx4UCgrKxOmTZsmuLi4CHK5XOjZs6dw8uRJ1fHu7/dgjJo0ZFt6uK+++kqws7NTa1u3bp0QEBAgyOVywd/fX1i5cqWqr7y8XJgyZYrg7u4uyOVywdvbW1i6dKkgCILg4+Oj9nfCx8dHq+NTw/FzpIbiGbUIcnNzsW/fPixZsgRWVlY1+u9fllIqlRg1ahSsra0RHx+PqqoqTJkyBePHj0dcXJxq+7S0NOzcuRO7d+9GXl4ennjiCSxfvhxLlizBihUr8NdffyEoKAiLFi0CALi4uODq1asAgNmzZ+PDDz+En58fHBwc8Oabb2L79u34+uuv4ePjg/fffx9RUVG4fPkyHB0dm/p/DTXApk2bMG/ePHz++ecICwvDmTNnEB0dDSsrK0ycOBGfffYZdu3ahe+//x7e3t64fv06rl+/DgBISEiAq6srvvrqKwwZMgQymUzkn6bl4udID8NELYLLly9DEAQEBATUuV1sbCySk5ORnp4OLy8vAMDGjRvRqVMnJCQk4JFHHgFQndBjYmJgY2MDAHjuuecQGxuLJUuWwM7ODmZmZrC0tIS7u3uNYyxatAiDBg0CAJSUlGDVqlWIiYnB0KFDAQDr1q3DgQMHsH79esyaNUtn/w+o8ebPn4+PPvoIY8aMAQC0adMGFy5cwJo
1azBx4kRkZGSgffv26NWrFyQSCXx8fFT7uri4AKj+Uljb3wvSH36O9DCcTCYCoZ6LwaWkpMDLy0uVpAEgMDAQ9vb2agXXfX19VUkaqC4bl5OTU69jdO3aVfXntLQ0VFZWomfPnqo2U1NTdOvWjQXeDUxJSQnS0tIwefJkWFtbq17vvvsu0tLSAACTJk1CUlIS/P398eqrr+LXX38VOWr6J36OVB88oxZB+/btIZFIdDZhzNTUVO29RCKBUqms1761XXonw1dcXAyg+opH9+7d1fruX/4MDw9Heno69u7di99++w1PPPEEBg4ciG3btuk9XqodP0eqD55Ri8DR0RFRUVFYuXIlSkpKavTff5ypY8eOavejAODChQvIz89HYGBgvY9nZmYGhULx0O3atm0LMzMzHDlyRNVWWVmJhISEBh2Pmp6bmxtatWqFK1euoF27dmqvNm3aqLaztbXF+PHjsW7dOmzduhXbt29Hbm4ugOovePX5e0FNh58j1QfPqEWycuVK9OzZE926dcOiRYsQHByMqqoqHDhwAKtWrUJKSgoGDhyIzp0745lnnsGnn36KqqoqvPLKK+jTp4/aJeuH8fX1xYkTJ3D16lVYW1trnBRmZWWFl19+GbNmzYKjoyO8vb3x/vvvo7S0FJMnT6738bKzs5GdnY3Lly8DAJKTk2FjYwNvb29OSNOhhQsX4tVXX4WdnR2GDBmC8vJynDp1Cnl5eZgxYwY+/vhjeHh4ICwsDFKpFD/88APc3d1VkxV9fX0RGxuLnj17Qi6Xw8HBodbjZGRkIDc3FxkZGVAoFEhKSgIAtGvXTi+PFRk7fo70UGJPO2/JMjMzhSlTpgg+Pj6CmZmZ4OnpKYwcOVI4ePCgapv6Pp71oE8++UTtMY3U1FShR48egoWFRY3Hs/756NS9e/eEadOmCc7Ozlo/njV//vwaj4QBEL766ist/i/RfbU9VrNp0yYhNDRUMDMzExwcHITevXsLO3bsEARBENauXSuEhoYKVlZWgq2trTBgwAAhMTFRte+uXbuEdu3aCSYmJnU+1lPbI34A1P6eUv3xc6SGYplLIiIiA8Z71ERERAaMiZqIiMiAMVETEREZMCZqIiIiA8ZETURkgFj7ne5jojZQkyZNgkQiwfLly9Xad+7cCYlE0mTHzc3NxbRp0+Dv7w8LCwt4e3vj1VdfRUFBgdp2GRkZGD58OCwtLeHq6opZs2ahqqqqyeJqSfjZEwBERkYiKysLdnZ2YodCImOiNmDm5uZ47733kJeXp7djZmZmIjMzEx9++CHOnTuHmJgY7Nu3T23BE4VCgeHDh6OiogJHjx7F119/jZiYGMybN09vcRo7fvZkZmYGd3f3Jv1yRs2E2A9yU+0mTpwoPPbYY0JAQIAwa9YsVfuPP/4o6Ptj+/777wUzMzOhsrJSEARB2LNnjyCVStUWXlm1apVga2srlJeX6zU2Y8TP3jj16dNHmDp1qjB9+nTB3t5ecHV1FdauXSsUFxcLkyZNEqytrYW2bdsKe/bsEQSh5uJC9xdK2bdvnxAQECBYWVkJUVFRQmZmptoxpk+frnbcUaNGCRMnTlS9X7lypdCuXTtBLpcLrq6uwtixY5v6R6dG4hm1AZPJZFi6dCn++9//4saNG/Xeb+jQoWqVeP756tSpU4PiKCgogK2tLUxMqlecPXbsGDp37gw3NzfVNlFRUSgsLMT58+cbNDbVjp+9cfr666/h7OyMkydPYtq0aXj55Zcxbtw4REZGIjExEYMHD8Zzzz2H0tLSWvcvLS3Fhx9+iG+++QaHDh1CRkYGZs6cWe/jnzp1Cq+++ioWLVqE1NRU7Nu3D71799bVj0dNhGt9G7jHH38coaGhmD9/PtavX1+vfb788kvcu3dPY/8/q23V5c6dO1i8eDFeeuklVVt2drbaL2oAqvfZ2dn1Hpvqxs/e+ISEhOCdd94BAMyZMwfLly+Hs7MzoqOjAQDz5s3DqlWrcPbs2Vr3r6y
sxOrVq9G2bVsAwNSpU7Fo0aJ6Hz8jIwNWVlZ47LHHYGNjAx8fH4SFhTXyp6KmxkTdDLz33nvo379/vb85e3p66uS4hYWFGD58OAIDA7FgwQKdjEkNw8/euAQHB6v+LJPJ4OTkhM6dO6va7n/pycnJga2tbY39LS0tVUkaaFjteQAYNGgQfHx84OfnhyFDhmDIkCF4/PHHYWlpqc2PQ3rCS9/NQO/evREVFYU5c+bUa3tdXP4sKirCkCFDYGNjgx9//FHtTMzd3R23bt1S2/7+e3d39wb8ZPQw/OyNS2214x9suz9xTFM9+dr2Fx4o1yCVStXeA9Vn4ffZ2NggMTERW7ZsgYeHB+bNm4eQkBA+AmbgeEbdTCxfvhyhoaHw9/d/6LaNvfxZWFiIqKgoyOVy7Nq1C+bm5mr9ERERWLJkCXJycuDq6goAOHDgAGxtbVm3ugnws6f6cnFxQVZWluq9QqHAuXPn0K9fP1WbiYkJBg4ciIEDB2L+/Pmwt7fH77//jjFjxogRMtUDE3Uzcb8u9WefffbQbRtz+bOwsBCDBw9GaWkpvv32WxQWFqKwsBBA9S8BmUyGwYMHIzAwEM899xzef/99ZGdn45133sGUKVMgl8u1PjbVjp891Vf//v0xY8YM/PLLL2jbti0+/vhjtbPl3bt348qVK+jduzccHBywZ88eKJXKen0JJPEwUTcjixYtwtatW5v0GImJiThx4gSA6oLyD0pPT4evry9kMhl2796Nl19+GREREbCyssLEiRMbNKmFGoafPdXHCy+8gD///BMTJkyAiYkJXn/9dbWzaXt7e+zYsQMLFixAWVkZ2rdvjy1btjT4aQDSL9ajJiIiMmCcTEZERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERELQxrXTcvTNRERI2QnZ2NadOmwc/PD3K5HF5eXhgxYgRiY2N1epy+ffvitdde0+mYdVm7di369u0LW1tbJnWRMVETEWnp6tWr6NKlC37//Xd88MEHSE5Oxr59+9CvXz9MmTJF7/EIgoCqqiqdjFVaWoohQ4bgP//5j07Go0YQtcgmEVEzNnToUMHT01MoLi6u0Xe/jrQgCMK1a9eEkSNHClZWVoKNjY0wbtw4tZre8+fPF0JCQoSNGzcKPj4+gq2trTB+/HihsLBQEITqGuUA1F7p6emqmtV79uwRwsPDBVNTU+HgwYNCWVmZMG3aNMHFxUWQy+VCz549hZMnT6qO989a13VpyLbUNHhGTUSkhdzcXOzbtw9TpkyBlZVVjX57e3sA1QU2Ro0ahdzcXMTHx+PAgQO4cuUKxo8fr7Z9Wloadu7cid27d2P37t2Ij4/H8uXLAQArVqxAREQEoqOjkZWVhaysLHh5ean2nT17NpYvX46UlBQEBwfjzTffxPbt2/H1118jMTER7dq1Q1RUFHJzc5vufwg1GS4hSkSkhcuXL0MQBAQEBNS5XWxsLJKTk5Genq5Krhs3bkSnTp2QkJCARx55BEB1Qo+JiYGNjQ0A4LnnnkNsbCyWLFkCOzs7mJmZwdLSstYqZYsWLcKgQYMAACUlJVi1ahViYmIwdOhQAMC6detw4MABrF+/HrNmzdLZ/wPSD55RExFpQajn6sspKSnw8vJSOwMODAyEvb09UlJSVG2+vr6qJA00rNZ0165dVX9OS0tDZWUlevbsqWozNTVFt27d1I5HzQcTNRGRFtq3bw+JRIKLFy/qZLzaak1rqkv9T7VdeifjwURNRKQFR0dHREVFYeXKlSgpKanRf/9xpo4dO+L69eu4fv26qu/ChQvIz89vUA1vMzMzKBSKh27Xtm1bmJmZ4ciRI6q2yspKJCQksGZ4M8VETUSkpZUrV0KhUKBbt27Yvn07Ll26hJSUFHz22WeIiIgAAAwcOFBVUzwxMREnT57EhAkT0KdPH7VL1g/j6+uLEydO4OrVq7hz547Gs20rKyu8/PLLmDVrFvbt24cLFy4gOjoapaWlmDx5cr2
Pl52djaSkJFy+fBkAkJycjKSkJE5IEwETNRGRlvz8/JCYmIh+/frhjTfeQFBQEAYNGoTY2FisWrUKQPUl7J9++gkODg7o3bs3Bg4cCD8/vwbXF585cyZkMhkCAwPh4uKCjIwMjdsuX74cY8eOxXPPPYfw8HBcvnwZ+/fvh4ODQ72Pt3r1aoSFhSE6OhoA0Lt3b4SFhWHXrl0Nipsaj/WoiYiIDBjPqImIiAwYEzUREZEBY6ImIiIyYEzUREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQFjoiYiIjJgTNREREQG7P8BAzIgTT7Tai8AAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig1 = my_data.mean_diff.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a37d4519", - "metadata": {}, - "source": [ - " Create a Gardner-Altman plot for the Hedges' g effect size." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5e9cac0b", - "metadata": {}, - "outputs": [], - "source": [ - "fig2 = my_data.hedges_g.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f40f8fe0", - "metadata": {}, - "source": [ - "Create a Cumming estimation plot for the mean difference." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0e6a68e", - "metadata": {}, - "outputs": [], - "source": [ - "fig3 = my_data.mean_diff.plot(float_contrast=True);" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "1ee59074", - "metadata": {}, - "source": [ - " Create a paired Gardner-Altman plot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "89a19ee0", - "metadata": {}, - "outputs": [], - "source": [ - "my_data_paired = dabest.load(df, idx=(\"Control 1\", \"Test 1\"),\n", - " id_col = \"ID\", paired='baseline')\n", - "fig4 = my_data_paired.mean_diff.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3c37066a", - "metadata": {}, - "source": [ - "Create a multi-group Cumming plot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "896cac2a", - "metadata": {}, - "outputs": [], - "source": [ - "my_multi_groups = dabest.load(df, id_col = \"ID\", \n", - " idx=((\"Control 1\", \"Test 1\"),\n", - " (\"Control 2\", \"Test 2\")))\n", - "fig5 = my_multi_groups.mean_diff.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "de81e2e4", - "metadata": {}, - "source": [ - "Create a shared control Cumming plot." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f7d518b5", - "metadata": {}, - "outputs": [], - "source": [ - "my_shared_control = dabest.load(df, id_col = \"ID\",\n", - " idx=(\"Control 1\", \"Test 1\",\n", - " \"Test 2\", \"Test 3\"))\n", - "fig6 = my_shared_control.mean_diff.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c80ba34f", - "metadata": {}, - "source": [ - "Create a repeated meausures (against baseline) Slopeplot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d46fd3a", - "metadata": {}, - "outputs": [], - "source": [ - "my_rm_baseline = dabest.load(df, id_col = \"ID\", paired = \"baseline\",\n", - " idx=(\"Control 1\", \"Test 1\",\n", - " \"Test 2\", \"Test 3\"))\n", - "fig7 = my_rm_baseline.mean_diff.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "4eaf4362", - "metadata": {}, - "source": [ - "Create a repeated meausures (sequential) Slopeplot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4b6a3727", - "metadata": {}, - "outputs": [], - "source": [ - "my_rm_sequential = dabest.load(df, id_col = \"ID\", paired = \"sequential\",\n", - " idx=(\"Control 1\", \"Test 1\",\n", - " \"Test 2\", \"Test 3\"))\n", - "fig8 = my_rm_sequential.mean_diff.plot();" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d22bdc4c", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class PermutationTest:\n", - " \"\"\"\n", - " A class to compute and report permutation tests.\n", - " \n", - " Parameters\n", - " ----------\n", - " control : array-like\n", - " test : array-like\n", - " These should be numerical iterables.\n", - " effect_size : string.\n", - " Any one of the following are accepted inputs:\n", - " 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', 'delta_g\" or 'cliffs_delta'\n", - " is_paired : string, default None\n", - " permutation_count : int, default 10000\n", - " The number of 
permutations (reshuffles) to perform.\n", - " random_seed : int, default 12345\n", - " `random_seed` is used to seed the random number generator during\n", - " bootstrap resampling. This ensures that the generated permutations\n", - " are replicable.\n", - " \n", - " Returns\n", - " -------\n", - " A :py:class:`PermutationTest` object:\n", - " `difference`:float\n", - " The effect size of the difference between the control and the test.\n", - " `effect_size`:string\n", - " The type of effect size reported.\n", - " \n", - " \n", - " \"\"\"\n", - " \n", - " def __init__(self, control: array,\n", - " test: array, # These should be numerical iterables.\n", - " effect_size:str, # Any one of the following are accepted inputs: 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'\n", - " is_paired:str=None,\n", - " permutation_count:int=5000, # The number of permutations (reshuffles) to perform.\n", - " random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. 
This ensures that the generated permutations are replicable.\n", - " **kwargs):\n", - " from ._stats_tools.effsize import two_group_difference\n", - " from ._stats_tools.confint_2group_diff import calculate_group_var\n", - " \n", - "\n", - " self.__permutation_count = permutation_count\n", - "\n", - " # Run Sanity Check.\n", - " if is_paired and len(control) != len(test):\n", - " raise ValueError(\"The two arrays do not have the same length.\")\n", - "\n", - " # Initialise random number generator.\n", - " # rng = random.default_rng(seed=random_seed)\n", - " rng = RandomState(PCG64(random_seed))\n", - "\n", - " # Set required constants and variables\n", - " control = array(control)\n", - " test = array(test)\n", - "\n", - " control_sample = control.copy()\n", - " test_sample = test.copy()\n", - "\n", - " BAG = array([*control, *test])\n", - " CONTROL_LEN = int(len(control))\n", - " EXTREME_COUNT = 0.\n", - " THRESHOLD = abs(two_group_difference(control, test, \n", - " is_paired, effect_size))\n", - " self.__permutations = []\n", - " self.__permutations_var = []\n", - "\n", - " for i in range(int(permutation_count)):\n", - " \n", - " if is_paired:\n", - " # Select which control-test pairs to swap.\n", - " random_idx = rng.choice(CONTROL_LEN,\n", - " rng.randint(0, CONTROL_LEN+1),\n", - " replace=False)\n", - "\n", - " # Perform swap.\n", - " for i in random_idx:\n", - " _placeholder = control_sample[i]\n", - " control_sample[i] = test_sample[i]\n", - " test_sample[i] = _placeholder\n", - " \n", - " else:\n", - " # Shuffle the bag and assign to control and test groups.\n", - " # NB. 
rng.shuffle didn't produce replicable results...\n", - " shuffled = rng.permutation(BAG) \n", - " control_sample = shuffled[:CONTROL_LEN]\n", - " test_sample = shuffled[CONTROL_LEN:]\n", - "\n", - "\n", - " es = two_group_difference(control_sample, test_sample, \n", - " False, effect_size)\n", - " \n", - " group_var = calculate_group_var(var(control_sample, ddof=1), \n", - " CONTROL_LEN, \n", - " var(test_sample, ddof=1), \n", - " len(test_sample))\n", - " self.__permutations.append(es)\n", - " self.__permutations_var.append(group_var)\n", - "\n", - " if abs(es) > THRESHOLD:\n", - " EXTREME_COUNT += 1.\n", - "\n", - " self.__permutations = array(self.__permutations)\n", - " self.__permutations_var = array(self.__permutations_var)\n", - "\n", - " self.pvalue = EXTREME_COUNT / permutation_count\n", - "\n", - "\n", - " def __repr__(self):\n", - " return(\"{} permutations were taken. The p-value is {}.\".format(self.permutation_count, \n", - " self.pvalue))\n", - "\n", - "\n", - " @property\n", - " def permutation_count(self):\n", - " \"\"\"\n", - " The number of permuations taken.\n", - " \"\"\"\n", - " return self.__permutation_count\n", - "\n", - "\n", - " @property\n", - " def permutations(self):\n", - " \"\"\"\n", - " The effect sizes of all the permutations in a list.\n", - " \"\"\"\n", - " return self.__permutations\n", - "\n", - " \n", - " @property\n", - " def permutations_var(self):\n", - " \"\"\"\n", - " The experiment group variance of all the permutations in a list.\n", - " \"\"\"\n", - " return self.__permutations_var\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3214e42a", - "metadata": {}, - "source": [ - "**Notes**:\n", - " \n", - "The basic concept of permutation tests is the same as that behind bootstrapping.\n", - "In an \"exact\" permutation test, all possible resuffles of the control and test \n", - "labels are performed, and the proportion of effect sizes that equal or exceed \n", - "the observed effect size is 
computed. This is the probability, under the null \n", - "hypothesis of zero difference between test and control groups, of observing the\n", - "effect size: the p-value of the Student's t-test.\n", - "\n", - "Exact permutation tests are impractical: computing the effect sizes for all reshuffles quickly exceeds trivial computational loads. A control group and a test group both with 10 observations each would have a total of $20!$ or $2.43 \\times {10}^{18}$ reshuffles.\n", - "Therefore, in practice, \"approximate\" permutation tests are performed, where a sufficient number of reshuffles are performed (5,000 or 10,000), from which the p-value is computed.\n", - "\n", - "More information can be found [here](https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests).\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "cc181ae2", - "metadata": {}, - "source": [ - "#### Example: permutation test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3fc2c6b7", - "metadata": {}, - "outputs": [], - "source": [ - "control = norm.rvs(loc=0, size=30, random_state=12345)\n", - "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", - "perm_test = dabest.PermutationTest(control, test, \n", - " effect_size=\"mean_diff\", \n", - " is_paired=None)\n", - "perm_test" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/nbs/API/delta_objects.ipynb b/nbs/API/delta_objects.ipynb index 4dff4c67..0abed891 100644 --- a/nbs/API/delta_objects.ipynb +++ b/nbs/API/delta_objects.ipynb @@ -61,10 +61,9 @@ "#| export\n", "from scipy.stats import norm\n", "import pandas as pd\n", - "from scipy.stats import randint\n", "import numpy as np\n", "from numpy import sort as npsort\n", - "from numpy import sqrt, isinf, isnan\n", + "from numpy import isnan\n", "from string import Template\n", "import warnings\n", "import datetime as dt" diff --git a/nbs/API/effsize.ipynb b/nbs/API/effsize.ipynb index ca4c7385..bed57a4b 100644 --- 
a/nbs/API/effsize.ipynb +++ b/nbs/API/effsize.ipynb @@ -57,7 +57,6 @@ "from __future__ import annotations\n", "import numpy as np\n", "import warnings\n", - "import pandas as pd\n", "from scipy.special import gamma\n", "from scipy.stats import mannwhitneyu" ] @@ -423,14 +422,9 @@ " control_n = len(control)\n", " test_n = len(test)\n", "\n", - " control_mean = np.mean(control)\n", - " test_mean = np.mean(test)\n", + " control_var = np.var(control, ddof=1) # use N-1 to compute the variance.\n", + " test_var = np.var(test, ddof=1)\n", "\n", - " control_var = var(control, ddof=1) # use N-1 to compute the variance.\n", - " test_var = var(test, ddof=1)\n", - "\n", - " control_std = np.sqrt(control_var)\n", - " test_std = np.sqrt(test_var)\n", "\n", " # For unpaired 2-groups standardized mean difference.\n", " pooled = np.sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) /\n", diff --git a/nbs/API/effsize_objects.ipynb b/nbs/API/effsize_objects.ipynb new file mode 100644 index 00000000..8f45cc5d --- /dev/null +++ b/nbs/API/effsize_objects.ipynb @@ -0,0 +1,1861 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Effectsize objects\n", + "\n", + "> The different objects involved in the computations of bootstrapped effect sizes.\n", + "\n", + "- order: 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp _effsize_objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from __future__ import annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nbdev.showdoc import *\n", + "import nbdev\n", + "nbdev.nbdev_export()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "import dabest" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import pandas as pd\n", + "import lqrt\n", + "from scipy.stats import norm\n", + "from numpy import array, isnan, isinf, repeat, random, isin, abs, var\n", + "from numpy import sort as npsort\n", + "from numpy import nan as npnan\n", + "from numpy.random import PCG64, RandomState\n", + "from statsmodels.stats.contingency_tables import mcnemar\n", + "import warnings\n", + "from string import Template\n", + "import scipy.stats as spstats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class TwoGroupsEffectSize(object):\n", + "\n", + " \"\"\"\n", + " A class to compute and store the results of bootstrapped\n", + " mean differences between two groups.\n", + " \n", + " Compute the effect size between two groups.\n", + "\n", + " Parameters\n", + " ----------\n", + " control : array-like\n", + " test : array-like\n", + " These should be numerical iterables.\n", + " effect_size : string.\n", + " Any one of the following are accepted inputs:\n", + " 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'\n", + " is_paired : string, default None\n", + " resamples : int, default 5000\n", + " The number of bootstrap resamples to be taken for the calculation\n", + " of the confidence interval limits.\n", + " permutation_count : int, default 5000\n", + " The number of permutations (reshuffles) to perform for the \n", + " computation of the permutation p-value\n", + " ci : float, default 95\n", + " The confidence interval width. The default of 95 produces 95%\n", + " confidence intervals.\n", + " random_seed : int, default 12345\n", + " `random_seed` is used to seed the random number generator during\n", + " bootstrap resampling. 
This ensures that the confidence intervals\n", + " reported are replicable.\n", + "\n", + " Returns\n", + " -------\n", + " A :py:class:`TwoGroupEffectSize` object:\n", + " `difference` : float\n", + " The effect size of the difference between the control and the test.\n", + " `effect_size` : string\n", + " The type of effect size reported.\n", + " `is_paired` : string\n", + " The type of repeated-measures experiment.\n", + " `ci` : float\n", + " Returns the width of the confidence interval, in percent.\n", + " `alpha` : float\n", + " Returns the significance level of the statistical test as a float between 0 and 1.\n", + " `resamples` : int\n", + " The number of resamples performed during the bootstrap procedure.\n", + " `bootstraps` : numpy ndarray\n", + " The generated bootstraps of the effect size.\n", + " `random_seed` : int\n", + " The number used to initialise the numpy random seed generator, ie.`seed_value` from `numpy.random.seed(seed_value)` is returned.\n", + " `bca_low, bca_high` : float\n", + " The bias-corrected and accelerated confidence interval lower limit and upper limits, respectively.\n", + " `pct_low, pct_high` : float\n", + " The percentile confidence interval lower limit and upper limits, respectively.\n", + " \"\"\"\n", + "\n", + " def __init__(self, control, test, effect_size,\n", + " proportional=False,\n", + " is_paired=None, ci=95,\n", + " resamples=5000, \n", + " permutation_count=5000, \n", + " random_seed=12345):\n", + " \n", + " from ._stats_tools import effsize as es\n", + " from ._stats_tools import confint_2group_diff as ci2g\n", + "\n", + "\n", + " self.__EFFECT_SIZE_DICT = {\"mean_diff\" : \"mean difference\",\n", + " \"median_diff\" : \"median difference\",\n", + " \"cohens_d\" : \"Cohen's d\",\n", + " \"cohens_h\" : \"Cohen's h\",\n", + " \"hedges_g\" : \"Hedges' g\",\n", + " \"cliffs_delta\" : \"Cliff's delta\",\n", + " \"delta_g\" : \"deltas' g\"}\n", + "\n", + "\n", + " kosher_es = [a for a in 
self.__EFFECT_SIZE_DICT.keys()]\n", + " if effect_size not in kosher_es:\n", + " err1 = \"The effect size '{}'\".format(effect_size)\n", + " err2 = \"is not one of {}\".format(kosher_es)\n", + " raise ValueError(\" \".join([err1, err2]))\n", + "\n", + " if effect_size == \"cliffs_delta\" and is_paired:\n", + " err1 = \"`paired` is not None; therefore Cliff's delta is not defined.\"\n", + " raise ValueError(err1)\n", + "\n", + " if proportional==True and effect_size not in ['mean_diff','cohens_h']:\n", + " err1 = \"`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined.\"\n", + " raise ValueError(err1)\n", + "\n", + " if proportional==True and (isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False):\n", + " err1 = \"`proportional` is True; Only accept binary data consisting of 0 and 1.\"\n", + " raise ValueError(err1)\n", + "\n", + " # Convert to numpy arrays for speed.\n", + " # NaNs are automatically dropped.\n", + " control = array(control)\n", + " test = array(test)\n", + " control = control[~isnan(control)]\n", + " test = test[~isnan(test)]\n", + "\n", + " self.__effect_size = effect_size\n", + " # TODO refactor this\n", + " self.__control = control\n", + " self.__test = test\n", + " self.__is_paired = is_paired\n", + " self.__resamples = resamples\n", + " self.__permutation_count = permutation_count\n", + " self.__random_seed = random_seed\n", + " self.__ci = ci\n", + " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", + "\n", + " self.__difference = es.two_group_difference(\n", + " control, test, is_paired, effect_size)\n", + " \n", + " self.__jackknives = ci2g.compute_meandiff_jackknife(\n", + " control, test, is_paired, effect_size)\n", + "\n", + " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", + "\n", + " bootstraps = ci2g.compute_bootstrapped_diff(\n", + " control, test, is_paired, effect_size,\n", + " resamples, random_seed)\n", + " self.__bootstraps = bootstraps\n", + " 
\n", + " sorted_bootstraps = npsort(self.__bootstraps)\n", + " # Added in v0.2.6.\n", + " # Raises a UserWarning if there are any infiinities in the bootstraps.\n", + " num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)])\n", + " \n", + " if num_infinities > 0:\n", + " warn_msg = \"There are {} bootstrap(s) that are not defined. \"\\\n", + " \"This is likely due to smaple sample sizes. \"\\\n", + " \"The values in a bootstrap for a group will be more likely \"\\\n", + " \"to be all equal, with a resulting variance of zero. \"\\\n", + " \"The computation of Cohen's d and Hedges' g thus \"\\\n", + " \"involved a division by zero. \"\n", + " warnings.warn(warn_msg.format(num_infinities), \n", + " category=UserWarning)\n", + "\n", + " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", + " self.__bootstraps, self.__difference)\n", + "\n", + " # Compute BCa intervals.\n", + " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", + " self.__bias_correction, self.__acceleration_value,\n", + " self.__resamples, ci)\n", + "\n", + " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", + "\n", + " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", + " self.__bca_low = sorted_bootstraps[bca_idx_low]\n", + " self.__bca_high = sorted_bootstraps[bca_idx_high]\n", + "\n", + " err1 = \"The $lim_type limit of the interval\"\n", + " err2 = \"was in the $loc 10 values.\"\n", + " err3 = \"The result should be considered unstable.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if bca_idx_low <= 10:\n", + " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", + " loc=\"bottom\"),\n", + " stacklevel=1)\n", + "\n", + " if bca_idx_high >= resamples-9:\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", + " loc=\"top\"),\n", + " stacklevel=1)\n", + "\n", + " else:\n", + " # TODO improve error handling, separate file\n", + " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", + 
" err2 = \"It is set to the effect size itself.\"\n", + " err3 = \"All bootstrap values were likely all the same.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if isnan(bca_idx_low):\n", + " self.__bca_low = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", + " stacklevel=0)\n", + "\n", + " if isnan(bca_idx_high):\n", + " self.__bca_high = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", + " stacklevel=0)\n", + "\n", + " # Compute percentile intervals.\n", + " pct_idx_low = int((self.__alpha/2) * resamples)\n", + " pct_idx_high = int((1-(self.__alpha/2)) * resamples)\n", + "\n", + " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", + " self.__pct_low = sorted_bootstraps[pct_idx_low]\n", + " self.__pct_high = sorted_bootstraps[pct_idx_high]\n", + "\n", + " # Perform statistical tests.\n", + " self.__PermutationTest_result = PermutationTest(control, test, \n", + " effect_size, \n", + " is_paired,\n", + " permutation_count)\n", + " \n", + " if is_paired and proportional is False:\n", + " # Wilcoxon, a non-parametric version of the paired T-test.\n", + " wilcoxon = spstats.wilcoxon(control, test)\n", + " self.__pvalue_wilcoxon = wilcoxon.pvalue\n", + " self.__statistic_wilcoxon = wilcoxon.statistic\n", + " \n", + " \n", + " if effect_size != \"median_diff\":\n", + " # Paired Student's t-test.\n", + " paired_t = spstats.ttest_rel(control, test, nan_policy='omit')\n", + " self.__pvalue_paired_students_t = paired_t.pvalue\n", + " self.__statistic_paired_students_t = paired_t.statistic\n", + " # TODO dead code\n", + " standardized_es = es.cohens_d(control, test, is_paired)\n", + "\n", + " elif is_paired and proportional:\n", + " # for binary paired data, use McNemar's test\n", + " # References:\n", + " # https://en.wikipedia.org/wiki/McNemar%27s_test\n", + "\n", + " df_temp = pd.DataFrame({'control': control, 'test': test})\n", + " x1 = 
len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 0)])\n", + " x2 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 1)])\n", + " x3 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 0)])\n", + " x4 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 1)])\n", + " table = [[x1,x2],[x3,x4]]\n", + " _mcnemar = mcnemar(table, exact=True, correction=True)\n", + " self.__pvalue_mcnemar = _mcnemar.pvalue\n", + " self.__statistic_mcnemar = _mcnemar.statistic\n", + "\n", + " elif effect_size == \"cliffs_delta\":\n", + " # Let's go with Brunner-Munzel!\n", + " brunner_munzel = spstats.brunnermunzel(control, test,\n", + " nan_policy='omit')\n", + " self.__pvalue_brunner_munzel = brunner_munzel.pvalue\n", + " self.__statistic_brunner_munzel = brunner_munzel.statistic\n", + "\n", + "\n", + " elif effect_size == \"median_diff\":\n", + " # According to scipy's documentation of the function,\n", + " # \"The Kruskal-Wallis H-test tests the null hypothesis\n", + " # that the population median of all of the groups are equal.\"\n", + " kruskal = spstats.kruskal(control, test, nan_policy='omit')\n", + " self.__pvalue_kruskal = kruskal.pvalue\n", + " self.__statistic_kruskal = kruskal.statistic\n", + "\n", + " else: # for mean difference, Cohen's d, and Hedges' g.\n", + " # Welch's t-test, assumes normality of distributions,\n", + " # but does not assume equal variances.\n", + " welch = spstats.ttest_ind(control, test, equal_var=False,\n", + " nan_policy='omit')\n", + " self.__pvalue_welch = welch.pvalue\n", + " self.__statistic_welch = welch.statistic\n", + "\n", + " # Student's t-test, assumes normality of distributions,\n", + " # as well as assumption of equal variances.\n", + " students_t = spstats.ttest_ind(control, test, equal_var=True,\n", + " nan_policy='omit')\n", + " self.__pvalue_students_t = students_t.pvalue\n", + " self.__statistic_students_t = students_t.statistic\n", + "\n", + " # Mann-Whitney test: Non parametric,\n", + " # does not 
assume normality of distributions\n", + " try:\n", + " mann_whitney = spstats.mannwhitneyu(control, test, \n", + " alternative='two-sided')\n", + " self.__pvalue_mann_whitney = mann_whitney.pvalue\n", + " self.__statistic_mann_whitney = mann_whitney.statistic\n", + " except ValueError:\n", + " # TODO At least print some warning?\n", + " # Occurs when the control and test are exactly identical\n", + " # in terms of rank (eg. all zeros.)\n", + " pass\n", + " \n", + " \n", + " standardized_es = es.cohens_d(control, test, is_paired = None)\n", + " \n", + " # The Cohen's h calculation is for binary categorical data\n", + " try:\n", + " self.__proportional_difference = es.cohens_h(control, test)\n", + " except ValueError:\n", + " # TODO At least print some warning?\n", + " # Occur only when the data consists not only 0's and 1's.\n", + " pass\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):\n", + " \n", + " RM_STATUS = {'baseline' : 'for repeated measures against baseline \\n', \n", + " 'sequential': 'for the sequential design of repeated-measures experiment \\n',\n", + " 'None' : ''\n", + " }\n", + "\n", + " PAIRED_STATUS = {'baseline' : 'paired', \n", + " 'sequential' : 'paired',\n", + " 'None' : 'unpaired'\n", + " }\n", + "\n", + " first_line = {\"rm_status\" : RM_STATUS[str(self.__is_paired)],\n", + " \"es\" : self.__EFFECT_SIZE_DICT[self.__effect_size],\n", + " \"paired_status\": PAIRED_STATUS[str(self.__is_paired)]}\n", + " \n", + "\n", + " out1 = \"The {paired_status} {es} {rm_status}\".format(**first_line)\n", + " \n", + " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", + " if \".\" in str(self.__ci):\n", + " ci_width = base_string_fmt.format(self.__ci)\n", + " else:\n", + " ci_width = str(self.__ci)\n", + " \n", + " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", + " \"ci\" : ci_width,\n", + " \"bca_low\" : base_string_fmt.format(self.__bca_low),\n", + " 
\"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", + " \n", + " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", + " out = out1 + out2\n", + " \n", + " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", + " \n", + " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(pval_rounded)\n", + " p2 = \"calculated for legacy purposes only. \"\n", + " pvalue = p1 + p2\n", + " \n", + " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", + " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", + " bs = bs1 + bs2\n", + "\n", + " pval_def1 = \"Any p-value reported is the probability of observing the\" + \\\n", + " \"effect size (or greater),\\nassuming the null hypothesis of\" + \\\n", + " \"zero difference is true.\"\n", + " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", + " \"control and test labels were performed.\"\n", + " pval_def = pval_def1 + pval_def2\n", + "\n", + " if show_resample_count and define_pval:\n", + " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", + " elif ~show_resample_count and define_pval:\n", + " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, pval_def)\n", + " elif show_resample_count and ~define_pval:\n", + " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, bs)\n", + " else:\n", + " return \"{}\\n{}\".format(out, pvalue)\n", + "\n", + "\n", + "\n", + " def to_dict(self):\n", + " \"\"\"\n", + " Returns the attributes of the `dabest.TwoGroupEffectSize` object as a\n", + " dictionary.\n", + " \"\"\"\n", + " # Only get public (user-facing) attributes.\n", + " attrs = [a for a in dir(self)\n", + " if not a.startswith((\"_\", \"to_dict\"))]\n", + " out = {}\n", + " for a in attrs:\n", + " out[a] = getattr(self, a)\n", + " return out\n", + "\n", + "\n", + " @property\n", + " def difference(self):\n", + " \"\"\"\n", + " Returns the difference between the control and the test.\n", + " \"\"\"\n", + " return 
self.__difference\n", + "\n", + " @property\n", + " def effect_size(self):\n", + " \"\"\"\n", + " Returns the type of effect size reported.\n", + " \"\"\"\n", + " return self.__EFFECT_SIZE_DICT[self.__effect_size]\n", + "\n", + " @property\n", + " def is_paired(self):\n", + " return self.__is_paired\n", + "\n", + " @property\n", + " def ci(self):\n", + " \"\"\"\n", + " Returns the width of the confidence interval, in percent.\n", + " \"\"\"\n", + " return self.__ci\n", + "\n", + " @property\n", + " def alpha(self):\n", + " \"\"\"\n", + " Returns the significance level of the statistical test as a float\n", + " between 0 and 1.\n", + " \"\"\"\n", + " return self.__alpha\n", + "\n", + " @property\n", + " def resamples(self):\n", + " \"\"\"\n", + " The number of resamples performed during the bootstrap procedure.\n", + " \"\"\"\n", + " return self.__resamples\n", + "\n", + " @property\n", + " def bootstraps(self):\n", + " \"\"\"\n", + " The generated bootstraps of the effect size.\n", + " \"\"\"\n", + " return self.__bootstraps\n", + "\n", + " @property\n", + " def random_seed(self):\n", + " \"\"\"\n", + " The number used to initialise the numpy random seed generator, ie.\n", + " `seed_value` from `numpy.random.seed(seed_value)` is returned.\n", + " \"\"\"\n", + " return self.__random_seed\n", + "\n", + " @property\n", + " def bca_interval_idx(self):\n", + " return self.__bca_interval_idx\n", + "\n", + " @property\n", + " def bca_low(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__bca_low\n", + "\n", + " @property\n", + " def bca_high(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval upper limit.\n", + " \"\"\"\n", + " return self.__bca_high\n", + "\n", + " @property\n", + " def pct_interval_idx(self):\n", + " return self.__pct_interval_idx\n", + "\n", + " @property\n", + " def pct_low(self):\n", + " \"\"\"\n", + " The percentile confidence 
interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_low\n", + "\n", + " @property\n", + " def pct_high(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_high\n", + "\n", + "\n", + "\n", + " @property\n", + " def pvalue_brunner_munzel(self):\n", + " try:\n", + " return self.__pvalue_brunner_munzel\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_brunner_munzel(self):\n", + " try:\n", + " return self.__statistic_brunner_munzel\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + "\n", + "\n", + " @property\n", + " def pvalue_wilcoxon(self):\n", + " try:\n", + " return self.__pvalue_wilcoxon\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_wilcoxon(self):\n", + " try:\n", + " return self.__statistic_wilcoxon\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def pvalue_mcnemar(self):\n", + " try:\n", + " return self.__pvalue_mcnemar\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_mcnemar(self):\n", + " try:\n", + " return self.__statistic_mcnemar\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + "\n", + "\n", + " @property\n", + " def pvalue_paired_students_t(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__pvalue_paired_students_t\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_paired_students_t(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__statistic_paired_students_t\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + "\n", + "\n", + " @property\n", + " def pvalue_kruskal(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__pvalue_kruskal\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def 
statistic_kruskal(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__statistic_kruskal\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + "\n", + " @property\n", + " def pvalue_welch(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__pvalue_welch\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_welch(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__statistic_welch\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + "\n", + "\n", + " @property\n", + " def pvalue_students_t(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__pvalue_students_t\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_students_t(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__statistic_students_t\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + "\n", + "\n", + " @property\n", + " def pvalue_mann_whitney(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__pvalue_mann_whitney\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + "\n", + "\n", + " @property\n", + " def statistic_mann_whitney(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__statistic_mann_whitney\n", + " except AttributeError:\n", + " return npnan\n", + " \n", + " @property\n", + " def pvalue_permutation(self):\n", + " # TODO Missing docstring\n", + " return self.__PermutationTest_result.pvalue\n", + " \n", + "\n", + " @property\n", + " def permutation_count(self):\n", + " \"\"\"\n", + " The number of permuations taken.\n", + " \"\"\"\n", + " return self.__PermutationTest_result.permutation_count\n", + "\n", + " \n", + " @property\n", + " def permutations(self):\n", + " return self.__PermutationTest_result.permutations\n", + "\n", + " \n", + " @property\n", + " def permutations_var(self):\n", + " return 
self.__PermutationTest_result.permutations_var\n", + "\n", + "\n", + " @property\n", + " def proportional_difference(self):\n", + " try:\n", + " return self.__proportional_difference\n", + " except AttributeError:\n", + " return npnan\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "The unpaired mean difference is -0.253 [95%CI -0.78, 0.25].\n", + "The p-value of the two-sided permutation t-test is 0.348, calculated for legacy purposes only. \n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing theeffect size (or greater),\n", + "assuming the null hypothesis ofzero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "random.seed(12345)\n", + "control = norm.rvs(loc=0, size=30)\n", + "test = norm.rvs(loc=0.5, size=30)\n", + "effsize = dabest.TwoGroupsEffectSize(control, test, \"mean_diff\")\n", + "effsize" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'alpha': 0.05,\n", + " 'bca_high': 0.24951887238295106,\n", + " 'bca_interval_idx': (125, 4875),\n", + " 'bca_low': -0.7801782111071534,\n", + " 'bootstraps': array([-0.3649424 , -0.45018155, -0.56034412, ..., -0.49805581,\n", + " -0.25334475, -0.55206229]),\n", + " 'ci': 95,\n", + " 'difference': -0.25315417702752846,\n", + " 'effect_size': 'mean difference',\n", + " 'is_paired': None,\n", + " 'pct_high': 0.24951887238295106,\n", + " 'pct_interval_idx': (125, 4875),\n", + " 'pct_low': -0.7801782111071534,\n", + " 'permutation_count': 5000,\n", + " 'permutations': array([ 0.17221029, 0.03112419, -0.13911387, 
..., -0.38007941,\n", + " 0.30261507, -0.09073054]),\n", + " 'permutations_var': array([0.07201642, 0.07251104, 0.07219407, ..., 0.07003705, 0.07094885,\n", + " 0.07238581]),\n", + " 'proportional_difference': nan,\n", + " 'pvalue_brunner_munzel': nan,\n", + " 'pvalue_kruskal': nan,\n", + " 'pvalue_mann_whitney': 0.5201446121616038,\n", + " 'pvalue_mcnemar': nan,\n", + " 'pvalue_paired_students_t': nan,\n", + " 'pvalue_permutation': 0.3484,\n", + " 'pvalue_students_t': 0.34743913903372836,\n", + " 'pvalue_welch': 0.3474493875548964,\n", + " 'pvalue_wilcoxon': nan,\n", + " 'random_seed': 12345,\n", + " 'resamples': 5000,\n", + " 'statistic_brunner_munzel': nan,\n", + " 'statistic_kruskal': nan,\n", + " 'statistic_mann_whitney': 494.0,\n", + " 'statistic_mcnemar': nan,\n", + " 'statistic_paired_students_t': nan,\n", + " 'statistic_students_t': 0.9472545159069105,\n", + " 'statistic_welch': 0.9472545159069105,\n", + " 'statistic_wilcoxon': nan}" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "effsize.to_dict() " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class EffectSizeDataFrame(object):\n", + " \"\"\"A class that generates and stores the results of bootstrapped effect\n", + " sizes for several comparisons.\"\"\"\n", + "\n", + " def __init__(self, dabest, effect_size,\n", + " is_paired, ci=95, proportional=False,\n", + " resamples=5000, \n", + " permutation_count=5000,\n", + " random_seed=12345, \n", + " x1_level=None, x2=None, \n", + " delta2=False, experiment_label=None,\n", + " mini_meta=False):\n", + " \"\"\"\n", + " Parses the data from a Dabest object, enabling plotting and printing\n", + " capability for the effect size of interest.\n", + " \"\"\"\n", + "\n", + " self.__dabest_obj = dabest\n", + " self.__effect_size = effect_size\n", + " self.__is_paired = is_paired\n", + " self.__ci = ci\n", + " self.__resamples = resamples\n", + " 
self.__permutation_count = permutation_count\n", + " self.__random_seed = random_seed\n", + " self.__proportional = proportional\n", + " self.__x1_level = x1_level\n", + " self.__experiment_label = experiment_label \n", + " self.__x2 = x2\n", + " self.__delta2 = delta2 \n", + " self.__mini_meta = mini_meta\n", + "\n", + "\n", + " def __pre_calc(self):\n", + " from .misc_tools import print_greeting, get_varname\n", + " from ._stats_tools import confint_2group_diff as ci2g\n", + " from ._delta_objects import MiniMetaDelta, DeltaDelta\n", + "\n", + " idx = self.__dabest_obj.idx\n", + " dat = self.__dabest_obj._plot_data\n", + " xvar = self.__dabest_obj._xvar\n", + " yvar = self.__dabest_obj._yvar\n", + "\n", + " out = []\n", + " reprs = []\n", + " \n", + " if self.__delta2==True:\n", + " mixed_data = []\n", + " for j, current_tuple in enumerate(idx):\n", + " if self.__is_paired != \"sequential\":\n", + " cname = current_tuple[0]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + "\n", + " for ix, tname in enumerate(current_tuple[1:]):\n", + " if self.__is_paired == \"sequential\":\n", + " cname = current_tuple[ix]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + " test = dat[dat[xvar] == tname][yvar].copy()\n", + " mixed_data.append(control)\n", + " mixed_data.append(test)\n", + " bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(mixed_data[0], mixed_data[1], mixed_data[2], mixed_data[3],\n", + " self.__is_paired, self.__resamples, self.__random_seed)\n", + "\n", + "\n", + " for j, current_tuple in enumerate(idx):\n", + " if self.__is_paired!=\"sequential\":\n", + " cname = current_tuple[0]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + "\n", + " for ix, tname in enumerate(current_tuple[1:]):\n", + " if self.__is_paired == \"sequential\":\n", + " cname = current_tuple[ix]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + " test = dat[dat[xvar] == tname][yvar].copy()\n", + "\n", + " result = 
TwoGroupsEffectSize(control, test,\n", + " self.__effect_size,\n", + " self.__proportional,\n", + " self.__is_paired,\n", + " self.__ci,\n", + " self.__resamples,\n", + " self.__permutation_count,\n", + " self.__random_seed)\n", + " r_dict = result.to_dict()\n", + " r_dict[\"control\"] = cname\n", + " r_dict[\"test\"] = tname\n", + " r_dict[\"control_N\"] = int(len(control))\n", + " r_dict[\"test_N\"] = int(len(test))\n", + " out.append(r_dict)\n", + " if j == len(idx)-1 and ix == len(current_tuple)-2:\n", + " if self.__delta2 and self.__effect_size in [\"mean_diff\",\"delta_g\"]:\n", + " resamp_count = False\n", + " def_pval = False\n", + " elif self.__mini_meta and self.__effect_size == \"mean_diff\":\n", + " resamp_count = False\n", + " def_pval = False\n", + " else:\n", + " resamp_count = True\n", + " def_pval = True\n", + " else:\n", + " resamp_count = False\n", + " def_pval = False\n", + "\n", + " text_repr = result.__repr__(show_resample_count=resamp_count,\n", + " define_pval=def_pval)\n", + "\n", + " to_replace = \"between {} and {} is\".format(cname, tname)\n", + " text_repr = text_repr.replace(\"is\", to_replace, 1)\n", + "\n", + " reprs.append(text_repr)\n", + "\n", + "\n", + " self.__for_print = \"\\n\\n\".join(reprs)\n", + "\n", + " out_ = pd.DataFrame(out)\n", + "\n", + " columns_in_order = ['control', 'test', 'control_N', 'test_N',\n", + " 'effect_size', 'is_paired',\n", + " 'difference', 'ci',\n", + "\n", + " 'bca_low', 'bca_high', 'bca_interval_idx',\n", + " 'pct_low', 'pct_high', 'pct_interval_idx',\n", + " \n", + " 'bootstraps', 'resamples', 'random_seed',\n", + " \n", + " 'permutations', 'pvalue_permutation', 'permutation_count', 'permutations_var',\n", + " \n", + " 'pvalue_welch',\n", + " 'statistic_welch',\n", + "\n", + " 'pvalue_students_t',\n", + " 'statistic_students_t',\n", + "\n", + " 'pvalue_mann_whitney',\n", + " 'statistic_mann_whitney',\n", + "\n", + " 'pvalue_brunner_munzel',\n", + " 'statistic_brunner_munzel',\n", + "\n", + " 
'pvalue_wilcoxon',\n", + " 'statistic_wilcoxon',\n", + "\n", + " 'pvalue_mcnemar',\n", + " 'statistic_mcnemar',\n", + "\n", + " 'pvalue_paired_students_t',\n", + " 'statistic_paired_students_t',\n", + "\n", + " 'pvalue_kruskal',\n", + " 'statistic_kruskal',\n", + " 'proportional_difference'\n", + " ]\n", + " self.__results = out_.reindex(columns=columns_in_order)\n", + " self.__results.dropna(axis=\"columns\", how=\"all\", inplace=True)\n", + " \n", + " # Add the is_paired column back when is_paired is None\n", + " if self.is_paired is None:\n", + " self.__results.insert(5, 'is_paired', self.__results.apply(lambda _: None, axis=1))\n", + " \n", + " # Create and compute the delta-delta statistics\n", + " if self.__delta2:\n", + " self.__delta_delta = DeltaDelta(self,\n", + " self.__permutation_count,\n", + " bootstraps_delta_delta,\n", + " self.__ci)\n", + " reprs.append(self.__delta_delta.__repr__(header=False))\n", + " elif self.__delta2 and self.__effect_size not in [\"mean_diff\", \"delta_g\"]:\n", + " self.__delta_delta = \"Delta-delta is not supported for {}.\".format(self.__effect_size)\n", + " else:\n", + " self.__delta_delta = \"`delta2` is False; delta-delta is therefore not calculated.\"\n", + "\n", + " # Create and compute the weighted average statistics\n", + " if self.__mini_meta and self.__effect_size == \"mean_diff\":\n", + " self.__mini_meta_delta = MiniMetaDelta(self,\n", + " self.__permutation_count,\n", + " self.__ci)\n", + " reprs.append(self.__mini_meta_delta.__repr__(header=False))\n", + " elif self.__mini_meta and self.__effect_size != \"mean_diff\":\n", + " self.__mini_meta_delta = \"Weighted delta is not supported for {}.\".format(self.__effect_size)\n", + " else:\n", + " self.__mini_meta_delta = \"`mini_meta` is False; weighted delta is therefore not calculated.\"\n", + " \n", + " \n", + " varname = get_varname(self.__dabest_obj)\n", + " lastline = \"To get the results of all valid statistical tests, \" +\\\n", + " \"use 
`{}.{}.statistical_tests`\".format(varname, self.__effect_size)\n", + " reprs.append(lastline)\n", + "\n", + " reprs.insert(0, print_greeting())\n", + "\n", + " self.__for_print = \"\\n\\n\".join(reprs)\n", + "\n", + "\n", + " def __repr__(self):\n", + " try:\n", + " return self.__for_print\n", + " except AttributeError:\n", + " self.__pre_calc()\n", + " return self.__for_print\n", + " \n", + " \n", + " \n", + " def __calc_lqrt(self):\n", + " \n", + " rnd_seed = self.__random_seed\n", + " db_obj = self.__dabest_obj\n", + " dat = db_obj._plot_data\n", + " xvar = db_obj._xvar\n", + " yvar = db_obj._yvar\n", + " delta2 = self.__delta2\n", + " \n", + "\n", + " out = []\n", + "\n", + " for j, current_tuple in enumerate(db_obj.idx):\n", + " if self.__is_paired != \"sequential\":\n", + " cname = current_tuple[0]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + "\n", + " for ix, tname in enumerate(current_tuple[1:]):\n", + " if self.__is_paired == \"sequential\":\n", + " cname = current_tuple[ix]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + " test = dat[dat[xvar] == tname][yvar].copy()\n", + " \n", + " if self.__is_paired: \n", + " # Refactored here in v0.3.0 for performance issues.\n", + " lqrt_result = lqrt.lqrtest_rel(control, test, \n", + " random_state=rnd_seed)\n", + " \n", + " out.append({\"control\": cname, \"test\": tname, \n", + " \"control_N\": int(len(control)), \n", + " \"test_N\": int(len(test)),\n", + " \"pvalue_paired_lqrt\": lqrt_result.pvalue,\n", + " \"statistic_paired_lqrt\": lqrt_result.statistic\n", + " })\n", + "\n", + " else:\n", + " # Likelihood Q-Ratio test:\n", + " lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, \n", + " random_state=rnd_seed,\n", + " equal_var=True)\n", + " \n", + " \n", + " lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, \n", + " random_state=rnd_seed,\n", + " equal_var=False)\n", + " \n", + " out.append({\"control\": cname, \"test\": tname, \n", + " \"control_N\": 
int(len(control)), \n", + " \"test_N\": int(len(test)),\n", + " \n", + " \"pvalue_lqrt_equal_var\" : lqrt_equal_var_result.pvalue,\n", + " \"statistic_lqrt_equal_var\" : lqrt_equal_var_result.statistic,\n", + " \"pvalue_lqrt_unequal_var\" : lqrt_unequal_var_result.pvalue,\n", + " \"statistic_lqrt_unequal_var\" : lqrt_unequal_var_result.statistic,\n", + " }) \n", + " self.__lqrt_results = pd.DataFrame(out)\n", + "\n", + "\n", + " def plot(self, color_col=None,\n", + "\n", + " raw_marker_size=6, es_marker_size=9,\n", + "\n", + " swarm_label=None, contrast_label=None, delta2_label=None,\n", + " swarm_ylim=None, contrast_ylim=None, delta2_ylim=None,\n", + "\n", + " custom_palette=None, swarm_desat=0.5, halfviolin_desat=1,\n", + " halfviolin_alpha=0.8, \n", + "\n", + " face_color = None,\n", + " #bar plot\n", + " bar_label=None, bar_desat=0.5, bar_width = 0.5,bar_ylim = None,\n", + " # error bar of proportion plot\n", + " ci=None, ci_type='bca', err_color=None,\n", + "\n", + " float_contrast=True,\n", + " show_pairs=True,\n", + " show_delta2=True,\n", + " show_mini_meta=True,\n", + " group_summaries=None,\n", + " group_summaries_offset=0.1,\n", + "\n", + " fig_size=None,\n", + " dpi=100,\n", + " ax=None,\n", + " \n", + " contrast_show_es = False,\n", + " es_sf = 2,\n", + " es_fontsize = 10,\n", + " \n", + " contrast_show_deltas = True,\n", + " \n", + " gridkey_rows=None,\n", + " gridkey_merge_pairs = False,\n", + " gridkey_show_Ns = True,\n", + " gridkey_show_es = True,\n", + "\n", + " swarmplot_kwargs=None,\n", + " barplot_kwargs=None,\n", + " violinplot_kwargs=None,\n", + " slopegraph_kwargs=None,\n", + " sankey_kwargs=None,\n", + " reflines_kwargs=None,\n", + " group_summary_kwargs=None,\n", + " legend_kwargs=None,\n", + " title=None, fontsize_title = 16,\n", + " fontsize_rawxlabel = 12,fontsize_rawylabel = 12,fontsize_contrastxlabel = 12, fontsize_contrastylabel = 12,\n", + " fontsize_delta2label = 12):\n", + "\n", + " \"\"\"\n", + " Creates an estimation plot for 
the effect size of interest.\n", + " \n", + "\n", + " Parameters\n", + " ----------\n", + " color_col : string, default None\n", + " Column to be used for colors.\n", + " raw_marker_size : float, default 6\n", + " The diameter (in points) of the marker dots plotted in the\n", + " swarmplot.\n", + " es_marker_size : float, default 9\n", + " The size (in points) of the effect size points on the difference\n", + " axes.\n", + " swarm_label, contrast_label, delta2_label : strings, default None\n", + " Set labels for the y-axis of the swarmplot and the contrast plot,\n", + " respectively. If `swarm_label` is not specified, it defaults to\n", + " \"value\", unless a column name was passed to `y`. If\n", + " `contrast_label` is not specified, it defaults to the effect size\n", + " being plotted. If `delta2_label` is not specifed, it defaults to \n", + " \"delta - delta\"\n", + " swarm_ylim, contrast_ylim, delta2_ylim : tuples, default None\n", + " The desired y-limits of the raw data (swarmplot) axes, the\n", + " difference axes and the delta-delta axes respectively, as a tuple. \n", + " These will be autoscaled to sensible values if they are not \n", + " specified. The delta2 axes and contrast axes should have the same \n", + " limits for y. When `show_delta2` is True, if both of the `contrast_ylim`\n", + " and `delta2_ylim` are not None, then they must be specified with the \n", + " same values; when `show_delta2` is True and only one of them is specified,\n", + " then the other will automatically be assigned with the same value.\n", + " Specifying `delta2_ylim` does not have any effect when `show_delta2` is\n", + " False. \n", + " custom_palette : dict, list, or matplotlib color palette, default None\n", + " This keyword accepts a dictionary with {'group':'color'} pairings,\n", + " a list of RGB colors, or a specified matplotlib palette. This\n", + " palette will be used to color the swarmplot. 
If `color_col` is not\n", + " specified, then each group will be colored in sequence according\n", + " to the default palette currently used by matplotlib.\n", + " Please take a look at the seaborn commands `color_palette`\n", + " and `cubehelix_palette` to generate a custom palette. Both\n", + " these functions generate a list of RGB colors.\n", + " See:\n", + " https://seaborn.pydata.org/generated/seaborn.color_palette.html\n", + " https://seaborn.pydata.org/generated/seaborn.cubehelix_palette.html\n", + " The named colors of matplotlib can be found here:\n", + " https://matplotlib.org/examples/color/named_colors.html\n", + " swarm_desat : float, default 1\n", + " Decreases the saturation of the colors in the swarmplot by the\n", + " desired proportion. Uses `seaborn.desaturate()` to acheive this.\n", + " halfviolin_desat : float, default 0.5\n", + " Decreases the saturation of the colors of the half-violin bootstrap\n", + " curves by the desired proportion. Uses `seaborn.desaturate()` to\n", + " acheive this.\n", + " halfviolin_alpha : float, default 0.8\n", + " The alpha (transparency) level of the half-violin bootstrap curves. \n", + " float_contrast : boolean, default True\n", + " Whether or not to display the halfviolin bootstrapped difference\n", + " distribution alongside the raw data.\n", + " show_pairs : boolean, default True\n", + " If the data is paired, whether or not to show the raw data as a\n", + " swarmplot, or as slopegraph, with a line joining each pair of\n", + " observations.\n", + " show_delta2, show_mini_meta : boolean, default True\n", + " If delta-delta or mini-meta delta is calculated, whether or not to \n", + " show the delta-delta plot or mini-meta plot.\n", + " group_summaries : ['mean_sd', 'median_quartiles', 'None'], default None.\n", + " Plots the summary statistics for each group. If 'mean_sd', then\n", + " the mean and standard deviation of each group is plotted as a\n", + " notched line beside each group. 
If 'median_quantiles', then the\n", + " median and 25th and 75th percentiles of each group is plotted\n", + " instead. If 'None', the summaries are not shown.\n", + " group_summaries_offset : float, default 0.1\n", + " If group summaries are displayed, they will be offset from the raw\n", + " data swarmplot groups by this value. \n", + " fig_size : tuple, default None\n", + " The desired dimensions of the figure as a (length, width) tuple.\n", + " dpi : int, default 100\n", + " The dots per inch of the resulting figure.\n", + " ax : matplotlib.Axes, default None\n", + " Provide an existing Axes for the plots to be created. If no Axes is\n", + " specified, a new matplotlib Figure will be created.\n", + " gridkey_rows : list, default None\n", + " Provide a list of row labels for the gridkey. The supplied idx is\n", + " checked against the row labels to determine whether the corresponding\n", + " cell should be populated or not.\n", + " swarmplot_kwargs : dict, default None\n", + " Pass any keyword arguments accepted by the seaborn `swarmplot`\n", + " command here, as a dict. If None, the following keywords are\n", + " passed to sns.swarmplot : {'size':`raw_marker_size`}.\n", + " violinplot_kwargs : dict, default None\n", + " Pass any keyword arguments accepted by the matplotlib `\n", + " pyplot.violinplot` command here, as a dict. If None, the following\n", + " keywords are passed to violinplot : {'widths':0.5, 'vert':True,\n", + " 'showextrema':False, 'showmedians':False}.\n", + " slopegraph_kwargs : dict, default None\n", + " This will change the appearance of the lines used to join each pair\n", + " of observations when `show_pairs=True`. 
Pass any keyword arguments\n", + " accepted by matplotlib `plot()` function here, as a dict.\n", + " If None, the following keywords are\n", + " passed to plot() : {'linewidth':1, 'alpha':0.5}.\n", + " sankey_kwargs: dict, default None\n", + " Whis will change the appearance of the sankey diagram used to depict\n", + " paired proportional data when `show_pairs=True` and `proportional=True`. \n", + " Pass any keyword arguments accepted by plot_tools.sankeydiag() function\n", + " here, as a dict. If None, the following keywords are passed to sankey diagram:\n", + " {\"width\": 0.5, \"align\": \"center\", \"alpha\": 0.4, \"bar_width\": 0.1, \"rightColor\": False}\n", + " reflines_kwargs : dict, default None\n", + " This will change the appearance of the zero reference lines. Pass\n", + " any keyword arguments accepted by the matplotlib Axes `hlines`\n", + " command here, as a dict. If None, the following keywords are\n", + " passed to Axes.hlines : {'linestyle':'solid', 'linewidth':0.75,\n", + " 'zorder':2, 'color' : default y-tick color}.\n", + " group_summary_kwargs : dict, default None\n", + " Pass any keyword arguments accepted by the matplotlib.lines.Line2D\n", + " command here, as a dict. This will change the appearance of the\n", + " vertical summary lines for each group, if `group_summaries` is not\n", + " 'None'. If None, the following keywords are passed to\n", + " matplotlib.lines.Line2D : {'lw':2, 'alpha':1, 'zorder':3}.\n", + " legend_kwargs : dict, default None\n", + " Pass any keyword arguments accepted by the matplotlib Axes\n", + " `legend` command here, as a dict. If None, the following keywords\n", + " are passed to matplotlib.Axes.legend : {'loc':'upper left',\n", + " 'frameon':False}.\n", + " title : string, default None\n", + " Title for the plot. If None, no title will be displayed. 
Pass any\n", + " keyword arguments accepted by the matplotlib.pyplot.suptitle `t` command here,\n", + " as a string.\n", + " fontsize_title : float or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}, default 'large'\n", + " Font size for the plot title. If a float, the fontsize in points. The\n", + " string values denote sizes relative to the default font size. Pass any keyword arguments accepted\n", + " by the matplotlib.pyplot.suptitle `fontsize` command here, as a string.\n", + " fontsize_rawxlabel : float, default 12\n", + " Font size for the raw axes xlabel.\n", + " fontsize_rawylabel : float, default 12\n", + " Font size for the raw axes ylabel.\n", + " fontsize_contrastxlabel : float, default 12\n", + " Font size for the contrast axes xlabel.\n", + " fontsize_contrastylabel : float, default 12\n", + " Font size for the contrast axes ylabel.\n", + " fontsize_delta2label : float, default 12\n", + " Font size for the delta-delta axes ylabel.\n", + "\n", + "\n", + " Returns\n", + " -------\n", + " A :class:`matplotlib.figure.Figure` with 2 Axes, if ``ax = None``.\n", + " \n", + " The first axes (accessible with ``FigName.axes[0]``) contains the rawdata swarmplot; the second axes (accessible with ``FigName.axes[1]``) has the bootstrap distributions and effect sizes (with confidence intervals) plotted on it.\n", + " \n", + " If ``ax`` is specified, the rawdata swarmplot is accessed at ``ax`` \n", + " itself, while the effect size axes is accessed at ``ax.contrast_axes``.\n", + " See the last example below.\n", + " \n", + "\n", + "\n", + " \"\"\"\n", + "\n", + " from .plotter import EffectSizeDataFramePlotter\n", + "\n", + " if hasattr(self, \"results\") is False:\n", + " self.__pre_calc()\n", + "\n", + " if self.__delta2:\n", + " color_col = self.__x2\n", + "\n", + " # if self.__proportional:\n", + " # raw_marker_size = 0.01\n", + "\n", + " # Modification incurred due to update of Seaborn\n", + " ci = ('ci', ci) if ci is not None else 
None\n", + " \n", + " all_kwargs = locals()\n", + " del all_kwargs[\"self\"]\n", + "\n", + " out = EffectSizeDataFramePlotter(self, **all_kwargs)\n", + "\n", + " return out\n", + "\n", + "\n", + " @property\n", + " def proportional(self):\n", + " \"\"\"\n", + " Returns the proportional parameter\n", + " class.\n", + " \"\"\"\n", + " return self.__proportional\n", + "\n", + " @property\n", + " def results(self):\n", + " \"\"\"Prints all pairwise comparisons nicely.\"\"\"\n", + " try:\n", + " return self.__results\n", + " except AttributeError:\n", + " self.__pre_calc()\n", + " return self.__results\n", + "\n", + "\n", + "\n", + " @property\n", + " def statistical_tests(self):\n", + " results_df = self.results\n", + "\n", + " # Select only the statistics and p-values.\n", + " stats_columns = [c for c in results_df.columns\n", + " if c.startswith(\"statistic\") or c.startswith(\"pvalue\")]\n", + "\n", + " default_cols = ['control', 'test', 'control_N', 'test_N',\n", + " 'effect_size', 'is_paired',\n", + " 'difference', 'ci', 'bca_low', 'bca_high']\n", + "\n", + " cols_of_interest = default_cols + stats_columns\n", + "\n", + " return results_df[cols_of_interest]\n", + "\n", + "\n", + " @property\n", + " def _for_print(self):\n", + " return self.__for_print\n", + "\n", + " @property\n", + " def _plot_data(self):\n", + " return self.__dabest_obj._plot_data\n", + "\n", + " @property\n", + " def idx(self):\n", + " return self.__dabest_obj.idx\n", + "\n", + " @property\n", + " def xvar(self):\n", + " return self.__dabest_obj._xvar\n", + "\n", + " @property\n", + " def yvar(self):\n", + " return self.__dabest_obj._yvar\n", + "\n", + " @property\n", + " def is_paired(self):\n", + " return self.__is_paired\n", + "\n", + " @property\n", + " def ci(self):\n", + " \"\"\"\n", + " The width of the confidence interval being produced, in percent.\n", + " \"\"\"\n", + " return self.__ci\n", + "\n", + " @property\n", + " def x1_level(self):\n", + " return self.__x1_level\n", + "\n", + 
"\n", + " @property\n", + " def x2(self):\n", + " return self.__x2\n", + "\n", + "\n", + " @property\n", + " def experiment_label(self):\n", + " return self.__experiment_label\n", + " \n", + "\n", + " @property\n", + " def delta2(self):\n", + " return self.__delta2\n", + " \n", + "\n", + " @property\n", + " def resamples(self):\n", + " \"\"\"\n", + " The number of resamples (with replacement) during bootstrap resampling.\"\n", + " \"\"\"\n", + " return self.__resamples\n", + "\n", + " @property\n", + " def random_seed(self):\n", + " \"\"\"\n", + " The seed used by `numpy.seed()` for bootstrap resampling.\n", + " \"\"\"\n", + " return self.__random_seed\n", + "\n", + " @property\n", + " def effect_size(self):\n", + " \"\"\"The type of effect size being computed.\"\"\"\n", + " return self.__effect_size\n", + "\n", + " @property\n", + " def dabest_obj(self):\n", + " \"\"\"\n", + " Returns the `dabest` object that invoked the current EffectSizeDataFrame\n", + " class.\n", + " \"\"\"\n", + " return self.__dabest_obj\n", + "\n", + " @property\n", + " def proportional(self):\n", + " \"\"\"\n", + " Returns the proportional parameter\n", + " class.\n", + " \"\"\"\n", + " return self.__proportional\n", + " \n", + " @property\n", + " def lqrt(self):\n", + " \"\"\"Returns all pairwise Lq-Likelihood Ratio Type test results \n", + " as a pandas DataFrame.\n", + " \n", + " For more information on LqRT tests, see https://arxiv.org/abs/1911.11922\n", + " \"\"\"\n", + " try:\n", + " return self.__lqrt_results\n", + " except AttributeError:\n", + " self.__calc_lqrt()\n", + " return self.__lqrt_results\n", + " \n", + " \n", + " @property\n", + " def mini_meta(self):\n", + " \"\"\"\n", + " Returns the mini_meta boolean parameter.\n", + " \"\"\"\n", + " return self.__mini_meta\n", + "\n", + " \n", + " @property\n", + " def mini_meta_delta(self):\n", + " \"\"\"\n", + " Returns the mini_meta results.\n", + " \"\"\"\n", + " try:\n", + " return self.__mini_meta_delta\n", + " except 
AttributeError:\n", + " self.__pre_calc()\n", + " return self.__mini_meta_delta\n", + "\n", + " \n", + " @property\n", + " def delta_delta(self):\n", + " \"\"\"\n", + " Returns the mini_meta results.\n", + " \"\"\"\n", + " try:\n", + " return self.__delta_delta\n", + " except AttributeError:\n", + " self.__pre_calc()\n", + " return self.__delta_delta\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example: plot\n", + "\n", + "Create a Gardner-Altman estimation plot for the mean difference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "random.seed(9999) # Fix the seed so the results are replicable.\n", + "# pop_size = 10000 # Size of each population.\n", + "Ns = 20 # The number of samples taken from each population\n", + "\n", + "# Create samples\n", + "c1 = norm.rvs(loc=3, scale=0.4, size=Ns)\n", + "c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", + "c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", + "\n", + "t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)\n", + "t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)\n", + "t3 = norm.rvs(loc=3, scale=0.75, size=Ns)\n", + "t4 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", + "t5 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", + "t6 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", + "\n", + "\n", + "# Add a `gender` column for coloring the data.\n", + "females = repeat('Female', Ns/2).tolist()\n", + "males = repeat('Male', Ns/2).tolist()\n", + "gender = females + males\n", + "\n", + "# Add an `id` column for paired data plotting.\n", + "id_col = pd.Series(range(1, Ns+1))\n", + "\n", + "# Combine samples and gender into a DataFrame.\n", + "df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1,\n", + " 'Control 2' : c2, 'Test 2' : t2,\n", + " 'Control 3' : c3, 'Test 3' : t3,\n", + " 'Test 4' : t4, 'Test 5' : t5, 'Test 6' : t6,\n", + " 'Gender' : gender, 'ID' : id_col\n", + " })\n", + "my_data = dabest.load(df, idx=(\"Control 1\", \"Test 
1\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeoAAAGGCAYAAAC0W8IbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABYW0lEQVR4nO3deVhUZfsH8O/MAMO+ryKLuICIbJoKmrvikkv6M1vVMnrfUrNMS99yz6U9ezO3NLLULDUzc8lIMHcUSVQkRRQVEJUdZJs5vz94nZxgEIZhzjB8P9c1V87znPOcm0a555zznOeWCIIggIiIiAySVOwAiIiISDMmaiIiIgPGRE1ERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERERAaMiZqIiMiAMVETEREZsBaXqLOysrBgwQJkZWWJHQoRUYvG38f10yIT9cKFC/kXg4hIZPx9XD8tLlETERE1J0zUREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQEzETsAImpapXeu4+bxH5F/NQlSmSmc/CPQqtsomFk7iB0aEdUDEzWRESu8fgHnNs+FsrJM1Xbj6A+4fS4OwZM+hNzWWcToiKg+eOmbyIhd3vuFWpK+r7zwNq7FfSNCRETUUEzUREaqJOcqSnPSNfbfuXAIglKhx4iISBtM1ERGqupecZ39yqoKKKsq9BQNEWmLiZrISFm6+kBqYqax38KpNWRmFnqMiIi0wURNZKRMLWzgGjJQY79njzF6jIaItMVETWTE/Aa9BJdOfQFIVG0SmQm8Hn0K7mFRosVFRPVnMIl6+fLlkEgkeO211zRuExMTA4lEovYyNzfXX5BEzYzUxBT+j89Cl1fWou3QqWj/2Gvo9upG+PR5VuzQiKieDOI56oSEBKxZswbBwcEP3dbW1hapqamq9xKJpI6tiQgALBxbwcKxldhhEJEWRD+jLi4uxjPPPIN169bBweHhKyVJJBK4u7urXm5ubnqIkoiISByiJ+opU6Zg+PDhGDhQ86SXBxUXF8PHxwdeXl4YNWoUzp8/X+f25eXlKCwsVL2Ki+t+ZIWIiMiQiHrp+7vvvkNiYiISEhLqtb2/vz82bNiA4OBgFBQU4MMPP0RkZCTOnz+P1q1b17rPsmXLsHDhQl2GTUREpDeinVFfv34d06dPx6ZNm+o9ISwiIgITJkxAaGgo+vTpgx07dsDFxQVr1qzRuM+cOXNQUFCgesXHx+vqRyAiImpyop1Rnz59Gjk5OQgPD1e1KRQKHDp0CJ9//jnKy8shk8nqHMPU1BRhYWG4fPmyxm3kcjnkcrnqvbW1deODJyIi0hPREvWAAQOQnJys1vb8888jICAAb7311kOTNFCd2JOTkzFs2LCmCpOIiEhUoiVqGxsbBAUFqbVZWVnByclJ1T5hwgR4enpi2bJlAIBFixahR48eaNeuHfLz8/HBBx/g2rVrePHFF/UePxERkT4YxHPUmmRkZEAq/fs2el5eHqKjo5GdnQ0HBwd06dIFR48eRWBgoIhREhERNR2DStRxcXF1vv/kk0/wySef6C8gIiIikYn+HDURERFpxkRNRERkwJioiYio2Th06BBGjBiBVq1aQSKRYOfOnXVuHxcXV6OYk0QiQXZ2tn4C1gEmaiIiajZKSkoQEhKClStXNmi/1NRUZGVlqV6urq5NFKHuGdRkMiJqGkpFFUpvX4NUZgpLF2+xwyHS2tChQzF06NAG7+fq6gp7e3vdB6QHTNRERi7z5C5cP/o9KovzAAAWTq3hO+AFOHXoLnJkRNWKi4tRWFioev/PFSV1ITQ0FOXl5QgKCsKCBQvQs2dPnY7flHjpm8iI3
Tz+I678ukaVpAHg3t0bSPnhXeRfOSNiZER/69OnD+zs7FSv+4tc6YKHhwdWr16N7du3Y/v27fDy8kLfvn2RmJios2M0NZ5RExkpZVUlrh/9vvZOQYmMw9/B3i9Mv0ER1SI+Ph6hoaGq97o8m/b394e/v7/qfWRkJNLS0vDJJ5/gm2++0dlxmhITNZGRKs66hKrSQo39hRnnoKgsg8y0ftXriJqKtbU1bG1t9Xa8bt264fDhw3o7XmPx0jeRkZJIH/LPWyKFRMJfAdTyJCUlwcPDQ+ww6o1n1ERGytqjPcxsnFBRdLfWfge/cEhNzPQcFVHjFBcXq5U2Tk9PR1JSEhwdHeHt7Y05c+bg5s2b2LhxIwDg008/RZs2bdCpUyeUlZXhyy+/xO+//45ff/1VrB+hwZioiYyURCqDT98JuPRzzfXxpSZm8O79tAhRETXOqVOn0K9fP9X7GTNmAAAmTpyImJgYZGVlISMjQ9VfUVGBN954Azdv3oSlpSWCg4Px22+/qY1h6CSCIAhiB6FPiYmJ6NKlC06fPo3w8HCxwyFqcndSDiPjj+9QmpMOALDzDYFP3wmwbR0gcmTU0vH3cf3wjJrIyDl37AXnjr1QWVIAiUwGE3NrsUMiogZgoiZqIUyt7MQOgYi0wCmfREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQFjoiYiIjJgTNREREQGjImaiIjIgDFRExERGTAmaiIiIgPGRE1ERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERERAaMiZqIiMiAMVETEREZMCZqIiIiA8ZETUREZMCYqImIiAwYEzUREZEBY6ImIiIyYEzUREREBoyJmoiIyIAxURMRERkwE7EDIKKmVZaXhZsnd6Hg6p+QyEzg5B8Bj66PwdTCRuzQiKgemKiJjFhR5l84t+kdKMpLVG0l2WnIORuL4Invw8zaUcToiKg+eOmbyIhd3vO5WpK+rywvC9fivhUhIiJqKCZqIiNVejsDJdlpGvtvn4+DoFToMSIi0gYTNZGRqiwtrLNfWVkOZVWFnqIhIm0xURMZKUsXL0hkphr7zR1bQWZmoceIiEgbTNRERsrU0g6unftp7PfsNkqP0RCRtpioiYyYX9S/4Ogfod4okcKzxxh4dH1MnKCIqEH4eBaREZOZmiNw3DsoybmK/PQzkMhM4dShB+S2zmKHRkT1xERN1AJYufrCytVX7DCISAsGc+l7+fLlkEgkeO211+rc7ocffkBAQADMzc3RuXNn7NmzRz8BEhERicAgEnVCQgLWrFmD4ODgOrc7evQonnrqKUyePBlnzpzB6NGjMXr0aJw7d05PkRIREemX6Im6uLgYzzzzDNatWwcHB4c6t12xYgWGDBmCWbNmoWPHjli8eDHCw8Px+eef6ylaIiIi/RI9UU+ZMgXDhw/HwIEDH7rtsWPHamwXFRWFY8eONVV4REREohJ1Mtl3332HxMREJCQk1Gv77OxsuLm5qbW5ubkhOztb4z7l5eUoLy9XvS8uLtYuWCIiIhGIlqivX7+O6dOn48CBAzA3N2+y4yxbtgwLFy5ssvGJiIiakmiXvk+fPo2cnByEh4fDxMQEJiYmiI+Px2effQYTExMoFDWLBbi7u+PWrVtqbbdu3YK7u7vG48yZMwcFBQWqV3x8vM5/FiIioqYi2hn1gAEDkJycrNb2/PPPIyAgAG+99RZkMlmNfSIiIhAbG6v2CNeBAwcQERFRY9v75HI55HK56r21tXXjgyciItIT0RK1jY0NgoKC1NqsrKzg5OSkap8wYQI8PT2xbNkyAMD06dPRp08ffPTRRxg+fDi+++47nDp1CmvXrtV7/ERERPog+qzvumRkZCArK0v1PjIyEps3b8batWsREhKCbdu2YefOnTUSPhERkbEwqCVE4+Li6nwPAOPGjcO4ceP0ExAREZHIDPqMmoiIqKVjoiYiIjJgTNRERNRsHDp0C
CNGjECrVq0gkUiwc+fOh+4TFxeH8PBwyOVytGvXDjExMU0epy4Z1D1qajo3b+dj1+E/kXr9Fmws5OjfJQC9Q9tDJuV3NSJqPkpKShASEoIXXngBY8aMeej26enpGD58OP79739j06ZNiI2NxYsvvggPDw9ERUXpIeLGY6JuARIuXsWCDT+jovLvRWSOX0jH74mpWPD8Y5DJmKyJqHkYOnQohg4dWu/tV69ejTZt2uCjjz4CAHTs2BGHDx/GJ5980mwSNX9DG7nKKgXe3/SrWpK+7/j5K9hznCVCich4GUMxJyZqI3fiQjryi0s19v968rweoyEiqqm4uBiFhYWq14OFlBpLUzGnwsJC3Lt3T2fHaUpM1EYur6ikzv67hZqTOBGRPvTp0wd2dnaq1/3VKKka71EbOR93pzr7fd0d9RQJEVHt4uPjERoaqnr/YH2GxtJUzMnW1hYWFhY6O05TYqI2csFtW8OvlTOuZN6ptX/Uo6H6DYiMXtL66agozoOZtQNCJ68QOxzjVFEKmFmKHYXOWFtbw9bWtknGjoiIwJ49e9TaHlbMydDw0ncLsOD5EfB0sVdrk0oleGF4JLoHthEnKDJaFcV5qCi6i4riPLFDMWKC2AGIpri4GElJSUhKSgJQ/fhVUlISMjIyAFSXNp4wYYJq+3//+9+4cuUK3nzzTVy8eBFffPEFvv/+e7z++utNEl9aWhreeecdPPXUU8jJyQEA7N27F+fPaz8fiGfULYCHsx3WvzUBx85fQer1W7A2l6NfuD9cHWzEDo2IqEFOnTqFfv36qd7PmDEDADBx4kTExMQgKytLlbQBoE2bNvjll1/w+uuvY8WKFWjdujW+/PLLJnk0Kz4+HkOHDkXPnj1x6NAhLFmyBK6urvjzzz+xfv16bNu2TatxmahbCJlMil7B7dAruJ3YoRARaa1v374QBM1XFGpbdaxv3744c+ZME0ZVbfbs2Xj33XcxY8YM2Nj8fSLUv39/fP7551qPy0vfRETNjaAUOwKqRXJyMh5//PEa7a6urrhzp/Z5QvXBRE1E1NwwURske3t7ZGVl1Wg/c+YMPD09tR6XiZqIqLlR1lxpkMT35JNP4q233kJ2djYkEgmUSiWOHDmCmTNnqk1waygmaiKi5qaqHKjjPi2JY+nSpQgICICXlxeKi4sRGBiI3r17IzIyEu+8847W43IyGRFRc6OsAhSVgImZ2JHQA8zMzLBu3TrMmzcPycnJKC4uRlhYGNq3b9+ocZmoiYiao4piwIQrCxoiLy8veHl56Ww8XvomImqOygrEjoD+YezYsXjvvfdqtL///vsYN26c1uMyURMRNUdl+WJHQP9w6NAhDBs2rEb70KFDcejQIa3H5aXvFkIQBJxKvYa/Mm7B2tIcvUPaw8HGeNYKJmpxSnPFjoD+obi4GGZmNecNmJqaorCwUOtxmahbgJy8Iryz7iekZ/39wP2anYcQPbIXHu8dJmJkRKS10rtiR0D/0LlzZ2zduhXz5s1Ta//uu+8QGBio9bhM1C3Awq92qyVpAKhUKPDFj/HwdnNEF38fkSIjIq0V33r4NqRXc+fOxZgxY5CWlob+/fsDAGJjY7Flyxb88MMPWo/LRG3kzl3JxF/XNf+D3nkoiYmaqDkqrLkCFolrxIgR2LlzJ5YuXYpt27bBwsICwcHB+O2339CnTx+tx2WiNnLpWbfr7E/TUKeaiAxc4U2xI6BaDB8+HMOHD9fpmEzURs7Oqu4JY/bWFnqKhIh0qvAmoFQCUj68Y2gqKiqQk5MDpVJ9TXZvb2+txmOiNnI9OrWBrZU5CkvKau0f/Ij2ExyISESKyupkba+7hTWocS5duoQXXngBR48eVWsXBAESiQQKhXZrtDNRGzkzUxO88eQgvBuzB5X/+EsS3sEbwyM7ixQZETVa7hUmagMyadIkmJiYYPfu3fDw8IBEItHJuEzULUBkUFt88cbT2PnHGaRm3IK1hRwDunbEwK4BMJHJxA6Pi
LR15y/AT/tJSqRbSUlJOH36NAICAnQ6LhN1C+Hr4YTXnhgodhhEpEs5KWJHQA8IDAzEnTu6n6DLWQhERM1VzgVAUSV2FPQ/7733Ht58803ExcXh7t27KCwsVHtpi2fURETNVeW96mTtESx2JARg4MDqq5YDBgxQa+dkMiKiluz6CSZqA3Hw4MEmGZeJmoioObt2FOgWLXYUBDRq9bG68B41EVEz0rVrV7Tu1A1dlyZWN+ReAfKvixsUqfzxxx949tlnERkZiZs3q1eP++abb3D48GGtx2SiJiJqRrKzs3EzMxvZhRV/N16JEy0e+tv27dsRFRUFCwsLJCYmory8HABQUFCApUuXaj0uEzURUXP31z5AEMSOosV79913sXr1aqxbtw6mpqaq9p49eyIxMVHrcZmoiYiau4IbwE3tEwHpRmpqKnr37l2j3c7ODvn5+VqPy0RNRGQMkjaJHUGL5+7ujsuXL9doP3z4MPz8/LQel4maiMgY3DwNXE8QO4oWLTo6GtOnT8eJEycgkUiQmZmJTZs2YebMmXj55Ze1HpePZxERGYujnwFj1wMmZmJH0iLNnj0bSqUSAwYMQGlpKXr37g25XI6ZM2di2rRpWo/LM2oiImORnwGc+UbsKFokhUKBP/74A1OmTEFubi7OnTuH48eP4/bt21i8eHGjxuYZNRGRMUnaBPg+Crh0EDuSFkUmk2Hw4MFISUmBvb09AgMDdTY2z6iJiIyJUgHELQUUlWJH0uIEBQXhypUrOh+XiZqIyNjkpgOJG8WOosV59913MXPmTOzevRtZWVmsnkVERHVI2gS06QM4txM7khZj2LBhAICRI0dCIpGo2lk9i4iIalIqgPjlwOjVgIy/6vXB4KpnXb58GWlpaejduzcsLCxU3xiIiMhA3LkEJMYAj7wodiQtgsFUz7p79y4GDhyIDh06YNiwYcjKygIATJ48GW+88YbOAyQiokY48y1w47TYUbQYBlE96/XXX4eJiQkyMjJgaWmpah8/fjz27dundSBERNQEBAGIXQgUZokdidEzmOpZv/76K9577z20bt1arb19+/a4du2a1oEQUbXS2xlI3fkBjr3/fzj63lhc3L4MxdlpYodFzVlZAbB/DlBeLHYkRs1gqmeVlJSonUnfl5ubC7lc3qCxVq1aheDgYNja2sLW1hYRERHYu3evxu1jYmIgkUjUXubm5g39EYgMVnF2Gv78agZun4uDouIelJVluJNyGGdjZqEg47zY4VFzlpsO/DYfUFSJHYnRMpjqWY8++ig2bvz7+TyJRAKlUon3338f/fr1a9BYrVu3xvLly3H69GmcOnUK/fv3x6hRo3D+vOZfSLa2tsjKylK9eBZPxuRq7AYoKu7VaFdWlSP9ty/1GouyqgK3zx/CzZM/If/KGQisd9z83TgFHP6YtaubSFNVz2rwrO/3338fAwYMwKlTp1BRUYE333wT58+fR25uLo4cOdKgsUaMGKH2fsmSJVi1ahWOHz+OTp061bqPRCKBu7t7Q8MmMniVpYXIT/9TY39x5l8oy78Fc3u3eo9ZUZKP7MR9KMw4B6mpGZwDe8O5Yy9IH/K4Tu6lk/hr18eoulekarNw9kLgE3Nh4ehZ7+OTAbr4C2DvDYQ8KXYkRud+9awNGzaoqmcdO3YMM2fOxNy5c7Uet8GJOigoCH/99Rc+//xz2NjYoLi4GGPGjMGUKVPg4eGhdSAKhQI//PADSkpKEBERoXG74uJi+Pj4QKlUIjw8HEuXLtWY1ImaE2VlOYC6z3QUlWX1Hq/41hWc2/Q2qkr/XhEp968TuHVmHwKfXAiZae23qu7dvYmUbUsh/GMJynt3ruP85nno8spaSKSyesdBBujEGsDRD/DqJnYkRqWpqmdp9Ry1nZ0d3n77ba0P+qDk5GRERESgrKwM1tbW+PHHHzUuZu7v748NGzYgODgYBQUF+PDDDxEZGYnz58/XmNx2X3l5uWrmHVCd6IkMkZmtE+T2b
ijPv1Vrv6mVPWRyK2Sf2Q+lohL2PsGwdPHWON5fP32slqTvK7iWjJvHtsO799O17pd5aneNJH1fWX427qYeh3PHnvX4ichgCUogdhEwZh1gq/0JFgFnz55FUFAQpFIpJBIJ3n77bcyaNQuXL19GcXExAgMDYW1t3ahjNDhRHzp0qM7+2m6k18Xf3x9JSUkoKCjAtm3bMHHiRMTHx9earCMiItTOtiMjI9GxY0esWbNGYxmxZcuWYeHChQ2KiUgMEokUrSP+D2l7V9bab+Xqi9MrJ0N4YDKQU0AkOox6AzJT9UmVRZmXUJqTrvFYt5J+hXfvp1F6OwO3z8ejqrwUtq0D4BTQEyVZNe+xPag46zITtTEoL6qeXDbyc9avboSwsDBkZWXB1dUVfn5+SEhIgJOTk06rZzU4Ufft27dG24MrkjV0LVMzMzO0a1e9Fm2XLl2QkJCAFStWYM2aNQ/d19TUFGFhYbXevL9vzpw5mDFjhup9UlJSk60eQ9RYHl2GoaqsGDeOfK+aVCY1lcPeNwS5l07W2P7uxaNIk1uhw4jX1Noriu/WeZzyoru4cuBLZJ74UdWWlbAL5g4ekNu51rmviUXjzg7IgNxOBY5+BvSeKXYkzZa9vT3S09Ph6uqKq1evQqlU6vwYDU7UeXl5au8rKytx5swZzJ07F0uWLGl0QEqlUu1SdV0UCgWSk5NVC6HXRi6Xqz021thLEERNzavnE/Do+hgKriUDghJ2Pp1x9us3NW5/O/kgfPtNhJm1g6rNwqn2W0H3mVk7qCXp+8rysuqcESyRyuDSiV90jUrKz4BbEOA/ROxImqWxY8eiT58+8PDwgEQiQdeuXSGT1T6HQ9sSmA1O1HZ2djXaBg0aBDMzM8yYMQOnT9d/qbo5c+Zg6NCh8Pb2RlFRETZv3oy4uDjs378fADBhwgR4enpi2bJlAIBFixahR48eaNeuHfLz8/HBBx/g2rVrePFFrmNLxsVEbgmnDt0BAEpFJUpva34MUVBWoTTnqlqitnRqDfs2ochPT6p1H0kds77L8rNh59O5+ovCP/j0mwi5rXM9fwpqNv74CHBuDzi1FTuSZmft2rUYM2YMLl++jFdffRXR0dGwsbHR6TF0VlLFzc0NqampDdonJycHEyZMQFZWFuzs7BAcHIz9+/dj0KBBAICMjAxIpX8/6p2Xl4fo6GhkZ2fDwcEBXbp0wdGjR3V6L4DI0EikJpCZWdT6fPV9tV2O7jDyDZzbMhelOVfV2t3DhuB2St3rDrsED4BL5/7I+fM3VBTnwtLFG626joC9X5hWPwMZOEUFcGAeMPZLwNRC7GialbNnz2Lw4MEYMmQITp8+jenTp4ufqM+ePav2XhAEZGVlYfny5QgNDW3QWOvXr6+zPy4uTu39J598gk8++aRBxyBq7iQSCVyC+iI7sfZV+yydvaFUVOH6ke8hNTGFk39PmNu7wszGEWEvfobcSyeRl5aIynuFsPfpDLfQwSi8eRGlZZqfgDC3dYF9m1C4hw5uqh+LDE3BDeD4KuDRGQ/fllQenEwWHx+PiooKnR+jwYk6NDQUEomkxipFPXr0wIYNG3QWGBH9zbv3M8hP/xNleZlq7VJTOSQmpjgb8/dkoPTfNsCzx+NoM+AFQCJB4fXzyDn7G5RVFbibchjX4r+FnW9IjTPt+8zt3WHnG9yUPw4Zqgs/Af7DANcAsSNpNgxyMll6uvojH1KpFC4uLlxzm6gJmVk7IOT5j5B1ajfupByGsqoC9r4hKCvIQf6Vfyz2Lyhx89h2WDh6oiwvGzePq08aq7pXhLsph2Ht3g7F2epPTMjMrdBh9CxIJA1eXZiMRcI6YPhHYkfRbBjkZDIfHx+tDkREjWNqaQvv3k+rFiopL7iNhM9f0Lj9zRM/oqLwjsZ+paBEwP+9jdvn46AovwcbzwB4hA+FmY2jzmMn3cjIyEBpaSkAoLRCiYzcMng76vgk6cYpoOAmYGe4S8WuXLkSH3zwAbKzs
xESEoL//ve/6Nat9lXWYmJi8Pzzz6u1yeVylJXVf5W/uhjMZLLPPvus3gO++uqrWgdDRPVXeuda9QpTGty7c73u/W9dgYNfOJwDInUdGunYyZMnsXjxYvzyyy+q2455pVXwffskHuvsiLnDfPCIrw6Tw/UTgN0Y3Y2nQ1u3bsWMGTOwevVqdO/eHZ9++imioqKQmpoKV9fa1wCwtbVVm+z84NofujBkSPWjbaJOJqvvBC6JRMJETaQnppY1H5V8kExuCUV5qcZ+idQEEg2X6Mhw7NixA+PHj4cgCDXmBgkCsOdcLvaey8PW6I4YE6ajR+dyUnQzThP4+OOPER0drTpLXr16NX755Rds2LABs2fPrnUffRVz+uqrr5pk3Hol6n/elyYi8Vl7tIeli4/GZ6zdQgYh93ICynIza+138u8Bqcy01r7GuP8894PPdZN2Tp48ifHjx0OhUGgsM6pQAhIIGL8uBUffDNXNmXVB3VdjxFJRUYHTp09jzpw5qjapVIqBAwfi2LFjGvdrymJOY8aMQUxMDGxtbTFmTN1XIXbs2KHVMXT2HDUR6V/7x17Fuc1za5w5W7q2gfejT8HeLwwp378LQVml1m9iYQvvvs81SUyhk1c0ybgt0bvvvlvrmfQ/CQAECHh3zzX89EpQ4w9cWPuXu6ZSXFyMwsK/C8j8c0XJ++7cuQOFQgE3N/VSr25ubrh48WKtY2tTzKkh7OzsVJfSa1sQTBe0StQ3btzArl27kJGRUeOZsY8//lgngRHRw9l4BiAs+nNkJvyMgmtnITUxg3PHXnAPi4LMzAKO7R5B5+eW4frRH5B/5QykMhM4BfSEV6/xsHBsJXb4VIeMjAzs3r37oUn6PoUS+Dk5VzcTzMoKgLJCwNy2cePU0z/rL8yfPx8LFizQydjaFHNqiAcvd4t66ftBsbGxGDlyJPz8/HDx4kUEBQXh6tWrEAQB4eHhTREjEdXB3N4NfoM0L6Nr6xWITuPn6zGi5k+hUDTJ87ANsX///non6fsEAfj1Qh4mRrg9fOOHyU4BPJv2d3pVVfWVnvj4eLUFs2o7mwYAZ2dnyGQy3LqlXgr21q1b9b4HXZ9iToamwYl6zpw5mDlzJhYuXAgbGxts374drq6ueOaZZ1Qz34iImrPFixc32/K40d9eQvS3l3QwUg8djFE/1tbWsLV9+Nm7mZkZunTpgtjYWIwePRpAdSGn2NhYTJ06tV7Hqk8xp4YICwur9yzyxMTEh29UiwYn6pSUFGzZsqV6ZxMT3Lt3D9bW1li0aBFGjRqFl19+WatAiIgMxdy5c/H222+LGkNMTAxeeumlBu+37tn2ujmjdukIjK69NrqunDlzBt27d2/QPjNmzMDEiRPRtWtXdOvWDZ9++ilKSkpUs8D1Xczp/hcGACgrK8MXX3yBwMBA1eX248eP4/z583jllVe0PkaDE7WVlZXqvrSHhwfS0tJUs+fu3NG8uAIRia8s/xZKbl2BiYUtbL0Cdf48qbGQyWQaV5fSl6ioqFqXa66LRAIMDnSAqUwHK8vlXQKU5YC86UoDm5g0fJrU+PHjcfv2bcybNw/Z2dkIDQ3Fvn37VBPM9F3Maf78v28rvfjii3j11Vdr3PueP38+rl/Xfia9RGjgTZDRo0dj+PDhiI6OxsyZM/HTTz9h0qRJ2LFjBxwcHPDbb79pHYw+JCYmokuXLjh9+jTvqVOLUVVWjEu7V+Bu6nHVIinmDq3Qbvg02HNdb4M1cuRI7NmzBwqF4qHbyqTA8CBH3cz6vu/x1YBrR92N9w/G9vvYzs4Op06dQvv27dXaL126hK5du6KgoECrcRv8tevjjz9WXapYuHAhBgwYgK1bt8LX1/eh1bCISBwp25bi7sWjaiuZleVl4sLWBSi9e0Onx0paPx0nV0xA0vrpOh23JZo7dy4kEslDr3xIAEggwTvDdLzEs0L3laCMmYWFBY4cOVKj/ciRI42qh9Hg6w5Lly7Fs88+C6D6Mvjq1
au1PjgRNb2im6kouPpnrX3KynJknvwJ7YZO0dnxKorzUFF0V2fjtWSPPPIItm7dqlqZrLYza5m0Okl/H91Rt8uIAoCt4a73bYhee+01vPzyy0hMTFStPX7ixAls2LABc+fO1XrcBifq27dvY8iQIXBxccGTTz6JZ599FiEhIVoHQE2jorIKPxw8jX0nzuNuQQm83BwwqlcIhkV0Fjs00rOC6+fr7C/MqLufxDVmzBgcPXoUixcvrvFctURSfbn7HV2v9Q0ArUIBKx0tSdpCzJ49G35+flixYgW+/fZbAEDHjh3x1Vdf4YknntB63AYn6p9++gl5eXn44YcfsHnzZnz88ccICAjAM888g6effhq+vr5aB0O6UaVQ4O21O5F0+e9Lmlcy7+CT72ORlnkH08b2a5LjvvLRZuQVlcLBxhJfvPF0kxyDGk5mWvszqfdJH9JP4nvkkUdUi0yFhoYiLy8PDpYmSHonXPfVs+4Lm9A04xq5J554olFJuTZaTQ10cHDASy+9hLi4OFy7dg2TJk3CN998g3bt2uk0ONLOoaRLakn6QbsO/4lr2U1zWTKvqBR3CoqRV6S5EATpR9HNi7hxdBsyE3bBppU/JFLN38mdAx/VY2TUGN7e3rC0tAQAWJpJmy5J+/QEWndpmrGpwRq11ndlZSVOnTqFEydO4OrVqzXWXyVxxCX9VXf/mb8wcWhEndtQ81RVVoKUbUvU7klLpDLYtA6o9RK3lVsbuIdxoSJ6gIk50JNVEA2JVmfUBw8eRHR0NNzc3DBp0iTY2tpi9+7duHFDt7NHSTtl5ZV191fU3U/N16VfVtSYOCYoFSjMOA/38CGwbtUBEqkMplb28IwYi87PLYeJ3FKkaMkgdZkI2DR9SUiqvwafUXt6eiI3NxdDhgzB2rVrMWLECI3rspI4Ovt54swlzQ/XB/mxGIMxKivIwd2Lmkv9Fd1MRVj053qMiJodBx+g8zixo6B/aHCiXrBgAcaNGwd7e/smCId0YXhkZ/z4RxKKSstq9Pl6OKFHJz8RoqKmVppzVe056X8quVVdPIerkZFGvWYATVCjnBqnwYk6Ojq6KeIgHXK0tcJ7/x6D9zbtw7Vbuar2IL9WGNM7DBnZuWjTio9dGBtTy7qLGphY2jBJk2adx1U/kkVaUygUiImJQWxsLHJycmpUYPv999+1GrdRk8nIcLX3csWXsyfgwtUs3MotwNFz6Th2Lg2LYn4BAPi1csa0sf15GdyI2HgGwMKpNe5pWGnMtfMAPUdEzYZLANCt4QVASN306dMRExOD4cOHIygoSGdfjJmojVygrwd2Hf4TcWdS1dqvZN7BnDU/YuWMp+Dt5ihSdKRr7R+bjvNb5kFRcU+t3dLVF66dB6CiKBdmNvy86QGWjsDgdwETM7Ejafa+++47fP/99zoroXkfE7WRu3k7H78nXqy1r6yiEtviEjFj/EA9R0VNxdYrEKEvfobMhJ9RcC0ZMlMzyO3dUZSZiqQvq+v12ngGwKffRBbjIMDMChj6AWDtInYkRsHMzKxJ1hPRQS00MmRJl66jrvpoZ/7K0F8wpBcWjq3QNupfCH/pc7iFDMKd8/Eoz8tW9RfdvIjzW+ai8PoFEaMk0cnMgKilgDMXqtKVN954AytWrGhQadL64Bm1kTMxqfu7mInINXep6SgVVbh2aFOtfYKiChmHNiPomXf1HBUZBIkUGLSQk8d07PDhwzh48CD27t2LTp06wdRUfQb9jh07tBqXidrIdQ9sA1OZDJUa6tn2Cm6r54hIX4qzLqOyOE9jf356EpRVFZDy3mTL03cO4BMpdhRGx97eHo8//rjOx2WiNnL21pZ4cmBXfLP/RI0+VwcbjOkTJkJUpB8Pu/wm6PwSHTUDEVOADoPFjsIoffXVV00yLhN1CzBhSASc7W2w7eBpXM/Jg5mpDH1CO2DS0Eg42FiJHR41EWuPdjC1skdlSX6t/Xa+IQ+trEVGpvM4IFi3lZ2o6TFRtxDDe
gRhWI8glJZVwMxUxnvTLYBUZgqvR5/ClX2ravRJpDJ4P8pSpC1K2/5Aj1fEjsLobdu2Dd9//z0yMjJQUVGh1peYmKjVmJz13cJYmpsxSbcgrbo+hnbDX4Xc7u/KdlbubRH45ELY+QSJGBnplWeX6vvSUv7Kb0qfffYZnn/+ebi5ueHMmTPo1q0bnJyccOXKFQwdOlTrcXlGTWTk3MOi4BY6CGW5WZDITGBuz3K0LYpLABc00ZMvvvgCa9euxVNPPYWYmBi8+eab8PPzw7x585Cbm/vwATTg1yuiFkAikcLCyZNJuqVx9AOGfQCYsZSpPmRkZCAysno2vYWFBYqKigAAzz33HLZs2aL1uEzURETGyLYVMOxDwLzuYi2kO+7u7qozZ29vbxw/fhwAkJ6e3qgnLJioiYiMjdym+kzayknsSFqU/v37Y9euXQCA559/Hq+//joGDRqE8ePHN+r5at6jJiIyJvdXHbNrLXYkLc7atWtVpS2nTJkCJycnHD16FCNHjsS//vUvrcdloiYiMiahT1fP8ia9k0qlkD4ws/7JJ5/Ek08+2fhxGz0CEREZBrvWQJdJYkfRov3xxx949tlnERERgZs3bwIAvvnmGxw+fFjrMZmoiQgAuJyoMejxCiAzffh21CS2b9+OqKgoWFhY4MyZMygvLwcAFBQUYOnSpVqPy0RN1IJVFOchbd8qHPvwCRxZMgJJ61/D7fOHxA6LtOESwEIbInv33XexevVqrFu3Tq1yVs+ePbVelQzgPWqiZk9QKpB76STuXDwKQVEJO58QuHbuB5mZeZ37VZYW4OzXb6IsL1PVVpx1Cak/vofywttoHTG2qUMnXQp9GpBIxI6iRUtNTUXv3r1rtNvZ2SE/P1/rcZmojUxpWQViT1/Etey7cLC1wqCuAXB14HOUxkpZVYELWxciPz1J1Xbnwh+4cewHdH52OcztXTXum3lyl1qSflBG/Ca4h0XBxNxa1yFTI7m7uwOCAu4mxX832rYCfB8VLygCUP3ZXL58Gb6+vmrthw8fhp+fn9bjMlEbkfPpmZj75S4UlZap2jbuO4Ypj/fFyF4h4gVGTeb64a1qSfq+8vxbuLT7U3R+dikEQYncSwkouPonJDITOPlHwLZ1R9xJ0Ty5RVlVjty/TsI1uH8TRk/aOHXqFJB/Hdj67N+NgaO5jrcBiI6OxvTp07FhwwZIJBJkZmbi2LFjmDlzJubOnav1uEzURqK8ogoLNvyslqQBQKkU8PmOg/D3doO/t7tI0VFTEAQB2Wf2aewvuPonijL/QtrelSjOuqxqv3lsO5w79oKysrzO8ZVVdfeTgZCaAP5DxI6CAMyePRtKpRIDBgxAaWkpevfuDblcjpkzZ2LatGlaj8uvYEYiLikV+cX3au0TBGDXkbMa9714LRs/HzmL+KS/UF5R1VQhko4pq8o11pq+L/3Al2pJ+r47KYdhYmFTx54S2PkENy5A0g+fCMDcTuwoCIBEIsHbb7+N3NxcnDt3DsePH8ft27exePHiRo3LM2ojceN2ft39OXk12nILS7Ao5hecT//7PqWNpRzT/28A+oR10HWIpGMyU3OYWTuiolhTVR4JCq9f0Lh/edFdSGSmEBSVNfqcAiJh4eSpo0ipSfn1EzsC+gczMzMEBgbqbDwmaiPhbFf3pJ/a+hds+Bkp17LV2opKy7Hs231wd7KDvzcrLRk69/ChyDi0qdY+61btUZz5l8Z9q0oLEDDuHVw98CXK8qv/HkikMrh06ou2w15pknhJxyRSwKub2FG0eC+88EK9ttuwYYNW4zNRG4n+4f748uc/UKbh0vXQHkFq789dyayRpO9TKJX48dAZzH6W970MXeueT6A4+zJy/zqh1m7h1BptBr2E5K9natzXxNwaTh26w6lDDxTdSEFVeQms3Pwgt2Ehh2bDxb+6AAeJKiYmBj4+PggLC2uShYOYqI2EjaU5Zj0VhWXf7kWVQqnW1yu4HfYeP4dv9h+Hp4s9R
vQMRmpG7Un6vtSMW00ZLumIVGaCwCfmIT/9T9y5eBjKqkrYtwmFc8eekMpMYefTGQXXkmvd1zVkICSS6mkqtl66u0xHeuTWSewICMDLL7+MLVu2ID09Hc8//zyeffZZODo66mx8Jmoj0ju0Pfw8nbH7aHL1c9Q2lqisVCAu6e/LnxeuZuG3UynoF+5f51jWFmZNHS7pkH2bENi3qfkIXrvhryL5mzmoKLqj1m7j6Q/v3s/oKzxqKs6cS2IIVq5ciY8//hg7duzAhg0bMGfOHAwfPhyTJ0/G4MGDIWnkQjSizvpetWoVgoODYWtrC1tbW0RERGDv3r117vPDDz8gICAA5ubm6Ny5M/bs2aOnaJuH1i4O+Peo3lj2r8cxrEeQWpK+TxCAg6dTYWaq+XvagC4dmzJM0hMLx1YI/9dKtBn0IhzadYWjfwQ6jHwDnZ97DyZyS7HDo8ay9xE7AvofuVyOp556CgcOHMCFCxfQqVMnvPLKK/D19UVxcfHDB6iDqIm6devWWL58OU6fPo1Tp06hf//+GDVqFM6fP1/r9kePHsVTTz2FyZMn48yZMxg9ejRGjx6Nc+fO6Tny5mH/Sc0zfgUA4R28al1xsFObVjXuaVPzZWJuDc/uj6PTkwsROO4duAb3h9SEhRuMAmtOGySpVAqJRAJBEKBQKBo/ng5i0tqIESMwbNgwtG/fHh06dMCSJUtgbW2N48eP17r9ihUrMGTIEMyaNQsdO3bE4sWLER4ejs8//1zPkTcPuYUldfa72Nvgg1fGIjLIDy72NvBr5YyXRj6K9/49BnIz3hUhMmjmtoCcS7waivLycmzZsgWDBg1Chw4dkJycjM8//xwZGRmwtm7c52Qwv40VCgV++OEHlJSUICIiotZtjh07hhkzZqi1RUVFYefOnXqIsPnxdXfCyZSrGvt93J0Q0s4LIe289BcUEemGTSuxI6D/eeWVV/Ddd9/By8sLL7zwArZs2QJnZ2edjS96ok5OTkZERATKyspgbW2NH3/8UeOD4tnZ2XBzU3+2183NDdnZmmcwl5eXq2qCAmj0vYLmZHhkZ/x4KAmVtVx6sbGUY2DXABGiIjFUlZWg6GYKJFJT2HoHQsqaxc2fDdc5MBSrV6+Gt7c3/Pz8EB8fj/j4+Fq327Fjh1bji56o/f39kZSUhIKCAmzbtg0TJ05EfHy8zlZ1WbZsGRYuXKiTsZqbVs72+M+EoXhv036UVfy9+pSdlQUWvDACVuZyEaMjfRAEJa4d3IjMhF2qtb1Nrezh03cC3MOiRI6OGsWaa/cbigkTJjR6ZnddRE/UZmZmaNeuHQCgS5cuSEhIwIoVK7BmzZoa27q7u+PWLfXne2/dulVd9k2DOXPmqF0uT0pKQp8+fXQUveHrFdwOoe1b42BiKv66ngOlIKBLB2+0b625/CEZj4z4Tbhx9Ae1tsqSfFz+5TOYWFjDOaCnSJFRo1nz37ChiImJadLxRU/U/6RUKtUuVT8oIiICsbGxeO2111RtBw4c0HhPG6ieMi+X/33m2Nib+s2RUing8Nk0JP6VAQD49eQF2FjKMXVsP/QP5+VvY6WoKENmwi6N/TeO/MBE3ZxZuYgdAemJqIl6zpw5GDp0KLy9vVFUVITNmzcjLi4O+/fvB1B9OcHT0xPLli0DAEyfPh19+vTBRx99hOHDh+O7777DqVOnsHbtWjF/DIO3+Os9SLp0Xa2tqLQc723aD3dHWwT6clKKMSrOToOivFRzf9YlKCrKIDMz12NUpDOWXOq1pRD18aycnBxMmDAB/v7+GDBgABISErB//34MGjQIAJCRkYGsrCzV9pGRkdi8eTPWrl2LkJAQbNu2DTt37kRQEJ/51eTS9ZwaSfo+pVLA9rgzeo6I9EVmWvfqchKpDBKZTE/RkM6xtGWLIeoZ9fr16+vsj4uLq9E2btw4jBs3rokiMj4XH7Km98P6qfmycm8HcwcPlOVl1drv2KEHZ383Z2ZWYkdAeiLqGTU1P
auHrNnNmd/GSyKRoM2gF6tLIf6DzNwKPn2eFSEq0hlTC7EjID1hojZyEZ38YCnXnKz7d6m7OAc1b04deqDzs0th7xcOSKSQmpjBpVNfhEz6CJYu3mKHR40h45fsloKJ2shZyM3w8uN9al3Tu31rV4zqVbPiEhkXO5/OCHp6MXr+ZxciZ/8I/8dnwdKZq9E1a1IZIG25v75XrlwJX19fmJubo3v37jh58mSd2zf3Yk4t95NuQYZ074QPXhmLiCA/ONpawdvNEc8Pi8RHU/8PFnWcbTeUg40lnO2s4WDDqkyGqCkXZCA9k7TcSYBbt27FjBkzMH/+fCQmJiIkJARRUVHIycmpdXtjKOYkEQRBEDsIfUpMTESXLl1w+vRphIeHix0OkdE5uWICKoruwszGCd2mbxQ7HONUcgew0t1a0mLR5vdx9+7d8cgjj6iKMSmVSnh5eWHatGmYPXt2je3Hjx+PkpIS7N69W9XWo0cPhIaGYvXq1br5QZoYz6iJiJqbWiYItgQVFRU4ffo0Bg4cqGqTSqUYOHAgjh07Vus+x44dU9seqC7mpGl7Q2RwK5OR+O6VVyD29EVcybwDB2tLDHykIzyc+MwmtRwKhQJKpVLsMDSrqgIqKx++nYGrqqoCUF0sqbCwUNX+zxUl77tz5w4UCkWtxZkuXrxY6zG0KeZkaJioW6DcwhIcO3cFlVUKhHXwgo/73yscpWZk4+21P6Gg5J6q7dtfTyB65KP4v768VUAtw+LFi1tsMR8x/LP+wvz587FgwQJxgjFATNQtTMzeY9gam4Aqxd9nC48Gt8NbzwyBVCrBvPU/qyVpAFAKAtb8dAj+Xm7o3NZT3yET6d3cuXPx9ttvix2GZuVFgNxG7Cga7cyZM+jevTvi4+MRGhqqaq/tbBoAnJ2dIZPJGlScSZtiToaGidoI3cjJQ+zpiyi+V44OXm7oE9YeZiYm2Hv8HDb9eqLG9n+cvQxryziEtfdCbmGJxnF3HfmTiZpaBJlMBpkhL68qyAHT5r+qnIlJdQqytraGra3tQ7c3MzNDly5dEBsbi9GjRwOonkwWGxuLqVOn1rqPNsWcDA0TtZH5as9RbPntJB6cyx+z9yiW/etxbItL1LjfbwkpsLGsewGFGzl5ugqTjJiZtYPaf6kJtNDJZAAwY8YMTJw4EV27dkW3bt3w6aefoqSkBM8//zwA4yzmxERtRA6fvYzNB2o++J+TV4T5G36uM9FWKhR42IN6zvZ1X2p75aPNyCsqhYONJb544+l6xUzGJ3TyCrFDaAFa7jPx48ePx+3btzFv3jxkZ2cjNDQU+/btU00Yy8jIgPSBxWDuF3N655138J///Aft27dvdsWcmKiNyM9Hzmrsu5GTB1MTGSqrFBq36dm5LX45lozSsopa+4f1qPsvdl5RKe4UFNcvWCLSXgs+owaAqVOnarzUbYzFnFr2p21krj/k0nQHL1eNfX6tnNGpTSvMeXYITE1q3psb1SsEEUF+jY6RiHSghSfqloZn1EbE2c4Kt/OLNPYP69EZeUWlyLxToNZuITfFhCE9ELP3GNJu3kaPTm1gIpOiuLQc9taWiOoeiJB2XBuayGBwOdgWhYnaiAzpEYSUa7U/xO9kZwV/HzeM698F59IykXbzNioVCoS194K/txuWf7sPZRVVavuM6hWCqWP76SN0aoR7uZm4cWwb8i6fAgA4tOuK1hFjYeHIGfrGi4m6JWGiNiJR3QKRmHoN8UmX1NrNzUzh7miL6Pe+UU0Yc7SxxKvj+qOrvy+eWvhljSQNAD8d/hNhHbzQs3M7fYRPWii+dQXJ38yGouzvx+pundmPOymH0fm55bB2q75dUVGUi4KMZEhkJrBvEwYTOQunNGs8o25RmKiNiEwqxdsThmFg13TVc9T+Xm44l56JPy/fUNs2t6gU7369B08P6oai0jKNY+45do6J2oClH/hSLUnfpygrQfqBLxH09GKk7V+NW2f2Q1BWTySUmVnAu88z8
Oz+uL7DJV1hom5RmKiNjEQiQY9OfujRqfpM6krmbWyq5ZEtAKhSKHH47OU6x8up4543iauiOA8FVzXP9C+4ehZp+1YhO3GvWrui4h7SD3wJU0t7uHbmrQ0iQ8epg0bufHpWnf0Pe5zK09leh9GQLikq7gGo6+F3AbfOxmrsvXFsu85jIj1pWdWJWzwmaiNnaW5WZ7+DtSWc7aw19o/sGaLrkEhH5HauMK1j9S8Tc2sIVbU/Ew8ApTnp/0v2RGTImKiNXI9ObWBupnlN4H5dArDghcdgZ2Wh1i6RAJOGRiDc37upQyQtSWUmaPXISI39Lg+5rC2RmUAia/7rRRMZO96jNjIVVVXIySuCjYU57KwtYGUuR/SIXvjv9oM1tvX1cMLoXiGwspBj4zvP47dTKbiSeRu2VhYY9EhHeLk6ivATUEO0jhyHqntFyEzYBUFRPXNf8r8E7jvgBeRfOYN7d2/Uuq9zQE9IZfwV0CwJAieUtSD8V2okFAolNu4/jp+PnEVRaRmkEgm6BbbBy6N7Y2SvELg52mJ7XCJSr9+CtYUcA7oE4In+XWBlUV2Iw9LcDCN78TJ3cyORSNBm4GR4RoxF/pXqoiv2fuEws7IHALSN+jfOb10IQVGptp+plT28+zyn73BJZ3iPuiVhojYSH209gAMJKar3SkHA8fNXcOnGLax642l0D2yD7oFtRIyQmpKZlT1cO/ev0W7vF4aQSR/gxrHtyL9yBhKZCZwDesIz8v9gbqd5SVkycIISgAGX4SSdYqI2AtdzcvHbqZRa++4WlOCnw2cxaWjzqb1KumXt0R4BY2aLHQbpEmd9tyicTGYETly4Wue/2xPnrwAABEFAYUkZKiprrkJGRESGiWfULYAA4Ocjf2Jb3Blk3smHqUyGXiHtMHl4T7g52oodHhE1lJS/ulsSnlEbge6BvnX2m5uZ4rNtB5F5Jx8AUKlQ4GBiKl777HvcLai5/CQRGTgpf3W3JPy0jYCXqyMGdu1Ya5+DjSVSNVTUulNQjB2HEpsyNCIiaiQmaiMx88lBeGrgI7CxrH7cSiqRoEdgGzzeOwxVSqXG/Y4kp+krRCIi0gJvdBgJmUyKF4b3xLODuyMnrwjWlnLYW1vil6PJde6nUGhO4kREJD4maiNjZmqC1q5/r/8c3sEbEonmpzm6cIlQIiKDxkvfRs7D2Q4Du9R+/9pCboqxfcP1HBERETUEz6hbgNfHD4S1pRx7j59HWUX1UpLtW7ti6th+XM+7BagqK8atPw8gPz0JEpkpnP0j4dzpUUhZkIOoWWCibgFMTWR45fG+mDgkAtdu5cLaQg5vNyboluBebhaSv5mNiqI7qrbc1GPIOr0bnZ5+FyZySxGjI6L64KXvFsTKQo5AXw8m6Rbk8i8r1JL0fUU3U5FxaLMIERFRQzFRExmpe7lZKLimedZ/zp8HIAic9U9k6JioiYxURdHdOvuryoqhqCjTUzREpC0maiIjZe7gDkg0/xM3s3GCzMxCjxERkTaYqImMlNzWGU7+PTT2e3QZBolEoseIiEgbTNRERqzdsKmw9mhXo925Yy+0jhwnQkRE1FB8PIvIiJla2iHkhU+QeykB+elJkMpM4BTQE7atA8QOjYjqiYmayMhJJFI4degOpw7dxQ6FiLTAS99EREQGjImaiIjIgDFRExERGTDeo24h7uQX45djyUi9fgs2FnL07xKAbh19+XgOEZGBY6JuAc6m3cDcdbtQWl6havs9MRX9wv0x+5khkEqZrImIDBUvfRs5hUKJpd/sVUvS9x1MTMWvCRdEiIqIiOqLidrInUy5irsFJRr79x4/p8doiIiooZiojdydgqI6+2/nF+spEiIi0oaoiXrZsmV45JFHYGNjA1dXV4wePRqpqal17hMTEwOJRKL2Mjc311PEzY+ni0Od/a1d7PUTCBERaUXURB0fH48pU6bg+PHjOHDgACorKzF48GCUlGi+VAsAtra2yMrKUr2uXbump4ibn7D2X
vBy1ZysR/YK0WM0RETUUKLO+t63b5/a+5iYGLi6uuL06dPo3bu3xv0kEgnc3d2bOjyjIJFIMP+FEZizekeNy9xPDnwEvYJrFmwgIiLDYVCPZxUUFAAAHB0d69yuuLgYPj4+UCqVCA8Px9KlS9GpUyd9hNgs+bg5IuY/kxB35i+156i93er+/0xEROIzmEStVCrx2muvoWfPnggKCtK4nb+/PzZs2IDg4GAUFBTgww8/RGRkJM6fP4/WrVvX2L68vBzl5eWq98XFLXPylJmpCQZ3C8TgboFih0JERA1gMIl6ypQpOHfuHA4fPlzndhEREYiIiFC9j4yMRMeOHbFmzRosXry4xvbLli3DwoULdR4vERGRPhjE41lTp07F7t27cfDgwVrPiutiamqKsLAwXL58udb+OXPmoKCgQPWKj4/XRchGqbDkHnIL657IR0RE+iXqGbUgCJg2bRp+/PFHxMXFoU2bNg0eQ6FQIDk5GcOGDau1Xy6XQy6Xq95bW1trHa+xunA1E+t3H8HZtJsAAF93JzwzuBv6hvmLHBkREYmaqKdMmYLNmzfjp59+go2NDbKzswEAdnZ2sLCwAABMmDABnp6eWLZsGQBg0aJF6NGjB9q1a4f8/Hx88MEHuHbtGl588UXRfo7m7OK1bMz6YjsqKhWqtqvZd7Fk416UVVRhSHdO0iMiEpOoiXrVqlUAgL59+6q1f/XVV5g0aRIAICMjA1Lp31fo8/LyEB0djezsbDg4OKBLly44evQoAgM5SUobG/cdU0vSD/p67zEM6toRMln97pA42Fiq/ZeIiBpP9EvfDxMXF6f2/pNPPsEnn3zSRBEZr9zCEuw9fg6p12/B2kKOAV0C0LmtJ06lal4s5k5BMVKv30Kgr0e9jvHFG0/rKlwiIvofg5n1TU3nfHom3l67EyVlf1fQOpCQgv7h/njYdyWl8uFfpoiIqOkYxKxvajoKhRJLNu5RS9L3/Z6YitZ1LC9qb22BDt6uTRkeERE9BBO1kUu4eLXOClkyqRRSqaTWvicHPAIzE150IaLmJzc3F8888wxsbW1hb2+PyZMnP3TBq759+9Yo+vTvf/9bTxFrxkRt5G7n113msrSsAosmj4Svu5OqzcnOClPG9MXYvuFNHR4RUZN45plncP78eRw4cAC7d+/GoUOH8NJLLz10v+joaLWiT++//74eoq0bT5eMXCtn+zr7PV3s0T2wDboHtkHGrVxUKRTwcXOq90xvIiJDk5KSgn379iEhIQFdu3YFAPz3v//FsGHD8OGHH6JVq1Ya97W0tDS4ok/8bWzkwjt4w7OOmtMjIoNVf/Z2c4RfKxcmaSJq1o4dOwZ7e3tVkgaAgQMHQiqV4sSJE3Xuu2nTJjg7OyMoKAhz5sxBaWlpU4f7UDyjNnISiQTzn38Ms1f/WGN50P/rG47eoe1FioyIqFpxcTEKCwtV7/+5omRDZWdnw9VVfSKsiYkJHB0dVQtr1ebpp5+Gj48PWrVqhbNnz+Ktt95CamoqduzYoXUsusBE3QK08XBGzH8m4eCZVPyVkQ0rCzkGdumINq2cxQ6NiAh9+vRRez9//nwsWLCgxnazZ8/Ge++9V+dYKSkpWsfx4D3szp07w8PDAwMGDEBaWhratm2r9biNxUTdQljITTGsRxCG9dBcQpSISAzx8fEIDQ1Vvdd0Nv3GG2+oVq3UxM/PD+7u7sjJyVFrr6qqQm5uboPuP3fv3h0AcPnyZSZq0h2FUonLN3KgFAS0b+0KE5lM7JCIiOpkbW0NW1vbh27n4uICFxeXh24XERGB/Px8nD59Gl26dAEA/P7771AqlarkWx9JSUkAAA+P+q3O2FSYqI3IrycvIGbvUdVz0w42lnh6UDeMfjRU3MCIiPSoY8eOGDJkCKKjo7F69WpUVlZi6tSpePLJJ1Uzvm/evIkBAwZg48aN6NatG9LS0rB582YMGzYMTk5OOHv2LF5//XX07t0bwcHBDzli02KiNhLxZ/7CB
1t+VWvLKyrFyh1xkEmlGNFT3L9oRET6tGnTJkydOhUDBgyAVCrF2LFj8dlnn6n6KysrkZqaqprVbWZmht9++w2ffvopSkpK4OXlhbFjx+Kdd94R60dQYaI2Et/+qvmRgy2/ncSwiCDIpHzsiohaBkdHR2zevFljv6+vr1phKC8vL8THx+sjtAbjb24jcLegBFez72rsv51fjIxbuXqMiIiIdIVn1EbApB4LlEglEhxKuoTU69mwsTBHv3B/uDk+fPIGERGJi4naCNhZWyCoTSucS8+std/TxR7z1u9C5p0CVdtXe45iwtAIPDOom77CJCIiLfDSt5F4YXhPmJrUfBRLKpWgqkqplqQBQCkIiNlzFCfOp+srRCIi0gITtZHo3NYTH7wyFuEdvCH5X9XKzn6eePGxXriVV6hxv52Hk/QTIBERaYWXvo1Ipzat8N7LY1BWUQlBEGAhN8PPR/6sc59r2ZxkRkRkyJiojZC5manqzw42VnVu62Rr2dThEBFRI/DSt5HrHtgGDjaak3FU9056jIaIiBqKidrImZrI8ObTUZCb1rx4Ehnkh6HdWaSDiMiQ8dJ3C9A1wAdr33wWPx85i9Trt2BtIceALgHoFdyOq5URERk4JuoWopWzPf41qrfYYRARUQPxdIqIiMiAMVETEREZMCZqIiIiA8ZETUREZMCYqImIiAwYEzUREZEBY6ImIiIyYEzUREREBqzFLniSkpIidghE9D8eHh7w8PAQO4wGy8rKQlZWlthhNFv8PVw/LS5Re3h4oE+fPnj22WfFDoWI/mf+/PlYsGCB2GE02Jo1a7Bw4UKxw2jW+vTp0yy/pOmTRBAEQewg9K0lfwsuLi5Gnz59EB8fD2tra7HDIT0y5M+eZ9T1Z8ifozaa62evTy0yUbdkhYWFsLOzQ0FBAWxtbcUOh/SIn71x4OfY8nAyGRERkQFjoiYiIjJgTNQtjFwux/z58yGXy8UOhfSMn71x4OfY8vAeNRERkQHjGTUREZEBY6ImIiIyYEzUREREBoyJmhokLi4OEokE+fn5YodCRNQiMFGLKDs7G9OmTYOfnx/kcjm8vLwwYsQIxMbG6vQ4ffv2xWuvvabTMeuydu1a9O3bF7a2tkzqjSSRSOp8NWbZTYlEgp07dz50uyVLliAyMhKWlpawt7fX+ngtGT9HaowWt9a3obh69Sp69uwJe3t7fPDBB+jcuTMqKyuxf/9+TJkyBRcvXtRrPIIgQKFQwMSk8X8lSktLMWTIEAwZMgRz5szRQXQt14PLU27duhXz5s1Damqqqk0fS0hWVFRg3LhxiIiIwPr165v8eMaInyM1ikCiGDp0qODp6SkUFxfX6MvLy1P9+dq1a8LIkSMFKysrwcbGRhg3bpyQnZ2t6p8/f74QEhIibNy4UfDx8RFsbW2F8ePHC4WFhYIgCMLEiRMFAGqv9PR04eDBgwIAYc+ePUJ4eLhgamoqHDx4UCgrKxOmTZsmuLi4CHK5XOjZs6dw8uRJ1fHu7/dgjJo0ZFt6uK+++kqws7NTa1u3bp0QEBAgyOVywd/fX1i5cqWqr7y8XJgyZYrg7u4uyOVywdvbW1i6dKkgCILg4+Oj9nfCx8dHq+NTw/FzpIbiGbUIcnNzsW/fPixZsgRWVlY1+u9fllIqlRg1ahSsra0RHx+PqqoqTJkyBePHj0dcXJxq+7S0NOzcuRO7d+9GXl4ennjiCSxfvhxLlizBihUr8NdffyEoKAiLFi0CALi4uODq1asAgNmzZ+PDDz+En58fHBwc8Oabb2L79u34+uuv4ePjg/fffx9RUVG4fPkyHB0dm/p/DTXApk2bMG/ePHz++ecICwvDmTNnEB0dDSsrK0ycOBGfffYZdu3ahe+//x7e3t64fv06rl+/DgBISEiAq6srvvrqKwwZMgQymUzkn6bl4udID8NELYLLly9DEAQEBATUuV1sbCySk5ORnp4OLy8vAMDGjRvRqVMnJCQk4JFHHgFQndBjY
mJgY2MDAHjuuecQGxuLJUuWwM7ODmZmZrC0tIS7u3uNYyxatAiDBg0CAJSUlGDVqlWIiYnB0KFDAQDr1q3DgQMHsH79esyaNUtn/w+o8ebPn4+PPvoIY8aMAQC0adMGFy5cwJo1azBx4kRkZGSgffv26NWrFyQSCXx8fFT7uri4AKj+Uljb3wvSH36O9DCcTCYCoZ6LwaWkpMDLy0uVpAEgMDAQ9vb2agXXfX19VUkaqC4bl5OTU69jdO3aVfXntLQ0VFZWomfPnqo2U1NTdOvWjQXeDUxJSQnS0tIwefJkWFtbq17vvvsu0tLSAACTJk1CUlIS/P398eqrr+LXX38VOWr6J36OVB88oxZB+/btIZFIdDZhzNTUVO29RCKBUqms1761XXonw1dcXAyg+opH9+7d1fruX/4MDw9Heno69u7di99++w1PPPEEBg4ciG3btuk9XqodP0eqD55Ri8DR0RFRUVFYuXIlSkpKavTff5ypY8eOavejAODChQvIz89HYGBgvY9nZmYGhULx0O3atm0LMzMzHDlyRNVWWVmJhISEBh2Pmp6bmxtatWqFK1euoF27dmqvNm3aqLaztbXF+PHjsW7dOmzduhXbt29Hbm4ugOovePX5e0FNh58j1QfPqEWycuVK9OzZE926dcOiRYsQHByMqqoqHDhwAKtWrUJKSgoGDhyIzp0745lnnsGnn36KqqoqvPLKK+jTp4/aJeuH8fX1xYkTJ3D16lVYW1trnBRmZWWFl19+GbNmzYKjoyO8vb3x/vvvo7S0FJMnT6738bKzs5GdnY3Lly8DAJKTk2FjYwNvb29OSNOhhQsX4tVXX4WdnR2GDBmC8vJynDp1Cnl5eZgxYwY+/vhjeHh4ICwsDFKpFD/88APc3d1VkxV9fX0RGxuLnj17Qi6Xw8HBodbjZGRkIDc3FxkZGVAoFEhKSgIAtGvXTi+PFRk7fo70UGJPO2/JMjMzhSlTpgg+Pj6CmZmZ4OnpKYwcOVI4ePCgapv6Pp71oE8++UTtMY3U1FShR48egoWFRY3Hs/756NS9e/eEadOmCc7Ozlo/njV//vwaj4QBEL766ist/i/RfbU9VrNp0yYhNDRUMDMzExwcHITevXsLO3bsEARBENauXSuEhoYKVlZWgq2trTBgwAAhMTFRte+uXbuEdu3aCSYmJnU+1lPbI34A1P6eUv3xc6SGYplLIiIiA8Z71ERERAaMiZqIiMiAMVETEREZMCZqIiIiA8ZETURkgFj7ne5jojZQkyZNgkQiwfLly9Xad+7cCYlE0mTHzc3NxbRp0+Dv7w8LCwt4e3vj1VdfRUFBgdp2GRkZGD58OCwtLeHq6opZs2ahqqqqyeJqSfjZEwBERkYiKysLdnZ2YodCImOiNmDm5uZ47733kJeXp7djZmZmIjMzEx9++CHOnTuHmJgY7Nu3T23BE4VCgeHDh6OiogJHjx7F119/jZiYGMybN09vcRo7fvZkZmYGd3f3Jv1yRs2E2A9yU+0mTpwoPPbYY0JAQIAwa9YsVfuPP/4o6Ptj+/777wUzMzOhsrJSEARB2LNnjyCVStUWXlm1apVga2srlJeX6zU2Y8TP3jj16dNHmDp1qjB9+nTB3t5ecHV1FdauXSsUFxcLkyZNEqytrYW2bdsKe/bsEQSh5uJC9xdK2bdvnxAQECBYWVkJUVFRQmZmptoxpk+frnbcUaNGCRMnTlS9X7lypdCuXTtBLpcLrq6uwtixY5v6R6dG4hm1AZPJZFi6dCn++9//4saNG/Xeb+jQoWqVeP756tSpU4PiKCgogK2tLUxMqlecPXbsGDp37gw3NzfVNlFRUSgsLMT58+cbNDbVjp+9cfr666/h7OyMkydPYtq0aXj55Zcxbtw4REZGIjExEYMHD8Zzzz2H0tLSWvcvLS3Fhx9+iG+++QaHDh1CRkYGZs6cWe/jnzp1Cq+++ioWLVqE1NRU7Nu3D71799bVj0dNhGt9G7jHH38co
aGhmD9/PtavX1+vfb788kvcu3dPY/8/q23V5c6dO1i8eDFeeuklVVt2drbaL2oAqvfZ2dn1Hpvqxs/e+ISEhOCdd94BAMyZMwfLly+Hs7MzoqOjAQDz5s3DqlWrcPbs2Vr3r6ysxOrVq9G2bVsAwNSpU7Fo0aJ6Hz8jIwNWVlZ47LHHYGNjAx8fH4SFhTXyp6KmxkTdDLz33nvo379/vb85e3p66uS4hYWFGD58OAIDA7FgwQKdjEkNw8/euAQHB6v+LJPJ4OTkhM6dO6va7n/pycnJga2tbY39LS0tVUkaaFjteQAYNGgQfHx84OfnhyFDhmDIkCF4/PHHYWlpqc2PQ3rCS9/NQO/evREVFYU5c+bUa3tdXP4sKirCkCFDYGNjgx9//FHtTMzd3R23bt1S2/7+e3d39wb8ZPQw/OyNS2214x9suz9xTFM9+dr2Fx4o1yCVStXeA9Vn4ffZ2NggMTERW7ZsgYeHB+bNm4eQkBA+AmbgeEbdTCxfvhyhoaHw9/d/6LaNvfxZWFiIqKgoyOVy7Nq1C+bm5mr9ERERWLJkCXJycuDq6goAOHDgAGxtbVm3ugnws6f6cnFxQVZWluq9QqHAuXPn0K9fP1WbiYkJBg4ciIEDB2L+/Pmwt7fH77//jjFjxogRMtUDE3Uzcb8u9WefffbQbRtz+bOwsBCDBw9GaWkpvv32WxQWFqKwsBBA9S8BmUyGwYMHIzAwEM899xzef/99ZGdn45133sGUKVMgl8u1PjbVjp891Vf//v0xY8YM/PLLL2jbti0+/vhjtbPl3bt348qVK+jduzccHBywZ88eKJXKen0JJPEwUTcjixYtwtatW5v0GImJiThx4gSA6oLyD0pPT4evry9kMhl2796Nl19+GREREbCyssLEiRMbNKmFGoafPdXHCy+8gD///BMTJkyAiYkJXn/9dbWzaXt7e+zYsQMLFixAWVkZ2rdvjy1btjT4aQDSL9ajJiIiMmCcTEZERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERELQxrXTcvTNRERI2QnZ2NadOmwc/PD3K5HF5eXhgxYgRiY2N1epy+ffvitdde0+mYdVm7di369u0LW1tbJnWRMVETEWnp6tWr6NKlC37//Xd88MEHSE5Oxr59+9CvXz9MmTJF7/EIgoCqqiqdjFVaWoohQ4bgP//5j07Go0YQtcgmEVEzNnToUMHT01MoLi6u0Xe/jrQgCMK1a9eEkSNHClZWVoKNjY0wbtw4tZre8+fPF0JCQoSNGzcKPj4+gq2trTB+/HihsLBQEITqGuUA1F7p6emqmtV79uwRwsPDBVNTU+HgwYNCWVmZMG3aNMHFxUWQy+VCz549hZMnT6qO989a13VpyLbUNHhGTUSkhdzcXOzbtw9TpkyBlZVVjX57e3sA1QU2Ro0ahdzcXMTHx+PAgQO4cuUKxo8fr7Z9Wloadu7cid27d2P37t2Ij4/H8uXLAQArVqxAREQEoqOjkZWVhaysLHh5ean2nT17NpYvX46UlBQEBwfjzTffxPbt2/H1118jMTER7dq1Q1RUFHJzc5vufwg1GS4hSkSkhcuXL0MQBAQEBNS5XWxsLJKTk5Genq5Krhs3bkSnTp2QkJCARx55BEB1Qo+JiYGNjQ0A4LnnnkNsbCyWLFkCOzs7mJmZwdLSstYqZYsWLcKgQYMAACUlJVi1ahViYmIwdOhQAMC6detw4MABrF+/HrNmzdLZ/wPSD55RExFpQajn6sspKSnw8vJSOwMODAyEvb09UlJSVG2+vr6qJA00rNZ0165dVX9OS0tDZWUlevbsqWozNTVFt27d1I5HzQcTNRGRFtq3bw+JRIKLFy/qZLzaak1rqkv9T7VdeifjwURNRKQFR0dHREVFYeXKlSgpKanRf/9xpo4dO+L69eu4fv26qu/ChQvIz89vUA1vMzMzKBSKh27Xtm1bmJmZ4ciRI6q2yspKJCQksGZ4M8VETUSkpZUrV
0KhUKBbt27Yvn07Ll26hJSUFHz22WeIiIgAAAwcOFBVUzwxMREnT57EhAkT0KdPH7VL1g/j6+uLEydO4OrVq7hz547Gs20rKyu8/PLLmDVrFvbt24cLFy4gOjoapaWlmDx5cr2Pl52djaSkJFy+fBkAkJycjKSkJE5IEwETNRGRlvz8/JCYmIh+/frhjTfeQFBQEAYNGoTY2FisWrUKQPUl7J9++gkODg7o3bs3Bg4cCD8/vwbXF585cyZkMhkCAwPh4uKCjIwMjdsuX74cY8eOxXPPPYfw8HBcvnwZ+/fvh4ODQ72Pt3r1aoSFhSE6OhoA0Lt3b4SFhWHXrl0Nipsaj/WoiYiIDBjPqImIiAwYEzUREZEBY6ImIiIyYEzUREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQFjoiYiIjJgTNREREQG7P8BAzIgTT7Tai8AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig1 = my_data.mean_diff.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Create a Gardner-Altman plot for the Hedges' g effect size." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig2 = my_data.hedges_g.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a Cumming estimation plot for the mean difference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig3 = my_data.mean_diff.plot(float_contrast=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Create a paired Gardner-Altman plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_data_paired = dabest.load(df, idx=(\"Control 1\", \"Test 1\"),\n", + " id_col = \"ID\", paired='baseline')\n", + "fig4 = my_data_paired.mean_diff.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a multi-group Cumming plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_multi_groups = dabest.load(df, id_col = \"ID\", \n", + " idx=((\"Control 1\", \"Test 1\"),\n", + " (\"Control 2\", \"Test 2\")))\n", + "fig5 = my_multi_groups.mean_diff.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a shared control Cumming plot." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_shared_control = dabest.load(df, id_col = \"ID\",\n", + " idx=(\"Control 1\", \"Test 1\",\n", + " \"Test 2\", \"Test 3\"))\n", + "fig6 = my_shared_control.mean_diff.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a repeated meausures (against baseline) Slopeplot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_rm_baseline = dabest.load(df, id_col = \"ID\", paired = \"baseline\",\n", + " idx=(\"Control 1\", \"Test 1\",\n", + " \"Test 2\", \"Test 3\"))\n", + "fig7 = my_rm_baseline.mean_diff.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a repeated meausures (sequential) Slopeplot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_rm_sequential = dabest.load(df, id_col = \"ID\", paired = \"sequential\",\n", + " idx=(\"Control 1\", \"Test 1\",\n", + " \"Test 2\", \"Test 3\"))\n", + "fig8 = my_rm_sequential.mean_diff.plot();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class PermutationTest:\n", + " \"\"\"\n", + " A class to compute and report permutation tests.\n", + " \n", + " Parameters\n", + " ----------\n", + " control : array-like\n", + " test : array-like\n", + " These should be numerical iterables.\n", + " effect_size : string.\n", + " Any one of the following are accepted inputs:\n", + " 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', 'delta_g\" or 'cliffs_delta'\n", + " is_paired : string, default None\n", + " permutation_count : int, default 10000\n", + " The number of permutations (reshuffles) to perform.\n", + " 
random_seed : int, default 12345\n", + " `random_seed` is used to seed the random number generator during\n", + " bootstrap resampling. This ensures that the generated permutations\n", + " are replicable.\n", + " \n", + " Returns\n", + " -------\n", + " A :py:class:`PermutationTest` object:\n", + " `difference`:float\n", + " The effect size of the difference between the control and the test.\n", + " `effect_size`:string\n", + " The type of effect size reported.\n", + " \n", + " \n", + " \"\"\"\n", + " \n", + " def __init__(self, control: array,\n", + " test: array, # These should be numerical iterables.\n", + " effect_size:str, # Any one of the following are accepted inputs: 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'\n", + " is_paired:str=None,\n", + " permutation_count:int=5000, # The number of permutations (reshuffles) to perform.\n", + " random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable.\n", + " **kwargs):\n", + " from ._stats_tools.effsize import two_group_difference\n", + " from ._stats_tools.confint_2group_diff import calculate_group_var\n", + " \n", + "\n", + " self.__permutation_count = permutation_count\n", + "\n", + " # Run Sanity Check.\n", + " if is_paired and len(control) != len(test):\n", + " raise ValueError(\"The two arrays do not have the same length.\")\n", + "\n", + " # Initialise random number generator.\n", + " # rng = random.default_rng(seed=random_seed)\n", + " rng = RandomState(PCG64(random_seed))\n", + "\n", + " # Set required constants and variables\n", + " control = array(control)\n", + " test = array(test)\n", + "\n", + " control_sample = control.copy()\n", + " test_sample = test.copy()\n", + "\n", + " BAG = array([*control, *test])\n", + " CONTROL_LEN = int(len(control))\n", + " EXTREME_COUNT = 0.\n", + " THRESHOLD = abs(two_group_difference(control, test, \n", + " is_paired, 
effect_size))\n", + " self.__permutations = []\n", + " self.__permutations_var = []\n", + "\n", + " for i in range(int(permutation_count)):\n", + " \n", + " if is_paired:\n", + " # Select which control-test pairs to swap.\n", + " random_idx = rng.choice(CONTROL_LEN,\n", + " rng.randint(0, CONTROL_LEN+1),\n", + " replace=False)\n", + "\n", + " # Perform swap.\n", + " for i in random_idx:\n", + " _placeholder = control_sample[i]\n", + " control_sample[i] = test_sample[i]\n", + " test_sample[i] = _placeholder\n", + " \n", + " else:\n", + " # Shuffle the bag and assign to control and test groups.\n", + " # NB. rng.shuffle didn't produce replicable results...\n", + " shuffled = rng.permutation(BAG) \n", + " control_sample = shuffled[:CONTROL_LEN]\n", + " test_sample = shuffled[CONTROL_LEN:]\n", + "\n", + "\n", + " es = two_group_difference(control_sample, test_sample, \n", + " False, effect_size)\n", + " \n", + " group_var = calculate_group_var(var(control_sample, ddof=1), \n", + " CONTROL_LEN, \n", + " var(test_sample, ddof=1), \n", + " len(test_sample))\n", + " self.__permutations.append(es)\n", + " self.__permutations_var.append(group_var)\n", + "\n", + " if abs(es) > THRESHOLD:\n", + " EXTREME_COUNT += 1.\n", + "\n", + " self.__permutations = array(self.__permutations)\n", + " self.__permutations_var = array(self.__permutations_var)\n", + "\n", + " self.pvalue = EXTREME_COUNT / permutation_count\n", + "\n", + "\n", + " def __repr__(self):\n", + " return(\"{} permutations were taken. 
The p-value is {}.\".format(self.permutation_count, \n", + " self.pvalue))\n", + "\n", + "\n", + " @property\n", + " def permutation_count(self):\n", + " \"\"\"\n", + " The number of permuations taken.\n", + " \"\"\"\n", + " return self.__permutation_count\n", + "\n", + "\n", + " @property\n", + " def permutations(self):\n", + " \"\"\"\n", + " The effect sizes of all the permutations in a list.\n", + " \"\"\"\n", + " return self.__permutations\n", + "\n", + " \n", + " @property\n", + " def permutations_var(self):\n", + " \"\"\"\n", + " The experiment group variance of all the permutations in a list.\n", + " \"\"\"\n", + " return self.__permutations_var\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Notes**:\n", + " \n", + "The basic concept of permutation tests is the same as that behind bootstrapping.\n", + "In an \"exact\" permutation test, all possible resuffles of the control and test \n", + "labels are performed, and the proportion of effect sizes that equal or exceed \n", + "the observed effect size is computed. This is the probability, under the null \n", + "hypothesis of zero difference between test and control groups, of observing the\n", + "effect size: the p-value of the Student's t-test.\n", + "\n", + "Exact permutation tests are impractical: computing the effect sizes for all reshuffles quickly exceeds trivial computational loads. 
A control group and a test group both with 10 observations each would have a total of $20!$ or $2.43 \\times {10}^{18}$ reshuffles.\n", + "Therefore, in practice, \"approximate\" permutation tests are performed, where a sufficient number of reshuffles are performed (5,000 or 10,000), from which the p-value is computed.\n", + "\n", + "More information can be found [here](https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example: permutation test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "control = norm.rvs(loc=0, size=30, random_state=12345)\n", + "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", + "perm_test = dabest.PermutationTest(control, test, \n", + " effect_size=\"mean_diff\", \n", + " is_paired=None)\n", + "perm_test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbs/tests/test_01_effsizes_pvals.ipynb b/nbs/tests/test_01_effsizes_pvals.ipynb index fa848f90..2b74ecac 100644 --- a/nbs/tests/test_01_effsizes_pvals.ipynb +++ b/nbs/tests/test_01_effsizes_pvals.ipynb @@ -24,7 +24,8 @@ "outputs": [], "source": [ "from dabest._stats_tools import effsize\n", - "from dabest._classes import TwoGroupsEffectSize, PermutationTest, Dabest" + "from dabest._classes import Dabest\n", + "from dabest._effsize_objects import TwoGroupsEffectSize, PermutationTest" ] }, { diff --git a/nbs/tests/test_03_plotting.py b/nbs/tests/test_03_plotting.py index 40a753a9..7311aec6 100644 --- a/nbs/tests/test_03_plotting.py +++ b/nbs/tests/test_03_plotting.py @@ -160,7 +160,7 @@ def test_10_cummings_multi_groups(): @pytest.mark.mpl_image_compare(tolerance=10) def test_11_inset_plots(): - + # TODO Better remove external dependencies in tests. 
Tests need to run locally without internet. # Load the iris dataset. Requires internet access. iris = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/iris.csv") iris_melt = pd.melt(iris.reset_index(), diff --git a/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb b/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb index b3f77f83..2f0d1068 100644 --- a/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb +++ b/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb @@ -21,8 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "from dabest._stats_tools import effsize\n", - "from dabest._classes import TwoGroupsEffectSize, PermutationTest, Dabest, EffectSizeDataFrame" + "from dabest._classes import Dabest" ] }, { diff --git a/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb b/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb index 521117dc..cddaf87e 100644 --- a/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb +++ b/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb @@ -22,7 +22,8 @@ "outputs": [], "source": [ "from dabest._stats_tools import effsize\n", - "from dabest._classes import TwoGroupsEffectSize, PermutationTest, Dabest" + "from dabest._classes import Dabest\n", + "from dabest._effsize_objects import PermutationTest" ] }, { diff --git a/nbs/tests/test_08_mini_meta_pvals.ipynb b/nbs/tests/test_08_mini_meta_pvals.ipynb index c5d58184..b6153c3c 100644 --- a/nbs/tests/test_08_mini_meta_pvals.ipynb +++ b/nbs/tests/test_08_mini_meta_pvals.ipynb @@ -21,7 +21,8 @@ "source": [ "from dabest._stats_tools import effsize\n", "from dabest._stats_tools import confint_2group_diff as ci2g\n", - "from dabest._classes import PermutationTest, Dabest" + "from dabest._classes import Dabest\n", + "from dabest._effsize_objects import PermutationTest" ] }, { From e7291c304e1bda7169b08d33937f8ce2223386b3 Mon Sep 17 00:00:00 2001 From: cyberosa Date: Sat, 16 Dec 2023 20:16:20 +0100 Subject: [PATCH 05/10] Renaming and refactoring in classes. 
Modularity principle --- dabest/__init__.py | 1 + dabest/_api.py | 6 +- dabest/{_classes.py => _dabest_object.py} | 324 ++++++++--------- dabest/_effsize_objects.py | 4 - nbs/API/{class.ipynb => dabest_object.ipynb} | 328 +++++++++--------- nbs/API/delta_objects.ipynb | 2 +- nbs/API/effsize_objects.ipynb | 6 +- nbs/API/load.ipynb | 8 +- nbs/tests/test_01_effsizes_pvals.ipynb | 3 +- ..._04_repeated_measures_effsizes_pvals.ipynb | 2 +- .../test_06_delta-delta_effsize_pvals.ipynb | 3 +- nbs/tests/test_08_mini_meta_pvals.ipynb | 3 +- nbs/tests/test_10_proportion_plot.py | 2 - 13 files changed, 348 insertions(+), 344 deletions(-) rename dabest/{_classes.py => _dabest_object.py} (83%) rename nbs/API/{class.ipynb => dabest_object.ipynb} (91%) diff --git a/dabest/__init__.py b/dabest/__init__.py index 4c99e500..e953af6e 100644 --- a/dabest/__init__.py +++ b/dabest/__init__.py @@ -1,5 +1,6 @@ from ._api import load, prop_dataset from ._stats_tools import effsize as effsize from ._effsize_objects import TwoGroupsEffectSize, PermutationTest +from ._dabest_object import Dabest __version__ = "2023.03.29" diff --git a/dabest/_api.py b/dabest/_api.py index 825acefd..a4cfc6b5 100644 --- a/dabest/_api.py +++ b/dabest/_api.py @@ -72,7 +72,7 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None, ------- A `Dabest` object. ''' - from ._classes import Dabest + from dabest import Dabest return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment, experiment_label, x1_level, mini_meta) @@ -81,14 +81,14 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None, # %% ../nbs/API/load.ipynb 5 import numpy as np from typing import Union, Optional +import pandas as pd def prop_dataset(group:Union[list, tuple, np.ndarray, dict], #Accepts lists, tuples, or numpy ndarrays of numeric types. group_names: Optional[list] = None): ''' Convenient function to generate a dataframe of binary data. 
''' - import pandas as pd - + if isinstance(group, dict): # If group_names is not provided, use the keys of the dict as group_names if group_names is None: diff --git a/dabest/_classes.py b/dabest/_dabest_object.py similarity index 83% rename from dabest/_classes.py rename to dabest/_dabest_object.py index 8e89dff9..deeb9881 100644 --- a/dabest/_classes.py +++ b/dabest/_dabest_object.py @@ -1,20 +1,16 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/class.ipynb. +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/dabest_object.ipynb. # %% auto 0 __all__ = ['Dabest'] -# %% ../nbs/API/class.ipynb 4 +# %% ../nbs/API/dabest_object.ipynb 4 # Import standard data science libraries from numpy import array, repeat, random, issubdtype, number import pandas as pd -import seaborn as sns from scipy.stats import norm from scipy.stats import randint -import datetime as dt -from string import Template -import warnings -# %% ../nbs/API/class.ipynb 6 +# %% ../nbs/API/dabest_object.ipynb 6 class Dabest(object): """ @@ -30,12 +26,12 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, statistics. You should not be calling this class directly; instead, use `dabest.load()` to parse your DataFrame prior to analysis. """ - from ._effsize_objects import EffectSizeDataFrame - + self.__delta2 = delta2 self.__experiment = experiment self.__ci = ci - self.__data = data + self.__input_data = data + self.__output_data = data.copy() self.__id_col = id_col self.__is_paired = paired self.__resamples = resamples @@ -43,27 +39,27 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, self.__proportional = proportional self.__mini_meta = mini_meta - # Make a copy of the data, so we don't make alterations to it. - # TODO is this really needed? 
- data_in = data.copy() - # Check if it is a valid mini_meta case - if mini_meta: + if self.__mini_meta: # Only mini_meta calculation but not proportional and delta-delta function - if proportional: + if self.__proportional: err0 = '`proportional` and `mini_meta` cannot be True at the same time.' raise ValueError(err0) - elif delta2: + if self.__delta2: err0 = '`delta` and `mini_meta` cannot be True at the same time.' raise ValueError(err0) # Check if the columns stated are valid + # TODO instead of traversing twice idx you can traverse only once + # and break the loop if the condition is not satisfied? + # TODO What if the type is not str and not tuple,list? missing raise Error if all([isinstance(i, str) for i in idx]): if len(pd.unique([t for t in idx]).tolist())!=2: err0 = '`mini_meta` is True, but `idx` ({})'.format(idx) err1 = 'does not contain exactly 2 columns.' raise ValueError(err0 + err1) + if all([isinstance(i, (tuple, list)) for i in idx]): all_idx_lengths = [len(t) for t in idx] if (array(all_idx_lengths) != 2).any(): @@ -72,11 +68,11 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, raise ValueError(err1 + err2) - + # TODO can you have True mini_meta and delta2 at the same time? # Check if this is a 2x2 ANOVA case and x & y are valid columns # Create experiment_label and x1_level - if delta2: - if proportional: + if self.__delta2: + if self.__proportional: err0 = '`proportional` and `delta` cannot be True at the same time.' raise ValueError(err0) @@ -86,12 +82,13 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, raise ValueError(err0) # Check if x is valid + # TODO if x is None is fine?? if len(x) != 2: err0 = '`delta2` is True but the number of variables indicated by `x` is {}.'.format(len(x)) raise ValueError(err0) for i in x: - if i not in data_in.columns: + if i not in self.__output_data.columns: err = '{0} is not a column in `data`. 
Please check.'.format(i) raise IndexError(err) @@ -100,12 +97,12 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, err0 = '`delta2` is True but `y` is not indicated.' raise ValueError(err0) - if y not in data_in.columns: + if y not in self.__output_data.columns: err = '{0} is not a column in `data`. Please check.'.format(y) raise IndexError(err) # Check if experiment is valid - if experiment not in data_in.columns: + if experiment not in self.__output_data.columns: err = '{0} is not a column in `data`. Please check.'.format(experiment) raise IndexError(err) @@ -116,29 +113,30 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, raise ValueError(err0) for i in experiment_label: - if i not in data_in[experiment].unique(): + if i not in self.__output_data[experiment].unique(): err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment) raise IndexError(err) else: - experiment_label = data_in[experiment].unique() + experiment_label = self.__output_data[experiment].unique() # Check if x1_level is valid if x1_level: if len(x1_level) != 2: err0 = '`x1_level` does not have a length of 2.' raise ValueError(err0) - else: - for i in x1_level: - if i not in data_in[x[0]].unique(): - err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment) - raise IndexError(err) + + for i in x1_level: + if i not in self.__output_data[x[0]].unique(): + err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment) + raise IndexError(err) else: - x1_level = data_in[x[0]].unique() + x1_level = self.__output_data[x[0]].unique() + # TODO what if experiment is None? 
elif experiment: - experiment_label = data_in[experiment].unique() - x1_level = data_in[x[0]].unique() + experiment_label = self.__output_data[experiment].unique() + x1_level = self.__output_data[x[0]].unique() self.__experiment_label = experiment_label self.__x1_level = x1_level @@ -148,16 +146,16 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, # Add a length check for unique values in the first element in list x, # if the length is greater than 2, force delta2 to be False # Should be removed if delta2 for situations other than 2x2 is supported - if len(data_in[x[0]].unique()) > 2 and x1_level is None: - delta2 = False - self.__delta2 = delta2 + if len(self.__output_data[x[0]].unique()) > 2 and x1_level is None: + self.__delta2 = False # stop the loop if delta2 is False # add a new column which is a combination of experiment and the first variable new_col_name = experiment+x[0] - while new_col_name in data_in.columns: + while new_col_name in self.__output_data.columns: new_col_name += "_" - data_in[new_col_name] = data_in[x[0]].astype(str) + " " + data_in[experiment].astype(str) + + self.__output_data[new_col_name] = self.__output_data[x[0]].astype(str) + " " + self.__output_data[experiment].astype(str) #create idx and record the first and second x variable idx = [] @@ -207,8 +205,8 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, raise ValueError(err) # Check if there is a typo on paired - if paired and paired not in ("baseline", "sequential"): - err = '{} assigned for `paired` is not valid.'.format(paired) + if self.__is_paired and self.__is_paired not in ("baseline", "sequential"): + err = '{} assigned for `paired` is not valid.'.format(self.__is_paired) raise ValueError(err) @@ -221,134 +219,22 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, err = 'You have only specified `x`. Please also specify `y`.' raise ValueError(err) - # Identify the type of data that was passed in. - if x and y: - # Assume we have a long dataset. 
- # check both x and y are column names in data. - if x not in data_in.columns: - err = '{0} is not a column in `data`. Please check.'.format(x) - raise IndexError(err) - if y not in data_in.columns: - err = '{0} is not a column in `data`. Please check.'.format(y) - raise IndexError(err) - - # check y is numeric. - if not issubdtype(data_in[y].dtype, number): - err = '{0} is a column in `data`, but it is not numeric.'.format(y) - raise ValueError(err) - - # check all the idx can be found in data_in[x] - for g in all_plot_groups: - if g not in data_in[x].unique(): - err0 = '"{0}" is not a group in the column `{1}`.'.format(g, x) - err1 = " Please check `idx` and try again." - raise IndexError(err0 + err1) - - # Select only rows where the value in the `x` column - # is found in `idx`. - plot_data = data_in[data_in.loc[:, x].isin(all_plot_groups)].copy() - - # plot_data.drop("index", inplace=True, axis=1) - - # Assign attributes - self.__x = x - self.__y = y - self.__xvar = x - self.__yvar = y - - elif x is None and y is None: - # Assume we have a wide dataset. - # Assign attributes appropriately. - self.__x = None - self.__y = None - self.__xvar = "group" - self.__yvar = "value" - - # First, check we have all columns in the dataset. - for g in all_plot_groups: - if g not in data_in.columns: - err0 = '"{0}" is not a column in `data`.'.format(g) - err1 = " Please check `idx` and try again." - raise IndexError(err0 + err1) - - set_all_columns = set(data_in.columns.tolist()) - set_all_plot_groups = set(all_plot_groups) - id_vars = set_all_columns.difference(set_all_plot_groups) - - plot_data = pd.melt(data_in, - id_vars=id_vars, - value_vars=all_plot_groups, - value_name=self.__yvar, - var_name=self.__xvar) - - # Added in v0.2.7. - # remove any NA rows. - plot_data.dropna(axis=0, how='any', subset=[self.__yvar], inplace=True) - # TODO these comments should not be in the code but on the release notes of the package version - # Lines 131 to 140 added in v0.2.3. 
- # Fixes a bug that jammed up when the xvar column was already - # a pandas Categorical. Now we check for this and act appropriately. - if isinstance(plot_data[self.__xvar].dtype, - pd.CategoricalDtype): - plot_data[self.__xvar].cat.remove_unused_categories(inplace=True) - plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, - ordered=True, - inplace=True) - else: - plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar], - categories=all_plot_groups, - ordered=True) - - # TODO Move all the plot_data logic to the function returning self.__plot_data - self.__plot_data = plot_data - # TODO Move all the all_plot_groups logic to the function returning self.__all_plot_groups + self.__plot_data = self.get_plot_data(x, y, all_plot_groups) self.__all_plot_groups = all_plot_groups # Check if `id_col` is valid - if paired: + if self.__is_paired: if id_col is None: err = "`id_col` must be specified if `paired` is assigned with a not NoneType value." raise IndexError(err) - if id_col not in plot_data.columns: + if id_col not in self.__plot_data.columns: err = "{} is not a column in `data`. 
".format(id_col) raise IndexError(err) - EffectSizeDataFrame_kwargs = dict(ci=ci, is_paired=paired, - random_seed=random_seed, - resamples=resamples, - proportional=proportional, - delta2=delta2, - experiment_label=self.__experiment_label, - x1_level=self.__x1_level, - x2=self.__x2, - mini_meta = mini_meta) - - self.__mean_diff = EffectSizeDataFrame(self, "mean_diff", - **EffectSizeDataFrame_kwargs) - - self.__median_diff = EffectSizeDataFrame(self, "median_diff", - **EffectSizeDataFrame_kwargs) - - self.__cohens_d = EffectSizeDataFrame(self, "cohens_d", - **EffectSizeDataFrame_kwargs) - - self.__cohens_h = EffectSizeDataFrame(self, "cohens_h", - **EffectSizeDataFrame_kwargs) - - self.__hedges_g = EffectSizeDataFrame(self, "hedges_g", - **EffectSizeDataFrame_kwargs) - - self.__delta_g = EffectSizeDataFrame(self, "delta_g", - **EffectSizeDataFrame_kwargs) - - if not paired: - self.__cliffs_delta = EffectSizeDataFrame(self, "cliffs_delta", - **EffectSizeDataFrame_kwargs) - else: - self.__cliffs_delta = "The data is paired; Cliff's delta is therefore undefined." + self.compute_effectsize_dfs() def __repr__(self): @@ -467,13 +353,13 @@ def delta_g(self): return self.__delta_g @property - def data(self): + def input_data(self): """ Returns the pandas DataFrame that was passed to `dabest.load()`. When `delta2` is True, a new column is added to support the function. The name of this new column is indicated by `x`. """ - return self.__data + return self.__input_data @property @@ -644,3 +530,125 @@ def _all_plot_groups(self): Returns the all plot groups, as indicated via the `idx` keyword. """ return self.__all_plot_groups + + + def get_plot_data(self, x, y, all_plot_groups): + ''' + Function to prepare some attributes for plotting + ''' + + # Identify the type of data that was passed in. + if x and y: + # Assume we have a long dataset. + # check both x and y are column names in data. + if x not in self.__output_data.columns: + err = '{0} is not a column in `data`. 
Please check.'.format(x) + raise IndexError(err) + if y not in self.__output_data.columns: + err = '{0} is not a column in `data`. Please check.'.format(y) + raise IndexError(err) + + # check y is numeric. + if not issubdtype(self.__output_data[y].dtype, number): + err = '{0} is a column in `data`, but it is not numeric.'.format(y) + raise ValueError(err) + + # check all the idx can be found in self.__output_data[x] + for g in all_plot_groups: + if g not in self.__output_data[x].unique(): + err0 = '"{0}" is not a group in the column `{1}`.'.format(g, x) + err1 = " Please check `idx` and try again." + raise IndexError(err0 + err1) + + # Select only rows where the value in the `x` column + # is found in `idx`. + plot_data = self.__output_data[self.__output_data.loc[:, x].isin(all_plot_groups)].copy() + + # Assign attributes + self.__x = x + self.__y = y + self.__xvar = x + self.__yvar = y + + elif x is None and y is None: + # Assume we have a wide dataset. + # Assign attributes appropriately. + self.__x = None + self.__y = None + self.__xvar = "group" + self.__yvar = "value" + + # First, check we have all columns in the dataset. + for g in all_plot_groups: + if g not in self.__output_data.columns: + err0 = '"{0}" is not a column in `data`.'.format(g) + err1 = " Please check `idx` and try again." + raise IndexError(err0 + err1) + + set_all_columns = set(self.__output_data.columns.tolist()) + set_all_plot_groups = set(all_plot_groups) + id_vars = set_all_columns.difference(set_all_plot_groups) + + plot_data = pd.melt(self.__output_data, + id_vars=id_vars, + value_vars=all_plot_groups, + value_name=self.__yvar, + var_name=self.__xvar) + + # Added in v0.2.7. + plot_data.dropna(axis=0, how='any', subset=[self.__yvar], inplace=True) + + # TODO these comments should not be in the code but on the release notes of the package version + # Lines 131 to 140 added in v0.2.3. + # Fixes a bug that jammed up when the xvar column was already + # a pandas Categorical. 
Now we check for this and act appropriately. + if isinstance(plot_data[self.__xvar].dtype, + pd.CategoricalDtype): + plot_data[self.__xvar].cat.remove_unused_categories(inplace=True) + plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, + ordered=True, + inplace=True) + else: + plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar], + categories=all_plot_groups, + ordered=True) + + + return plot_data + + def compute_effectsize_dfs(self): + from ._effsize_objects import EffectSizeDataFrame + + effectsize_df_kwargs = dict(ci=self.__ci, is_paired=self.__is_paired, + random_seed=self.__random_seed, + resamples=self.__resamples, + proportional=self.__proportional, + delta2=self.__delta2, + experiment_label=self.__experiment_label, + x1_level=self.__x1_level, + x2=self.__x2, + mini_meta = self.__mini_meta) + + self.__mean_diff = EffectSizeDataFrame(self, "mean_diff", + **effectsize_df_kwargs) + + self.__median_diff = EffectSizeDataFrame(self, "median_diff", + **effectsize_df_kwargs) + + self.__cohens_d = EffectSizeDataFrame(self, "cohens_d", + **effectsize_df_kwargs) + + self.__cohens_h = EffectSizeDataFrame(self, "cohens_h", + **effectsize_df_kwargs) + + self.__hedges_g = EffectSizeDataFrame(self, "hedges_g", + **effectsize_df_kwargs) + + self.__delta_g = EffectSizeDataFrame(self, "delta_g", + **effectsize_df_kwargs) + + if not self.__is_paired: + self.__cliffs_delta = EffectSizeDataFrame(self, "cliffs_delta", + **effectsize_df_kwargs) + else: + self.__cliffs_delta = "The data is paired; Cliff's delta is therefore undefined." 
diff --git a/dabest/_effsize_objects.py b/dabest/_effsize_objects.py index 2695de83..ec497d2e 100644 --- a/dabest/_effsize_objects.py +++ b/dabest/_effsize_objects.py @@ -303,10 +303,6 @@ def __init__(self, control, test, effect_size, pass - - - - def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3): RM_STATUS = {'baseline' : 'for repeated measures against baseline \n', diff --git a/nbs/API/class.ipynb b/nbs/API/dabest_object.ipynb similarity index 91% rename from nbs/API/class.ipynb rename to nbs/API/dabest_object.ipynb index 313f2680..46ae1e80 100644 --- a/nbs/API/class.ipynb +++ b/nbs/API/dabest_object.ipynb @@ -6,9 +6,9 @@ "id": "ed122c74", "metadata": {}, "source": [ - "# Class\n", + "# Dabest object\n", "\n", - "> Several classes for estimating statistics and generating plots.\n", + "> Main class for estimating statistics and generating plots.\n", "\n", "- order: 2" ] @@ -20,7 +20,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| default_exp _classes" + "#| default_exp _dabest_object" ] }, { @@ -58,12 +58,8 @@ "# Import standard data science libraries\n", "from numpy import array, repeat, random, issubdtype, number\n", "import pandas as pd\n", - "import seaborn as sns\n", "from scipy.stats import norm\n", - "from scipy.stats import randint\n", - "import datetime as dt\n", - "from string import Template\n", - "import warnings" + "from scipy.stats import randint" ] }, { @@ -100,12 +96,12 @@ " statistics. 
You should not be calling this class directly; instead,\n", " use `dabest.load()` to parse your DataFrame prior to analysis.\n", " \"\"\"\n", - " from ._effsize_objects import EffectSizeDataFrame\n", - "\n", + " \n", " self.__delta2 = delta2\n", " self.__experiment = experiment\n", " self.__ci = ci\n", - " self.__data = data\n", + " self.__input_data = data\n", + " self.__output_data = data.copy()\n", " self.__id_col = id_col\n", " self.__is_paired = paired\n", " self.__resamples = resamples\n", @@ -113,27 +109,27 @@ " self.__proportional = proportional\n", " self.__mini_meta = mini_meta \n", "\n", - " # Make a copy of the data, so we don't make alterations to it.\n", - " # TODO is this really needed?\n", - " data_in = data.copy()\n", - "\n", "\n", " # Check if it is a valid mini_meta case\n", - " if mini_meta:\n", + " if self.__mini_meta:\n", " # Only mini_meta calculation but not proportional and delta-delta function\n", - " if proportional:\n", + " if self.__proportional:\n", " err0 = '`proportional` and `mini_meta` cannot be True at the same time.'\n", " raise ValueError(err0)\n", - " elif delta2:\n", + " if self.__delta2:\n", " err0 = '`delta` and `mini_meta` cannot be True at the same time.'\n", " raise ValueError(err0)\n", " \n", " # Check if the columns stated are valid\n", + " # TODO instead of traversing twice idx you can traverse only once\n", + " # and break the loop if the condition is not satisfied?\n", + " # TODO What if the type is not str and not tuple,list? 
missing raise Error\n", " if all([isinstance(i, str) for i in idx]):\n", " if len(pd.unique([t for t in idx]).tolist())!=2:\n", " err0 = '`mini_meta` is True, but `idx` ({})'.format(idx) \n", " err1 = 'does not contain exactly 2 columns.'\n", " raise ValueError(err0 + err1)\n", + " \n", " if all([isinstance(i, (tuple, list)) for i in idx]):\n", " all_idx_lengths = [len(t) for t in idx]\n", " if (array(all_idx_lengths) != 2).any():\n", @@ -142,11 +138,11 @@ " raise ValueError(err1 + err2)\n", " \n", "\n", - "\n", + " # TODO can you have True mini_meta and delta2 at the same time? \n", " # Check if this is a 2x2 ANOVA case and x & y are valid columns\n", " # Create experiment_label and x1_level\n", - " if delta2:\n", - " if proportional:\n", + " if self.__delta2:\n", + " if self.__proportional:\n", " err0 = '`proportional` and `delta` cannot be True at the same time.'\n", " raise ValueError(err0)\n", " \n", @@ -156,12 +152,13 @@ " raise ValueError(err0)\n", "\n", " # Check if x is valid\n", + " # TODO if x is None is fine??\n", " if len(x) != 2:\n", " err0 = '`delta2` is True but the number of variables indicated by `x` is {}.'.format(len(x))\n", " raise ValueError(err0)\n", " \n", " for i in x:\n", - " if i not in data_in.columns:\n", + " if i not in self.__output_data.columns:\n", " err = '{0} is not a column in `data`. Please check.'.format(i)\n", " raise IndexError(err)\n", "\n", @@ -170,12 +167,12 @@ " err0 = '`delta2` is True but `y` is not indicated.'\n", " raise ValueError(err0)\n", " \n", - " if y not in data_in.columns:\n", + " if y not in self.__output_data.columns:\n", " err = '{0} is not a column in `data`. Please check.'.format(y)\n", " raise IndexError(err)\n", "\n", " # Check if experiment is valid\n", - " if experiment not in data_in.columns:\n", + " if experiment not in self.__output_data.columns:\n", " err = '{0} is not a column in `data`. 
Please check.'.format(experiment)\n", " raise IndexError(err)\n", "\n", @@ -186,29 +183,30 @@ " raise ValueError(err0)\n", " \n", " for i in experiment_label:\n", - " if i not in data_in[experiment].unique():\n", + " if i not in self.__output_data[experiment].unique():\n", " err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment)\n", " raise IndexError(err)\n", " else:\n", - " experiment_label = data_in[experiment].unique()\n", + " experiment_label = self.__output_data[experiment].unique()\n", "\n", " # Check if x1_level is valid\n", " if x1_level:\n", " if len(x1_level) != 2:\n", " err0 = '`x1_level` does not have a length of 2.'\n", " raise ValueError(err0)\n", - " else: \n", - " for i in x1_level:\n", - " if i not in data_in[x[0]].unique():\n", - " err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment)\n", - " raise IndexError(err)\n", + " \n", + " for i in x1_level:\n", + " if i not in self.__output_data[x[0]].unique():\n", + " err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment)\n", + " raise IndexError(err)\n", "\n", " else:\n", - " x1_level = data_in[x[0]].unique()\n", + " x1_level = self.__output_data[x[0]].unique()\n", + " \n", " # TODO what if experiment is None? 
\n", " elif experiment:\n", - " experiment_label = data_in[experiment].unique()\n", - " x1_level = data_in[x[0]].unique() \n", + " experiment_label = self.__output_data[experiment].unique()\n", + " x1_level = self.__output_data[x[0]].unique() \n", " self.__experiment_label = experiment_label\n", " self.__x1_level = x1_level\n", "\n", @@ -218,16 +216,16 @@ " # Add a length check for unique values in the first element in list x, \n", " # if the length is greater than 2, force delta2 to be False\n", " # Should be removed if delta2 for situations other than 2x2 is supported\n", - " if len(data_in[x[0]].unique()) > 2 and x1_level is None:\n", - " delta2 = False\n", - " self.__delta2 = delta2\n", + " if len(self.__output_data[x[0]].unique()) > 2 and x1_level is None:\n", + " self.__delta2 = False\n", " # stop the loop if delta2 is False\n", " \n", " # add a new column which is a combination of experiment and the first variable\n", " new_col_name = experiment+x[0]\n", - " while new_col_name in data_in.columns:\n", + " while new_col_name in self.__output_data.columns:\n", " new_col_name += \"_\"\n", - " data_in[new_col_name] = data_in[x[0]].astype(str) + \" \" + data_in[experiment].astype(str)\n", + "\n", + " self.__output_data[new_col_name] = self.__output_data[x[0]].astype(str) + \" \" + self.__output_data[experiment].astype(str)\n", "\n", " #create idx and record the first and second x variable \n", " idx = []\n", @@ -277,8 +275,8 @@ " raise ValueError(err)\n", "\n", " # Check if there is a typo on paired\n", - " if paired and paired not in (\"baseline\", \"sequential\"):\n", - " err = '{} assigned for `paired` is not valid.'.format(paired)\n", + " if self.__is_paired and self.__is_paired not in (\"baseline\", \"sequential\"):\n", + " err = '{} assigned for `paired` is not valid.'.format(self.__is_paired)\n", " raise ValueError(err)\n", "\n", "\n", @@ -291,134 +289,22 @@ " err = 'You have only specified `x`. 
Please also specify `y`.'\n", " raise ValueError(err)\n", "\n", - " # Identify the type of data that was passed in.\n", - " if x and y:\n", - " # Assume we have a long dataset.\n", - " # check both x and y are column names in data.\n", - " if x not in data_in.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(x)\n", - " raise IndexError(err)\n", - " if y not in data_in.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(y)\n", - " raise IndexError(err)\n", - "\n", - " # check y is numeric.\n", - " if not issubdtype(data_in[y].dtype, number):\n", - " err = '{0} is a column in `data`, but it is not numeric.'.format(y)\n", - " raise ValueError(err)\n", - "\n", - " # check all the idx can be found in data_in[x]\n", - " for g in all_plot_groups:\n", - " if g not in data_in[x].unique():\n", - " err0 = '\"{0}\" is not a group in the column `{1}`.'.format(g, x)\n", - " err1 = \" Please check `idx` and try again.\"\n", - " raise IndexError(err0 + err1)\n", - "\n", - " # Select only rows where the value in the `x` column \n", - " # is found in `idx`.\n", - " plot_data = data_in[data_in.loc[:, x].isin(all_plot_groups)].copy()\n", - " \n", - " # plot_data.drop(\"index\", inplace=True, axis=1)\n", - "\n", - " # Assign attributes\n", - " self.__x = x\n", - " self.__y = y\n", - " self.__xvar = x\n", - " self.__yvar = y\n", - "\n", - " elif x is None and y is None:\n", - " # Assume we have a wide dataset.\n", - " # Assign attributes appropriately.\n", - " self.__x = None\n", - " self.__y = None\n", - " self.__xvar = \"group\"\n", - " self.__yvar = \"value\"\n", "\n", - " # First, check we have all columns in the dataset.\n", - " for g in all_plot_groups:\n", - " if g not in data_in.columns:\n", - " err0 = '\"{0}\" is not a column in `data`.'.format(g)\n", - " err1 = \" Please check `idx` and try again.\"\n", - " raise IndexError(err0 + err1)\n", - " \n", - " set_all_columns = set(data_in.columns.tolist())\n", - " 
set_all_plot_groups = set(all_plot_groups)\n", - " id_vars = set_all_columns.difference(set_all_plot_groups)\n", - "\n", - " plot_data = pd.melt(data_in,\n", - " id_vars=id_vars,\n", - " value_vars=all_plot_groups,\n", - " value_name=self.__yvar,\n", - " var_name=self.__xvar)\n", - " \n", - " # Added in v0.2.7.\n", - " # remove any NA rows.\n", - " plot_data.dropna(axis=0, how='any', subset=[self.__yvar], inplace=True)\n", - "\n", - " # TODO these comments should not be in the code but on the release notes of the package version\n", - " # Lines 131 to 140 added in v0.2.3.\n", - " # Fixes a bug that jammed up when the xvar column was already \n", - " # a pandas Categorical. Now we check for this and act appropriately.\n", - " if isinstance(plot_data[self.__xvar].dtype, \n", - " pd.CategoricalDtype):\n", - " plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)\n", - " plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, \n", - " ordered=True, \n", - " inplace=True)\n", - " else:\n", - " plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],\n", - " categories=all_plot_groups,\n", - " ordered=True)\n", - " \n", - " # TODO Move all the plot_data logic to the function returning self.__plot_data\n", - " self.__plot_data = plot_data\n", - " # TODO Move all the all_plot_groups logic to the function returning self.__all_plot_groups\n", + " self.__plot_data = self.get_plot_data(x, y, all_plot_groups)\n", " self.__all_plot_groups = all_plot_groups\n", "\n", "\n", " # Check if `id_col` is valid\n", - " if paired:\n", + " if self.__is_paired:\n", " if id_col is None:\n", " err = \"`id_col` must be specified if `paired` is assigned with a not NoneType value.\"\n", " raise IndexError(err)\n", " \n", - " if id_col not in plot_data.columns:\n", + " if id_col not in self.__plot_data.columns:\n", " err = \"{} is not a column in `data`. 
\".format(id_col)\n", " raise IndexError(err)\n", "\n", - " EffectSizeDataFrame_kwargs = dict(ci=ci, is_paired=paired,\n", - " random_seed=random_seed,\n", - " resamples=resamples,\n", - " proportional=proportional, \n", - " delta2=delta2, \n", - " experiment_label=self.__experiment_label,\n", - " x1_level=self.__x1_level,\n", - " x2=self.__x2,\n", - " mini_meta = mini_meta)\n", - "\n", - " self.__mean_diff = EffectSizeDataFrame(self, \"mean_diff\",\n", - " **EffectSizeDataFrame_kwargs)\n", - "\n", - " self.__median_diff = EffectSizeDataFrame(self, \"median_diff\",\n", - " **EffectSizeDataFrame_kwargs)\n", - "\n", - " self.__cohens_d = EffectSizeDataFrame(self, \"cohens_d\",\n", - " **EffectSizeDataFrame_kwargs)\n", - "\n", - " self.__cohens_h = EffectSizeDataFrame(self, \"cohens_h\",\n", - " **EffectSizeDataFrame_kwargs) \n", - "\n", - " self.__hedges_g = EffectSizeDataFrame(self, \"hedges_g\",\n", - " **EffectSizeDataFrame_kwargs)\n", - " \n", - " self.__delta_g = EffectSizeDataFrame(self, \"delta_g\",\n", - " **EffectSizeDataFrame_kwargs)\n", - "\n", - " if not paired:\n", - " self.__cliffs_delta = EffectSizeDataFrame(self, \"cliffs_delta\",\n", - " **EffectSizeDataFrame_kwargs)\n", - " else:\n", - " self.__cliffs_delta = \"The data is paired; Cliff's delta is therefore undefined.\"\n", + " self.compute_effectsize_dfs()\n", "\n", "\n", " def __repr__(self):\n", @@ -537,13 +423,13 @@ " return self.__delta_g\n", "\n", " @property\n", - " def data(self):\n", + " def input_data(self):\n", " \"\"\"\n", " Returns the pandas DataFrame that was passed to `dabest.load()`.\n", " When `delta2` is True, a new column is added to support the \n", " function. 
The name of this new column is indicated by `x`.\n", " \"\"\"\n", - " return self.__data\n", + " return self.__input_data\n", "\n", "\n", " @property\n", @@ -713,7 +599,129 @@ " \"\"\"\n", " Returns the all plot groups, as indicated via the `idx` keyword.\n", " \"\"\"\n", - " return self.__all_plot_groups" + " return self.__all_plot_groups\n", + " \n", + " \n", + " def get_plot_data(self, x, y, all_plot_groups):\n", + " '''\n", + " Function to prepare some attributes for plotting \n", + " '''\n", + " \n", + " # Identify the type of data that was passed in.\n", + " if x and y:\n", + " # Assume we have a long dataset.\n", + " # check both x and y are column names in data.\n", + " if x not in self.__output_data.columns:\n", + " err = '{0} is not a column in `data`. Please check.'.format(x)\n", + " raise IndexError(err)\n", + " if y not in self.__output_data.columns:\n", + " err = '{0} is not a column in `data`. Please check.'.format(y)\n", + " raise IndexError(err)\n", + "\n", + " # check y is numeric.\n", + " if not issubdtype(self.__output_data[y].dtype, number):\n", + " err = '{0} is a column in `data`, but it is not numeric.'.format(y)\n", + " raise ValueError(err)\n", + "\n", + " # check all the idx can be found in self.__output_data[x]\n", + " for g in all_plot_groups:\n", + " if g not in self.__output_data[x].unique():\n", + " err0 = '\"{0}\" is not a group in the column `{1}`.'.format(g, x)\n", + " err1 = \" Please check `idx` and try again.\"\n", + " raise IndexError(err0 + err1)\n", + "\n", + " # Select only rows where the value in the `x` column \n", + " # is found in `idx`.\n", + " plot_data = self.__output_data[self.__output_data.loc[:, x].isin(all_plot_groups)].copy()\n", + " \n", + " # Assign attributes\n", + " self.__x = x\n", + " self.__y = y\n", + " self.__xvar = x\n", + " self.__yvar = y\n", + "\n", + " elif x is None and y is None:\n", + " # Assume we have a wide dataset.\n", + " # Assign attributes appropriately.\n", + " self.__x = None\n", + " 
self.__y = None\n", + " self.__xvar = \"group\"\n", + " self.__yvar = \"value\"\n", + "\n", + " # First, check we have all columns in the dataset.\n", + " for g in all_plot_groups:\n", + " if g not in self.__output_data.columns:\n", + " err0 = '\"{0}\" is not a column in `data`.'.format(g)\n", + " err1 = \" Please check `idx` and try again.\"\n", + " raise IndexError(err0 + err1)\n", + " \n", + " set_all_columns = set(self.__output_data.columns.tolist())\n", + " set_all_plot_groups = set(all_plot_groups)\n", + " id_vars = set_all_columns.difference(set_all_plot_groups)\n", + "\n", + " plot_data = pd.melt(self.__output_data,\n", + " id_vars=id_vars,\n", + " value_vars=all_plot_groups,\n", + " value_name=self.__yvar,\n", + " var_name=self.__xvar)\n", + " \n", + " # Added in v0.2.7.\n", + " plot_data.dropna(axis=0, how='any', subset=[self.__yvar], inplace=True)\n", + "\n", + " # TODO these comments should not be in the code but on the release notes of the package version\n", + " # Lines 131 to 140 added in v0.2.3.\n", + " # Fixes a bug that jammed up when the xvar column was already \n", + " # a pandas Categorical. 
Now we check for this and act appropriately.\n", + " if isinstance(plot_data[self.__xvar].dtype, \n", + " pd.CategoricalDtype):\n", + " plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)\n", + " plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, \n", + " ordered=True, \n", + " inplace=True)\n", + " else:\n", + " plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],\n", + " categories=all_plot_groups,\n", + " ordered=True)\n", + " \n", + "\n", + " return plot_data\n", + " \n", + " def compute_effectsize_dfs(self):\n", + " from ._effsize_objects import EffectSizeDataFrame\n", + "\n", + " effectsize_df_kwargs = dict(ci=self.__ci, is_paired=self.__is_paired,\n", + " random_seed=self.__random_seed,\n", + " resamples=self.__resamples,\n", + " proportional=self.__proportional, \n", + " delta2=self.__delta2, \n", + " experiment_label=self.__experiment_label,\n", + " x1_level=self.__x1_level,\n", + " x2=self.__x2,\n", + " mini_meta = self.__mini_meta)\n", + "\n", + " self.__mean_diff = EffectSizeDataFrame(self, \"mean_diff\",\n", + " **effectsize_df_kwargs)\n", + "\n", + " self.__median_diff = EffectSizeDataFrame(self, \"median_diff\",\n", + " **effectsize_df_kwargs)\n", + "\n", + " self.__cohens_d = EffectSizeDataFrame(self, \"cohens_d\",\n", + " **effectsize_df_kwargs)\n", + "\n", + " self.__cohens_h = EffectSizeDataFrame(self, \"cohens_h\",\n", + " **effectsize_df_kwargs) \n", + "\n", + " self.__hedges_g = EffectSizeDataFrame(self, \"hedges_g\",\n", + " **effectsize_df_kwargs)\n", + " \n", + " self.__delta_g = EffectSizeDataFrame(self, \"delta_g\",\n", + " **effectsize_df_kwargs)\n", + "\n", + " if not self.__is_paired:\n", + " self.__cliffs_delta = EffectSizeDataFrame(self, \"cliffs_delta\",\n", + " **effectsize_df_kwargs)\n", + " else:\n", + " self.__cliffs_delta = \"The data is paired; Cliff's delta is therefore undefined.\"" ] }, { diff --git a/nbs/API/delta_objects.ipynb b/nbs/API/delta_objects.ipynb index 
0abed891..9bb0d7d6 100644 --- a/nbs/API/delta_objects.ipynb +++ b/nbs/API/delta_objects.ipynb @@ -6,7 +6,7 @@ "source": [ "# Delta objects\n", "\n", - "> The different types of delta used for the computations.\n", + "> Auxiliary delta classes for estimating statistics and generating plots.\n", "\n", "- order: 9" ] diff --git a/nbs/API/effsize_objects.ipynb b/nbs/API/effsize_objects.ipynb index 8f45cc5d..778eba2c 100644 --- a/nbs/API/effsize_objects.ipynb +++ b/nbs/API/effsize_objects.ipynb @@ -6,7 +6,7 @@ "source": [ "# Effectsize objects\n", "\n", - "> The different objects involved in the computations of bootstrapped effect sizes.\n", + "> The auxiliary classes involved in the computations of bootstrapped effect sizes.\n", "\n", "- order: 10" ] @@ -365,10 +365,6 @@ " pass\n", "\n", "\n", - "\n", - "\n", - "\n", - "\n", " def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):\n", " \n", " RM_STATUS = {'baseline' : 'for repeated measures against baseline \\n', \n", diff --git a/nbs/API/load.ipynb b/nbs/API/load.ipynb index ea118cb3..3a0d5434 100644 --- a/nbs/API/load.ipynb +++ b/nbs/API/load.ipynb @@ -39,7 +39,7 @@ "#| hide\n", "from nbdev.showdoc import *\n", "import nbdev\n", - "nbdev.nbdev_export()" + "nbdev.nbdev_export()\n" ] }, { @@ -117,7 +117,7 @@ " -------\n", " A `Dabest` object.\n", " '''\n", - " from ._classes import Dabest\n", + " from dabest import Dabest\n", "\n", " return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment, experiment_label, x1_level, mini_meta)\n", "\n" @@ -132,14 +132,14 @@ "#| export\n", "import numpy as np\n", "from typing import Union, Optional\n", + "import pandas as pd\n", "\n", "def prop_dataset(group:Union[list, tuple, np.ndarray, dict], #Accepts lists, tuples, or numpy ndarrays of numeric types.\n", " group_names: Optional[list] = None):\n", " '''\n", " Convenient function to generate a dataframe of binary data.\n", " '''\n", - " import pandas as pd\n", - 
"\n", + " \n", " if isinstance(group, dict):\n", " # If group_names is not provided, use the keys of the dict as group_names\n", " if group_names is None:\n", diff --git a/nbs/tests/test_01_effsizes_pvals.ipynb b/nbs/tests/test_01_effsizes_pvals.ipynb index 2b74ecac..717d7ff3 100644 --- a/nbs/tests/test_01_effsizes_pvals.ipynb +++ b/nbs/tests/test_01_effsizes_pvals.ipynb @@ -24,8 +24,7 @@ "outputs": [], "source": [ "from dabest._stats_tools import effsize\n", - "from dabest._classes import Dabest\n", - "from dabest._effsize_objects import TwoGroupsEffectSize, PermutationTest" + "from dabest import Dabest, TwoGroupsEffectSize, PermutationTest" ] }, { diff --git a/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb b/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb index 2f0d1068..775113fd 100644 --- a/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb +++ b/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb @@ -21,7 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "from dabest._classes import Dabest" + "from dabest import Dabest" ] }, { diff --git a/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb b/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb index cddaf87e..0bb2a7f0 100644 --- a/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb +++ b/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb @@ -22,8 +22,7 @@ "outputs": [], "source": [ "from dabest._stats_tools import effsize\n", - "from dabest._classes import Dabest\n", - "from dabest._effsize_objects import PermutationTest" + "from dabest import Dabest, PermutationTest" ] }, { diff --git a/nbs/tests/test_08_mini_meta_pvals.ipynb b/nbs/tests/test_08_mini_meta_pvals.ipynb index b6153c3c..d989258a 100644 --- a/nbs/tests/test_08_mini_meta_pvals.ipynb +++ b/nbs/tests/test_08_mini_meta_pvals.ipynb @@ -21,8 +21,7 @@ "source": [ "from dabest._stats_tools import effsize\n", "from dabest._stats_tools import confint_2group_diff as ci2g\n", - "from dabest._classes import Dabest\n", - "from 
dabest._effsize_objects import PermutationTest" + "from dabest import Dabest, PermutationTest" ] }, { diff --git a/nbs/tests/test_10_proportion_plot.py b/nbs/tests/test_10_proportion_plot.py index 443f1007..8ae453cb 100644 --- a/nbs/tests/test_10_proportion_plot.py +++ b/nbs/tests/test_10_proportion_plot.py @@ -9,8 +9,6 @@ def create_demo_prop_dataset(seed=9999, N=40): - import numpy as np - import pandas as pd np.random.seed(9999) # Fix the seed so the results are replicable. # Create samples From 52f6d62e888400e0e6ae0f74f1c8a2b7b81438b7 Mon Sep 17 00:00:00 2001 From: cyberosa Date: Sun, 17 Dec 2023 21:27:12 +0100 Subject: [PATCH 06/10] Refactoring Modularity and black formatting --- dabest/_api.py | 127 +- dabest/_bootstrap_tools.py | 7 +- dabest/_dabest_object.py | 384 +++--- dabest/_delta_objects.py | 216 ++-- dabest/_effsize_objects.py | 456 +++---- dabest/_stats_tools/confint_1group.py | 65 +- dabest/_stats_tools/confint_2group_diff.py | 186 +-- dabest/misc_tools.py | 26 +- dabest/plot_tools.py | 576 +++++---- dabest/plotter.py | 1293 ++++++++++--------- nbs/API/bootstrap.ipynb | 7 +- nbs/API/confint_1group.ipynb | 68 +- nbs/API/confint_2group_diff.ipynb | 190 +-- nbs/API/dabest_object.ipynb | 384 +++--- nbs/API/delta_objects.ipynb | 218 ++-- nbs/API/effsize_objects.ipynb | 457 +++---- nbs/API/load.ipynb | 137 +- nbs/API/misc_tools.ipynb | 35 +- nbs/API/plot_tools.ipynb | 581 +++++---- nbs/API/plotter.ipynb | 1302 +++++++++++--------- 20 files changed, 3663 insertions(+), 3052 deletions(-) diff --git a/dabest/_api.py b/dabest/_api.py index a4cfc6b5..190d4e61 100644 --- a/dabest/_api.py +++ b/dabest/_api.py @@ -4,11 +4,24 @@ __all__ = ['load', 'prop_dataset'] # %% ../nbs/API/load.ipynb 4 -def load(data, idx=None, x=None, y=None, paired=None, id_col=None, - ci=95, resamples=5000, random_seed=12345, proportional=False, - delta2 = False, experiment = None, experiment_label = None, - x1_level = None, mini_meta=False): - ''' +def load( + data, + idx=None, + x=None, 
+ y=None, + paired=None, + id_col=None, + ci=95, + resamples=5000, + random_seed=12345, + proportional=False, + delta2=False, + experiment=None, + experiment_label=None, + x1_level=None, + mini_meta=False, +): + """ Loads data in preparation for estimation statistics. This is designed to work with pandas DataFrames. @@ -22,15 +35,15 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None, with each individual tuple producing its own contrast plot x : string or list, default None Column name(s) of the independent variable. This can be expressed as - a list of 2 elements if and only if 'delta2' is True; otherwise it + a list of 2 elements if and only if 'delta2' is True; otherwise it can only be a string. y : string, default None Column names for data to be plotted on the x-axis and y-axis. paired : string, default None - The type of the experiment under which the data are obtained. If 'paired' + The type of the experiment under which the data are obtained. If 'paired' is None then the data will not be treated as paired data in the subsequent - calculations. If 'paired' is 'baseline', then in each tuple of x, other - groups will be paired up with the first group (as control). If 'paired' is + calculations. If 'paired' is 'baseline', then in each tuple of x, other + groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with its previous group (as control). id_col : default None. @@ -45,7 +58,7 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None, This integer is used to seed the random number generator during bootstrap resampling, ensuring that the confidence intervals reported are replicable. - proportional : boolean, default False. + proportional : boolean, default False. An indicator of whether the data is binary or not. When set to True, it specifies that the data consists of binary data, where the values are limited to 0 and 1. 
The code is not suitable for analyzing proportion @@ -55,76 +68,112 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None, delta2 : boolean, default False Indicator of delta-delta experiment experiment : String, default None - The name of the column of the dataframe which contains the label of + The name of the column of the dataframe which contains the label of experiments experiment_lab : list, default None A list of String to specify the order of subplots for delta-delta plots. - This can be expressed as a list of 2 elements if and only if 'delta2' - is True; otherwise it can only be a string. + This can be expressed as a list of 2 elements if and only if 'delta2' + is True; otherwise it can only be a string. x1_level : list, default None A list of String to specify the order of subplots for delta-delta plots. - This can be expressed as a list of 2 elements if and only if 'delta2' - is True; otherwise it can only be a string. + This can be expressed as a list of 2 elements if and only if 'delta2' + is True; otherwise it can only be a string. mini_meta : boolean, default False Indicator of weighted delta calculation. Returns ------- A `Dabest` object. - ''' + """ from dabest import Dabest - return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment, experiment_label, x1_level, mini_meta) - - + return Dabest( + data, + idx, + x, + y, + paired, + id_col, + ci, + resamples, + random_seed, + proportional, + delta2, + experiment, + experiment_label, + x1_level, + mini_meta, + ) # %% ../nbs/API/load.ipynb 5 import numpy as np from typing import Union, Optional import pandas as pd -def prop_dataset(group:Union[list, tuple, np.ndarray, dict], #Accepts lists, tuples, or numpy ndarrays of numeric types. - group_names: Optional[list] = None): - ''' + +def prop_dataset( + group: Union[ + list, tuple, np.ndarray, dict + ], # Accepts lists, tuples, or numpy ndarrays of numeric types. 
+ group_names: Optional[list] = None, +): + """ Convenient function to generate a dataframe of binary data. - ''' - + """ + if isinstance(group, dict): # If group_names is not provided, use the keys of the dict as group_names if group_names is None: group_names = list(group.keys()) elif not set(group_names) == set(group.keys()): # Check if the group_names provided is the same as the keys of the dict - raise ValueError('group_names must be the same as the keys of the dict.') + raise ValueError("group_names must be the same as the keys of the dict.") # Check if the values in the dict are numeric - if not all([isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names]): - raise ValueError('group must be a dict of lists, tuples, or numpy ndarrays of numeric types.') + if not all( + [isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names] + ): + raise ValueError( + "group must be a dict of lists, tuples, or numpy ndarrays of numeric types." + ) # Check if the values in the dict only have two elements under each parent key if not all([len(group[name]) == 2 for name in group_names]): - raise ValueError('Each parent key should have only two elements.') + raise ValueError("Each parent key should have only two elements.") group_val = group else: if group_names is None: - raise ValueError('group_names must be provided if group is not a dict.') + raise ValueError("group_names must be provided if group is not a dict.") # Check if the length of group is two times of the length of group_names if not len(group) == 2 * len(group_names): - raise ValueError('The length of group must be two times of the length of group_names.') - group_val = {group_names[i]: [group[i*2], group[i*2+1]] for i in range(len(group_names))} + raise ValueError( + "The length of group must be two times of the length of group_names." 
+ ) + group_val = { + group_names[i]: [group[i * 2], group[i * 2 + 1]] + for i in range(len(group_names)) + } # Check if the sum of values in group_val under each key are the same - if not all([sum(group_val[name]) == sum(group_val[group_names[0]]) for name in group_val.keys()]): - raise ValueError('The sum of values under each key must be the same.') - - id_col = pd.Series(range(1, sum(group_val[group_names[0]])+1)) - + if not all( + [ + sum(group_val[name]) == sum(group_val[group_names[0]]) + for name in group_val.keys() + ] + ): + raise ValueError("The sum of values under each key must be the same.") + + id_col = pd.Series(range(1, sum(group_val[group_names[0]]) + 1)) + final_df = pd.DataFrame() for name in group_val.keys(): - col = np.repeat(0, group_val[name][0]).tolist() + np.repeat(1, group_val[name][1]).tolist() - df = pd.DataFrame({name:col}) + col = ( + np.repeat(0, group_val[name][0]).tolist() + + np.repeat(1, group_val[name][1]).tolist() + ) + df = pd.DataFrame({name: col}) final_df = pd.concat([final_df, df], axis=1) - final_df['ID'] = id_col + final_df["ID"] = id_col return final_df diff --git a/dabest/_bootstrap_tools.py b/dabest/_bootstrap_tools.py index 45375310..153919da 100644 --- a/dabest/_bootstrap_tools.py +++ b/dabest/_bootstrap_tools.py @@ -90,7 +90,7 @@ def __init__(self, if len(x1) != len(x2): raise ValueError('x1 and x2 are not the same length.') - if (x2 is None) or (paired is not None) : + if (x2 is None) or paired: if x2 is None: tx = x1 @@ -100,7 +100,8 @@ def __init__(self, ttest_2_paired = 'NIL' wilcoxonresult = 'NIL' - elif paired is not None: + #elif paired is not None: + else: # only two options to enter here diff = True tx = x2 - x1 ttest_single = 'NIL' @@ -123,7 +124,7 @@ def __init__(self, pct_low_high = np.nan_to_num(pct_low_high).astype('int') - elif x2 is not None and paired is None: + elif x2 and paired is None: diff = True x2 = pd.Series(x2).dropna() # Generate statarrays for both arrays. 
diff --git a/dabest/_dabest_object.py b/dabest/_dabest_object.py index deeb9881..c17cf9c2 100644 --- a/dabest/_dabest_object.py +++ b/dabest/_dabest_object.py @@ -17,104 +17,123 @@ class Dabest(object): Class for estimation statistics and plots. """ - def __init__(self, data, idx, x, y, paired, id_col, ci, - resamples, random_seed, proportional, delta2, - experiment, experiment_label, x1_level, mini_meta): - + def __init__( + self, + data, + idx, + x, + y, + paired, + id_col, + ci, + resamples, + random_seed, + proportional, + delta2, + experiment, + experiment_label, + x1_level, + mini_meta, + ): """ Parses and stores pandas DataFrames in preparation for estimation statistics. You should not be calling this class directly; instead, use `dabest.load()` to parse your DataFrame prior to analysis. """ - - self.__delta2 = delta2 - self.__experiment = experiment - self.__ci = ci - self.__input_data = data - self.__output_data = data.copy() - self.__id_col = id_col - self.__is_paired = paired - self.__resamples = resamples - self.__random_seed = random_seed - self.__proportional = proportional - self.__mini_meta = mini_meta + self.__delta2 = delta2 + self.__experiment = experiment + self.__ci = ci + self.__input_data = data + self.__output_data = data.copy() + self.__id_col = id_col + self.__is_paired = paired + self.__resamples = resamples + self.__random_seed = random_seed + self.__proportional = proportional + self.__mini_meta = mini_meta # Check if it is a valid mini_meta case if self.__mini_meta: # Only mini_meta calculation but not proportional and delta-delta function if self.__proportional: - err0 = '`proportional` and `mini_meta` cannot be True at the same time.' + err0 = "`proportional` and `mini_meta` cannot be True at the same time." raise ValueError(err0) if self.__delta2: - err0 = '`delta` and `mini_meta` cannot be True at the same time.' + err0 = "`delta` and `mini_meta` cannot be True at the same time." 
raise ValueError(err0) - + # Check if the columns stated are valid # TODO instead of traversing twice idx you can traverse only once # and break the loop if the condition is not satisfied? # TODO What if the type is not str and not tuple,list? missing raise Error if all([isinstance(i, str) for i in idx]): - if len(pd.unique([t for t in idx]).tolist())!=2: - err0 = '`mini_meta` is True, but `idx` ({})'.format(idx) - err1 = 'does not contain exactly 2 columns.' + if len(pd.unique([t for t in idx]).tolist()) != 2: + err0 = "`mini_meta` is True, but `idx` ({})".format(idx) + err1 = "does not contain exactly 2 columns." raise ValueError(err0 + err1) - + if all([isinstance(i, (tuple, list)) for i in idx]): all_idx_lengths = [len(t) for t in idx] if (array(all_idx_lengths) != 2).any(): err1 = "`mini_meta` is True, but some idx " err2 = "in {} does not consist only of two groups.".format(idx) raise ValueError(err1 + err2) - - # TODO can you have True mini_meta and delta2 at the same time? + # TODO can you have True mini_meta and delta2 at the same time? # Check if this is a 2x2 ANOVA case and x & y are valid columns # Create experiment_label and x1_level if self.__delta2: + # TODO Wrap the errors in a separate function if self.__proportional: - err0 = '`proportional` and `delta` cannot be True at the same time.' + err0 = "`proportional` and `delta` cannot be True at the same time." raise ValueError(err0) - + # idx should not be specified if idx: - err0 = '`idx` should not be specified when `delta2` is True.'.format(len(x)) + err0 = "`idx` should not be specified when `delta2` is True.".format( + len(x) + ) raise ValueError(err0) # Check if x is valid # TODO if x is None is fine?? 
if len(x) != 2: - err0 = '`delta2` is True but the number of variables indicated by `x` is {}.'.format(len(x)) + err0 = "`delta2` is True but the number of variables indicated by `x` is {}.".format( + len(x) + ) raise ValueError(err0) - + for i in x: if i not in self.__output_data.columns: - err = '{0} is not a column in `data`. Please check.'.format(i) + err = "{0} is not a column in `data`. Please check.".format(i) raise IndexError(err) # Check if y is valid if not y: - err0 = '`delta2` is True but `y` is not indicated.' + err0 = "`delta2` is True but `y` is not indicated." raise ValueError(err0) - + if y not in self.__output_data.columns: - err = '{0} is not a column in `data`. Please check.'.format(y) + err = "{0} is not a column in `data`. Please check.".format(y) raise IndexError(err) # Check if experiment is valid if experiment not in self.__output_data.columns: - err = '{0} is not a column in `data`. Please check.'.format(experiment) + err = "{0} is not a column in `data`. Please check.".format(experiment) raise IndexError(err) # Check if experiment_label is valid and create experiment when needed if experiment_label: if len(experiment_label) != 2: - err0 = '`experiment_label` does not have a length of 2.' + err0 = "`experiment_label` does not have a length of 2." raise ValueError(err0) - + for i in experiment_label: if i not in self.__output_data[experiment].unique(): - err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment) + err = "{0} is not an element in the column `{1}` of `data`. Please check.".format( + i, experiment + ) raise IndexError(err) else: experiment_label = self.__output_data[experiment].unique() @@ -122,139 +141,139 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, # Check if x1_level is valid if x1_level: if len(x1_level) != 2: - err0 = '`x1_level` does not have a length of 2.' + err0 = "`x1_level` does not have a length of 2." 
raise ValueError(err0) - + for i in x1_level: if i not in self.__output_data[x[0]].unique(): - err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment) + err = "{0} is not an element in the column `{1}` of `data`. Please check.".format( + i, experiment + ) raise IndexError(err) else: x1_level = self.__output_data[x[0]].unique() - - # TODO what if experiment is None? + + # TODO what if experiment is None? elif experiment: experiment_label = self.__output_data[experiment].unique() - x1_level = self.__output_data[x[0]].unique() + x1_level = self.__output_data[x[0]].unique() self.__experiment_label = experiment_label - self.__x1_level = x1_level - + self.__x1_level = x1_level # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case - if x and y and idx is None: - # Add a length check for unique values in the first element in list x, + if idx is None and x is not None and y is not None: + # Add a length check for unique values in the first element in list x, # if the length is greater than 2, force delta2 to be False # Should be removed if delta2 for situations other than 2x2 is supported if len(self.__output_data[x[0]].unique()) > 2 and x1_level is None: self.__delta2 = False # stop the loop if delta2 is False - + # add a new column which is a combination of experiment and the first variable - new_col_name = experiment+x[0] + new_col_name = experiment + x[0] while new_col_name in self.__output_data.columns: new_col_name += "_" - self.__output_data[new_col_name] = self.__output_data[x[0]].astype(str) + " " + self.__output_data[experiment].astype(str) + self.__output_data[new_col_name] = ( + self.__output_data[x[0]].astype(str) + + " " + + self.__output_data[experiment].astype(str) + ) - #create idx and record the first and second x variable + # create idx and record the first and second x variable idx = [] for i in list(map(lambda x: str(x), experiment_label)): temp = [] for j in list(map(lambda x: 
str(x), x1_level)): temp.append(j + " " + i) idx.append(temp) - + self.__idx = idx - self.__x1 = x[0] - self.__x2 = x[1] + self.__x1 = x[0] + self.__x2 = x[1] x = new_col_name else: self.__idx = idx - self.__x1 = None - self.__x2 = None - - + self.__x1 = None + self.__x2 = None # Determine the kind of estimation plot we need to produce. if all([isinstance(i, (str, int, float)) for i in idx]): # flatten out idx. all_plot_groups = pd.unique([t for t in idx]).tolist() if len(idx) > len(all_plot_groups): - err0 = '`idx` contains duplicated groups. Please remove any duplicates and try again.' + err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again." raise ValueError(err0) - + # We need to re-wrap this idx inside another tuple so as to # easily loop thru each pairwise group later on. self.__idx = (idx,) elif all([isinstance(i, (tuple, list)) for i in idx]): all_plot_groups = pd.unique([tt for t in idx for tt in t]).tolist() - + actual_groups_given = sum([len(i) for i in idx]) - + if actual_groups_given > len(all_plot_groups): - err0 = 'Groups are repeated across tuples,' - err1 = ' or a tuple has repeated groups in it.' - err2 = ' Please remove any duplicates and try again.' + err0 = "Groups are repeated across tuples," + err1 = " or a tuple has repeated groups in it." + err2 = " Please remove any duplicates and try again." raise ValueError(err0 + err1 + err2) - else: # mix of string and tuple? - err = 'There seems to be a problem with the idx you '\ - 'entered--{}.'.format(idx) + else: # mix of string and tuple? 
+ err = "There seems to be a problem with the idx you " "entered--{}.".format( + idx + ) raise ValueError(err) # Check if there is a typo on paired if self.__is_paired and self.__is_paired not in ("baseline", "sequential"): - err = '{} assigned for `paired` is not valid.'.format(self.__is_paired) + err = "{} assigned for `paired` is not valid.".format(self.__is_paired) raise ValueError(err) - # Determine the type of data: wide or long. - if y and x is None: - err = 'You have only specified `y`. Please also specify `x`.' + if x is None and y is not None: + err = "You have only specified `y`. Please also specify `x`." raise ValueError(err) - if x and y is None: - err = 'You have only specified `x`. Please also specify `y`.' + if x is not None and y is None: + err = "You have only specified `x`. Please also specify `y`." raise ValueError(err) - self.__plot_data = self.get_plot_data(x, y, all_plot_groups) self.__all_plot_groups = all_plot_groups - # Check if `id_col` is valid if self.__is_paired: if id_col is None: err = "`id_col` must be specified if `paired` is assigned with a not NoneType value." raise IndexError(err) - + if id_col not in self.__plot_data.columns: err = "{} is not a column in `data`. 
".format(id_col) raise IndexError(err) self.compute_effectsize_dfs() - def __repr__(self): from .__init__ import __version__ from .misc_tools import print_greeting - - greeting_header = print_greeting() - RM_STATUS = {'baseline' : 'for repeated measures against baseline \n', - 'sequential': 'for the sequential design of repeated-measures experiment \n', - 'None' : '' - } + greeting_header = print_greeting() - PAIRED_STATUS = {'baseline' : 'Paired e', - 'sequential' : 'Paired e', - 'None' : 'E' + RM_STATUS = { + "baseline": "for repeated measures against baseline \n", + "sequential": "for the sequential design of repeated-measures experiment \n", + "None": "", } - first_line = {"rm_status" : RM_STATUS[str(self.__is_paired)], - "paired_status": PAIRED_STATUS[str(self.__is_paired)]} + PAIRED_STATUS = {"baseline": "Paired e", "sequential": "Paired e", "None": "E"} + + first_line = { + "rm_status": RM_STATUS[str(self.__is_paired)], + "paired_status": PAIRED_STATUS[str(self.__is_paired)], + } s1 = "{paired_status}ffect size(s) {rm_status}".format(**first_line) s2 = "with {}% confidence intervals will be computed for:".format(self.__ci) @@ -264,7 +283,7 @@ def __repr__(self): comparisons = [] - if self.__is_paired == 'sequential': + if self.__is_paired == "sequential": for j, current_tuple in enumerate(self.__idx): for ix, test_name in enumerate(current_tuple[1:]): control_name = current_tuple[ix] @@ -277,13 +296,17 @@ def __repr__(self): comparisons.append("{} minus {}".format(test_name, control_name)) if self.__delta2: - comparisons.append("{} minus {} (only for mean difference)".format(self.__experiment_label[1], self.__experiment_label[0])) - + comparisons.append( + "{} minus {} (only for mean difference)".format( + self.__experiment_label[1], self.__experiment_label[0] + ) + ) + if self.__mini_meta: comparisons.append("weighted delta (only for mean difference)") for j, g in enumerate(comparisons): - out.append("{}. {}".format(j+1, g)) + out.append("{}. 
{}".format(j + 1, g)) resamples_line1 = "\n{} resamples ".format(self.__resamples) resamples_line2 = "will be used to generate the effect size bootstraps." @@ -291,7 +314,6 @@ def __repr__(self): return "\n".join(out) - @property def mean_diff(self): """ @@ -299,17 +321,15 @@ def mean_diff(self): """ return self.__mean_diff - - - @property + + @property def median_diff(self): """ Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. """ return self.__median_diff - - + @property def cohens_d(self): """ @@ -317,8 +337,7 @@ def cohens_d(self): """ return self.__cohens_d - - + @property def cohens_h(self): """ @@ -327,17 +346,15 @@ def cohens_h(self): """ return self.__cohens_h - - @property + @property def hedges_g(self): """ Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. """ return self.__hedges_g - - - @property + + @property def cliffs_delta(self): """ Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. @@ -356,19 +373,17 @@ def delta_g(self): def input_data(self): """ Returns the pandas DataFrame that was passed to `dabest.load()`. - When `delta2` is True, a new column is added to support the + When `delta2` is True, a new column is added to support the function. The name of this new column is indicated by `x`. """ return self.__input_data - @property def idx(self): """ Returns the order of categories that was passed to `dabest.load()`. 
""" return self.__idx - @property def x1(self): @@ -378,16 +393,14 @@ def x1(self): """ return self.__x1 - @property def x1_level(self): """ - Returns the levels of first variable declared in x when it is a + Returns the levels of first variable declared in x when it is a delta-delta case; returns None otherwise. """ return self.__x1_level - @property def x2(self): """ @@ -396,15 +409,13 @@ def x2(self): """ return self.__x2 - @property def experiment(self): """ - Returns the column name of experiment labels that was passed to + Returns the column name of experiment labels that was passed to `dabest.load()` when it is a delta-delta case; returns None otherwise. """ return self.__experiment - @property def experiment_label(self): @@ -414,16 +425,14 @@ def experiment_label(self): """ return self.__experiment_label - @property def delta2(self): """ - Returns the boolean parameter indicating if this is a delta-delta + Returns the boolean parameter indicating if this is a delta-delta situation. """ return self.__delta2 - @property def is_paired(self): """ @@ -431,7 +440,6 @@ def is_paired(self): """ return self.__is_paired - @property def id_col(self): """ @@ -439,7 +447,6 @@ def id_col(self): """ return self.__id_col - @property def ci(self): """ @@ -447,7 +454,6 @@ def ci(self): """ return self.__ci - @property def resamples(self): """ @@ -455,7 +461,6 @@ def resamples(self): """ return self.__resamples - @property def random_seed(self): """ @@ -464,18 +469,16 @@ def random_seed(self): """ return self.__random_seed - @property def x(self): """ Returns the x column that was passed to `dabest.load()`, if any. - When `delta2` is True, `x` returns the name of the new column created - for the delta-delta situation. To retrieve the 2 variables passed into + When `delta2` is True, `x` returns the name of the new column created + for the delta-delta situation. To retrieve the 2 variables passed into `x` when `delta2` is True, please call `x1` and `x2` instead. 
""" return self.__x - @property def y(self): """ @@ -483,7 +486,6 @@ def y(self): """ return self.__y - @property def _xvar(self): """ @@ -491,7 +493,6 @@ def _xvar(self): """ return self.__xvar - @property def _yvar(self): """ @@ -499,7 +500,6 @@ def _yvar(self): """ return self.__yvar - @property def _plot_data(self): """ @@ -507,7 +507,6 @@ def _plot_data(self): """ return self.__plot_data - @property def proportional(self): """ @@ -515,7 +514,6 @@ def proportional(self): """ return self.__proportional - @property def mini_meta(self): """ @@ -523,34 +521,32 @@ def mini_meta(self): """ return self.__mini_meta - @property def _all_plot_groups(self): """ Returns the all plot groups, as indicated via the `idx` keyword. """ return self.__all_plot_groups - - + def get_plot_data(self, x, y, all_plot_groups): - ''' - Function to prepare some attributes for plotting - ''' - + """ + Function to prepare some attributes for plotting + """ + # Identify the type of data that was passed in. - if x and y: + if x is not None and y is not None: # Assume we have a long dataset. # check both x and y are column names in data. if x not in self.__output_data.columns: - err = '{0} is not a column in `data`. Please check.'.format(x) + err = "{0} is not a column in `data`. Please check.".format(x) raise IndexError(err) if y not in self.__output_data.columns: - err = '{0} is not a column in `data`. Please check.'.format(y) + err = "{0} is not a column in `data`. Please check.".format(y) raise IndexError(err) # check y is numeric. if not issubdtype(self.__output_data[y].dtype, number): - err = '{0} is a column in `data`, but it is not numeric.'.format(y) + err = "{0} is a column in `data`, but it is not numeric.".format(y) raise ValueError(err) # check all the idx can be found in self.__output_data[x] @@ -560,10 +556,12 @@ def get_plot_data(self, x, y, all_plot_groups): err1 = " Please check `idx` and try again." 
raise IndexError(err0 + err1) - # Select only rows where the value in the `x` column + # Select only rows where the value in the `x` column # is found in `idx`. - plot_data = self.__output_data[self.__output_data.loc[:, x].isin(all_plot_groups)].copy() - + plot_data = self.__output_data[ + self.__output_data.loc[:, x].isin(all_plot_groups) + ].copy() + # Assign attributes self.__x = x self.__y = y @@ -584,71 +582,75 @@ def get_plot_data(self, x, y, all_plot_groups): err0 = '"{0}" is not a column in `data`.'.format(g) err1 = " Please check `idx` and try again." raise IndexError(err0 + err1) - - set_all_columns = set(self.__output_data.columns.tolist()) + + set_all_columns = set(self.__output_data.columns.tolist()) set_all_plot_groups = set(all_plot_groups) id_vars = set_all_columns.difference(set_all_plot_groups) - plot_data = pd.melt(self.__output_data, - id_vars=id_vars, - value_vars=all_plot_groups, - value_name=self.__yvar, - var_name=self.__xvar) - + plot_data = pd.melt( + self.__output_data, + id_vars=id_vars, + value_vars=all_plot_groups, + value_name=self.__yvar, + var_name=self.__xvar, + ) + # Added in v0.2.7. - plot_data.dropna(axis=0, how='any', subset=[self.__yvar], inplace=True) + plot_data.dropna(axis=0, how="any", subset=[self.__yvar], inplace=True) # TODO these comments should not be in the code but on the release notes of the package version # Lines 131 to 140 added in v0.2.3. - # Fixes a bug that jammed up when the xvar column was already + # Fixes a bug that jammed up when the xvar column was already # a pandas Categorical. Now we check for this and act appropriately. 
- if isinstance(plot_data[self.__xvar].dtype, - pd.CategoricalDtype): + if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype): plot_data[self.__xvar].cat.remove_unused_categories(inplace=True) - plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, - ordered=True, - inplace=True) + plot_data[self.__xvar].cat.reorder_categories( + all_plot_groups, ordered=True, inplace=True + ) else: - plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar], - categories=all_plot_groups, - ordered=True) - + plot_data.loc[:, self.__xvar] = pd.Categorical( + plot_data[self.__xvar], categories=all_plot_groups, ordered=True + ) return plot_data - + def compute_effectsize_dfs(self): from ._effsize_objects import EffectSizeDataFrame - effectsize_df_kwargs = dict(ci=self.__ci, is_paired=self.__is_paired, - random_seed=self.__random_seed, - resamples=self.__resamples, - proportional=self.__proportional, - delta2=self.__delta2, - experiment_label=self.__experiment_label, - x1_level=self.__x1_level, - x2=self.__x2, - mini_meta = self.__mini_meta) + effectsize_df_kwargs = dict( + ci=self.__ci, + is_paired=self.__is_paired, + random_seed=self.__random_seed, + resamples=self.__resamples, + proportional=self.__proportional, + delta2=self.__delta2, + experiment_label=self.__experiment_label, + x1_level=self.__x1_level, + x2=self.__x2, + mini_meta=self.__mini_meta, + ) + + self.__mean_diff = EffectSizeDataFrame( + self, "mean_diff", **effectsize_df_kwargs + ) - self.__mean_diff = EffectSizeDataFrame(self, "mean_diff", - **effectsize_df_kwargs) + self.__median_diff = EffectSizeDataFrame( + self, "median_diff", **effectsize_df_kwargs + ) - self.__median_diff = EffectSizeDataFrame(self, "median_diff", - **effectsize_df_kwargs) + self.__cohens_d = EffectSizeDataFrame(self, "cohens_d", **effectsize_df_kwargs) - self.__cohens_d = EffectSizeDataFrame(self, "cohens_d", - **effectsize_df_kwargs) + self.__cohens_h = EffectSizeDataFrame(self, "cohens_h", 
**effectsize_df_kwargs) - self.__cohens_h = EffectSizeDataFrame(self, "cohens_h", - **effectsize_df_kwargs) + self.__hedges_g = EffectSizeDataFrame(self, "hedges_g", **effectsize_df_kwargs) - self.__hedges_g = EffectSizeDataFrame(self, "hedges_g", - **effectsize_df_kwargs) - - self.__delta_g = EffectSizeDataFrame(self, "delta_g", - **effectsize_df_kwargs) + self.__delta_g = EffectSizeDataFrame(self, "delta_g", **effectsize_df_kwargs) if not self.__is_paired: - self.__cliffs_delta = EffectSizeDataFrame(self, "cliffs_delta", - **effectsize_df_kwargs) + self.__cliffs_delta = EffectSizeDataFrame( + self, "cliffs_delta", **effectsize_df_kwargs + ) else: - self.__cliffs_delta = "The data is paired; Cliff's delta is therefore undefined." + self.__cliffs_delta = ( + "The data is paired; Cliff's delta is therefore undefined." + ) diff --git a/dabest/_delta_objects.py b/dabest/_delta_objects.py index c8324332..fa455637 100644 --- a/dabest/_delta_objects.py +++ b/dabest/_delta_objects.py @@ -24,16 +24,16 @@ class DeltaDelta(object): $$\Delta_{2} = \overline{X}_{A_{2}, B_{2}} - \overline{X}_{A_{1}, B_{2}}$$ - where $\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\Delta$ is the mean difference between two samples. + where $\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\Delta$ is the mean difference between two samples. 
A delta-delta value is then calculated as the mean difference between the two primary deltas: $$\Delta_{\Delta} = \Delta_{2} - \Delta_{1}$$ - + and a deltas' g value is calculated as the mean difference between the two primary deltas divided by the standard deviation of the delta-delta value, which is calculated from a pooled variance of the 4 samples: - + $$\Delta_{g} = \frac{\Delta_{\Delta}}{s_{\Delta_{\Delta}}}$$ $$s_{\Delta_{\Delta}} = \sqrt{\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$ @@ -42,53 +42,56 @@ class DeltaDelta(object): """ - - def __init__(self, effectsizedataframe, permutation_count,bootstraps_delta_delta, - ci=95): + + def __init__( + self, effectsizedataframe, permutation_count, bootstraps_delta_delta, ci=95 + ): from ._stats_tools import effsize as es from ._stats_tools import confint_1group as ci1g from ._stats_tools import confint_2group_diff as ci2g - - self.__effsizedf = effectsizedataframe.results - self.__dabest_obj = effectsizedataframe.dabest_obj - self.__ci = ci - self.__resamples = effectsizedataframe.resamples - self.__effect_size = effectsizedataframe.effect_size - self.__alpha = ci2g._compute_alpha_from_ci(ci) - self.__permutation_count = permutation_count - self.__bootstraps = np.array(self.__effsizedf["bootstraps"]) - self.__control = self.__dabest_obj.experiment_label[0] - self.__test = self.__dabest_obj.experiment_label[1] + self.__effsizedf = effectsizedataframe.results + self.__dabest_obj = effectsizedataframe.dabest_obj + self.__ci = ci + self.__resamples = effectsizedataframe.resamples + self.__effect_size = effectsizedataframe.effect_size + self.__alpha = ci2g._compute_alpha_from_ci(ci) + self.__permutation_count = permutation_count + self.__bootstraps = np.array(self.__effsizedf["bootstraps"]) + 
self.__control = self.__dabest_obj.experiment_label[0] + self.__test = self.__dabest_obj.experiment_label[1] # Compute the bootstrap delta-delta or deltas' g and the true dela-delta based on the raw data - if self.__effect_size == "mean_diff": + if self.__effect_size == "mean_diff": self.__bootstraps_delta_delta = bootstraps_delta_delta[2] - self.__difference = self.__effsizedf["difference"][1] - self.__effsizedf["difference"][0] + self.__difference = ( + self.__effsizedf["difference"][1] - self.__effsizedf["difference"][0] + ) else: self.__bootstraps_delta_delta = bootstraps_delta_delta[0] self.__difference = bootstraps_delta_delta[1] - + sorted_delta_delta = npsort(self.__bootstraps_delta_delta) self.__bias_correction = ci2g.compute_meandiff_bias_correction( - self.__bootstraps_delta_delta, self.__difference) - - self.__jackknives = np.array(ci1g.compute_1group_jackknife( - self.__bootstraps_delta_delta, - np.mean)) + self.__bootstraps_delta_delta, self.__difference + ) + + self.__jackknives = np.array( + ci1g.compute_1group_jackknife(self.__bootstraps_delta_delta, np.mean) + ) self.__acceleration_value = ci2g._calc_accel(self.__jackknives) # Compute BCa intervals. 
bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( - self.__bias_correction, self.__acceleration_value, - self.__resamples, ci) - + self.__bias_correction, self.__acceleration_value, self.__resamples, ci + ) + self.__bca_interval_idx = (bca_idx_low, bca_idx_high) if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): - self.__bca_low = sorted_delta_delta[bca_idx_low] + self.__bca_low = sorted_delta_delta[bca_idx_low] self.__bca_high = sorted_delta_delta[bca_idx_high] err1 = "The $lim_type limit of the interval" @@ -97,14 +100,14 @@ def __init__(self, effectsizedataframe, permutation_count,bootstraps_delta_delta err_temp = Template(" ".join([err1, err2, err3])) if bca_idx_low <= 10: - warnings.warn(err_temp.substitute(lim_type="lower", - loc="bottom"), - stacklevel=1) + warnings.warn( + err_temp.substitute(lim_type="lower", loc="bottom"), stacklevel=1 + ) - if bca_idx_high >= self.__resamples-9: - warnings.warn(err_temp.substitute(lim_type="upper", - loc="top"), - stacklevel=1) + if bca_idx_high >= self.__resamples - 9: + warnings.warn( + err_temp.substitute(lim_type="upper", loc="top"), stacklevel=1 + ) else: err1 = "The $lim_type limit of the BCa interval cannot be computed." @@ -113,107 +116,103 @@ def __init__(self, effectsizedataframe, permutation_count,bootstraps_delta_delta err_temp = Template(" ".join([err1, err2, err3])) if isnan(bca_idx_low): - self.__bca_low = self.__difference - warnings.warn(err_temp.substitute(lim_type="lower"), - stacklevel=0) + self.__bca_low = self.__difference + warnings.warn(err_temp.substitute(lim_type="lower"), stacklevel=0) if isnan(bca_idx_high): - self.__bca_high = self.__difference - warnings.warn(err_temp.substitute(lim_type="upper"), - stacklevel=0) + self.__bca_high = self.__difference + warnings.warn(err_temp.substitute(lim_type="upper"), stacklevel=0) # Compute percentile intervals. 
- pct_idx_low = int((self.__alpha/2) * self.__resamples) - pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples) + pct_idx_low = int((self.__alpha / 2) * self.__resamples) + pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples) self.__pct_interval_idx = (pct_idx_low, pct_idx_high) - self.__pct_low = sorted_delta_delta[pct_idx_low] - self.__pct_high = sorted_delta_delta[pct_idx_high] - - + self.__pct_low = sorted_delta_delta[pct_idx_low] + self.__pct_high = sorted_delta_delta[pct_idx_high] def __permutation_test(self): """ Perform a permutation test and obtain the permutation p-value based on the permutation data. """ - self.__permutations = np.array(self.__effsizedf["permutations"]) + self.__permutations = np.array(self.__effsizedf["permutations"]) THRESHOLD = np.abs(self.__difference) - self.__permutations_delta_delta = np.array(self.__permutations[1]-self.__permutations[0]) - - count = sum(np.abs(self.__permutations_delta_delta)>THRESHOLD) - self.__pvalue_permutation = count/self.__permutation_count - + self.__permutations_delta_delta = np.array( + self.__permutations[1] - self.__permutations[0] + ) + count = sum(np.abs(self.__permutations_delta_delta) > THRESHOLD) + self.__pvalue_permutation = count / self.__permutation_count def __repr__(self, header=True, sigfig=3): from .misc_tools import print_greeting - - first_line = {"control" : self.__control, - "test" : self.__test} - - if self.__effect_size == "mean_diff": + + first_line = {"control": self.__control, "test": self.__test} + + if self.__effect_size == "mean_diff": out1 = "The delta-delta between {control} and {test} ".format(**first_line) else: out1 = "The deltas' g between {control} and {test} ".format(**first_line) - + base_string_fmt = "{:." + str(sigfig) + "}" if "." 
in str(self.__ci): ci_width = base_string_fmt.format(self.__ci) else: ci_width = str(self.__ci) - - ci_out = {"es" : base_string_fmt.format(self.__difference), - "ci" : ci_width, - "bca_low" : base_string_fmt.format(self.__bca_low), - "bca_high" : base_string_fmt.format(self.__bca_high)} - + + ci_out = { + "es": base_string_fmt.format(self.__difference), + "ci": ci_width, + "bca_low": base_string_fmt.format(self.__bca_low), + "bca_high": base_string_fmt.format(self.__bca_high), + } + out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) out = out1 + out2 if header is True: out = print_greeting() + "\n" + "\n" + out - pval_rounded = base_string_fmt.format(self.pvalue_permutation) - - p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) + p1 = "The p-value of the two-sided permutation t-test is {}, ".format( + pval_rounded + ) p2 = "calculated for legacy purposes only. " pvalue = p1 + p2 - bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) bs2 = "the confidence interval is bias-corrected and accelerated." bs = bs1 + bs2 - pval_def1 = "Any p-value reported is the probability of observing the " + \ - "effect size (or greater),\nassuming the null hypothesis of " + \ - "zero difference is true." - pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ - "control and test labels were performed." + pval_def1 = ( + "Any p-value reported is the probability of observing the " + + "effect size (or greater),\nassuming the null hypothesis of " + + "zero difference is true." + ) + pval_def2 = ( + "\nFor each p-value, 5000 reshuffles of the " + + "control and test labels were performed." + ) pval_def = pval_def1 + pval_def2 - return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) - def to_dict(self): """ Returns the attributes of the `DeltaDelta` object as a dictionary. """ # Only get public (user-facing) attributes. 
- attrs = [a for a in dir(self) - if not a.startswith(("_", "to_dict"))] + attrs = [a for a in dir(self) if not a.startswith(("_", "to_dict"))] out = {} for a in attrs: out[a] = getattr(self, a) return out - @property def ci(self): """ @@ -221,7 +220,6 @@ def ci(self): """ return self.__ci - @property def alpha(self): """ @@ -230,30 +228,25 @@ def alpha(self): """ return self.__alpha - @property def bias_correction(self): return self.__bias_correction - @property def bootstraps(self): - ''' + """ Return the bootstrapped deltas from all the experiment groups. - ''' + """ return self.__bootstraps - @property def jackknives(self): return self.__jackknives - @property def acceleration_value(self): return self.__acceleration_value - @property def bca_low(self): """ @@ -261,7 +254,6 @@ def bca_low(self): """ return self.__bca_low - @property def bca_high(self): """ @@ -269,49 +261,42 @@ def bca_high(self): """ return self.__bca_high - @property def bca_interval_idx(self): return self.__bca_interval_idx - @property def control(self): - ''' + """ Return the name of the control experiment group. - ''' + """ return self.__control - @property def test(self): - ''' + """ Return the name of the test experiment group. - ''' + """ return self.__test - @property def bootstraps_delta_delta(self): - ''' - Return the delta-delta values calculated from the bootstrapped + """ + Return the delta-delta values calculated from the bootstrapped deltas. - ''' + """ return self.__bootstraps_delta_delta - @property def difference(self): - ''' + """ Return the delta-delta value calculated based on the raw data. 
- ''' + """ return self.__difference - @property - def pct_interval_idx (self): - return self.__pct_interval_idx - + def pct_interval_idx(self): + return self.__pct_interval_idx @property def pct_low(self): @@ -320,7 +305,6 @@ def pct_low(self): """ return self.__pct_low - @property def pct_high(self): """ @@ -328,7 +312,6 @@ def pct_high(self): """ return self.__pct_high - @property def pvalue_permutation(self): try: @@ -336,7 +319,6 @@ def pvalue_permutation(self): except AttributeError: self.__permutation_test() return self.__pvalue_permutation - @property def permutation_count(self): @@ -345,34 +327,30 @@ def permutation_count(self): """ return self.__permutation_count - @property def permutations(self): - ''' + """ Return the mean differences of permutations obtained during the permutation test for each experiment group. - ''' + """ try: return self.__permutations except AttributeError: self.__permutation_test() return self.__permutations - @property def permutations_delta_delta(self): - ''' - Return the delta-delta values of permutations obtained + """ + Return the delta-delta values of permutations obtained during the permutation test. - ''' + """ try: return self.__permutations_delta_delta except AttributeError: self.__permutation_test() return self.__permutations_delta_delta - - # %% ../nbs/API/delta_objects.ipynb 10 class MiniMetaDelta(object): """ diff --git a/dabest/_effsize_objects.py b/dabest/_effsize_objects.py index ec497d2e..3f2dcf3a 100644 --- a/dabest/_effsize_objects.py +++ b/dabest/_effsize_objects.py @@ -22,7 +22,7 @@ class TwoGroupsEffectSize(object): """ A class to compute and store the results of bootstrapped mean differences between two groups. - + Compute the effect size between two groups. Parameters @@ -38,7 +38,7 @@ class TwoGroupsEffectSize(object): The number of bootstrap resamples to be taken for the calculation of the confidence interval limits. 
permutation_count : int, default 5000 - The number of permutations (reshuffles) to perform for the + The number of permutations (reshuffles) to perform for the computation of the permutation p-value ci : float, default 95 The confidence interval width. The default of 95 produces 95% @@ -73,102 +73,208 @@ class TwoGroupsEffectSize(object): The percentile confidence interval lower limit and upper limits, respectively. """ - def __init__(self, control, test, effect_size, - proportional=False, - is_paired=None, ci=95, - resamples=5000, - permutation_count=5000, - random_seed=12345): - - from ._stats_tools import effsize as es + def __init__( + self, + control, + test, + effect_size, + proportional=False, + is_paired=None, + ci=95, + resamples=5000, + permutation_count=5000, + random_seed=12345, + ): from ._stats_tools import confint_2group_diff as ci2g + from ._stats_tools import effsize as es + self.__EFFECT_SIZE_DICT = { + "mean_diff": "mean difference", + "median_diff": "median difference", + "cohens_d": "Cohen's d", + "cohens_h": "Cohen's h", + "hedges_g": "Hedges' g", + "cliffs_delta": "Cliff's delta", + "delta_g": "deltas' g", + } - self.__EFFECT_SIZE_DICT = {"mean_diff" : "mean difference", - "median_diff" : "median difference", - "cohens_d" : "Cohen's d", - "cohens_h" : "Cohen's h", - "hedges_g" : "Hedges' g", - "cliffs_delta" : "Cliff's delta", - "delta_g" : "deltas' g"} - - - kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()] - if effect_size not in kosher_es: - err1 = "The effect size '{}'".format(effect_size) - err2 = "is not one of {}".format(kosher_es) - raise ValueError(" ".join([err1, err2])) - - if effect_size == "cliffs_delta" and is_paired: - err1 = "`paired` is not None; therefore Cliff's delta is not defined." - raise ValueError(err1) - - if proportional==True and effect_size not in ['mean_diff','cohens_h']: - err1 = "`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined." 
- raise ValueError(err1) - - if proportional==True and (isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False): - err1 = "`proportional` is True; Only accept binary data consisting of 0 and 1." - raise ValueError(err1) + self.__is_paired = is_paired + self.__resamples = resamples + self.__effect_size = effect_size + self.__random_seed = random_seed + self.__ci = ci + self.__proportional = proportional + self.check_errors(control, test) # Convert to numpy arrays for speed. # NaNs are automatically dropped. control = array(control) - test = array(test) - control = control[~isnan(control)] - test = test[~isnan(test)] - - self.__effect_size = effect_size - # TODO refactor this - self.__control = control - self.__test = test - self.__is_paired = is_paired - self.__resamples = resamples + test = array(test) + self.__control = control[~isnan(control)] + self.__test = test[~isnan(test)] self.__permutation_count = permutation_count - self.__random_seed = random_seed - self.__ci = ci - self.__alpha = ci2g._compute_alpha_from_ci(ci) + + self.__alpha = ci2g._compute_alpha_from_ci(self.__ci) self.__difference = es.two_group_difference( - control, test, is_paired, effect_size) - + self.__control, test, self.__is_paired, self.__effect_size + ) + self.__jackknives = ci2g.compute_meandiff_jackknife( - control, test, is_paired, effect_size) + self.__control, test, self.__is_paired, self.__effect_size + ) self.__acceleration_value = ci2g._calc_accel(self.__jackknives) bootstraps = ci2g.compute_bootstrapped_diff( - control, test, is_paired, effect_size, - resamples, random_seed) + self.__control, + test, + self.__is_paired, + self.__effect_size, + self.__resamples, + self.__random_seed, + ) self.__bootstraps = bootstraps - + sorted_bootstraps = npsort(self.__bootstraps) # Added in v0.2.6. # Raises a UserWarning if there are any infiinities in the bootstraps. 
num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)]) - + if num_infinities > 0: - warn_msg = "There are {} bootstrap(s) that are not defined. "\ - "This is likely due to smaple sample sizes. "\ - "The values in a bootstrap for a group will be more likely "\ - "to be all equal, with a resulting variance of zero. "\ - "The computation of Cohen's d and Hedges' g thus "\ - "involved a division by zero. " - warnings.warn(warn_msg.format(num_infinities), - category=UserWarning) + warn_msg = ( + "There are {} bootstrap(s) that are not defined. " + "This is likely due to small sample sizes. " + "The values in a bootstrap for a group will be more likely " + "to be all equal, with a resulting variance of zero. " + "The computation of Cohen's d and Hedges' g thus " + "involved a division by zero. " + ) + warnings.warn(warn_msg.format(num_infinities), category=UserWarning) self.__bias_correction = ci2g.compute_meandiff_bias_correction( - self.__bootstraps, self.__difference) + self.__bootstraps, self.__difference + ) + + self.compute_bca_intervals(sorted_bootstraps) + + # Compute percentile intervals. 
+ pct_idx_low = int((self.__alpha / 2) * self.__resamples) + pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples) + + self.__pct_interval_idx = (pct_idx_low, pct_idx_high) + self.__pct_low = sorted_bootstraps[pct_idx_low] + self.__pct_high = sorted_bootstraps[pct_idx_high] + + self.perform_statistical_test() + + def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3): + RM_STATUS = { + "baseline": "for repeated measures against baseline \n", + "sequential": "for the sequential design of repeated-measures experiment \n", + "None": "", + } + + PAIRED_STATUS = { + "baseline": "paired", + "sequential": "paired", + "None": "unpaired", + } + + first_line = { + "rm_status": RM_STATUS[str(self.__is_paired)], + "es": self.__EFFECT_SIZE_DICT[self.__effect_size], + "paired_status": PAIRED_STATUS[str(self.__is_paired)], + } + + out1 = "The {paired_status} {es} {rm_status}".format(**first_line) + + base_string_fmt = "{:." + str(sigfig) + "}" + if "." in str(self.__ci): + ci_width = base_string_fmt.format(self.__ci) + else: + ci_width = str(self.__ci) + + ci_out = { + "es": base_string_fmt.format(self.__difference), + "ci": ci_width, + "bca_low": base_string_fmt.format(self.__bca_low), + "bca_high": base_string_fmt.format(self.__bca_high), + } + + out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) + out = out1 + out2 + + pval_rounded = base_string_fmt.format(self.pvalue_permutation) + + p1 = "The p-value of the two-sided permutation t-test is {}, ".format( + pval_rounded + ) + p2 = "calculated for legacy purposes only. " + pvalue = p1 + p2 + + bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) + bs2 = "the confidence interval is bias-corrected and accelerated." + bs = bs1 + bs2 + + pval_def1 = ( + "Any p-value reported is the probability of observing the " + + "effect size (or greater),\nassuming the null hypothesis of " + + "zero difference is true." 
+ ) + pval_def2 = ( + "\nFor each p-value, 5000 reshuffles of the " + + "control and test labels were performed." + ) + pval_def = pval_def1 + pval_def2 + + if show_resample_count and define_pval: + return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) + elif ~show_resample_count and define_pval: + return "{}\n{}\n\n{}".format(out, pvalue, pval_def) + elif show_resample_count and ~define_pval: + return "{}\n{}\n\n{}".format(out, pvalue, bs) + else: + return "{}\n{}".format(out, pvalue) + + def check_errors(self, control, test): + kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()] + if self.__effect_size not in kosher_es: + err1 = "The effect size '{}'".format(self.__effect_size) + err2 = "is not one of {}".format(kosher_es) + raise ValueError(" ".join([err1, err2])) + + if self.__effect_size == "cliffs_delta" and self.__is_paired: + err1 = "`paired` is not None; therefore Cliff's delta is not defined." + raise ValueError(err1) + + if self.__proportional and self.__effect_size not in ["mean_diff", "cohens_h"]: + err1 = "`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined." + raise ValueError(err1) + + if self.__proportional and ( + isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False + ): + err1 = ( + "`proportional` is True; Only accept binary data consisting of 0 and 1." + ) + raise ValueError(err1) + + def compute_bca_intervals(self, sorted_bootstraps): + from ._stats_tools import confint_2group_diff as ci2g # Compute BCa intervals. 
bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( - self.__bias_correction, self.__acceleration_value, - self.__resamples, ci) + self.__bias_correction, + self.__acceleration_value, + self.__resamples, + self.__ci, + ) self.__bca_interval_idx = (bca_idx_low, bca_idx_high) if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): - self.__bca_low = sorted_bootstraps[bca_idx_low] + self.__bca_low = sorted_bootstraps[bca_idx_low] self.__bca_high = sorted_bootstraps[bca_idx_high] err1 = "The $lim_type limit of the interval" @@ -177,14 +283,14 @@ def __init__(self, control, test, effect_size, err_temp = Template(" ".join([err1, err2, err3])) if bca_idx_low <= 10: - warnings.warn(err_temp.substitute(lim_type="lower", - loc="bottom"), - stacklevel=1) + warnings.warn( + err_temp.substitute(lim_type="lower", loc="bottom"), stacklevel=1 + ) - if bca_idx_high >= resamples-9: - warnings.warn(err_temp.substitute(lim_type="upper", - loc="top"), - stacklevel=1) + if bca_idx_high >= self.__resamples - 9: + warnings.warn( + err_temp.substitute(lim_type="upper", loc="top"), stacklevel=1 + ) else: # TODO improve error handling, separate file @@ -194,95 +300,97 @@ def __init__(self, control, test, effect_size, err_temp = Template(" ".join([err1, err2, err3])) if isnan(bca_idx_low): - self.__bca_low = self.__difference - warnings.warn(err_temp.substitute(lim_type="lower"), - stacklevel=0) + self.__bca_low = self.__difference + warnings.warn(err_temp.substitute(lim_type="lower"), stacklevel=0) if isnan(bca_idx_high): - self.__bca_high = self.__difference - warnings.warn(err_temp.substitute(lim_type="upper"), - stacklevel=0) - - # Compute percentile intervals. 
- pct_idx_low = int((self.__alpha/2) * resamples) - pct_idx_high = int((1-(self.__alpha/2)) * resamples) + self.__bca_high = self.__difference + warnings.warn(err_temp.substitute(lim_type="upper"), stacklevel=0) - self.__pct_interval_idx = (pct_idx_low, pct_idx_high) - self.__pct_low = sorted_bootstraps[pct_idx_low] - self.__pct_high = sorted_bootstraps[pct_idx_high] + def perform_statistical_test(self): + from ._stats_tools import effsize as es # Perform statistical tests. - self.__PermutationTest_result = PermutationTest(control, test, - effect_size, - is_paired, - permutation_count) - - if is_paired and proportional is False: + self.__PermutationTest_result = PermutationTest( + self.__control, + self.__test, + self.__effect_size, + self.__is_paired, + self.__permutation_count, + ) + + if self.__is_paired and self.__proportional is False: # Wilcoxon, a non-parametric version of the paired T-test. - wilcoxon = spstats.wilcoxon(control, test) + wilcoxon = spstats.wilcoxon(self.__control, self.__test) self.__pvalue_wilcoxon = wilcoxon.pvalue self.__statistic_wilcoxon = wilcoxon.statistic - - - if effect_size != "median_diff": + + if self.__effect_size != "median_diff": # Paired Student's t-test. 
- paired_t = spstats.ttest_rel(control, test, nan_policy='omit') + paired_t = spstats.ttest_rel( + self.__control, self.__test, nan_policy="omit" + ) self.__pvalue_paired_students_t = paired_t.pvalue self.__statistic_paired_students_t = paired_t.statistic # TODO dead code - standardized_es = es.cohens_d(control, test, is_paired) + standardized_es = es.cohens_d( + self.__control, self.__test, self.__is_paired + ) - elif is_paired and proportional: + elif self.__is_paired and self.__proportional: # for binary paired data, use McNemar's test # References: # https://en.wikipedia.org/wiki/McNemar%27s_test - df_temp = pd.DataFrame({'control': control, 'test': test}) - x1 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 0)]) - x2 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 1)]) - x3 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 0)]) - x4 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 1)]) - table = [[x1,x2],[x3,x4]] + df_temp = pd.DataFrame({"control": self.__control, "test": self.__test}) + x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)]) + x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)]) + x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)]) + x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)]) + table = [[x1, x2], [x3, x4]] _mcnemar = mcnemar(table, exact=True, correction=True) self.__pvalue_mcnemar = _mcnemar.pvalue self.__statistic_mcnemar = _mcnemar.statistic - elif effect_size == "cliffs_delta": + elif self.__effect_size == "cliffs_delta": # Let's go with Brunner-Munzel! 
- brunner_munzel = spstats.brunnermunzel(control, test, - nan_policy='omit') + brunner_munzel = spstats.brunnermunzel( + self.__control, self.__test, nan_policy="omit" + ) self.__pvalue_brunner_munzel = brunner_munzel.pvalue self.__statistic_brunner_munzel = brunner_munzel.statistic - - elif effect_size == "median_diff": + elif self.__effect_size == "median_diff": # According to scipy's documentation of the function, # "The Kruskal-Wallis H-test tests the null hypothesis # that the population median of all of the groups are equal." - kruskal = spstats.kruskal(control, test, nan_policy='omit') + kruskal = spstats.kruskal(self.__control, self.__test, nan_policy="omit") self.__pvalue_kruskal = kruskal.pvalue self.__statistic_kruskal = kruskal.statistic - else: # for mean difference, Cohen's d, and Hedges' g. + else: # for mean difference, Cohen's d, and Hedges' g. # Welch's t-test, assumes normality of distributions, # but does not assume equal variances. - welch = spstats.ttest_ind(control, test, equal_var=False, - nan_policy='omit') + welch = spstats.ttest_ind( + self.__control, self.__test, equal_var=False, nan_policy="omit" + ) self.__pvalue_welch = welch.pvalue self.__statistic_welch = welch.statistic # Student's t-test, assumes normality of distributions, # as well as assumption of equal variances. 
- students_t = spstats.ttest_ind(control, test, equal_var=True, - nan_policy='omit') + students_t = spstats.ttest_ind( + self.__control, self.__test, equal_var=True, nan_policy="omit" + ) self.__pvalue_students_t = students_t.pvalue self.__statistic_students_t = students_t.statistic # Mann-Whitney test: Non parametric, # does not assume normality of distributions try: - mann_whitney = spstats.mannwhitneyu(control, test, - alternative='two-sided') + mann_whitney = spstats.mannwhitneyu( + self.__control, self.__test, alternative="two-sided" + ) self.__pvalue_mann_whitney = mann_whitney.pvalue self.__statistic_mann_whitney = mann_whitney.statistic except ValueError: @@ -290,94 +398,31 @@ def __init__(self, control, test, effect_size, # Occurs when the control and test are exactly identical # in terms of rank (eg. all zeros.) pass - - - standardized_es = es.cohens_d(control, test, is_paired = None) - + + standardized_es = es.cohens_d(self.__control, self.__test, is_paired=None) + # The Cohen's h calculation is for binary categorical data try: - self.__proportional_difference = es.cohens_h(control, test) + self.__proportional_difference = es.cohens_h( + self.__control, self.__test + ) except ValueError: # TODO At least print some warning? # Occur only when the data consists not only 0's and 1's. pass - - def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3): - - RM_STATUS = {'baseline' : 'for repeated measures against baseline \n', - 'sequential': 'for the sequential design of repeated-measures experiment \n', - 'None' : '' - } - - PAIRED_STATUS = {'baseline' : 'paired', - 'sequential' : 'paired', - 'None' : 'unpaired' - } - - first_line = {"rm_status" : RM_STATUS[str(self.__is_paired)], - "es" : self.__EFFECT_SIZE_DICT[self.__effect_size], - "paired_status": PAIRED_STATUS[str(self.__is_paired)]} - - - out1 = "The {paired_status} {es} {rm_status}".format(**first_line) - - base_string_fmt = "{:." + str(sigfig) + "}" - if "." 
in str(self.__ci): - ci_width = base_string_fmt.format(self.__ci) - else: - ci_width = str(self.__ci) - - ci_out = {"es" : base_string_fmt.format(self.__difference), - "ci" : ci_width, - "bca_low" : base_string_fmt.format(self.__bca_low), - "bca_high" : base_string_fmt.format(self.__bca_high)} - - out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) - out = out1 + out2 - - pval_rounded = base_string_fmt.format(self.pvalue_permutation) - - p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) - p2 = "calculated for legacy purposes only. " - pvalue = p1 + p2 - - bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) - bs2 = "the confidence interval is bias-corrected and accelerated." - bs = bs1 + bs2 - - pval_def1 = "Any p-value reported is the probability of observing the" + \ - "effect size (or greater),\nassuming the null hypothesis of" + \ - "zero difference is true." - pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ - "control and test labels were performed." - pval_def = pval_def1 + pval_def2 - - if show_resample_count and define_pval: - return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) - elif ~show_resample_count and define_pval: - return "{}\n{}\n\n{}".format(out, pvalue, pval_def) - elif show_resample_count and ~define_pval: - return "{}\n{}\n\n{}".format(out, pvalue, bs) - else: - return "{}\n{}".format(out, pvalue) - - - def to_dict(self): """ Returns the attributes of the `dabest.TwoGroupEffectSize` object as a dictionary. """ # Only get public (user-facing) attributes. 
- attrs = [a for a in dir(self) - if not a.startswith(("_", "to_dict"))] + attrs = [a for a in dir(self) if not a.startswith(("_", "to_dict"))] out = {} for a in attrs: out[a] = getattr(self, a) return out - @property def difference(self): """ @@ -396,6 +441,10 @@ def effect_size(self): def is_paired(self): return self.__is_paired + @property + def proportional(self): + return self.__proportional + @property def ci(self): """ @@ -469,8 +518,6 @@ def pct_high(self): """ return self.__pct_high - - @property def pvalue_brunner_munzel(self): try: @@ -485,8 +532,6 @@ def statistic_brunner_munzel(self): except AttributeError: return npnan - - @property def pvalue_wilcoxon(self): try: @@ -515,8 +560,6 @@ def statistic_mcnemar(self): except AttributeError: return npnan - - @property def pvalue_paired_students_t(self): # TODO Missing docstring @@ -533,8 +576,6 @@ def statistic_paired_students_t(self): except AttributeError: return npnan - - @property def pvalue_kruskal(self): # TODO Missing docstring @@ -551,7 +592,6 @@ def statistic_kruskal(self): except AttributeError: return npnan - @property def pvalue_welch(self): # TODO Missing docstring @@ -568,8 +608,6 @@ def statistic_welch(self): except AttributeError: return npnan - - @property def pvalue_students_t(self): # TODO Missing docstring @@ -586,8 +624,6 @@ def statistic_students_t(self): except AttributeError: return npnan - - @property def pvalue_mann_whitney(self): # TODO Missing docstring @@ -596,8 +632,6 @@ def pvalue_mann_whitney(self): except AttributeError: return npnan - - @property def statistic_mann_whitney(self): # TODO Missing docstring @@ -605,31 +639,27 @@ def statistic_mann_whitney(self): return self.__statistic_mann_whitney except AttributeError: return npnan - + @property def pvalue_permutation(self): # TODO Missing docstring return self.__PermutationTest_result.pvalue - @property def permutation_count(self): """ - The number of permuations taken. + The number of permutations taken. 
""" return self.__PermutationTest_result.permutation_count - @property def permutations(self): return self.__PermutationTest_result.permutations - @property def permutations_var(self): return self.__PermutationTest_result.permutations_var - @property def proportional_difference(self): try: @@ -637,7 +667,6 @@ def proportional_difference(self): except AttributeError: return npnan - # %% ../nbs/API/effsize_objects.ipynb 10 class EffectSizeDataFrame(object): """A class that generates and stores the results of bootstrapped effect @@ -684,7 +713,7 @@ def __pre_calc(self): out = [] reprs = [] - if self.__delta2==True: + if self.__delta2: mixed_data = [] for j, current_tuple in enumerate(idx): if self.__is_paired != "sequential": @@ -1365,7 +1394,6 @@ def __init__(self, control: array, self.__permutations_var = [] for i in range(int(permutation_count)): - if is_paired: # Select which control-test pairs to swap. random_idx = rng.choice(CONTROL_LEN, diff --git a/dabest/_stats_tools/confint_1group.py b/dabest/_stats_tools/confint_1group.py index 88c9ec70..35cef3f9 100644 --- a/dabest/_stats_tools/confint_1group.py +++ b/dabest/_stats_tools/confint_1group.py @@ -17,67 +17,66 @@ def create_bootstrap_indexes(array, resamples=5000, random_seed=12345): """ rng = RandomState(PCG64(random_seed)) - + indexes = range(0, len(array)) - out = (rng.choice(indexes, len(indexes), replace=True) - for i in range(0, resamples)) + out = (rng.choice(indexes, len(indexes), replace=True) for i in range(0, resamples)) return out - def compute_1group_jackknife(x, func, *args, **kwargs): """ Returns the jackknife bootstraps for func(x). """ from . import confint_2group_diff as ci_2g + jackknives = [i for i in ci_2g.create_jackknife_indexes(x)] out = [func(x[j], *args, **kwargs) for j in jackknives] - del jackknives # memory management. + del jackknives # memory management. return out - def compute_1group_acceleration(jack_dist): from . 
import confint_2group_diff as ci_2g - return ci_2g._calc_accel(jack_dist) + return ci_2g._calc_accel(jack_dist) -def compute_1group_bootstraps(x, func, resamples=5000, random_seed=12345, - *args, **kwargs): +def compute_1group_bootstraps( + x, func, resamples=5000, random_seed=12345, *args, **kwargs +): """Bootstraps func(x), with the number of specified resamples.""" - # Create bootstrap indexes. - boot_indexes = create_bootstrap_indexes(x, resamples=resamples, - random_seed=random_seed) + boot_indexes = create_bootstrap_indexes( + x, resamples=resamples, random_seed=random_seed + ) out = [func(x[b], *args, **kwargs) for b in boot_indexes] - + del boot_indexes - - return out + return out def compute_1group_bias_correction(x, bootstraps, func, *args, **kwargs): - metric = func(x, *args, **kwargs) prop_boots_less_than_metric = sum(bootstraps < metric) / len(bootstraps) return norm.ppf(prop_boots_less_than_metric) - -def summary_ci_1group(x:np.array,# An numerical iterable. - func, #The function to be applied to x. - resamples:int=5000, #The number of bootstrap resamples to be taken of func(x). - alpha:float=0.05, #Denotes the likelihood that the confidence interval produced _does not_ include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced. - random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable. - sort_bootstraps:bool=True, - *args, **kwargs): +def summary_ci_1group( + x: np.array, # A numerical iterable. + func, # The function to be applied to x. + resamples: int = 5000, # The number of bootstrap resamples to be taken of func(x). + alpha: float = 0.05, # Denotes the likelihood that the confidence interval produced _does not_ include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced. 
+ random_seed: int = 12345, # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable. + sort_bootstraps: bool = True, + *args, + **kwargs +): """ Given an array-like x, returns func(x), and a bootstrap confidence interval of func(x). @@ -101,10 +100,9 @@ def summary_ci_1group(x:np.array,# An numerical iterable. """ from . import confint_2group_diff as ci2g - - boots = compute_1group_bootstraps(x, func, resamples=resamples, - random_seed=random_seed, - *args, **kwargs) + boots = compute_1group_bootstraps( + x, func, resamples=resamples, random_seed=random_seed, *args, **kwargs + ) bias = compute_1group_bias_correction(x, boots, func) jk = compute_1group_jackknife(x, func, *args, **kwargs) @@ -125,10 +123,13 @@ def summary_ci_1group(x:np.array,# An numerical iterable. del boots del boots_sorted - out = {'summary': func(x), 'func': func, - 'bca_ci_low': low, 'bca_ci_high': high, - 'bootstraps': B} + out = { + "summary": func(x), + "func": func, + "bca_ci_low": low, + "bca_ci_high": high, + "bootstraps": B, + } del B return out - diff --git a/dabest/_stats_tools/confint_2group_diff.py b/dabest/_stats_tools/confint_2group_diff.py index 9dfd41bb..a1b78747 100644 --- a/dabest/_stats_tools/confint_2group_diff.py +++ b/dabest/_stats_tools/confint_2group_diff.py @@ -36,7 +36,6 @@ def create_jackknife_indexes(data): return (delete(index_range, i) for i in index_range) - def create_repeated_indexes(data): """ Convenience function. 
Given an array-like with length N, @@ -47,25 +46,30 @@ def create_repeated_indexes(data): return (index_range for i in index_range) - def _create_two_group_jackknife_indexes(x0, x1, is_paired): """Creates the jackknife bootstrap for 2 groups.""" if is_paired and len(x0) == len(x1): - out = list(zip([j for j in create_jackknife_indexes(x0)], - [i for i in create_jackknife_indexes(x1)] - ) - ) + out = list( + zip( + [j for j in create_jackknife_indexes(x0)], + [i for i in create_jackknife_indexes(x1)], + ) + ) else: - jackknife_c = list(zip([j for j in create_jackknife_indexes(x0)], - [i for i in create_repeated_indexes(x1)] - ) - ) - - jackknife_t = list(zip([i for i in create_repeated_indexes(x0)], - [j for j in create_jackknife_indexes(x1)] - ) - ) + jackknife_c = list( + zip( + [j for j in create_jackknife_indexes(x0)], + [i for i in create_repeated_indexes(x1)], + ) + ) + + jackknife_t = list( + zip( + [i for i in create_repeated_indexes(x0)], + [j for j in create_jackknife_indexes(x1)], + ) + ) out = jackknife_c + jackknife_t del jackknife_c del jackknife_t @@ -73,7 +77,6 @@ def _create_two_group_jackknife_indexes(x0, x1, is_paired): return out - def compute_meandiff_jackknife(x0, x1, is_paired, effect_size): """ Given two arrays, returns the jackknife for their effect size. 
@@ -88,30 +91,28 @@ def compute_meandiff_jackknife(x0, x1, is_paired, effect_size): x0_shuffled = x0[j[0]] x1_shuffled = x1[j[1]] - es = __es.two_group_difference(x0_shuffled, x1_shuffled, - is_paired, effect_size) + es = __es.two_group_difference(x0_shuffled, x1_shuffled, is_paired, effect_size) out.append(es) return out - def _calc_accel(jack_dist): - jack_mean = npmean(jack_dist) - numer = npsum((jack_mean - jack_dist)**3) - denom = 6.0 * (npsum((jack_mean - jack_dist)**2) ** 1.5) + numer = npsum((jack_mean - jack_dist) ** 3) + denom = 6.0 * (npsum((jack_mean - jack_dist) ** 2) ** 1.5) - with errstate(invalid='ignore'): + with errstate(invalid="ignore"): # does not raise warning if invalid division encountered. return numer / denom -def compute_bootstrapped_diff(x0, x1, is_paired, effect_size, - resamples=5000, random_seed=12345): +def compute_bootstrapped_diff( + x0, x1, is_paired, effect_size, resamples=5000, random_seed=12345 +): """Bootstraps the effect_size for 2 groups.""" - + from . 
import effsize as __es rng = RandomState(PCG64(random_seed)) @@ -119,9 +120,8 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size, out = np.repeat(np.nan, resamples) x0_len = len(x0) x1_len = len(x1) - + for i in range(int(resamples)): - if is_paired: if x0_len != x1_len: raise ValueError("The two arrays do not have the same length.") @@ -131,23 +131,26 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size, else: x0_sample = rng.choice(x0, x0_len, replace=True) x1_sample = rng.choice(x1, x1_len, replace=True) - - out[i] = __es.two_group_difference(x0_sample, x1_sample, - is_paired, effect_size) - + + out[i] = __es.two_group_difference(x0_sample, x1_sample, is_paired, effect_size) + return out -def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1 - x2:np.ndarray,# Test group 1 - x3:np.ndarray,# Control group 2 - x4:np.ndarray,# Test group 2 - is_paired:str=None, - resamples:int=5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits. - random_seed:int=12345# `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable. - )->tuple: # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta + +def compute_delta2_bootstrapped_diff( + x1: np.ndarray, # Control group 1 + x2: np.ndarray, # Test group 1 + x3: np.ndarray, # Control group 2 + x4: np.ndarray, # Test group 2 + is_paired: str = None, + resamples: int = 5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits. + random_seed: int = 12345, # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable. 
+) -> ( + tuple +): # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta """ Bootstraps the effect size deltas' g. - + """ rng = RandomState(PCG64(random_seed)) @@ -158,11 +161,15 @@ def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1 out_delta_g = np.repeat(np.nan, resamples) deltadelta = np.repeat(np.nan, resamples) - n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2= x1_len, x2_len, x3_len, x4_len + n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2 = x1_len, x2_len, x3_len, x4_len s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4) - sd_numerator = ((n_a2_b1 - 1) * s_a2_b1 ** 2 + (n_a1_b1 - 1) * s_a1_b1 ** 2 + (n_a2_b2 - 1) * s_a2_b2 ** 2 + ( - n_a1_b2 - 1) * s_a1_b2 ** 2) + sd_numerator = ( + (n_a2_b1 - 1) * s_a2_b1**2 + + (n_a1_b1 - 1) * s_a1_b1**2 + + (n_a2_b2 - 1) * s_a2_b2**2 + + (n_a1_b2 - 1) * s_a1_b2**2 + ) sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1) pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator) @@ -170,46 +177,58 @@ def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1 if is_paired: if (x1_len != x2_len) or (x3_len != x4_len): raise ValueError("The two arrays do not have the same length.") - df_paired_1 = pd.DataFrame({ - 'value': np.concatenate([x1, x3]), - 'array_id': np.repeat(['x1','x3'], [x1_len, x3_len]) - }) - df_paired_2 = pd.DataFrame({ - 'value': np.concatenate([x2, x4]), - 'array_id': np.repeat(['x2','x4'], [x1_len, x3_len]) - }) - x_sample_index = rng.choice(len(df_paired_1), len(df_paired_1), replace=True) + df_paired_1 = pd.DataFrame( + { + "value": np.concatenate([x1, x3]), + "array_id": np.repeat(["x1", "x3"], [x1_len, x3_len]), + } + ) + df_paired_2 = pd.DataFrame( + { + "value": np.concatenate([x2, x4]), + "array_id": np.repeat(["x2", "x4"], [x1_len, x3_len]), + } + ) + x_sample_index = rng.choice( + len(df_paired_1), len(df_paired_1), replace=True + ) x_sample_1 = df_paired_1.loc[x_sample_index] x_sample_2 = 
df_paired_2.loc[x_sample_index] - x1_sample = x_sample_1[x_sample_1['array_id'] == 'x1']['value'] - x2_sample = x_sample_2[x_sample_2['array_id'] == 'x2']['value'] - x3_sample = x_sample_1[x_sample_1['array_id'] == 'x3']['value'] - x4_sample = x_sample_2[x_sample_2['array_id'] == 'x4']['value'] + x1_sample = x_sample_1[x_sample_1["array_id"] == "x1"]["value"] + x2_sample = x_sample_2[x_sample_2["array_id"] == "x2"]["value"] + x3_sample = x_sample_1[x_sample_1["array_id"] == "x3"]["value"] + x4_sample = x_sample_2[x_sample_2["array_id"] == "x4"]["value"] else: - df = pd.DataFrame({ - 'value': np.concatenate([x1, x2, x3, x4]), - 'array_id': np.repeat(['x1', 'x2', 'x3', 'x4'], [x1_len, x2_len, x3_len, x4_len]) - }) - x_sample_index = rng.choice(len(df),len(df), replace=True) + df = pd.DataFrame( + { + "value": np.concatenate([x1, x2, x3, x4]), + "array_id": np.repeat( + ["x1", "x2", "x3", "x4"], [x1_len, x2_len, x3_len, x4_len] + ), + } + ) + x_sample_index = rng.choice(len(df), len(df), replace=True) x_sample = df.loc[x_sample_index] - x1_sample = x_sample[x_sample['array_id'] == 'x1']['value'] - x2_sample = x_sample[x_sample['array_id'] == 'x2']['value'] - x3_sample = x_sample[x_sample['array_id'] == 'x3']['value'] - x4_sample = x_sample[x_sample['array_id'] == 'x4']['value'] + x1_sample = x_sample[x_sample["array_id"] == "x1"]["value"] + x2_sample = x_sample[x_sample["array_id"] == "x2"]["value"] + x3_sample = x_sample[x_sample["array_id"] == "x3"]["value"] + x4_sample = x_sample[x_sample["array_id"] == "x4"]["value"] - delta_1 = np.mean(x2_sample)-np.mean(x1_sample) - delta_2 = np.mean(x4_sample)-np.mean(x3_sample) + delta_1 = np.mean(x2_sample) - np.mean(x1_sample) + delta_2 = np.mean(x4_sample) - np.mean(x3_sample) delta_delta = delta_2 - delta_1 deltadelta[i] = delta_delta - out_delta_g[i] = delta_delta/pooled_sample_sd - delta_g = ((np.mean(x4)-np.mean(x3)) - (np.mean(x2)-np.mean(x1))) / pooled_sample_sd + out_delta_g[i] = delta_delta / pooled_sample_sd + 
delta_g = ( + (np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1)) + ) / pooled_sample_sd return out_delta_g, delta_g, deltadelta - -def compute_meandiff_bias_correction(bootstraps, #An numerical iterable, comprising bootstrap resamples of the effect size. - effsize # The effect size for the original sample. - ): #The bias correction value for the given bootstraps and effect size. +def compute_meandiff_bias_correction( + bootstraps, # An numerical iterable, comprising bootstrap resamples of the effect size. + effsize, # The effect size for the original sample. +): # The bias correction value for the given bootstraps and effect size. """ Computes the bias correction required for the BCa method of confidence interval construction. @@ -228,13 +247,11 @@ def compute_meandiff_bias_correction(bootstraps, #An numerical iterable, compris return norm.ppf(prop_less_than_es) - def _compute_alpha_from_ci(ci): if ci < 0 or ci > 100: raise ValueError("`ci` must be a number between 0 and 100.") - return (100. - ci) / 100. - + return (100.0 - ci) / 100.0 def _compute_quantile(z, bias, acceleration): @@ -244,7 +261,6 @@ def _compute_quantile(z, bias, acceleration): return bias + (numer / denom) - def compute_interval_limits(bias, acceleration, n_boots, ci=95): """ Returns the indexes of the interval limits for a given bootstrap. 
@@ -260,7 +276,7 @@ def compute_interval_limits(bias, acceleration, n_boots, ci=95): z_low = norm.ppf(alpha_low) z_high = norm.ppf(alpha_high) - kws = {'bias': bias, 'acceleration': acceleration} + kws = {"bias": bias, "acceleration": acceleration} low = _compute_quantile(z_low, **kws) high = _compute_quantile(z_high, **kws) @@ -273,17 +289,17 @@ def compute_interval_limits(bias, acceleration, n_boots, ci=95): return low, high -def calculate_group_var(control_var, control_N,test_var, test_N): - return control_var/control_N + test_var/test_N +def calculate_group_var(control_var, control_N, test_var, test_N): + return control_var / control_N + test_var / test_N def calculate_weighted_delta(group_var, differences, resamples): - ''' + """ Compute the weighted deltas. - ''' + """ - weight = 1/group_var + weight = 1 / group_var denom = np.sum(weight) num = np.sum(weight[i] * differences[i] for i in range(0, len(weight))) - return num/denom + return num / denom diff --git a/dabest/misc_tools.py b/dabest/misc_tools.py index 4b2617ef..c581c5db 100644 --- a/dabest/misc_tools.py +++ b/dabest/misc_tools.py @@ -4,9 +4,13 @@ __all__ = ['merge_two_dicts', 'unpack_and_add', 'print_greeting', 'get_varname'] # %% ../nbs/API/misc_tools.ipynb 4 -def merge_two_dicts(x:dict, - y:dict - )->dict:#A dictionary containing a union of all keys in both original dicts. +import datetime as dt +from numpy import repeat + +# %% ../nbs/API/misc_tools.ipynb 5 +def merge_two_dicts( + x: dict, y: dict +) -> dict: # A dictionary containing a union of all keys in both original dicts. """ Given two dicts, merge them into a new dict as a shallow copy. Any overlapping keys in `y` will override the values in `x`. 
@@ -20,24 +24,20 @@ def merge_two_dicts(x:dict, return z - def unpack_and_add(l, c): """Convenience function to allow me to add to an existing list without altering that list.""" t = [a for a in l] t.append(c) - return(t) - + return t def print_greeting(): from .__init__ import __version__ - import datetime as dt - import numpy as np line1 = "DABEST v{}".format(__version__) - header = "".join(np.repeat("=", len(line1))) - spacer = "".join(np.repeat(" ", len(line1))) + header = "".join(repeat("=", len(line1))) + spacer = "".join(repeat(" ", len(line1))) now = dt.datetime.now() if 0 < now.hour < 12: @@ -53,9 +53,7 @@ def print_greeting(): def get_varname(obj): - matching_vars = [k for k,v in globals().items() if v is obj] + matching_vars = [k for k, v in globals().items() if v is obj] if len(matching_vars) > 0: return matching_vars[0] - else: - return "" - + return "" diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py index a297cc00..a8dc5331 100644 --- a/dabest/plot_tools.py +++ b/dabest/plot_tools.py @@ -14,25 +14,23 @@ import seaborn as sns import numpy as np import itertools +import matplotlib.lines as mlines # %% ../nbs/API/plot_tools.ipynb 5 -def halfviolin(v, half='right', fill_color='k', alpha=1, - line_color='k', line_width=0): - import numpy as np - - for b in v['bodies']: +def halfviolin(v, half="right", fill_color="k", alpha=1, line_color="k", line_width=0): + for b in v["bodies"]: V = b.get_paths()[0].vertices mean_vertical = np.mean(V[:, 0]) mean_horizontal = np.mean(V[:, 1]) - if half == 'right': + if half == "right": V[:, 0] = np.clip(V[:, 0], mean_vertical, np.inf) - elif half == 'left': + elif half == "left": V[:, 0] = np.clip(V[:, 0], -np.inf, mean_vertical) - elif half == 'bottom': + elif half == "bottom": V[:, 1] = np.clip(V[:, 1], -np.inf, mean_horizontal) - elif half == 'top': + elif half == "top": V[:, 1] = np.clip(V[:, 1], mean_horizontal, np.inf) b.set_color(fill_color) @@ -46,43 +44,45 @@ def get_swarm_spans(coll): Given a matplotlib 
Collection, will obtain the x and y spans for the collection. Will return None if this fails. """ - import numpy as np x, y = np.array(coll.get_offsets()).T try: return x.min(), x.max(), y.min(), y.max() except ValueError: return None -def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. - x:str, #x column to be plotted. - y:str, # y column to be plotted. - type:str='mean_sd', # Choose from ['mean_sd', 'median_quartiles']. Plots the summary statistics for each group. If 'mean_sd', then the mean and standard deviation of each group is plotted as a gapped line. If 'median_quantiles', then the median and 25th and 75th percentiles of each group is plotted instead. - offset:float=0.2, #Give a single float (that will be used as the x-offset of all gapped lines), or an iterable containing the list of x-offsets. - ax=None, #If a matplotlib Axes object is specified, the gapped lines will be plotted in order on this axes. If None, the current axes (plt.gca()) is used. - line_color="black", # The color of the gapped lines. - gap_width_percent=1, # The width of the gap in the gapped lines, as a percentage of the y-axis span. - pos:list=[0, 1],#The positions of the error bars for the sankey_error_bar method. - method:str='gapped_lines', #The method to use for drawing the error bars. Options are: 'gapped_lines', 'proportional_error_bar', and 'sankey_error_bar'. - **kwargs:dict - ): - ''' + +def error_bar( + data: pd.DataFrame, # This DataFrame should be in 'long' format. + x: str, # x column to be plotted. + y: str, # y column to be plotted. + type: str = "mean_sd", # Choose from ['mean_sd', 'median_quartiles']. Plots the summary statistics for each group. If 'mean_sd', then the mean and standard deviation of each group is plotted as a gapped line. If 'median_quantiles', then the median and 25th and 75th percentiles of each group is plotted instead. 
+ offset: float = 0.2, # Give a single float (that will be used as the x-offset of all gapped lines), or an iterable containing the list of x-offsets. + ax=None, # If a matplotlib Axes object is specified, the gapped lines will be plotted in order on this axes. If None, the current axes (plt.gca()) is used. + line_color="black", # The color of the gapped lines. + gap_width_percent=1, # The width of the gap in the gapped lines, as a percentage of the y-axis span. + pos: list = [ + 0, + 1, + ], # The positions of the error bars for the sankey_error_bar method. + method: str = "gapped_lines", # The method to use for drawing the error bars. Options are: 'gapped_lines', 'proportional_error_bar', and 'sankey_error_bar'. + **kwargs: dict, +): + """ Function to plot the standard deviations as vertical errorbars. The mean is a gap defined by negative space. This function combines the functionality of gapped_lines(), proportional_error_bar(), and sankey_error_bar(). - ''' - import numpy as np - import pandas as pd - import matplotlib.pyplot as plt - import matplotlib.lines as mlines + """ if gap_width_percent < 0 or gap_width_percent > 100: raise ValueError("`gap_width_percent` must be between 0 and 100.") - if method not in ['gapped_lines', 'proportional_error_bar', 'sankey_error_bar']: - raise ValueError("Invalid `method`. Must be one of 'gapped_lines', \ - 'proportional_error_bar', or 'sankey_error_bar'.") + if method not in ["gapped_lines", "proportional_error_bar", "sankey_error_bar"]: + raise ValueError( + "Invalid `method`. Must be one of 'gapped_lines', \ + 'proportional_error_bar', or 'sankey_error_bar'." + ) if ax is None: ax = plt.gca() @@ -91,14 +91,14 @@ def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. 
gap_width = ax_yspan * gap_width_percent / 100 keys = kwargs.keys() - if 'clip_on' not in keys: - kwargs['clip_on'] = False + if "clip_on" not in keys: + kwargs["clip_on"] = False - if 'zorder' not in keys: - kwargs['zorder'] = 5 + if "zorder" not in keys: + kwargs["zorder"] = 5 - if 'lw' not in keys: - kwargs['lw'] = 2. + if "lw" not in keys: + kwargs["lw"] = 2.0 if isinstance(data[x].dtype, pd.CategoricalDtype): group_order = pd.unique(data[x]).categories @@ -107,8 +107,10 @@ def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. means = data.groupby(x)[y].mean().reindex(index=group_order) - if method in ['proportional_error_bar', 'sankey_error_bar']: - g = lambda x: np.sqrt((np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x))) + if method in ["proportional_error_bar", "sankey_error_bar"]: + g = lambda x: np.sqrt( + (np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x)) + ) sd = data.groupby(x)[y].apply(g) else: sd = data.groupby(x)[y].std().reindex(index=group_order) @@ -117,20 +119,20 @@ def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. upper_sd = means + sd if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any(): - kwargs['clip_on'] = True + kwargs["clip_on"] = True medians = data.groupby(x)[y].median().reindex(index=group_order) - quantiles = data.groupby(x)[y].quantile([0.25, 0.75]) \ - .unstack() \ - .reindex(index=group_order) + quantiles = ( + data.groupby(x)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order) + ) lower_quartiles = quantiles[0.25] upper_quartiles = quantiles[0.75] - if type == 'mean_sd': + if type == "mean_sd": central_measures = means lows = lower_sd highs = upper_sd - elif type == 'median_quartiles': + elif type == "median_quartiles": central_measures = medians lows = lower_quartiles highs = upper_quartiles @@ -157,13 +159,12 @@ def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. 
err2 = "{} offset(s) were supplied in `offset`.".format(len_offset) raise ValueError(err1 + err2) - kwargs['zorder'] = kwargs['zorder'] + kwargs["zorder"] = kwargs["zorder"] for xpos, central_measure in enumerate(central_measures): - - kwargs['color'] = custom_palette[xpos] + kwargs["color"] = custom_palette[xpos] - if method == 'sankey_error_bar': + if method == "sankey_error_bar": _xpos = pos[xpos] + offset[xpos] else: _xpos = xpos + offset[xpos] @@ -171,36 +172,37 @@ def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. low = lows[xpos] high = highs[xpos] if low == high == central_measure: - low_to_mean = mlines.Line2D([_xpos, _xpos], - [low, central_measure], - **kwargs) + low_to_mean = mlines.Line2D( + [_xpos, _xpos], [low, central_measure], **kwargs + ) ax.add_line(low_to_mean) - - mean_to_high = mlines.Line2D([_xpos, _xpos], - [central_measure, high], - **kwargs) + + mean_to_high = mlines.Line2D( + [_xpos, _xpos], [central_measure, high], **kwargs + ) ax.add_line(mean_to_high) else: - low_to_mean = mlines.Line2D([_xpos, _xpos], - [low, central_measure - gap_width], - **kwargs) + low_to_mean = mlines.Line2D( + [_xpos, _xpos], [low, central_measure - gap_width], **kwargs + ) ax.add_line(low_to_mean) - - mean_to_high = mlines.Line2D([_xpos, _xpos], - [central_measure + gap_width, high], - **kwargs) + + mean_to_high = mlines.Line2D( + [_xpos, _xpos], [central_measure + gap_width, high], **kwargs + ) ax.add_line(mean_to_high) - - -def check_data_matches_labels(labels,#list of input labels - data, #Pandas Series of input data - side:str # 'left' or 'right' on the sankey diagram - ): - ''' - Function to check that the labels and data match in the sankey diagram. + + +def check_data_matches_labels( + labels, # list of input labels + data, # Pandas Series of input data + side: str, # 'left' or 'right' on the sankey diagram +): + """ + Function to check that the labels and data match in the sankey diagram. 
And enforce labels and data to be lists. Raises an exception if the labels and data do not match. - ''' + """ if len(labels) > 0: if isinstance(data, list): data = set(data) @@ -216,12 +218,18 @@ def check_data_matches_labels(labels,#list of input labels msg += "Data: " + ",".join(data) raise Exception(f"{side} labels and data do not match.{msg}") - + def normalize_dict(nested_dict, target): val = {} for key in nested_dict.keys(): - val[key] = np.sum([nested_dict[sub_key][key] for sub_key in nested_dict.keys() if key in nested_dict[sub_key]]) - + val[key] = np.sum( + [ + nested_dict[sub_key][key] + for sub_key in nested_dict.keys() + if key in nested_dict[sub_key] + ] + ) + for key, value in nested_dict.items(): if isinstance(value, dict): for subkey in value.keys(): @@ -229,66 +237,68 @@ def normalize_dict(nested_dict, target): if subkey in val.keys(): if val[subkey] != 0: # Address the problem when one of the label have zero value - value[subkey] = value[subkey] * target[subkey]['right']/val[subkey] + value[subkey] = ( + value[subkey] * target[subkey]["right"] / val[subkey] + ) else: value[subkey] = 0 else: - value[subkey] = target[subkey]['right'] + value[subkey] = target[subkey]["right"] return nested_dict -def width_determine(labels, data, pos='left'): +def width_determine(labels, data, pos="left"): widths_norm = defaultdict() for i, label in enumerate(labels): myD = {} myD[pos] = data[data[pos] == label][pos + "Weight"].sum() if len(labels) != 1: if i == 0: - myD['bottom'] = 0 + myD["bottom"] = 0 myD[pos] -= 0.01 - myD['top'] = myD[pos] + myD["top"] = myD[pos] elif i == len(labels) - 1: myD[pos] -= 0.01 - myD['bottom'] = 1 - myD[pos] - myD['top'] = 1 + myD["bottom"] = 1 - myD[pos] + myD["top"] = 1 else: myD[pos] -= 0.02 - myD['bottom'] = widths_norm[labels[i - 1]]['top'] + 0.02 - myD['top'] = myD['bottom'] + myD[pos] - topEdge = myD['top'] + myD["bottom"] = widths_norm[labels[i - 1]]["top"] + 0.02 + myD["top"] = myD["bottom"] + myD[pos] else: - myD['bottom'] 
= 0 - myD['top'] = 1 + myD["bottom"] = 0 + myD["top"] = 1 widths_norm[label] = myD return widths_norm -def single_sankey(left:np.array,# data on the left of the diagram - right:np.array, # data on the right of the diagram, len(left) == len(right) - xpos:float=0, # the starting point on the x-axis - leftWeight:np.array=None, #weights for the left labels, if None, all weights are 1 - rightWeight:np.array=None, #weights for the right labels, if None, all weights are corresponding leftWeight - colorDict:dict=None, #input format: {'label': 'color'} - leftLabels:list=None, #labels for the left side of the diagram. The diagram will be sorted by these labels. - rightLabels:list=None, #labels for the right side of the diagram. The diagram will be sorted by these labels. - ax=None, #matplotlib axes to be drawn on - flow:bool=True, #if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison - sankey:bool=True, #if True, draw the sankey diagram, else draw barplot - width=0.5, - alpha=0.65, - bar_width=0.2, - error_bar_on:bool=True, #if True, draw error bar for each group comparison - strip_on:bool=True, #if True, draw strip for each group comparison - one_sankey:bool=False, #if True, only draw one sankey diagram - rightColor:bool=False, #if True, each strip of the diagram will be colored according to the corresponding left labels - align:bool='center'# if 'center', the diagram will be centered on each xtick, if 'edge', the diagram will be aligned with the left edge of each xtick - ): - - ''' + +def single_sankey( + left: np.array, # data on the left of the diagram + right: np.array, # data on the right of the diagram, len(left) == len(right) + xpos: float = 0, # the starting point on the x-axis + leftWeight: np.array = None, # weights for the left labels, if None, all weights are 1 + rightWeight: np.array = None, # weights for the right labels, if None, all weights are corresponding leftWeight + colorDict: dict = None, # input format: 
{'label': 'color'} + leftLabels: list = None, # labels for the left side of the diagram. The diagram will be sorted by these labels. + rightLabels: list = None, # labels for the right side of the diagram. The diagram will be sorted by these labels. + ax=None, # matplotlib axes to be drawn on + flow: bool = True, # if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison + sankey: bool = True, # if True, draw the sankey diagram, else draw barplot + width=0.5, + alpha=0.65, + bar_width=0.2, + error_bar_on: bool = True, # if True, draw error bar for each group comparison + strip_on: bool = True, # if True, draw strip for each group comparison + one_sankey: bool = False, # if True, only draw one sankey diagram + rightColor: bool = False, # if True, each strip of the diagram will be colored according to the corresponding left labels + align: bool = "center", # if 'center', the diagram will be centered on each xtick, if 'edge', the diagram will be aligned with the left edge of each xtick +): + """ Make a single Sankey diagram showing proportion flow from left to right Original code from: https://github.com/anazalea/pySankey Changes are added to normalize each diagram's height to be 1 - ''' + """ # Initiating values if ax is None: @@ -313,26 +323,35 @@ def single_sankey(left:np.array,# data on the left of the diagram left.reset_index(drop=True, inplace=True) if isinstance(right, pd.Series): right.reset_index(drop=True, inplace=True) - dataFrame = pd.DataFrame({'left': left, 'right': right, 'leftWeight': leftWeight, - 'rightWeight': rightWeight}, index=range(len(left))) - - if dataFrame[['left', 'right']].isnull().any(axis=None): - raise Exception('Sankey graph does not support null values.') + dataFrame = pd.DataFrame( + { + "left": left, + "right": right, + "leftWeight": leftWeight, + "rightWeight": rightWeight, + }, + index=range(len(left)), + ) + + if dataFrame[["left", "right"]].isnull().any(axis=None): + raise Exception("Sankey 
graph does not support null values.") # Identify all labels that appear 'left' or 'right' - allLabels = pd.Series(np.sort(np.r_[dataFrame.left.unique(), dataFrame.right.unique()])[::-1]).unique() + allLabels = pd.Series( + np.sort(np.r_[dataFrame.left.unique(), dataFrame.right.unique()])[::-1] + ).unique() # Identify left labels if len(leftLabels) == 0: leftLabels = pd.Series(np.sort(dataFrame.left.unique())[::-1]).unique() else: - check_data_matches_labels(leftLabels, dataFrame['left'], 'left') + check_data_matches_labels(leftLabels, dataFrame["left"], "left") # Identify right labels if len(rightLabels) == 0: rightLabels = pd.Series(np.sort(dataFrame.right.unique())[::-1]).unique() else: - check_data_matches_labels(leftLabels, dataFrame['right'], 'right') + check_data_matches_labels(leftLabels, dataFrame["right"], "right") # If no colorDict given, make one if colorDict is None: @@ -341,31 +360,33 @@ def single_sankey(left:np.array,# data on the left of the diagram colorPalette = sns.color_palette(palette, len(allLabels)) for i, label in enumerate(allLabels): colorDict[label] = colorPalette[i] - fail_color = {0:"grey"} + fail_color = {0: "grey"} colorDict.update(fail_color) else: missing = [label for label in allLabels if label not in colorDict.keys()] if missing: msg = "The palette parameter is missing values for the following labels : " - msg += '{}'.format(', '.join(missing)) + msg += "{}".format(", ".join(missing)) raise ValueError(msg) if align not in ("center", "edge"): - err = '{} assigned for `align` is not valid.'.format(align) + err = "{} assigned for `align` is not valid.".format(align) raise ValueError(err) if align == "center": try: leftpos = xpos - width / 2 except TypeError as e: - raise TypeError(f'the dtypes of parameters x ({xpos.dtype}) ' - f'and width ({width.dtype}) ' - f'are incompatible') from e - else: + raise TypeError( + f"the dtypes of parameters x ({xpos.dtype}) " + f"and width ({width.dtype}) " + f"are incompatible" + ) from e + else: 
leftpos = xpos # Combine left and right arrays to have a pandas.DataFrame in the 'long' format - left_series = pd.Series(left, name='values').to_frame().assign(groups='left') - right_series = pd.Series(right, name='values').to_frame().assign(groups='right') + left_series = pd.Series(left, name="values").to_frame().assign(groups="left") + right_series = pd.Series(right, name="values").to_frame().assign(groups="right") concatenated_df = pd.concat([left_series, right_series], ignore_index=True) # Determine positions of left label patches and total widths @@ -373,53 +394,57 @@ def single_sankey(left:np.array,# data on the left of the diagram leftWidths_norm = defaultdict() for i, leftLabel in enumerate(leftLabels): myD = {} - myD['left'] = (dataFrame[dataFrame.left == leftLabel].leftWeight.sum()/ \ - dataFrame.leftWeight.sum()) + myD["left"] = ( + dataFrame[dataFrame.left == leftLabel].leftWeight.sum() + / dataFrame.leftWeight.sum() + ) if len(leftLabels) != 1: if i == 0: - myD['bottom'] = 0 - myD['left'] -= 0.01 - myD['top'] = myD['left'] + myD["bottom"] = 0 + myD["left"] -= 0.01 + myD["top"] = myD["left"] elif i == len(leftLabels) - 1: - myD['left'] -= 0.01 - myD['bottom'] = 1 - myD['left'] - myD['top'] = 1 + myD["left"] -= 0.01 + myD["bottom"] = 1 - myD["left"] + myD["top"] = 1 else: - myD['left'] -= 0.02 - myD['bottom'] = leftWidths_norm[leftLabels[i - 1]]['top'] + 0.02 - myD['top'] = myD['bottom'] + myD['left'] - topEdge = myD['top'] + myD["left"] -= 0.02 + myD["bottom"] = leftWidths_norm[leftLabels[i - 1]]["top"] + 0.02 + myD["top"] = myD["bottom"] + myD["left"] + topEdge = myD["top"] else: - myD['bottom'] = 0 - myD['top'] = 1 - myD['left'] = 1 + myD["bottom"] = 0 + myD["top"] = 1 + myD["left"] = 1 leftWidths_norm[leftLabel] = myD # Determine positions of right label patches and total widths rightWidths_norm = defaultdict() for i, rightLabel in enumerate(rightLabels): myD = {} - myD['right'] = (dataFrame[dataFrame.right == rightLabel].rightWeight.sum()/ \ - 
dataFrame.rightWeight.sum()) + myD["right"] = ( + dataFrame[dataFrame.right == rightLabel].rightWeight.sum() + / dataFrame.rightWeight.sum() + ) if len(rightLabels) != 1: if i == 0: - myD['bottom'] = 0 - myD['right'] -= 0.01 - myD['top'] = myD['right'] + myD["bottom"] = 0 + myD["right"] -= 0.01 + myD["top"] = myD["right"] elif i == len(rightLabels) - 1: - myD['right'] -= 0.01 - myD['bottom'] = 1 - myD['right'] - myD['top'] = 1 + myD["right"] -= 0.01 + myD["bottom"] = 1 - myD["right"] + myD["top"] = 1 else: - myD['right'] -= 0.02 - myD['bottom'] = rightWidths_norm[rightLabels[i - 1]]['top'] + 0.02 - myD['top'] = myD['bottom'] + myD['right'] - topEdge = myD['top'] + myD["right"] -= 0.02 + myD["bottom"] = rightWidths_norm[rightLabels[i - 1]]["top"] + 0.02 + myD["top"] = myD["bottom"] + myD["right"] + topEdge = myD["top"] else: - myD['bottom'] = 0 - myD['top'] = 1 - myD['right'] = 1 - rightWidths_norm[rightLabel] = myD + myD["bottom"] = 0 + myD["top"] = 1 + myD["right"] = 1 + rightWidths_norm[rightLabel] = myD # Total width of the graph xMax = width @@ -436,19 +461,29 @@ def single_sankey(left:np.array,# data on the left of the diagram if (flow == False and sankey == True) or one_sankey: for rightLabel in rightLabels: ax.fill_between( - [xMax + leftpos + (-bar_width * xMax * 0.5), leftpos + xMax + (bar_width * xMax * 0.5)], + [ + xMax + leftpos + (-bar_width * xMax * 0.5), + leftpos + xMax + (bar_width * xMax * 0.5), + ], 2 * [rightWidths_norm[rightLabel]["bottom"]], 2 * [rightWidths_norm[rightLabel]["top"]], color=colorDict[rightLabel], - alpha=0.99 + alpha=0.99, ) # Plot error bars if error_bar_on and strip_on: - error_bar(concatenated_df, x='groups', y='values', ax=ax, offset=0, gap_width_percent=2, - method="sankey_error_bar", - pos=[leftpos, leftpos + xMax]) - + error_bar( + concatenated_df, + x="groups", + y="values", + ax=ax, + offset=0, + gap_width_percent=2, + method="sankey_error_bar", + pos=[leftpos, leftpos + xMax], + ) + # Determine widths of individual 
strips, all widths are normalized to 1 ns_l = defaultdict() ns_r = defaultdict() @@ -460,96 +495,121 @@ def single_sankey(left:np.array,# data on the left of the diagram for rightLabel in rightLabels: leftDict[rightLabel] = dataFrame[ (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel) - ].leftWeight.sum() - + ].leftWeight.sum() + rightDict[rightLabel] = dataFrame[ (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel) - ].rightWeight.sum() - factorleft = leftWidths_norm[leftLabel]['left']/sum(leftDict.values()) - leftDict_norm = {k: v*factorleft for k, v in leftDict.items()} + ].rightWeight.sum() + factorleft = leftWidths_norm[leftLabel]["left"] / sum(leftDict.values()) + leftDict_norm = {k: v * factorleft for k, v in leftDict.items()} ns_l_norm[leftLabel] = leftDict_norm ns_r[leftLabel] = rightDict - + # ns_r should be using a different way of normalization to fit the right side # It is normalized using the value with the same key in each sub-dictionary ns_r_norm = normalize_dict(ns_r, rightWidths_norm) - + # Plot strips - if sankey == True and strip_on == True: + if sankey and strip_on: for leftLabel, rightLabel in itertools.product(leftLabels, rightLabels): labelColor = leftLabel if rightColor: labelColor = rightLabel - if len(dataFrame[(dataFrame.left == leftLabel) & (dataFrame.right == rightLabel)]) > 0: + if ( + len( + dataFrame[ + (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel) + ] + ) + > 0 + ): # Create array of y values for each strip, half at left value, # half at right, convolve - ys_d = np.array(50 * [leftWidths_norm[leftLabel]['bottom']] + \ - 50 * [rightWidths_norm[rightLabel]['bottom']]) - ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode='valid') - ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode='valid') - ys_u = np.array(50 * [leftWidths_norm[leftLabel]['bottom'] + ns_l_norm[leftLabel][rightLabel]] + \ - 50 * [rightWidths_norm[rightLabel]['bottom'] + ns_r_norm[leftLabel][rightLabel]]) - ys_u = 
np.convolve(ys_u, 0.05 * np.ones(20), mode='valid') - ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode='valid') + ys_d = np.array( + 50 * [leftWidths_norm[leftLabel]["bottom"]] + + 50 * [rightWidths_norm[rightLabel]["bottom"]] + ) + ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode="valid") + ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode="valid") + ys_u = np.array( + 50 + * [ + leftWidths_norm[leftLabel]["bottom"] + + ns_l_norm[leftLabel][rightLabel] + ] + + 50 + * [ + rightWidths_norm[rightLabel]["bottom"] + + ns_r_norm[leftLabel][rightLabel] + ] + ) + ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode="valid") + ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode="valid") # Update bottom edges at each label so next strip starts at the right place - leftWidths_norm[leftLabel]['bottom'] += ns_l_norm[leftLabel][rightLabel] - rightWidths_norm[rightLabel]['bottom'] += ns_r_norm[leftLabel][rightLabel] + leftWidths_norm[leftLabel]["bottom"] += ns_l_norm[leftLabel][rightLabel] + rightWidths_norm[rightLabel]["bottom"] += ns_r_norm[leftLabel][ + rightLabel + ] ax.fill_between( - np.linspace(leftpos + (bar_width * xMax * 0.5), \ - leftpos + xMax - (bar_width * xMax * 0.5), len(ys_d)), \ - ys_d, ys_u, alpha=alpha, - color=colorDict[labelColor], edgecolor='none' + np.linspace( + leftpos + (bar_width * xMax * 0.5), + leftpos + xMax - (bar_width * xMax * 0.5), + len(ys_d), + ), + ys_d, + ys_u, + alpha=alpha, + color=colorDict[labelColor], + edgecolor="none", ) - -def sankeydiag(data:pd.DataFrame, - xvar:str, # x column to be plotted. - yvar:str, # y column to be plotted. - left_idx:str, #the value in column xvar that is on the left side of each sankey diagram - right_idx:str, #the value in column xvar that is on the right side of each sankey diagram, if len(left_idx) == 1, it will be broadcasted to the same length as right_idx, otherwise it should have the same length as right_idx - leftLabels:list=None, #labels for the left side of the diagram. 
The diagram will be sorted by these labels. - rightLabels:list=None, #labels for the right side of the diagram. The diagram will be sorted by these labels. - palette:str|dict=None, - ax=None, #matplotlib axes to be drawn on - flow:bool=True, #if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison - sankey:bool=True, #if True, draw the sankey diagram, else draw barplot - one_sankey:bool=False,# determined by the driver function on plotter.py, if True, draw the sankey diagram across the whole raw data axes - width:float=0.4, # the width of each sankey diagram - rightColor:bool=False,#if True, each strip of the diagram will be colored according to the corresponding left labels - align:str='center', #the alignment of each sankey diagram, can be 'center' or 'left' - alpha:float=0.65, #the transparency of each strip - **kwargs): - ''' + + +def sankeydiag( + data: pd.DataFrame, + xvar: str, # x column to be plotted. + yvar: str, # y column to be plotted. + left_idx: str, # the value in column xvar that is on the left side of each sankey diagram + right_idx: str, # the value in column xvar that is on the right side of each sankey diagram, if len(left_idx) == 1, it will be broadcasted to the same length as right_idx, otherwise it should have the same length as right_idx + leftLabels: list = None, # labels for the left side of the diagram. The diagram will be sorted by these labels. + rightLabels: list = None, # labels for the right side of the diagram. The diagram will be sorted by these labels. 
+ palette: str | dict = None, + ax=None, # matplotlib axes to be drawn on + flow: bool = True, # if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison + sankey: bool = True, # if True, draw the sankey diagram, else draw barplot + one_sankey: bool = False, # determined by the driver function on plotter.py, if True, draw the sankey diagram across the whole raw data axes + width: float = 0.4, # the width of each sankey diagram + rightColor: bool = False, # if True, each strip of the diagram will be colored according to the corresponding left labels + align: str = "center", # the alignment of each sankey diagram, can be 'center' or 'left' + alpha: float = 0.65, # the transparency of each strip + **kwargs, +): + """ Read in melted pd.DataFrame, and draw multiple sankey diagram on a single axes using the value in column yvar according to the value in column xvar left_idx in the column xvar is on the left side of each sankey diagram right_idx in the column xvar is on the right side of each sankey diagram - ''' - - import numpy as np - import pandas as pd - import seaborn as sns - import matplotlib.pyplot as plt + """ if "width" in kwargs: width = kwargs["width"] if "align" in kwargs: align = kwargs["align"] - + if "alpha" in kwargs: alpha = kwargs["alpha"] - + if "rightColor" in kwargs: rightColor = kwargs["rightColor"] - + if "bar_width" in kwargs: bar_width = kwargs["bar_width"] - + if "sankey" in kwargs: sankey = kwargs["sankey"] - + if "flow" in kwargs: flow = kwargs["flow"] @@ -557,7 +617,7 @@ def sankeydiag(data:pd.DataFrame, ax = plt.gca() allLabels = pd.Series(np.sort(data[yvar].unique())[::-1]).unique() - + # Check if all the elements in left_idx and right_idx are in xvar column unique_xvar = data[xvar].unique() if not all(elem in unique_xvar for elem in left_idx): @@ -569,7 +629,7 @@ def sankeydiag(data:pd.DataFrame, # For baseline comparison, broadcast left_idx to the same length as right_idx # so that the left of sankey 
diagram will be the same - # For sequential comparison, left_idx and right_idx can have anything different + # For sequential comparison, left_idx and right_idx can have anything different # but should have the same length if len(left_idx) == 1: broadcasted_left = np.broadcast_to(left_idx, len(right_idx)) @@ -581,8 +641,7 @@ def sankeydiag(data:pd.DataFrame, if isinstance(palette, dict): if not all(key in allLabels for key in palette.keys()): raise ValueError(f"keys in palette should be in {yvar} column") - else: - plot_palette = palette + plot_palette = palette elif isinstance(palette, str): plot_palette = {} colorPalette = sns.color_palette(palette, len(allLabels)) @@ -592,38 +651,75 @@ def sankeydiag(data:pd.DataFrame, plot_palette = None # Create a strip_on list to determine whether to draw the strip during repeated measures - strip_on = [int(right not in broadcasted_left[:i]) for i, right in enumerate(right_idx)] + strip_on = [ + int(right not in broadcasted_left[:i]) for i, right in enumerate(right_idx) + ] draw_idx = list(zip(broadcasted_left, right_idx)) for i, (left, right) in enumerate(draw_idx): if one_sankey == False: if flow == True: width = 1 - align = 'edge' - sankey = False if i == len(draw_idx)-1 else sankey # Remove last strip in flow - error_bar_on = False if i == len(draw_idx)-1 and flow else True # Remove last error_bar in flow + align = "edge" + sankey = ( + False if i == len(draw_idx) - 1 else sankey + ) # Remove last strip in flow + error_bar_on = ( + False if i == len(draw_idx) - 1 and flow else True + ) # Remove last error_bar in flow bar_width = 0.4 if sankey == False and flow == False else bar_width - single_sankey(data[data[xvar]==left][yvar], data[data[xvar]==right][yvar], - xpos=xpos, ax=ax, colorDict=plot_palette, width=width, - leftLabels=leftLabels, rightLabels=rightLabels, strip_on=strip_on[i], - rightColor=rightColor, bar_width=bar_width, sankey=sankey, - error_bar_on=error_bar_on, flow=flow, align=align, alpha=alpha) + 
single_sankey( + data[data[xvar] == left][yvar], + data[data[xvar] == right][yvar], + xpos=xpos, + ax=ax, + colorDict=plot_palette, + width=width, + leftLabels=leftLabels, + rightLabels=rightLabels, + strip_on=strip_on[i], + rightColor=rightColor, + bar_width=bar_width, + sankey=sankey, + error_bar_on=error_bar_on, + flow=flow, + align=align, + alpha=alpha, + ) xpos += 1 else: xpos = 0 width = 1 if sankey == False: bar_width = 0.5 - single_sankey(data[data[xvar]==left][yvar], data[data[xvar]==right][yvar], - xpos=xpos, ax=ax, colorDict=plot_palette, width=width, - leftLabels=leftLabels, rightLabels=rightLabels, - rightColor=rightColor, bar_width=bar_width, sankey=sankey, - one_sankey=one_sankey, flow=False, align='edge', alpha=alpha) - -# Now only draw vs xticks for two-column sankey diagram - if one_sankey == False or (sankey and not flow): - sankey_ticks = [f"{left}" for left in broadcasted_left] if flow \ - else [f"{left}\n v.s.\n{right}" for left, right in zip(broadcasted_left, right_idx)] + single_sankey( + data[data[xvar] == left][yvar], + data[data[xvar] == right][yvar], + xpos=xpos, + ax=ax, + colorDict=plot_palette, + width=width, + leftLabels=leftLabels, + rightLabels=rightLabels, + rightColor=rightColor, + bar_width=bar_width, + sankey=sankey, + one_sankey=one_sankey, + flow=False, + align="edge", + alpha=alpha, + ) + + # Now only draw vs xticks for two-column sankey diagram + if not one_sankey or (sankey and not flow): + sankey_ticks = ( + [f"{left}" for left in broadcasted_left] + if flow + else [ + f"{left}\n v.s.\n{right}" + for left, right in zip(broadcasted_left, right_idx) + ] + ) ax.get_xaxis().set_ticks(np.arange(len(right_idx))) ax.get_xaxis().set_ticklabels(sankey_ticks) else: diff --git a/dabest/plotter.py b/dabest/plotter.py index a1cf2929..7bcdb136 100644 --- a/dabest/plotter.py +++ b/dabest/plotter.py @@ -4,14 +4,24 @@ __all__ = ['EffectSizeDataFramePlotter'] # %% ../nbs/API/plotter.ipynb 4 -def EffectSizeDataFramePlotter(EffectSizeDataFrame, 
**plot_kwargs): +import numpy as np +import seaborn as sns +import matplotlib +import matplotlib.pyplot as plt +import pandas as pd +import warnings +import logging + +# %% ../nbs/API/plotter.ipynb 5 +# TODO refactor function name +def EffectSizeDataFramePlotter(effectsize_df, **plot_kwargs): """ Custom function that creates an estimation plot from an EffectSizeDataFrame. Keywords -------- Parameters ---------- - EffectSizeDataFrame + effectsize_df A `dabest` EffectSizeDataFrame object. plot_kwargs color_col=None @@ -44,31 +54,28 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): fontsize_contrastxlabel=12, fontsize_contrastylabel=12, fontsize_delta2label=12 """ - - import numpy as np - import seaborn as sns - import matplotlib - import matplotlib.pyplot as plt - import pandas as pd - import warnings - warnings.filterwarnings('ignore', 'This figure includes Axes that are not compatible with tight_layout') - from .misc_tools import merge_two_dicts from .plot_tools import halfviolin, get_swarm_spans, error_bar, sankeydiag - from ._stats_tools.effsize import _compute_standardizers, _compute_hedges_correction_factor + from ._stats_tools.effsize import ( + _compute_standardizers, + _compute_hedges_correction_factor, + ) + + warnings.filterwarnings( + "ignore", "This figure includes Axes that are not compatible with tight_layout" + ) - import logging # Have to disable logging of warning when get_legend_handles_labels() # tries to get from slopegraph. logging.disable(logging.WARNING) # Save rcParams that I will alter, so I can reset back. 
original_rcParams = {} - _changed_rcParams = ['axes.grid'] + _changed_rcParams = ["axes.grid"] for parameter in _changed_rcParams: original_rcParams[parameter] = plt.rcParams[parameter] - plt.rcParams['axes.grid'] = False + plt.rcParams["axes.grid"] = False ytick_color = plt.rcParams["ytick.color"] face_color = plot_kwargs["face_color"] @@ -76,18 +83,18 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if plot_kwargs["face_color"] is None: face_color = "white" - dabest_obj = EffectSizeDataFrame.dabest_obj - plot_data = EffectSizeDataFrame._plot_data - xvar = EffectSizeDataFrame.xvar - yvar = EffectSizeDataFrame.yvar - is_paired = EffectSizeDataFrame.is_paired - delta2 = EffectSizeDataFrame.delta2 - mini_meta = EffectSizeDataFrame.mini_meta - effect_size = EffectSizeDataFrame.effect_size - proportional = EffectSizeDataFrame.proportional + dabest_obj = effectsize_df.dabest_obj + plot_data = effectsize_df._plot_data + xvar = effectsize_df.xvar + yvar = effectsize_df.yvar + is_paired = effectsize_df.is_paired + delta2 = effectsize_df.delta2 + mini_meta = effectsize_df.mini_meta + effect_size = effectsize_df.effect_size + proportional = effectsize_df.proportional all_plot_groups = dabest_obj._all_plot_groups - idx = dabest_obj.idx + idx = dabest_obj.idx if effect_size not in ["mean_diff", "delta_g"] or not delta2: show_delta2 = False @@ -105,16 +112,16 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): # Disable Gardner-Altman plotting if any of the idxs comprise of more than # two groups or if it is a delta-delta plot. 
- float_contrast = plot_kwargs["float_contrast"] - effect_size_type = EffectSizeDataFrame.effect_size + float_contrast = plot_kwargs["float_contrast"] + effect_size_type = effectsize_df.effect_size if len(idx) > 1 or len(idx[0]) > 2: float_contrast = False - if effect_size_type in ['cliffs_delta']: + if effect_size_type in ["cliffs_delta"]: float_contrast = False if show_delta2 or show_mini_meta: - float_contrast = False + float_contrast = False if not is_paired: show_pairs = False @@ -122,12 +129,13 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): show_pairs = plot_kwargs["show_pairs"] # Set default kwargs first, then merge with user-dictated ones. - default_swarmplot_kwargs = {'size': plot_kwargs["raw_marker_size"]} + default_swarmplot_kwargs = {"size": plot_kwargs["raw_marker_size"]} if plot_kwargs["swarmplot_kwargs"] is None: swarmplot_kwargs = default_swarmplot_kwargs else: - swarmplot_kwargs = merge_two_dicts(default_swarmplot_kwargs, - plot_kwargs["swarmplot_kwargs"]) + swarmplot_kwargs = merge_two_dicts( + default_swarmplot_kwargs, plot_kwargs["swarmplot_kwargs"] + ) # Barplot kwargs default_barplot_kwargs = {"estimator": np.mean, "errorbar": plot_kwargs["ci"]} @@ -135,87 +143,105 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if plot_kwargs["barplot_kwargs"] is None: barplot_kwargs = default_barplot_kwargs else: - barplot_kwargs = merge_two_dicts(default_barplot_kwargs, - plot_kwargs["barplot_kwargs"]) + barplot_kwargs = merge_two_dicts( + default_barplot_kwargs, plot_kwargs["barplot_kwargs"] + ) # Sankey Diagram kwargs - default_sankey_kwargs = {"width": 0.4, "align": "center", - "sankey":True, "flow":True, - "alpha": 0.4, "rightColor": False, - "bar_width":0.2} + default_sankey_kwargs = { + "width": 0.4, + "align": "center", + "sankey": True, + "flow": True, + "alpha": 0.4, + "rightColor": False, + "bar_width": 0.2, + } if plot_kwargs["sankey_kwargs"] is None: sankey_kwargs = default_sankey_kwargs else: - 
sankey_kwargs = merge_two_dicts(default_sankey_kwargs, - plot_kwargs["sankey_kwargs"]) + sankey_kwargs = merge_two_dicts( + default_sankey_kwargs, plot_kwargs["sankey_kwargs"] + ) # We also need to extract the `sankey` and `flow` from the kwargs for plotter.py # to use for varying different kinds of paired proportional plots # We also don't want to pop the parameter from the kwargs - sankey = sankey_kwargs['sankey'] - flow = sankey_kwargs['flow'] + sankey = sankey_kwargs["sankey"] + flow = sankey_kwargs["flow"] # Violinplot kwargs. - default_violinplot_kwargs = {'widths':0.5, 'vert':True, - 'showextrema':False, 'showmedians':False} + default_violinplot_kwargs = { + "widths": 0.5, + "vert": True, + "showextrema": False, + "showmedians": False, + } if plot_kwargs["violinplot_kwargs"] is None: violinplot_kwargs = default_violinplot_kwargs else: - violinplot_kwargs = merge_two_dicts(default_violinplot_kwargs, - plot_kwargs["violinplot_kwargs"]) + violinplot_kwargs = merge_two_dicts( + default_violinplot_kwargs, plot_kwargs["violinplot_kwargs"] + ) # slopegraph kwargs. - default_slopegraph_kwargs = {'linewidth':1, 'alpha':0.5} + default_slopegraph_kwargs = {"linewidth": 1, "alpha": 0.5} if plot_kwargs["slopegraph_kwargs"] is None: slopegraph_kwargs = default_slopegraph_kwargs else: - slopegraph_kwargs = merge_two_dicts(default_slopegraph_kwargs, - plot_kwargs["slopegraph_kwargs"]) + slopegraph_kwargs = merge_two_dicts( + default_slopegraph_kwargs, plot_kwargs["slopegraph_kwargs"] + ) # Zero reference-line kwargs. 
- default_reflines_kwargs = {'linestyle':'solid', 'linewidth':0.75, - 'zorder': 2, - 'color': ytick_color} + default_reflines_kwargs = { + "linestyle": "solid", + "linewidth": 0.75, + "zorder": 2, + "color": ytick_color, + } if plot_kwargs["reflines_kwargs"] is None: reflines_kwargs = default_reflines_kwargs else: - reflines_kwargs = merge_two_dicts(default_reflines_kwargs, - plot_kwargs["reflines_kwargs"]) + reflines_kwargs = merge_two_dicts( + default_reflines_kwargs, plot_kwargs["reflines_kwargs"] + ) # Legend kwargs. - default_legend_kwargs = {'loc': 'upper left', 'frameon': False} + default_legend_kwargs = {"loc": "upper left", "frameon": False} if plot_kwargs["legend_kwargs"] is None: legend_kwargs = default_legend_kwargs else: - legend_kwargs = merge_two_dicts(default_legend_kwargs, - plot_kwargs["legend_kwargs"]) - - -################################################### GRIDKEY WIP - extracting arguments - + legend_kwargs = merge_two_dicts( + default_legend_kwargs, plot_kwargs["legend_kwargs"] + ) + + ################################################### GRIDKEY WIP - extracting arguments + gridkey_rows = plot_kwargs["gridkey_rows"] gridkey_merge_pairs = plot_kwargs["gridkey_merge_pairs"] gridkey_show_Ns = plot_kwargs["gridkey_show_Ns"] gridkey_show_es = plot_kwargs["gridkey_show_es"] - - if gridkey_rows == None: + + if gridkey_rows is None: gridkey_show_Ns = False gridkey_show_es = False - -################################################### END GRIDKEY WIP - extracting arguments + + ################################################### END GRIDKEY WIP - extracting arguments # Group summaries kwargs. 
- gs_default = {'mean_sd', 'median_quartiles', None} + gs_default = {"mean_sd", "median_quartiles", None} if plot_kwargs["group_summaries"] not in gs_default: - raise ValueError('group_summaries must be one of' - ' these: {}.'.format(gs_default) ) + raise ValueError( + "group_summaries must be one of" " these: {}.".format(gs_default) + ) - default_group_summary_kwargs = {'zorder': 3, 'lw': 2, - 'alpha': 1} + default_group_summary_kwargs = {"zorder": 3, "lw": 2, "alpha": 1} if plot_kwargs["group_summary_kwargs"] is None: group_summary_kwargs = default_group_summary_kwargs else: - group_summary_kwargs = merge_two_dicts(default_group_summary_kwargs, - plot_kwargs["group_summary_kwargs"]) + group_summary_kwargs = merge_two_dicts( + default_group_summary_kwargs, plot_kwargs["group_summary_kwargs"] + ) # Create color palette that will be shared across subplots. color_col = plot_kwargs["color_col"] @@ -241,35 +267,25 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if custom_pal is None: unsat_colors = sns.color_palette(n_colors=n_groups) else: if isinstance(custom_pal, dict): - groups_in_palette = {k: v for k,v in custom_pal.items() - if k in color_groups} - - # # check that all the keys in custom_pal are found in the - # # color column. - # col_grps = {k for k in color_groups} - # pal_grps = {k for k in custom_pal.keys()} - # not_in_pal = pal_grps.difference(col_grps) - # if len(not_in_pal) > 0: - # err1 = 'The custom palette keys {} '.format(not_in_pal) - # err2 = 'are not found in `{}`. Please check.'.format(color_col) - # errstring = (err1 + err2) - # raise IndexError(errstring) + groups_in_palette = { + k: v for k, v in custom_pal.items() if k in color_groups + } names = groups_in_palette.keys() unsat_colors = groups_in_palette.values() elif isinstance(custom_pal, list): - unsat_colors = custom_pal[0: n_groups] + unsat_colors = custom_pal[0:n_groups] elif isinstance(custom_pal, str): # check it is in the list of matplotlib palettes. 
if custom_pal in plt.colormaps(): unsat_colors = sns.color_palette(custom_pal, n_groups) else: - err1 = 'The specified `custom_palette` {}'.format(custom_pal) - err2 = ' is not a matplotlib palette. Please check.' + err1 = "The specified `custom_palette` {}".format(custom_pal) + err2 = " is not a matplotlib palette. Please check." raise ValueError(err1 + err2) if custom_pal is None and color_col is None: @@ -299,159 +314,165 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): plot_palette_sankey = custom_pal # Infer the figsize. - fig_size = plot_kwargs["fig_size"] + fig_size = plot_kwargs["fig_size"] if fig_size is None: all_groups_count = np.sum([len(i) for i in dabest_obj.idx]) # Increase the width for delta-delta graph if show_delta2 or show_mini_meta: all_groups_count += 2 - if is_paired and show_pairs is True and proportional is False: + if is_paired and show_pairs and proportional is False: frac = 0.75 else: frac = 1 - if float_contrast is True: + if float_contrast: height_inches = 4 each_group_width_inches = 2.5 * frac else: height_inches = 6 each_group_width_inches = 1.5 * frac - width_inches = (each_group_width_inches * all_groups_count) + width_inches = each_group_width_inches * all_groups_count fig_size = (width_inches, height_inches) # Initialise the figure. - # sns.set(context="talk", style='ticks') - init_fig_kwargs = dict(figsize=fig_size, dpi=plot_kwargs["dpi"] - ,tight_layout=True) + init_fig_kwargs = dict(figsize=fig_size, dpi=plot_kwargs["dpi"], tight_layout=True) width_ratios_ga = [2.5, 1] - -###################### GRIDKEY HSPACE ALTERATION + + ###################### GRIDKEY HSPACE ALTERATION # Sets hspace for cummings plots if gridkey is shown. - if gridkey_rows != None: + if gridkey_rows is not None: h_space_cummings = 0.1 else: h_space_cummings = 0.3 - - -###################### END GRIDKEY HSPACE ALTERATION - + + ###################### END GRIDKEY HSPACE ALTERATION + if plot_kwargs["ax"] is not None: # New in v0.2.6. 
# Use inset axes to create the estimation plot inside a single axes. # Author: Adam L Nekimken. (PR #73) - inset_contrast = True rawdata_axes = plot_kwargs["ax"] ax_position = rawdata_axes.get_position() # [[x0, y0], [x1, y1]] - + fig = rawdata_axes.get_figure() fig.patch.set_facecolor(face_color) - - if float_contrast is True: + + if float_contrast: axins = rawdata_axes.inset_axes( - [1, 0, - width_ratios_ga[1]/width_ratios_ga[0], 1]) + [1, 0, width_ratios_ga[1] / width_ratios_ga[0], 1] + ) rawdata_axes.set_position( # [l, b, w, h] - [ax_position.x0, - ax_position.y0, - (ax_position.x1 - ax_position.x0) * (width_ratios_ga[0] / - sum(width_ratios_ga)), - (ax_position.y1 - ax_position.y0)]) + [ + ax_position.x0, + ax_position.y0, + (ax_position.x1 - ax_position.x0) + * (width_ratios_ga[0] / sum(width_ratios_ga)), + (ax_position.y1 - ax_position.y0), + ] + ) contrast_axes = axins else: axins = rawdata_axes.inset_axes([0, -1 - h_space_cummings, 1, 1]) - plot_height = ((ax_position.y1 - ax_position.y0) / - (2 + h_space_cummings)) + plot_height = (ax_position.y1 - ax_position.y0) / (2 + h_space_cummings) rawdata_axes.set_position( - [ax_position.x0, - ax_position.y0 + (1 + h_space_cummings) * plot_height, - (ax_position.x1 - ax_position.x0), - plot_height]) - - # If the contrast axes are NOT floating, create lists to store - # raw ylims and raw tick intervals, so that I can normalize - # their ylims later. - contrast_ax_ylim_low = list() - contrast_ax_ylim_high = list() - contrast_ax_ylim_tickintervals = list() + [ + ax_position.x0, + ax_position.y0 + (1 + h_space_cummings) * plot_height, + (ax_position.x1 - ax_position.x0), + plot_height, + ] + ) + contrast_axes = axins rawdata_axes.contrast_axes = axins else: - inset_contrast = False # Here, we hardcode some figure parameters. 
- if float_contrast is True: + if float_contrast: fig, axx = plt.subplots( - ncols=2, - gridspec_kw={"width_ratios": width_ratios_ga, - "wspace": 0}, - **init_fig_kwargs) + ncols=2, + gridspec_kw={"width_ratios": width_ratios_ga, "wspace": 0}, + **init_fig_kwargs + ) fig.patch.set_facecolor(face_color) else: - fig, axx = plt.subplots(nrows=2, - gridspec_kw={"hspace": h_space_cummings}, - **init_fig_kwargs) + fig, axx = plt.subplots( + nrows=2, gridspec_kw={"hspace": h_space_cummings}, **init_fig_kwargs + ) fig.patch.set_facecolor(face_color) - # If the contrast axes are NOT floating, create lists to store - # raw ylims and raw tick intervals, so that I can normalize - # their ylims later. - contrast_ax_ylim_low = list() - contrast_ax_ylim_high = list() - contrast_ax_ylim_tickintervals = list() - + # Title title = plot_kwargs["title"] fontsize_title = plot_kwargs["fontsize_title"] if title is not None: fig.suptitle(title, fontsize=fontsize_title) - rawdata_axes = axx[0] + rawdata_axes = axx[0] contrast_axes = axx[1] rawdata_axes.set_frame_on(False) contrast_axes.set_frame_on(False) - redraw_axes_kwargs = {'colors' : ytick_color, - 'facecolors' : ytick_color, - 'lw' : 1, - 'zorder' : 10, - 'clip_on' : False} + redraw_axes_kwargs = { + "colors": ytick_color, + "facecolors": ytick_color, + "lw": 1, + "zorder": 10, + "clip_on": False, + } swarm_ylim = plot_kwargs["swarm_ylim"] if swarm_ylim is not None: rawdata_axes.set_ylim(swarm_ylim) - one_sankey = False if is_paired is not None else False # Flag to indicate if only one sankey is plotted. - two_col_sankey = True if proportional == True and one_sankey == False and sankey == True and flow == False else False + one_sankey = ( + False if is_paired is not None else False + ) # Flag to indicate if only one sankey is plotted. 
+ two_col_sankey = ( + True if proportional and not one_sankey and sankey and not flow else False + ) - if show_pairs is True: + if show_pairs: # Determine temp_idx based on is_paired and proportional conditions if is_paired == "baseline": - idx_pairs = [(control, test) for i in idx for control, test in zip([i[0]] * (len(i) - 1), i[1:])] + idx_pairs = [ + (control, test) + for i in idx + for control, test in zip([i[0]] * (len(i) - 1), i[1:]) + ] temp_idx = idx if not proportional else idx_pairs else: - idx_pairs = [(control, test) for i in idx for control, test in zip(i[:-1], i[1:])] + idx_pairs = [ + (control, test) for i in idx for control, test in zip(i[:-1], i[1:]) + ] temp_idx = idx if not proportional else idx_pairs # Determine temp_all_plot_groups based on proportional condition plot_groups = [item for i in temp_idx for item in i] temp_all_plot_groups = all_plot_groups if not proportional else plot_groups - - if proportional==False: - # Plot the raw data as a slopegraph. - # Pivot the long (melted) data. + + if not proportional: + # Plot the raw data as a slopegraph. + # Pivot the long (melted) data. if color_col is None: pivot_values = [yvar] else: pivot_values = [yvar, color_col] - pivoted_plot_data = pd.pivot(data=plot_data, index=dabest_obj.id_col, - columns=xvar, values=pivot_values) + pivoted_plot_data = pd.pivot( + data=plot_data, + index=dabest_obj.id_col, + columns=xvar, + values=pivot_values, + ) x_start = 0 for ii, current_tuple in enumerate(temp_idx): - current_pair = pivoted_plot_data.loc[:, pd.MultiIndex.from_product([pivot_values, current_tuple])].dropna() + current_pair = pivoted_plot_data.loc[ + :, pd.MultiIndex.from_product([pivot_values, current_tuple]) + ].dropna() grp_count = len(current_tuple) # Iterate through the data for the current tuple. 
for ID, observation in current_pair.iterrows(): @@ -459,136 +480,174 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): y_points = observation[yvar].tolist() if color_col is None: - slopegraph_kwargs['color'] = ytick_color + slopegraph_kwargs["color"] = ytick_color else: color_key = observation[color_col][0] - if isinstance(color_key, (str, np.int64, np.float64)) == True: - slopegraph_kwargs['color'] = plot_palette_raw[color_key] - slopegraph_kwargs['label'] = color_key + if isinstance(color_key, (str, np.int64, np.float64)): + slopegraph_kwargs["color"] = plot_palette_raw[color_key] + slopegraph_kwargs["label"] = color_key - rawdata_axes.plot(x_points, y_points, **slopegraph_kwargs) + rawdata_axes.plot(x_points, y_points, **slopegraph_kwargs) - x_start = x_start + grp_count - - ##################### DELTA PTS ON CONTRAST PLOT WIP + + ##################### DELTA PTS ON CONTRAST PLOT WIP contrast_show_deltas = plot_kwargs["contrast_show_deltas"] - - if is_paired == None: + + if is_paired is None: contrast_show_deltas = False - - if contrast_show_deltas == True: - - trans = plt.gca().transData - + + if contrast_show_deltas: delta_plot_data_temp = plot_data.copy() delta_id_col = dabest_obj.id_col - if color_col != None: - delta_plot_data = delta_plot_data_temp[[xvar, yvar, delta_id_col, color_col]] - deltapts_args = {"hue" : color_col, - "palette" : plot_palette_raw, - "marker" : "^", - "alpha" : 0.5} - + if color_col is not None: + delta_plot_data = delta_plot_data_temp[ + [xvar, yvar, delta_id_col, color_col] + ] + deltapts_args = { + "hue": color_col, + "palette": plot_palette_raw, + "marker": "^", + "alpha": 0.5, + } + else: delta_plot_data = delta_plot_data_temp[[xvar, yvar, delta_id_col]] - deltapts_args = {"color" : "k", - "marker" : "^", - "alpha" : 0.5} - + deltapts_args = {"color": "k", "marker": "^", "alpha": 0.5} + final_deltas = pd.DataFrame() for i in idx: for j in i: if i.index(j) != 0: - temp_df_exp = 
delta_plot_data[delta_plot_data[xvar].str.contains(j)].reset_index(drop=True) + temp_df_exp = delta_plot_data[ + delta_plot_data[xvar].str.contains(j) + ].reset_index(drop=True) if is_paired == "baseline": - temp_df_cont = delta_plot_data[delta_plot_data[xvar].str.contains(i[0])].reset_index(drop=True) + temp_df_cont = delta_plot_data[ + delta_plot_data[xvar].str.contains(i[0]) + ].reset_index(drop=True) elif is_paired == "sequential": - temp_df_cont = delta_plot_data[delta_plot_data[xvar].str.contains(i[i.index(j) - 1])].reset_index(drop=True) + temp_df_cont = delta_plot_data[ + delta_plot_data[xvar].str.contains( + i[i.index(j) - 1] + ) + ].reset_index(drop=True) delta_df = temp_df_exp.copy() delta_df[yvar] = temp_df_exp[yvar] - temp_df_cont[yvar] - final_deltas = pd.concat([final_deltas, delta_df]) - - + final_deltas = pd.concat([final_deltas, delta_df]) + # Plot the raw data as a swarmplot. - deltapts_plot = sns.swarmplot(data=final_deltas, x=xvar, y=yvar, - ax=contrast_axes, - order=all_plot_groups, - zorder=2, - **deltapts_args) + deltapts_plot = sns.swarmplot( + data=final_deltas, + x=xvar, + y=yvar, + ax=contrast_axes, + order=all_plot_groups, + zorder=2, + **deltapts_args + ) contrast_axes.legend().set_visible(False) - - ##################### DELTA PTS ON CONTRAST PLOT END - + ##################### DELTA PTS ON CONTRAST PLOT END + # Set the tick labels, because the slopegraph plotting doesn't. rawdata_axes.set_xticks(np.arange(0, len(temp_all_plot_groups))) rawdata_axes.set_xticklabels(temp_all_plot_groups) - + else: # Plot the raw data as a set of Sankey Diagrams aligned like barplot. 
group_summaries = plot_kwargs["group_summaries"] if group_summaries is None: group_summaries = "mean_sd" err_color = plot_kwargs["err_color"] - if err_color == None: + if err_color is None: err_color = "black" - if show_pairs is True: + if show_pairs: sankey_control_group = [] sankey_test_group = [] # Design for Sankey Flow Diagram - sankey_idx = [(control, test) for i in idx for control, test in zip(i[:], (i[1:]+(i[0],)))]\ - if flow is True else temp_idx + sankey_idx = ( + [ + (control, test) + for i in idx + for control, test in zip(i[:], (i[1:] + (i[0],))) + ] + if flow + else temp_idx + ) for i in sankey_idx: sankey_control_group.append(i[0]) - sankey_test_group.append(i[1]) + sankey_test_group.append(i[1]) if len(temp_all_plot_groups) == 2: - one_sankey = True - sankey_control_group.pop(); sankey_test_group.pop() # Remove the last element from two lists + one_sankey = True + sankey_control_group.pop() + sankey_test_group.pop() # Remove the last element from two lists # two_col_sankey = True if proportional == True and one_sankey == False and sankey == True and flow == False else False # Replace the paired proportional plot with sankey diagram - sankeyplot = sankeydiag(plot_data, xvar=xvar, yvar=yvar, - left_idx=sankey_control_group, - right_idx=sankey_test_group, - palette=plot_palette_sankey, - ax=rawdata_axes, - one_sankey=one_sankey, - **sankey_kwargs) - + sankeyplot = sankeydiag( + plot_data, + xvar=xvar, + yvar=yvar, + left_idx=sankey_control_group, + right_idx=sankey_test_group, + palette=plot_palette_sankey, + ax=rawdata_axes, + one_sankey=one_sankey, + **sankey_kwargs + ) + else: - if proportional==False: + if not proportional: # Plot the raw data as a swarmplot. 
- rawdata_plot = sns.swarmplot(data=plot_data, x=xvar, y=yvar, - ax=rawdata_axes, - order=all_plot_groups, hue=color_col, - palette=plot_palette_raw, zorder=1, - **swarmplot_kwargs) + rawdata_plot = sns.swarmplot( + data=plot_data, + x=xvar, + y=yvar, + ax=rawdata_axes, + order=all_plot_groups, + hue=color_col, + palette=plot_palette_raw, + zorder=1, + **swarmplot_kwargs + ) else: # Plot the raw data as a barplot. - bar1_df = pd.DataFrame({xvar: all_plot_groups, 'proportion': np.ones(len(all_plot_groups))}) - bar1 = sns.barplot(data=bar1_df, x=xvar, y="proportion", - ax=rawdata_axes, - order=all_plot_groups, - linewidth=2, facecolor=(1, 1, 1, 0), edgecolor=bar_color, - zorder=1) - bar2 = sns.barplot(data=plot_data, x=xvar, y=yvar, - ax=rawdata_axes, - order=all_plot_groups, - palette=plot_palette_bar, - zorder=1, - **barplot_kwargs) + bar1_df = pd.DataFrame( + {xvar: all_plot_groups, "proportion": np.ones(len(all_plot_groups))} + ) + bar1 = sns.barplot( + data=bar1_df, + x=xvar, + y="proportion", + ax=rawdata_axes, + order=all_plot_groups, + linewidth=2, + facecolor=(1, 1, 1, 0), + edgecolor=bar_color, + zorder=1, + ) + bar2 = sns.barplot( + data=plot_data, + x=xvar, + y=yvar, + ax=rawdata_axes, + order=all_plot_groups, + palette=plot_palette_bar, + zorder=1, + **barplot_kwargs + ) # adjust the width of bars bar_width = plot_kwargs["bar_width"] for bar in bar1.patches: x = bar.get_x() width = bar.get_width() - centre = x + width / 2. - bar.set_x(centre - bar_width / 2.) + centre = x + width / 2.0 + bar.set_x(centre - bar_width / 2.0) bar.set_width(bar_width) # Plot the gapped line summaries, if this is not a Cumming plot. @@ -597,7 +656,7 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if group_summaries is None: group_summaries = "mean_sd" - if group_summaries is not None and proportional==False: + if group_summaries is not None and not proportional: # Create list to gather xspans. 
xspans = [] line_colors = [] @@ -610,33 +669,42 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): # we have got a None, so skip and move on. pass - if bootstraps_color_by_group is True: + if bootstraps_color_by_group: line_colors.append(plot_palette_raw[all_plot_groups[jj]]) if len(line_colors) != len(all_plot_groups): line_colors = ytick_color - error_bar(plot_data, x=xvar, y=yvar, - # Hardcoded offset... - offset=xspans + np.array(plot_kwargs["group_summaries_offset"]), - line_color=line_colors, - gap_width_percent=1.5, - type=group_summaries, ax=rawdata_axes, - method="gapped_lines", - **group_summary_kwargs) - - if group_summaries is not None and proportional == True: - + error_bar( + plot_data, + x=xvar, + y=yvar, + # Hardcoded offset... + offset=xspans + np.array(plot_kwargs["group_summaries_offset"]), + line_color=line_colors, + gap_width_percent=1.5, + type=group_summaries, + ax=rawdata_axes, + method="gapped_lines", + **group_summary_kwargs + ) + + if group_summaries is not None and proportional: err_color = plot_kwargs["err_color"] - if err_color == None: + if err_color is None: err_color = "black" - error_bar(plot_data, x=xvar, y=yvar, - offset=0, - line_color=err_color, - gap_width_percent=1.5, - type=group_summaries, ax=rawdata_axes, - method="proportional_error_bar", - **group_summary_kwargs) + error_bar( + plot_data, + x=xvar, + y=yvar, + offset=0, + line_color=err_color, + gap_width_percent=1.5, + type=group_summaries, + ax=rawdata_axes, + method="proportional_error_bar", + **group_summary_kwargs + ) # Add the counts to the rawdata axes xticks. 
counts = plot_data.groupby(xvar).count()[yvar] @@ -646,7 +714,7 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): for xticklab in rawdata_axes.xaxis.get_ticklabels(): t = xticklab.get_text() if t.rfind("\n") != -1: - te = t[t.rfind("\n") + len("\n"):] + te = t[t.rfind("\n") + len("\n") :] N = str(counts.loc[te]) te = t else: @@ -655,13 +723,13 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): ticks_with_counts.append("{}\nN = {}".format(te, N)) - if plot_kwargs['fontsize_rawxlabel'] is not None: - fontsize_rawxlabel = plot_kwargs['fontsize_rawxlabel'] - rawdata_axes.set_xticklabels(ticks_with_counts,fontsize=fontsize_rawxlabel) + if plot_kwargs["fontsize_rawxlabel"] is not None: + fontsize_rawxlabel = plot_kwargs["fontsize_rawxlabel"] + rawdata_axes.set_xticklabels(ticks_with_counts, fontsize=fontsize_rawxlabel) # Save the handles and labels for the legend. handles, labels = rawdata_axes.get_legend_handles_labels() - legend_labels = [l for l in labels] + legend_labels = [l for l in labels] legend_handles = [h for h in handles] if bootstraps_color_by_group is False: rawdata_axes.legend().set_visible(False) @@ -672,11 +740,11 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): # Plot effect sizes and bootstraps. # Take note of where the `control` groups are. 
- if is_paired == "baseline" and show_pairs == True: + if is_paired == "baseline" and show_pairs: if two_col_sankey: ticks_to_skip = [] - ticks_to_plot = np.arange(0, len(temp_all_plot_groups)/2).tolist() - ticks_to_start_twocol_sankey = np.cumsum([len(i)-1 for i in idx]).tolist() + ticks_to_plot = np.arange(0, len(temp_all_plot_groups) / 2).tolist() + ticks_to_start_twocol_sankey = np.cumsum([len(i) - 1 for i in idx]).tolist() ticks_to_start_twocol_sankey.pop() ticks_to_start_twocol_sankey.insert(0, 0) else: @@ -685,60 +753,63 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist() ticks_to_skip.insert(0, 0) # Then obtain the ticks where we have to plot the effect sizes. - ticks_to_plot = [t for t in range(0, len(all_plot_groups)) - if t not in ticks_to_skip] + ticks_to_plot = [ + t for t in range(0, len(all_plot_groups)) if t not in ticks_to_skip + ] ticks_to_skip_contrast = np.cumsum([(len(t)) for t in idx])[:-1].tolist() ticks_to_skip_contrast.insert(0, 0) else: if two_col_sankey: ticks_to_skip = [len(sankey_control_group)] # Then obtain the ticks where we have to plot the effect sizes. - ticks_to_plot = [t for t in range(0, len(temp_idx)) - if t not in ticks_to_skip] + ticks_to_plot = [ + t for t in range(0, len(temp_idx)) if t not in ticks_to_skip + ] ticks_to_skip = [] - ticks_to_start_twocol_sankey = np.cumsum([len(i)-1 for i in idx]).tolist() + ticks_to_start_twocol_sankey = np.cumsum([len(i) - 1 for i in idx]).tolist() ticks_to_start_twocol_sankey.pop() ticks_to_start_twocol_sankey.insert(0, 0) else: ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist() ticks_to_skip.insert(0, 0) # Then obtain the ticks where we have to plot the effect sizes. 
- ticks_to_plot = [t for t in range(0, len(all_plot_groups)) - if t not in ticks_to_skip] + ticks_to_plot = [ + t for t in range(0, len(all_plot_groups)) if t not in ticks_to_skip + ] # Plot the bootstraps, then the effect sizes and CIs. - es_marker_size = plot_kwargs["es_marker_size"] + es_marker_size = plot_kwargs["es_marker_size"] halfviolin_alpha = plot_kwargs["halfviolin_alpha"] ci_type = plot_kwargs["ci_type"] - results = EffectSizeDataFrame.results + results = effectsize_df.results contrast_xtick_labels = [] - for j, tick in enumerate(ticks_to_plot): - current_group = results.test[j] - current_control = results.control[j] + current_group = results.test[j] + current_control = results.control[j] current_bootstrap = results.bootstraps[j] - current_effsize = results.difference[j] + current_effsize = results.difference[j] if ci_type == "bca": - current_ci_low = results.bca_low[j] - current_ci_high = results.bca_high[j] + current_ci_low = results.bca_low[j] + current_ci_high = results.bca_high[j] else: - current_ci_low = results.pct_low[j] - current_ci_high = results.pct_high[j] - + current_ci_low = results.pct_low[j] + current_ci_high = results.pct_high[j] # Create the violinplot. # New in v0.2.6: drop negative infinities before plotting. - v = contrast_axes.violinplot(current_bootstrap[~np.isinf(current_bootstrap)], - positions=[tick], - **violinplot_kwargs) + v = contrast_axes.violinplot( + current_bootstrap[~np.isinf(current_bootstrap)], + positions=[tick], + **violinplot_kwargs + ) # Turn the violinplot into half, and color it the same as the swarmplot. # Do this only if the color column is not specified. # Ideally, the alpha (transparency) fo the violin plot should be # less than one so the effect size and CIs are visible. 
- if bootstraps_color_by_group is True: + if bootstraps_color_by_group: fc = plot_palette_contrast[current_group] else: fc = "grey" @@ -746,96 +817,110 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): halfviolin(v, fill_color=fc, alpha=halfviolin_alpha) # Plot the effect size. - contrast_axes.plot([tick], current_effsize, marker='o', - color=ytick_color, - markersize=es_marker_size) - -################## SHOW ES ON CONTRAST PLOT WIP + contrast_axes.plot( + [tick], + current_effsize, + marker="o", + color=ytick_color, + markersize=es_marker_size, + ) + + ################## SHOW ES ON CONTRAST PLOT WIP contrast_show_es = plot_kwargs["contrast_show_es"] - es_sf = plot_kwargs['es_sf'] - es_fontsize = plot_kwargs['es_fontsize'] - - if gridkey_show_es == True: + es_sf = plot_kwargs["es_sf"] + es_fontsize = plot_kwargs["es_fontsize"] + + if gridkey_show_es: contrast_show_es = False - - effsize_for_print = current_effsize - - printed_es = np.format_float_positional(effsize_for_print, - precision=es_sf, - sign=True, - trim= 'k', - min_digits = es_sf) - if contrast_show_es == True: + + printed_es = np.format_float_positional( + effsize_for_print, precision=es_sf, sign=True, trim="k", min_digits=es_sf + ) + if contrast_show_es: if effsize_for_print < 0: textoffset = 10 else: textoffset = 15 - contrast_axes.annotate(text=printed_es, - xy = (tick, effsize_for_print), - xytext = (-textoffset-len(printed_es)*es_fontsize/2,-es_fontsize/2), - textcoords = "offset points", - **{ "fontsize" : es_fontsize }) - -################## SHOW ES ON CONTRAST PLOT END - - # Plot the confidence interval. 
- contrast_axes.plot([tick, tick], - [current_ci_low, current_ci_high], - linestyle="-", - color=ytick_color, - linewidth=group_summary_kwargs['lw']) + contrast_axes.annotate( + text=printed_es, + xy=(tick, effsize_for_print), + xytext=( + -textoffset - len(printed_es) * es_fontsize / 2, + -es_fontsize / 2, + ), + textcoords="offset points", + **{"fontsize": es_fontsize} + ) + + ################## SHOW ES ON CONTRAST PLOT END - contrast_xtick_labels.append("{}\nminus\n{}".format(current_group, - current_control)) + # Plot the confidence interval. + contrast_axes.plot( + [tick, tick], + [current_ci_low, current_ci_high], + linestyle="-", + color=ytick_color, + linewidth=group_summary_kwargs["lw"], + ) + + contrast_xtick_labels.append( + "{}\nminus\n{}".format(current_group, current_control) + ) # Plot mini-meta violin if show_mini_meta or show_delta2: if show_mini_meta: - mini_meta_delta = EffectSizeDataFrame.mini_meta_delta - data = mini_meta_delta.bootstraps_weighted_delta - difference = mini_meta_delta.difference + mini_meta_delta = effectsize_df.mini_meta_delta + data = mini_meta_delta.bootstraps_weighted_delta + difference = mini_meta_delta.difference if ci_type == "bca": - ci_low = mini_meta_delta.bca_low - ci_high = mini_meta_delta.bca_high + ci_low = mini_meta_delta.bca_low + ci_high = mini_meta_delta.bca_high else: - ci_low = mini_meta_delta.pct_low - ci_high = mini_meta_delta.pct_high - else: - delta_delta = EffectSizeDataFrame.delta_delta - data = delta_delta.bootstraps_delta_delta - difference = delta_delta.difference + ci_low = mini_meta_delta.pct_low + ci_high = mini_meta_delta.pct_high + else: + delta_delta = effectsize_df.delta_delta + data = delta_delta.bootstraps_delta_delta + difference = delta_delta.difference if ci_type == "bca": - ci_low = delta_delta.bca_low - ci_high = delta_delta.bca_high + ci_low = delta_delta.bca_low + ci_high = delta_delta.bca_high else: - ci_low = delta_delta.pct_low - ci_high = delta_delta.pct_high - #Create the 
violinplot. - #New in v0.2.6: drop negative infinities before plotting. - position = max(rawdata_axes.get_xticks())+2 - v = contrast_axes.violinplot(data[~np.isinf(data)], - positions=[position], - **violinplot_kwargs) + ci_low = delta_delta.pct_low + ci_high = delta_delta.pct_high + # Create the violinplot. + # New in v0.2.6: drop negative infinities before plotting. + position = max(rawdata_axes.get_xticks()) + 2 + v = contrast_axes.violinplot( + data[~np.isinf(data)], positions=[position], **violinplot_kwargs + ) fc = "grey" halfviolin(v, fill_color=fc, alpha=halfviolin_alpha) # Plot the effect size. - contrast_axes.plot([position], difference, marker='o', - color=ytick_color, - markersize=es_marker_size) + contrast_axes.plot( + [position], + difference, + marker="o", + color=ytick_color, + markersize=es_marker_size, + ) # Plot the confidence interval. - contrast_axes.plot([position, position], - [ci_low, ci_high], - linestyle="-", - color=ytick_color, - linewidth=group_summary_kwargs['lw']) + contrast_axes.plot( + [position, position], + [ci_low, ci_high], + linestyle="-", + color=ytick_color, + linewidth=group_summary_kwargs["lw"], + ) if show_mini_meta: - contrast_xtick_labels.extend(["","Weighted delta"]) + contrast_xtick_labels.extend(["", "Weighted delta"]) elif effect_size == "delta_g": contrast_xtick_labels.extend(["", "deltas' g"]) else: @@ -847,22 +932,22 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): contrast_axes.set_xticks(rawdata_axes.get_xticks()) else: temp = rawdata_axes.get_xticks() - temp = np.append(temp, [max(temp)+1, max(temp)+2]) + temp = np.append(temp, [max(temp) + 1, max(temp) + 2]) contrast_axes.set_xticks(temp) - if show_pairs is True: + if show_pairs: max_x = contrast_axes.get_xlim()[1] rawdata_axes.set_xlim(-0.375, max_x) - if float_contrast is True: + if float_contrast: contrast_axes.set_xlim(0.5, 1.5) elif show_delta2 or show_mini_meta: # Increase the xlim of raw data by 2 temp = rawdata_axes.get_xlim() if 
show_pairs: - rawdata_axes.set_xlim(temp[0], temp[1]+0.25) + rawdata_axes.set_xlim(temp[0], temp[1] + 0.25) else: - rawdata_axes.set_xlim(temp[0], temp[1]+2) + rawdata_axes.set_xlim(temp[0], temp[1] + 2) contrast_axes.set_xlim(rawdata_axes.get_xlim()) else: contrast_axes.set_xlim(rawdata_axes.get_xlim()) @@ -871,55 +956,67 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): for t in ticks_to_skip: contrast_xtick_labels.insert(t, "") - if plot_kwargs['fontsize_contrastxlabel'] is not None: - fontsize_contrastxlabel = plot_kwargs['fontsize_contrastxlabel'] + if plot_kwargs["fontsize_contrastxlabel"] is not None: + fontsize_contrastxlabel = plot_kwargs["fontsize_contrastxlabel"] - contrast_axes.set_xticklabels(contrast_xtick_labels,fontsize=fontsize_contrastxlabel) + contrast_axes.set_xticklabels( + contrast_xtick_labels, fontsize=fontsize_contrastxlabel + ) if bootstraps_color_by_group is False: legend_labels_unique = np.unique(legend_labels) unique_idx = np.unique(legend_labels, return_index=True)[1] - legend_handles_unique = (pd.Series(legend_handles, dtype="object").loc[unique_idx]).tolist() + legend_handles_unique = ( + pd.Series(legend_handles, dtype="object").loc[unique_idx] + ).tolist() if len(legend_handles_unique) > 0: - if float_contrast is True: + if float_contrast: axes_with_legend = contrast_axes - if show_pairs is True: + if show_pairs: bta = (1.75, 1.02) else: bta = (1.5, 1.02) else: axes_with_legend = rawdata_axes - if show_pairs is True: - bta = (1.02, 1.) + if show_pairs: + bta = (1.02, 1.0) else: - bta = (1.,1.) 
- leg = axes_with_legend.legend(legend_handles_unique, - legend_labels_unique, - bbox_to_anchor=bta, - **legend_kwargs) - if show_pairs is True: + bta = (1.0, 1.0) + leg = axes_with_legend.legend( + legend_handles_unique, + legend_labels_unique, + bbox_to_anchor=bta, + **legend_kwargs + ) + if show_pairs: for line in leg.get_lines(): line.set_linewidth(3.0) og_ylim_raw = rawdata_axes.get_ylim() og_xlim_raw = rawdata_axes.get_xlim() - if float_contrast is True: + if float_contrast: # For Gardner-Altman plots only. # Normalize ylims and despine the floating contrast axes. # Check that the effect size is within the swarm ylims. - if effect_size_type in ["mean_diff", "cohens_d", "hedges_g","cohens_h"]: - control_group_summary = plot_data.groupby(xvar)\ - .mean(numeric_only=True).loc[current_control, yvar] - test_group_summary = plot_data.groupby(xvar)\ - .mean(numeric_only=True).loc[current_group, yvar] + if effect_size_type in ["mean_diff", "cohens_d", "hedges_g", "cohens_h"]: + control_group_summary = ( + plot_data.groupby(xvar) + .mean(numeric_only=True) + .loc[current_control, yvar] + ) + test_group_summary = ( + plot_data.groupby(xvar).mean(numeric_only=True).loc[current_group, yvar] + ) elif effect_size_type == "median_diff": - control_group_summary = plot_data.groupby(xvar)\ - .median().loc[current_control, yvar] - test_group_summary = plot_data.groupby(xvar)\ - .median().loc[current_group, yvar] + control_group_summary = ( + plot_data.groupby(xvar).median().loc[current_control, yvar] + ) + test_group_summary = ( + plot_data.groupby(xvar).median().loc[current_group, yvar] + ) if swarm_ylim is None: swarm_ylim = rawdata_axes.get_ylim() @@ -927,7 +1024,7 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): _, contrast_xlim_max = contrast_axes.get_xlim() difference = float(results.difference[0]) - + if effect_size_type in ["mean_diff", "median_diff"]: # Align 0 of contrast_axes to reference group mean of rawdata_axes. 
# If the effect size is positive, shift the contrast axis up. @@ -945,48 +1042,53 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): og_ylim_contrast = rawdata_axes.get_ylim() - np.array(control_group_summary) contrast_axes.set_ylim(og_ylim_contrast) - contrast_axes.set_xlim(contrast_xlim_max-1, contrast_xlim_max) + contrast_axes.set_xlim(contrast_xlim_max - 1, contrast_xlim_max) - elif effect_size_type in ["cohens_d", "hedges_g","cohens_h"]: + elif effect_size_type in ["cohens_d", "hedges_g", "cohens_h"]: if is_paired: which_std = 1 else: which_std = 0 temp_control = plot_data[plot_data[xvar] == current_control][yvar] - temp_test = plot_data[plot_data[xvar] == current_group][yvar] - + temp_test = plot_data[plot_data[xvar] == current_group][yvar] + stds = _compute_standardizers(temp_control, temp_test) if is_paired: pooled_sd = stds[1] else: pooled_sd = stds[0] - - if effect_size_type == 'hedges_g': - gby_count = plot_data.groupby(xvar).count() + + if effect_size_type == "hedges_g": + gby_count = plot_data.groupby(xvar).count() len_control = gby_count.loc[current_control, yvar] - len_test = gby_count.loc[current_group, yvar] - - hg_correction_factor = _compute_hedges_correction_factor(len_control, len_test) - + len_test = gby_count.loc[current_group, yvar] + + hg_correction_factor = _compute_hedges_correction_factor( + len_control, len_test + ) + ylim_scale_factor = pooled_sd / hg_correction_factor elif effect_size_type == "cohens_h": - ylim_scale_factor = (np.mean(temp_test)-np.mean(temp_control)) / difference + ylim_scale_factor = ( + np.mean(temp_test) - np.mean(temp_control) + ) / difference else: ylim_scale_factor = pooled_sd - - scaled_ylim = ((rawdata_axes.get_ylim() - control_group_summary) / ylim_scale_factor).tolist() + + scaled_ylim = ( + (rawdata_axes.get_ylim() - control_group_summary) / ylim_scale_factor + ).tolist() contrast_axes.set_ylim(scaled_ylim) og_ylim_contrast = scaled_ylim - contrast_axes.set_xlim(contrast_xlim_max-1, 
contrast_xlim_max) + contrast_axes.set_xlim(contrast_xlim_max - 1, contrast_xlim_max) if one_sankey is None: # Draw summary lines for control and test groups.. for jj, axx in enumerate([rawdata_axes, contrast_axes]): - # Draw effect size line. if jj == 0: ref = control_group_summary @@ -996,66 +1098,74 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): elif jj == 1: ref = 0 diff = ref + difference - effsize_line_start = contrast_xlim_max-1.1 + effsize_line_start = contrast_xlim_max - 1.1 xlimlow, xlimhigh = axx.get_xlim() # Draw reference line. - axx.hlines(ref, # y-coordinates - 0, xlimhigh, # x-coordinates, start and end. - **reflines_kwargs) - + axx.hlines( + ref, # y-coordinates + 0, + xlimhigh, # x-coordinates, start and end. + **reflines_kwargs + ) + # Draw effect size line. - axx.hlines(diff, - effsize_line_start, xlimhigh, - **reflines_kwargs) - else: + axx.hlines(diff, effsize_line_start, xlimhigh, **reflines_kwargs) + else: ref = 0 diff = ref + difference effsize_line_start = contrast_xlim_max - 0.9 xlimlow, xlimhigh = contrast_axes.get_xlim() # Draw reference line. - contrast_axes.hlines(ref, # y-coordinates - effsize_line_start, xlimhigh, # x-coordinates, start and end. - **reflines_kwargs) - + contrast_axes.hlines( + ref, # y-coordinates + effsize_line_start, + xlimhigh, # x-coordinates, start and end. + **reflines_kwargs + ) + # Draw effect size line. - contrast_axes.hlines(diff, - effsize_line_start, xlimhigh, - **reflines_kwargs) - rawdata_axes.set_xlim(og_xlim_raw) # to align the axis + contrast_axes.hlines(diff, effsize_line_start, xlimhigh, **reflines_kwargs) + rawdata_axes.set_xlim(og_xlim_raw) # to align the axis # Despine appropriately. - sns.despine(ax=rawdata_axes, bottom=True) + sns.despine(ax=rawdata_axes, bottom=True) sns.despine(ax=contrast_axes, left=True, right=False) # Insert break between the rawdata axes and the contrast axes # by re-drawing the x-spine. 
- rawdata_axes.hlines(og_ylim_raw[0], # yindex - rawdata_axes.get_xlim()[0], 1.3, # xmin, xmax - **redraw_axes_kwargs) + rawdata_axes.hlines( + og_ylim_raw[0], # yindex + rawdata_axes.get_xlim()[0], + 1.3, # xmin, xmax + **redraw_axes_kwargs + ) rawdata_axes.set_ylim(og_ylim_raw) - contrast_axes.hlines(contrast_axes.get_ylim()[0], - contrast_xlim_max-0.8, contrast_xlim_max, - **redraw_axes_kwargs) - + contrast_axes.hlines( + contrast_axes.get_ylim()[0], + contrast_xlim_max - 0.8, + contrast_xlim_max, + **redraw_axes_kwargs + ) else: # For Cumming Plots only. # Set custom contrast_ylim, if it was specified. - if plot_kwargs['contrast_ylim'] is not None or (plot_kwargs['delta2_ylim'] is not None and show_delta2): - - if plot_kwargs['contrast_ylim'] is not None: - custom_contrast_ylim = plot_kwargs['contrast_ylim'] - if plot_kwargs['delta2_ylim'] is not None and show_delta2: - custom_delta2_ylim = plot_kwargs['delta2_ylim'] - if custom_contrast_ylim!=custom_delta2_ylim: + if plot_kwargs["contrast_ylim"] is not None or ( + plot_kwargs["delta2_ylim"] is not None and show_delta2 + ): + if plot_kwargs["contrast_ylim"] is not None: + custom_contrast_ylim = plot_kwargs["contrast_ylim"] + if plot_kwargs["delta2_ylim"] is not None and show_delta2: + custom_delta2_ylim = plot_kwargs["delta2_ylim"] + if custom_contrast_ylim != custom_delta2_ylim: err1 = "Please check if `contrast_ylim` and `delta2_ylim` are assigned" err2 = "with same values." raise ValueError(err1 + err2) else: - custom_delta2_ylim = plot_kwargs['delta2_ylim'] + custom_delta2_ylim = plot_kwargs["delta2_ylim"] custom_contrast_ylim = custom_delta2_ylim if len(custom_contrast_ylim) != 2: @@ -1065,8 +1175,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if effect_size_type == "cliffs_delta": # Ensure the ylims for a cliffs_delta plot never exceed [-1, 1]. 
- l = plot_kwargs['contrast_ylim'][0] - h = plot_kwargs['contrast_ylim'][1] + l = plot_kwargs["contrast_ylim"][0] + h = plot_kwargs["contrast_ylim"][1] low = -1 if l < -1 else l high = 1 if h > 1 else h contrast_axes.set_ylim(low, high) @@ -1083,228 +1193,237 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if contrast_ylim_low < 0 < contrast_ylim_high: contrast_axes.axhline(y=0, **reflines_kwargs) - if is_paired == "baseline" and show_pairs == True: + if is_paired == "baseline" and show_pairs: if two_col_sankey: - rightend_ticks_raw = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey) + rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array( + ticks_to_start_twocol_sankey + ) elif proportional and is_paired is not None: - rightend_ticks_raw = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip) - else: - rightend_ticks_raw = np.array([len(i)-1 for i in temp_idx]) + np.array(ticks_to_skip) + rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array( + ticks_to_skip + ) + else: + rightend_ticks_raw = np.array( + [len(i) - 1 for i in temp_idx] + ) + np.array(ticks_to_skip) for ax in [rawdata_axes]: sns.despine(ax=ax, bottom=True) - + ylim = ax.get_ylim() xlim = ax.get_xlim() - redraw_axes_kwargs['y'] = ylim[0] - + redraw_axes_kwargs["y"] = ylim[0] + if two_col_sankey: for k, start_tick in enumerate(ticks_to_start_twocol_sankey): end_tick = rightend_ticks_raw[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) - else: + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) + else: for k, start_tick in enumerate(ticks_to_skip): end_tick = rightend_ticks_raw[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) ax.set_ylim(ylim) - del redraw_axes_kwargs['y'] - - if proportional == False: - temp_length = [(len(i)-1) for i in idx] + del redraw_axes_kwargs["y"] + + if not proportional: + 
temp_length = [(len(i) - 1) for i in idx] else: - temp_length = [(len(i)-1)*2-1 for i in idx] + temp_length = [(len(i) - 1) * 2 - 1 for i in idx] if two_col_sankey: - rightend_ticks_contrast = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey) + rightend_ticks_contrast = np.array( + [len(i) - 2 for i in idx] + ) + np.array(ticks_to_start_twocol_sankey) elif proportional and is_paired is not None: - rightend_ticks_contrast = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip) - else: - rightend_ticks_contrast = np.array(temp_length) + np.array(ticks_to_skip_contrast) + rightend_ticks_contrast = np.array( + [len(i) - 1 for i in idx] + ) + np.array(ticks_to_skip) + else: + rightend_ticks_contrast = np.array(temp_length) + np.array( + ticks_to_skip_contrast + ) for ax in [contrast_axes]: sns.despine(ax=ax, bottom=True) - + ylim = ax.get_ylim() xlim = ax.get_xlim() - redraw_axes_kwargs['y'] = ylim[0] - + redraw_axes_kwargs["y"] = ylim[0] + if two_col_sankey: for k, start_tick in enumerate(ticks_to_start_twocol_sankey): end_tick = rightend_ticks_contrast[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) else: for k, start_tick in enumerate(ticks_to_skip_contrast): end_tick = rightend_ticks_contrast[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) - + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) + ax.set_ylim(ylim) - del redraw_axes_kwargs['y'] + del redraw_axes_kwargs["y"] else: # Compute the end of each x-axes line. 
if two_col_sankey: - rightend_ticks = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey) + rightend_ticks = np.array([len(i) - 2 for i in idx]) + np.array( + ticks_to_start_twocol_sankey + ) else: - rightend_ticks = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip) - + rightend_ticks = np.array([len(i) - 1 for i in idx]) + np.array( + ticks_to_skip + ) + for ax in [rawdata_axes, contrast_axes]: sns.despine(ax=ax, bottom=True) - + ylim = ax.get_ylim() xlim = ax.get_xlim() - redraw_axes_kwargs['y'] = ylim[0] - + redraw_axes_kwargs["y"] = ylim[0] + if two_col_sankey: for k, start_tick in enumerate(ticks_to_start_twocol_sankey): end_tick = rightend_ticks[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) else: for k, start_tick in enumerate(ticks_to_skip): end_tick = rightend_ticks[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) - + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) + ax.set_ylim(ylim) - del redraw_axes_kwargs['y'] + del redraw_axes_kwargs["y"] - if show_delta2 is True or show_mini_meta is True: + if show_delta2 or show_mini_meta: ylim = contrast_axes.get_ylim() - redraw_axes_kwargs['y'] = ylim[0] + redraw_axes_kwargs["y"] = ylim[0] x_ticks = contrast_axes.get_xticks() - contrast_axes.hlines(xmin=x_ticks[-2], xmax=x_ticks[-1], - **redraw_axes_kwargs) - del redraw_axes_kwargs['y'] + contrast_axes.hlines(xmin=x_ticks[-2], xmax=x_ticks[-1], **redraw_axes_kwargs) + del redraw_axes_kwargs["y"] # Set raw axes y-label. 
- swarm_label = plot_kwargs['swarm_label'] + swarm_label = plot_kwargs["swarm_label"] if swarm_label is None and yvar is None: swarm_label = "value" elif swarm_label is None and yvar is not None: swarm_label = yvar - bar_label = plot_kwargs['bar_label'] + bar_label = plot_kwargs["bar_label"] if bar_label is None and effect_size_type != "cohens_h": bar_label = "proportion of success" elif bar_label is None and effect_size_type == "cohens_h": bar_label = "value" # Place contrast axes y-label. - contrast_label_dict = {'mean_diff': "mean difference", - 'median_diff': "median difference", - 'cohens_d': "Cohen's d", - 'hedges_g': "Hedges' g", - 'cliffs_delta': "Cliff's delta", - 'cohens_h': "Cohen's h", - 'delta_g': "mean difference"} - - if proportional == True and effect_size_type != "cohens_h": + contrast_label_dict = { + "mean_diff": "mean difference", + "median_diff": "median difference", + "cohens_d": "Cohen's d", + "hedges_g": "Hedges' g", + "cliffs_delta": "Cliff's delta", + "cohens_h": "Cohen's h", + "delta_g": "mean difference", + } + + if proportional and effect_size_type != "cohens_h": default_contrast_label = "proportion difference" elif effect_size_type == "delta_g": default_contrast_label = "Hedges' g" else: - default_contrast_label = contrast_label_dict[EffectSizeDataFrame.effect_size] + default_contrast_label = contrast_label_dict[effectsize_df.effect_size] - - if plot_kwargs['contrast_label'] is None: + if plot_kwargs["contrast_label"] is None: if is_paired: contrast_label = "paired\n{}".format(default_contrast_label) else: contrast_label = default_contrast_label contrast_label = contrast_label.capitalize() else: - contrast_label = plot_kwargs['contrast_label'] + contrast_label = plot_kwargs["contrast_label"] - if plot_kwargs['fontsize_rawylabel'] is not None: - fontsize_rawylabel = plot_kwargs['fontsize_rawylabel'] - if plot_kwargs['fontsize_contrastylabel'] is not None: - fontsize_contrastylabel = plot_kwargs['fontsize_contrastylabel'] - if 
plot_kwargs['fontsize_delta2label'] is not None: - fontsize_delta2label = plot_kwargs['fontsize_delta2label'] + if plot_kwargs["fontsize_rawylabel"] is not None: + fontsize_rawylabel = plot_kwargs["fontsize_rawylabel"] + if plot_kwargs["fontsize_contrastylabel"] is not None: + fontsize_contrastylabel = plot_kwargs["fontsize_contrastylabel"] + if plot_kwargs["fontsize_delta2label"] is not None: + fontsize_delta2label = plot_kwargs["fontsize_delta2label"] - contrast_axes.set_ylabel(contrast_label,fontsize = fontsize_contrastylabel) - if float_contrast is True: + contrast_axes.set_ylabel(contrast_label, fontsize=fontsize_contrastylabel) + if float_contrast: contrast_axes.yaxis.set_label_position("right") # Set the rawdata axes labels appropriately - if proportional == False: - rawdata_axes.set_ylabel(swarm_label,fontsize = fontsize_rawylabel) + if not proportional: + rawdata_axes.set_ylabel(swarm_label, fontsize=fontsize_rawylabel) else: - rawdata_axes.set_ylabel(bar_label,fontsize = fontsize_rawylabel) + rawdata_axes.set_ylabel(bar_label, fontsize=fontsize_rawylabel) rawdata_axes.set_xlabel("") # Because we turned the axes frame off, we also need to draw back # the y-spine for both axes. 
- if float_contrast==False: + if not float_contrast: rawdata_axes.set_xlim(contrast_axes.get_xlim()) og_xlim_raw = rawdata_axes.get_xlim() - rawdata_axes.vlines(og_xlim_raw[0], - og_ylim_raw[0], og_ylim_raw[1], - **redraw_axes_kwargs) + rawdata_axes.vlines( + og_xlim_raw[0], og_ylim_raw[0], og_ylim_raw[1], **redraw_axes_kwargs + ) og_xlim_contrast = contrast_axes.get_xlim() - if float_contrast is True: + if float_contrast: xpos = og_xlim_contrast[1] else: xpos = og_xlim_contrast[0] og_ylim_contrast = contrast_axes.get_ylim() - contrast_axes.vlines(xpos, - og_ylim_contrast[0], og_ylim_contrast[1], - **redraw_axes_kwargs) - - - if show_delta2 is True: - if plot_kwargs['delta2_label'] is not None: - delta2_label = plot_kwargs['delta2_label'] - elif effect_size == "mean_diff" : + contrast_axes.vlines( + xpos, og_ylim_contrast[0], og_ylim_contrast[1], **redraw_axes_kwargs + ) + + if show_delta2: + if plot_kwargs["delta2_label"] is not None: + delta2_label = plot_kwargs["delta2_label"] + elif effect_size == "mean_diff": delta2_label = "delta - delta" else: delta2_label = "deltas' g" delta2_axes = contrast_axes.twinx() delta2_axes.set_frame_on(False) - delta2_axes.set_ylabel(delta2_label, fontsize = fontsize_delta2label) + delta2_axes.set_ylabel(delta2_label, fontsize=fontsize_delta2label) og_xlim_delta = contrast_axes.get_xlim() og_ylim_delta = contrast_axes.get_ylim() delta2_axes.set_ylim(og_ylim_delta) - delta2_axes.vlines(og_xlim_delta[1], - og_ylim_delta[0], og_ylim_delta[1], - **redraw_axes_kwargs) + delta2_axes.vlines( + og_xlim_delta[1], og_ylim_delta[0], og_ylim_delta[1], **redraw_axes_kwargs + ) + ################################################### GRIDKEY MAIN CODE WIP -################################################### GRIDKEY MAIN CODE WIP - - #if gridkey_rows is None, skip everything here + # if gridkey_rows is None, skip everything here if gridkey_rows is not None: - # Raise error if there are more than 2 items in any idx and gridkey_merge_pairs is True 
and is_paired is not None - if gridkey_merge_pairs is True and is_paired is not None: + if gridkey_merge_pairs and is_paired is not None: for i in idx: if len(i) > 2: - warnings.warn("gridkey_merge_pairs=True only works if all idx in tuples have only two items. gridkey_merge_pairs has automatically been set to False") + warnings.warn( + "gridkey_merge_pairs=True only works if all idx in tuples have only two items. gridkey_merge_pairs has automatically been set to False" + ) gridkey_merge_pairs = False break - elif gridkey_merge_pairs is True and is_paired is None: - warnings.warn("gridkey_merge_pairs=True is only applicable for paired data.") + elif gridkey_merge_pairs and is_paired is None: + warnings.warn( + "gridkey_merge_pairs=True is only applicable for paired data." + ) gridkey_merge_pairs = False - + # Checks for gridkey_merge_pairs and is_paired; if both are true, "merges" the gridkey per pair - if gridkey_merge_pairs is True and is_paired is not None: + if gridkey_merge_pairs and is_paired is not None: groups_for_gridkey = [] for i in idx: groups_for_gridkey.append(i[1]) else: groups_for_gridkey = all_plot_groups - - + # raise errors if gridkey_rows is not a list, or if the list is empty if isinstance(gridkey_rows, list) is False: raise TypeError("gridkey_rows must be a list.") elif len(gridkey_rows) == 0: warnings.warn("gridkey_rows is an empty list.") - - + # raise Warning if an item in gridkey_rows is not contained in any idx for i in gridkey_rows: in_idx = 0 @@ -1313,93 +1432,101 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): in_idx += 1 if in_idx == 0: if is_paired is not None: - warnings.warn(i + " is not in any idx. Please check. Alternatively, merging gridkey pairs may not be suitable for your data; try passing gridkey_merge_pairs=False.") + warnings.warn( + i + + " is not in any idx. Please check. Alternatively, merging gridkey pairs may not be suitable for your data; try passing gridkey_merge_pairs=False." 
+ ) else: - warnings.warn(i + " is not in any idx. Please check.") - - + warnings.warn(i + " is not in any idx. Please check.") + # Populate table: checks if idx for each column contains rowlabel name # IF so, marks that element as present w black dot, or space if not present - table_cellcols = [] + table_cellcols = [] for i in gridkey_rows: thisrow = [] for q in groups_for_gridkey: if str(i) in q: - thisrow.append(u"\u25CF") + thisrow.append("\u25CF") else: thisrow.append("") table_cellcols.append(thisrow) - - + # Adds a row for Ns with the Ns values - if gridkey_show_Ns == True: + if gridkey_show_Ns: gridkey_rows.append("Ns") list_of_Ns = [] for i in groups_for_gridkey: list_of_Ns.append(str(counts.loc[i])) table_cellcols.append(list_of_Ns) - # Adds a row for effectsizes with effectsize values - if gridkey_show_es == True: - gridkey_rows.append(u"\u0394") + if gridkey_show_es: + gridkey_rows.append("\u0394") effsize_list = [] results_list = results.test.to_list() - + # get the effect size, append + or -, 2 dec places for i in enumerate(groups_for_gridkey): if i[1] in results_list: - curr_esval = results.loc[results["test"] == i[1]]["difference"].iloc[0] - curr_esval_str = np.format_float_positional(curr_esval, - precision=es_sf, - sign=True, - trim= 'k', - min_digits = es_sf) + curr_esval = results.loc[results["test"] == i[1]][ + "difference" + ].iloc[0] + curr_esval_str = np.format_float_positional( + curr_esval, + precision=es_sf, + sign=True, + trim="k", + min_digits=es_sf, + ) effsize_list.append(curr_esval_str) else: effsize_list.append("-") - + table_cellcols.append(effsize_list) - + # If Gardner-Altman plot, plot on raw data and not contrast axes - if float_contrast == True: + if float_contrast: axes_ploton = rawdata_axes else: axes_ploton = contrast_axes - + # Account for extended x axis in case of show_delta2 or show_mini_meta x_groups_for_width = len(groups_for_gridkey) - if show_delta2 is True or show_mini_meta is True: - x_groups_for_width += 2 + if 
show_delta2 or show_mini_meta: + x_groups_for_width += 2 gridkey_width = len(groups_for_gridkey) / x_groups_for_width - - gridkey = axes_ploton.table(cellText = table_cellcols, - rowLabels = gridkey_rows, - cellLoc = "center", - bbox = [0, -len(gridkey_rows)*0.1-0.05, gridkey_width, len(gridkey_rows)*0.1], - **{"alpha" : 0.5}) - + + gridkey = axes_ploton.table( + cellText=table_cellcols, + rowLabels=gridkey_rows, + cellLoc="center", + bbox=[ + 0, + -len(gridkey_rows) * 0.1 - 0.05, + gridkey_width, + len(gridkey_rows) * 0.1, + ], + **{"alpha": 0.5} + ) + # modifies row label cells for cell in gridkey._cells: if cell[1] == -1: gridkey._cells[cell].visible_edges = "open" - gridkey._cells[cell].set_text_props(**{ "ha" : "right" }) - + gridkey._cells[cell].set_text_props(**{"ha": "right"}) + # turns off both x axes rawdata_axes.get_xaxis().set_visible(False) contrast_axes.get_xaxis().set_visible(False) - - ####################################################### END GRIDKEY MAIN CODE WIP - - - - + + ####################################################### END GRIDKEY MAIN CODE WIP + # Make sure no stray ticks appear! - rawdata_axes.xaxis.set_ticks_position('bottom') - rawdata_axes.yaxis.set_ticks_position('left') - contrast_axes.xaxis.set_ticks_position('bottom') + rawdata_axes.xaxis.set_ticks_position("bottom") + rawdata_axes.yaxis.set_ticks_position("left") + contrast_axes.xaxis.set_ticks_position("bottom") if float_contrast is False: - contrast_axes.yaxis.set_ticks_position('left') + contrast_axes.yaxis.set_ticks_position("left") # Reset rcParams. 
for parameter in _changed_rcParams: diff --git a/nbs/API/bootstrap.ipynb b/nbs/API/bootstrap.ipynb index 503f0f74..d84b8349 100644 --- a/nbs/API/bootstrap.ipynb +++ b/nbs/API/bootstrap.ipynb @@ -137,7 +137,7 @@ " if len(x1) != len(x2):\n", " raise ValueError('x1 and x2 are not the same length.')\n", "\n", - " if (x2 is None) or (paired is not None) :\n", + " if (x2 is None) or paired:\n", "\n", " if x2 is None:\n", " tx = x1\n", @@ -147,7 +147,8 @@ " ttest_2_paired = 'NIL'\n", " wilcoxonresult = 'NIL'\n", "\n", - " elif paired is not None:\n", + " #elif paired is not None:\n", + " else: # only two options to enter here\n", " diff = True\n", " tx = x2 - x1\n", " ttest_single = 'NIL'\n", @@ -170,7 +171,7 @@ " pct_low_high = np.nan_to_num(pct_low_high).astype('int')\n", "\n", "\n", - " elif x2 is not None and paired is None:\n", + " elif x2 and paired is None:\n", " diff = True\n", " x2 = pd.Series(x2).dropna()\n", " # Generate statarrays for both arrays.\n", diff --git a/nbs/API/confint_1group.ipynb b/nbs/API/confint_1group.ipynb index 3b3c9acd..4dd133bb 100644 --- a/nbs/API/confint_1group.ipynb +++ b/nbs/API/confint_1group.ipynb @@ -67,74 +67,73 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", + "# |export\n", "def create_bootstrap_indexes(array, resamples=5000, random_seed=12345):\n", " \"\"\"Given an array-like, returns a generator of bootstrap indexes\n", " to be used for resampling.\n", " \"\"\"\n", "\n", " rng = RandomState(PCG64(random_seed))\n", - " \n", + "\n", " indexes = range(0, len(array))\n", "\n", - " out = (rng.choice(indexes, len(indexes), replace=True)\n", - " for i in range(0, resamples))\n", + " out = (rng.choice(indexes, len(indexes), replace=True) for i in range(0, resamples))\n", "\n", " return out\n", "\n", "\n", - "\n", "def compute_1group_jackknife(x, func, *args, **kwargs):\n", " \"\"\"\n", " Returns the jackknife bootstraps for func(x).\n", " \"\"\"\n", " from . 
import confint_2group_diff as ci_2g\n", + "\n", " jackknives = [i for i in ci_2g.create_jackknife_indexes(x)]\n", " out = [func(x[j], *args, **kwargs) for j in jackknives]\n", - " del jackknives # memory management.\n", + " del jackknives # memory management.\n", " return out\n", "\n", "\n", - "\n", "def compute_1group_acceleration(jack_dist):\n", " from . import confint_2group_diff as ci_2g\n", - " return ci_2g._calc_accel(jack_dist)\n", "\n", + " return ci_2g._calc_accel(jack_dist)\n", "\n", "\n", - "def compute_1group_bootstraps(x, func, resamples=5000, random_seed=12345,\n", - " *args, **kwargs):\n", + "def compute_1group_bootstraps(\n", + " x, func, resamples=5000, random_seed=12345, *args, **kwargs\n", + "):\n", " \"\"\"Bootstraps func(x), with the number of specified resamples.\"\"\"\n", "\n", - " \n", " # Create bootstrap indexes.\n", - " boot_indexes = create_bootstrap_indexes(x, resamples=resamples,\n", - " random_seed=random_seed)\n", + " boot_indexes = create_bootstrap_indexes(\n", + " x, resamples=resamples, random_seed=random_seed\n", + " )\n", "\n", " out = [func(x[b], *args, **kwargs) for b in boot_indexes]\n", - " \n", + "\n", " del boot_indexes\n", - " \n", - " return out\n", "\n", + " return out\n", "\n", "\n", "def compute_1group_bias_correction(x, bootstraps, func, *args, **kwargs):\n", - "\n", " metric = func(x, *args, **kwargs)\n", " prop_boots_less_than_metric = sum(bootstraps < metric) / len(bootstraps)\n", "\n", " return norm.ppf(prop_boots_less_than_metric)\n", "\n", "\n", - "\n", - "def summary_ci_1group(x:np.array,# An numerical iterable.\n", - " func, #The function to be applied to x.\n", - " resamples:int=5000, #The number of bootstrap resamples to be taken of func(x).\n", - " alpha:float=0.05, #Denotes the likelihood that the confidence interval produced _does not_ include the true summary statistic. 
When alpha = 0.05, a 95% confidence interval is produced.\n", - "                     random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.\n", - "                     sort_bootstraps:bool=True, \n", - "                     *args, **kwargs):\n", + "def summary_ci_1group(\n", + "    x: np.array,  # A numerical iterable.\n", + "    func,  # The function to be applied to x.\n", + "    resamples: int = 5000,  # The number of bootstrap resamples to be taken of func(x).\n", + "    alpha: float = 0.05,  # Denotes the likelihood that the confidence interval produced _does not_ include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced.\n", + "    random_seed: int = 12345,  # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.\n", + "    sort_bootstraps: bool = True,\n", + "    *args,\n", + "    **kwargs\n", + "):\n", "    \"\"\"\n", "    Given an array-like x, returns func(x), and a bootstrap confidence\n", "    interval of func(x).\n", @@ -158,10 +157,9 @@ "    \"\"\"\n", "    from . 
import confint_2group_diff as ci2g\n", "\n", - "\n", - " boots = compute_1group_bootstraps(x, func, resamples=resamples,\n", - " random_seed=random_seed,\n", - " *args, **kwargs)\n", + " boots = compute_1group_bootstraps(\n", + " x, func, resamples=resamples, random_seed=random_seed, *args, **kwargs\n", + " )\n", " bias = compute_1group_bias_correction(x, boots, func)\n", "\n", " jk = compute_1group_jackknife(x, func, *args, **kwargs)\n", @@ -182,12 +180,16 @@ " del boots\n", " del boots_sorted\n", "\n", - " out = {'summary': func(x), 'func': func,\n", - " 'bca_ci_low': low, 'bca_ci_high': high,\n", - " 'bootstraps': B}\n", + " out = {\n", + " \"summary\": func(x),\n", + " \"func\": func,\n", + " \"bca_ci_low\": low,\n", + " \"bca_ci_high\": high,\n", + " \"bootstraps\": B,\n", + " }\n", "\n", " del B\n", - " return out\n" + " return out" ] }, { diff --git a/nbs/API/confint_2group_diff.ipynb b/nbs/API/confint_2group_diff.ipynb index 93c3417a..0f01ea27 100644 --- a/nbs/API/confint_2group_diff.ipynb +++ b/nbs/API/confint_2group_diff.ipynb @@ -54,7 +54,7 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", + "#| export\n", "import numpy as np\n", "from numpy import arange, delete, errstate\n", "from numpy import mean as npmean\n", @@ -72,7 +72,7 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", + "#| export\n", "def create_jackknife_indexes(data):\n", " \"\"\"\n", " Given an array-like, creates a jackknife bootstrap.\n", @@ -93,7 +93,6 @@ " return (delete(index_range, i) for i in index_range)\n", "\n", "\n", - "\n", "def create_repeated_indexes(data):\n", " \"\"\"\n", " Convenience function. 
Given an array-like with length N,\n", @@ -104,25 +103,30 @@ " return (index_range for i in index_range)\n", "\n", "\n", - "\n", "def _create_two_group_jackknife_indexes(x0, x1, is_paired):\n", " \"\"\"Creates the jackknife bootstrap for 2 groups.\"\"\"\n", "\n", " if is_paired and len(x0) == len(x1):\n", - " out = list(zip([j for j in create_jackknife_indexes(x0)],\n", - " [i for i in create_jackknife_indexes(x1)]\n", - " )\n", - " )\n", + " out = list(\n", + " zip(\n", + " [j for j in create_jackknife_indexes(x0)],\n", + " [i for i in create_jackknife_indexes(x1)],\n", + " )\n", + " )\n", " else:\n", - " jackknife_c = list(zip([j for j in create_jackknife_indexes(x0)],\n", - " [i for i in create_repeated_indexes(x1)]\n", - " )\n", - " )\n", - "\n", - " jackknife_t = list(zip([i for i in create_repeated_indexes(x0)],\n", - " [j for j in create_jackknife_indexes(x1)]\n", - " )\n", - " )\n", + " jackknife_c = list(\n", + " zip(\n", + " [j for j in create_jackknife_indexes(x0)],\n", + " [i for i in create_repeated_indexes(x1)],\n", + " )\n", + " )\n", + "\n", + " jackknife_t = list(\n", + " zip(\n", + " [i for i in create_repeated_indexes(x0)],\n", + " [j for j in create_jackknife_indexes(x1)],\n", + " )\n", + " )\n", " out = jackknife_c + jackknife_t\n", " del jackknife_c\n", " del jackknife_t\n", @@ -130,7 +134,6 @@ " return out\n", "\n", "\n", - "\n", "def compute_meandiff_jackknife(x0, x1, is_paired, effect_size):\n", " \"\"\"\n", " Given two arrays, returns the jackknife for their effect size.\n", @@ -145,30 +148,28 @@ " x0_shuffled = x0[j[0]]\n", " x1_shuffled = x1[j[1]]\n", "\n", - " es = __es.two_group_difference(x0_shuffled, x1_shuffled,\n", - " is_paired, effect_size)\n", + " es = __es.two_group_difference(x0_shuffled, x1_shuffled, is_paired, effect_size)\n", " out.append(es)\n", "\n", " return out\n", "\n", "\n", - "\n", "def _calc_accel(jack_dist):\n", - "\n", " jack_mean = npmean(jack_dist)\n", "\n", - " numer = npsum((jack_mean - jack_dist)**3)\n", - " 
denom = 6.0 * (npsum((jack_mean - jack_dist)**2) ** 1.5)\n", + " numer = npsum((jack_mean - jack_dist) ** 3)\n", + " denom = 6.0 * (npsum((jack_mean - jack_dist) ** 2) ** 1.5)\n", "\n", - " with errstate(invalid='ignore'):\n", + " with errstate(invalid=\"ignore\"):\n", " # does not raise warning if invalid division encountered.\n", " return numer / denom\n", "\n", "\n", - "def compute_bootstrapped_diff(x0, x1, is_paired, effect_size,\n", - " resamples=5000, random_seed=12345):\n", + "def compute_bootstrapped_diff(\n", + " x0, x1, is_paired, effect_size, resamples=5000, random_seed=12345\n", + "):\n", " \"\"\"Bootstraps the effect_size for 2 groups.\"\"\"\n", - " \n", + "\n", " from . import effsize as __es\n", "\n", " rng = RandomState(PCG64(random_seed))\n", @@ -176,9 +177,8 @@ " out = np.repeat(np.nan, resamples)\n", " x0_len = len(x0)\n", " x1_len = len(x1)\n", - " \n", + "\n", " for i in range(int(resamples)):\n", - " \n", " if is_paired:\n", " if x0_len != x1_len:\n", " raise ValueError(\"The two arrays do not have the same length.\")\n", @@ -188,23 +188,26 @@ " else:\n", " x0_sample = rng.choice(x0, x0_len, replace=True)\n", " x1_sample = rng.choice(x1, x1_len, replace=True)\n", - " \n", - " out[i] = __es.two_group_difference(x0_sample, x1_sample,\n", - " is_paired, effect_size)\n", - " \n", + "\n", + " out[i] = __es.two_group_difference(x0_sample, x1_sample, is_paired, effect_size)\n", + "\n", " return out\n", "\n", - "def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1\n", - " x2:np.ndarray,# Test group 1\n", - " x3:np.ndarray,# Control group 2\n", - " x4:np.ndarray,# Test group 2\n", - " is_paired:str=None,\n", - " resamples:int=5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.\n", - " random_seed:int=12345# `random_seed` is used to seed the random number generator during bootstrap resampling. 
This ensures that the confidence intervals reported are replicable.\n", - "                                    )->tuple: # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta\n", + "\n", + "def compute_delta2_bootstrapped_diff(\n", + "    x1: np.ndarray,  # Control group 1\n", + "    x2: np.ndarray,  # Test group 1\n", + "    x3: np.ndarray,  # Control group 2\n", + "    x4: np.ndarray,  # Test group 2\n", + "    is_paired: str = None,\n", + "    resamples: int = 5000,  # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.\n", + "    random_seed: int = 12345,  # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.\n", + ") -> (\n", + "    tuple\n", + "):  # bootstrapped result and empirical result of deltas' g, and the bootstrapped result of delta-delta\n", "    \"\"\"\n", "    Bootstraps the effect size deltas' g.\n", - "    \n", + "\n", "    \"\"\"\n", "\n", "    rng = RandomState(PCG64(random_seed))\n", @@ -215,11 +218,15 @@ "    out_delta_g = np.repeat(np.nan, resamples)\n", "    deltadelta = np.repeat(np.nan, resamples)\n", "\n", - "    n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2= x1_len, x2_len, x3_len, x4_len\n", + "    n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2 = x1_len, x2_len, x3_len, x4_len\n", "    s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4)\n", "\n", - "    sd_numerator = ((n_a2_b1 - 1) * s_a2_b1 ** 2 + (n_a1_b1 - 1) * s_a1_b1 ** 2 + (n_a2_b2 - 1) * s_a2_b2 ** 2 + (\n", - "            n_a1_b2 - 1) * s_a1_b2 ** 2)\n", + "    sd_numerator = (\n", + "        (n_a2_b1 - 1) * s_a2_b1**2\n", + "        + (n_a1_b1 - 1) * s_a1_b1**2\n", + "        + (n_a2_b2 - 1) * s_a2_b2**2\n", + "        + (n_a1_b2 - 1) * s_a1_b2**2\n", + "    )\n", "    sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1)\n", "    pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)\n", "\n", @@ -227,46 +234,58 @@ "        if is_paired:\n", "            if (x1_len != x2_len) or (x3_len != x4_len):\n", "                raise 
ValueError(\"The two arrays do not have the same length.\")\n", - " df_paired_1 = pd.DataFrame({\n", - " 'value': np.concatenate([x1, x3]),\n", - " 'array_id': np.repeat(['x1','x3'], [x1_len, x3_len])\n", - " })\n", - " df_paired_2 = pd.DataFrame({\n", - " 'value': np.concatenate([x2, x4]),\n", - " 'array_id': np.repeat(['x2','x4'], [x1_len, x3_len])\n", - " })\n", - " x_sample_index = rng.choice(len(df_paired_1), len(df_paired_1), replace=True)\n", + " df_paired_1 = pd.DataFrame(\n", + " {\n", + " \"value\": np.concatenate([x1, x3]),\n", + " \"array_id\": np.repeat([\"x1\", \"x3\"], [x1_len, x3_len]),\n", + " }\n", + " )\n", + " df_paired_2 = pd.DataFrame(\n", + " {\n", + " \"value\": np.concatenate([x2, x4]),\n", + " \"array_id\": np.repeat([\"x2\", \"x4\"], [x1_len, x3_len]),\n", + " }\n", + " )\n", + " x_sample_index = rng.choice(\n", + " len(df_paired_1), len(df_paired_1), replace=True\n", + " )\n", " x_sample_1 = df_paired_1.loc[x_sample_index]\n", " x_sample_2 = df_paired_2.loc[x_sample_index]\n", - " x1_sample = x_sample_1[x_sample_1['array_id'] == 'x1']['value']\n", - " x2_sample = x_sample_2[x_sample_2['array_id'] == 'x2']['value']\n", - " x3_sample = x_sample_1[x_sample_1['array_id'] == 'x3']['value']\n", - " x4_sample = x_sample_2[x_sample_2['array_id'] == 'x4']['value']\n", + " x1_sample = x_sample_1[x_sample_1[\"array_id\"] == \"x1\"][\"value\"]\n", + " x2_sample = x_sample_2[x_sample_2[\"array_id\"] == \"x2\"][\"value\"]\n", + " x3_sample = x_sample_1[x_sample_1[\"array_id\"] == \"x3\"][\"value\"]\n", + " x4_sample = x_sample_2[x_sample_2[\"array_id\"] == \"x4\"][\"value\"]\n", " else:\n", - " df = pd.DataFrame({\n", - " 'value': np.concatenate([x1, x2, x3, x4]),\n", - " 'array_id': np.repeat(['x1', 'x2', 'x3', 'x4'], [x1_len, x2_len, x3_len, x4_len])\n", - " })\n", - " x_sample_index = rng.choice(len(df),len(df), replace=True)\n", + " df = pd.DataFrame(\n", + " {\n", + " \"value\": np.concatenate([x1, x2, x3, x4]),\n", + " \"array_id\": 
np.repeat(\n", + "                    [\"x1\", \"x2\", \"x3\", \"x4\"], [x1_len, x2_len, x3_len, x4_len]\n", + "                ),\n", + "            }\n", + "        )\n", + "        x_sample_index = rng.choice(len(df), len(df), replace=True)\n", "        x_sample = df.loc[x_sample_index]\n", - "        x1_sample = x_sample[x_sample['array_id'] == 'x1']['value']\n", - "        x2_sample = x_sample[x_sample['array_id'] == 'x2']['value']\n", - "        x3_sample = x_sample[x_sample['array_id'] == 'x3']['value']\n", - "        x4_sample = x_sample[x_sample['array_id'] == 'x4']['value']\n", + "        x1_sample = x_sample[x_sample[\"array_id\"] == \"x1\"][\"value\"]\n", + "        x2_sample = x_sample[x_sample[\"array_id\"] == \"x2\"][\"value\"]\n", + "        x3_sample = x_sample[x_sample[\"array_id\"] == \"x3\"][\"value\"]\n", + "        x4_sample = x_sample[x_sample[\"array_id\"] == \"x4\"][\"value\"]\n", "\n", - "        delta_1 = np.mean(x2_sample)-np.mean(x1_sample)\n", - "        delta_2 = np.mean(x4_sample)-np.mean(x3_sample)\n", + "        delta_1 = np.mean(x2_sample) - np.mean(x1_sample)\n", + "        delta_2 = np.mean(x4_sample) - np.mean(x3_sample)\n", "        delta_delta = delta_2 - delta_1\n", "        deltadelta[i] = delta_delta\n", - "        out_delta_g[i] = delta_delta/pooled_sample_sd\n", - "    delta_g = ((np.mean(x4)-np.mean(x3)) - (np.mean(x2)-np.mean(x1))) / pooled_sample_sd\n", + "        out_delta_g[i] = delta_delta / pooled_sample_sd\n", + "    delta_g = (\n", + "        (np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))\n", + "    ) / pooled_sample_sd\n", "    return out_delta_g, delta_g, deltadelta\n", "\n", "\n", - "\n", - "def compute_meandiff_bias_correction(bootstraps, #An numerical iterable, comprising bootstrap resamples of the effect size.\n", - "                                     effsize # The effect size for the original sample.\n", - "                                    ): #The bias correction value for the given bootstraps and effect size.\n", + "def compute_meandiff_bias_correction(\n", + "    bootstraps,  # A numerical iterable, comprising bootstrap resamples of the effect size.\n", + "    effsize,  # The effect size for the original sample.\n", + "):  # The bias correction value for the 
given bootstraps and effect size.\n", " \"\"\"\n", " Computes the bias correction required for the BCa method\n", " of confidence interval construction.\n", @@ -285,13 +304,11 @@ " return norm.ppf(prop_less_than_es)\n", "\n", "\n", - "\n", "def _compute_alpha_from_ci(ci):\n", " if ci < 0 or ci > 100:\n", " raise ValueError(\"`ci` must be a number between 0 and 100.\")\n", "\n", - " return (100. - ci) / 100.\n", - "\n", + " return (100.0 - ci) / 100.0\n", "\n", "\n", "def _compute_quantile(z, bias, acceleration):\n", @@ -301,7 +318,6 @@ " return bias + (numer / denom)\n", "\n", "\n", - "\n", "def compute_interval_limits(bias, acceleration, n_boots, ci=95):\n", " \"\"\"\n", " Returns the indexes of the interval limits for a given bootstrap.\n", @@ -317,7 +333,7 @@ " z_low = norm.ppf(alpha_low)\n", " z_high = norm.ppf(alpha_high)\n", "\n", - " kws = {'bias': bias, 'acceleration': acceleration}\n", + " kws = {\"bias\": bias, \"acceleration\": acceleration}\n", " low = _compute_quantile(z_low, **kws)\n", " high = _compute_quantile(z_high, **kws)\n", "\n", @@ -330,20 +346,20 @@ " return low, high\n", "\n", "\n", - "def calculate_group_var(control_var, control_N,test_var, test_N):\n", - " return control_var/control_N + test_var/test_N\n", + "def calculate_group_var(control_var, control_N, test_var, test_N):\n", + " return control_var / control_N + test_var / test_N\n", "\n", "\n", "def calculate_weighted_delta(group_var, differences, resamples):\n", - " '''\n", + " \"\"\"\n", " Compute the weighted deltas.\n", - " '''\n", + " \"\"\"\n", "\n", - " weight = 1/group_var\n", + " weight = 1 / group_var\n", " denom = np.sum(weight)\n", " num = np.sum(weight[i] * differences[i] for i in range(0, len(weight)))\n", "\n", - " return num/denom" + " return num / denom" ] }, { diff --git a/nbs/API/dabest_object.ipynb b/nbs/API/dabest_object.ipynb index 46ae1e80..f108a235 100644 --- a/nbs/API/dabest_object.ipynb +++ b/nbs/API/dabest_object.ipynb @@ -87,104 +87,123 @@ " Class for 
estimation statistics and plots.\n", " \"\"\"\n", "\n", - " def __init__(self, data, idx, x, y, paired, id_col, ci, \n", - " resamples, random_seed, proportional, delta2, \n", - " experiment, experiment_label, x1_level, mini_meta):\n", - "\n", + " def __init__(\n", + " self,\n", + " data,\n", + " idx,\n", + " x,\n", + " y,\n", + " paired,\n", + " id_col,\n", + " ci,\n", + " resamples,\n", + " random_seed,\n", + " proportional,\n", + " delta2,\n", + " experiment,\n", + " experiment_label,\n", + " x1_level,\n", + " mini_meta,\n", + " ):\n", " \"\"\"\n", " Parses and stores pandas DataFrames in preparation for estimation\n", " statistics. You should not be calling this class directly; instead,\n", " use `dabest.load()` to parse your DataFrame prior to analysis.\n", " \"\"\"\n", - " \n", - " self.__delta2 = delta2\n", - " self.__experiment = experiment\n", - " self.__ci = ci\n", - " self.__input_data = data\n", - " self.__output_data = data.copy()\n", - " self.__id_col = id_col\n", - " self.__is_paired = paired\n", - " self.__resamples = resamples\n", - " self.__random_seed = random_seed\n", - " self.__proportional = proportional\n", - " self.__mini_meta = mini_meta \n", "\n", + " self.__delta2 = delta2\n", + " self.__experiment = experiment\n", + " self.__ci = ci\n", + " self.__input_data = data\n", + " self.__output_data = data.copy()\n", + " self.__id_col = id_col\n", + " self.__is_paired = paired\n", + " self.__resamples = resamples\n", + " self.__random_seed = random_seed\n", + " self.__proportional = proportional\n", + " self.__mini_meta = mini_meta\n", "\n", " # Check if it is a valid mini_meta case\n", " if self.__mini_meta:\n", " # Only mini_meta calculation but not proportional and delta-delta function\n", " if self.__proportional:\n", - " err0 = '`proportional` and `mini_meta` cannot be True at the same time.'\n", + " err0 = \"`proportional` and `mini_meta` cannot be True at the same time.\"\n", " raise ValueError(err0)\n", " if self.__delta2:\n", - " err0 = 
'`delta` and `mini_meta` cannot be True at the same time.'\n", + " err0 = \"`delta` and `mini_meta` cannot be True at the same time.\"\n", " raise ValueError(err0)\n", - " \n", + "\n", " # Check if the columns stated are valid\n", " # TODO instead of traversing twice idx you can traverse only once\n", " # and break the loop if the condition is not satisfied?\n", " # TODO What if the type is not str and not tuple,list? missing raise Error\n", " if all([isinstance(i, str) for i in idx]):\n", - " if len(pd.unique([t for t in idx]).tolist())!=2:\n", - " err0 = '`mini_meta` is True, but `idx` ({})'.format(idx) \n", - " err1 = 'does not contain exactly 2 columns.'\n", + " if len(pd.unique([t for t in idx]).tolist()) != 2:\n", + " err0 = \"`mini_meta` is True, but `idx` ({})\".format(idx)\n", + " err1 = \"does not contain exactly 2 columns.\"\n", " raise ValueError(err0 + err1)\n", - " \n", + "\n", " if all([isinstance(i, (tuple, list)) for i in idx]):\n", " all_idx_lengths = [len(t) for t in idx]\n", " if (array(all_idx_lengths) != 2).any():\n", " err1 = \"`mini_meta` is True, but some idx \"\n", " err2 = \"in {} does not consist only of two groups.\".format(idx)\n", " raise ValueError(err1 + err2)\n", - " \n", "\n", - " # TODO can you have True mini_meta and delta2 at the same time? 
\n", + " # TODO can you have True mini_meta and delta2 at the same time?\n", " # Check if this is a 2x2 ANOVA case and x & y are valid columns\n", " # Create experiment_label and x1_level\n", " if self.__delta2:\n", + " # TODO Wrap the errors in a separate function\n", " if self.__proportional:\n", - " err0 = '`proportional` and `delta` cannot be True at the same time.'\n", + " err0 = \"`proportional` and `delta` cannot be True at the same time.\"\n", " raise ValueError(err0)\n", - " \n", + "\n", " # idx should not be specified\n", " if idx:\n", - " err0 = '`idx` should not be specified when `delta2` is True.'.format(len(x))\n", + " err0 = \"`idx` should not be specified when `delta2` is True.\".format(\n", + " len(x)\n", + " )\n", " raise ValueError(err0)\n", "\n", " # Check if x is valid\n", " # TODO if x is None is fine??\n", " if len(x) != 2:\n", - " err0 = '`delta2` is True but the number of variables indicated by `x` is {}.'.format(len(x))\n", + " err0 = \"`delta2` is True but the number of variables indicated by `x` is {}.\".format(\n", + " len(x)\n", + " )\n", " raise ValueError(err0)\n", - " \n", + "\n", " for i in x:\n", " if i not in self.__output_data.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(i)\n", + " err = \"{0} is not a column in `data`. Please check.\".format(i)\n", " raise IndexError(err)\n", "\n", " # Check if y is valid\n", " if not y:\n", - " err0 = '`delta2` is True but `y` is not indicated.'\n", + " err0 = \"`delta2` is True but `y` is not indicated.\"\n", " raise ValueError(err0)\n", - " \n", + "\n", " if y not in self.__output_data.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(y)\n", + " err = \"{0} is not a column in `data`. Please check.\".format(y)\n", " raise IndexError(err)\n", "\n", " # Check if experiment is valid\n", " if experiment not in self.__output_data.columns:\n", - " err = '{0} is not a column in `data`. 
Please check.'.format(experiment)\n", + " err = \"{0} is not a column in `data`. Please check.\".format(experiment)\n", " raise IndexError(err)\n", "\n", " # Check if experiment_label is valid and create experiment when needed\n", " if experiment_label:\n", " if len(experiment_label) != 2:\n", - " err0 = '`experiment_label` does not have a length of 2.'\n", + " err0 = \"`experiment_label` does not have a length of 2.\"\n", " raise ValueError(err0)\n", - " \n", + "\n", " for i in experiment_label:\n", " if i not in self.__output_data[experiment].unique():\n", - " err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment)\n", + " err = \"{0} is not an element in the column `{1}` of `data`. Please check.\".format(\n", + " i, experiment\n", + " )\n", " raise IndexError(err)\n", " else:\n", " experiment_label = self.__output_data[experiment].unique()\n", @@ -192,139 +211,139 @@ " # Check if x1_level is valid\n", " if x1_level:\n", " if len(x1_level) != 2:\n", - " err0 = '`x1_level` does not have a length of 2.'\n", + " err0 = \"`x1_level` does not have a length of 2.\"\n", " raise ValueError(err0)\n", - " \n", + "\n", " for i in x1_level:\n", " if i not in self.__output_data[x[0]].unique():\n", - " err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment)\n", + " err = \"{0} is not an element in the column `{1}` of `data`. Please check.\".format(\n", + " i, experiment\n", + " )\n", " raise IndexError(err)\n", "\n", " else:\n", " x1_level = self.__output_data[x[0]].unique()\n", - " \n", - " # TODO what if experiment is None? 
\n", + "\n", + " # TODO what if experiment is None?\n", " elif experiment:\n", " experiment_label = self.__output_data[experiment].unique()\n", - " x1_level = self.__output_data[x[0]].unique() \n", + " x1_level = self.__output_data[x[0]].unique()\n", " self.__experiment_label = experiment_label\n", - " self.__x1_level = x1_level\n", - "\n", + " self.__x1_level = x1_level\n", "\n", " # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case\n", - " if x and y and idx is None:\n", - " # Add a length check for unique values in the first element in list x, \n", + " if idx is None and x is not None and y is not None:\n", + " # Add a length check for unique values in the first element in list x,\n", " # if the length is greater than 2, force delta2 to be False\n", " # Should be removed if delta2 for situations other than 2x2 is supported\n", " if len(self.__output_data[x[0]].unique()) > 2 and x1_level is None:\n", " self.__delta2 = False\n", " # stop the loop if delta2 is False\n", - " \n", + "\n", " # add a new column which is a combination of experiment and the first variable\n", - " new_col_name = experiment+x[0]\n", + " new_col_name = experiment + x[0]\n", " while new_col_name in self.__output_data.columns:\n", " new_col_name += \"_\"\n", "\n", - " self.__output_data[new_col_name] = self.__output_data[x[0]].astype(str) + \" \" + self.__output_data[experiment].astype(str)\n", + " self.__output_data[new_col_name] = (\n", + " self.__output_data[x[0]].astype(str)\n", + " + \" \"\n", + " + self.__output_data[experiment].astype(str)\n", + " )\n", "\n", - " #create idx and record the first and second x variable \n", + " # create idx and record the first and second x variable\n", " idx = []\n", " for i in list(map(lambda x: str(x), experiment_label)):\n", " temp = []\n", " for j in list(map(lambda x: str(x), x1_level)):\n", " temp.append(j + \" \" + i)\n", " idx.append(temp)\n", - " \n", + "\n", " self.__idx = idx\n", - " self.__x1 = x[0]\n", - " 
self.__x2 = x[1]\n", + " self.__x1 = x[0]\n", + " self.__x2 = x[1]\n", " x = new_col_name\n", " else:\n", " self.__idx = idx\n", - " self.__x1 = None\n", - " self.__x2 = None\n", - "\n", - "\n", + " self.__x1 = None\n", + " self.__x2 = None\n", "\n", " # Determine the kind of estimation plot we need to produce.\n", " if all([isinstance(i, (str, int, float)) for i in idx]):\n", " # flatten out idx.\n", " all_plot_groups = pd.unique([t for t in idx]).tolist()\n", " if len(idx) > len(all_plot_groups):\n", - " err0 = '`idx` contains duplicated groups. Please remove any duplicates and try again.'\n", + " err0 = \"`idx` contains duplicated groups. Please remove any duplicates and try again.\"\n", " raise ValueError(err0)\n", - " \n", + "\n", " # We need to re-wrap this idx inside another tuple so as to\n", " # easily loop thru each pairwise group later on.\n", " self.__idx = (idx,)\n", "\n", " elif all([isinstance(i, (tuple, list)) for i in idx]):\n", " all_plot_groups = pd.unique([tt for t in idx for tt in t]).tolist()\n", - " \n", + "\n", " actual_groups_given = sum([len(i) for i in idx])\n", - " \n", + "\n", " if actual_groups_given > len(all_plot_groups):\n", - " err0 = 'Groups are repeated across tuples,'\n", - " err1 = ' or a tuple has repeated groups in it.'\n", - " err2 = ' Please remove any duplicates and try again.'\n", + " err0 = \"Groups are repeated across tuples,\"\n", + " err1 = \" or a tuple has repeated groups in it.\"\n", + " err2 = \" Please remove any duplicates and try again.\"\n", " raise ValueError(err0 + err1 + err2)\n", "\n", - " else: # mix of string and tuple?\n", - " err = 'There seems to be a problem with the idx you '\\\n", - " 'entered--{}.'.format(idx)\n", + " else: # mix of string and tuple?\n", + " err = \"There seems to be a problem with the idx you \" \"entered--{}.\".format(\n", + " idx\n", + " )\n", " raise ValueError(err)\n", "\n", " # Check if there is a typo on paired\n", " if self.__is_paired and self.__is_paired not in 
(\"baseline\", \"sequential\"):\n", - " err = '{} assigned for `paired` is not valid.'.format(self.__is_paired)\n", + " err = \"{} assigned for `paired` is not valid.\".format(self.__is_paired)\n", " raise ValueError(err)\n", "\n", - "\n", " # Determine the type of data: wide or long.\n", - " if y and x is None:\n", - " err = 'You have only specified `y`. Please also specify `x`.'\n", + " if x is None and y is not None:\n", + " err = \"You have only specified `y`. Please also specify `x`.\"\n", " raise ValueError(err)\n", "\n", - " if x and y is None:\n", - " err = 'You have only specified `x`. Please also specify `y`.'\n", + " if x is not None and y is None:\n", + " err = \"You have only specified `x`. Please also specify `y`.\"\n", " raise ValueError(err)\n", "\n", - "\n", " self.__plot_data = self.get_plot_data(x, y, all_plot_groups)\n", " self.__all_plot_groups = all_plot_groups\n", "\n", - "\n", " # Check if `id_col` is valid\n", " if self.__is_paired:\n", " if id_col is None:\n", " err = \"`id_col` must be specified if `paired` is assigned with a not NoneType value.\"\n", " raise IndexError(err)\n", - " \n", + "\n", " if id_col not in self.__plot_data.columns:\n", " err = \"{} is not a column in `data`. 
\".format(id_col)\n", " raise IndexError(err)\n", "\n", " self.compute_effectsize_dfs()\n", "\n", - "\n", " def __repr__(self):\n", " from .__init__ import __version__\n", " from .misc_tools import print_greeting\n", - " \n", - " greeting_header = print_greeting()\n", "\n", - " RM_STATUS = {'baseline' : 'for repeated measures against baseline \\n', \n", - " 'sequential': 'for the sequential design of repeated-measures experiment \\n',\n", - " 'None' : ''\n", - " }\n", + " greeting_header = print_greeting()\n", "\n", - " PAIRED_STATUS = {'baseline' : 'Paired e', \n", - " 'sequential' : 'Paired e',\n", - " 'None' : 'E'\n", + " RM_STATUS = {\n", + " \"baseline\": \"for repeated measures against baseline \\n\",\n", + " \"sequential\": \"for the sequential design of repeated-measures experiment \\n\",\n", + " \"None\": \"\",\n", " }\n", "\n", - " first_line = {\"rm_status\" : RM_STATUS[str(self.__is_paired)],\n", - " \"paired_status\": PAIRED_STATUS[str(self.__is_paired)]}\n", + " PAIRED_STATUS = {\"baseline\": \"Paired e\", \"sequential\": \"Paired e\", \"None\": \"E\"}\n", + "\n", + " first_line = {\n", + " \"rm_status\": RM_STATUS[str(self.__is_paired)],\n", + " \"paired_status\": PAIRED_STATUS[str(self.__is_paired)],\n", + " }\n", "\n", " s1 = \"{paired_status}ffect size(s) {rm_status}\".format(**first_line)\n", " s2 = \"with {}% confidence intervals will be computed for:\".format(self.__ci)\n", @@ -334,7 +353,7 @@ "\n", " comparisons = []\n", "\n", - " if self.__is_paired == 'sequential':\n", + " if self.__is_paired == \"sequential\":\n", " for j, current_tuple in enumerate(self.__idx):\n", " for ix, test_name in enumerate(current_tuple[1:]):\n", " control_name = current_tuple[ix]\n", @@ -347,13 +366,17 @@ " comparisons.append(\"{} minus {}\".format(test_name, control_name))\n", "\n", " if self.__delta2:\n", - " comparisons.append(\"{} minus {} (only for mean difference)\".format(self.__experiment_label[1], self.__experiment_label[0]))\n", - " \n", + " 
comparisons.append(\n", + " \"{} minus {} (only for mean difference)\".format(\n", + " self.__experiment_label[1], self.__experiment_label[0]\n", + " )\n", + " )\n", + "\n", " if self.__mini_meta:\n", " comparisons.append(\"weighted delta (only for mean difference)\")\n", "\n", " for j, g in enumerate(comparisons):\n", - " out.append(\"{}. {}\".format(j+1, g))\n", + " out.append(\"{}. {}\".format(j + 1, g))\n", "\n", " resamples_line1 = \"\\n{} resamples \".format(self.__resamples)\n", " resamples_line2 = \"will be used to generate the effect size bootstraps.\"\n", @@ -361,7 +384,6 @@ "\n", " return \"\\n\".join(out)\n", "\n", - " \n", " @property\n", " def mean_diff(self):\n", " \"\"\"\n", @@ -369,17 +391,15 @@ "\n", " \"\"\"\n", " return self.__mean_diff\n", - " \n", - " \n", - " @property \n", + "\n", + " @property\n", " def median_diff(self):\n", " \"\"\"\n", " Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", "\n", " \"\"\"\n", " return self.__median_diff\n", - " \n", - " \n", + "\n", " @property\n", " def cohens_d(self):\n", " \"\"\"\n", @@ -387,8 +407,7 @@ "\n", " \"\"\"\n", " return self.__cohens_d\n", - " \n", - " \n", + "\n", " @property\n", " def cohens_h(self):\n", " \"\"\"\n", @@ -397,17 +416,15 @@ " \"\"\"\n", " return self.__cohens_h\n", "\n", - "\n", - " @property \n", + " @property\n", " def hedges_g(self):\n", " \"\"\"\n", " Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", "\n", " \"\"\"\n", " return self.__hedges_g\n", - " \n", - " \n", - " @property \n", + "\n", + " @property\n", " def cliffs_delta(self):\n", " \"\"\"\n", " Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence 
interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", @@ -426,19 +443,17 @@ " def input_data(self):\n", " \"\"\"\n", " Returns the pandas DataFrame that was passed to `dabest.load()`.\n", - " When `delta2` is True, a new column is added to support the \n", + " When `delta2` is True, a new column is added to support the\n", " function. The name of this new column is indicated by `x`.\n", " \"\"\"\n", " return self.__input_data\n", "\n", - "\n", " @property\n", " def idx(self):\n", " \"\"\"\n", " Returns the order of categories that was passed to `dabest.load()`.\n", " \"\"\"\n", " return self.__idx\n", - " \n", "\n", " @property\n", " def x1(self):\n", @@ -448,16 +463,14 @@ " \"\"\"\n", " return self.__x1\n", "\n", - "\n", " @property\n", " def x1_level(self):\n", " \"\"\"\n", - " Returns the levels of first variable declared in x when it is a \n", + " Returns the levels of first variable declared in x when it is a\n", " delta-delta case; returns None otherwise.\n", " \"\"\"\n", " return self.__x1_level\n", "\n", - "\n", " @property\n", " def x2(self):\n", " \"\"\"\n", @@ -466,15 +479,13 @@ " \"\"\"\n", " return self.__x2\n", "\n", - "\n", " @property\n", " def experiment(self):\n", " \"\"\"\n", - " Returns the column name of experiment labels that was passed to \n", + " Returns the column name of experiment labels that was passed to\n", " `dabest.load()` when it is a delta-delta case; returns None otherwise.\n", " \"\"\"\n", " return self.__experiment\n", - " \n", "\n", " @property\n", " def experiment_label(self):\n", @@ -484,16 +495,14 @@ " \"\"\"\n", " return self.__experiment_label\n", "\n", - "\n", " @property\n", " def delta2(self):\n", " \"\"\"\n", - " Returns the boolean parameter indicating if this is a delta-delta \n", + " Returns the boolean parameter indicating if this is a delta-delta\n", " situation.\n", " \"\"\"\n", " return self.__delta2\n", "\n", - "\n", " @property\n", " def 
is_paired(self):\n", " \"\"\"\n", @@ -501,7 +510,6 @@ " \"\"\"\n", " return self.__is_paired\n", "\n", - "\n", " @property\n", " def id_col(self):\n", " \"\"\"\n", @@ -509,7 +517,6 @@ " \"\"\"\n", " return self.__id_col\n", "\n", - "\n", " @property\n", " def ci(self):\n", " \"\"\"\n", @@ -517,7 +524,6 @@ " \"\"\"\n", " return self.__ci\n", "\n", - "\n", " @property\n", " def resamples(self):\n", " \"\"\"\n", @@ -525,7 +531,6 @@ " \"\"\"\n", " return self.__resamples\n", "\n", - "\n", " @property\n", " def random_seed(self):\n", " \"\"\"\n", @@ -534,18 +539,16 @@ " \"\"\"\n", " return self.__random_seed\n", "\n", - "\n", " @property\n", " def x(self):\n", " \"\"\"\n", " Returns the x column that was passed to `dabest.load()`, if any.\n", - " When `delta2` is True, `x` returns the name of the new column created \n", - " for the delta-delta situation. To retrieve the 2 variables passed into \n", + " When `delta2` is True, `x` returns the name of the new column created\n", + " for the delta-delta situation. 
To retrieve the 2 variables passed into\n", " `x` when `delta2` is True, please call `x1` and `x2` instead.\n", " \"\"\"\n", " return self.__x\n", "\n", - "\n", " @property\n", " def y(self):\n", " \"\"\"\n", @@ -553,7 +556,6 @@ " \"\"\"\n", " return self.__y\n", "\n", - "\n", " @property\n", " def _xvar(self):\n", " \"\"\"\n", @@ -561,7 +563,6 @@ " \"\"\"\n", " return self.__xvar\n", "\n", - "\n", " @property\n", " def _yvar(self):\n", " \"\"\"\n", @@ -569,7 +570,6 @@ " \"\"\"\n", " return self.__yvar\n", "\n", - "\n", " @property\n", " def _plot_data(self):\n", " \"\"\"\n", @@ -577,7 +577,6 @@ " \"\"\"\n", " return self.__plot_data\n", "\n", - " \n", " @property\n", " def proportional(self):\n", " \"\"\"\n", @@ -585,7 +584,6 @@ " \"\"\"\n", " return self.__proportional\n", "\n", - " \n", " @property\n", " def mini_meta(self):\n", " \"\"\"\n", @@ -593,34 +591,32 @@ " \"\"\"\n", " return self.__mini_meta\n", "\n", - "\n", " @property\n", " def _all_plot_groups(self):\n", " \"\"\"\n", " Returns the all plot groups, as indicated via the `idx` keyword.\n", " \"\"\"\n", " return self.__all_plot_groups\n", - " \n", - " \n", + "\n", " def get_plot_data(self, x, y, all_plot_groups):\n", - " '''\n", - " Function to prepare some attributes for plotting \n", - " '''\n", - " \n", + " \"\"\"\n", + " Function to prepare some attributes for plotting\n", + " \"\"\"\n", + "\n", " # Identify the type of data that was passed in.\n", - " if x and y:\n", + " if x is not None and y is not None:\n", " # Assume we have a long dataset.\n", " # check both x and y are column names in data.\n", " if x not in self.__output_data.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(x)\n", + " err = \"{0} is not a column in `data`. Please check.\".format(x)\n", " raise IndexError(err)\n", " if y not in self.__output_data.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(y)\n", + " err = \"{0} is not a column in `data`. 
Please check.\".format(y)\n", " raise IndexError(err)\n", "\n", " # check y is numeric.\n", " if not issubdtype(self.__output_data[y].dtype, number):\n", - " err = '{0} is a column in `data`, but it is not numeric.'.format(y)\n", + " err = \"{0} is a column in `data`, but it is not numeric.\".format(y)\n", " raise ValueError(err)\n", "\n", " # check all the idx can be found in self.__output_data[x]\n", @@ -630,10 +626,12 @@ " err1 = \" Please check `idx` and try again.\"\n", " raise IndexError(err0 + err1)\n", "\n", - " # Select only rows where the value in the `x` column \n", + " # Select only rows where the value in the `x` column\n", " # is found in `idx`.\n", - " plot_data = self.__output_data[self.__output_data.loc[:, x].isin(all_plot_groups)].copy()\n", - " \n", + " plot_data = self.__output_data[\n", + " self.__output_data.loc[:, x].isin(all_plot_groups)\n", + " ].copy()\n", + "\n", " # Assign attributes\n", " self.__x = x\n", " self.__y = y\n", @@ -654,74 +652,78 @@ " err0 = '\"{0}\" is not a column in `data`.'.format(g)\n", " err1 = \" Please check `idx` and try again.\"\n", " raise IndexError(err0 + err1)\n", - " \n", - " set_all_columns = set(self.__output_data.columns.tolist())\n", + "\n", + " set_all_columns = set(self.__output_data.columns.tolist())\n", " set_all_plot_groups = set(all_plot_groups)\n", " id_vars = set_all_columns.difference(set_all_plot_groups)\n", "\n", - " plot_data = pd.melt(self.__output_data,\n", - " id_vars=id_vars,\n", - " value_vars=all_plot_groups,\n", - " value_name=self.__yvar,\n", - " var_name=self.__xvar)\n", - " \n", + " plot_data = pd.melt(\n", + " self.__output_data,\n", + " id_vars=id_vars,\n", + " value_vars=all_plot_groups,\n", + " value_name=self.__yvar,\n", + " var_name=self.__xvar,\n", + " )\n", + "\n", " # Added in v0.2.7.\n", - " plot_data.dropna(axis=0, how='any', subset=[self.__yvar], inplace=True)\n", + " plot_data.dropna(axis=0, how=\"any\", subset=[self.__yvar], inplace=True)\n", "\n", " # TODO these 
comments should not be in the code but on the release notes of the package version\n", " # Lines 131 to 140 added in v0.2.3.\n", - " # Fixes a bug that jammed up when the xvar column was already \n", + " # Fixes a bug that jammed up when the xvar column was already\n", " # a pandas Categorical. Now we check for this and act appropriately.\n", - " if isinstance(plot_data[self.__xvar].dtype, \n", - " pd.CategoricalDtype):\n", + " if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype):\n", " plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)\n", - " plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, \n", - " ordered=True, \n", - " inplace=True)\n", + " plot_data[self.__xvar].cat.reorder_categories(\n", + " all_plot_groups, ordered=True, inplace=True\n", + " )\n", " else:\n", - " plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],\n", - " categories=all_plot_groups,\n", - " ordered=True)\n", - " \n", + " plot_data.loc[:, self.__xvar] = pd.Categorical(\n", + " plot_data[self.__xvar], categories=all_plot_groups, ordered=True\n", + " )\n", "\n", " return plot_data\n", - " \n", + "\n", " def compute_effectsize_dfs(self):\n", " from ._effsize_objects import EffectSizeDataFrame\n", "\n", - " effectsize_df_kwargs = dict(ci=self.__ci, is_paired=self.__is_paired,\n", - " random_seed=self.__random_seed,\n", - " resamples=self.__resamples,\n", - " proportional=self.__proportional, \n", - " delta2=self.__delta2, \n", - " experiment_label=self.__experiment_label,\n", - " x1_level=self.__x1_level,\n", - " x2=self.__x2,\n", - " mini_meta = self.__mini_meta)\n", + " effectsize_df_kwargs = dict(\n", + " ci=self.__ci,\n", + " is_paired=self.__is_paired,\n", + " random_seed=self.__random_seed,\n", + " resamples=self.__resamples,\n", + " proportional=self.__proportional,\n", + " delta2=self.__delta2,\n", + " experiment_label=self.__experiment_label,\n", + " x1_level=self.__x1_level,\n", + " x2=self.__x2,\n", + " 
mini_meta=self.__mini_meta,\n", + " )\n", "\n", - " self.__mean_diff = EffectSizeDataFrame(self, \"mean_diff\",\n", - " **effectsize_df_kwargs)\n", + " self.__mean_diff = EffectSizeDataFrame(\n", + " self, \"mean_diff\", **effectsize_df_kwargs\n", + " )\n", "\n", - " self.__median_diff = EffectSizeDataFrame(self, \"median_diff\",\n", - " **effectsize_df_kwargs)\n", + " self.__median_diff = EffectSizeDataFrame(\n", + " self, \"median_diff\", **effectsize_df_kwargs\n", + " )\n", "\n", - " self.__cohens_d = EffectSizeDataFrame(self, \"cohens_d\",\n", - " **effectsize_df_kwargs)\n", + " self.__cohens_d = EffectSizeDataFrame(self, \"cohens_d\", **effectsize_df_kwargs)\n", "\n", - " self.__cohens_h = EffectSizeDataFrame(self, \"cohens_h\",\n", - " **effectsize_df_kwargs) \n", + " self.__cohens_h = EffectSizeDataFrame(self, \"cohens_h\", **effectsize_df_kwargs)\n", "\n", - " self.__hedges_g = EffectSizeDataFrame(self, \"hedges_g\",\n", - " **effectsize_df_kwargs)\n", - " \n", - " self.__delta_g = EffectSizeDataFrame(self, \"delta_g\",\n", - " **effectsize_df_kwargs)\n", + " self.__hedges_g = EffectSizeDataFrame(self, \"hedges_g\", **effectsize_df_kwargs)\n", + "\n", + " self.__delta_g = EffectSizeDataFrame(self, \"delta_g\", **effectsize_df_kwargs)\n", "\n", " if not self.__is_paired:\n", - " self.__cliffs_delta = EffectSizeDataFrame(self, \"cliffs_delta\",\n", - " **effectsize_df_kwargs)\n", + " self.__cliffs_delta = EffectSizeDataFrame(\n", + " self, \"cliffs_delta\", **effectsize_df_kwargs\n", + " )\n", " else:\n", - " self.__cliffs_delta = \"The data is paired; Cliff's delta is therefore undefined.\"" + " self.__cliffs_delta = (\n", + " \"The data is paired; Cliff's delta is therefore undefined.\"\n", + " )" ] }, { diff --git a/nbs/API/delta_objects.ipynb b/nbs/API/delta_objects.ipynb index 9bb0d7d6..e2ab4475 100644 --- a/nbs/API/delta_objects.ipynb +++ b/nbs/API/delta_objects.ipynb @@ -75,7 +75,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "#| export\n", 
"class DeltaDelta(object):\n", " \"\"\"\n", @@ -87,16 +86,16 @@ " $$\\Delta_{2} = \\overline{X}_{A_{2}, B_{2}} - \\overline{X}_{A_{1}, B_{2}}$$\n", "\n", "\n", - " where $\\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\\Delta$ is the mean difference between two samples. \n", + " where $\\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\\Delta$ is the mean difference between two samples.\n", "\n", " A delta-delta value is then calculated as the mean difference between the two primary deltas:\n", "\n", "\n", " $$\\Delta_{\\Delta} = \\Delta_{2} - \\Delta_{1}$$\n", - " \n", + "\n", " and a deltas' g value is calculated as the mean difference between the two primary deltas divided by\n", " the standard deviation of the delta-delta value, which is calculated from a pooled variance of the 4 samples:\n", - " \n", + "\n", " $$\\Delta_{g} = \\frac{\\Delta_{\\Delta}}{s_{\\Delta_{\\Delta}}}$$\n", "\n", " $$s_{\\Delta_{\\Delta}} = \\sqrt{\\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$\n", @@ -105,53 +104,56 @@ "\n", "\n", " \"\"\"\n", - " \n", - " def __init__(self, effectsizedataframe, permutation_count,bootstraps_delta_delta,\n", - " ci=95):\n", + "\n", + " def __init__(\n", + " self, effectsizedataframe, permutation_count, bootstraps_delta_delta, ci=95\n", + " ):\n", " from ._stats_tools import effsize as es\n", " from ._stats_tools import confint_1group as ci1g\n", " from ._stats_tools import confint_2group_diff as ci2g\n", - " \n", - " self.__effsizedf = effectsizedataframe.results\n", - " self.__dabest_obj = effectsizedataframe.dabest_obj\n", - " self.__ci = ci\n", - " self.__resamples = effectsizedataframe.resamples\n", - " self.__effect_size = effectsizedataframe.effect_size\n", - " self.__alpha 
= ci2g._compute_alpha_from_ci(ci)\n", - " self.__permutation_count = permutation_count\n", - " self.__bootstraps = np.array(self.__effsizedf[\"bootstraps\"])\n", - " self.__control = self.__dabest_obj.experiment_label[0]\n", - " self.__test = self.__dabest_obj.experiment_label[1]\n", "\n", + " self.__effsizedf = effectsizedataframe.results\n", + " self.__dabest_obj = effectsizedataframe.dabest_obj\n", + " self.__ci = ci\n", + " self.__resamples = effectsizedataframe.resamples\n", + " self.__effect_size = effectsizedataframe.effect_size\n", + " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", + " self.__permutation_count = permutation_count\n", + " self.__bootstraps = np.array(self.__effsizedf[\"bootstraps\"])\n", + " self.__control = self.__dabest_obj.experiment_label[0]\n", + " self.__test = self.__dabest_obj.experiment_label[1]\n", "\n", " # Compute the bootstrap delta-delta or deltas' g and the true dela-delta based on the raw data\n", - " if self.__effect_size == \"mean_diff\":\n", + " if self.__effect_size == \"mean_diff\":\n", " self.__bootstraps_delta_delta = bootstraps_delta_delta[2]\n", - " self.__difference = self.__effsizedf[\"difference\"][1] - self.__effsizedf[\"difference\"][0]\n", + " self.__difference = (\n", + " self.__effsizedf[\"difference\"][1] - self.__effsizedf[\"difference\"][0]\n", + " )\n", " else:\n", " self.__bootstraps_delta_delta = bootstraps_delta_delta[0]\n", " self.__difference = bootstraps_delta_delta[1]\n", - " \n", + "\n", " sorted_delta_delta = npsort(self.__bootstraps_delta_delta)\n", "\n", " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", - " self.__bootstraps_delta_delta, self.__difference)\n", - " \n", - " self.__jackknives = np.array(ci1g.compute_1group_jackknife(\n", - " self.__bootstraps_delta_delta, \n", - " np.mean))\n", + " self.__bootstraps_delta_delta, self.__difference\n", + " )\n", + "\n", + " self.__jackknives = np.array(\n", + " ci1g.compute_1group_jackknife(self.__bootstraps_delta_delta, 
np.mean)\n", + " )\n", "\n", " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", "\n", " # Compute BCa intervals.\n", " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", - " self.__bias_correction, self.__acceleration_value,\n", - " self.__resamples, ci)\n", - " \n", + " self.__bias_correction, self.__acceleration_value, self.__resamples, ci\n", + " )\n", + "\n", " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", "\n", " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", - " self.__bca_low = sorted_delta_delta[bca_idx_low]\n", + " self.__bca_low = sorted_delta_delta[bca_idx_low]\n", " self.__bca_high = sorted_delta_delta[bca_idx_high]\n", "\n", " err1 = \"The $lim_type limit of the interval\"\n", @@ -160,14 +162,14 @@ " err_temp = Template(\" \".join([err1, err2, err3]))\n", "\n", " if bca_idx_low <= 10:\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", - " loc=\"bottom\"),\n", - " stacklevel=1)\n", + " warnings.warn(\n", + " err_temp.substitute(lim_type=\"lower\", loc=\"bottom\"), stacklevel=1\n", + " )\n", "\n", - " if bca_idx_high >= self.__resamples-9:\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", - " loc=\"top\"),\n", - " stacklevel=1)\n", + " if bca_idx_high >= self.__resamples - 9:\n", + " warnings.warn(\n", + " err_temp.substitute(lim_type=\"upper\", loc=\"top\"), stacklevel=1\n", + " )\n", "\n", " else:\n", " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", @@ -176,107 +178,103 @@ " err_temp = Template(\" \".join([err1, err2, err3]))\n", "\n", " if isnan(bca_idx_low):\n", - " self.__bca_low = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", - " stacklevel=0)\n", + " self.__bca_low = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"lower\"), stacklevel=0)\n", "\n", " if isnan(bca_idx_high):\n", - " self.__bca_high = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", 
- " stacklevel=0)\n", + " self.__bca_high = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\"), stacklevel=0)\n", "\n", " # Compute percentile intervals.\n", - " pct_idx_low = int((self.__alpha/2) * self.__resamples)\n", - " pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples)\n", + " pct_idx_low = int((self.__alpha / 2) * self.__resamples)\n", + " pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples)\n", "\n", " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", - " self.__pct_low = sorted_delta_delta[pct_idx_low]\n", - " self.__pct_high = sorted_delta_delta[pct_idx_high]\n", - " \n", - " \n", + " self.__pct_low = sorted_delta_delta[pct_idx_low]\n", + " self.__pct_high = sorted_delta_delta[pct_idx_high]\n", "\n", " def __permutation_test(self):\n", " \"\"\"\n", " Perform a permutation test and obtain the permutation p-value\n", " based on the permutation data.\n", " \"\"\"\n", - " self.__permutations = np.array(self.__effsizedf[\"permutations\"])\n", + " self.__permutations = np.array(self.__effsizedf[\"permutations\"])\n", "\n", " THRESHOLD = np.abs(self.__difference)\n", "\n", - " self.__permutations_delta_delta = np.array(self.__permutations[1]-self.__permutations[0])\n", - "\n", - " count = sum(np.abs(self.__permutations_delta_delta)>THRESHOLD)\n", - " self.__pvalue_permutation = count/self.__permutation_count\n", - "\n", + " self.__permutations_delta_delta = np.array(\n", + " self.__permutations[1] - self.__permutations[0]\n", + " )\n", "\n", + " count = sum(np.abs(self.__permutations_delta_delta) > THRESHOLD)\n", + " self.__pvalue_permutation = count / self.__permutation_count\n", "\n", " def __repr__(self, header=True, sigfig=3):\n", " from .misc_tools import print_greeting\n", - " \n", - " first_line = {\"control\" : self.__control,\n", - " \"test\" : self.__test}\n", - " \n", - " if self.__effect_size == \"mean_diff\":\n", + "\n", + " first_line = {\"control\": self.__control, \"test\": self.__test}\n", 
+ "\n", + " if self.__effect_size == \"mean_diff\":\n", " out1 = \"The delta-delta between {control} and {test} \".format(**first_line)\n", " else:\n", " out1 = \"The deltas' g between {control} and {test} \".format(**first_line)\n", - " \n", + "\n", " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", " if \".\" in str(self.__ci):\n", " ci_width = base_string_fmt.format(self.__ci)\n", " else:\n", " ci_width = str(self.__ci)\n", - " \n", - " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", - " \"ci\" : ci_width,\n", - " \"bca_low\" : base_string_fmt.format(self.__bca_low),\n", - " \"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", - " \n", + "\n", + " ci_out = {\n", + " \"es\": base_string_fmt.format(self.__difference),\n", + " \"ci\": ci_width,\n", + " \"bca_low\": base_string_fmt.format(self.__bca_low),\n", + " \"bca_high\": base_string_fmt.format(self.__bca_high),\n", + " }\n", + "\n", " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", " out = out1 + out2\n", "\n", " if header is True:\n", " out = print_greeting() + \"\\n\" + \"\\n\" + out\n", "\n", - "\n", " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", "\n", - " \n", - " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(pval_rounded)\n", + " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(\n", + " pval_rounded\n", + " )\n", " p2 = \"calculated for legacy purposes only. 
\"\n", " pvalue = p1 + p2\n", "\n", - "\n", " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", " bs = bs1 + bs2\n", "\n", - " pval_def1 = \"Any p-value reported is the probability of observing the \" + \\\n", - " \"effect size (or greater),\\nassuming the null hypothesis of \" + \\\n", - " \"zero difference is true.\"\n", - " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", - " \"control and test labels were performed.\"\n", + " pval_def1 = (\n", + " \"Any p-value reported is the probability of observing the \"\n", + " + \"effect size (or greater),\\nassuming the null hypothesis of \"\n", + " + \"zero difference is true.\"\n", + " )\n", + " pval_def2 = (\n", + " \"\\nFor each p-value, 5000 reshuffles of the \"\n", + " + \"control and test labels were performed.\"\n", + " )\n", " pval_def = pval_def1 + pval_def2\n", "\n", - "\n", " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", "\n", - "\n", " def to_dict(self):\n", " \"\"\"\n", " Returns the attributes of the `DeltaDelta` object as a\n", " dictionary.\n", " \"\"\"\n", " # Only get public (user-facing) attributes.\n", - " attrs = [a for a in dir(self)\n", - " if not a.startswith((\"_\", \"to_dict\"))]\n", + " attrs = [a for a in dir(self) if not a.startswith((\"_\", \"to_dict\"))]\n", " out = {}\n", " for a in attrs:\n", " out[a] = getattr(self, a)\n", " return out\n", "\n", - "\n", " @property\n", " def ci(self):\n", " \"\"\"\n", @@ -284,7 +282,6 @@ " \"\"\"\n", " return self.__ci\n", "\n", - "\n", " @property\n", " def alpha(self):\n", " \"\"\"\n", @@ -293,30 +290,25 @@ " \"\"\"\n", " return self.__alpha\n", "\n", - "\n", " @property\n", " def bias_correction(self):\n", " return self.__bias_correction\n", "\n", - "\n", " @property\n", " def bootstraps(self):\n", - " '''\n", + " \"\"\"\n", " Return the bootstrapped deltas from all the experiment groups.\n", - " '''\n", + " 
\"\"\"\n", " return self.__bootstraps\n", "\n", - "\n", " @property\n", " def jackknives(self):\n", " return self.__jackknives\n", "\n", - "\n", " @property\n", " def acceleration_value(self):\n", " return self.__acceleration_value\n", "\n", - "\n", " @property\n", " def bca_low(self):\n", " \"\"\"\n", @@ -324,7 +316,6 @@ " \"\"\"\n", " return self.__bca_low\n", "\n", - "\n", " @property\n", " def bca_high(self):\n", " \"\"\"\n", @@ -332,49 +323,42 @@ " \"\"\"\n", " return self.__bca_high\n", "\n", - "\n", " @property\n", " def bca_interval_idx(self):\n", " return self.__bca_interval_idx\n", "\n", - "\n", " @property\n", " def control(self):\n", - " '''\n", + " \"\"\"\n", " Return the name of the control experiment group.\n", - " '''\n", + " \"\"\"\n", " return self.__control\n", "\n", - "\n", " @property\n", " def test(self):\n", - " '''\n", + " \"\"\"\n", " Return the name of the test experiment group.\n", - " '''\n", + " \"\"\"\n", " return self.__test\n", "\n", - "\n", " @property\n", " def bootstraps_delta_delta(self):\n", - " '''\n", - " Return the delta-delta values calculated from the bootstrapped \n", + " \"\"\"\n", + " Return the delta-delta values calculated from the bootstrapped\n", " deltas.\n", - " '''\n", + " \"\"\"\n", " return self.__bootstraps_delta_delta\n", "\n", - "\n", " @property\n", " def difference(self):\n", - " '''\n", + " \"\"\"\n", " Return the delta-delta value calculated based on the raw data.\n", - " '''\n", + " \"\"\"\n", " return self.__difference\n", "\n", - "\n", " @property\n", - " def pct_interval_idx (self):\n", - " return self.__pct_interval_idx \n", - "\n", + " def pct_interval_idx(self):\n", + " return self.__pct_interval_idx\n", "\n", " @property\n", " def pct_low(self):\n", @@ -383,7 +367,6 @@ " \"\"\"\n", " return self.__pct_low\n", "\n", - "\n", " @property\n", " def pct_high(self):\n", " \"\"\"\n", @@ -391,7 +374,6 @@ " \"\"\"\n", " return self.__pct_high\n", "\n", - "\n", " @property\n", " def 
pvalue_permutation(self):\n", " try:\n", @@ -399,7 +381,6 @@ " except AttributeError:\n", " self.__permutation_test()\n", " return self.__pvalue_permutation\n", - " \n", "\n", " @property\n", " def permutation_count(self):\n", @@ -408,32 +389,29 @@ " \"\"\"\n", " return self.__permutation_count\n", "\n", - " \n", " @property\n", " def permutations(self):\n", - " '''\n", + " \"\"\"\n", " Return the mean differences of permutations obtained during\n", " the permutation test for each experiment group.\n", - " '''\n", + " \"\"\"\n", " try:\n", " return self.__permutations\n", " except AttributeError:\n", " self.__permutation_test()\n", " return self.__permutations\n", "\n", - " \n", " @property\n", " def permutations_delta_delta(self):\n", - " '''\n", - " Return the delta-delta values of permutations obtained \n", + " \"\"\"\n", + " Return the delta-delta values of permutations obtained\n", " during the permutation test.\n", - " '''\n", + " \"\"\"\n", " try:\n", " return self.__permutations_delta_delta\n", " except AttributeError:\n", " self.__permutation_test()\n", - " return self.__permutations_delta_delta\n", - "\n" + " return self.__permutations_delta_delta" ] }, { diff --git a/nbs/API/effsize_objects.ipynb b/nbs/API/effsize_objects.ipynb index 778eba2c..ba45b867 100644 --- a/nbs/API/effsize_objects.ipynb +++ b/nbs/API/effsize_objects.ipynb @@ -84,7 +84,7 @@ " \"\"\"\n", " A class to compute and store the results of bootstrapped\n", " mean differences between two groups.\n", - " \n", + "\n", " Compute the effect size between two groups.\n", "\n", " Parameters\n", @@ -100,7 +100,7 @@ " The number of bootstrap resamples to be taken for the calculation\n", " of the confidence interval limits.\n", " permutation_count : int, default 5000\n", - " The number of permutations (reshuffles) to perform for the \n", + " The number of permutations (reshuffles) to perform for the\n", " computation of the permutation p-value\n", " ci : float, default 95\n", " The confidence 
interval width. The default of 95 produces 95%\n", @@ -135,102 +135,208 @@ " The percentile confidence interval lower limit and upper limits, respectively.\n", " \"\"\"\n", "\n", - " def __init__(self, control, test, effect_size,\n", - " proportional=False,\n", - " is_paired=None, ci=95,\n", - " resamples=5000, \n", - " permutation_count=5000, \n", - " random_seed=12345):\n", - " \n", - " from ._stats_tools import effsize as es\n", + " def __init__(\n", + " self,\n", + " control,\n", + " test,\n", + " effect_size,\n", + " proportional=False,\n", + " is_paired=None,\n", + " ci=95,\n", + " resamples=5000,\n", + " permutation_count=5000,\n", + " random_seed=12345,\n", + " ):\n", " from ._stats_tools import confint_2group_diff as ci2g\n", + " from ._stats_tools import effsize as es\n", "\n", + " self.__EFFECT_SIZE_DICT = {\n", + " \"mean_diff\": \"mean difference\",\n", + " \"median_diff\": \"median difference\",\n", + " \"cohens_d\": \"Cohen's d\",\n", + " \"cohens_h\": \"Cohen's h\",\n", + " \"hedges_g\": \"Hedges' g\",\n", + " \"cliffs_delta\": \"Cliff's delta\",\n", + " \"delta_g\": \"deltas' g\",\n", + " }\n", "\n", - " self.__EFFECT_SIZE_DICT = {\"mean_diff\" : \"mean difference\",\n", - " \"median_diff\" : \"median difference\",\n", - " \"cohens_d\" : \"Cohen's d\",\n", - " \"cohens_h\" : \"Cohen's h\",\n", - " \"hedges_g\" : \"Hedges' g\",\n", - " \"cliffs_delta\" : \"Cliff's delta\",\n", - " \"delta_g\" : \"deltas' g\"}\n", - "\n", - "\n", - " kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()]\n", - " if effect_size not in kosher_es:\n", - " err1 = \"The effect size '{}'\".format(effect_size)\n", - " err2 = \"is not one of {}\".format(kosher_es)\n", - " raise ValueError(\" \".join([err1, err2]))\n", - "\n", - " if effect_size == \"cliffs_delta\" and is_paired:\n", - " err1 = \"`paired` is not None; therefore Cliff's delta is not defined.\"\n", - " raise ValueError(err1)\n", - "\n", - " if proportional==True and effect_size not in 
['mean_diff','cohens_h']:\n", - " err1 = \"`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined.\"\n", - " raise ValueError(err1)\n", - "\n", - " if proportional==True and (isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False):\n", - " err1 = \"`proportional` is True; Only accept binary data consisting of 0 and 1.\"\n", - " raise ValueError(err1)\n", + " self.__is_paired = is_paired\n", + " self.__resamples = resamples\n", + " self.__effect_size = effect_size\n", + " self.__random_seed = random_seed\n", + " self.__ci = ci\n", + " self.__proportional = proportional\n", + " self.check_errors(control, test)\n", "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", " control = array(control)\n", - " test = array(test)\n", - " control = control[~isnan(control)]\n", - " test = test[~isnan(test)]\n", - "\n", - " self.__effect_size = effect_size\n", - " # TODO refactor this\n", - " self.__control = control\n", - " self.__test = test\n", - " self.__is_paired = is_paired\n", - " self.__resamples = resamples\n", + " test = array(test)\n", + " self.__control = control[~isnan(control)]\n", + " self.__test = test[~isnan(test)]\n", " self.__permutation_count = permutation_count\n", - " self.__random_seed = random_seed\n", - " self.__ci = ci\n", - " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", + "\n", + " self.__alpha = ci2g._compute_alpha_from_ci(self.__ci)\n", "\n", " self.__difference = es.two_group_difference(\n", - " control, test, is_paired, effect_size)\n", - " \n", + " self.__control, test, self.__is_paired, self.__effect_size\n", + " )\n", + "\n", " self.__jackknives = ci2g.compute_meandiff_jackknife(\n", - " control, test, is_paired, effect_size)\n", + " self.__control, test, self.__is_paired, self.__effect_size\n", + " )\n", "\n", " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", "\n", " bootstraps = ci2g.compute_bootstrapped_diff(\n", - " control, 
test, is_paired, effect_size,\n", - " resamples, random_seed)\n", + " self.__control,\n", + " test,\n", + " self.__is_paired,\n", + " self.__effect_size,\n", + " self.__resamples,\n", + " self.__random_seed,\n", + " )\n", " self.__bootstraps = bootstraps\n", - " \n", + "\n", " sorted_bootstraps = npsort(self.__bootstraps)\n", " # Added in v0.2.6.\n", " # Raises a UserWarning if there are any infiinities in the bootstraps.\n", " num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)])\n", - " \n", + "\n", " if num_infinities > 0:\n", - " warn_msg = \"There are {} bootstrap(s) that are not defined. \"\\\n", - " \"This is likely due to smaple sample sizes. \"\\\n", - " \"The values in a bootstrap for a group will be more likely \"\\\n", - " \"to be all equal, with a resulting variance of zero. \"\\\n", - " \"The computation of Cohen's d and Hedges' g thus \"\\\n", - " \"involved a division by zero. \"\n", - " warnings.warn(warn_msg.format(num_infinities), \n", - " category=UserWarning)\n", + " warn_msg = (\n", + " \"There are {} bootstrap(s) that are not defined. \"\n", + " \"This is likely due to smaple sample sizes. \"\n", + " \"The values in a bootstrap for a group will be more likely \"\n", + " \"to be all equal, with a resulting variance of zero. \"\n", + " \"The computation of Cohen's d and Hedges' g thus \"\n", + " \"involved a division by zero. 
\"\n", + " )\n", + " warnings.warn(warn_msg.format(num_infinities), category=UserWarning)\n", "\n", " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", - " self.__bootstraps, self.__difference)\n", + " self.__bootstraps, self.__difference\n", + " )\n", + "\n", + " self.compute_bca_intervals(sorted_bootstraps)\n", + "\n", + " # Compute percentile intervals.\n", + " pct_idx_low = int((self.__alpha / 2) * self.__resamples)\n", + " pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples)\n", + "\n", + " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", + " self.__pct_low = sorted_bootstraps[pct_idx_low]\n", + " self.__pct_high = sorted_bootstraps[pct_idx_high]\n", + "\n", + " self.perform_statistical_test()\n", + "\n", + " def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):\n", + " RM_STATUS = {\n", + " \"baseline\": \"for repeated measures against baseline \\n\",\n", + " \"sequential\": \"for the sequential design of repeated-measures experiment \\n\",\n", + " \"None\": \"\",\n", + " }\n", + "\n", + " PAIRED_STATUS = {\n", + " \"baseline\": \"paired\",\n", + " \"sequential\": \"paired\",\n", + " \"None\": \"unpaired\",\n", + " }\n", + "\n", + " first_line = {\n", + " \"rm_status\": RM_STATUS[str(self.__is_paired)],\n", + " \"es\": self.__EFFECT_SIZE_DICT[self.__effect_size],\n", + " \"paired_status\": PAIRED_STATUS[str(self.__is_paired)],\n", + " }\n", + "\n", + " out1 = \"The {paired_status} {es} {rm_status}\".format(**first_line)\n", + "\n", + " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", + " if \".\" in str(self.__ci):\n", + " ci_width = base_string_fmt.format(self.__ci)\n", + " else:\n", + " ci_width = str(self.__ci)\n", + "\n", + " ci_out = {\n", + " \"es\": base_string_fmt.format(self.__difference),\n", + " \"ci\": ci_width,\n", + " \"bca_low\": base_string_fmt.format(self.__bca_low),\n", + " \"bca_high\": base_string_fmt.format(self.__bca_high),\n", + " }\n", + "\n", + " out2 = \"is {es} [{ci}%CI 
{bca_low}, {bca_high}].\".format(**ci_out)\n", + " out = out1 + out2\n", + "\n", + " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", + "\n", + " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(\n", + " pval_rounded\n", + " )\n", + " p2 = \"calculated for legacy purposes only. \"\n", + " pvalue = p1 + p2\n", + "\n", + " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", + " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", + " bs = bs1 + bs2\n", + "\n", + " pval_def1 = (\n", + " \"Any p-value reported is the probability of observing the\"\n", + " + \"effect size (or greater),\\nassuming the null hypothesis of\"\n", + " + \"zero difference is true.\"\n", + " )\n", + " pval_def2 = (\n", + " \"\\nFor each p-value, 5000 reshuffles of the \"\n", + " + \"control and test labels were performed.\"\n", + " )\n", + " pval_def = pval_def1 + pval_def2\n", + "\n", + " if show_resample_count and define_pval:\n", + " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", + " elif ~show_resample_count and define_pval:\n", + " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, pval_def)\n", + " elif show_resample_count and ~define_pval:\n", + " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, bs)\n", + " else:\n", + " return \"{}\\n{}\".format(out, pvalue)\n", + "\n", + " def check_errors(self, control, test):\n", + " kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()]\n", + " if self.__effect_size not in kosher_es:\n", + " err1 = \"The effect size '{}'\".format(self.__effect_size)\n", + " err2 = \"is not one of {}\".format(kosher_es)\n", + " raise ValueError(\" \".join([err1, err2]))\n", + "\n", + " if self.__effect_size == \"cliffs_delta\" and self.__is_paired:\n", + " err1 = \"`paired` is not None; therefore Cliff's delta is not defined.\"\n", + " raise ValueError(err1)\n", + "\n", + " if self.__proportional and self.__effect_size not in [\"mean_diff\", \"cohens_h\"]:\n", + " err1 
= \"`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined.\"\n", + " raise ValueError(err1)\n", + "\n", + " if self.__proportional and (\n", + " isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False\n", + " ):\n", + " err1 = (\n", + " \"`proportional` is True; Only accept binary data consisting of 0 and 1.\"\n", + " )\n", + " raise ValueError(err1)\n", + "\n", + " def compute_bca_intervals(self, sorted_bootstraps):\n", + " from ._stats_tools import confint_2group_diff as ci2g\n", "\n", " # Compute BCa intervals.\n", " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", - " self.__bias_correction, self.__acceleration_value,\n", - " self.__resamples, ci)\n", + " self.__bias_correction,\n", + " self.__acceleration_value,\n", + " self.__resamples,\n", + " self.__ci,\n", + " )\n", "\n", " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", "\n", " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", - " self.__bca_low = sorted_bootstraps[bca_idx_low]\n", + " self.__bca_low = sorted_bootstraps[bca_idx_low]\n", " self.__bca_high = sorted_bootstraps[bca_idx_high]\n", "\n", " err1 = \"The $lim_type limit of the interval\"\n", @@ -239,14 +345,14 @@ " err_temp = Template(\" \".join([err1, err2, err3]))\n", "\n", " if bca_idx_low <= 10:\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", - " loc=\"bottom\"),\n", - " stacklevel=1)\n", + " warnings.warn(\n", + " err_temp.substitute(lim_type=\"lower\", loc=\"bottom\"), stacklevel=1\n", + " )\n", "\n", - " if bca_idx_high >= resamples-9:\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", - " loc=\"top\"),\n", - " stacklevel=1)\n", + " if bca_idx_high >= self.__resamples - 9:\n", + " warnings.warn(\n", + " err_temp.substitute(lim_type=\"upper\", loc=\"top\"), stacklevel=1\n", + " )\n", "\n", " else:\n", " # TODO improve error handling, separate file\n", @@ -256,95 +362,97 @@ " err_temp = Template(\" \".join([err1, err2, 
err3]))\n", "\n", " if isnan(bca_idx_low):\n", - " self.__bca_low = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", - " stacklevel=0)\n", + " self.__bca_low = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"lower\"), stacklevel=0)\n", "\n", " if isnan(bca_idx_high):\n", - " self.__bca_high = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", - " stacklevel=0)\n", - "\n", - " # Compute percentile intervals.\n", - " pct_idx_low = int((self.__alpha/2) * resamples)\n", - " pct_idx_high = int((1-(self.__alpha/2)) * resamples)\n", + " self.__bca_high = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\"), stacklevel=0)\n", "\n", - " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", - " self.__pct_low = sorted_bootstraps[pct_idx_low]\n", - " self.__pct_high = sorted_bootstraps[pct_idx_high]\n", + " def perform_statistical_test(self):\n", + " from ._stats_tools import effsize as es\n", "\n", " # Perform statistical tests.\n", - " self.__PermutationTest_result = PermutationTest(control, test, \n", - " effect_size, \n", - " is_paired,\n", - " permutation_count)\n", - " \n", - " if is_paired and proportional is False:\n", + " self.__PermutationTest_result = PermutationTest(\n", + " self.__control,\n", + " self.__test,\n", + " self.__effect_size,\n", + " self.__is_paired,\n", + " self.__permutation_count,\n", + " )\n", + "\n", + " if self.__is_paired and self.__proportional is False:\n", " # Wilcoxon, a non-parametric version of the paired T-test.\n", - " wilcoxon = spstats.wilcoxon(control, test)\n", + " wilcoxon = spstats.wilcoxon(self.__control, self.__test)\n", " self.__pvalue_wilcoxon = wilcoxon.pvalue\n", " self.__statistic_wilcoxon = wilcoxon.statistic\n", - " \n", - " \n", - " if effect_size != \"median_diff\":\n", + "\n", + " if self.__effect_size != \"median_diff\":\n", " # Paired Student's t-test.\n", - " paired_t = 
spstats.ttest_rel(control, test, nan_policy='omit')\n", + " paired_t = spstats.ttest_rel(\n", + " self.__control, self.__test, nan_policy=\"omit\"\n", + " )\n", " self.__pvalue_paired_students_t = paired_t.pvalue\n", " self.__statistic_paired_students_t = paired_t.statistic\n", " # TODO dead code\n", - " standardized_es = es.cohens_d(control, test, is_paired)\n", + " standardized_es = es.cohens_d(\n", + " self.__control, self.__test, self.__is_paired\n", + " )\n", "\n", - " elif is_paired and proportional:\n", + " elif self.__is_paired and self.__proportional:\n", " # for binary paired data, use McNemar's test\n", " # References:\n", " # https://en.wikipedia.org/wiki/McNemar%27s_test\n", "\n", - " df_temp = pd.DataFrame({'control': control, 'test': test})\n", - " x1 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 0)])\n", - " x2 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 1)])\n", - " x3 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 0)])\n", - " x4 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 1)])\n", - " table = [[x1,x2],[x3,x4]]\n", + " df_temp = pd.DataFrame({\"control\": self.__control, \"test\": self.__test})\n", + " x1 = len(df_temp[(df_temp[\"control\"] == 0) & (df_temp[\"test\"] == 0)])\n", + " x2 = len(df_temp[(df_temp[\"control\"] == 0) & (df_temp[\"test\"] == 1)])\n", + " x3 = len(df_temp[(df_temp[\"control\"] == 1) & (df_temp[\"test\"] == 0)])\n", + " x4 = len(df_temp[(df_temp[\"control\"] == 1) & (df_temp[\"test\"] == 1)])\n", + " table = [[x1, x2], [x3, x4]]\n", " _mcnemar = mcnemar(table, exact=True, correction=True)\n", " self.__pvalue_mcnemar = _mcnemar.pvalue\n", " self.__statistic_mcnemar = _mcnemar.statistic\n", "\n", - " elif effect_size == \"cliffs_delta\":\n", + " elif self.__effect_size == \"cliffs_delta\":\n", " # Let's go with Brunner-Munzel!\n", - " brunner_munzel = spstats.brunnermunzel(control, test,\n", - " nan_policy='omit')\n", + " brunner_munzel = spstats.brunnermunzel(\n", + 
" self.__control, self.__test, nan_policy=\"omit\"\n", + " )\n", " self.__pvalue_brunner_munzel = brunner_munzel.pvalue\n", " self.__statistic_brunner_munzel = brunner_munzel.statistic\n", "\n", - "\n", - " elif effect_size == \"median_diff\":\n", + " elif self.__effect_size == \"median_diff\":\n", " # According to scipy's documentation of the function,\n", " # \"The Kruskal-Wallis H-test tests the null hypothesis\n", " # that the population median of all of the groups are equal.\"\n", - " kruskal = spstats.kruskal(control, test, nan_policy='omit')\n", + " kruskal = spstats.kruskal(self.__control, self.__test, nan_policy=\"omit\")\n", " self.__pvalue_kruskal = kruskal.pvalue\n", " self.__statistic_kruskal = kruskal.statistic\n", "\n", - " else: # for mean difference, Cohen's d, and Hedges' g.\n", + " else: # for mean difference, Cohen's d, and Hedges' g.\n", " # Welch's t-test, assumes normality of distributions,\n", " # but does not assume equal variances.\n", - " welch = spstats.ttest_ind(control, test, equal_var=False,\n", - " nan_policy='omit')\n", + " welch = spstats.ttest_ind(\n", + " self.__control, self.__test, equal_var=False, nan_policy=\"omit\"\n", + " )\n", " self.__pvalue_welch = welch.pvalue\n", " self.__statistic_welch = welch.statistic\n", "\n", " # Student's t-test, assumes normality of distributions,\n", " # as well as assumption of equal variances.\n", - " students_t = spstats.ttest_ind(control, test, equal_var=True,\n", - " nan_policy='omit')\n", + " students_t = spstats.ttest_ind(\n", + " self.__control, self.__test, equal_var=True, nan_policy=\"omit\"\n", + " )\n", " self.__pvalue_students_t = students_t.pvalue\n", " self.__statistic_students_t = students_t.statistic\n", "\n", " # Mann-Whitney test: Non parametric,\n", " # does not assume normality of distributions\n", " try:\n", - " mann_whitney = spstats.mannwhitneyu(control, test, \n", - " alternative='two-sided')\n", + " mann_whitney = spstats.mannwhitneyu(\n", + " self.__control, 
self.__test, alternative=\"two-sided\"\n", + " )\n", " self.__pvalue_mann_whitney = mann_whitney.pvalue\n", " self.__statistic_mann_whitney = mann_whitney.statistic\n", " except ValueError:\n", @@ -352,94 +460,31 @@ " # Occurs when the control and test are exactly identical\n", " # in terms of rank (eg. all zeros.)\n", " pass\n", - " \n", - " \n", - " standardized_es = es.cohens_d(control, test, is_paired = None)\n", - " \n", + "\n", + " standardized_es = es.cohens_d(self.__control, self.__test, is_paired=None)\n", + "\n", " # The Cohen's h calculation is for binary categorical data\n", " try:\n", - " self.__proportional_difference = es.cohens_h(control, test)\n", + " self.__proportional_difference = es.cohens_h(\n", + " self.__control, self.__test\n", + " )\n", " except ValueError:\n", " # TODO At least print some warning?\n", " # Occur only when the data consists not only 0's and 1's.\n", " pass\n", "\n", - "\n", - " def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):\n", - " \n", - " RM_STATUS = {'baseline' : 'for repeated measures against baseline \\n', \n", - " 'sequential': 'for the sequential design of repeated-measures experiment \\n',\n", - " 'None' : ''\n", - " }\n", - "\n", - " PAIRED_STATUS = {'baseline' : 'paired', \n", - " 'sequential' : 'paired',\n", - " 'None' : 'unpaired'\n", - " }\n", - "\n", - " first_line = {\"rm_status\" : RM_STATUS[str(self.__is_paired)],\n", - " \"es\" : self.__EFFECT_SIZE_DICT[self.__effect_size],\n", - " \"paired_status\": PAIRED_STATUS[str(self.__is_paired)]}\n", - " \n", - "\n", - " out1 = \"The {paired_status} {es} {rm_status}\".format(**first_line)\n", - " \n", - " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", - " if \".\" in str(self.__ci):\n", - " ci_width = base_string_fmt.format(self.__ci)\n", - " else:\n", - " ci_width = str(self.__ci)\n", - " \n", - " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", - " \"ci\" : ci_width,\n", - " \"bca_low\" : 
base_string_fmt.format(self.__bca_low),\n", - " \"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", - " \n", - " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", - " out = out1 + out2\n", - " \n", - " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", - " \n", - " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(pval_rounded)\n", - " p2 = \"calculated for legacy purposes only. \"\n", - " pvalue = p1 + p2\n", - " \n", - " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", - " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", - " bs = bs1 + bs2\n", - "\n", - " pval_def1 = \"Any p-value reported is the probability of observing the\" + \\\n", - " \"effect size (or greater),\\nassuming the null hypothesis of\" + \\\n", - " \"zero difference is true.\"\n", - " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", - " \"control and test labels were performed.\"\n", - " pval_def = pval_def1 + pval_def2\n", - "\n", - " if show_resample_count and define_pval:\n", - " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", - " elif ~show_resample_count and define_pval:\n", - " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, pval_def)\n", - " elif show_resample_count and ~define_pval:\n", - " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, bs)\n", - " else:\n", - " return \"{}\\n{}\".format(out, pvalue)\n", - "\n", - "\n", - "\n", " def to_dict(self):\n", " \"\"\"\n", " Returns the attributes of the `dabest.TwoGroupEffectSize` object as a\n", " dictionary.\n", " \"\"\"\n", " # Only get public (user-facing) attributes.\n", - " attrs = [a for a in dir(self)\n", - " if not a.startswith((\"_\", \"to_dict\"))]\n", + " attrs = [a for a in dir(self) if not a.startswith((\"_\", \"to_dict\"))]\n", " out = {}\n", " for a in attrs:\n", " out[a] = getattr(self, a)\n", " return out\n", "\n", - "\n", " @property\n", " def difference(self):\n", " 
\"\"\"\n", @@ -459,6 +504,10 @@ " return self.__is_paired\n", "\n", " @property\n", + " def proportional(self):\n", + " return self.__proportional\n", + "\n", + " @property\n", " def ci(self):\n", " \"\"\"\n", " Returns the width of the confidence interval, in percent.\n", @@ -531,8 +580,6 @@ " \"\"\"\n", " return self.__pct_high\n", "\n", - "\n", - "\n", " @property\n", " def pvalue_brunner_munzel(self):\n", " try:\n", @@ -547,8 +594,6 @@ " except AttributeError:\n", " return npnan\n", "\n", - "\n", - "\n", " @property\n", " def pvalue_wilcoxon(self):\n", " try:\n", @@ -577,8 +622,6 @@ " except AttributeError:\n", " return npnan\n", "\n", - "\n", - "\n", " @property\n", " def pvalue_paired_students_t(self):\n", " # TODO Missing docstring\n", @@ -595,8 +638,6 @@ " except AttributeError:\n", " return npnan\n", "\n", - "\n", - "\n", " @property\n", " def pvalue_kruskal(self):\n", " # TODO Missing docstring\n", @@ -613,7 +654,6 @@ " except AttributeError:\n", " return npnan\n", "\n", - "\n", " @property\n", " def pvalue_welch(self):\n", " # TODO Missing docstring\n", @@ -630,8 +670,6 @@ " except AttributeError:\n", " return npnan\n", "\n", - "\n", - "\n", " @property\n", " def pvalue_students_t(self):\n", " # TODO Missing docstring\n", @@ -648,8 +686,6 @@ " except AttributeError:\n", " return npnan\n", "\n", - "\n", - "\n", " @property\n", " def pvalue_mann_whitney(self):\n", " # TODO Missing docstring\n", @@ -658,8 +694,6 @@ " except AttributeError:\n", " return npnan\n", "\n", - "\n", - "\n", " @property\n", " def statistic_mann_whitney(self):\n", " # TODO Missing docstring\n", @@ -667,37 +701,33 @@ " return self.__statistic_mann_whitney\n", " except AttributeError:\n", " return npnan\n", - " \n", + "\n", " @property\n", " def pvalue_permutation(self):\n", " # TODO Missing docstring\n", " return self.__PermutationTest_result.pvalue\n", - " \n", "\n", " @property\n", " def permutation_count(self):\n", " \"\"\"\n", - " The number of permuations taken.\n", + " The 
number of permutations taken.\n", " \"\"\"\n", " return self.__PermutationTest_result.permutation_count\n", "\n", - " \n", " @property\n", " def permutations(self):\n", " return self.__PermutationTest_result.permutations\n", "\n", - " \n", " @property\n", " def permutations_var(self):\n", " return self.__PermutationTest_result.permutations_var\n", "\n", - "\n", " @property\n", " def proportional_difference(self):\n", " try:\n", " return self.__proportional_difference\n", " except AttributeError:\n", - " return npnan\n" + " return npnan" ] }, { @@ -844,7 +874,7 @@ " out = []\n", " reprs = []\n", " \n", - " if self.__delta2==True:\n", + " if self.__delta2:\n", " mixed_data = []\n", " for j, current_tuple in enumerate(idx):\n", " if self.__is_paired != \"sequential\":\n", @@ -1734,7 +1764,6 @@ " self.__permutations_var = []\n", "\n", " for i in range(int(permutation_count)):\n", - " \n", " if is_paired:\n", " # Select which control-test pairs to swap.\n", " random_idx = rng.choice(CONTROL_LEN,\n", diff --git a/nbs/API/load.ipynb b/nbs/API/load.ipynb index 3a0d5434..5ae39e13 100644 --- a/nbs/API/load.ipynb +++ b/nbs/API/load.ipynb @@ -39,7 +39,8 @@ "#| hide\n", "from nbdev.showdoc import *\n", "import nbdev\n", - "nbdev.nbdev_export()\n" + "\n", + "nbdev.nbdev_export()" ] }, { @@ -49,11 +50,24 @@ "outputs": [], "source": [ "#| export\n", - "def load(data, idx=None, x=None, y=None, paired=None, id_col=None,\n", - " ci=95, resamples=5000, random_seed=12345, proportional=False, \n", - " delta2 = False, experiment = None, experiment_label = None,\n", - " x1_level = None, mini_meta=False):\n", - " '''\n", + "def load(\n", + " data,\n", + " idx=None,\n", + " x=None,\n", + " y=None,\n", + " paired=None,\n", + " id_col=None,\n", + " ci=95,\n", + " resamples=5000,\n", + " random_seed=12345,\n", + " proportional=False,\n", + " delta2=False,\n", + " experiment=None,\n", + " experiment_label=None,\n", + " x1_level=None,\n", + " mini_meta=False,\n", + "):\n", + " \"\"\"\n", " Loads 
data in preparation for estimation statistics.\n", "\n", " This is designed to work with pandas DataFrames.\n", @@ -67,15 +81,15 @@ " with each individual tuple producing its own contrast plot\n", " x : string or list, default None\n", " Column name(s) of the independent variable. This can be expressed as\n", - " a list of 2 elements if and only if 'delta2' is True; otherwise it \n", + " a list of 2 elements if and only if 'delta2' is True; otherwise it\n", " can only be a string.\n", " y : string, default None\n", " Column names for data to be plotted on the x-axis and y-axis.\n", " paired : string, default None\n", - " The type of the experiment under which the data are obtained. If 'paired' \n", + " The type of the experiment under which the data are obtained. If 'paired'\n", " is None then the data will not be treated as paired data in the subsequent\n", - " calculations. If 'paired' is 'baseline', then in each tuple of x, other \n", - " groups will be paired up with the first group (as control). If 'paired' is \n", + " calculations. If 'paired' is 'baseline', then in each tuple of x, other\n", + " groups will be paired up with the first group (as control). If 'paired' is\n", " 'sequential', then in each tuple of x, each group will be paired up with\n", " its previous group (as control).\n", " id_col : default None.\n", @@ -90,7 +104,7 @@ " This integer is used to seed the random number generator during\n", " bootstrap resampling, ensuring that the confidence intervals\n", " reported are replicable.\n", - " proportional : boolean, default False. \n", + " proportional : boolean, default False.\n", " An indicator of whether the data is binary or not. When set to True, it\n", " specifies that the data consists of binary data, where the values are\n", " limited to 0 and 1. 
The code is not suitable for analyzing proportion\n", @@ -100,27 +114,42 @@ " delta2 : boolean, default False\n", " Indicator of delta-delta experiment\n", " experiment : String, default None\n", - " The name of the column of the dataframe which contains the label of \n", + " The name of the column of the dataframe which contains the label of\n", " experiments\n", " experiment_lab : list, default None\n", " A list of String to specify the order of subplots for delta-delta plots.\n", - " This can be expressed as a list of 2 elements if and only if 'delta2' \n", - " is True; otherwise it can only be a string. \n", + " This can be expressed as a list of 2 elements if and only if 'delta2'\n", + " is True; otherwise it can only be a string.\n", " x1_level : list, default None\n", " A list of String to specify the order of subplots for delta-delta plots.\n", - " This can be expressed as a list of 2 elements if and only if 'delta2' \n", - " is True; otherwise it can only be a string. \n", + " This can be expressed as a list of 2 elements if and only if 'delta2'\n", + " is True; otherwise it can only be a string.\n", " mini_meta : boolean, default False\n", " Indicator of weighted delta calculation.\n", "\n", " Returns\n", " -------\n", " A `Dabest` object.\n", - " '''\n", + " \"\"\"\n", " from dabest import Dabest\n", "\n", - " return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment, experiment_label, x1_level, mini_meta)\n", - "\n" + " return Dabest(\n", + " data,\n", + " idx,\n", + " x,\n", + " y,\n", + " paired,\n", + " id_col,\n", + " ci,\n", + " resamples,\n", + " random_seed,\n", + " proportional,\n", + " delta2,\n", + " experiment,\n", + " experiment_label,\n", + " x1_level,\n", + " mini_meta,\n", + " )" ] }, { @@ -129,54 +158,76 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "import numpy as np\n", "from typing import Union, Optional\n", "import pandas as pd\n", "\n", - "def 
prop_dataset(group:Union[list, tuple, np.ndarray, dict], #Accepts lists, tuples, or numpy ndarrays of numeric types.\n", - " group_names: Optional[list] = None):\n", - " '''\n", + "\n", + "def prop_dataset(\n", + " group: Union[\n", + " list, tuple, np.ndarray, dict\n", + " ], # Accepts lists, tuples, or numpy ndarrays of numeric types.\n", + " group_names: Optional[list] = None,\n", + "):\n", + " \"\"\"\n", " Convenient function to generate a dataframe of binary data.\n", - " '''\n", - " \n", + " \"\"\"\n", + "\n", " if isinstance(group, dict):\n", " # If group_names is not provided, use the keys of the dict as group_names\n", " if group_names is None:\n", " group_names = list(group.keys())\n", " elif not set(group_names) == set(group.keys()):\n", " # Check if the group_names provided is the same as the keys of the dict\n", - " raise ValueError('group_names must be the same as the keys of the dict.')\n", + " raise ValueError(\"group_names must be the same as the keys of the dict.\")\n", " # Check if the values in the dict are numeric\n", - " if not all([isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names]):\n", - " raise ValueError('group must be a dict of lists, tuples, or numpy ndarrays of numeric types.')\n", + " if not all(\n", + " [isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names]\n", + " ):\n", + " raise ValueError(\n", + " \"group must be a dict of lists, tuples, or numpy ndarrays of numeric types.\"\n", + " )\n", " # Check if the values in the dict only have two elements under each parent key\n", " if not all([len(group[name]) == 2 for name in group_names]):\n", - " raise ValueError('Each parent key should have only two elements.')\n", + " raise ValueError(\"Each parent key should have only two elements.\")\n", " group_val = group\n", "\n", " else:\n", " if group_names is None:\n", - " raise ValueError('group_names must be provided if group is not a dict.')\n", + " raise ValueError(\"group_names must be 
provided if group is not a dict.\")\n", " # Check if the length of group is two times of the length of group_names\n", " if not len(group) == 2 * len(group_names):\n", - " raise ValueError('The length of group must be two times of the length of group_names.')\n", - " group_val = {group_names[i]: [group[i*2], group[i*2+1]] for i in range(len(group_names))}\n", + " raise ValueError(\n", + " \"The length of group must be two times of the length of group_names.\"\n", + " )\n", + " group_val = {\n", + " group_names[i]: [group[i * 2], group[i * 2 + 1]]\n", + " for i in range(len(group_names))\n", + " }\n", "\n", " # Check if the sum of values in group_val under each key are the same\n", - " if not all([sum(group_val[name]) == sum(group_val[group_names[0]]) for name in group_val.keys()]):\n", - " raise ValueError('The sum of values under each key must be the same.')\n", - " \n", - " id_col = pd.Series(range(1, sum(group_val[group_names[0]])+1))\n", - " \n", + " if not all(\n", + " [\n", + " sum(group_val[name]) == sum(group_val[group_names[0]])\n", + " for name in group_val.keys()\n", + " ]\n", + " ):\n", + " raise ValueError(\"The sum of values under each key must be the same.\")\n", + "\n", + " id_col = pd.Series(range(1, sum(group_val[group_names[0]]) + 1))\n", + "\n", " final_df = pd.DataFrame()\n", "\n", " for name in group_val.keys():\n", - " col = np.repeat(0, group_val[name][0]).tolist() + np.repeat(1, group_val[name][1]).tolist()\n", - " df = pd.DataFrame({name:col})\n", + " col = (\n", + " np.repeat(0, group_val[name][0]).tolist()\n", + " + np.repeat(1, group_val[name][1]).tolist()\n", + " )\n", + " df = pd.DataFrame({name: col})\n", " final_df = pd.concat([final_df, df], axis=1)\n", "\n", - " final_df['ID'] = id_col\n", + " final_df[\"ID\"] = id_col\n", "\n", " return final_df" ] @@ -217,7 +268,7 @@ "N = 10\n", "c1 = sp.stats.norm.rvs(loc=100, scale=5, size=N)\n", "t1 = sp.stats.norm.rvs(loc=115, scale=5, size=N)\n", - "df = pd.DataFrame({'Control 1' : c1, 
'Test 1': t1})" + "df = pd.DataFrame({\"Control 1\": c1, \"Test 1\": t1})" ] }, { @@ -282,8 +333,8 @@ "N = 10\n", "c1 = np.random.binomial(1, 0.2, size=N)\n", "t1 = np.random.binomial(1, 0.5, size=N)\n", - "df = pd.DataFrame({'Control 1' : c1, 'Test 1': t1})\n", - "my_data = dabest.load(df, idx=(\"Control 1\", \"Test 1\"),proportional=True)" + "df = pd.DataFrame({\"Control 1\": c1, \"Test 1\": t1})\n", + "my_data = dabest.load(df, idx=(\"Control 1\", \"Test 1\"), proportional=True)" ] }, { diff --git a/nbs/API/misc_tools.ipynb b/nbs/API/misc_tools.ipynb index da49407b..e63e8f07 100644 --- a/nbs/API/misc_tools.ipynb +++ b/nbs/API/misc_tools.ipynb @@ -49,14 +49,26 @@ { "cell_type": "code", "execution_count": null, - "id": "6b50da46", + "id": "5f54be1c", "metadata": {}, "outputs": [], "source": [ "#| export\n", - "def merge_two_dicts(x:dict,\n", - " y:dict\n", - " )->dict:#A dictionary containing a union of all keys in both original dicts.\n", + "import datetime as dt\n", + "from numpy import repeat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b50da46", + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "def merge_two_dicts(\n", + " x: dict, y: dict\n", + ") -> dict: # A dictionary containing a union of all keys in both original dicts.\n", " \"\"\"\n", " Given two dicts, merge them into a new dict as a shallow copy.\n", " Any overlapping keys in `y` will override the values in `x`.\n", @@ -70,24 +82,20 @@ " return z\n", "\n", "\n", - "\n", "def unpack_and_add(l, c):\n", " \"\"\"Convenience function to allow me to add to an existing list\n", " without altering that list.\"\"\"\n", " t = [a for a in l]\n", " t.append(c)\n", - " return(t)\n", - "\n", + " return t\n", "\n", "\n", "def print_greeting():\n", " from .__init__ import __version__\n", - " import datetime as dt\n", - " import numpy as np\n", "\n", " line1 = \"DABEST v{}\".format(__version__)\n", - " header = \"\".join(np.repeat(\"=\", len(line1)))\n", - " spacer = 
\"\".join(np.repeat(\" \", len(line1)))\n", + " header = \"\".join(repeat(\"=\", len(line1)))\n", + " spacer = \"\".join(repeat(\" \", len(line1)))\n", "\n", " now = dt.datetime.now()\n", " if 0 < now.hour < 12:\n", @@ -103,11 +111,10 @@ "\n", "\n", "def get_varname(obj):\n", - " matching_vars = [k for k,v in globals().items() if v is obj]\n", + " matching_vars = [k for k, v in globals().items() if v is obj]\n", " if len(matching_vars) > 0:\n", " return matching_vars[0]\n", - " else:\n", - " return \"\"\n" + " return \"\"" ] }, { diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb index 7ad4e9c6..2582367a 100644 --- a/nbs/API/plot_tools.ipynb +++ b/nbs/API/plot_tools.ipynb @@ -60,7 +60,8 @@ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import numpy as np\n", - "import itertools" + "import itertools\n", + "import matplotlib.lines as mlines" ] }, { @@ -70,25 +71,21 @@ "metadata": {}, "outputs": [], "source": [ - "#| export \n", - "\n", - "def halfviolin(v, half='right', fill_color='k', alpha=1,\n", - " line_color='k', line_width=0):\n", - " import numpy as np\n", - "\n", - " for b in v['bodies']:\n", + "#| export\n", + "def halfviolin(v, half=\"right\", fill_color=\"k\", alpha=1, line_color=\"k\", line_width=0):\n", + " for b in v[\"bodies\"]:\n", " V = b.get_paths()[0].vertices\n", "\n", " mean_vertical = np.mean(V[:, 0])\n", " mean_horizontal = np.mean(V[:, 1])\n", "\n", - " if half == 'right':\n", + " if half == \"right\":\n", " V[:, 0] = np.clip(V[:, 0], mean_vertical, np.inf)\n", - " elif half == 'left':\n", + " elif half == \"left\":\n", " V[:, 0] = np.clip(V[:, 0], -np.inf, mean_vertical)\n", - " elif half == 'bottom':\n", + " elif half == \"bottom\":\n", " V[:, 1] = np.clip(V[:, 1], -np.inf, mean_horizontal)\n", - " elif half == 'top':\n", + " elif half == \"top\":\n", " V[:, 1] = np.clip(V[:, 1], mean_horizontal, np.inf)\n", "\n", " b.set_color(fill_color)\n", @@ -102,43 +99,45 @@ " Given a matplotlib Collection, will obtain 
the x and y spans\n", " for the collection. Will return None if this fails.\n", " \"\"\"\n", - " import numpy as np\n", " x, y = np.array(coll.get_offsets()).T\n", " try:\n", " return x.min(), x.max(), y.min(), y.max()\n", " except ValueError:\n", " return None\n", "\n", - "def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format.\n", - " x:str, #x column to be plotted.\n", - " y:str, # y column to be plotted.\n", - " type:str='mean_sd', # Choose from ['mean_sd', 'median_quartiles']. Plots the summary statistics for each group. If 'mean_sd', then the mean and standard deviation of each group is plotted as a gapped line. If 'median_quantiles', then the median and 25th and 75th percentiles of each group is plotted instead.\n", - " offset:float=0.2, #Give a single float (that will be used as the x-offset of all gapped lines), or an iterable containing the list of x-offsets.\n", - " ax=None, #If a matplotlib Axes object is specified, the gapped lines will be plotted in order on this axes. If None, the current axes (plt.gca()) is used.\n", - " line_color=\"black\", # The color of the gapped lines.\n", - " gap_width_percent=1, # The width of the gap in the gapped lines, as a percentage of the y-axis span.\n", - " pos:list=[0, 1],#The positions of the error bars for the sankey_error_bar method.\n", - " method:str='gapped_lines', #The method to use for drawing the error bars. Options are: 'gapped_lines', 'proportional_error_bar', and 'sankey_error_bar'.\n", - " **kwargs:dict\n", - " ):\n", - " '''\n", + "\n", + "def error_bar(\n", + " data: pd.DataFrame, # This DataFrame should be in 'long' format.\n", + " x: str, # x column to be plotted.\n", + " y: str, # y column to be plotted.\n", + " type: str = \"mean_sd\", # Choose from ['mean_sd', 'median_quartiles']. Plots the summary statistics for each group. If 'mean_sd', then the mean and standard deviation of each group is plotted as a gapped line. 
If 'median_quantiles', then the median and 25th and 75th percentiles of each group is plotted instead.\n", + " offset: float = 0.2, # Give a single float (that will be used as the x-offset of all gapped lines), or an iterable containing the list of x-offsets.\n", + " ax=None, # If a matplotlib Axes object is specified, the gapped lines will be plotted in order on this axes. If None, the current axes (plt.gca()) is used.\n", + " line_color=\"black\", # The color of the gapped lines.\n", + " gap_width_percent=1, # The width of the gap in the gapped lines, as a percentage of the y-axis span.\n", + " pos: list = [\n", + " 0,\n", + " 1,\n", + " ], # The positions of the error bars for the sankey_error_bar method.\n", + " method: str = \"gapped_lines\", # The method to use for drawing the error bars. Options are: 'gapped_lines', 'proportional_error_bar', and 'sankey_error_bar'.\n", + " **kwargs: dict,\n", + "):\n", + " \"\"\"\n", " Function to plot the standard deviations as vertical errorbars.\n", " The mean is a gap defined by negative space.\n", "\n", " This function combines the functionality of gapped_lines(),\n", " proportional_error_bar(), and sankey_error_bar().\n", "\n", - " '''\n", - " import numpy as np\n", - " import pandas as pd\n", - " import matplotlib.pyplot as plt\n", - " import matplotlib.lines as mlines\n", + " \"\"\"\n", "\n", " if gap_width_percent < 0 or gap_width_percent > 100:\n", " raise ValueError(\"`gap_width_percent` must be between 0 and 100.\")\n", - " if method not in ['gapped_lines', 'proportional_error_bar', 'sankey_error_bar']:\n", - " raise ValueError(\"Invalid `method`. Must be one of 'gapped_lines', \\\n", - " 'proportional_error_bar', or 'sankey_error_bar'.\")\n", + " if method not in [\"gapped_lines\", \"proportional_error_bar\", \"sankey_error_bar\"]:\n", + " raise ValueError(\n", + " \"Invalid `method`. 
Must be one of 'gapped_lines', \\\n", + " 'proportional_error_bar', or 'sankey_error_bar'.\"\n", + " )\n", "\n", " if ax is None:\n", " ax = plt.gca()\n", @@ -147,14 +146,14 @@ " gap_width = ax_yspan * gap_width_percent / 100\n", "\n", " keys = kwargs.keys()\n", - " if 'clip_on' not in keys:\n", - " kwargs['clip_on'] = False\n", + " if \"clip_on\" not in keys:\n", + " kwargs[\"clip_on\"] = False\n", "\n", - " if 'zorder' not in keys:\n", - " kwargs['zorder'] = 5\n", + " if \"zorder\" not in keys:\n", + " kwargs[\"zorder\"] = 5\n", "\n", - " if 'lw' not in keys:\n", - " kwargs['lw'] = 2.\n", + " if \"lw\" not in keys:\n", + " kwargs[\"lw\"] = 2.0\n", "\n", " if isinstance(data[x].dtype, pd.CategoricalDtype):\n", " group_order = pd.unique(data[x]).categories\n", @@ -163,8 +162,10 @@ "\n", " means = data.groupby(x)[y].mean().reindex(index=group_order)\n", "\n", - " if method in ['proportional_error_bar', 'sankey_error_bar']:\n", - " g = lambda x: np.sqrt((np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x)))\n", + " if method in [\"proportional_error_bar\", \"sankey_error_bar\"]:\n", + " g = lambda x: np.sqrt(\n", + " (np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x))\n", + " )\n", " sd = data.groupby(x)[y].apply(g)\n", " else:\n", " sd = data.groupby(x)[y].std().reindex(index=group_order)\n", @@ -173,20 +174,20 @@ " upper_sd = means + sd\n", "\n", " if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any():\n", - " kwargs['clip_on'] = True\n", + " kwargs[\"clip_on\"] = True\n", "\n", " medians = data.groupby(x)[y].median().reindex(index=group_order)\n", - " quantiles = data.groupby(x)[y].quantile([0.25, 0.75]) \\\n", - " .unstack() \\\n", - " .reindex(index=group_order)\n", + " quantiles = (\n", + " data.groupby(x)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order)\n", + " )\n", " lower_quartiles = quantiles[0.25]\n", " upper_quartiles = quantiles[0.75]\n", "\n", - " if type == 'mean_sd':\n", + " if type == 
\"mean_sd\":\n", " central_measures = means\n", " lows = lower_sd\n", " highs = upper_sd\n", - " elif type == 'median_quartiles':\n", + " elif type == \"median_quartiles\":\n", " central_measures = medians\n", " lows = lower_quartiles\n", " highs = upper_quartiles\n", @@ -213,13 +214,12 @@ " err2 = \"{} offset(s) were supplied in `offset`.\".format(len_offset)\n", " raise ValueError(err1 + err2)\n", "\n", - " kwargs['zorder'] = kwargs['zorder']\n", + " kwargs[\"zorder\"] = kwargs[\"zorder\"]\n", "\n", " for xpos, central_measure in enumerate(central_measures):\n", - " \n", - " kwargs['color'] = custom_palette[xpos]\n", + " kwargs[\"color\"] = custom_palette[xpos]\n", "\n", - " if method == 'sankey_error_bar':\n", + " if method == \"sankey_error_bar\":\n", " _xpos = pos[xpos] + offset[xpos]\n", " else:\n", " _xpos = xpos + offset[xpos]\n", @@ -227,36 +227,37 @@ " low = lows[xpos]\n", " high = highs[xpos]\n", " if low == high == central_measure:\n", - " low_to_mean = mlines.Line2D([_xpos, _xpos],\n", - " [low, central_measure],\n", - " **kwargs)\n", + " low_to_mean = mlines.Line2D(\n", + " [_xpos, _xpos], [low, central_measure], **kwargs\n", + " )\n", " ax.add_line(low_to_mean)\n", - " \n", - " mean_to_high = mlines.Line2D([_xpos, _xpos],\n", - " [central_measure, high],\n", - " **kwargs)\n", + "\n", + " mean_to_high = mlines.Line2D(\n", + " [_xpos, _xpos], [central_measure, high], **kwargs\n", + " )\n", " ax.add_line(mean_to_high)\n", " else:\n", - " low_to_mean = mlines.Line2D([_xpos, _xpos],\n", - " [low, central_measure - gap_width],\n", - " **kwargs)\n", + " low_to_mean = mlines.Line2D(\n", + " [_xpos, _xpos], [low, central_measure - gap_width], **kwargs\n", + " )\n", " ax.add_line(low_to_mean)\n", - " \n", - " mean_to_high = mlines.Line2D([_xpos, _xpos],\n", - " [central_measure + gap_width, high],\n", - " **kwargs)\n", + "\n", + " mean_to_high = mlines.Line2D(\n", + " [_xpos, _xpos], [central_measure + gap_width, high], **kwargs\n", + " )\n", " 
ax.add_line(mean_to_high)\n", - " \n", - "\n", - "def check_data_matches_labels(labels,#list of input labels \n", - " data, #Pandas Series of input data\n", - " side:str # 'left' or 'right' on the sankey diagram\n", - " ):\n", - " '''\n", - " Function to check that the labels and data match in the sankey diagram. \n", + "\n", + "\n", + "def check_data_matches_labels(\n", + " labels, # list of input labels\n", + " data, # Pandas Series of input data\n", + " side: str, # 'left' or 'right' on the sankey diagram\n", + "):\n", + " \"\"\"\n", + " Function to check that the labels and data match in the sankey diagram.\n", " And enforce labels and data to be lists.\n", " Raises an exception if the labels and data do not match.\n", - " '''\n", + " \"\"\"\n", " if len(labels) > 0:\n", " if isinstance(data, list):\n", " data = set(data)\n", @@ -272,12 +273,18 @@ " msg += \"Data: \" + \",\".join(data)\n", " raise Exception(f\"{side} labels and data do not match.{msg}\")\n", "\n", - " \n", + "\n", "def normalize_dict(nested_dict, target):\n", " val = {}\n", " for key in nested_dict.keys():\n", - " val[key] = np.sum([nested_dict[sub_key][key] for sub_key in nested_dict.keys() if key in nested_dict[sub_key]])\n", - " \n", + " val[key] = np.sum(\n", + " [\n", + " nested_dict[sub_key][key]\n", + " for sub_key in nested_dict.keys()\n", + " if key in nested_dict[sub_key]\n", + " ]\n", + " )\n", + "\n", " for key, value in nested_dict.items():\n", " if isinstance(value, dict):\n", " for subkey in value.keys():\n", @@ -285,66 +292,68 @@ " if subkey in val.keys():\n", " if val[subkey] != 0:\n", " # Address the problem when one of the label have zero value\n", - " value[subkey] = value[subkey] * target[subkey]['right']/val[subkey]\n", + " value[subkey] = (\n", + " value[subkey] * target[subkey][\"right\"] / val[subkey]\n", + " )\n", " else:\n", " value[subkey] = 0\n", " else:\n", - " value[subkey] = target[subkey]['right']\n", + " value[subkey] = target[subkey][\"right\"]\n", " return 
nested_dict\n", "\n", "\n", - "def width_determine(labels, data, pos='left'):\n", + "def width_determine(labels, data, pos=\"left\"):\n", " widths_norm = defaultdict()\n", " for i, label in enumerate(labels):\n", " myD = {}\n", " myD[pos] = data[data[pos] == label][pos + \"Weight\"].sum()\n", " if len(labels) != 1:\n", " if i == 0:\n", - " myD['bottom'] = 0\n", + " myD[\"bottom\"] = 0\n", " myD[pos] -= 0.01\n", - " myD['top'] = myD[pos]\n", + " myD[\"top\"] = myD[pos]\n", " elif i == len(labels) - 1:\n", " myD[pos] -= 0.01\n", - " myD['bottom'] = 1 - myD[pos]\n", - " myD['top'] = 1\n", + " myD[\"bottom\"] = 1 - myD[pos]\n", + " myD[\"top\"] = 1\n", " else:\n", " myD[pos] -= 0.02\n", - " myD['bottom'] = widths_norm[labels[i - 1]]['top'] + 0.02\n", - " myD['top'] = myD['bottom'] + myD[pos]\n", - " topEdge = myD['top']\n", + " myD[\"bottom\"] = widths_norm[labels[i - 1]][\"top\"] + 0.02\n", + " myD[\"top\"] = myD[\"bottom\"] + myD[pos]\n", " else:\n", - " myD['bottom'] = 0\n", - " myD['top'] = 1\n", + " myD[\"bottom\"] = 0\n", + " myD[\"top\"] = 1\n", " widths_norm[label] = myD\n", " return widths_norm\n", "\n", - "def single_sankey(left:np.array,# data on the left of the diagram\n", - " right:np.array, # data on the right of the diagram, len(left) == len(right)\n", - " xpos:float=0, # the starting point on the x-axis\n", - " leftWeight:np.array=None, #weights for the left labels, if None, all weights are 1\n", - " rightWeight:np.array=None, #weights for the right labels, if None, all weights are corresponding leftWeight\n", - " colorDict:dict=None, #input format: {'label': 'color'}\n", - " leftLabels:list=None, #labels for the left side of the diagram. The diagram will be sorted by these labels.\n", - " rightLabels:list=None, #labels for the right side of the diagram. 
The diagram will be sorted by these labels.\n", - " ax=None, #matplotlib axes to be drawn on\n", - " flow:bool=True, #if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison\n", - " sankey:bool=True, #if True, draw the sankey diagram, else draw barplot\n", - " width=0.5, \n", - " alpha=0.65, \n", - " bar_width=0.2, \n", - " error_bar_on:bool=True, #if True, draw error bar for each group comparison\n", - " strip_on:bool=True, #if True, draw strip for each group comparison\n", - " one_sankey:bool=False, #if True, only draw one sankey diagram\n", - " rightColor:bool=False, #if True, each strip of the diagram will be colored according to the corresponding left labels\n", - " align:bool='center'# if 'center', the diagram will be centered on each xtick, if 'edge', the diagram will be aligned with the left edge of each xtick\n", - " ):\n", - "\n", - " '''\n", + "\n", + "def single_sankey(\n", + " left: np.array, # data on the left of the diagram\n", + " right: np.array, # data on the right of the diagram, len(left) == len(right)\n", + " xpos: float = 0, # the starting point on the x-axis\n", + " leftWeight: np.array = None, # weights for the left labels, if None, all weights are 1\n", + " rightWeight: np.array = None, # weights for the right labels, if None, all weights are corresponding leftWeight\n", + " colorDict: dict = None, # input format: {'label': 'color'}\n", + " leftLabels: list = None, # labels for the left side of the diagram. The diagram will be sorted by these labels.\n", + " rightLabels: list = None, # labels for the right side of the diagram. 
The diagram will be sorted by these labels.\n", + " ax=None, # matplotlib axes to be drawn on\n", + " flow: bool = True, # if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison\n", + " sankey: bool = True, # if True, draw the sankey diagram, else draw barplot\n", + " width=0.5,\n", + " alpha=0.65,\n", + " bar_width=0.2,\n", + " error_bar_on: bool = True, # if True, draw error bar for each group comparison\n", + " strip_on: bool = True, # if True, draw strip for each group comparison\n", + " one_sankey: bool = False, # if True, only draw one sankey diagram\n", + " rightColor: bool = False, # if True, each strip of the diagram will be colored according to the corresponding left labels\n", + " align: bool = \"center\", # if 'center', the diagram will be centered on each xtick, if 'edge', the diagram will be aligned with the left edge of each xtick\n", + "):\n", + " \"\"\"\n", " Make a single Sankey diagram showing proportion flow from left to right\n", " Original code from: https://github.com/anazalea/pySankey\n", " Changes are added to normalize each diagram's height to be 1\n", "\n", - " '''\n", + " \"\"\"\n", "\n", " # Initiating values\n", " if ax is None:\n", @@ -369,26 +378,35 @@ " left.reset_index(drop=True, inplace=True)\n", " if isinstance(right, pd.Series):\n", " right.reset_index(drop=True, inplace=True)\n", - " dataFrame = pd.DataFrame({'left': left, 'right': right, 'leftWeight': leftWeight,\n", - " 'rightWeight': rightWeight}, index=range(len(left)))\n", - " \n", - " if dataFrame[['left', 'right']].isnull().any(axis=None):\n", - " raise Exception('Sankey graph does not support null values.')\n", + " dataFrame = pd.DataFrame(\n", + " {\n", + " \"left\": left,\n", + " \"right\": right,\n", + " \"leftWeight\": leftWeight,\n", + " \"rightWeight\": rightWeight,\n", + " },\n", + " index=range(len(left)),\n", + " )\n", + "\n", + " if dataFrame[[\"left\", \"right\"]].isnull().any(axis=None):\n", + " raise Exception(\"Sankey 
graph does not support null values.\")\n", "\n", " # Identify all labels that appear 'left' or 'right'\n", - " allLabels = pd.Series(np.sort(np.r_[dataFrame.left.unique(), dataFrame.right.unique()])[::-1]).unique()\n", + " allLabels = pd.Series(\n", + " np.sort(np.r_[dataFrame.left.unique(), dataFrame.right.unique()])[::-1]\n", + " ).unique()\n", "\n", " # Identify left labels\n", " if len(leftLabels) == 0:\n", " leftLabels = pd.Series(np.sort(dataFrame.left.unique())[::-1]).unique()\n", " else:\n", - " check_data_matches_labels(leftLabels, dataFrame['left'], 'left')\n", + " check_data_matches_labels(leftLabels, dataFrame[\"left\"], \"left\")\n", "\n", " # Identify right labels\n", " if len(rightLabels) == 0:\n", " rightLabels = pd.Series(np.sort(dataFrame.right.unique())[::-1]).unique()\n", " else:\n", - " check_data_matches_labels(leftLabels, dataFrame['right'], 'right')\n", + " check_data_matches_labels(leftLabels, dataFrame[\"right\"], \"right\")\n", "\n", " # If no colorDict given, make one\n", " if colorDict is None:\n", @@ -397,31 +415,33 @@ " colorPalette = sns.color_palette(palette, len(allLabels))\n", " for i, label in enumerate(allLabels):\n", " colorDict[label] = colorPalette[i]\n", - " fail_color = {0:\"grey\"}\n", + " fail_color = {0: \"grey\"}\n", " colorDict.update(fail_color)\n", " else:\n", " missing = [label for label in allLabels if label not in colorDict.keys()]\n", " if missing:\n", " msg = \"The palette parameter is missing values for the following labels : \"\n", - " msg += '{}'.format(', '.join(missing))\n", + " msg += \"{}\".format(\", \".join(missing))\n", " raise ValueError(msg)\n", "\n", " if align not in (\"center\", \"edge\"):\n", - " err = '{} assigned for `align` is not valid.'.format(align)\n", + " err = \"{} assigned for `align` is not valid.\".format(align)\n", " raise ValueError(err)\n", " if align == \"center\":\n", " try:\n", " leftpos = xpos - width / 2\n", " except TypeError as e:\n", - " raise TypeError(f'the dtypes of 
parameters x ({xpos.dtype}) '\n", - " f'and width ({width.dtype}) '\n", - " f'are incompatible') from e\n", - " else: \n", + " raise TypeError(\n", + " f\"the dtypes of parameters x ({xpos.dtype}) \"\n", + " f\"and width ({width.dtype}) \"\n", + " f\"are incompatible\"\n", + " ) from e\n", + " else:\n", " leftpos = xpos\n", "\n", " # Combine left and right arrays to have a pandas.DataFrame in the 'long' format\n", - " left_series = pd.Series(left, name='values').to_frame().assign(groups='left')\n", - " right_series = pd.Series(right, name='values').to_frame().assign(groups='right')\n", + " left_series = pd.Series(left, name=\"values\").to_frame().assign(groups=\"left\")\n", + " right_series = pd.Series(right, name=\"values\").to_frame().assign(groups=\"right\")\n", " concatenated_df = pd.concat([left_series, right_series], ignore_index=True)\n", "\n", " # Determine positions of left label patches and total widths\n", @@ -429,53 +449,57 @@ " leftWidths_norm = defaultdict()\n", " for i, leftLabel in enumerate(leftLabels):\n", " myD = {}\n", - " myD['left'] = (dataFrame[dataFrame.left == leftLabel].leftWeight.sum()/ \\\n", - " dataFrame.leftWeight.sum())\n", + " myD[\"left\"] = (\n", + " dataFrame[dataFrame.left == leftLabel].leftWeight.sum()\n", + " / dataFrame.leftWeight.sum()\n", + " )\n", " if len(leftLabels) != 1:\n", " if i == 0:\n", - " myD['bottom'] = 0\n", - " myD['left'] -= 0.01\n", - " myD['top'] = myD['left']\n", + " myD[\"bottom\"] = 0\n", + " myD[\"left\"] -= 0.01\n", + " myD[\"top\"] = myD[\"left\"]\n", " elif i == len(leftLabels) - 1:\n", - " myD['left'] -= 0.01\n", - " myD['bottom'] = 1 - myD['left']\n", - " myD['top'] = 1\n", + " myD[\"left\"] -= 0.01\n", + " myD[\"bottom\"] = 1 - myD[\"left\"]\n", + " myD[\"top\"] = 1\n", " else:\n", - " myD['left'] -= 0.02\n", - " myD['bottom'] = leftWidths_norm[leftLabels[i - 1]]['top'] + 0.02\n", - " myD['top'] = myD['bottom'] + myD['left']\n", - " topEdge = myD['top']\n", + " myD[\"left\"] -= 0.02\n", + " 
myD[\"bottom\"] = leftWidths_norm[leftLabels[i - 1]][\"top\"] + 0.02\n", + " myD[\"top\"] = myD[\"bottom\"] + myD[\"left\"]\n", + " topEdge = myD[\"top\"]\n", " else:\n", - " myD['bottom'] = 0\n", - " myD['top'] = 1\n", - " myD['left'] = 1\n", + " myD[\"bottom\"] = 0\n", + " myD[\"top\"] = 1\n", + " myD[\"left\"] = 1\n", " leftWidths_norm[leftLabel] = myD\n", "\n", " # Determine positions of right label patches and total widths\n", " rightWidths_norm = defaultdict()\n", " for i, rightLabel in enumerate(rightLabels):\n", " myD = {}\n", - " myD['right'] = (dataFrame[dataFrame.right == rightLabel].rightWeight.sum()/ \\\n", - " dataFrame.rightWeight.sum())\n", + " myD[\"right\"] = (\n", + " dataFrame[dataFrame.right == rightLabel].rightWeight.sum()\n", + " / dataFrame.rightWeight.sum()\n", + " )\n", " if len(rightLabels) != 1:\n", " if i == 0:\n", - " myD['bottom'] = 0\n", - " myD['right'] -= 0.01\n", - " myD['top'] = myD['right']\n", + " myD[\"bottom\"] = 0\n", + " myD[\"right\"] -= 0.01\n", + " myD[\"top\"] = myD[\"right\"]\n", " elif i == len(rightLabels) - 1:\n", - " myD['right'] -= 0.01\n", - " myD['bottom'] = 1 - myD['right']\n", - " myD['top'] = 1\n", + " myD[\"right\"] -= 0.01\n", + " myD[\"bottom\"] = 1 - myD[\"right\"]\n", + " myD[\"top\"] = 1\n", " else:\n", - " myD['right'] -= 0.02\n", - " myD['bottom'] = rightWidths_norm[rightLabels[i - 1]]['top'] + 0.02\n", - " myD['top'] = myD['bottom'] + myD['right']\n", - " topEdge = myD['top']\n", + " myD[\"right\"] -= 0.02\n", + " myD[\"bottom\"] = rightWidths_norm[rightLabels[i - 1]][\"top\"] + 0.02\n", + " myD[\"top\"] = myD[\"bottom\"] + myD[\"right\"]\n", + " topEdge = myD[\"top\"]\n", " else:\n", - " myD['bottom'] = 0\n", - " myD['top'] = 1\n", - " myD['right'] = 1\n", - " rightWidths_norm[rightLabel] = myD \n", + " myD[\"bottom\"] = 0\n", + " myD[\"top\"] = 1\n", + " myD[\"right\"] = 1\n", + " rightWidths_norm[rightLabel] = myD\n", "\n", " # Total width of the graph\n", " xMax = width\n", @@ -492,19 +516,29 @@ 
" if (flow == False and sankey == True) or one_sankey:\n", " for rightLabel in rightLabels:\n", " ax.fill_between(\n", - " [xMax + leftpos + (-bar_width * xMax * 0.5), leftpos + xMax + (bar_width * xMax * 0.5)], \n", + " [\n", + " xMax + leftpos + (-bar_width * xMax * 0.5),\n", + " leftpos + xMax + (bar_width * xMax * 0.5),\n", + " ],\n", " 2 * [rightWidths_norm[rightLabel][\"bottom\"]],\n", " 2 * [rightWidths_norm[rightLabel][\"top\"]],\n", " color=colorDict[rightLabel],\n", - " alpha=0.99\n", + " alpha=0.99,\n", " )\n", "\n", " # Plot error bars\n", " if error_bar_on and strip_on:\n", - " error_bar(concatenated_df, x='groups', y='values', ax=ax, offset=0, gap_width_percent=2,\n", - " method=\"sankey_error_bar\",\n", - " pos=[leftpos, leftpos + xMax])\n", - " \n", + " error_bar(\n", + " concatenated_df,\n", + " x=\"groups\",\n", + " y=\"values\",\n", + " ax=ax,\n", + " offset=0,\n", + " gap_width_percent=2,\n", + " method=\"sankey_error_bar\",\n", + " pos=[leftpos, leftpos + xMax],\n", + " )\n", + "\n", " # Determine widths of individual strips, all widths are normalized to 1\n", " ns_l = defaultdict()\n", " ns_r = defaultdict()\n", @@ -516,96 +550,121 @@ " for rightLabel in rightLabels:\n", " leftDict[rightLabel] = dataFrame[\n", " (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel)\n", - " ].leftWeight.sum()\n", - " \n", + " ].leftWeight.sum()\n", + "\n", " rightDict[rightLabel] = dataFrame[\n", " (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel)\n", - " ].rightWeight.sum()\n", - " factorleft = leftWidths_norm[leftLabel]['left']/sum(leftDict.values())\n", - " leftDict_norm = {k: v*factorleft for k, v in leftDict.items()}\n", + " ].rightWeight.sum()\n", + " factorleft = leftWidths_norm[leftLabel][\"left\"] / sum(leftDict.values())\n", + " leftDict_norm = {k: v * factorleft for k, v in leftDict.items()}\n", " ns_l_norm[leftLabel] = leftDict_norm\n", " ns_r[leftLabel] = rightDict\n", - " \n", + "\n", " # ns_r should be using a different 
way of normalization to fit the right side\n", " # It is normalized using the value with the same key in each sub-dictionary\n", " ns_r_norm = normalize_dict(ns_r, rightWidths_norm)\n", - " \n", + "\n", " # Plot strips\n", - " if sankey == True and strip_on == True:\n", + " if sankey and strip_on:\n", " for leftLabel, rightLabel in itertools.product(leftLabels, rightLabels):\n", " labelColor = leftLabel\n", " if rightColor:\n", " labelColor = rightLabel\n", - " if len(dataFrame[(dataFrame.left == leftLabel) & (dataFrame.right == rightLabel)]) > 0:\n", + " if (\n", + " len(\n", + " dataFrame[\n", + " (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel)\n", + " ]\n", + " )\n", + " > 0\n", + " ):\n", " # Create array of y values for each strip, half at left value,\n", " # half at right, convolve\n", - " ys_d = np.array(50 * [leftWidths_norm[leftLabel]['bottom']] + \\\n", - " 50 * [rightWidths_norm[rightLabel]['bottom']])\n", - " ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode='valid')\n", - " ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode='valid')\n", - " ys_u = np.array(50 * [leftWidths_norm[leftLabel]['bottom'] + ns_l_norm[leftLabel][rightLabel]] + \\\n", - " 50 * [rightWidths_norm[rightLabel]['bottom'] + ns_r_norm[leftLabel][rightLabel]])\n", - " ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode='valid')\n", - " ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode='valid')\n", + " ys_d = np.array(\n", + " 50 * [leftWidths_norm[leftLabel][\"bottom\"]]\n", + " + 50 * [rightWidths_norm[rightLabel][\"bottom\"]]\n", + " )\n", + " ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode=\"valid\")\n", + " ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode=\"valid\")\n", + " ys_u = np.array(\n", + " 50\n", + " * [\n", + " leftWidths_norm[leftLabel][\"bottom\"]\n", + " + ns_l_norm[leftLabel][rightLabel]\n", + " ]\n", + " + 50\n", + " * [\n", + " rightWidths_norm[rightLabel][\"bottom\"]\n", + " + ns_r_norm[leftLabel][rightLabel]\n", + " ]\n", + " )\n", + " ys_u = 
np.convolve(ys_u, 0.05 * np.ones(20), mode=\"valid\")\n", + " ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode=\"valid\")\n", "\n", " # Update bottom edges at each label so next strip starts at the right place\n", - " leftWidths_norm[leftLabel]['bottom'] += ns_l_norm[leftLabel][rightLabel]\n", - " rightWidths_norm[rightLabel]['bottom'] += ns_r_norm[leftLabel][rightLabel]\n", + " leftWidths_norm[leftLabel][\"bottom\"] += ns_l_norm[leftLabel][rightLabel]\n", + " rightWidths_norm[rightLabel][\"bottom\"] += ns_r_norm[leftLabel][\n", + " rightLabel\n", + " ]\n", " ax.fill_between(\n", - " np.linspace(leftpos + (bar_width * xMax * 0.5), \\\n", - " leftpos + xMax - (bar_width * xMax * 0.5), len(ys_d)), \\\n", - " ys_d, ys_u, alpha=alpha,\n", - " color=colorDict[labelColor], edgecolor='none'\n", + " np.linspace(\n", + " leftpos + (bar_width * xMax * 0.5),\n", + " leftpos + xMax - (bar_width * xMax * 0.5),\n", + " len(ys_d),\n", + " ),\n", + " ys_d,\n", + " ys_u,\n", + " alpha=alpha,\n", + " color=colorDict[labelColor],\n", + " edgecolor=\"none\",\n", " )\n", - " \n", - "def sankeydiag(data:pd.DataFrame,\n", - " xvar:str, # x column to be plotted.\n", - " yvar:str, # y column to be plotted.\n", - " left_idx:str, #the value in column xvar that is on the left side of each sankey diagram\n", - " right_idx:str, #the value in column xvar that is on the right side of each sankey diagram, if len(left_idx) == 1, it will be broadcasted to the same length as right_idx, otherwise it should have the same length as right_idx\n", - " leftLabels:list=None, #labels for the left side of the diagram. The diagram will be sorted by these labels.\n", - " rightLabels:list=None, #labels for the right side of the diagram. 
The diagram will be sorted by these labels.\n", - " palette:str|dict=None, \n", - " ax=None, #matplotlib axes to be drawn on\n", - " flow:bool=True, #if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison\n", - " sankey:bool=True, #if True, draw the sankey diagram, else draw barplot\n", - " one_sankey:bool=False,# determined by the driver function on plotter.py, if True, draw the sankey diagram across the whole raw data axes\n", - " width:float=0.4, # the width of each sankey diagram\n", - " rightColor:bool=False,#if True, each strip of the diagram will be colored according to the corresponding left labels\n", - " align:str='center', #the alignment of each sankey diagram, can be 'center' or 'left'\n", - " alpha:float=0.65, #the transparency of each strip\n", - " **kwargs):\n", - " '''\n", + "\n", + "\n", + "def sankeydiag(\n", + " data: pd.DataFrame,\n", + " xvar: str, # x column to be plotted.\n", + " yvar: str, # y column to be plotted.\n", + " left_idx: str, # the value in column xvar that is on the left side of each sankey diagram\n", + " right_idx: str, # the value in column xvar that is on the right side of each sankey diagram, if len(left_idx) == 1, it will be broadcasted to the same length as right_idx, otherwise it should have the same length as right_idx\n", + " leftLabels: list = None, # labels for the left side of the diagram. The diagram will be sorted by these labels.\n", + " rightLabels: list = None, # labels for the right side of the diagram. 
The diagram will be sorted by these labels.\n", + " palette: str | dict = None,\n", + " ax=None, # matplotlib axes to be drawn on\n", + " flow: bool = True, # if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison\n", + " sankey: bool = True, # if True, draw the sankey diagram, else draw barplot\n", + " one_sankey: bool = False, # determined by the driver function on plotter.py, if True, draw the sankey diagram across the whole raw data axes\n", + " width: float = 0.4, # the width of each sankey diagram\n", + " rightColor: bool = False, # if True, each strip of the diagram will be colored according to the corresponding left labels\n", + " align: str = \"center\", # the alignment of each sankey diagram, can be 'center' or 'left'\n", + " alpha: float = 0.65, # the transparency of each strip\n", + " **kwargs,\n", + "):\n", + " \"\"\"\n", " Read in melted pd.DataFrame, and draw multiple sankey diagram on a single axes\n", " using the value in column yvar according to the value in column xvar\n", " left_idx in the column xvar is on the left side of each sankey diagram\n", " right_idx in the column xvar is on the right side of each sankey diagram\n", "\n", - " '''\n", - "\n", - " import numpy as np\n", - " import pandas as pd\n", - " import seaborn as sns\n", - " import matplotlib.pyplot as plt\n", + " \"\"\"\n", "\n", " if \"width\" in kwargs:\n", " width = kwargs[\"width\"]\n", "\n", " if \"align\" in kwargs:\n", " align = kwargs[\"align\"]\n", - " \n", + "\n", " if \"alpha\" in kwargs:\n", " alpha = kwargs[\"alpha\"]\n", - " \n", + "\n", " if \"rightColor\" in kwargs:\n", " rightColor = kwargs[\"rightColor\"]\n", - " \n", + "\n", " if \"bar_width\" in kwargs:\n", " bar_width = kwargs[\"bar_width\"]\n", - " \n", + "\n", " if \"sankey\" in kwargs:\n", " sankey = kwargs[\"sankey\"]\n", - " \n", + "\n", " if \"flow\" in kwargs:\n", " flow = kwargs[\"flow\"]\n", "\n", @@ -613,7 +672,7 @@ " ax = plt.gca()\n", "\n", " allLabels = 
pd.Series(np.sort(data[yvar].unique())[::-1]).unique()\n", - " \n", + "\n", " # Check if all the elements in left_idx and right_idx are in xvar column\n", " unique_xvar = data[xvar].unique()\n", " if not all(elem in unique_xvar for elem in left_idx):\n", @@ -625,7 +684,7 @@ "\n", " # For baseline comparison, broadcast left_idx to the same length as right_idx\n", " # so that the left of sankey diagram will be the same\n", - " # For sequential comparison, left_idx and right_idx can have anything different \n", + " # For sequential comparison, left_idx and right_idx can have anything different\n", " # but should have the same length\n", " if len(left_idx) == 1:\n", " broadcasted_left = np.broadcast_to(left_idx, len(right_idx))\n", @@ -637,8 +696,7 @@ " if isinstance(palette, dict):\n", " if not all(key in allLabels for key in palette.keys()):\n", " raise ValueError(f\"keys in palette should be in {yvar} column\")\n", - " else: \n", - " plot_palette = palette\n", + " plot_palette = palette\n", " elif isinstance(palette, str):\n", " plot_palette = {}\n", " colorPalette = sns.color_palette(palette, len(allLabels))\n", @@ -648,38 +706,75 @@ " plot_palette = None\n", "\n", " # Create a strip_on list to determine whether to draw the strip during repeated measures\n", - " strip_on = [int(right not in broadcasted_left[:i]) for i, right in enumerate(right_idx)]\n", + " strip_on = [\n", + " int(right not in broadcasted_left[:i]) for i, right in enumerate(right_idx)\n", + " ]\n", "\n", " draw_idx = list(zip(broadcasted_left, right_idx))\n", " for i, (left, right) in enumerate(draw_idx):\n", " if one_sankey == False:\n", " if flow == True:\n", " width = 1\n", - " align = 'edge'\n", - " sankey = False if i == len(draw_idx)-1 else sankey # Remove last strip in flow\n", - " error_bar_on = False if i == len(draw_idx)-1 and flow else True # Remove last error_bar in flow\n", + " align = \"edge\"\n", + " sankey = (\n", + " False if i == len(draw_idx) - 1 else sankey\n", + " ) # Remove 
last strip in flow\n", + " error_bar_on = (\n", + " False if i == len(draw_idx) - 1 and flow else True\n", + " ) # Remove last error_bar in flow\n", " bar_width = 0.4 if sankey == False and flow == False else bar_width\n", - " single_sankey(data[data[xvar]==left][yvar], data[data[xvar]==right][yvar], \n", - " xpos=xpos, ax=ax, colorDict=plot_palette, width=width, \n", - " leftLabels=leftLabels, rightLabels=rightLabels, strip_on=strip_on[i],\n", - " rightColor=rightColor, bar_width=bar_width, sankey=sankey,\n", - " error_bar_on=error_bar_on, flow=flow, align=align, alpha=alpha)\n", + " single_sankey(\n", + " data[data[xvar] == left][yvar],\n", + " data[data[xvar] == right][yvar],\n", + " xpos=xpos,\n", + " ax=ax,\n", + " colorDict=plot_palette,\n", + " width=width,\n", + " leftLabels=leftLabels,\n", + " rightLabels=rightLabels,\n", + " strip_on=strip_on[i],\n", + " rightColor=rightColor,\n", + " bar_width=bar_width,\n", + " sankey=sankey,\n", + " error_bar_on=error_bar_on,\n", + " flow=flow,\n", + " align=align,\n", + " alpha=alpha,\n", + " )\n", " xpos += 1\n", " else:\n", " xpos = 0\n", " width = 1\n", " if sankey == False:\n", " bar_width = 0.5\n", - " single_sankey(data[data[xvar]==left][yvar], data[data[xvar]==right][yvar], \n", - " xpos=xpos, ax=ax, colorDict=plot_palette, width=width, \n", - " leftLabels=leftLabels, rightLabels=rightLabels, \n", - " rightColor=rightColor, bar_width=bar_width, sankey=sankey, \n", - " one_sankey=one_sankey, flow=False, align='edge', alpha=alpha)\n", - "\n", - "# Now only draw vs xticks for two-column sankey diagram\n", - " if one_sankey == False or (sankey and not flow): \n", - " sankey_ticks = [f\"{left}\" for left in broadcasted_left] if flow \\\n", - " else [f\"{left}\\n v.s.\\n{right}\" for left, right in zip(broadcasted_left, right_idx)]\n", + " single_sankey(\n", + " data[data[xvar] == left][yvar],\n", + " data[data[xvar] == right][yvar],\n", + " xpos=xpos,\n", + " ax=ax,\n", + " colorDict=plot_palette,\n", + " 
width=width,\n",
+    "                leftLabels=leftLabels,\n",
+    "                rightLabels=rightLabels,\n",
+    "                rightColor=rightColor,\n",
+    "                bar_width=bar_width,\n",
+    "                sankey=sankey,\n",
+    "                one_sankey=one_sankey,\n",
+    "                flow=False,\n",
+    "                align=\"edge\",\n",
+    "                alpha=alpha,\n",
+    "            )\n",
+    "\n",
+    "    # Now only draw vs xticks for two-column sankey diagram\n",
+    "    if not one_sankey or (sankey and not flow):\n",
+    "        sankey_ticks = (\n",
+    "            [f\"{left}\" for left in broadcasted_left]\n",
+    "            if flow\n",
+    "            else [\n",
+    "                f\"{left}\\n v.s.\\n{right}\"\n",
+    "                for left, right in zip(broadcasted_left, right_idx)\n",
+    "            ]\n",
+    "        )\n",
     "        ax.get_xaxis().set_ticks(np.arange(len(right_idx)))\n",
     "        ax.get_xaxis().set_ticklabels(sankey_ticks)\n",
     "    else:\n",
diff --git a/nbs/API/plotter.ipynb b/nbs/API/plotter.ipynb
index 227c27fa..ef4d602a 100644
--- a/nbs/API/plotter.ipynb
+++ b/nbs/API/plotter.ipynb
@@ -47,6 +47,23 @@
     "nbdev.nbdev_export()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7562c1a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "import numpy as np\n",
+    "import seaborn as sns\n",
+    "import matplotlib\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import warnings\n",
+    "import logging"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -55,15 +72,15 @@
    "outputs": [],
    "source": [
     "#| export\n",
-    "\n",
-    "def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):\n",
+    "# TODO refactor function name\n",
+    "def EffectSizeDataFramePlotter(effectsize_df, **plot_kwargs):\n",
     "    \"\"\"\n",
     "    Custom function that creates an estimation plot from an EffectSizeDataFrame.\n",
     "    Keywords\n",
     "    --------\n",
     "    Parameters\n",
     "    ----------\n",
-    "    EffectSizeDataFrame\n",
+    "    effectsize_df\n",
     "        A `dabest` EffectSizeDataFrame object.\n",
     "    plot_kwargs\n",
     "        color_col=None\n",
@@ -96,31 +113,28 @@
     "     fontsize_contrastxlabel=12, fontsize_contrastylabel=12,\n",
     "     fontsize_delta2label=12\n",
     "    \"\"\"\n",
-    "\n",
-    "    
import numpy as np\n", - " import seaborn as sns\n", - " import matplotlib\n", - " import matplotlib.pyplot as plt\n", - " import pandas as pd\n", - " import warnings\n", - " warnings.filterwarnings('ignore', 'This figure includes Axes that are not compatible with tight_layout')\n", - "\n", " from .misc_tools import merge_two_dicts\n", " from .plot_tools import halfviolin, get_swarm_spans, error_bar, sankeydiag\n", - " from ._stats_tools.effsize import _compute_standardizers, _compute_hedges_correction_factor\n", + " from ._stats_tools.effsize import (\n", + " _compute_standardizers,\n", + " _compute_hedges_correction_factor,\n", + " )\n", + "\n", + " warnings.filterwarnings(\n", + " \"ignore\", \"This figure includes Axes that are not compatible with tight_layout\"\n", + " )\n", "\n", - " import logging\n", " # Have to disable logging of warning when get_legend_handles_labels()\n", " # tries to get from slopegraph.\n", " logging.disable(logging.WARNING)\n", "\n", " # Save rcParams that I will alter, so I can reset back.\n", " original_rcParams = {}\n", - " _changed_rcParams = ['axes.grid']\n", + " _changed_rcParams = [\"axes.grid\"]\n", " for parameter in _changed_rcParams:\n", " original_rcParams[parameter] = plt.rcParams[parameter]\n", "\n", - " plt.rcParams['axes.grid'] = False\n", + " plt.rcParams[\"axes.grid\"] = False\n", "\n", " ytick_color = plt.rcParams[\"ytick.color\"]\n", " face_color = plot_kwargs[\"face_color\"]\n", @@ -128,18 +142,18 @@ " if plot_kwargs[\"face_color\"] is None:\n", " face_color = \"white\"\n", "\n", - " dabest_obj = EffectSizeDataFrame.dabest_obj\n", - " plot_data = EffectSizeDataFrame._plot_data\n", - " xvar = EffectSizeDataFrame.xvar\n", - " yvar = EffectSizeDataFrame.yvar\n", - " is_paired = EffectSizeDataFrame.is_paired\n", - " delta2 = EffectSizeDataFrame.delta2\n", - " mini_meta = EffectSizeDataFrame.mini_meta\n", - " effect_size = EffectSizeDataFrame.effect_size\n", - " proportional = EffectSizeDataFrame.proportional\n", + " 
dabest_obj = effectsize_df.dabest_obj\n", + " plot_data = effectsize_df._plot_data\n", + " xvar = effectsize_df.xvar\n", + " yvar = effectsize_df.yvar\n", + " is_paired = effectsize_df.is_paired\n", + " delta2 = effectsize_df.delta2\n", + " mini_meta = effectsize_df.mini_meta\n", + " effect_size = effectsize_df.effect_size\n", + " proportional = effectsize_df.proportional\n", "\n", " all_plot_groups = dabest_obj._all_plot_groups\n", - " idx = dabest_obj.idx\n", + " idx = dabest_obj.idx\n", "\n", " if effect_size not in [\"mean_diff\", \"delta_g\"] or not delta2:\n", " show_delta2 = False\n", @@ -157,16 +171,16 @@ "\n", " # Disable Gardner-Altman plotting if any of the idxs comprise of more than\n", " # two groups or if it is a delta-delta plot.\n", - " float_contrast = plot_kwargs[\"float_contrast\"]\n", - " effect_size_type = EffectSizeDataFrame.effect_size\n", + " float_contrast = plot_kwargs[\"float_contrast\"]\n", + " effect_size_type = effectsize_df.effect_size\n", " if len(idx) > 1 or len(idx[0]) > 2:\n", " float_contrast = False\n", "\n", - " if effect_size_type in ['cliffs_delta']:\n", + " if effect_size_type in [\"cliffs_delta\"]:\n", " float_contrast = False\n", "\n", " if show_delta2 or show_mini_meta:\n", - " float_contrast = False \n", + " float_contrast = False\n", "\n", " if not is_paired:\n", " show_pairs = False\n", @@ -174,12 +188,13 @@ " show_pairs = plot_kwargs[\"show_pairs\"]\n", "\n", " # Set default kwargs first, then merge with user-dictated ones.\n", - " default_swarmplot_kwargs = {'size': plot_kwargs[\"raw_marker_size\"]}\n", + " default_swarmplot_kwargs = {\"size\": plot_kwargs[\"raw_marker_size\"]}\n", " if plot_kwargs[\"swarmplot_kwargs\"] is None:\n", " swarmplot_kwargs = default_swarmplot_kwargs\n", " else:\n", - " swarmplot_kwargs = merge_two_dicts(default_swarmplot_kwargs,\n", - " plot_kwargs[\"swarmplot_kwargs\"])\n", + " swarmplot_kwargs = merge_two_dicts(\n", + " default_swarmplot_kwargs, plot_kwargs[\"swarmplot_kwargs\"]\n", + " 
)\n", "\n", " # Barplot kwargs\n", " default_barplot_kwargs = {\"estimator\": np.mean, \"errorbar\": plot_kwargs[\"ci\"]}\n", @@ -187,87 +202,105 @@ " if plot_kwargs[\"barplot_kwargs\"] is None:\n", " barplot_kwargs = default_barplot_kwargs\n", " else:\n", - " barplot_kwargs = merge_two_dicts(default_barplot_kwargs,\n", - " plot_kwargs[\"barplot_kwargs\"])\n", + " barplot_kwargs = merge_two_dicts(\n", + " default_barplot_kwargs, plot_kwargs[\"barplot_kwargs\"]\n", + " )\n", "\n", " # Sankey Diagram kwargs\n", - " default_sankey_kwargs = {\"width\": 0.4, \"align\": \"center\",\n", - " \"sankey\":True, \"flow\":True,\n", - " \"alpha\": 0.4, \"rightColor\": False,\n", - " \"bar_width\":0.2}\n", + " default_sankey_kwargs = {\n", + " \"width\": 0.4,\n", + " \"align\": \"center\",\n", + " \"sankey\": True,\n", + " \"flow\": True,\n", + " \"alpha\": 0.4,\n", + " \"rightColor\": False,\n", + " \"bar_width\": 0.2,\n", + " }\n", " if plot_kwargs[\"sankey_kwargs\"] is None:\n", " sankey_kwargs = default_sankey_kwargs\n", " else:\n", - " sankey_kwargs = merge_two_dicts(default_sankey_kwargs,\n", - " plot_kwargs[\"sankey_kwargs\"])\n", + " sankey_kwargs = merge_two_dicts(\n", + " default_sankey_kwargs, plot_kwargs[\"sankey_kwargs\"]\n", + " )\n", " # We also need to extract the `sankey` and `flow` from the kwargs for plotter.py\n", " # to use for varying different kinds of paired proportional plots\n", " # We also don't want to pop the parameter from the kwargs\n", - " sankey = sankey_kwargs['sankey']\n", - " flow = sankey_kwargs['flow']\n", + " sankey = sankey_kwargs[\"sankey\"]\n", + " flow = sankey_kwargs[\"flow\"]\n", "\n", " # Violinplot kwargs.\n", - " default_violinplot_kwargs = {'widths':0.5, 'vert':True,\n", - " 'showextrema':False, 'showmedians':False}\n", + " default_violinplot_kwargs = {\n", + " \"widths\": 0.5,\n", + " \"vert\": True,\n", + " \"showextrema\": False,\n", + " \"showmedians\": False,\n", + " }\n", " if plot_kwargs[\"violinplot_kwargs\"] is None:\n", " 
violinplot_kwargs = default_violinplot_kwargs\n", " else:\n", - " violinplot_kwargs = merge_two_dicts(default_violinplot_kwargs,\n", - " plot_kwargs[\"violinplot_kwargs\"])\n", + " violinplot_kwargs = merge_two_dicts(\n", + " default_violinplot_kwargs, plot_kwargs[\"violinplot_kwargs\"]\n", + " )\n", "\n", " # slopegraph kwargs.\n", - " default_slopegraph_kwargs = {'linewidth':1, 'alpha':0.5}\n", + " default_slopegraph_kwargs = {\"linewidth\": 1, \"alpha\": 0.5}\n", " if plot_kwargs[\"slopegraph_kwargs\"] is None:\n", " slopegraph_kwargs = default_slopegraph_kwargs\n", " else:\n", - " slopegraph_kwargs = merge_two_dicts(default_slopegraph_kwargs,\n", - " plot_kwargs[\"slopegraph_kwargs\"])\n", + " slopegraph_kwargs = merge_two_dicts(\n", + " default_slopegraph_kwargs, plot_kwargs[\"slopegraph_kwargs\"]\n", + " )\n", "\n", " # Zero reference-line kwargs.\n", - " default_reflines_kwargs = {'linestyle':'solid', 'linewidth':0.75,\n", - " 'zorder': 2,\n", - " 'color': ytick_color}\n", + " default_reflines_kwargs = {\n", + " \"linestyle\": \"solid\",\n", + " \"linewidth\": 0.75,\n", + " \"zorder\": 2,\n", + " \"color\": ytick_color,\n", + " }\n", " if plot_kwargs[\"reflines_kwargs\"] is None:\n", " reflines_kwargs = default_reflines_kwargs\n", " else:\n", - " reflines_kwargs = merge_two_dicts(default_reflines_kwargs,\n", - " plot_kwargs[\"reflines_kwargs\"])\n", + " reflines_kwargs = merge_two_dicts(\n", + " default_reflines_kwargs, plot_kwargs[\"reflines_kwargs\"]\n", + " )\n", "\n", " # Legend kwargs.\n", - " default_legend_kwargs = {'loc': 'upper left', 'frameon': False}\n", + " default_legend_kwargs = {\"loc\": \"upper left\", \"frameon\": False}\n", " if plot_kwargs[\"legend_kwargs\"] is None:\n", " legend_kwargs = default_legend_kwargs\n", " else:\n", - " legend_kwargs = merge_two_dicts(default_legend_kwargs,\n", - " plot_kwargs[\"legend_kwargs\"])\n", - " \n", - " \n", - "################################################### GRIDKEY WIP - extracting arguments \n", - 
" \n", + " legend_kwargs = merge_two_dicts(\n", + " default_legend_kwargs, plot_kwargs[\"legend_kwargs\"]\n", + " )\n", + "\n", + " ################################################### GRIDKEY WIP - extracting arguments\n", + "\n", " gridkey_rows = plot_kwargs[\"gridkey_rows\"]\n", " gridkey_merge_pairs = plot_kwargs[\"gridkey_merge_pairs\"]\n", " gridkey_show_Ns = plot_kwargs[\"gridkey_show_Ns\"]\n", " gridkey_show_es = plot_kwargs[\"gridkey_show_es\"]\n", - " \n", - " if gridkey_rows == None:\n", + "\n", + " if gridkey_rows is None:\n", " gridkey_show_Ns = False\n", " gridkey_show_es = False\n", - " \n", - "################################################### END GRIDKEY WIP - extracting arguments\n", + "\n", + " ################################################### END GRIDKEY WIP - extracting arguments\n", "\n", " # Group summaries kwargs.\n", - " gs_default = {'mean_sd', 'median_quartiles', None}\n", + " gs_default = {\"mean_sd\", \"median_quartiles\", None}\n", " if plot_kwargs[\"group_summaries\"] not in gs_default:\n", - " raise ValueError('group_summaries must be one of'\n", - " ' these: {}.'.format(gs_default) )\n", + " raise ValueError(\n", + " \"group_summaries must be one of\" \" these: {}.\".format(gs_default)\n", + " )\n", "\n", - " default_group_summary_kwargs = {'zorder': 3, 'lw': 2,\n", - " 'alpha': 1}\n", + " default_group_summary_kwargs = {\"zorder\": 3, \"lw\": 2, \"alpha\": 1}\n", " if plot_kwargs[\"group_summary_kwargs\"] is None:\n", " group_summary_kwargs = default_group_summary_kwargs\n", " else:\n", - " group_summary_kwargs = merge_two_dicts(default_group_summary_kwargs,\n", - " plot_kwargs[\"group_summary_kwargs\"])\n", + " group_summary_kwargs = merge_two_dicts(\n", + " default_group_summary_kwargs, plot_kwargs[\"group_summary_kwargs\"]\n", + " )\n", "\n", " # Create color palette that will be shared across subplots.\n", " color_col = plot_kwargs[\"color_col\"]\n", @@ -293,35 +326,24 @@ " if custom_pal is None:\n", " unsat_colors = 
sns.color_palette(n_colors=n_groups)\n", " else:\n", - "\n", " if isinstance(custom_pal, dict):\n", - " groups_in_palette = {k: v for k,v in custom_pal.items()\n", - " if k in color_groups}\n", - "\n", - " # # check that all the keys in custom_pal are found in the\n", - " # # color column.\n", - " # col_grps = {k for k in color_groups}\n", - " # pal_grps = {k for k in custom_pal.keys()}\n", - " # not_in_pal = pal_grps.difference(col_grps)\n", - " # if len(not_in_pal) > 0:\n", - " # err1 = 'The custom palette keys {} '.format(not_in_pal)\n", - " # err2 = 'are not found in `{}`. Please check.'.format(color_col)\n", - " # errstring = (err1 + err2)\n", - " # raise IndexError(errstring)\n", + " groups_in_palette = {\n", + " k: v for k, v in custom_pal.items() if k in color_groups\n", + " }\n", "\n", " names = groups_in_palette.keys()\n", " unsat_colors = groups_in_palette.values()\n", "\n", " elif isinstance(custom_pal, list):\n", - " unsat_colors = custom_pal[0: n_groups]\n", + " unsat_colors = custom_pal[0:n_groups]\n", "\n", " elif isinstance(custom_pal, str):\n", " # check it is in the list of matplotlib palettes.\n", " if custom_pal in plt.colormaps():\n", " unsat_colors = sns.color_palette(custom_pal, n_groups)\n", " else:\n", - " err1 = 'The specified `custom_palette` {}'.format(custom_pal)\n", - " err2 = ' is not a matplotlib palette. Please check.'\n", + " err1 = \"The specified `custom_palette` {}\".format(custom_pal)\n", + " err2 = \" is not a matplotlib palette. 
Please check.\"\n", " raise ValueError(err1 + err2)\n", "\n", " if custom_pal is None and color_col is None:\n", @@ -351,159 +373,165 @@ " plot_palette_sankey = custom_pal\n", "\n", " # Infer the figsize.\n", - " fig_size = plot_kwargs[\"fig_size\"]\n", + " fig_size = plot_kwargs[\"fig_size\"]\n", " if fig_size is None:\n", " all_groups_count = np.sum([len(i) for i in dabest_obj.idx])\n", " # Increase the width for delta-delta graph\n", " if show_delta2 or show_mini_meta:\n", " all_groups_count += 2\n", - " if is_paired and show_pairs is True and proportional is False:\n", + " if is_paired and show_pairs and proportional is False:\n", " frac = 0.75\n", " else:\n", " frac = 1\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " height_inches = 4\n", " each_group_width_inches = 2.5 * frac\n", " else:\n", " height_inches = 6\n", " each_group_width_inches = 1.5 * frac\n", "\n", - " width_inches = (each_group_width_inches * all_groups_count)\n", + " width_inches = each_group_width_inches * all_groups_count\n", " fig_size = (width_inches, height_inches)\n", "\n", " # Initialise the figure.\n", - " # sns.set(context=\"talk\", style='ticks')\n", - " init_fig_kwargs = dict(figsize=fig_size, dpi=plot_kwargs[\"dpi\"]\n", - " ,tight_layout=True)\n", + " init_fig_kwargs = dict(figsize=fig_size, dpi=plot_kwargs[\"dpi\"], tight_layout=True)\n", "\n", " width_ratios_ga = [2.5, 1]\n", - " \n", - "###################### GRIDKEY HSPACE ALTERATION\n", + "\n", + " ###################### GRIDKEY HSPACE ALTERATION\n", "\n", " # Sets hspace for cummings plots if gridkey is shown.\n", - " if gridkey_rows != None:\n", + " if gridkey_rows is not None:\n", " h_space_cummings = 0.1\n", " else:\n", " h_space_cummings = 0.3\n", - " \n", - " \n", - "###################### END GRIDKEY HSPACE ALTERATION \n", - " \n", + "\n", + " ###################### END GRIDKEY HSPACE ALTERATION\n", + "\n", " if plot_kwargs[\"ax\"] is not None:\n", " # New in v0.2.6.\n", " # Use inset axes to 
create the estimation plot inside a single axes.\n", " # Author: Adam L Nekimken. (PR #73)\n", - " inset_contrast = True\n", " rawdata_axes = plot_kwargs[\"ax\"]\n", " ax_position = rawdata_axes.get_position() # [[x0, y0], [x1, y1]]\n", - " \n", + "\n", " fig = rawdata_axes.get_figure()\n", " fig.patch.set_facecolor(face_color)\n", - " \n", - " if float_contrast is True:\n", + "\n", + " if float_contrast:\n", " axins = rawdata_axes.inset_axes(\n", - " [1, 0,\n", - " width_ratios_ga[1]/width_ratios_ga[0], 1])\n", + " [1, 0, width_ratios_ga[1] / width_ratios_ga[0], 1]\n", + " )\n", " rawdata_axes.set_position( # [l, b, w, h]\n", - " [ax_position.x0,\n", - " ax_position.y0,\n", - " (ax_position.x1 - ax_position.x0) * (width_ratios_ga[0] /\n", - " sum(width_ratios_ga)),\n", - " (ax_position.y1 - ax_position.y0)])\n", + " [\n", + " ax_position.x0,\n", + " ax_position.y0,\n", + " (ax_position.x1 - ax_position.x0)\n", + " * (width_ratios_ga[0] / sum(width_ratios_ga)),\n", + " (ax_position.y1 - ax_position.y0),\n", + " ]\n", + " )\n", "\n", " contrast_axes = axins\n", "\n", " else:\n", " axins = rawdata_axes.inset_axes([0, -1 - h_space_cummings, 1, 1])\n", - " plot_height = ((ax_position.y1 - ax_position.y0) /\n", - " (2 + h_space_cummings))\n", + " plot_height = (ax_position.y1 - ax_position.y0) / (2 + h_space_cummings)\n", " rawdata_axes.set_position(\n", - " [ax_position.x0,\n", - " ax_position.y0 + (1 + h_space_cummings) * plot_height,\n", - " (ax_position.x1 - ax_position.x0),\n", - " plot_height])\n", - "\n", - " # If the contrast axes are NOT floating, create lists to store\n", - " # raw ylims and raw tick intervals, so that I can normalize\n", - " # their ylims later.\n", - " contrast_ax_ylim_low = list()\n", - " contrast_ax_ylim_high = list()\n", - " contrast_ax_ylim_tickintervals = list()\n", + " [\n", + " ax_position.x0,\n", + " ax_position.y0 + (1 + h_space_cummings) * plot_height,\n", + " (ax_position.x1 - ax_position.x0),\n", + " plot_height,\n", + " ]\n", + 
" )\n", + "\n", " contrast_axes = axins\n", " rawdata_axes.contrast_axes = axins\n", "\n", " else:\n", - " inset_contrast = False\n", " # Here, we hardcode some figure parameters.\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " fig, axx = plt.subplots(\n", - " ncols=2,\n", - " gridspec_kw={\"width_ratios\": width_ratios_ga,\n", - " \"wspace\": 0},\n", - " **init_fig_kwargs)\n", + " ncols=2,\n", + " gridspec_kw={\"width_ratios\": width_ratios_ga, \"wspace\": 0},\n", + " **init_fig_kwargs\n", + " )\n", " fig.patch.set_facecolor(face_color)\n", "\n", " else:\n", - " fig, axx = plt.subplots(nrows=2,\n", - " gridspec_kw={\"hspace\": h_space_cummings},\n", - " **init_fig_kwargs)\n", + " fig, axx = plt.subplots(\n", + " nrows=2, gridspec_kw={\"hspace\": h_space_cummings}, **init_fig_kwargs\n", + " )\n", " fig.patch.set_facecolor(face_color)\n", - " # If the contrast axes are NOT floating, create lists to store\n", - " # raw ylims and raw tick intervals, so that I can normalize\n", - " # their ylims later.\n", - " contrast_ax_ylim_low = list()\n", - " contrast_ax_ylim_high = list()\n", - " contrast_ax_ylim_tickintervals = list()\n", - " \n", + "\n", " # Title\n", " title = plot_kwargs[\"title\"]\n", " fontsize_title = plot_kwargs[\"fontsize_title\"]\n", " if title is not None:\n", " fig.suptitle(title, fontsize=fontsize_title)\n", - " rawdata_axes = axx[0]\n", + " rawdata_axes = axx[0]\n", " contrast_axes = axx[1]\n", " rawdata_axes.set_frame_on(False)\n", " contrast_axes.set_frame_on(False)\n", "\n", - " redraw_axes_kwargs = {'colors' : ytick_color,\n", - " 'facecolors' : ytick_color,\n", - " 'lw' : 1,\n", - " 'zorder' : 10,\n", - " 'clip_on' : False}\n", + " redraw_axes_kwargs = {\n", + " \"colors\": ytick_color,\n", + " \"facecolors\": ytick_color,\n", + " \"lw\": 1,\n", + " \"zorder\": 10,\n", + " \"clip_on\": False,\n", + " }\n", "\n", " swarm_ylim = plot_kwargs[\"swarm_ylim\"]\n", "\n", " if swarm_ylim is not None:\n", " 
rawdata_axes.set_ylim(swarm_ylim)\n", "\n", - " one_sankey = False if is_paired is not None else False # Flag to indicate if only one sankey is plotted.\n", - " two_col_sankey = True if proportional == True and one_sankey == False and sankey == True and flow == False else False\n", + " one_sankey = (\n", + " False if is_paired is not None else False\n", + " ) # Flag to indicate if only one sankey is plotted.\n", + " two_col_sankey = (\n", + " True if proportional and not one_sankey and sankey and not flow else False\n", + " )\n", "\n", - " if show_pairs is True:\n", + " if show_pairs:\n", " # Determine temp_idx based on is_paired and proportional conditions\n", " if is_paired == \"baseline\":\n", - " idx_pairs = [(control, test) for i in idx for control, test in zip([i[0]] * (len(i) - 1), i[1:])]\n", + " idx_pairs = [\n", + " (control, test)\n", + " for i in idx\n", + " for control, test in zip([i[0]] * (len(i) - 1), i[1:])\n", + " ]\n", " temp_idx = idx if not proportional else idx_pairs\n", " else:\n", - " idx_pairs = [(control, test) for i in idx for control, test in zip(i[:-1], i[1:])]\n", + " idx_pairs = [\n", + " (control, test) for i in idx for control, test in zip(i[:-1], i[1:])\n", + " ]\n", " temp_idx = idx if not proportional else idx_pairs\n", "\n", " # Determine temp_all_plot_groups based on proportional condition\n", " plot_groups = [item for i in temp_idx for item in i]\n", " temp_all_plot_groups = all_plot_groups if not proportional else plot_groups\n", - " \n", - " if proportional==False:\n", - " # Plot the raw data as a slopegraph.\n", - " # Pivot the long (melted) data.\n", + "\n", + " if not proportional:\n", + " # Plot the raw data as a slopegraph.\n", + " # Pivot the long (melted) data.\n", " if color_col is None:\n", " pivot_values = [yvar]\n", " else:\n", " pivot_values = [yvar, color_col]\n", - " pivoted_plot_data = pd.pivot(data=plot_data, index=dabest_obj.id_col,\n", - " columns=xvar, values=pivot_values)\n", + " pivoted_plot_data = 
pd.pivot(\n", + " data=plot_data,\n", + " index=dabest_obj.id_col,\n", + " columns=xvar,\n", + " values=pivot_values,\n", + " )\n", " x_start = 0\n", " for ii, current_tuple in enumerate(temp_idx):\n", - " current_pair = pivoted_plot_data.loc[:, pd.MultiIndex.from_product([pivot_values, current_tuple])].dropna()\n", + " current_pair = pivoted_plot_data.loc[\n", + " :, pd.MultiIndex.from_product([pivot_values, current_tuple])\n", + " ].dropna()\n", " grp_count = len(current_tuple)\n", " # Iterate through the data for the current tuple.\n", " for ID, observation in current_pair.iterrows():\n", @@ -511,136 +539,174 @@ " y_points = observation[yvar].tolist()\n", "\n", " if color_col is None:\n", - " slopegraph_kwargs['color'] = ytick_color\n", + " slopegraph_kwargs[\"color\"] = ytick_color\n", " else:\n", " color_key = observation[color_col][0]\n", - " if isinstance(color_key, (str, np.int64, np.float64)) == True:\n", - " slopegraph_kwargs['color'] = plot_palette_raw[color_key]\n", - " slopegraph_kwargs['label'] = color_key\n", + " if isinstance(color_key, (str, np.int64, np.float64)):\n", + " slopegraph_kwargs[\"color\"] = plot_palette_raw[color_key]\n", + " slopegraph_kwargs[\"label\"] = color_key\n", "\n", - " rawdata_axes.plot(x_points, y_points, **slopegraph_kwargs) \n", + " rawdata_axes.plot(x_points, y_points, **slopegraph_kwargs)\n", "\n", - " \n", " x_start = x_start + grp_count\n", - " \n", - " ##################### DELTA PTS ON CONTRAST PLOT WIP \n", + "\n", + " ##################### DELTA PTS ON CONTRAST PLOT WIP\n", "\n", " contrast_show_deltas = plot_kwargs[\"contrast_show_deltas\"]\n", - " \n", - " if is_paired == None:\n", + "\n", + " if is_paired is None:\n", " contrast_show_deltas = False\n", - " \n", - " if contrast_show_deltas == True:\n", - " \n", - " trans = plt.gca().transData\n", - " \n", + "\n", + " if contrast_show_deltas:\n", " delta_plot_data_temp = plot_data.copy()\n", " delta_id_col = dabest_obj.id_col\n", - " if color_col != None:\n", - " 
delta_plot_data = delta_plot_data_temp[[xvar, yvar, delta_id_col, color_col]]\n", - " deltapts_args = {\"hue\" : color_col, \n", - " \"palette\" : plot_palette_raw,\n", - " \"marker\" : \"^\",\n", - " \"alpha\" : 0.5}\n", - " \n", + " if color_col is not None:\n", + " delta_plot_data = delta_plot_data_temp[\n", + " [xvar, yvar, delta_id_col, color_col]\n", + " ]\n", + " deltapts_args = {\n", + " \"hue\": color_col,\n", + " \"palette\": plot_palette_raw,\n", + " \"marker\": \"^\",\n", + " \"alpha\": 0.5,\n", + " }\n", + "\n", " else:\n", " delta_plot_data = delta_plot_data_temp[[xvar, yvar, delta_id_col]]\n", - " deltapts_args = {\"color\" : \"k\",\n", - " \"marker\" : \"^\",\n", - " \"alpha\" : 0.5}\n", - " \n", + " deltapts_args = {\"color\": \"k\", \"marker\": \"^\", \"alpha\": 0.5}\n", + "\n", " final_deltas = pd.DataFrame()\n", " for i in idx:\n", " for j in i:\n", " if i.index(j) != 0:\n", - " temp_df_exp = delta_plot_data[delta_plot_data[xvar].str.contains(j)].reset_index(drop=True)\n", + " temp_df_exp = delta_plot_data[\n", + " delta_plot_data[xvar].str.contains(j)\n", + " ].reset_index(drop=True)\n", " if is_paired == \"baseline\":\n", - " temp_df_cont = delta_plot_data[delta_plot_data[xvar].str.contains(i[0])].reset_index(drop=True)\n", + " temp_df_cont = delta_plot_data[\n", + " delta_plot_data[xvar].str.contains(i[0])\n", + " ].reset_index(drop=True)\n", " elif is_paired == \"sequential\":\n", - " temp_df_cont = delta_plot_data[delta_plot_data[xvar].str.contains(i[i.index(j) - 1])].reset_index(drop=True)\n", + " temp_df_cont = delta_plot_data[\n", + " delta_plot_data[xvar].str.contains(\n", + " i[i.index(j) - 1]\n", + " )\n", + " ].reset_index(drop=True)\n", " delta_df = temp_df_exp.copy()\n", " delta_df[yvar] = temp_df_exp[yvar] - temp_df_cont[yvar]\n", - " final_deltas = pd.concat([final_deltas, delta_df]) \n", - " \n", - " \n", + " final_deltas = pd.concat([final_deltas, delta_df])\n", + "\n", " # Plot the raw data as a swarmplot.\n", - " 
deltapts_plot = sns.swarmplot(data=final_deltas, x=xvar, y=yvar,\n", - " ax=contrast_axes,\n", - " order=all_plot_groups, \n", - " zorder=2,\n", - " **deltapts_args)\n", + " deltapts_plot = sns.swarmplot(\n", + " data=final_deltas,\n", + " x=xvar,\n", + " y=yvar,\n", + " ax=contrast_axes,\n", + " order=all_plot_groups,\n", + " zorder=2,\n", + " **deltapts_args\n", + " )\n", " contrast_axes.legend().set_visible(False)\n", - " \n", - " ##################### DELTA PTS ON CONTRAST PLOT END\n", "\n", - " \n", + " ##################### DELTA PTS ON CONTRAST PLOT END\n", + "\n", " # Set the tick labels, because the slopegraph plotting doesn't.\n", " rawdata_axes.set_xticks(np.arange(0, len(temp_all_plot_groups)))\n", " rawdata_axes.set_xticklabels(temp_all_plot_groups)\n", - " \n", + "\n", " else:\n", " # Plot the raw data as a set of Sankey Diagrams aligned like barplot.\n", " group_summaries = plot_kwargs[\"group_summaries\"]\n", " if group_summaries is None:\n", " group_summaries = \"mean_sd\"\n", " err_color = plot_kwargs[\"err_color\"]\n", - " if err_color == None:\n", + " if err_color is None:\n", " err_color = \"black\"\n", "\n", - " if show_pairs is True:\n", + " if show_pairs:\n", " sankey_control_group = []\n", " sankey_test_group = []\n", " # Design for Sankey Flow Diagram\n", - " sankey_idx = [(control, test) for i in idx for control, test in zip(i[:], (i[1:]+(i[0],)))]\\\n", - " if flow is True else temp_idx\n", + " sankey_idx = (\n", + " [\n", + " (control, test)\n", + " for i in idx\n", + " for control, test in zip(i[:], (i[1:] + (i[0],)))\n", + " ]\n", + " if flow\n", + " else temp_idx\n", + " )\n", " for i in sankey_idx:\n", " sankey_control_group.append(i[0])\n", - " sankey_test_group.append(i[1]) \n", + " sankey_test_group.append(i[1])\n", "\n", " if len(temp_all_plot_groups) == 2:\n", - " one_sankey = True \n", - " sankey_control_group.pop(); sankey_test_group.pop() # Remove the last element from two lists\n", + " one_sankey = True\n", + " 
sankey_control_group.pop()\n", + " sankey_test_group.pop() # Remove the last element from two lists\n", "\n", " # two_col_sankey = True if proportional == True and one_sankey == False and sankey == True and flow == False else False\n", "\n", " # Replace the paired proportional plot with sankey diagram\n", - " sankeyplot = sankeydiag(plot_data, xvar=xvar, yvar=yvar, \n", - " left_idx=sankey_control_group, \n", - " right_idx=sankey_test_group,\n", - " palette=plot_palette_sankey,\n", - " ax=rawdata_axes, \n", - " one_sankey=one_sankey,\n", - " **sankey_kwargs)\n", - " \n", + " sankeyplot = sankeydiag(\n", + " plot_data,\n", + " xvar=xvar,\n", + " yvar=yvar,\n", + " left_idx=sankey_control_group,\n", + " right_idx=sankey_test_group,\n", + " palette=plot_palette_sankey,\n", + " ax=rawdata_axes,\n", + " one_sankey=one_sankey,\n", + " **sankey_kwargs\n", + " )\n", + "\n", " else:\n", - " if proportional==False:\n", + " if not proportional:\n", " # Plot the raw data as a swarmplot.\n", - " rawdata_plot = sns.swarmplot(data=plot_data, x=xvar, y=yvar,\n", - " ax=rawdata_axes,\n", - " order=all_plot_groups, hue=color_col,\n", - " palette=plot_palette_raw, zorder=1,\n", - " **swarmplot_kwargs)\n", + " rawdata_plot = sns.swarmplot(\n", + " data=plot_data,\n", + " x=xvar,\n", + " y=yvar,\n", + " ax=rawdata_axes,\n", + " order=all_plot_groups,\n", + " hue=color_col,\n", + " palette=plot_palette_raw,\n", + " zorder=1,\n", + " **swarmplot_kwargs\n", + " )\n", " else:\n", " # Plot the raw data as a barplot.\n", - " bar1_df = pd.DataFrame({xvar: all_plot_groups, 'proportion': np.ones(len(all_plot_groups))})\n", - " bar1 = sns.barplot(data=bar1_df, x=xvar, y=\"proportion\",\n", - " ax=rawdata_axes,\n", - " order=all_plot_groups,\n", - " linewidth=2, facecolor=(1, 1, 1, 0), edgecolor=bar_color,\n", - " zorder=1)\n", - " bar2 = sns.barplot(data=plot_data, x=xvar, y=yvar,\n", - " ax=rawdata_axes,\n", - " order=all_plot_groups,\n", - " palette=plot_palette_bar,\n", - " zorder=1,\n", - " 
**barplot_kwargs)\n", + " bar1_df = pd.DataFrame(\n", + " {xvar: all_plot_groups, \"proportion\": np.ones(len(all_plot_groups))}\n", + " )\n", + " bar1 = sns.barplot(\n", + " data=bar1_df,\n", + " x=xvar,\n", + " y=\"proportion\",\n", + " ax=rawdata_axes,\n", + " order=all_plot_groups,\n", + " linewidth=2,\n", + " facecolor=(1, 1, 1, 0),\n", + " edgecolor=bar_color,\n", + " zorder=1,\n", + " )\n", + " bar2 = sns.barplot(\n", + " data=plot_data,\n", + " x=xvar,\n", + " y=yvar,\n", + " ax=rawdata_axes,\n", + " order=all_plot_groups,\n", + " palette=plot_palette_bar,\n", + " zorder=1,\n", + " **barplot_kwargs\n", + " )\n", " # adjust the width of bars\n", " bar_width = plot_kwargs[\"bar_width\"]\n", " for bar in bar1.patches:\n", " x = bar.get_x()\n", " width = bar.get_width()\n", - " centre = x + width / 2.\n", - " bar.set_x(centre - bar_width / 2.)\n", + " centre = x + width / 2.0\n", + " bar.set_x(centre - bar_width / 2.0)\n", " bar.set_width(bar_width)\n", "\n", " # Plot the gapped line summaries, if this is not a Cumming plot.\n", @@ -649,7 +715,7 @@ " if group_summaries is None:\n", " group_summaries = \"mean_sd\"\n", "\n", - " if group_summaries is not None and proportional==False:\n", + " if group_summaries is not None and not proportional:\n", " # Create list to gather xspans.\n", " xspans = []\n", " line_colors = []\n", @@ -662,33 +728,42 @@ " # we have got a None, so skip and move on.\n", " pass\n", "\n", - " if bootstraps_color_by_group is True:\n", + " if bootstraps_color_by_group:\n", " line_colors.append(plot_palette_raw[all_plot_groups[jj]])\n", "\n", " if len(line_colors) != len(all_plot_groups):\n", " line_colors = ytick_color\n", "\n", - " error_bar(plot_data, x=xvar, y=yvar,\n", - " # Hardcoded offset...\n", - " offset=xspans + np.array(plot_kwargs[\"group_summaries_offset\"]),\n", - " line_color=line_colors,\n", - " gap_width_percent=1.5,\n", - " type=group_summaries, ax=rawdata_axes,\n", - " method=\"gapped_lines\",\n", - " 
**group_summary_kwargs)\n", - "\n", - " if group_summaries is not None and proportional == True:\n", - "\n", + " error_bar(\n", + " plot_data,\n", + " x=xvar,\n", + " y=yvar,\n", + " # Hardcoded offset...\n", + " offset=xspans + np.array(plot_kwargs[\"group_summaries_offset\"]),\n", + " line_color=line_colors,\n", + " gap_width_percent=1.5,\n", + " type=group_summaries,\n", + " ax=rawdata_axes,\n", + " method=\"gapped_lines\",\n", + " **group_summary_kwargs\n", + " )\n", + "\n", + " if group_summaries is not None and proportional:\n", " err_color = plot_kwargs[\"err_color\"]\n", - " if err_color == None:\n", + " if err_color is None:\n", " err_color = \"black\"\n", - " error_bar(plot_data, x=xvar, y=yvar,\n", - " offset=0,\n", - " line_color=err_color,\n", - " gap_width_percent=1.5,\n", - " type=group_summaries, ax=rawdata_axes,\n", - " method=\"proportional_error_bar\",\n", - " **group_summary_kwargs)\n", + " error_bar(\n", + " plot_data,\n", + " x=xvar,\n", + " y=yvar,\n", + " offset=0,\n", + " line_color=err_color,\n", + " gap_width_percent=1.5,\n", + " type=group_summaries,\n", + " ax=rawdata_axes,\n", + " method=\"proportional_error_bar\",\n", + " **group_summary_kwargs\n", + " )\n", "\n", " # Add the counts to the rawdata axes xticks.\n", " counts = plot_data.groupby(xvar).count()[yvar]\n", @@ -698,7 +773,7 @@ " for xticklab in rawdata_axes.xaxis.get_ticklabels():\n", " t = xticklab.get_text()\n", " if t.rfind(\"\\n\") != -1:\n", - " te = t[t.rfind(\"\\n\") + len(\"\\n\"):]\n", + " te = t[t.rfind(\"\\n\") + len(\"\\n\") :]\n", " N = str(counts.loc[te])\n", " te = t\n", " else:\n", @@ -707,13 +782,13 @@ "\n", " ticks_with_counts.append(\"{}\\nN = {}\".format(te, N))\n", "\n", - " if plot_kwargs['fontsize_rawxlabel'] is not None:\n", - " fontsize_rawxlabel = plot_kwargs['fontsize_rawxlabel']\n", - " rawdata_axes.set_xticklabels(ticks_with_counts,fontsize=fontsize_rawxlabel)\n", + " if plot_kwargs[\"fontsize_rawxlabel\"] is not None:\n", + " fontsize_rawxlabel = 
plot_kwargs[\"fontsize_rawxlabel\"]\n", + " rawdata_axes.set_xticklabels(ticks_with_counts, fontsize=fontsize_rawxlabel)\n", "\n", " # Save the handles and labels for the legend.\n", " handles, labels = rawdata_axes.get_legend_handles_labels()\n", - " legend_labels = [l for l in labels]\n", + " legend_labels = [l for l in labels]\n", " legend_handles = [h for h in handles]\n", " if bootstraps_color_by_group is False:\n", " rawdata_axes.legend().set_visible(False)\n", @@ -724,11 +799,11 @@ "\n", " # Plot effect sizes and bootstraps.\n", " # Take note of where the `control` groups are.\n", - " if is_paired == \"baseline\" and show_pairs == True:\n", + " if is_paired == \"baseline\" and show_pairs:\n", " if two_col_sankey:\n", " ticks_to_skip = []\n", - " ticks_to_plot = np.arange(0, len(temp_all_plot_groups)/2).tolist()\n", - " ticks_to_start_twocol_sankey = np.cumsum([len(i)-1 for i in idx]).tolist()\n", + " ticks_to_plot = np.arange(0, len(temp_all_plot_groups) / 2).tolist()\n", + " ticks_to_start_twocol_sankey = np.cumsum([len(i) - 1 for i in idx]).tolist()\n", " ticks_to_start_twocol_sankey.pop()\n", " ticks_to_start_twocol_sankey.insert(0, 0)\n", " else:\n", @@ -737,60 +812,63 @@ " ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist()\n", " ticks_to_skip.insert(0, 0)\n", " # Then obtain the ticks where we have to plot the effect sizes.\n", - " ticks_to_plot = [t for t in range(0, len(all_plot_groups))\n", - " if t not in ticks_to_skip]\n", + " ticks_to_plot = [\n", + " t for t in range(0, len(all_plot_groups)) if t not in ticks_to_skip\n", + " ]\n", " ticks_to_skip_contrast = np.cumsum([(len(t)) for t in idx])[:-1].tolist()\n", " ticks_to_skip_contrast.insert(0, 0)\n", " else:\n", " if two_col_sankey:\n", " ticks_to_skip = [len(sankey_control_group)]\n", " # Then obtain the ticks where we have to plot the effect sizes.\n", - " ticks_to_plot = [t for t in range(0, len(temp_idx))\n", - " if t not in ticks_to_skip]\n", + " ticks_to_plot = [\n", + " t for t 
in range(0, len(temp_idx)) if t not in ticks_to_skip\n", + " ]\n", " ticks_to_skip = []\n", - " ticks_to_start_twocol_sankey = np.cumsum([len(i)-1 for i in idx]).tolist()\n", + " ticks_to_start_twocol_sankey = np.cumsum([len(i) - 1 for i in idx]).tolist()\n", " ticks_to_start_twocol_sankey.pop()\n", " ticks_to_start_twocol_sankey.insert(0, 0)\n", " else:\n", " ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist()\n", " ticks_to_skip.insert(0, 0)\n", " # Then obtain the ticks where we have to plot the effect sizes.\n", - " ticks_to_plot = [t for t in range(0, len(all_plot_groups))\n", - " if t not in ticks_to_skip]\n", + " ticks_to_plot = [\n", + " t for t in range(0, len(all_plot_groups)) if t not in ticks_to_skip\n", + " ]\n", "\n", " # Plot the bootstraps, then the effect sizes and CIs.\n", - " es_marker_size = plot_kwargs[\"es_marker_size\"]\n", + " es_marker_size = plot_kwargs[\"es_marker_size\"]\n", " halfviolin_alpha = plot_kwargs[\"halfviolin_alpha\"]\n", "\n", " ci_type = plot_kwargs[\"ci_type\"]\n", "\n", - " results = EffectSizeDataFrame.results\n", + " results = effectsize_df.results\n", " contrast_xtick_labels = []\n", "\n", - "\n", " for j, tick in enumerate(ticks_to_plot):\n", - " current_group = results.test[j]\n", - " current_control = results.control[j]\n", + " current_group = results.test[j]\n", + " current_control = results.control[j]\n", " current_bootstrap = results.bootstraps[j]\n", - " current_effsize = results.difference[j]\n", + " current_effsize = results.difference[j]\n", " if ci_type == \"bca\":\n", - " current_ci_low = results.bca_low[j]\n", - " current_ci_high = results.bca_high[j]\n", + " current_ci_low = results.bca_low[j]\n", + " current_ci_high = results.bca_high[j]\n", " else:\n", - " current_ci_low = results.pct_low[j]\n", - " current_ci_high = results.pct_high[j]\n", - "\n", + " current_ci_low = results.pct_low[j]\n", + " current_ci_high = results.pct_high[j]\n", "\n", " # Create the violinplot.\n", " # New in v0.2.6: 
drop negative infinities before plotting.\n", - " v = contrast_axes.violinplot(current_bootstrap[~np.isinf(current_bootstrap)],\n", - " positions=[tick],\n", - " **violinplot_kwargs)\n", + " v = contrast_axes.violinplot(\n", + " current_bootstrap[~np.isinf(current_bootstrap)],\n", + " positions=[tick],\n", + " **violinplot_kwargs\n", + " )\n", " # Turn the violinplot into half, and color it the same as the swarmplot.\n", " # Do this only if the color column is not specified.\n", " # Ideally, the alpha (transparency) fo the violin plot should be\n", " # less than one so the effect size and CIs are visible.\n", - " if bootstraps_color_by_group is True:\n", + " if bootstraps_color_by_group:\n", " fc = plot_palette_contrast[current_group]\n", " else:\n", " fc = \"grey\"\n", @@ -798,96 +876,110 @@ " halfviolin(v, fill_color=fc, alpha=halfviolin_alpha)\n", "\n", " # Plot the effect size.\n", - " contrast_axes.plot([tick], current_effsize, marker='o',\n", - " color=ytick_color,\n", - " markersize=es_marker_size)\n", - " \n", - "################## SHOW ES ON CONTRAST PLOT WIP \n", + " contrast_axes.plot(\n", + " [tick],\n", + " current_effsize,\n", + " marker=\"o\",\n", + " color=ytick_color,\n", + " markersize=es_marker_size,\n", + " )\n", + "\n", + " ################## SHOW ES ON CONTRAST PLOT WIP\n", "\n", " contrast_show_es = plot_kwargs[\"contrast_show_es\"]\n", - " es_sf = plot_kwargs['es_sf']\n", - " es_fontsize = plot_kwargs['es_fontsize']\n", - " \n", - " if gridkey_show_es == True:\n", + " es_sf = plot_kwargs[\"es_sf\"]\n", + " es_fontsize = plot_kwargs[\"es_fontsize\"]\n", + "\n", + " if gridkey_show_es:\n", " contrast_show_es = False\n", - " \n", "\n", - " \n", " effsize_for_print = current_effsize\n", - " \n", - " printed_es = np.format_float_positional(effsize_for_print,\n", - " precision=es_sf,\n", - " sign=True,\n", - " trim= 'k',\n", - " min_digits = es_sf)\n", - " if contrast_show_es == True:\n", + "\n", + " printed_es = np.format_float_positional(\n", + 
" effsize_for_print, precision=es_sf, sign=True, trim=\"k\", min_digits=es_sf\n", + " )\n", + " if contrast_show_es:\n", " if effsize_for_print < 0:\n", " textoffset = 10\n", " else:\n", " textoffset = 15\n", - " contrast_axes.annotate(text=printed_es, \n", - " xy = (tick, effsize_for_print),\n", - " xytext = (-textoffset-len(printed_es)*es_fontsize/2,-es_fontsize/2),\n", - " textcoords = \"offset points\",\n", - " **{ \"fontsize\" : es_fontsize })\n", - " \n", - "################## SHOW ES ON CONTRAST PLOT END \n", - " \n", - " # Plot the confidence interval.\n", - " contrast_axes.plot([tick, tick],\n", - " [current_ci_low, current_ci_high],\n", - " linestyle=\"-\",\n", - " color=ytick_color,\n", - " linewidth=group_summary_kwargs['lw'])\n", + " contrast_axes.annotate(\n", + " text=printed_es,\n", + " xy=(tick, effsize_for_print),\n", + " xytext=(\n", + " -textoffset - len(printed_es) * es_fontsize / 2,\n", + " -es_fontsize / 2,\n", + " ),\n", + " textcoords=\"offset points\",\n", + " **{\"fontsize\": es_fontsize}\n", + " )\n", + "\n", + " ################## SHOW ES ON CONTRAST PLOT END\n", "\n", - " contrast_xtick_labels.append(\"{}\\nminus\\n{}\".format(current_group,\n", - " current_control))\n", + " # Plot the confidence interval.\n", + " contrast_axes.plot(\n", + " [tick, tick],\n", + " [current_ci_low, current_ci_high],\n", + " linestyle=\"-\",\n", + " color=ytick_color,\n", + " linewidth=group_summary_kwargs[\"lw\"],\n", + " )\n", + "\n", + " contrast_xtick_labels.append(\n", + " \"{}\\nminus\\n{}\".format(current_group, current_control)\n", + " )\n", "\n", " # Plot mini-meta violin\n", " if show_mini_meta or show_delta2:\n", " if show_mini_meta:\n", - " mini_meta_delta = EffectSizeDataFrame.mini_meta_delta\n", - " data = mini_meta_delta.bootstraps_weighted_delta\n", - " difference = mini_meta_delta.difference\n", + " mini_meta_delta = effectsize_df.mini_meta_delta\n", + " data = mini_meta_delta.bootstraps_weighted_delta\n", + " difference = 
mini_meta_delta.difference\n", " if ci_type == \"bca\":\n", - " ci_low = mini_meta_delta.bca_low\n", - " ci_high = mini_meta_delta.bca_high\n", + " ci_low = mini_meta_delta.bca_low\n", + " ci_high = mini_meta_delta.bca_high\n", " else:\n", - " ci_low = mini_meta_delta.pct_low\n", - " ci_high = mini_meta_delta.pct_high\n", - " else: \n", - " delta_delta = EffectSizeDataFrame.delta_delta\n", - " data = delta_delta.bootstraps_delta_delta\n", - " difference = delta_delta.difference\n", + " ci_low = mini_meta_delta.pct_low\n", + " ci_high = mini_meta_delta.pct_high\n", + " else:\n", + " delta_delta = effectsize_df.delta_delta\n", + " data = delta_delta.bootstraps_delta_delta\n", + " difference = delta_delta.difference\n", " if ci_type == \"bca\":\n", - " ci_low = delta_delta.bca_low\n", - " ci_high = delta_delta.bca_high\n", + " ci_low = delta_delta.bca_low\n", + " ci_high = delta_delta.bca_high\n", " else:\n", - " ci_low = delta_delta.pct_low\n", - " ci_high = delta_delta.pct_high\n", - " #Create the violinplot.\n", - " #New in v0.2.6: drop negative infinities before plotting.\n", - " position = max(rawdata_axes.get_xticks())+2\n", - " v = contrast_axes.violinplot(data[~np.isinf(data)],\n", - " positions=[position],\n", - " **violinplot_kwargs)\n", + " ci_low = delta_delta.pct_low\n", + " ci_high = delta_delta.pct_high\n", + " # Create the violinplot.\n", + " # New in v0.2.6: drop negative infinities before plotting.\n", + " position = max(rawdata_axes.get_xticks()) + 2\n", + " v = contrast_axes.violinplot(\n", + " data[~np.isinf(data)], positions=[position], **violinplot_kwargs\n", + " )\n", "\n", " fc = \"grey\"\n", "\n", " halfviolin(v, fill_color=fc, alpha=halfviolin_alpha)\n", "\n", " # Plot the effect size.\n", - " contrast_axes.plot([position], difference, marker='o',\n", - " color=ytick_color,\n", - " markersize=es_marker_size)\n", + " contrast_axes.plot(\n", + " [position],\n", + " difference,\n", + " marker=\"o\",\n", + " color=ytick_color,\n", + " 
markersize=es_marker_size,\n", + " )\n", " # Plot the confidence interval.\n", - " contrast_axes.plot([position, position],\n", - " [ci_low, ci_high],\n", - " linestyle=\"-\",\n", - " color=ytick_color,\n", - " linewidth=group_summary_kwargs['lw'])\n", + " contrast_axes.plot(\n", + " [position, position],\n", + " [ci_low, ci_high],\n", + " linestyle=\"-\",\n", + " color=ytick_color,\n", + " linewidth=group_summary_kwargs[\"lw\"],\n", + " )\n", " if show_mini_meta:\n", - " contrast_xtick_labels.extend([\"\",\"Weighted delta\"])\n", + " contrast_xtick_labels.extend([\"\", \"Weighted delta\"])\n", " elif effect_size == \"delta_g\":\n", " contrast_xtick_labels.extend([\"\", \"deltas' g\"])\n", " else:\n", @@ -899,22 +991,22 @@ " contrast_axes.set_xticks(rawdata_axes.get_xticks())\n", " else:\n", " temp = rawdata_axes.get_xticks()\n", - " temp = np.append(temp, [max(temp)+1, max(temp)+2])\n", + " temp = np.append(temp, [max(temp) + 1, max(temp) + 2])\n", " contrast_axes.set_xticks(temp)\n", "\n", - " if show_pairs is True:\n", + " if show_pairs:\n", " max_x = contrast_axes.get_xlim()[1]\n", " rawdata_axes.set_xlim(-0.375, max_x)\n", "\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " contrast_axes.set_xlim(0.5, 1.5)\n", " elif show_delta2 or show_mini_meta:\n", " # Increase the xlim of raw data by 2\n", " temp = rawdata_axes.get_xlim()\n", " if show_pairs:\n", - " rawdata_axes.set_xlim(temp[0], temp[1]+0.25)\n", + " rawdata_axes.set_xlim(temp[0], temp[1] + 0.25)\n", " else:\n", - " rawdata_axes.set_xlim(temp[0], temp[1]+2)\n", + " rawdata_axes.set_xlim(temp[0], temp[1] + 2)\n", " contrast_axes.set_xlim(rawdata_axes.get_xlim())\n", " else:\n", " contrast_axes.set_xlim(rawdata_axes.get_xlim())\n", @@ -923,55 +1015,67 @@ " for t in ticks_to_skip:\n", " contrast_xtick_labels.insert(t, \"\")\n", "\n", - " if plot_kwargs['fontsize_contrastxlabel'] is not None:\n", - " fontsize_contrastxlabel = plot_kwargs['fontsize_contrastxlabel']\n", + " if 
plot_kwargs[\"fontsize_contrastxlabel\"] is not None:\n", + " fontsize_contrastxlabel = plot_kwargs[\"fontsize_contrastxlabel\"]\n", "\n", - " contrast_axes.set_xticklabels(contrast_xtick_labels,fontsize=fontsize_contrastxlabel)\n", + " contrast_axes.set_xticklabels(\n", + " contrast_xtick_labels, fontsize=fontsize_contrastxlabel\n", + " )\n", "\n", " if bootstraps_color_by_group is False:\n", " legend_labels_unique = np.unique(legend_labels)\n", " unique_idx = np.unique(legend_labels, return_index=True)[1]\n", - " legend_handles_unique = (pd.Series(legend_handles, dtype=\"object\").loc[unique_idx]).tolist()\n", + " legend_handles_unique = (\n", + " pd.Series(legend_handles, dtype=\"object\").loc[unique_idx]\n", + " ).tolist()\n", "\n", " if len(legend_handles_unique) > 0:\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " axes_with_legend = contrast_axes\n", - " if show_pairs is True:\n", + " if show_pairs:\n", " bta = (1.75, 1.02)\n", " else:\n", " bta = (1.5, 1.02)\n", " else:\n", " axes_with_legend = rawdata_axes\n", - " if show_pairs is True:\n", - " bta = (1.02, 1.)\n", + " if show_pairs:\n", + " bta = (1.02, 1.0)\n", " else:\n", - " bta = (1.,1.)\n", - " leg = axes_with_legend.legend(legend_handles_unique,\n", - " legend_labels_unique,\n", - " bbox_to_anchor=bta,\n", - " **legend_kwargs)\n", - " if show_pairs is True:\n", + " bta = (1.0, 1.0)\n", + " leg = axes_with_legend.legend(\n", + " legend_handles_unique,\n", + " legend_labels_unique,\n", + " bbox_to_anchor=bta,\n", + " **legend_kwargs\n", + " )\n", + " if show_pairs:\n", " for line in leg.get_lines():\n", " line.set_linewidth(3.0)\n", "\n", " og_ylim_raw = rawdata_axes.get_ylim()\n", " og_xlim_raw = rawdata_axes.get_xlim()\n", "\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " # For Gardner-Altman plots only.\n", "\n", " # Normalize ylims and despine the floating contrast axes.\n", " # Check that the effect size is within the swarm ylims.\n", - " if effect_size_type 
in [\"mean_diff\", \"cohens_d\", \"hedges_g\",\"cohens_h\"]:\n", - " control_group_summary = plot_data.groupby(xvar)\\\n", - " .mean(numeric_only=True).loc[current_control, yvar]\n", - " test_group_summary = plot_data.groupby(xvar)\\\n", - " .mean(numeric_only=True).loc[current_group, yvar]\n", + " if effect_size_type in [\"mean_diff\", \"cohens_d\", \"hedges_g\", \"cohens_h\"]:\n", + " control_group_summary = (\n", + " plot_data.groupby(xvar)\n", + " .mean(numeric_only=True)\n", + " .loc[current_control, yvar]\n", + " )\n", + " test_group_summary = (\n", + " plot_data.groupby(xvar).mean(numeric_only=True).loc[current_group, yvar]\n", + " )\n", " elif effect_size_type == \"median_diff\":\n", - " control_group_summary = plot_data.groupby(xvar)\\\n", - " .median().loc[current_control, yvar]\n", - " test_group_summary = plot_data.groupby(xvar)\\\n", - " .median().loc[current_group, yvar]\n", + " control_group_summary = (\n", + " plot_data.groupby(xvar).median().loc[current_control, yvar]\n", + " )\n", + " test_group_summary = (\n", + " plot_data.groupby(xvar).median().loc[current_group, yvar]\n", + " )\n", "\n", " if swarm_ylim is None:\n", " swarm_ylim = rawdata_axes.get_ylim()\n", @@ -979,7 +1083,7 @@ " _, contrast_xlim_max = contrast_axes.get_xlim()\n", "\n", " difference = float(results.difference[0])\n", - " \n", + "\n", " if effect_size_type in [\"mean_diff\", \"median_diff\"]:\n", " # Align 0 of contrast_axes to reference group mean of rawdata_axes.\n", " # If the effect size is positive, shift the contrast axis up.\n", @@ -997,48 +1101,53 @@ " og_ylim_contrast = rawdata_axes.get_ylim() - np.array(control_group_summary)\n", "\n", " contrast_axes.set_ylim(og_ylim_contrast)\n", - " contrast_axes.set_xlim(contrast_xlim_max-1, contrast_xlim_max)\n", + " contrast_axes.set_xlim(contrast_xlim_max - 1, contrast_xlim_max)\n", "\n", - " elif effect_size_type in [\"cohens_d\", \"hedges_g\",\"cohens_h\"]:\n", + " elif effect_size_type in [\"cohens_d\", \"hedges_g\", 
\"cohens_h\"]:\n", " if is_paired:\n", " which_std = 1\n", " else:\n", " which_std = 0\n", " temp_control = plot_data[plot_data[xvar] == current_control][yvar]\n", - " temp_test = plot_data[plot_data[xvar] == current_group][yvar]\n", - " \n", + " temp_test = plot_data[plot_data[xvar] == current_group][yvar]\n", + "\n", " stds = _compute_standardizers(temp_control, temp_test)\n", " if is_paired:\n", " pooled_sd = stds[1]\n", " else:\n", " pooled_sd = stds[0]\n", - " \n", - " if effect_size_type == 'hedges_g':\n", - " gby_count = plot_data.groupby(xvar).count()\n", + "\n", + " if effect_size_type == \"hedges_g\":\n", + " gby_count = plot_data.groupby(xvar).count()\n", " len_control = gby_count.loc[current_control, yvar]\n", - " len_test = gby_count.loc[current_group, yvar]\n", - " \n", - " hg_correction_factor = _compute_hedges_correction_factor(len_control, len_test)\n", - " \n", + " len_test = gby_count.loc[current_group, yvar]\n", + "\n", + " hg_correction_factor = _compute_hedges_correction_factor(\n", + " len_control, len_test\n", + " )\n", + "\n", " ylim_scale_factor = pooled_sd / hg_correction_factor\n", "\n", " elif effect_size_type == \"cohens_h\":\n", - " ylim_scale_factor = (np.mean(temp_test)-np.mean(temp_control)) / difference\n", + " ylim_scale_factor = (\n", + " np.mean(temp_test) - np.mean(temp_control)\n", + " ) / difference\n", "\n", " else:\n", " ylim_scale_factor = pooled_sd\n", - " \n", - " scaled_ylim = ((rawdata_axes.get_ylim() - control_group_summary) / ylim_scale_factor).tolist()\n", + "\n", + " scaled_ylim = (\n", + " (rawdata_axes.get_ylim() - control_group_summary) / ylim_scale_factor\n", + " ).tolist()\n", "\n", " contrast_axes.set_ylim(scaled_ylim)\n", " og_ylim_contrast = scaled_ylim\n", "\n", - " contrast_axes.set_xlim(contrast_xlim_max-1, contrast_xlim_max)\n", + " contrast_axes.set_xlim(contrast_xlim_max - 1, contrast_xlim_max)\n", "\n", " if one_sankey is None:\n", " # Draw summary lines for control and test groups..\n", " for jj, 
axx in enumerate([rawdata_axes, contrast_axes]):\n", - "\n", " # Draw effect size line.\n", " if jj == 0:\n", " ref = control_group_summary\n", @@ -1048,66 +1157,74 @@ " elif jj == 1:\n", " ref = 0\n", " diff = ref + difference\n", - " effsize_line_start = contrast_xlim_max-1.1\n", + " effsize_line_start = contrast_xlim_max - 1.1\n", "\n", " xlimlow, xlimhigh = axx.get_xlim()\n", "\n", " # Draw reference line.\n", - " axx.hlines(ref, # y-coordinates\n", - " 0, xlimhigh, # x-coordinates, start and end.\n", - " **reflines_kwargs)\n", - " \n", + " axx.hlines(\n", + " ref, # y-coordinates\n", + " 0,\n", + " xlimhigh, # x-coordinates, start and end.\n", + " **reflines_kwargs\n", + " )\n", + "\n", " # Draw effect size line.\n", - " axx.hlines(diff,\n", - " effsize_line_start, xlimhigh,\n", - " **reflines_kwargs)\n", - " else: \n", + " axx.hlines(diff, effsize_line_start, xlimhigh, **reflines_kwargs)\n", + " else:\n", " ref = 0\n", " diff = ref + difference\n", " effsize_line_start = contrast_xlim_max - 0.9\n", " xlimlow, xlimhigh = contrast_axes.get_xlim()\n", " # Draw reference line.\n", - " contrast_axes.hlines(ref, # y-coordinates\n", - " effsize_line_start, xlimhigh, # x-coordinates, start and end.\n", - " **reflines_kwargs)\n", - " \n", + " contrast_axes.hlines(\n", + " ref, # y-coordinates\n", + " effsize_line_start,\n", + " xlimhigh, # x-coordinates, start and end.\n", + " **reflines_kwargs\n", + " )\n", + "\n", " # Draw effect size line.\n", - " contrast_axes.hlines(diff,\n", - " effsize_line_start, xlimhigh,\n", - " **reflines_kwargs) \n", - " rawdata_axes.set_xlim(og_xlim_raw) # to align the axis\n", + " contrast_axes.hlines(diff, effsize_line_start, xlimhigh, **reflines_kwargs)\n", + " rawdata_axes.set_xlim(og_xlim_raw) # to align the axis\n", " # Despine appropriately.\n", - " sns.despine(ax=rawdata_axes, bottom=True)\n", + " sns.despine(ax=rawdata_axes, bottom=True)\n", " sns.despine(ax=contrast_axes, left=True, right=False)\n", "\n", " # Insert break 
between the rawdata axes and the contrast axes\n", " # by re-drawing the x-spine.\n", - " rawdata_axes.hlines(og_ylim_raw[0], # yindex\n", - " rawdata_axes.get_xlim()[0], 1.3, # xmin, xmax\n", - " **redraw_axes_kwargs)\n", + " rawdata_axes.hlines(\n", + " og_ylim_raw[0], # yindex\n", + " rawdata_axes.get_xlim()[0],\n", + " 1.3, # xmin, xmax\n", + " **redraw_axes_kwargs\n", + " )\n", " rawdata_axes.set_ylim(og_ylim_raw)\n", "\n", - " contrast_axes.hlines(contrast_axes.get_ylim()[0],\n", - " contrast_xlim_max-0.8, contrast_xlim_max,\n", - " **redraw_axes_kwargs)\n", - "\n", + " contrast_axes.hlines(\n", + " contrast_axes.get_ylim()[0],\n", + " contrast_xlim_max - 0.8,\n", + " contrast_xlim_max,\n", + " **redraw_axes_kwargs\n", + " )\n", "\n", " else:\n", " # For Cumming Plots only.\n", "\n", " # Set custom contrast_ylim, if it was specified.\n", - " if plot_kwargs['contrast_ylim'] is not None or (plot_kwargs['delta2_ylim'] is not None and show_delta2):\n", - "\n", - " if plot_kwargs['contrast_ylim'] is not None:\n", - " custom_contrast_ylim = plot_kwargs['contrast_ylim']\n", - " if plot_kwargs['delta2_ylim'] is not None and show_delta2:\n", - " custom_delta2_ylim = plot_kwargs['delta2_ylim']\n", - " if custom_contrast_ylim!=custom_delta2_ylim:\n", + " if plot_kwargs[\"contrast_ylim\"] is not None or (\n", + " plot_kwargs[\"delta2_ylim\"] is not None and show_delta2\n", + " ):\n", + " if plot_kwargs[\"contrast_ylim\"] is not None:\n", + " custom_contrast_ylim = plot_kwargs[\"contrast_ylim\"]\n", + " if plot_kwargs[\"delta2_ylim\"] is not None and show_delta2:\n", + " custom_delta2_ylim = plot_kwargs[\"delta2_ylim\"]\n", + " if custom_contrast_ylim != custom_delta2_ylim:\n", " err1 = \"Please check if `contrast_ylim` and `delta2_ylim` are assigned\"\n", " err2 = \"with same values.\"\n", " raise ValueError(err1 + err2)\n", " else:\n", - " custom_delta2_ylim = plot_kwargs['delta2_ylim']\n", + " custom_delta2_ylim = plot_kwargs[\"delta2_ylim\"]\n", " custom_contrast_ylim 
= custom_delta2_ylim\n", "\n", " if len(custom_contrast_ylim) != 2:\n", @@ -1117,8 +1234,8 @@ "\n", " if effect_size_type == \"cliffs_delta\":\n", " # Ensure the ylims for a cliffs_delta plot never exceed [-1, 1].\n", - " l = plot_kwargs['contrast_ylim'][0]\n", - " h = plot_kwargs['contrast_ylim'][1]\n", + " l = plot_kwargs[\"contrast_ylim\"][0]\n", + " h = plot_kwargs[\"contrast_ylim\"][1]\n", " low = -1 if l < -1 else l\n", " high = 1 if h > 1 else h\n", " contrast_axes.set_ylim(low, high)\n", @@ -1135,228 +1252,237 @@ " if contrast_ylim_low < 0 < contrast_ylim_high:\n", " contrast_axes.axhline(y=0, **reflines_kwargs)\n", "\n", - " if is_paired == \"baseline\" and show_pairs == True:\n", + " if is_paired == \"baseline\" and show_pairs:\n", " if two_col_sankey:\n", - " rightend_ticks_raw = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey)\n", + " rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(\n", + " ticks_to_start_twocol_sankey\n", + " )\n", " elif proportional and is_paired is not None:\n", - " rightend_ticks_raw = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip)\n", - " else: \n", - " rightend_ticks_raw = np.array([len(i)-1 for i in temp_idx]) + np.array(ticks_to_skip)\n", + " rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(\n", + " ticks_to_skip\n", + " )\n", + " else:\n", + " rightend_ticks_raw = np.array(\n", + " [len(i) - 1 for i in temp_idx]\n", + " ) + np.array(ticks_to_skip)\n", " for ax in [rawdata_axes]:\n", " sns.despine(ax=ax, bottom=True)\n", - " \n", + "\n", " ylim = ax.get_ylim()\n", " xlim = ax.get_xlim()\n", - " redraw_axes_kwargs['y'] = ylim[0]\n", - " \n", + " redraw_axes_kwargs[\"y\"] = ylim[0]\n", + "\n", " if two_col_sankey:\n", " for k, start_tick in enumerate(ticks_to_start_twocol_sankey):\n", " end_tick = rightend_ticks_raw[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs)\n", - " else: \n", + " ax.hlines(xmin=start_tick, 
xmax=end_tick, **redraw_axes_kwargs)\n", + " else:\n", " for k, start_tick in enumerate(ticks_to_skip):\n", " end_tick = rightend_ticks_raw[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs)\n", + " ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)\n", " ax.set_ylim(ylim)\n", - " del redraw_axes_kwargs['y']\n", - " \n", - " if proportional == False:\n", - " temp_length = [(len(i)-1) for i in idx]\n", + " del redraw_axes_kwargs[\"y\"]\n", + "\n", + " if not proportional:\n", + " temp_length = [(len(i) - 1) for i in idx]\n", " else:\n", - " temp_length = [(len(i)-1)*2-1 for i in idx]\n", + " temp_length = [(len(i) - 1) * 2 - 1 for i in idx]\n", " if two_col_sankey:\n", - " rightend_ticks_contrast = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey)\n", + " rightend_ticks_contrast = np.array(\n", + " [len(i) - 2 for i in idx]\n", + " ) + np.array(ticks_to_start_twocol_sankey)\n", " elif proportional and is_paired is not None:\n", - " rightend_ticks_contrast = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip)\n", - " else: \n", - " rightend_ticks_contrast = np.array(temp_length) + np.array(ticks_to_skip_contrast)\n", + " rightend_ticks_contrast = np.array(\n", + " [len(i) - 1 for i in idx]\n", + " ) + np.array(ticks_to_skip)\n", + " else:\n", + " rightend_ticks_contrast = np.array(temp_length) + np.array(\n", + " ticks_to_skip_contrast\n", + " )\n", " for ax in [contrast_axes]:\n", " sns.despine(ax=ax, bottom=True)\n", - " \n", + "\n", " ylim = ax.get_ylim()\n", " xlim = ax.get_xlim()\n", - " redraw_axes_kwargs['y'] = ylim[0]\n", - " \n", + " redraw_axes_kwargs[\"y\"] = ylim[0]\n", + "\n", " if two_col_sankey:\n", " for k, start_tick in enumerate(ticks_to_start_twocol_sankey):\n", " end_tick = rightend_ticks_contrast[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs)\n", + " ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)\n", " else:\n", " for 
k, start_tick in enumerate(ticks_to_skip_contrast):\n", " end_tick = rightend_ticks_contrast[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs) \n", - " \n", + " ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)\n", + "\n", " ax.set_ylim(ylim)\n", - " del redraw_axes_kwargs['y']\n", + " del redraw_axes_kwargs[\"y\"]\n", " else:\n", " # Compute the end of each x-axes line.\n", " if two_col_sankey:\n", - " rightend_ticks = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey)\n", + " rightend_ticks = np.array([len(i) - 2 for i in idx]) + np.array(\n", + " ticks_to_start_twocol_sankey\n", + " )\n", " else:\n", - " rightend_ticks = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip)\n", - " \n", + " rightend_ticks = np.array([len(i) - 1 for i in idx]) + np.array(\n", + " ticks_to_skip\n", + " )\n", + "\n", " for ax in [rawdata_axes, contrast_axes]:\n", " sns.despine(ax=ax, bottom=True)\n", - " \n", + "\n", " ylim = ax.get_ylim()\n", " xlim = ax.get_xlim()\n", - " redraw_axes_kwargs['y'] = ylim[0]\n", - " \n", + " redraw_axes_kwargs[\"y\"] = ylim[0]\n", + "\n", " if two_col_sankey:\n", " for k, start_tick in enumerate(ticks_to_start_twocol_sankey):\n", " end_tick = rightend_ticks[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs)\n", + " ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)\n", " else:\n", " for k, start_tick in enumerate(ticks_to_skip):\n", " end_tick = rightend_ticks[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs)\n", - " \n", + " ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)\n", + "\n", " ax.set_ylim(ylim)\n", - " del redraw_axes_kwargs['y']\n", + " del redraw_axes_kwargs[\"y\"]\n", "\n", - " if show_delta2 is True or show_mini_meta is True:\n", + " if show_delta2 or show_mini_meta:\n", " ylim = contrast_axes.get_ylim()\n", - " redraw_axes_kwargs['y'] = ylim[0]\n", + " 
redraw_axes_kwargs[\"y\"] = ylim[0]\n", " x_ticks = contrast_axes.get_xticks()\n", - " contrast_axes.hlines(xmin=x_ticks[-2], xmax=x_ticks[-1],\n", - " **redraw_axes_kwargs)\n", - " del redraw_axes_kwargs['y']\n", + " contrast_axes.hlines(xmin=x_ticks[-2], xmax=x_ticks[-1], **redraw_axes_kwargs)\n", + " del redraw_axes_kwargs[\"y\"]\n", "\n", " # Set raw axes y-label.\n", - " swarm_label = plot_kwargs['swarm_label']\n", + " swarm_label = plot_kwargs[\"swarm_label\"]\n", " if swarm_label is None and yvar is None:\n", " swarm_label = \"value\"\n", " elif swarm_label is None and yvar is not None:\n", " swarm_label = yvar\n", "\n", - " bar_label = plot_kwargs['bar_label']\n", + " bar_label = plot_kwargs[\"bar_label\"]\n", " if bar_label is None and effect_size_type != \"cohens_h\":\n", " bar_label = \"proportion of success\"\n", " elif bar_label is None and effect_size_type == \"cohens_h\":\n", " bar_label = \"value\"\n", "\n", " # Place contrast axes y-label.\n", - " contrast_label_dict = {'mean_diff': \"mean difference\",\n", - " 'median_diff': \"median difference\",\n", - " 'cohens_d': \"Cohen's d\",\n", - " 'hedges_g': \"Hedges' g\",\n", - " 'cliffs_delta': \"Cliff's delta\",\n", - " 'cohens_h': \"Cohen's h\",\n", - " 'delta_g': \"mean difference\"}\n", - "\n", - " if proportional == True and effect_size_type != \"cohens_h\":\n", + " contrast_label_dict = {\n", + " \"mean_diff\": \"mean difference\",\n", + " \"median_diff\": \"median difference\",\n", + " \"cohens_d\": \"Cohen's d\",\n", + " \"hedges_g\": \"Hedges' g\",\n", + " \"cliffs_delta\": \"Cliff's delta\",\n", + " \"cohens_h\": \"Cohen's h\",\n", + " \"delta_g\": \"mean difference\",\n", + " }\n", + "\n", + " if proportional and effect_size_type != \"cohens_h\":\n", " default_contrast_label = \"proportion difference\"\n", " elif effect_size_type == \"delta_g\":\n", " default_contrast_label = \"Hedges' g\"\n", " else:\n", - " default_contrast_label = contrast_label_dict[EffectSizeDataFrame.effect_size]\n", + 
" default_contrast_label = contrast_label_dict[effectsize_df.effect_size]\n", "\n", - "\n", - " if plot_kwargs['contrast_label'] is None:\n", + " if plot_kwargs[\"contrast_label\"] is None:\n", " if is_paired:\n", " contrast_label = \"paired\\n{}\".format(default_contrast_label)\n", " else:\n", " contrast_label = default_contrast_label\n", " contrast_label = contrast_label.capitalize()\n", " else:\n", - " contrast_label = plot_kwargs['contrast_label']\n", + " contrast_label = plot_kwargs[\"contrast_label\"]\n", "\n", - " if plot_kwargs['fontsize_rawylabel'] is not None:\n", - " fontsize_rawylabel = plot_kwargs['fontsize_rawylabel']\n", - " if plot_kwargs['fontsize_contrastylabel'] is not None:\n", - " fontsize_contrastylabel = plot_kwargs['fontsize_contrastylabel']\n", - " if plot_kwargs['fontsize_delta2label'] is not None:\n", - " fontsize_delta2label = plot_kwargs['fontsize_delta2label']\n", + " if plot_kwargs[\"fontsize_rawylabel\"] is not None:\n", + " fontsize_rawylabel = plot_kwargs[\"fontsize_rawylabel\"]\n", + " if plot_kwargs[\"fontsize_contrastylabel\"] is not None:\n", + " fontsize_contrastylabel = plot_kwargs[\"fontsize_contrastylabel\"]\n", + " if plot_kwargs[\"fontsize_delta2label\"] is not None:\n", + " fontsize_delta2label = plot_kwargs[\"fontsize_delta2label\"]\n", "\n", - " contrast_axes.set_ylabel(contrast_label,fontsize = fontsize_contrastylabel)\n", - " if float_contrast is True:\n", + " contrast_axes.set_ylabel(contrast_label, fontsize=fontsize_contrastylabel)\n", + " if float_contrast:\n", " contrast_axes.yaxis.set_label_position(\"right\")\n", "\n", " # Set the rawdata axes labels appropriately\n", - " if proportional == False:\n", - " rawdata_axes.set_ylabel(swarm_label,fontsize = fontsize_rawylabel)\n", + " if not proportional:\n", + " rawdata_axes.set_ylabel(swarm_label, fontsize=fontsize_rawylabel)\n", " else:\n", - " rawdata_axes.set_ylabel(bar_label,fontsize = fontsize_rawylabel)\n", + " rawdata_axes.set_ylabel(bar_label, 
fontsize=fontsize_rawylabel)\n", " rawdata_axes.set_xlabel(\"\")\n", "\n", " # Because we turned the axes frame off, we also need to draw back\n", " # the y-spine for both axes.\n", - " if float_contrast==False:\n", + " if not float_contrast:\n", " rawdata_axes.set_xlim(contrast_axes.get_xlim())\n", " og_xlim_raw = rawdata_axes.get_xlim()\n", - " rawdata_axes.vlines(og_xlim_raw[0],\n", - " og_ylim_raw[0], og_ylim_raw[1],\n", - " **redraw_axes_kwargs)\n", + " rawdata_axes.vlines(\n", + " og_xlim_raw[0], og_ylim_raw[0], og_ylim_raw[1], **redraw_axes_kwargs\n", + " )\n", "\n", " og_xlim_contrast = contrast_axes.get_xlim()\n", "\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " xpos = og_xlim_contrast[1]\n", " else:\n", " xpos = og_xlim_contrast[0]\n", "\n", " og_ylim_contrast = contrast_axes.get_ylim()\n", - " contrast_axes.vlines(xpos,\n", - " og_ylim_contrast[0], og_ylim_contrast[1],\n", - " **redraw_axes_kwargs)\n", - "\n", - "\n", - " if show_delta2 is True:\n", - " if plot_kwargs['delta2_label'] is not None:\n", - " delta2_label = plot_kwargs['delta2_label']\n", - " elif effect_size == \"mean_diff\" :\n", + " contrast_axes.vlines(\n", + " xpos, og_ylim_contrast[0], og_ylim_contrast[1], **redraw_axes_kwargs\n", + " )\n", + "\n", + " if show_delta2:\n", + " if plot_kwargs[\"delta2_label\"] is not None:\n", + " delta2_label = plot_kwargs[\"delta2_label\"]\n", + " elif effect_size == \"mean_diff\":\n", " delta2_label = \"delta - delta\"\n", " else:\n", " delta2_label = \"deltas' g\"\n", " delta2_axes = contrast_axes.twinx()\n", " delta2_axes.set_frame_on(False)\n", - " delta2_axes.set_ylabel(delta2_label, fontsize = fontsize_delta2label)\n", + " delta2_axes.set_ylabel(delta2_label, fontsize=fontsize_delta2label)\n", " og_xlim_delta = contrast_axes.get_xlim()\n", " og_ylim_delta = contrast_axes.get_ylim()\n", " delta2_axes.set_ylim(og_ylim_delta)\n", - " delta2_axes.vlines(og_xlim_delta[1],\n", - " og_ylim_delta[0], og_ylim_delta[1],\n", - " 
**redraw_axes_kwargs)\n", + " delta2_axes.vlines(\n", + " og_xlim_delta[1], og_ylim_delta[0], og_ylim_delta[1], **redraw_axes_kwargs\n", + " )\n", "\n", + " ################################################### GRIDKEY MAIN CODE WIP\n", "\n", - "################################################### GRIDKEY MAIN CODE WIP\n", - " \n", - " #if gridkey_rows is None, skip everything here\n", + " # if gridkey_rows is None, skip everything here\n", " if gridkey_rows is not None:\n", - " \n", " # Raise error if there are more than 2 items in any idx and gridkey_merge_pairs is True and is_paired is not None\n", - " if gridkey_merge_pairs is True and is_paired is not None:\n", + " if gridkey_merge_pairs and is_paired is not None:\n", " for i in idx:\n", " if len(i) > 2:\n", - " warnings.warn(\"gridkey_merge_pairs=True only works if all idx in tuples have only two items. gridkey_merge_pairs has automatically been set to False\")\n", + " warnings.warn(\n", + " \"gridkey_merge_pairs=True only works if all idx in tuples have only two items. 
gridkey_merge_pairs has automatically been set to False\"\n", + " )\n", " gridkey_merge_pairs = False\n", " break\n", - " elif gridkey_merge_pairs is True and is_paired is None:\n", - " warnings.warn(\"gridkey_merge_pairs=True is only applicable for paired data.\")\n", + " elif gridkey_merge_pairs and is_paired is None:\n", + " warnings.warn(\n", + " \"gridkey_merge_pairs=True is only applicable for paired data.\"\n", + " )\n", " gridkey_merge_pairs = False\n", - " \n", + "\n", " # Checks for gridkey_merge_pairs and is_paired; if both are true, \"merges\" the gridkey per pair\n", - " if gridkey_merge_pairs is True and is_paired is not None: \n", + " if gridkey_merge_pairs and is_paired is not None:\n", " groups_for_gridkey = []\n", " for i in idx:\n", " groups_for_gridkey.append(i[1])\n", " else:\n", " groups_for_gridkey = all_plot_groups\n", - " \n", - " \n", + "\n", " # raise errors if gridkey_rows is not a list, or if the list is empty\n", " if isinstance(gridkey_rows, list) is False:\n", " raise TypeError(\"gridkey_rows must be a list.\")\n", " elif len(gridkey_rows) == 0:\n", " warnings.warn(\"gridkey_rows is an empty list.\")\n", - " \n", - " \n", + "\n", " # raise Warning if an item in gridkey_rows is not contained in any idx\n", " for i in gridkey_rows:\n", " in_idx = 0\n", @@ -1365,93 +1491,101 @@ " in_idx += 1\n", " if in_idx == 0:\n", " if is_paired is not None:\n", - " warnings.warn(i + \" is not in any idx. Please check. Alternatively, merging gridkey pairs may not be suitable for your data; try passing gridkey_merge_pairs=False.\")\n", + " warnings.warn(\n", + " i\n", + " + \" is not in any idx. Please check. Alternatively, merging gridkey pairs may not be suitable for your data; try passing gridkey_merge_pairs=False.\"\n", + " )\n", " else:\n", - " warnings.warn(i + \" is not in any idx. Please check.\") \n", - " \n", - " \n", + " warnings.warn(i + \" is not in any idx. 
Please check.\")\n", + "\n", " # Populate table: checks if idx for each column contains rowlabel name\n", " # IF so, marks that element as present w black dot, or space if not present\n", - " table_cellcols = [] \n", + " table_cellcols = []\n", " for i in gridkey_rows:\n", " thisrow = []\n", " for q in groups_for_gridkey:\n", " if str(i) in q:\n", - " thisrow.append(u\"\\u25CF\")\n", + " thisrow.append(\"\\u25CF\")\n", " else:\n", " thisrow.append(\"\")\n", " table_cellcols.append(thisrow)\n", - " \n", - " \n", + "\n", " # Adds a row for Ns with the Ns values\n", - " if gridkey_show_Ns == True:\n", + " if gridkey_show_Ns:\n", " gridkey_rows.append(\"Ns\")\n", " list_of_Ns = []\n", " for i in groups_for_gridkey:\n", " list_of_Ns.append(str(counts.loc[i]))\n", " table_cellcols.append(list_of_Ns)\n", "\n", - " \n", " # Adds a row for effectsizes with effectsize values\n", - " if gridkey_show_es == True:\n", - " gridkey_rows.append(u\"\\u0394\")\n", + " if gridkey_show_es:\n", + " gridkey_rows.append(\"\\u0394\")\n", " effsize_list = []\n", " results_list = results.test.to_list()\n", - " \n", + "\n", " # get the effect size, append + or -, 2 dec places\n", " for i in enumerate(groups_for_gridkey):\n", " if i[1] in results_list:\n", - " curr_esval = results.loc[results[\"test\"] == i[1]][\"difference\"].iloc[0]\n", - " curr_esval_str = np.format_float_positional(curr_esval,\n", - " precision=es_sf,\n", - " sign=True,\n", - " trim= 'k',\n", - " min_digits = es_sf)\n", + " curr_esval = results.loc[results[\"test\"] == i[1]][\n", + " \"difference\"\n", + " ].iloc[0]\n", + " curr_esval_str = np.format_float_positional(\n", + " curr_esval,\n", + " precision=es_sf,\n", + " sign=True,\n", + " trim=\"k\",\n", + " min_digits=es_sf,\n", + " )\n", " effsize_list.append(curr_esval_str)\n", " else:\n", " effsize_list.append(\"-\")\n", - " \n", + "\n", " table_cellcols.append(effsize_list)\n", - " \n", + "\n", " # If Gardner-Altman plot, plot on raw data and not contrast axes\n", - " 
if float_contrast == True:\n", + " if float_contrast:\n", " axes_ploton = rawdata_axes\n", " else:\n", " axes_ploton = contrast_axes\n", - " \n", + "\n", " # Account for extended x axis in case of show_delta2 or show_mini_meta\n", " x_groups_for_width = len(groups_for_gridkey)\n", - " if show_delta2 is True or show_mini_meta is True:\n", - " x_groups_for_width += 2 \n", + " if show_delta2 or show_mini_meta:\n", + " x_groups_for_width += 2\n", " gridkey_width = len(groups_for_gridkey) / x_groups_for_width\n", - " \n", - " gridkey = axes_ploton.table(cellText = table_cellcols, \n", - " rowLabels = gridkey_rows, \n", - " cellLoc = \"center\",\n", - " bbox = [0, -len(gridkey_rows)*0.1-0.05, gridkey_width, len(gridkey_rows)*0.1],\n", - " **{\"alpha\" : 0.5}) \n", - " \n", + "\n", + " gridkey = axes_ploton.table(\n", + " cellText=table_cellcols,\n", + " rowLabels=gridkey_rows,\n", + " cellLoc=\"center\",\n", + " bbox=[\n", + " 0,\n", + " -len(gridkey_rows) * 0.1 - 0.05,\n", + " gridkey_width,\n", + " len(gridkey_rows) * 0.1,\n", + " ],\n", + " **{\"alpha\": 0.5}\n", + " )\n", + "\n", " # modifies row label cells\n", " for cell in gridkey._cells:\n", " if cell[1] == -1:\n", " gridkey._cells[cell].visible_edges = \"open\"\n", - " gridkey._cells[cell].set_text_props(**{ \"ha\" : \"right\" }) \n", - " \n", + " gridkey._cells[cell].set_text_props(**{\"ha\": \"right\"})\n", + "\n", " # turns off both x axes\n", " rawdata_axes.get_xaxis().set_visible(False)\n", " contrast_axes.get_xaxis().set_visible(False)\n", - " \n", - " ####################################################### END GRIDKEY MAIN CODE WIP \n", - " \n", - " \n", - " \n", - " \n", + "\n", + " ####################################################### END GRIDKEY MAIN CODE WIP\n", + "\n", " # Make sure no stray ticks appear!\n", - " rawdata_axes.xaxis.set_ticks_position('bottom')\n", - " rawdata_axes.yaxis.set_ticks_position('left')\n", - " contrast_axes.xaxis.set_ticks_position('bottom')\n", + " 
rawdata_axes.xaxis.set_ticks_position(\"bottom\")\n", + " rawdata_axes.yaxis.set_ticks_position(\"left\")\n", + " contrast_axes.xaxis.set_ticks_position(\"bottom\")\n", " if float_contrast is False:\n", - " contrast_axes.yaxis.set_ticks_position('left')\n", + " contrast_axes.yaxis.set_ticks_position(\"left\")\n", "\n", " # Reset rcParams.\n", " for parameter in _changed_rcParams:\n", From 5e8dc4028c5233f48b3eab7b3ab09509de8a343d Mon Sep 17 00:00:00 2001 From: cyberosa Date: Mon, 18 Dec 2023 09:27:30 +0100 Subject: [PATCH 07/10] Using self atributes --- nbs/API/bootstrap.ipynb | 11 ++++++----- nbs/API/dabest_object.ipynb | 2 +- nbs/API/effsize_objects.ipynb | 12 ++++++++---- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/nbs/API/bootstrap.ipynb b/nbs/API/bootstrap.ipynb index d84b8349..d7a5ae64 100644 --- a/nbs/API/bootstrap.ipynb +++ b/nbs/API/bootstrap.ipynb @@ -104,11 +104,12 @@ " def __init__(self, \n", " x1:np.array, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded.\n", " x2:np.array=None, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded.\n", - " paired:bool=False, # Whether or not x1 and x2 are paired samples. If 'paired' is None then the data will not be treated as paired data in the subsequent calculations. If 'paired' is 'baseline', then in each tuple of x, other groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with the previous group (as control).\n", - " statfunction:callable=np.mean,#The summary statistic called on data.\n", - " smoothboot:bool=False,#Taken from seaborn.algorithms.bootstrap. 
If True, performs a smoothed bootstrap (draws samples from a kernel destiny estimate).\n", - " alpha_level:float=0.05,#Denotes the likelihood that the confidence interval produced does not include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced.\n", - " reps:int=5000 # Number of bootstrap iterations to perform.\n", + " paired:bool=False, # Whether or not x1 and x2 are paired samples. If 'paired' is None then the data will not be treated as paired data in the subsequent calculations. If 'paired' is 'baseline', then in each tuple of x, other groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with the previous group (as control).\n", + " # TODO naming\n", + " statfunction:callable=np.mean,#The summary statistic called on data.\n", + " smoothboot:bool=False,#Taken from seaborn.algorithms.bootstrap. If True, performs a smoothed bootstrap (draws samples from a kernel destiny estimate).\n", + " alpha_level:float=0.05,#Denotes the likelihood that the confidence interval produced does not include the true summary statistic. 
When alpha = 0.05, a 95% confidence interval is produced.\n", + " reps:int=5000 # Number of bootstrap iterations to perform.\n", " ):\n", "\n", " # Turn to pandas series.\n", diff --git a/nbs/API/dabest_object.ipynb b/nbs/API/dabest_object.ipynb index f108a235..8a69f064 100644 --- a/nbs/API/dabest_object.ipynb +++ b/nbs/API/dabest_object.ipynb @@ -59,7 +59,7 @@ "from numpy import array, repeat, random, issubdtype, number\n", "import pandas as pd\n", "from scipy.stats import norm\n", - "from scipy.stats import randint" + "from scipy.stats import randint\n" ] }, { diff --git a/nbs/API/effsize_objects.ipynb b/nbs/API/effsize_objects.ipynb index ba45b867..50407b7c 100644 --- a/nbs/API/effsize_objects.ipynb +++ b/nbs/API/effsize_objects.ipynb @@ -179,18 +179,18 @@ " self.__alpha = ci2g._compute_alpha_from_ci(self.__ci)\n", "\n", " self.__difference = es.two_group_difference(\n", - " self.__control, test, self.__is_paired, self.__effect_size\n", + " self.__control, self.__test, self.__is_paired, self.__effect_size\n", " )\n", "\n", " self.__jackknives = ci2g.compute_meandiff_jackknife(\n", - " self.__control, test, self.__is_paired, self.__effect_size\n", + " self.__control, self.__test, self.__is_paired, self.__effect_size\n", " )\n", "\n", " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", "\n", " bootstraps = ci2g.compute_bootstrapped_diff(\n", " self.__control,\n", - " test,\n", + " self.__test,\n", " self.__is_paired,\n", " self.__effect_size,\n", " self.__resamples,\n", @@ -1880,7 +1880,11 @@ "source": [] } ], - "metadata": {}, + "metadata": { + "language_info": { + "name": "python" + } + }, "nbformat": 4, "nbformat_minor": 2 } From 5f873f19902b90b0ae105d84205d03c3feb7217e Mon Sep 17 00:00:00 2001 From: cyberosa Date: Tue, 19 Dec 2023 10:01:57 +0100 Subject: [PATCH 08/10] More small fixes from phase 1 refactoring --- dabest/_bootstrap_tools.py | 200 +++++++++++++++++--------------- dabest/_dabest_object.py | 11 +- dabest/_effsize_objects.py | 43 
++++--- nbs/API/bootstrap.ipynb | 212 ++++++++++++++++++---------------- nbs/API/dabest_object.ipynb | 10 +- nbs/API/effsize_objects.ipynb | 43 +++---- 6 files changed, 275 insertions(+), 244 deletions(-) diff --git a/dabest/_bootstrap_tools.py b/dabest/_bootstrap_tools.py index 153919da..ed3398ee 100644 --- a/dabest/_bootstrap_tools.py +++ b/dabest/_bootstrap_tools.py @@ -14,9 +14,9 @@ # %% ../nbs/API/bootstrap.ipynb 4 class bootstrap: - ''' - Computes the summary statistic and a bootstrapped confidence interval. - + """ + Computes the summary statistic and a bootstrapped confidence interval. + Returns ------- An `bootstrap` object reporting the summary statistics, percentile CIs, bias-corrected and accelerated (BCa) CIs, and the settings used: @@ -53,78 +53,79 @@ class bootstrap: `pvalue_mann_whitney`: float Two-sided p-value obtained from scipy.stats.mannwhitneyu. If a single array was given (x1 only), returns 'NIL'. The Mann-Whitney U-test is a nonparametric unpaired test of the null hypothesis that x1 and x2 are from the same distribution. See - ''' - def __init__(self, - x1:np.array, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded. - x2:np.array=None, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded. - paired:bool=False, # Whether or not x1 and x2 are paired samples. If 'paired' is None then the data will not be treated as paired data in the subsequent calculations. If 'paired' is 'baseline', then in each tuple of x, other groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with the previous group (as control). - statfunction:callable=np.mean,#The summary statistic called on data. 
- smoothboot:bool=False,#Taken from seaborn.algorithms.bootstrap. If True, performs a smoothed bootstrap (draws samples from a kernel destiny estimate). - alpha_level:float=0.05,#Denotes the likelihood that the confidence interval produced does not include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced. - reps:int=5000 # Number of bootstrap iterations to perform. - ): + """ + def __init__( + self, + x1: np.array, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded. + x2: np.array = None, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded. + paired: bool = False, # Whether or not x1 and x2 are paired samples. If 'paired' is None then the data will not be treated as paired data in the subsequent calculations. If 'paired' is 'baseline', then in each tuple of x, other groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with the previous group (as control). + stat_function: callable = np.mean, # The summary statistic called on data. + smoothboot: bool = False, # Taken from seaborn.algorithms.bootstrap. If True, performs a smoothed bootstrap (draws samples from a kernel destiny estimate). + alpha_level: float = 0.05, # Denotes the likelihood that the confidence interval produced does not include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced. + reps: int = 5000, # Number of bootstrap iterations to perform. + ): # Turn to pandas series. 
x1 = pd.Series(x1).dropna() diff = False - # Initialise statfunction - if statfunction is None: - statfunction = np.mean + # Initialise stat_function + if stat_function is None: + stat_function = np.mean # Compute two-sided alphas. - if alpha_level > 1. or alpha_level < 0.: + if alpha_level > 1.0 or alpha_level < 0.0: raise ValueError("alpha_level must be between 0 and 1.") - alphas = np.array([alpha_level/2., 1-alpha_level/2.]) + alphas = np.array([alpha_level / 2.0, 1 - alpha_level / 2.0]) - sns_bootstrap_kwargs = {'func': statfunction, - 'n_boot': reps, - 'smooth': smoothboot} + sns_bootstrap_kwargs = { + "func": stat_function, + "n_boot": reps, + "smooth": smoothboot, + } if paired: # check x2 is not None: if x2 is None: - raise ValueError('Please specify x2.') + raise ValueError("Please specify x2.") else: x2 = pd.Series(x2).dropna() if len(x1) != len(x2): - raise ValueError('x1 and x2 are not the same length.') - - if (x2 is None) or paired: + raise ValueError("x1 and x2 are not the same length.") + if (x2 is None) or (paired is not None): if x2 is None: tx = x1 paired = False ttest_single = ttest_1samp(x1, 0)[1] - ttest_2_ind = 'NIL' - ttest_2_paired = 'NIL' - wilcoxonresult = 'NIL' + ttest_2_ind = "NIL" + ttest_2_paired = "NIL" + wilcoxonresult = "NIL" - #elif paired is not None: - else: # only two options to enter here + # elif paired is not None: + else: # only two options to enter here diff = True tx = x2 - x1 - ttest_single = 'NIL' - ttest_2_ind = 'NIL' + ttest_single = "NIL" + ttest_2_ind = "NIL" ttest_2_paired = ttest_rel(x1, x2)[1] wilcoxonresult = wilcoxon(x1, x2)[1] - mannwhitneyresult = 'NIL' + mannwhitneyresult = "NIL" # Turns data into array, then tuple. tdata = (tx,) # The value of the statistic function applied # just to the actual data. 
- summ_stat = statfunction(*tdata) + summ_stat = stat_function(*tdata) statarray = sns.algorithms.bootstrap(tx, **sns_bootstrap_kwargs) statarray.sort() # Get Percentile indices - pct_low_high = np.round((reps-1) * alphas) - pct_low_high = np.nan_to_num(pct_low_high).astype('int') - + pct_low_high = np.round((reps - 1) * alphas) + pct_low_high = np.nan_to_num(pct_low_high).astype("int") - elif x2 and paired is None: + elif x2 is not None and paired is None: diff = True x2 = pd.Series(x2).dropna() # Generate statarrays for both arrays. @@ -134,42 +135,45 @@ def __init__(self, tdata = exp_statarray - ref_statarray statarray = tdata.copy() statarray.sort() - tdata = (tdata, ) # Note tuple form. + tdata = (tdata,) # Note tuple form. # The difference as one would calculate it. - summ_stat = statfunction(x2) - statfunction(x1) + summ_stat = stat_function(x2) - stat_function(x1) # Get Percentile indices - pct_low_high = np.round((reps-1) * alphas) - pct_low_high = np.nan_to_num(pct_low_high).astype('int') + pct_low_high = np.round((reps - 1) * alphas) + pct_low_high = np.nan_to_num(pct_low_high).astype("int") # Statistical tests. - ttest_single='NIL' - ttest_2_ind = ttest_ind(x1,x2)[1] - ttest_2_paired='NIL' - mannwhitneyresult = mannwhitneyu(x1, x2, alternative='two-sided')[1] - wilcoxonresult = 'NIL' + ttest_single = "NIL" + ttest_2_ind = ttest_ind(x1, x2)[1] + ttest_2_paired = "NIL" + mannwhitneyresult = mannwhitneyu(x1, x2, alternative="two-sided")[1] + wilcoxonresult = "NIL" # Get Bias-Corrected Accelerated indices convenience function invoked. - bca_low_high = bca(tdata, alphas, statarray, - statfunction, summ_stat, reps) + bca_low_high = bca(tdata, alphas, statarray, stat_function, summ_stat, reps) # Warnings for unstable or extreme indices. 
for ind in [pct_low_high, bca_low_high]: - if np.any(ind == 0) or np.any(ind == reps-1): - warnings.warn("Some values used extremal samples;" - " results are probably unstable.") - elif np.any(ind<10) or np.any(ind>=reps-10): - warnings.warn("Some values used top 10 low/high samples;" - " results may be unstable.") + if np.any(ind == 0) or np.any(ind == reps - 1): + warnings.warn( + "Some values used extremal samples;" + " results are probably unstable." + ) + elif np.any(ind < 10) or np.any(ind >= reps - 10): + warnings.warn( + "Some values used top 10 low/high samples;" + " results may be unstable." + ) self.summary = summ_stat self.is_paired = paired self.is_difference = diff - self.statistic = str(statfunction) + self.statistic = str(stat_function) self.n_reps = reps - self.ci = (1-alpha_level)*100 + self.ci = (1 - alpha_level) * 100 self.stat_array = np.array(statarray) self.pct_ci_low = statarray[pct_low_high[0]] @@ -186,32 +190,33 @@ def __init__(self, self.pvalue_wilcoxon = wilcoxonresult self.pvalue_mann_whitney = mannwhitneyresult - self.results = {'stat_summary': self.summary, - 'is_difference': diff, - 'is_paired': paired, - 'bca_ci_low': self.bca_ci_low, - 'bca_ci_high': self.bca_ci_high, - 'ci': self.ci - } + self.results = { + "stat_summary": self.summary, + "is_difference": diff, + "is_paired": paired, + "bca_ci_low": self.bca_ci_low, + "bca_ci_high": self.bca_ci_high, + "ci": self.ci, + } def __repr__(self): - - if 'mean' in self.statistic: - stat = 'mean' - elif 'median' in self.statistic: - stat = 'median' + if "mean" in self.statistic: + stat = "mean" + elif "median" in self.statistic: + stat = "median" else: stat = self.statistic - diff_types = {'sequential': 'paired', 'baseline': 'paired', None: 'unpaired'} + diff_types = {"sequential": "paired", "baseline": "paired", None: "unpaired"} if self.is_difference: - a = 'The {} {} difference is {}.'.format(diff_types[self.is_paired], - stat, self.summary) + a = "The {} {} difference is {}.".format( 
+ diff_types[self.is_paired], stat, self.summary + ) else: - a = 'The {} is {}.'.format(stat, self.summary) + a = "The {} is {}.".format(stat, self.summary) - b = '[{} CI: {}, {}]'.format(self.ci, self.bca_ci_low, self.bca_ci_high) - return '\n'.join([a, b]) + b = "[{} CI: {}, {}]".format(self.ci, self.bca_ci_low, self.bca_ci_high) + return "\n".join([a, b]) # %% ../nbs/API/bootstrap.ipynb 5 def jackknife_indexes(data): @@ -225,38 +230,41 @@ def jackknife_indexes(data): data set Y with the ith data point deleted. """ - base = np.arange(0,len(data)) - return (np.delete(base,i) for i in base) + base = np.arange(0, len(data)) + return (np.delete(base, i) for i in base) -def bca(data, alphas, statarray, statfunction, ostat, reps): - ''' + +def bca(data, alphas, statarray, stat_function, ostat, reps): + """ Subroutine called to calculate the BCa statistics. Borrowed heavily from scikits.bootstrap code. - ''' + """ # The bias correction value. - z0 = norm.ppf( ( 1.0*np.sum(statarray < ostat, axis = 0) ) / reps ) + z0 = norm.ppf((1.0 * np.sum(statarray < ostat, axis=0)) / reps) # Statistics of the jackknife distribution jackindexes = jackknife_indexes(data[0]) - jstat = [statfunction(*(x[indexes] for x in data)) - for indexes in jackindexes] - jmean = np.mean(jstat,axis = 0) + jstat = [stat_function(*(x[indexes] for x in data)) for indexes in jackindexes] + jmean = np.mean(jstat, axis=0) # Acceleration value - a = np.divide(np.sum( (jmean - jstat)**3, axis = 0 ), - ( 6.0 * np.sum( (jmean - jstat)**2, axis = 0)**1.5 ) - ) + a = np.divide( + np.sum((jmean - jstat) ** 3, axis=0), + (6.0 * np.sum((jmean - jstat) ** 2, axis=0) ** 1.5), + ) if np.any(np.isnan(a)): nanind = np.nonzero(np.isnan(a)) - warnings.warn("Some acceleration values were undefined." - "This is almost certainly because all values" - "for the statistic were equal. 
Affected" - "confidence intervals will have zero width and" - "may be inaccurate (indexes: {})".format(nanind)) - zs = z0 + norm.ppf(alphas).reshape(alphas.shape+(1,)*z0.ndim) - avals = norm.cdf(z0 + zs/(1-a*zs)) - nvals = np.round((reps-1)*avals) - nvals = np.nan_to_num(nvals).astype('int') + warnings.warn( + "Some acceleration values were undefined." + "This is almost certainly because all values" + "for the statistic were equal. Affected" + "confidence intervals will have zero width and" + "may be inaccurate (indexes: {})".format(nanind) + ) + zs = z0 + norm.ppf(alphas).reshape(alphas.shape + (1,) * z0.ndim) + avals = norm.cdf(z0 + zs / (1 - a * zs)) + nvals = np.round((reps - 1) * avals) + nvals = np.nan_to_num(nvals).astype("int") return nvals diff --git a/dabest/_dabest_object.py b/dabest/_dabest_object.py index c17cf9c2..c1d6be8f 100644 --- a/dabest/_dabest_object.py +++ b/dabest/_dabest_object.py @@ -10,6 +10,7 @@ from scipy.stats import norm from scipy.stats import randint + # %% ../nbs/API/dabest_object.ipynb 6 class Dabest(object): @@ -84,7 +85,7 @@ def __init__( # Check if this is a 2x2 ANOVA case and x & y are valid columns # Create experiment_label and x1_level if self.__delta2: - # TODO Wrap the errors in a separate function + # TODO Wrap the errors in a separate function called check_errors() if self.__proportional: err0 = "`proportional` and `delta` cannot be True at the same time." raise ValueError(err0) @@ -254,7 +255,7 @@ def __init__( err = "{} is not a column in `data`. ".format(id_col) raise IndexError(err) - self.compute_effectsize_dfs() + self._compute_effectsize_dfs() def __repr__(self): from .__init__ import __version__ @@ -614,7 +615,11 @@ def get_plot_data(self, x, y, all_plot_groups): return plot_data - def compute_effectsize_dfs(self): + def _compute_effectsize_dfs(self): + ''' + Function to compute all attributes based on EffectSizeDataFrame. + It returns nothing. 
+ ''' from ._effsize_objects import EffectSizeDataFrame effectsize_df_kwargs = dict( diff --git a/dabest/_effsize_objects.py b/dabest/_effsize_objects.py index 3f2dcf3a..3bf10723 100644 --- a/dabest/_effsize_objects.py +++ b/dabest/_effsize_objects.py @@ -104,7 +104,7 @@ def __init__( self.__random_seed = random_seed self.__ci = ci self.__proportional = proportional - self.check_errors(control, test) + self._check_errors(control, test) # Convert to numpy arrays for speed. # NaNs are automatically dropped. @@ -117,18 +117,18 @@ def __init__( self.__alpha = ci2g._compute_alpha_from_ci(self.__ci) self.__difference = es.two_group_difference( - self.__control, test, self.__is_paired, self.__effect_size + self.__control, self.__test, self.__is_paired, self.__effect_size ) self.__jackknives = ci2g.compute_meandiff_jackknife( - self.__control, test, self.__is_paired, self.__effect_size + self.__control, self.__test, self.__is_paired, self.__effect_size ) self.__acceleration_value = ci2g._calc_accel(self.__jackknives) bootstraps = ci2g.compute_bootstrapped_diff( self.__control, - test, + self.__test, self.__is_paired, self.__effect_size, self.__resamples, @@ -156,7 +156,7 @@ def __init__( self.__bootstraps, self.__difference ) - self.compute_bca_intervals(sorted_bootstraps) + self._compute_bca_intervals(sorted_bootstraps) # Compute percentile intervals. 
pct_idx_low = int((self.__alpha / 2) * self.__resamples) @@ -166,7 +166,7 @@ def __init__( self.__pct_low = sorted_bootstraps[pct_idx_low] self.__pct_high = sorted_bootstraps[pct_idx_high] - self.perform_statistical_test() + self._perform_statistical_test() def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3): RM_STATUS = { @@ -230,14 +230,17 @@ def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3): if show_resample_count and define_pval: return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) - elif ~show_resample_count and define_pval: + elif not show_resample_count and define_pval: return "{}\n{}\n\n{}".format(out, pvalue, pval_def) elif show_resample_count and ~define_pval: return "{}\n{}\n\n{}".format(out, pvalue, bs) else: return "{}\n{}".format(out, pvalue) - def check_errors(self, control, test): + def _check_errors(self, control, test): + ''' + Function to check configuration errors for the given control and test data. + ''' kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()] if self.__effect_size not in kosher_es: err1 = "The effect size '{}'".format(self.__effect_size) @@ -260,7 +263,10 @@ def check_errors(self, control, test): ) raise ValueError(err1) - def compute_bca_intervals(self, sorted_bootstraps): + def _compute_bca_intervals(self, sorted_bootstraps): + ''' + Function to compute the bca intervals given the sorted bootstraps. + ''' from ._stats_tools import confint_2group_diff as ci2g # Compute BCa intervals. @@ -293,7 +299,7 @@ def compute_bca_intervals(self, sorted_bootstraps): ) else: - # TODO improve error handling, separate file + # TODO improve error handling, separate file with error messages? err1 = "The $lim_type limit of the BCa interval cannot be computed." err2 = "It is set to the effect size itself." err3 = "All bootstrap values were likely all the same." 
@@ -307,7 +313,10 @@ def compute_bca_intervals(self, sorted_bootstraps): self.__bca_high = self.__difference warnings.warn(err_temp.substitute(lim_type="upper"), stacklevel=0) - def perform_statistical_test(self): + def _perform_statistical_test(self): + ''' + Function to complete the statistical tests + ''' from ._stats_tools import effsize as es # Perform statistical tests. @@ -319,7 +328,7 @@ def perform_statistical_test(self): self.__permutation_count, ) - if self.__is_paired and self.__proportional is False: + if self.__is_paired and not self.__proportional: # Wilcoxon, a non-parametric version of the paired T-test. wilcoxon = spstats.wilcoxon(self.__control, self.__test) self.__pvalue_wilcoxon = wilcoxon.pvalue @@ -332,10 +341,6 @@ def perform_statistical_test(self): ) self.__pvalue_paired_students_t = paired_t.pvalue self.__statistic_paired_students_t = paired_t.statistic - # TODO dead code - standardized_es = es.cohens_d( - self.__control, self.__test, self.__is_paired - ) elif self.__is_paired and self.__proportional: # for binary paired data, use McNemar's test @@ -1393,7 +1398,7 @@ def __init__(self, control: array, self.__permutations = [] self.__permutations_var = [] - for i in range(int(permutation_count)): + for i in range(int(self.__permutation_count)): if is_paired: # Select which control-test pairs to swap. random_idx = rng.choice(CONTROL_LEN, @@ -1430,11 +1435,11 @@ def __init__(self, control: array, self.__permutations = array(self.__permutations) self.__permutations_var = array(self.__permutations_var) - self.pvalue = EXTREME_COUNT / permutation_count + self.pvalue = EXTREME_COUNT / self.__permutation_count def __repr__(self): - return("{} permutations were taken. The p-value is {}.".format(self.permutation_count, + return("{} permutations were taken. 
The p-value is {}.".format(self.__permutation_count, self.pvalue)) diff --git a/nbs/API/bootstrap.ipynb b/nbs/API/bootstrap.ipynb index d7a5ae64..a89ea5af 100644 --- a/nbs/API/bootstrap.ipynb +++ b/nbs/API/bootstrap.ipynb @@ -18,7 +18,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| default_exp _bootstrap_tools" + "# | default_exp _bootstrap_tools" ] }, { @@ -28,10 +28,11 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "from __future__ import division\n", "from nbdev.showdoc import *\n", "import nbdev\n", + "\n", "nbdev.nbdev_export()" ] }, @@ -42,7 +43,7 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", + "# |export\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", @@ -59,11 +60,11 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", + "# |export\n", "class bootstrap:\n", - " '''\n", - " Computes the summary statistic and a bootstrapped confidence interval. \n", - " \n", + " \"\"\"\n", + " Computes the summary statistic and a bootstrapped confidence interval.\n", + "\n", " Returns\n", " -------\n", " An `bootstrap` object reporting the summary statistics, percentile CIs, bias-corrected and accelerated (BCa) CIs, and the settings used:\n", @@ -100,79 +101,79 @@ " `pvalue_mann_whitney`: float\n", " Two-sided p-value obtained from scipy.stats.mannwhitneyu. If a single array was given (x1 only), returns 'NIL'. The Mann-Whitney U-test is a nonparametric unpaired test of the null hypothesis that x1 and x2 are from the same distribution. See \n", "\n", - " '''\n", - " def __init__(self, \n", - " x1:np.array, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded.\n", - " x2:np.array=None, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. 
NaNs are automatically discarded.\n", - " paired:bool=False, # Whether or not x1 and x2 are paired samples. If 'paired' is None then the data will not be treated as paired data in the subsequent calculations. If 'paired' is 'baseline', then in each tuple of x, other groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with the previous group (as control).\n", - " # TODO naming\n", - " statfunction:callable=np.mean,#The summary statistic called on data.\n", - " smoothboot:bool=False,#Taken from seaborn.algorithms.bootstrap. If True, performs a smoothed bootstrap (draws samples from a kernel destiny estimate).\n", - " alpha_level:float=0.05,#Denotes the likelihood that the confidence interval produced does not include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced.\n", - " reps:int=5000 # Number of bootstrap iterations to perform.\n", - " ):\n", + " \"\"\"\n", "\n", + " def __init__(\n", + " self,\n", + " x1: np.array, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded.\n", + " x2: np.array = None, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded.\n", + " paired: bool = False, # Whether or not x1 and x2 are paired samples. If 'paired' is None then the data will not be treated as paired data in the subsequent calculations. If 'paired' is 'baseline', then in each tuple of x, other groups will be paired up with the first group (as control). 
If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with the previous group (as control).\n", + " stat_function: callable = np.mean, # The summary statistic called on data.\n", + " smoothboot: bool = False, # Taken from seaborn.algorithms.bootstrap. If True, performs a smoothed bootstrap (draws samples from a kernel destiny estimate).\n", + " alpha_level: float = 0.05, # Denotes the likelihood that the confidence interval produced does not include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced.\n", + " reps: int = 5000, # Number of bootstrap iterations to perform.\n", + " ):\n", " # Turn to pandas series.\n", " x1 = pd.Series(x1).dropna()\n", " diff = False\n", "\n", - " # Initialise statfunction\n", - " if statfunction is None:\n", - " statfunction = np.mean\n", + " # Initialise stat_function\n", + " if stat_function is None:\n", + " stat_function = np.mean\n", "\n", " # Compute two-sided alphas.\n", - " if alpha_level > 1. 
or alpha_level < 0.:\n", + " if alpha_level > 1.0 or alpha_level < 0.0:\n", " raise ValueError(\"alpha_level must be between 0 and 1.\")\n", - " alphas = np.array([alpha_level/2., 1-alpha_level/2.])\n", + " alphas = np.array([alpha_level / 2.0, 1 - alpha_level / 2.0])\n", "\n", - " sns_bootstrap_kwargs = {'func': statfunction,\n", - " 'n_boot': reps,\n", - " 'smooth': smoothboot}\n", + " sns_bootstrap_kwargs = {\n", + " \"func\": stat_function,\n", + " \"n_boot\": reps,\n", + " \"smooth\": smoothboot,\n", + " }\n", "\n", " if paired:\n", " # check x2 is not None:\n", " if x2 is None:\n", - " raise ValueError('Please specify x2.')\n", + " raise ValueError(\"Please specify x2.\")\n", " else:\n", " x2 = pd.Series(x2).dropna()\n", " if len(x1) != len(x2):\n", - " raise ValueError('x1 and x2 are not the same length.')\n", - "\n", - " if (x2 is None) or paired:\n", + " raise ValueError(\"x1 and x2 are not the same length.\")\n", "\n", + " if (x2 is None) or (paired is not None):\n", " if x2 is None:\n", " tx = x1\n", " paired = False\n", " ttest_single = ttest_1samp(x1, 0)[1]\n", - " ttest_2_ind = 'NIL'\n", - " ttest_2_paired = 'NIL'\n", - " wilcoxonresult = 'NIL'\n", + " ttest_2_ind = \"NIL\"\n", + " ttest_2_paired = \"NIL\"\n", + " wilcoxonresult = \"NIL\"\n", "\n", - " #elif paired is not None:\n", - " else: # only two options to enter here\n", + " # elif paired is not None:\n", + " else: # only two options to enter here\n", " diff = True\n", " tx = x2 - x1\n", - " ttest_single = 'NIL'\n", - " ttest_2_ind = 'NIL'\n", + " ttest_single = \"NIL\"\n", + " ttest_2_ind = \"NIL\"\n", " ttest_2_paired = ttest_rel(x1, x2)[1]\n", " wilcoxonresult = wilcoxon(x1, x2)[1]\n", - " mannwhitneyresult = 'NIL'\n", + " mannwhitneyresult = \"NIL\"\n", "\n", " # Turns data into array, then tuple.\n", " tdata = (tx,)\n", "\n", " # The value of the statistic function applied\n", " # just to the actual data.\n", - " summ_stat = statfunction(*tdata)\n", + " summ_stat = 
stat_function(*tdata)\n", " statarray = sns.algorithms.bootstrap(tx, **sns_bootstrap_kwargs)\n", " statarray.sort()\n", "\n", " # Get Percentile indices\n", - " pct_low_high = np.round((reps-1) * alphas)\n", - " pct_low_high = np.nan_to_num(pct_low_high).astype('int')\n", - "\n", + " pct_low_high = np.round((reps - 1) * alphas)\n", + " pct_low_high = np.nan_to_num(pct_low_high).astype(\"int\")\n", "\n", - " elif x2 and paired is None:\n", + " elif x2 is not None and paired is None:\n", " diff = True\n", " x2 = pd.Series(x2).dropna()\n", " # Generate statarrays for both arrays.\n", @@ -182,42 +183,45 @@ " tdata = exp_statarray - ref_statarray\n", " statarray = tdata.copy()\n", " statarray.sort()\n", - " tdata = (tdata, ) # Note tuple form.\n", + " tdata = (tdata,) # Note tuple form.\n", "\n", " # The difference as one would calculate it.\n", - " summ_stat = statfunction(x2) - statfunction(x1)\n", + " summ_stat = stat_function(x2) - stat_function(x1)\n", "\n", " # Get Percentile indices\n", - " pct_low_high = np.round((reps-1) * alphas)\n", - " pct_low_high = np.nan_to_num(pct_low_high).astype('int')\n", + " pct_low_high = np.round((reps - 1) * alphas)\n", + " pct_low_high = np.nan_to_num(pct_low_high).astype(\"int\")\n", "\n", " # Statistical tests.\n", - " ttest_single='NIL'\n", - " ttest_2_ind = ttest_ind(x1,x2)[1]\n", - " ttest_2_paired='NIL'\n", - " mannwhitneyresult = mannwhitneyu(x1, x2, alternative='two-sided')[1]\n", - " wilcoxonresult = 'NIL'\n", + " ttest_single = \"NIL\"\n", + " ttest_2_ind = ttest_ind(x1, x2)[1]\n", + " ttest_2_paired = \"NIL\"\n", + " mannwhitneyresult = mannwhitneyu(x1, x2, alternative=\"two-sided\")[1]\n", + " wilcoxonresult = \"NIL\"\n", "\n", " # Get Bias-Corrected Accelerated indices convenience function invoked.\n", - " bca_low_high = bca(tdata, alphas, statarray,\n", - " statfunction, summ_stat, reps)\n", + " bca_low_high = bca(tdata, alphas, statarray, stat_function, summ_stat, reps)\n", "\n", " # Warnings for unstable or 
extreme indices.\n", " for ind in [pct_low_high, bca_low_high]:\n", - " if np.any(ind == 0) or np.any(ind == reps-1):\n", - " warnings.warn(\"Some values used extremal samples;\"\n", - " \" results are probably unstable.\")\n", - " elif np.any(ind<10) or np.any(ind>=reps-10):\n", - " warnings.warn(\"Some values used top 10 low/high samples;\"\n", - " \" results may be unstable.\")\n", + " if np.any(ind == 0) or np.any(ind == reps - 1):\n", + " warnings.warn(\n", + " \"Some values used extremal samples;\"\n", + " \" results are probably unstable.\"\n", + " )\n", + " elif np.any(ind < 10) or np.any(ind >= reps - 10):\n", + " warnings.warn(\n", + " \"Some values used top 10 low/high samples;\"\n", + " \" results may be unstable.\"\n", + " )\n", "\n", " self.summary = summ_stat\n", " self.is_paired = paired\n", " self.is_difference = diff\n", - " self.statistic = str(statfunction)\n", + " self.statistic = str(stat_function)\n", " self.n_reps = reps\n", "\n", - " self.ci = (1-alpha_level)*100\n", + " self.ci = (1 - alpha_level) * 100\n", " self.stat_array = np.array(statarray)\n", "\n", " self.pct_ci_low = statarray[pct_low_high[0]]\n", @@ -234,32 +238,33 @@ " self.pvalue_wilcoxon = wilcoxonresult\n", " self.pvalue_mann_whitney = mannwhitneyresult\n", "\n", - " self.results = {'stat_summary': self.summary,\n", - " 'is_difference': diff,\n", - " 'is_paired': paired,\n", - " 'bca_ci_low': self.bca_ci_low,\n", - " 'bca_ci_high': self.bca_ci_high,\n", - " 'ci': self.ci\n", - " }\n", + " self.results = {\n", + " \"stat_summary\": self.summary,\n", + " \"is_difference\": diff,\n", + " \"is_paired\": paired,\n", + " \"bca_ci_low\": self.bca_ci_low,\n", + " \"bca_ci_high\": self.bca_ci_high,\n", + " \"ci\": self.ci,\n", + " }\n", "\n", " def __repr__(self):\n", - "\n", - " if 'mean' in self.statistic:\n", - " stat = 'mean'\n", - " elif 'median' in self.statistic:\n", - " stat = 'median'\n", + " if \"mean\" in self.statistic:\n", + " stat = \"mean\"\n", + " elif \"median\" in 
self.statistic:\n", + " stat = \"median\"\n", " else:\n", " stat = self.statistic\n", "\n", - " diff_types = {'sequential': 'paired', 'baseline': 'paired', None: 'unpaired'}\n", + " diff_types = {\"sequential\": \"paired\", \"baseline\": \"paired\", None: \"unpaired\"}\n", " if self.is_difference:\n", - " a = 'The {} {} difference is {}.'.format(diff_types[self.is_paired],\n", - " stat, self.summary)\n", + " a = \"The {} {} difference is {}.\".format(\n", + " diff_types[self.is_paired], stat, self.summary\n", + " )\n", " else:\n", - " a = 'The {} is {}.'.format(stat, self.summary)\n", + " a = \"The {} is {}.\".format(stat, self.summary)\n", "\n", - " b = '[{} CI: {}, {}]'.format(self.ci, self.bca_ci_low, self.bca_ci_high)\n", - " return '\\n'.join([a, b])" + " b = \"[{} CI: {}, {}]\".format(self.ci, self.bca_ci_low, self.bca_ci_high)\n", + " return \"\\n\".join([a, b])" ] }, { @@ -269,7 +274,7 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", + "# |export\n", "def jackknife_indexes(data):\n", " # Taken without modification from scikits.bootstrap package.\n", " \"\"\"\n", @@ -281,39 +286,42 @@ " data set Y with the ith data point deleted.\n", " \"\"\"\n", "\n", - " base = np.arange(0,len(data))\n", - " return (np.delete(base,i) for i in base)\n", + " base = np.arange(0, len(data))\n", + " return (np.delete(base, i) for i in base)\n", + "\n", "\n", - "def bca(data, alphas, statarray, statfunction, ostat, reps):\n", - " '''\n", + "def bca(data, alphas, statarray, stat_function, ostat, reps):\n", + " \"\"\"\n", " Subroutine called to calculate the BCa statistics.\n", " Borrowed heavily from scikits.bootstrap code.\n", - " '''\n", + " \"\"\"\n", "\n", " # The bias correction value.\n", - " z0 = norm.ppf( ( 1.0*np.sum(statarray < ostat, axis = 0) ) / reps )\n", + " z0 = norm.ppf((1.0 * np.sum(statarray < ostat, axis=0)) / reps)\n", "\n", " # Statistics of the jackknife distribution\n", " jackindexes = jackknife_indexes(data[0])\n", - " jstat = 
[statfunction(*(x[indexes] for x in data))\n", - " for indexes in jackindexes]\n", - " jmean = np.mean(jstat,axis = 0)\n", + " jstat = [stat_function(*(x[indexes] for x in data)) for indexes in jackindexes]\n", + " jmean = np.mean(jstat, axis=0)\n", "\n", " # Acceleration value\n", - " a = np.divide(np.sum( (jmean - jstat)**3, axis = 0 ),\n", - " ( 6.0 * np.sum( (jmean - jstat)**2, axis = 0)**1.5 )\n", - " )\n", + " a = np.divide(\n", + " np.sum((jmean - jstat) ** 3, axis=0),\n", + " (6.0 * np.sum((jmean - jstat) ** 2, axis=0) ** 1.5),\n", + " )\n", " if np.any(np.isnan(a)):\n", " nanind = np.nonzero(np.isnan(a))\n", - " warnings.warn(\"Some acceleration values were undefined.\"\n", - " \"This is almost certainly because all values\"\n", - " \"for the statistic were equal. Affected\"\n", - " \"confidence intervals will have zero width and\"\n", - " \"may be inaccurate (indexes: {})\".format(nanind))\n", - " zs = z0 + norm.ppf(alphas).reshape(alphas.shape+(1,)*z0.ndim)\n", - " avals = norm.cdf(z0 + zs/(1-a*zs))\n", - " nvals = np.round((reps-1)*avals)\n", - " nvals = np.nan_to_num(nvals).astype('int')\n", + " warnings.warn(\n", + " \"Some acceleration values were undefined.\"\n", + " \"This is almost certainly because all values\"\n", + " \"for the statistic were equal. 
Affected\"\n", + " \"confidence intervals will have zero width and\"\n", + " \"may be inaccurate (indexes: {})\".format(nanind)\n", + " )\n", + " zs = z0 + norm.ppf(alphas).reshape(alphas.shape + (1,) * z0.ndim)\n", + " avals = norm.cdf(z0 + zs / (1 - a * zs))\n", + " nvals = np.round((reps - 1) * avals)\n", + " nvals = np.nan_to_num(nvals).astype(\"int\")\n", "\n", " return nvals" ] diff --git a/nbs/API/dabest_object.ipynb b/nbs/API/dabest_object.ipynb index 8a69f064..1e85c3a7 100644 --- a/nbs/API/dabest_object.ipynb +++ b/nbs/API/dabest_object.ipynb @@ -154,7 +154,7 @@ " # Check if this is a 2x2 ANOVA case and x & y are valid columns\n", " # Create experiment_label and x1_level\n", " if self.__delta2:\n", - " # TODO Wrap the errors in a separate function\n", + " # TODO Wrap the errors in a separate function called check_errors()\n", " if self.__proportional:\n", " err0 = \"`proportional` and `delta` cannot be True at the same time.\"\n", " raise ValueError(err0)\n", @@ -324,7 +324,7 @@ " err = \"{} is not a column in `data`. 
\".format(id_col)\n", " raise IndexError(err)\n", "\n", - " self.compute_effectsize_dfs()\n", + " self._compute_effectsize_dfs()\n", "\n", " def __repr__(self):\n", " from .__init__ import __version__\n", @@ -684,7 +684,11 @@ "\n", " return plot_data\n", "\n", - " def compute_effectsize_dfs(self):\n", + " def _compute_effectsize_dfs(self):\n", + " '''\n", + " Function to compute all attributes based on EffectSizeDataFrame.\n", + " It returns nothing.\n", + " '''\n", " from ._effsize_objects import EffectSizeDataFrame\n", "\n", " effectsize_df_kwargs = dict(\n", diff --git a/nbs/API/effsize_objects.ipynb b/nbs/API/effsize_objects.ipynb index 50407b7c..7ca59311 100644 --- a/nbs/API/effsize_objects.ipynb +++ b/nbs/API/effsize_objects.ipynb @@ -166,7 +166,7 @@ " self.__random_seed = random_seed\n", " self.__ci = ci\n", " self.__proportional = proportional\n", - " self.check_errors(control, test)\n", + " self._check_errors(control, test)\n", "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", @@ -218,7 +218,7 @@ " self.__bootstraps, self.__difference\n", " )\n", "\n", - " self.compute_bca_intervals(sorted_bootstraps)\n", + " self._compute_bca_intervals(sorted_bootstraps)\n", "\n", " # Compute percentile intervals.\n", " pct_idx_low = int((self.__alpha / 2) * self.__resamples)\n", @@ -228,7 +228,7 @@ " self.__pct_low = sorted_bootstraps[pct_idx_low]\n", " self.__pct_high = sorted_bootstraps[pct_idx_high]\n", "\n", - " self.perform_statistical_test()\n", + " self._perform_statistical_test()\n", "\n", " def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):\n", " RM_STATUS = {\n", @@ -292,14 +292,17 @@ "\n", " if show_resample_count and define_pval:\n", " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", - " elif ~show_resample_count and define_pval:\n", + " elif not show_resample_count and define_pval:\n", " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, pval_def)\n", " elif show_resample_count 
and ~define_pval:\n", " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, bs)\n", " else:\n", " return \"{}\\n{}\".format(out, pvalue)\n", "\n", - " def check_errors(self, control, test):\n", + " def _check_errors(self, control, test):\n", + " '''\n", + " Function to check configuration errors for the given control and test data.\n", + " '''\n", " kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()]\n", " if self.__effect_size not in kosher_es:\n", " err1 = \"The effect size '{}'\".format(self.__effect_size)\n", @@ -322,7 +325,10 @@ " )\n", " raise ValueError(err1)\n", "\n", - " def compute_bca_intervals(self, sorted_bootstraps):\n", + " def _compute_bca_intervals(self, sorted_bootstraps):\n", + " '''\n", + " Function to compute the bca intervals given the sorted bootstraps.\n", + " '''\n", " from ._stats_tools import confint_2group_diff as ci2g\n", "\n", " # Compute BCa intervals.\n", @@ -355,7 +361,7 @@ " )\n", "\n", " else:\n", - " # TODO improve error handling, separate file\n", + " # TODO improve error handling, separate file with error messages?\n", " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", " err2 = \"It is set to the effect size itself.\"\n", " err3 = \"All bootstrap values were likely all the same.\"\n", @@ -369,7 +375,10 @@ " self.__bca_high = self.__difference\n", " warnings.warn(err_temp.substitute(lim_type=\"upper\"), stacklevel=0)\n", "\n", - " def perform_statistical_test(self):\n", + " def _perform_statistical_test(self):\n", + " '''\n", + " Function to complete the statistical tests\n", + " '''\n", " from ._stats_tools import effsize as es\n", "\n", " # Perform statistical tests.\n", @@ -381,7 +390,7 @@ " self.__permutation_count,\n", " )\n", "\n", - " if self.__is_paired and self.__proportional is False:\n", + " if self.__is_paired and not self.__proportional:\n", " # Wilcoxon, a non-parametric version of the paired T-test.\n", " wilcoxon = spstats.wilcoxon(self.__control, self.__test)\n", " self.__pvalue_wilcoxon 
= wilcoxon.pvalue\n", @@ -394,10 +403,6 @@ " )\n", " self.__pvalue_paired_students_t = paired_t.pvalue\n", " self.__statistic_paired_students_t = paired_t.statistic\n", - " # TODO dead code\n", - " standardized_es = es.cohens_d(\n", - " self.__control, self.__test, self.__is_paired\n", - " )\n", "\n", " elif self.__is_paired and self.__proportional:\n", " # for binary paired data, use McNemar's test\n", @@ -1763,7 +1768,7 @@ " self.__permutations = []\n", " self.__permutations_var = []\n", "\n", - " for i in range(int(permutation_count)):\n", + " for i in range(int(self.__permutation_count)):\n", " if is_paired:\n", " # Select which control-test pairs to swap.\n", " random_idx = rng.choice(CONTROL_LEN,\n", @@ -1800,11 +1805,11 @@ " self.__permutations = array(self.__permutations)\n", " self.__permutations_var = array(self.__permutations_var)\n", "\n", - " self.pvalue = EXTREME_COUNT / permutation_count\n", + " self.pvalue = EXTREME_COUNT / self.__permutation_count\n", "\n", "\n", " def __repr__(self):\n", - " return(\"{} permutations were taken. The p-value is {}.\".format(self.permutation_count, \n", + " return(\"{} permutations were taken. 
The p-value is {}.\".format(self.__permutation_count, \n", " self.pvalue))\n", "\n", "\n", @@ -1880,11 +1885,7 @@ "source": [] } ], - "metadata": { - "language_info": { - "name": "python" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 2 } From af0990a668ece10b395db7f262f065412ef5ee68 Mon Sep 17 00:00:00 2001 From: cyberosa Date: Tue, 19 Dec 2023 17:08:27 +0100 Subject: [PATCH 09/10] Fixing name of pre-commit file --- .pre-commit-config.yaml.yaml => .pre-commit-config.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .pre-commit-config.yaml.yaml => .pre-commit-config.yaml (100%) diff --git a/.pre-commit-config.yaml.yaml b/.pre-commit-config.yaml similarity index 100% rename from .pre-commit-config.yaml.yaml rename to .pre-commit-config.yaml From 0d5bea8219693112bfebdeacdeb3bebbfdb49e8e Mon Sep 17 00:00:00 2001 From: cyberosa Date: Wed, 20 Dec 2023 09:57:33 +0100 Subject: [PATCH 10/10] Added missing tolerance parameter for ui tests in test_10 file --- nbs/tests/test_10_proportion_plot.py | 430 +++++++++++++++++---------- 1 file changed, 266 insertions(+), 164 deletions(-) diff --git a/nbs/tests/test_10_proportion_plot.py b/nbs/tests/test_10_proportion_plot.py index 8ae453cb..02927471 100644 --- a/nbs/tests/test_10_proportion_plot.py +++ b/nbs/tests/test_10_proportion_plot.py @@ -2,14 +2,14 @@ import numpy as np import pandas as pd import matplotlib as mpl -mpl.use('Agg') + +mpl.use("Agg") import matplotlib.ticker as Ticker import matplotlib.pyplot as plt from dabest._api import load def create_demo_prop_dataset(seed=9999, N=40): - np.random.seed(9999) # Fix the seed so the results are replicable. # Create samples n = 1 @@ -28,21 +28,32 @@ def create_demo_prop_dataset(seed=9999, N=40): t9 = np.zeros(N) # Add a `gender` column for coloring the data. 
- females = np.repeat('Female', N / 2).tolist() - males = np.repeat('Male', N / 2).tolist() + females = np.repeat("Female", N / 2).tolist() + males = np.repeat("Male", N / 2).tolist() gender = females + males # Add an `id` column for paired data plotting. id_col = pd.Series(range(1, N + 1)) # Combine samples and gender into a DataFrame. - df = pd.DataFrame({'Control 1': c1, 'Test 1': t1, - 'Control 2': c2, 'Test 2': t2, - 'Control 3': c3, 'Test 3': t3, - 'Test 4': t4, 'Test 5': t5, 'Test 6': t6, - 'Test 7': t7, 'Test 8': t8, 'Test 9': t9, - 'Gender': gender, 'ID': id_col - }) + df = pd.DataFrame( + { + "Control 1": c1, + "Test 1": t1, + "Control 2": c2, + "Test 2": t2, + "Control 3": c3, + "Test 3": t3, + "Test 4": t4, + "Test 5": t5, + "Test 6": t6, + "Test 7": t7, + "Test 8": t8, + "Test 9": t9, + "Gender": gender, + "ID": id_col, + } + ) return df @@ -51,127 +62,195 @@ def create_demo_prop_dataset(seed=9999, N=40): two_groups_unpaired = load(df, idx=("Control 1", "Test 1"), proportional=True) -multi_2group = load(df, idx=(("Control 1", "Test 1",), - ("Control 2", "Test 2")), - proportional=True) - -shared_control = load(df, idx=("Control 1", "Test 1", - "Test 2", "Test 3", - "Test 4", "Test 5", "Test 6"), - proportional=True) - -multi_groups = load(df, idx=(("Control 1", "Test 1",), - ("Control 2", "Test 2","Test 3"), - ("Control 3", "Test 4","Test 5", "Test 6") - ),proportional=True) - -two_groups_paired = load(df, idx=("Control 1", "Test 1"), - paired="baseline", id_col="ID",proportional=True) - -multi_2group_paired = load(df, idx=(("Control 1", "Test 1"), - ("Control 2", "Test 2")), - paired="baseline", id_col="ID", proportional=True) - -multi_groups_paired = load(df, idx=(("Control 1", "Test 1",), - ("Control 2", "Test 2","Test 3"), - ("Control 3", "Test 4","Test 5", "Test 6") - ),paired="baseline", id_col="ID", proportional=True) - -two_groups_sequential = load(df, idx=("Control 1", "Test 1"), - paired="sequential", id_col="ID",proportional=True) - 
-multi_2group_sequential = load(df, idx=(("Control 1", "Test 1"), - ("Control 2", "Test 2")), - paired="sequential", id_col="ID", proportional=True) - -multi_groups_sequential = load(df, idx=(("Control 1", "Test 1",), - ("Control 2", "Test 2","Test 3"), - ("Control 3", "Test 4","Test 5", "Test 6") - ),paired="sequential", id_col="ID", proportional=True) -shared_control_paired = load(df, idx=("Control 1", "Test 1", - "Test 2", "Test 3", - "Test 4", "Test 5", "Test 6"), - paired="sequential", id_col="ID", proportional=True) - -zero_to_zero = load(df, idx=('Test 7', 'Test 9'), - proportional=True, paired='sequential', id_col="ID") -zero_to_one = load(df, idx=('Test 7', 'Test 8'), - proportional=True, paired='sequential', id_col="ID") -one_to_zero = load(df, idx=('Test 8', 'Test 7'), - proportional=True, paired='sequential', id_col="ID") - -one_in_separate_control = load(df, idx=((("Control 1", "Test 1"), - ("Test 2", "Test 3"), - ("Test 4", "Test 8", "Test 6"))), - proportional=True, paired="sequential", id_col="ID") - - - - -@pytest.mark.mpl_image_compare +multi_2group = load( + df, + idx=( + ( + "Control 1", + "Test 1", + ), + ("Control 2", "Test 2"), + ), + proportional=True, +) + +shared_control = load( + df, + idx=("Control 1", "Test 1", "Test 2", "Test 3", "Test 4", "Test 5", "Test 6"), + proportional=True, +) + +multi_groups = load( + df, + idx=( + ( + "Control 1", + "Test 1", + ), + ("Control 2", "Test 2", "Test 3"), + ("Control 3", "Test 4", "Test 5", "Test 6"), + ), + proportional=True, +) + +two_groups_paired = load( + df, idx=("Control 1", "Test 1"), paired="baseline", id_col="ID", proportional=True +) + +multi_2group_paired = load( + df, + idx=(("Control 1", "Test 1"), ("Control 2", "Test 2")), + paired="baseline", + id_col="ID", + proportional=True, +) + +multi_groups_paired = load( + df, + idx=( + ( + "Control 1", + "Test 1", + ), + ("Control 2", "Test 2", "Test 3"), + ("Control 3", "Test 4", "Test 5", "Test 6"), + ), + paired="baseline", + id_col="ID", 
+ proportional=True, +) + +two_groups_sequential = load( + df, idx=("Control 1", "Test 1"), paired="sequential", id_col="ID", proportional=True +) + +multi_2group_sequential = load( + df, + idx=(("Control 1", "Test 1"), ("Control 2", "Test 2")), + paired="sequential", + id_col="ID", + proportional=True, +) + +multi_groups_sequential = load( + df, + idx=( + ( + "Control 1", + "Test 1", + ), + ("Control 2", "Test 2", "Test 3"), + ("Control 3", "Test 4", "Test 5", "Test 6"), + ), + paired="sequential", + id_col="ID", + proportional=True, +) +shared_control_paired = load( + df, + idx=("Control 1", "Test 1", "Test 2", "Test 3", "Test 4", "Test 5", "Test 6"), + paired="sequential", + id_col="ID", + proportional=True, +) + +zero_to_zero = load( + df, idx=("Test 7", "Test 9"), proportional=True, paired="sequential", id_col="ID" +) +zero_to_one = load( + df, idx=("Test 7", "Test 8"), proportional=True, paired="sequential", id_col="ID" +) +one_to_zero = load( + df, idx=("Test 8", "Test 7"), proportional=True, paired="sequential", id_col="ID" +) + +one_in_separate_control = load( + df, + idx=( + (("Control 1", "Test 1"), ("Test 2", "Test 3"), ("Test 4", "Test 8", "Test 6")) + ), + proportional=True, + paired="sequential", + id_col="ID", +) + + +@pytest.mark.mpl_image_compare(tolerance=10) def test_101_gardner_altman_unpaired_propdiff(): - return two_groups_unpaired.mean_diff.plot(); + return two_groups_unpaired.mean_diff.plot() -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_103_cummings_two_group_unpaired_propdiff(): - return two_groups_unpaired.mean_diff.plot(fig_size=(4, 6), - float_contrast=False); + return two_groups_unpaired.mean_diff.plot(fig_size=(4, 6), float_contrast=False) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_105_cummings_multi_group_unpaired_propdiff(): - return multi_2group.mean_diff.plot(); + return multi_2group.mean_diff.plot() + -@pytest.mark.mpl_image_compare 
+@pytest.mark.mpl_image_compare(tolerance=10) def test_106_cummings_shared_control_propdiff(): - return shared_control.mean_diff.plot(); + return shared_control.mean_diff.plot() -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_107_cummings_multi_groups_propdiff(): - return multi_groups.mean_diff.plot(); + return multi_groups.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_109_gardner_altman_ylabel(): - return two_groups_unpaired.mean_diff.plot(bar_label="This is my\nrawdata", - contrast_label="The bootstrap\ndistribtions!"); + return two_groups_unpaired.mean_diff.plot( + bar_label="This is my\nrawdata", contrast_label="The bootstrap\ndistribtions!" + ) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_110_change_fig_size(): - return two_groups_unpaired.mean_diff.plot(fig_size=(6, 6), - custom_palette="Dark2"); + return two_groups_unpaired.mean_diff.plot(fig_size=(6, 6), custom_palette="Dark2") -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_111_change_palette_b(): - return multi_2group.mean_diff.plot(custom_palette="Paired"); + return multi_2group.mean_diff.plot(custom_palette="Paired") + +my_color_palette = { + "Control 1": "blue", + "Test 1": "purple", + "Control 2": "#cb4b16", # This is a hex string. + "Test 2": (0.0, 0.7, 0.2), # This is a RGB tuple. +} -my_color_palette = {"Control 1" : "blue", - "Test 1" : "purple", - "Control 2" : "#cb4b16", # This is a hex string. - "Test 2" : (0., 0.7, 0.2) # This is a RGB tuple. 
- } -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_112_change_palette_c(): - return multi_2group.mean_diff.plot(custom_palette=my_color_palette); + return multi_2group.mean_diff.plot(custom_palette=my_color_palette) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_113_desat(): - return multi_2group.mean_diff.plot(custom_palette=my_color_palette, - bar_desat=0.1, - halfviolin_desat=0.25); + return multi_2group.mean_diff.plot( + custom_palette=my_color_palette, bar_desat=0.1, halfviolin_desat=0.25 + ) -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_114_change_ylims(): - return multi_2group.mean_diff.plot(contrast_ylim=(-2, 2)); + return multi_2group.mean_diff.plot(contrast_ylim=(-2, 2)) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_115_invert_ylim(): - return multi_2group.mean_diff.plot(contrast_ylim=(2, -2), - contrast_label="More negative is better!"); + return multi_2group.mean_diff.plot( + contrast_ylim=(2, -2), contrast_label="More negative is better!" 
+ ) -@pytest.mark.mpl_image_compare -def test_116_ticker_gardner_altman(): +@pytest.mark.mpl_image_compare(tolerance=10) +def test_116_ticker_gardner_altman(): fig = two_groups_unpaired.mean_diff.plot() rawswarm_axes = fig.axes[0] @@ -184,112 +263,135 @@ def test_116_ticker_gardner_altman(): contrast_axes.yaxis.set_minor_locator(Ticker.MultipleLocator(0.25)) return fig -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_117_err_color(): - return two_groups_unpaired.mean_diff.plot(err_color="purple"); + return two_groups_unpaired.mean_diff.plot(err_color="purple") + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_118_cummings_two_group_unpaired_meandiff_bar_width(): - return two_groups_unpaired.mean_diff.plot(bar_width=0.4,float_contrast=False); + return two_groups_unpaired.mean_diff.plot(bar_width=0.4, float_contrast=False) + np.random.seed(9999) Ns = [20, 10, 21, 20] -n=1 -c1 = pd.DataFrame({'Control':np.random.binomial(n, 0.2, size=Ns[0])}) -t1 = pd.DataFrame({'Test 1': np.random.binomial(n, 0.5, size=Ns[1])}) -t2 = pd.DataFrame({'Test 2': np.random.binomial(n, 0.4, size=Ns[2])}) -t3 = pd.DataFrame({'Test 3': np.random.binomial(n, 0.7, size=Ns[3])}) -wide_df = pd.concat([c1, t1, t2, t3],axis=1) +n = 1 +c1 = pd.DataFrame({"Control": np.random.binomial(n, 0.2, size=Ns[0])}) +t1 = pd.DataFrame({"Test 1": np.random.binomial(n, 0.5, size=Ns[1])}) +t2 = pd.DataFrame({"Test 2": np.random.binomial(n, 0.4, size=Ns[2])}) +t3 = pd.DataFrame({"Test 3": np.random.binomial(n, 0.7, size=Ns[3])}) +wide_df = pd.concat([c1, t1, t2, t3], axis=1) + +long_df = pd.melt( + wide_df, + value_vars=["Control", "Test 1", "Test 2", "Test 3"], + value_name="value", + var_name="group", +) +long_df["dummy"] = np.repeat(np.nan, len(long_df)) -long_df = pd.melt(wide_df, - value_vars=["Control", "Test 1", "Test 2", "Test 3"], - value_name="value", - var_name="group") -long_df['dummy'] = np.repeat(np.nan, len(long_df)) 
-@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_119_wide_df_nan(): + wide_df_dabest = load( + wide_df, idx=("Control", "Test 1", "Test 2", "Test 3"), proportional=True + ) - wide_df_dabest = load(wide_df, - idx=("Control", "Test 1", "Test 2", "Test 3"), - proportional=True - ) + return wide_df_dabest.mean_diff.plot() - return wide_df_dabest.mean_diff.plot(); -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_120_long_df_nan(): + long_df_dabest = load( + long_df, + x="group", + y="value", + idx=("Control", "Test 1", "Test 2", "Test 3"), + proportional=True, + ) - long_df_dabest = load(long_df, x="group", y="value", - idx=("Control", "Test 1", "Test 2", "Test 3"), - proportional=True - ) + return long_df_dabest.mean_diff.plot() - return long_df_dabest.mean_diff.plot(); -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_121_cohens_h_gardner_altman(): - return two_groups_unpaired.cohens_h.plot(); + return two_groups_unpaired.cohens_h.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_122_cohens_h_cummings(): - return two_groups_unpaired.cohens_h.plot(float_contrast=False); + return two_groups_unpaired.cohens_h.plot(float_contrast=False) -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_123_sankey_gardner_altman(): - return two_groups_paired.mean_diff.plot(); + return two_groups_paired.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_124_sankey_cummings(): - return two_groups_paired.mean_diff.plot(float_contrast=False); + return two_groups_paired.mean_diff.plot(float_contrast=False) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_125_sankey_2paired_groups(): - return multi_2group_paired.mean_diff.plot(); + return multi_2group_paired.mean_diff.plot() -@pytest.mark.mpl_image_compare + 
+@pytest.mark.mpl_image_compare(tolerance=10) def test_126_sankey_2sequential_groups(): - return multi_2group_sequential.mean_diff.plot(); + return multi_2group_sequential.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_127_sankey_multi_group_paired(): - return multi_groups_paired.mean_diff.plot(); + return multi_groups_paired.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_128_sankey_transparency(): - return two_groups_paired.mean_diff.plot(sankey_kwargs = {"alpha": 0.2}); + return two_groups_paired.mean_diff.plot(sankey_kwargs={"alpha": 0.2}) -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_129_zero_to_zero(): - return zero_to_zero.mean_diff.plot(); + return zero_to_zero.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_130_zero_to_one(): - return zero_to_one.mean_diff.plot(); + return zero_to_one.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_131_one_to_zero(): - return one_to_zero.mean_diff.plot(); + return one_to_zero.mean_diff.plot() -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_132_shared_control_sankey_off(): - return shared_control_paired.mean_diff.plot(sankey_kwargs={'sankey':False}); + return shared_control_paired.mean_diff.plot(sankey_kwargs={"sankey": False}) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_133_shared_control_flow_off(): - return shared_control_paired.mean_diff.plot(sankey_kwargs={'flow':False}); + return shared_control_paired.mean_diff.plot(sankey_kwargs={"flow": False}) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_134_separate_control_sankey_off(): - return multi_groups_sequential.mean_diff.plot(sankey_kwargs={'sankey':False}); + return 
multi_groups_sequential.mean_diff.plot(sankey_kwargs={"sankey": False}) -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_135_separate_control_flow_off(): - return multi_groups_sequential.mean_diff.plot(sankey_kwargs={'flow':False}); + return multi_groups_sequential.mean_diff.plot(sankey_kwargs={"flow": False}) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_136_style_sheets(): # Perform this test last so we don't have to reset the plot style. plt.style.use("dark_background") - return multi_2group.mean_diff.plot(face_color="black"); \ No newline at end of file + return multi_2group.mean_diff.plot(face_color="black")