diff --git a/.pre-commit-config.yaml.yaml b/.pre-commit-config.yaml similarity index 100% rename from .pre-commit-config.yaml.yaml rename to .pre-commit-config.yaml diff --git a/README.md b/README.md index 33d4fb21..a3f3bfb0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -DABEST-Python -================ +# DABEST-Python diff --git a/dabest/__init__.py b/dabest/__init__.py index 2e46392d..e953af6e 100644 --- a/dabest/__init__.py +++ b/dabest/__init__.py @@ -1,5 +1,6 @@ from ._api import load, prop_dataset from ._stats_tools import effsize as effsize -from ._classes import TwoGroupsEffectSize, PermutationTest +from ._effsize_objects import TwoGroupsEffectSize, PermutationTest +from ._dabest_object import Dabest __version__ = "2023.03.29" diff --git a/dabest/_api.py b/dabest/_api.py index 825acefd..190d4e61 100644 --- a/dabest/_api.py +++ b/dabest/_api.py @@ -4,11 +4,24 @@ __all__ = ['load', 'prop_dataset'] # %% ../nbs/API/load.ipynb 4 -def load(data, idx=None, x=None, y=None, paired=None, id_col=None, - ci=95, resamples=5000, random_seed=12345, proportional=False, - delta2 = False, experiment = None, experiment_label = None, - x1_level = None, mini_meta=False): - ''' +def load( + data, + idx=None, + x=None, + y=None, + paired=None, + id_col=None, + ci=95, + resamples=5000, + random_seed=12345, + proportional=False, + delta2=False, + experiment=None, + experiment_label=None, + x1_level=None, + mini_meta=False, +): + """ Loads data in preparation for estimation statistics. This is designed to work with pandas DataFrames. @@ -22,15 +35,15 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None, with each individual tuple producing its own contrast plot x : string or list, default None Column name(s) of the independent variable. This can be expressed as - a list of 2 elements if and only if 'delta2' is True; otherwise it + a list of 2 elements if and only if 'delta2' is True; otherwise it can only be a string. 
y : string, default None Column names for data to be plotted on the x-axis and y-axis. paired : string, default None - The type of the experiment under which the data are obtained. If 'paired' + The type of the experiment under which the data are obtained. If 'paired' is None then the data will not be treated as paired data in the subsequent - calculations. If 'paired' is 'baseline', then in each tuple of x, other - groups will be paired up with the first group (as control). If 'paired' is + calculations. If 'paired' is 'baseline', then in each tuple of x, other + groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with its previous group (as control). id_col : default None. @@ -45,7 +58,7 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None, This integer is used to seed the random number generator during bootstrap resampling, ensuring that the confidence intervals reported are replicable. - proportional : boolean, default False. + proportional : boolean, default False. An indicator of whether the data is binary or not. When set to True, it specifies that the data consists of binary data, where the values are limited to 0 and 1. The code is not suitable for analyzing proportion @@ -55,39 +68,58 @@ def load(data, idx=None, x=None, y=None, paired=None, id_col=None, delta2 : boolean, default False Indicator of delta-delta experiment experiment : String, default None - The name of the column of the dataframe which contains the label of + The name of the column of the dataframe which contains the label of experiments experiment_lab : list, default None A list of String to specify the order of subplots for delta-delta plots. - This can be expressed as a list of 2 elements if and only if 'delta2' - is True; otherwise it can only be a string. + This can be expressed as a list of 2 elements if and only if 'delta2' + is True; otherwise it can only be a string. 
x1_level : list, default None A list of String to specify the order of subplots for delta-delta plots. - This can be expressed as a list of 2 elements if and only if 'delta2' - is True; otherwise it can only be a string. + This can be expressed as a list of 2 elements if and only if 'delta2' + is True; otherwise it can only be a string. mini_meta : boolean, default False Indicator of weighted delta calculation. Returns ------- A `Dabest` object. - ''' - from ._classes import Dabest - - return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment, experiment_label, x1_level, mini_meta) - - + """ + from dabest import Dabest + + return Dabest( + data, + idx, + x, + y, + paired, + id_col, + ci, + resamples, + random_seed, + proportional, + delta2, + experiment, + experiment_label, + x1_level, + mini_meta, + ) # %% ../nbs/API/load.ipynb 5 import numpy as np from typing import Union, Optional +import pandas as pd -def prop_dataset(group:Union[list, tuple, np.ndarray, dict], #Accepts lists, tuples, or numpy ndarrays of numeric types. - group_names: Optional[list] = None): - ''' + +def prop_dataset( + group: Union[ + list, tuple, np.ndarray, dict + ], # Accepts lists, tuples, or numpy ndarrays of numeric types. + group_names: Optional[list] = None, +): + """ Convenient function to generate a dataframe of binary data. 
- ''' - import pandas as pd + """ if isinstance(group, dict): # If group_names is not provided, use the keys of the dict as group_names @@ -95,36 +127,53 @@ def prop_dataset(group:Union[list, tuple, np.ndarray, dict], #Accepts lists, tup group_names = list(group.keys()) elif not set(group_names) == set(group.keys()): # Check if the group_names provided is the same as the keys of the dict - raise ValueError('group_names must be the same as the keys of the dict.') + raise ValueError("group_names must be the same as the keys of the dict.") # Check if the values in the dict are numeric - if not all([isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names]): - raise ValueError('group must be a dict of lists, tuples, or numpy ndarrays of numeric types.') + if not all( + [isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names] + ): + raise ValueError( + "group must be a dict of lists, tuples, or numpy ndarrays of numeric types." + ) # Check if the values in the dict only have two elements under each parent key if not all([len(group[name]) == 2 for name in group_names]): - raise ValueError('Each parent key should have only two elements.') + raise ValueError("Each parent key should have only two elements.") group_val = group else: if group_names is None: - raise ValueError('group_names must be provided if group is not a dict.') + raise ValueError("group_names must be provided if group is not a dict.") # Check if the length of group is two times of the length of group_names if not len(group) == 2 * len(group_names): - raise ValueError('The length of group must be two times of the length of group_names.') - group_val = {group_names[i]: [group[i*2], group[i*2+1]] for i in range(len(group_names))} + raise ValueError( + "The length of group must be two times of the length of group_names." 
+ ) + group_val = { + group_names[i]: [group[i * 2], group[i * 2 + 1]] + for i in range(len(group_names)) + } # Check if the sum of values in group_val under each key are the same - if not all([sum(group_val[name]) == sum(group_val[group_names[0]]) for name in group_val.keys()]): - raise ValueError('The sum of values under each key must be the same.') - - id_col = pd.Series(range(1, sum(group_val[group_names[0]])+1)) - + if not all( + [ + sum(group_val[name]) == sum(group_val[group_names[0]]) + for name in group_val.keys() + ] + ): + raise ValueError("The sum of values under each key must be the same.") + + id_col = pd.Series(range(1, sum(group_val[group_names[0]]) + 1)) + final_df = pd.DataFrame() for name in group_val.keys(): - col = np.repeat(0, group_val[name][0]).tolist() + np.repeat(1, group_val[name][1]).tolist() - df = pd.DataFrame({name:col}) + col = ( + np.repeat(0, group_val[name][0]).tolist() + + np.repeat(1, group_val[name][1]).tolist() + ) + df = pd.DataFrame({name: col}) final_df = pd.concat([final_df, df], axis=1) - final_df['ID'] = id_col + final_df["ID"] = id_col return final_df diff --git a/dabest/_bootstrap_tools.py b/dabest/_bootstrap_tools.py index d04a46c8..ed3398ee 100644 --- a/dabest/_bootstrap_tools.py +++ b/dabest/_bootstrap_tools.py @@ -5,12 +5,18 @@ # %% ../nbs/API/bootstrap.ipynb 3 import numpy as np +import pandas as pd +import seaborn as sns +from scipy.stats import norm +from scipy.stats import ttest_1samp, ttest_ind, ttest_rel +from scipy.stats import mannwhitneyu, wilcoxon, norm +import warnings # %% ../nbs/API/bootstrap.ipynb 4 class bootstrap: - ''' - Computes the summary statistic and a bootstrapped confidence interval. - + """ + Computes the summary statistic and a bootstrapped confidence interval. 
+ Returns ------- An `bootstrap` object reporting the summary statistics, percentile CIs, bias-corrected and accelerated (BCa) CIs, and the settings used: @@ -47,85 +53,77 @@ class bootstrap: `pvalue_mann_whitney`: float Two-sided p-value obtained from scipy.stats.mannwhitneyu. If a single array was given (x1 only), returns 'NIL'. The Mann-Whitney U-test is a nonparametric unpaired test of the null hypothesis that x1 and x2 are from the same distribution. See - ''' - def __init__(self, - x1:np.array, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded. - x2:np.array=None, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded. - paired:bool=False, # Whether or not x1 and x2 are paired samples. If 'paired' is None then the data will not be treated as paired data in the subsequent calculations. If 'paired' is 'baseline', then in each tuple of x, other groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with the previous group (as control). - statfunction:callable=np.mean,#The summary statistic called on data. - smoothboot:bool=False,#Taken from seaborn.algorithms.bootstrap. If True, performs a smoothed bootstrap (draws samples from a kernel destiny estimate). - alpha_level:float=0.05,#Denotes the likelihood that the confidence interval produced does not include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced. - reps:int=5000 # Number of bootstrap iterations to perform. 
- ): - - import numpy as np - import pandas as pd - import seaborn as sns - - from scipy.stats import norm - from numpy.random import randint - from scipy.stats import ttest_1samp, ttest_ind, ttest_rel - from scipy.stats import mannwhitneyu, wilcoxon, norm - import warnings + """ + def __init__( + self, + x1: np.array, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded. + x2: np.array = None, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded. + paired: bool = False, # Whether or not x1 and x2 are paired samples. If 'paired' is None then the data will not be treated as paired data in the subsequent calculations. If 'paired' is 'baseline', then in each tuple of x, other groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with the previous group (as control). + stat_function: callable = np.mean, # The summary statistic called on data. + smoothboot: bool = False, # Taken from seaborn.algorithms.bootstrap. If True, performs a smoothed bootstrap (draws samples from a kernel destiny estimate). + alpha_level: float = 0.05, # Denotes the likelihood that the confidence interval produced does not include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced. + reps: int = 5000, # Number of bootstrap iterations to perform. + ): # Turn to pandas series. x1 = pd.Series(x1).dropna() diff = False - # Initialise statfunction - if statfunction == None: - statfunction = np.mean + # Initialise stat_function + if stat_function is None: + stat_function = np.mean # Compute two-sided alphas. - if alpha_level > 1. 
or alpha_level < 0.: + if alpha_level > 1.0 or alpha_level < 0.0: raise ValueError("alpha_level must be between 0 and 1.") - alphas = np.array([alpha_level/2., 1-alpha_level/2.]) + alphas = np.array([alpha_level / 2.0, 1 - alpha_level / 2.0]) - sns_bootstrap_kwargs = {'func': statfunction, - 'n_boot': reps, - 'smooth': smoothboot} + sns_bootstrap_kwargs = { + "func": stat_function, + "n_boot": reps, + "smooth": smoothboot, + } if paired: # check x2 is not None: if x2 is None: - raise ValueError('Please specify x2.') + raise ValueError("Please specify x2.") else: x2 = pd.Series(x2).dropna() if len(x1) != len(x2): - raise ValueError('x1 and x2 are not the same length.') - - if (x2 is None) or (paired is not None) : + raise ValueError("x1 and x2 are not the same length.") + if (x2 is None) or (paired is not None): if x2 is None: tx = x1 paired = False ttest_single = ttest_1samp(x1, 0)[1] - ttest_2_ind = 'NIL' - ttest_2_paired = 'NIL' - wilcoxonresult = 'NIL' + ttest_2_ind = "NIL" + ttest_2_paired = "NIL" + wilcoxonresult = "NIL" - elif paired is not None: + # elif paired is not None: + else: # only two options to enter here diff = True tx = x2 - x1 - ttest_single = 'NIL' - ttest_2_ind = 'NIL' + ttest_single = "NIL" + ttest_2_ind = "NIL" ttest_2_paired = ttest_rel(x1, x2)[1] wilcoxonresult = wilcoxon(x1, x2)[1] - mannwhitneyresult = 'NIL' + mannwhitneyresult = "NIL" # Turns data into array, then tuple. tdata = (tx,) # The value of the statistic function applied # just to the actual data. 
- summ_stat = statfunction(*tdata) + summ_stat = stat_function(*tdata) statarray = sns.algorithms.bootstrap(tx, **sns_bootstrap_kwargs) statarray.sort() # Get Percentile indices - pct_low_high = np.round((reps-1) * alphas) - pct_low_high = np.nan_to_num(pct_low_high).astype('int') - + pct_low_high = np.round((reps - 1) * alphas) + pct_low_high = np.nan_to_num(pct_low_high).astype("int") elif x2 is not None and paired is None: diff = True @@ -137,42 +135,45 @@ def __init__(self, tdata = exp_statarray - ref_statarray statarray = tdata.copy() statarray.sort() - tdata = (tdata, ) # Note tuple form. + tdata = (tdata,) # Note tuple form. # The difference as one would calculate it. - summ_stat = statfunction(x2) - statfunction(x1) + summ_stat = stat_function(x2) - stat_function(x1) # Get Percentile indices - pct_low_high = np.round((reps-1) * alphas) - pct_low_high = np.nan_to_num(pct_low_high).astype('int') + pct_low_high = np.round((reps - 1) * alphas) + pct_low_high = np.nan_to_num(pct_low_high).astype("int") # Statistical tests. - ttest_single='NIL' - ttest_2_ind = ttest_ind(x1,x2)[1] - ttest_2_paired='NIL' - mannwhitneyresult = mannwhitneyu(x1, x2, alternative='two-sided')[1] - wilcoxonresult = 'NIL' + ttest_single = "NIL" + ttest_2_ind = ttest_ind(x1, x2)[1] + ttest_2_paired = "NIL" + mannwhitneyresult = mannwhitneyu(x1, x2, alternative="two-sided")[1] + wilcoxonresult = "NIL" # Get Bias-Corrected Accelerated indices convenience function invoked. - bca_low_high = bca(tdata, alphas, statarray, - statfunction, summ_stat, reps) + bca_low_high = bca(tdata, alphas, statarray, stat_function, summ_stat, reps) # Warnings for unstable or extreme indices. 
for ind in [pct_low_high, bca_low_high]: - if np.any(ind == 0) or np.any(ind == reps-1): - warnings.warn("Some values used extremal samples;" - " results are probably unstable.") - elif np.any(ind<10) or np.any(ind>=reps-10): - warnings.warn("Some values used top 10 low/high samples;" - " results may be unstable.") + if np.any(ind == 0) or np.any(ind == reps - 1): + warnings.warn( + "Some values used extremal samples;" + " results are probably unstable." + ) + elif np.any(ind < 10) or np.any(ind >= reps - 10): + warnings.warn( + "Some values used top 10 low/high samples;" + " results may be unstable." + ) self.summary = summ_stat self.is_paired = paired self.is_difference = diff - self.statistic = str(statfunction) + self.statistic = str(stat_function) self.n_reps = reps - self.ci = (1-alpha_level)*100 + self.ci = (1 - alpha_level) * 100 self.stat_array = np.array(statarray) self.pct_ci_low = statarray[pct_low_high[0]] @@ -189,33 +190,33 @@ def __init__(self, self.pvalue_wilcoxon = wilcoxonresult self.pvalue_mann_whitney = mannwhitneyresult - self.results = {'stat_summary': self.summary, - 'is_difference': diff, - 'is_paired': paired, - 'bca_ci_low': self.bca_ci_low, - 'bca_ci_high': self.bca_ci_high, - 'ci': self.ci - } + self.results = { + "stat_summary": self.summary, + "is_difference": diff, + "is_paired": paired, + "bca_ci_low": self.bca_ci_low, + "bca_ci_high": self.bca_ci_high, + "ci": self.ci, + } def __repr__(self): - import numpy as np - - if 'mean' in self.statistic: - stat = 'mean' - elif 'median' in self.statistic: - stat = 'median' + if "mean" in self.statistic: + stat = "mean" + elif "median" in self.statistic: + stat = "median" else: stat = self.statistic - diff_types = {'sequential': 'paired', 'baseline': 'paired', None: 'unpaired'} + diff_types = {"sequential": "paired", "baseline": "paired", None: "unpaired"} if self.is_difference: - a = 'The {} {} difference is {}.'.format(diff_types[self.is_paired], - stat, self.summary) + a = "The {} {} 
difference is {}.".format( + diff_types[self.is_paired], stat, self.summary + ) else: - a = 'The {} is {}.'.format(stat, self.summary) + a = "The {} is {}.".format(stat, self.summary) - b = '[{} CI: {}, {}]'.format(self.ci, self.bca_ci_low, self.bca_ci_high) - return '\n'.join([a, b]) + b = "[{} CI: {}, {}]".format(self.ci, self.bca_ci_low, self.bca_ci_high) + return "\n".join([a, b]) # %% ../nbs/API/bootstrap.ipynb 5 def jackknife_indexes(data): @@ -228,48 +229,42 @@ def jackknife_indexes(data): For a given set of data Y, the jackknife sample J[i] is defined as the data set Y with the ith data point deleted. """ - import numpy as np - base = np.arange(0,len(data)) - return (np.delete(base,i) for i in base) + base = np.arange(0, len(data)) + return (np.delete(base, i) for i in base) + -def bca(data, alphas, statarray, statfunction, ostat, reps): - ''' +def bca(data, alphas, statarray, stat_function, ostat, reps): + """ Subroutine called to calculate the BCa statistics. Borrowed heavily from scikits.bootstrap code. - ''' - import warnings - - import numpy as np - import pandas as pd - import seaborn as sns - - from scipy.stats import norm - from numpy.random import randint + """ # The bias correction value. 
- z0 = norm.ppf( ( 1.0*np.sum(statarray < ostat, axis = 0) ) / reps ) + z0 = norm.ppf((1.0 * np.sum(statarray < ostat, axis=0)) / reps) # Statistics of the jackknife distribution jackindexes = jackknife_indexes(data[0]) - jstat = [statfunction(*(x[indexes] for x in data)) - for indexes in jackindexes] - jmean = np.mean(jstat,axis = 0) + jstat = [stat_function(*(x[indexes] for x in data)) for indexes in jackindexes] + jmean = np.mean(jstat, axis=0) # Acceleration value - a = np.divide(np.sum( (jmean - jstat)**3, axis = 0 ), - ( 6.0 * np.sum( (jmean - jstat)**2, axis = 0)**1.5 ) - ) + a = np.divide( + np.sum((jmean - jstat) ** 3, axis=0), + (6.0 * np.sum((jmean - jstat) ** 2, axis=0) ** 1.5), + ) if np.any(np.isnan(a)): nanind = np.nonzero(np.isnan(a)) - warnings.warn("Some acceleration values were undefined." - "This is almost certainly because all values" - "for the statistic were equal. Affected" - "confidence intervals will have zero width and" - "may be inaccurate (indexes: {})".format(nanind)) - zs = z0 + norm.ppf(alphas).reshape(alphas.shape+(1,)*z0.ndim) - avals = norm.cdf(z0 + zs/(1-a*zs)) - nvals = np.round((reps-1)*avals) - nvals = np.nan_to_num(nvals).astype('int') + warnings.warn( + "Some acceleration values were undefined." + "This is almost certainly because all values" + "for the statistic were equal. Affected" + "confidence intervals will have zero width and" + "may be inaccurate (indexes: {})".format(nanind) + ) + zs = z0 + norm.ppf(alphas).reshape(alphas.shape + (1,) * z0.ndim) + avals = norm.cdf(z0 + zs / (1 - a * zs)) + nvals = np.round((reps - 1) * avals) + nvals = np.nan_to_num(nvals).astype("int") return nvals diff --git a/dabest/_classes.py b/dabest/_classes.py deleted file mode 100644 index d71e8733..00000000 --- a/dabest/_classes.py +++ /dev/null @@ -1,3011 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/class.ipynb. 
- -# %% auto 0 -__all__ = ['Dabest', 'DeltaDelta', 'MiniMetaDelta', 'TwoGroupsEffectSize', 'EffectSizeDataFrame', 'PermutationTest'] - -# %% ../nbs/API/class.ipynb 4 -import numpy as np -from scipy.stats import norm -import pandas as pd -from scipy.stats import randint - -# %% ../nbs/API/class.ipynb 6 -class Dabest(object): - - """ - Class for estimation statistics and plots. - """ - - def __init__(self, data, idx, x, y, paired, id_col, ci, - resamples, random_seed, proportional, delta2, - experiment, experiment_label, x1_level, mini_meta): - - """ - Parses and stores pandas DataFrames in preparation for estimation - statistics. You should not be calling this class directly; instead, - use `dabest.load()` to parse your DataFrame prior to analysis. - """ - - # Import standard data science libraries. - import numpy as np - import pandas as pd - import seaborn as sns - - self.__delta2 = delta2 - self.__experiment = experiment - self.__ci = ci - self.__data = data - self.__id_col = id_col - self.__is_paired = paired - self.__resamples = resamples - self.__random_seed = random_seed - self.__proportional = proportional - self.__mini_meta = mini_meta - - # Make a copy of the data, so we don't make alterations to it. - data_in = data.copy() - # data_in.reset_index(inplace=True) - # data_in_index_name = data_in.index.name - - - # Check if it is a valid mini_meta case - if mini_meta is True: - - # Only mini_meta calculation but not proportional and delta-delta function - if proportional is True: - err0 = '`proportional` and `mini_meta` cannot be True at the same time.' - raise ValueError(err0) - elif delta2 is True: - err0 = '`delta` and `mini_meta` cannot be True at the same time.' - raise ValueError(err0) - - # Check if the columns stated are valid - if all([isinstance(i, str) for i in idx]): - if len(pd.unique([t for t in idx]).tolist())!=2: - err0 = '`mini_meta` is True, but `idx` ({})'.format(idx) - err1 = 'does not contain exactly 2 columns.' 
- raise ValueError(err0 + err1) - elif all([isinstance(i, (tuple, list)) for i in idx]): - all_idx_lengths = [len(t) for t in idx] - if (np.array(all_idx_lengths) != 2).any(): - err1 = "`mini_meta` is True, but some idx " - err2 = "in {} does not consist only of two groups.".format(idx) - raise ValueError(err1 + err2) - - - - # Check if this is a 2x2 ANOVA case and x & y are valid columns - # Create experiment_label and x1_level - if delta2 is True: - if proportional is True: - err0 = '`proportional` and `delta` cannot be True at the same time.' - raise ValueError(err0) - # idx should not be specified - if idx: - err0 = '`idx` should not be specified when `delta2` is True.'.format(len(x)) - raise ValueError(err0) - - # Check if x is valid - if len(x) != 2: - err0 = '`delta2` is True but the number of variables indicated by `x` is {}.'.format(len(x)) - raise ValueError(err0) - else: - for i in x: - if i not in data_in.columns: - err = '{0} is not a column in `data`. Please check.'.format(i) - raise IndexError(err) - - # Check if y is valid - if not y: - err0 = '`delta2` is True but `y` is not indicated.' - raise ValueError(err0) - elif y not in data_in.columns: - err = '{0} is not a column in `data`. Please check.'.format(y) - raise IndexError(err) - - # Check if experiment is valid - if experiment not in data_in.columns: - err = '{0} is not a column in `data`. Please check.'.format(experiment) - raise IndexError(err) - - # Check if experiment_label is valid and create experiment when needed - if experiment_label: - if len(experiment_label) != 2: - err0 = '`experiment_label` does not have a length of 2.' - raise ValueError(err0) - else: - for i in experiment_label: - if i not in data_in[experiment].unique(): - err = '{0} is not an element in the column `{1}` of `data`. 
Please check.'.format(i, experiment) - raise IndexError(err) - else: - experiment_label = data_in[experiment].unique() - - # Check if x1_level is valid - if x1_level: - if len(x1_level) != 2: - err0 = '`x1_level` does not have a length of 2.' - raise ValueError(err0) - else: - for i in x1_level: - if i not in data_in[x[0]].unique(): - err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment) - raise IndexError(err) - - else: - x1_level = data_in[x[0]].unique() - elif experiment is not None: - experiment_label = data_in[experiment].unique() - x1_level = data_in[x[0]].unique() - self.__experiment_label = experiment_label - self.__x1_level = x1_level - - - # # Check if idx is specified - # if delta2 is False and not idx: - # err = '`idx` is not a column in `data`. Please check.' - # raise IndexError(err) - - - # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case - if idx is None and x is not None and y is not None: - # Add a length check for unique values in the first element in list x, - # if the length is greater than 2, force delta2 to be False - # Should be removed if delta2 for situations other than 2x2 is supported - if len(data_in[x[0]].unique()) > 2 and x1_level is None: - delta2 = False - self.__delta2 = delta2 - # stop the loop if delta2 is False - - # add a new column which is a combination of experiment and the first variable - new_col_name = experiment+x[0] - while new_col_name in data_in.columns: - new_col_name += "_" - data_in[new_col_name] = data_in[x[0]].astype(str) + " " + data_in[experiment].astype(str) - - #create idx and record the first and second x variable - idx = [] - for i in list(map(lambda x: str(x), experiment_label)): - temp = [] - for j in list(map(lambda x: str(x), x1_level)): - temp.append(j + " " + i) - idx.append(temp) - - self.__idx = idx - self.__x1 = x[0] - self.__x2 = x[1] - x = new_col_name - else: - self.__idx = idx - self.__x1 = None - self.__x2 = None - - 
- - # Determine the kind of estimation plot we need to produce. - if all([isinstance(i, (str, int, float)) for i in idx]): - # flatten out idx. - all_plot_groups = pd.unique([t for t in idx]).tolist() - if len(idx) > len(all_plot_groups): - err0 = '`idx` contains duplicated groups. Please remove any duplicates and try again.' - raise ValueError(err0) - - # We need to re-wrap this idx inside another tuple so as to - # easily loop thru each pairwise group later on. - self.__idx = (idx,) - - elif all([isinstance(i, (tuple, list)) for i in idx]): - all_plot_groups = pd.unique([tt for t in idx for tt in t]).tolist() - - actual_groups_given = sum([len(i) for i in idx]) - - if actual_groups_given > len(all_plot_groups): - err0 = 'Groups are repeated across tuples,' - err1 = ' or a tuple has repeated groups in it.' - err2 = ' Please remove any duplicates and try again.' - raise ValueError(err0 + err1 + err2) - - else: # mix of string and tuple? - err = 'There seems to be a problem with the idx you '\ - 'entered--{}.'.format(idx) - raise ValueError(err) - - # Having parsed the idx, check if it is a kosher paired plot, - # if so stated. - #if paired is True: - # all_idx_lengths = [len(t) for t in self.__idx] - # if (np.array(all_idx_lengths) != 2).any(): - # err1 = "`is_paired` is True, but some idx " - # err2 = "in {} does not consist only of two groups.".format(idx) - # raise ValueError(err1 + err2) - - # Check if there is a typo on paired - if paired is not None: - if paired not in ("baseline", "sequential"): - err = '{} assigned for `paired` is not valid.'.format(paired) - raise ValueError(err) - - - # Determine the type of data: wide or long. - if x is None and y is not None: - err = 'You have only specified `y`. Please also specify `x`.' - raise ValueError(err) - - elif y is None and x is not None: - err = 'You have only specified `x`. Please also specify `y`.' - raise ValueError(err) - - # Identify the type of data that was passed in. 
- elif x is not None and y is not None: - # Assume we have a long dataset. - # check both x and y are column names in data. - if x not in data_in.columns: - err = '{0} is not a column in `data`. Please check.'.format(x) - raise IndexError(err) - if y not in data_in.columns: - err = '{0} is not a column in `data`. Please check.'.format(y) - raise IndexError(err) - - # check y is numeric. - if not np.issubdtype(data_in[y].dtype, np.number): - err = '{0} is a column in `data`, but it is not numeric.'.format(y) - raise ValueError(err) - - # check all the idx can be found in data_in[x] - for g in all_plot_groups: - if g not in data_in[x].unique(): - err0 = '"{0}" is not a group in the column `{1}`.'.format(g, x) - err1 = " Please check `idx` and try again." - raise IndexError(err0 + err1) - - # Select only rows where the value in the `x` column - # is found in `idx`. - plot_data = data_in[data_in.loc[:, x].isin(all_plot_groups)].copy() - - # plot_data.drop("index", inplace=True, axis=1) - - # Assign attributes - self.__x = x - self.__y = y - self.__xvar = x - self.__yvar = y - - elif x is None and y is None: - # Assume we have a wide dataset. - # Assign attributes appropriately. - self.__x = None - self.__y = None - self.__xvar = "group" - self.__yvar = "value" - - # First, check we have all columns in the dataset. - for g in all_plot_groups: - if g not in data_in.columns: - err0 = '"{0}" is not a column in `data`.'.format(g) - err1 = " Please check `idx` and try again." - raise IndexError(err0 + err1) - - set_all_columns = set(data_in.columns.tolist()) - set_all_plot_groups = set(all_plot_groups) - id_vars = set_all_columns.difference(set_all_plot_groups) - - plot_data = pd.melt(data_in, - id_vars=id_vars, - value_vars=all_plot_groups, - value_name=self.__yvar, - var_name=self.__xvar) - - # Added in v0.2.7. - # remove any NA rows. - plot_data.dropna(axis=0, how='any', subset=[self.__yvar], inplace=True) - - - # Lines 131 to 140 added in v0.2.3. 
- # Fixes a bug that jammed up when the xvar column was already - # a pandas Categorical. Now we check for this and act appropriately. - if isinstance(plot_data[self.__xvar].dtype, - pd.CategoricalDtype) is True: - plot_data[self.__xvar].cat.remove_unused_categories(inplace=True) - plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, - ordered=True, - inplace=True) - else: - plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar], - categories=all_plot_groups, - ordered=True) - - # # The line below was added in v0.2.4, removed in v0.2.5. - # plot_data.dropna(inplace=True) - - self.__plot_data = plot_data - - self.__all_plot_groups = all_plot_groups - - - # Sanity check that all idxs are paired, if so desired. - #if paired is True: - # if id_col is None: - # err = "`id_col` must be specified if `is_paired` is set to True." - # raise IndexError(err) - # elif id_col not in plot_data.columns: - # err = "{} is not a column in `data`. ".format(id_col) - # raise IndexError(err) - - # Check if `id_col` is valid - if paired: - if id_col is None: - err = "`id_col` must be specified if `paired` is assigned with a not NoneType value." - raise IndexError(err) - elif id_col not in plot_data.columns: - err = "{} is not a column in `data`. 
".format(id_col) - raise IndexError(err) - - EffectSizeDataFrame_kwargs = dict(ci=ci, is_paired=paired, - random_seed=random_seed, - resamples=resamples, - proportional=proportional, - delta2=delta2, - experiment_label=self.__experiment_label, - x1_level=self.__x1_level, - x2=self.__x2, - mini_meta = mini_meta) - - self.__mean_diff = EffectSizeDataFrame(self, "mean_diff", - **EffectSizeDataFrame_kwargs) - - self.__median_diff = EffectSizeDataFrame(self, "median_diff", - **EffectSizeDataFrame_kwargs) - - self.__cohens_d = EffectSizeDataFrame(self, "cohens_d", - **EffectSizeDataFrame_kwargs) - - self.__cohens_h = EffectSizeDataFrame(self, "cohens_h", - **EffectSizeDataFrame_kwargs) - - self.__hedges_g = EffectSizeDataFrame(self, "hedges_g", - **EffectSizeDataFrame_kwargs) - - self.__delta_g = EffectSizeDataFrame(self, "delta_g", - **EffectSizeDataFrame_kwargs) - - if not paired: - self.__cliffs_delta = EffectSizeDataFrame(self, "cliffs_delta", - **EffectSizeDataFrame_kwargs) - else: - self.__cliffs_delta = "The data is paired; Cliff's delta is therefore undefined." 
- - - def __repr__(self): - from .__init__ import __version__ - import datetime as dt - import numpy as np - - from .misc_tools import print_greeting - - # Removed due to the deprecation of is_paired - #if self.__is_paired: - # es = "Paired e" - #else: - # es = "E" - - greeting_header = print_greeting() - - RM_STATUS = {'baseline' : 'for repeated measures against baseline \n', - 'sequential': 'for the sequential design of repeated-measures experiment \n', - 'None' : '' - } - - PAIRED_STATUS = {'baseline' : 'Paired e', - 'sequential' : 'Paired e', - 'None' : 'E' - } - - first_line = {"rm_status" : RM_STATUS[str(self.__is_paired)], - "paired_status": PAIRED_STATUS[str(self.__is_paired)]} - - s1 = "{paired_status}ffect size(s) {rm_status}".format(**first_line) - s2 = "with {}% confidence intervals will be computed for:".format(self.__ci) - desc_line = s1 + s2 - - out = [greeting_header + "\n\n" + desc_line] - - comparisons = [] - - if self.__is_paired == 'sequential': - for j, current_tuple in enumerate(self.__idx): - for ix, test_name in enumerate(current_tuple[1:]): - control_name = current_tuple[ix] - comparisons.append("{} minus {}".format(test_name, control_name)) - else: - for j, current_tuple in enumerate(self.__idx): - control_name = current_tuple[0] - - for ix, test_name in enumerate(current_tuple[1:]): - comparisons.append("{} minus {}".format(test_name, control_name)) - - if self.__delta2 is True: - comparisons.append("{} minus {} (only for mean difference)".format(self.__experiment_label[1], self.__experiment_label[0])) - - if self.__mini_meta is True: - comparisons.append("weighted delta (only for mean difference)") - - for j, g in enumerate(comparisons): - out.append("{}. {}".format(j+1, g)) - - resamples_line1 = "\n{} resamples ".format(self.__resamples) - resamples_line2 = "will be used to generate the effect size bootstraps." 
- out.append(resamples_line1 + resamples_line2) - - return "\n".join(out) - - - # def __variable_name(self): - # return [k for k,v in locals().items() if v is self] - # - # @property - # def variable_name(self): - # return self.__variable_name() - - @property - def mean_diff(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for the mean difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()` - - """ - return self.__mean_diff - - - @property - def median_diff(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - - """ - return self.__median_diff - - - @property - def cohens_d(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `d`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - - """ - return self.__cohens_d - - - @property - def cohens_h(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `h`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `directional` argument in `dabest.load()`. - - """ - return self.__cohens_h - - - @property - def hedges_g(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. 
- - """ - return self.__hedges_g - - - @property - def cliffs_delta(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - - """ - return self.__cliffs_delta - - @property - def delta_g(self): - """ - Returns an :py:class:`EffectSizeDataFrame` for deltas' g, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. - """ - return self.__delta_g - - @property - def data(self): - """ - Returns the pandas DataFrame that was passed to `dabest.load()`. - When `delta2` is True, a new column is added to support the - function. The name of this new column is indicated by `x`. - """ - return self.__data - - - @property - def idx(self): - """ - Returns the order of categories that was passed to `dabest.load()`. - """ - return self.__idx - - - @property - def x1(self): - """ - Returns the first variable declared in x when it is a delta-delta - case; returns None otherwise. - """ - return self.__x1 - - - @property - def x1_level(self): - """ - Returns the levels of first variable declared in x when it is a - delta-delta case; returns None otherwise. - """ - return self.__x1_level - - - @property - def x2(self): - """ - Returns the second variable declared in x when it is a delta-delta - case; returns None otherwise. - """ - return self.__x2 - - - @property - def experiment(self): - """ - Returns the column name of experiment labels that was passed to - `dabest.load()` when it is a delta-delta case; returns None otherwise. - """ - return self.__experiment - - - @property - def experiment_label(self): - """ - Returns the experiment labels in order that was passed to `dabest.load()` - when it is a delta-delta case; returns None otherwise. 
- """ - return self.__experiment_label - - - @property - def delta2(self): - """ - Returns the boolean parameter indicating if this is a delta-delta - situation. - """ - return self.__delta2 - - - @property - def is_paired(self): - """ - Returns the type of repeated-measures experiment. - """ - return self.__is_paired - - - @property - def id_col(self): - """ - Returns the id column declared to `dabest.load()`. - """ - return self.__id_col - - - @property - def ci(self): - """ - The width of the desired confidence interval. - """ - return self.__ci - - - @property - def resamples(self): - """ - The number of resamples used to generate the bootstrap. - """ - return self.__resamples - - - @property - def random_seed(self): - """ - The number used to initialise the numpy random seed generator, ie. - `seed_value` from `numpy.random.seed(seed_value)` is returned. - """ - return self.__random_seed - - - @property - def x(self): - """ - Returns the x column that was passed to `dabest.load()`, if any. - When `delta2` is True, `x` returns the name of the new column created - for the delta-delta situation. To retrieve the 2 variables passed into - `x` when `delta2` is True, please call `x1` and `x2` instead. - """ - return self.__x - - - @property - def y(self): - """ - Returns the y column that was passed to `dabest.load()`, if any. - """ - return self.__y - - - @property - def _xvar(self): - """ - Returns the xvar in dabest.plot_data. - """ - return self.__xvar - - - @property - def _yvar(self): - """ - Returns the yvar in dabest.plot_data. - """ - return self.__yvar - - - @property - def _plot_data(self): - """ - Returns the pandas DataFrame used to produce the estimation stats/plots. - """ - return self.__plot_data - - - @property - def proportional(self): - """ - Returns the proportional parameter class. - """ - return self.__proportional - - - @property - def mini_meta(self): - """ - Returns the mini_meta boolean parameter. 
- """ - return self.__mini_meta - - - @property - def _all_plot_groups(self): - """ - Returns the all plot groups, as indicated via the `idx` keyword. - """ - return self.__all_plot_groups - -# %% ../nbs/API/class.ipynb 28 -class DeltaDelta(object): - """ - A class to compute and store the delta-delta statistics for experiments with a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, 1 and 2. The data is divided into two pairs of two groups, and a primary delta is first calculated as the mean difference between each of the pairs: - - - $$\Delta_{1} = \overline{X}_{A_{2}, B_{1}} - \overline{X}_{A_{1}, B_{1}}$$ - - $$\Delta_{2} = \overline{X}_{A_{2}, B_{2}} - \overline{X}_{A_{1}, B_{2}}$$ - - - where $\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\Delta$ is the mean difference between two samples. - - A delta-delta value is then calculated as the mean difference between the two primary deltas: - - - $$\Delta_{\Delta} = \Delta_{2} - \Delta_{1}$$ - - and a deltas' g value is calculated as the mean difference between the two primary deltas divided by - the standard deviation of the delta-delta value, which is calculated from a pooled variance of the 4 samples: - - $$\Delta_{g} = \frac{\Delta_{\Delta}}{s_{\Delta_{\Delta}}}$$ - - $$s_{\Delta_{\Delta}} = \sqrt{\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$ - - where $s$ is the standard deviation and $n$ is the sample size. 
- - - """ - - def __init__(self, effectsizedataframe, permutation_count,bootstraps_delta_delta, - ci=95): - - import numpy as np - from numpy import sort as npsort - from numpy import sqrt, isinf, isnan - from ._stats_tools import effsize as es - from ._stats_tools import confint_1group as ci1g - from ._stats_tools import confint_2group_diff as ci2g - - - from string import Template - import warnings - - self.__effsizedf = effectsizedataframe.results - self.__dabest_obj = effectsizedataframe.dabest_obj - self.__ci = ci - self.__resamples = effectsizedataframe.resamples - self.__effect_size = effectsizedataframe.effect_size - self.__alpha = ci2g._compute_alpha_from_ci(ci) - self.__permutation_count = permutation_count - self.__bootstraps = np.array(self.__effsizedf["bootstraps"]) - self.__control = self.__dabest_obj.experiment_label[0] - self.__test = self.__dabest_obj.experiment_label[1] - - - # Compute the bootstrap delta-delta or deltas' g and the true dela-delta based on the raw data - if self.__effect_size == "mean_diff": - self.__bootstraps_delta_delta = bootstraps_delta_delta[2] - self.__difference = self.__effsizedf["difference"][1] - self.__effsizedf["difference"][0] - else: - self.__bootstraps_delta_delta = bootstraps_delta_delta[0] - self.__difference = bootstraps_delta_delta[1] - - sorted_delta_delta = npsort(self.__bootstraps_delta_delta) - - self.__bias_correction = ci2g.compute_meandiff_bias_correction( - self.__bootstraps_delta_delta, self.__difference) - - self.__jackknives = np.array(ci1g.compute_1group_jackknife( - self.__bootstraps_delta_delta, - np.mean)) - - self.__acceleration_value = ci2g._calc_accel(self.__jackknives) - - # Compute BCa intervals. 
- bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( - self.__bias_correction, self.__acceleration_value, - self.__resamples, ci) - - self.__bca_interval_idx = (bca_idx_low, bca_idx_high) - - if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): - self.__bca_low = sorted_delta_delta[bca_idx_low] - self.__bca_high = sorted_delta_delta[bca_idx_high] - - err1 = "The $lim_type limit of the interval" - err2 = "was in the $loc 10 values." - err3 = "The result should be considered unstable." - err_temp = Template(" ".join([err1, err2, err3])) - - if bca_idx_low <= 10: - warnings.warn(err_temp.substitute(lim_type="lower", - loc="bottom"), - stacklevel=1) - - if bca_idx_high >= self.__resamples-9: - warnings.warn(err_temp.substitute(lim_type="upper", - loc="top"), - stacklevel=1) - - else: - err1 = "The $lim_type limit of the BCa interval cannot be computed." - err2 = "It is set to the effect size itself." - err3 = "All bootstrap values were likely all the same." - err_temp = Template(" ".join([err1, err2, err3])) - - if isnan(bca_idx_low): - self.__bca_low = self.__difference - warnings.warn(err_temp.substitute(lim_type="lower"), - stacklevel=0) - - if isnan(bca_idx_high): - self.__bca_high = self.__difference - warnings.warn(err_temp.substitute(lim_type="upper"), - stacklevel=0) - - # Compute percentile intervals. - pct_idx_low = int((self.__alpha/2) * self.__resamples) - pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples) - - self.__pct_interval_idx = (pct_idx_low, pct_idx_high) - self.__pct_low = sorted_delta_delta[pct_idx_low] - self.__pct_high = sorted_delta_delta[pct_idx_high] - - - - def __permutation_test(self): - """ - Perform a permutation test and obtain the permutation p-value - based on the permutation data. 
- """ - import numpy as np - self.__permutations = np.array(self.__effsizedf["permutations"]) - - THRESHOLD = np.abs(self.__difference) - - self.__permutations_delta_delta = np.array(self.__permutations[1]-self.__permutations[0]) - - count = sum(np.abs(self.__permutations_delta_delta)>THRESHOLD) - self.__pvalue_permutation = count/self.__permutation_count - - - - def __repr__(self, header=True, sigfig=3): - from .__init__ import __version__ - import datetime as dt - import numpy as np - - from .misc_tools import print_greeting - - first_line = {"control" : self.__control, - "test" : self.__test} - - if self.__effect_size == "mean_diff": - out1 = "The delta-delta between {control} and {test} ".format(**first_line) - else: - out1 = "The deltas' g between {control} and {test} ".format(**first_line) - - base_string_fmt = "{:." + str(sigfig) + "}" - if "." in str(self.__ci): - ci_width = base_string_fmt.format(self.__ci) - else: - ci_width = str(self.__ci) - - ci_out = {"es" : base_string_fmt.format(self.__difference), - "ci" : ci_width, - "bca_low" : base_string_fmt.format(self.__bca_low), - "bca_high" : base_string_fmt.format(self.__bca_high)} - - out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) - out = out1 + out2 - - if header is True: - out = print_greeting() + "\n" + "\n" + out - - - pval_rounded = base_string_fmt.format(self.pvalue_permutation) - - - p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) - p2 = "calculated for legacy purposes only. " - pvalue = p1 + p2 - - - bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) - bs2 = "the confidence interval is bias-corrected and accelerated." - bs = bs1 + bs2 - - pval_def1 = "Any p-value reported is the probability of observing the " + \ - "effect size (or greater),\nassuming the null hypothesis of " + \ - "zero difference is true." - pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ - "control and test labels were performed." 
- pval_def = pval_def1 + pval_def2 - - - return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) - - - def to_dict(self): - """ - Returns the attributes of the `DeltaDelta` object as a - dictionary. - """ - # Only get public (user-facing) attributes. - attrs = [a for a in dir(self) - if not a.startswith(("_", "to_dict"))] - out = {} - for a in attrs: - out[a] = getattr(self, a) - return out - - - @property - def ci(self): - """ - Returns the width of the confidence interval, in percent. - """ - return self.__ci - - - @property - def alpha(self): - """ - Returns the significance level of the statistical test as a float - between 0 and 1. - """ - return self.__alpha - - - @property - def bias_correction(self): - return self.__bias_correction - - - @property - def bootstraps(self): - ''' - Return the bootstrapped deltas from all the experiment groups. - ''' - return self.__bootstraps - - - @property - def jackknives(self): - return self.__jackknives - - - @property - def acceleration_value(self): - return self.__acceleration_value - - - @property - def bca_low(self): - """ - The bias-corrected and accelerated confidence interval lower limit. - """ - return self.__bca_low - - - @property - def bca_high(self): - """ - The bias-corrected and accelerated confidence interval upper limit. - """ - return self.__bca_high - - - @property - def bca_interval_idx(self): - return self.__bca_interval_idx - - - @property - def control(self): - ''' - Return the name of the control experiment group. - ''' - return self.__control - - - @property - def test(self): - ''' - Return the name of the test experiment group. - ''' - return self.__test - - - @property - def bootstraps_delta_delta(self): - ''' - Return the delta-delta values calculated from the bootstrapped - deltas. - ''' - return self.__bootstraps_delta_delta - - - @property - def difference(self): - ''' - Return the delta-delta value calculated based on the raw data. 
- ''' - return self.__difference - - - @property - def pct_interval_idx (self): - return self.__pct_interval_idx - - - @property - def pct_low(self): - """ - The percentile confidence interval lower limit. - """ - return self.__pct_low - - - @property - def pct_high(self): - """ - The percentile confidence interval lower limit. - """ - return self.__pct_high - - - @property - def pvalue_permutation(self): - try: - return self.__pvalue_permutation - except AttributeError: - self.__permutation_test() - return self.__pvalue_permutation - - - @property - def permutation_count(self): - """ - The number of permuations taken. - """ - return self.__permutation_count - - - @property - def permutations(self): - ''' - Return the mean differences of permutations obtained during - the permutation test for each experiment group. - ''' - try: - return self.__permutations - except AttributeError: - self.__permutation_test() - return self.__permutations - - - @property - def permutations_delta_delta(self): - ''' - Return the delta-delta values of permutations obtained - during the permutation test. - ''' - try: - return self.__permutations_delta_delta - except AttributeError: - self.__permutation_test() - return self.__permutations_delta_delta - - - -# %% ../nbs/API/class.ipynb 32 -class MiniMetaDelta(object): - """ - A class to compute and store the weighted delta. - A weighted delta is calculated if the argument ``mini_meta=True`` is passed during ``dabest.load()``. 
- - """ - - def __init__(self, effectsizedataframe, permutation_count, - ci=95): - - import numpy as np - from numpy import sort as npsort - from numpy import sqrt, isinf, isnan - from ._stats_tools import effsize as es - from ._stats_tools import confint_1group as ci1g - from ._stats_tools import confint_2group_diff as ci2g - - - from string import Template - import warnings - - self.__effsizedf = effectsizedataframe.results - self.__dabest_obj = effectsizedataframe.dabest_obj - self.__ci = ci - self.__resamples = effectsizedataframe.resamples - self.__alpha = ci2g._compute_alpha_from_ci(ci) - self.__permutation_count = permutation_count - self.__bootstraps = np.array(self.__effsizedf["bootstraps"]) - self.__control = np.array(self.__effsizedf["control"]) - self.__test = np.array(self.__effsizedf["test"]) - self.__control_N = np.array(self.__effsizedf["control_N"]) - self.__test_N = np.array(self.__effsizedf["test_N"]) - - - idx = self.__dabest_obj.idx - dat = self.__dabest_obj._plot_data - xvar = self.__dabest_obj._xvar - yvar = self.__dabest_obj._yvar - - # compute the variances of each control group and each test group - control_var=[] - test_var=[] - for j, current_tuple in enumerate(idx): - cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() - control_var.append(np.var(control, ddof=1)) - - tname = current_tuple[1] - test = dat[dat[xvar] == tname][yvar].copy() - test_var.append(np.var(test, ddof=1)) - self.__control_var = np.array(control_var) - self.__test_var = np.array(test_var) - - # Compute pooled group variances for each pair of experiment groups - # based on the raw data - self.__group_var = ci2g.calculate_group_var(self.__control_var, - self.__control_N, - self.__test_var, - self.__test_N) - - # Compute the weighted average mean differences of the bootstrap data - # using the pooled group variances of the raw data as the inverse of - # weights - self.__bootstraps_weighted_delta = ci2g.calculate_weighted_delta( - self.__group_var, 
- self.__bootstraps, - self.__resamples) - - # Compute the weighted average mean difference based on the raw data - self.__difference = es.weighted_delta(self.__effsizedf["difference"], - self.__group_var) - - sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta) - - - self.__bias_correction = ci2g.compute_meandiff_bias_correction( - self.__bootstraps_weighted_delta, self.__difference) - - self.__jackknives = np.array(ci1g.compute_1group_jackknife( - self.__bootstraps_weighted_delta, - np.mean)) - - self.__acceleration_value = ci2g._calc_accel(self.__jackknives) - - # Compute BCa intervals. - bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( - self.__bias_correction, self.__acceleration_value, - self.__resamples, ci) - - self.__bca_interval_idx = (bca_idx_low, bca_idx_high) - - if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): - self.__bca_low = sorted_weighted_deltas[bca_idx_low] - self.__bca_high = sorted_weighted_deltas[bca_idx_high] - - err1 = "The $lim_type limit of the interval" - err2 = "was in the $loc 10 values." - err3 = "The result should be considered unstable." - err_temp = Template(" ".join([err1, err2, err3])) - - if bca_idx_low <= 10: - warnings.warn(err_temp.substitute(lim_type="lower", - loc="bottom"), - stacklevel=1) - - if bca_idx_high >= self.__resamples-9: - warnings.warn(err_temp.substitute(lim_type="upper", - loc="top"), - stacklevel=1) - - else: - err1 = "The $lim_type limit of the BCa interval cannot be computed." - err2 = "It is set to the effect size itself." - err3 = "All bootstrap values were likely all the same." - err_temp = Template(" ".join([err1, err2, err3])) - - if isnan(bca_idx_low): - self.__bca_low = self.__difference - warnings.warn(err_temp.substitute(lim_type="lower"), - stacklevel=0) - - if isnan(bca_idx_high): - self.__bca_high = self.__difference - warnings.warn(err_temp.substitute(lim_type="upper"), - stacklevel=0) - - # Compute percentile intervals. 
- pct_idx_low = int((self.__alpha/2) * self.__resamples) - pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples) - - self.__pct_interval_idx = (pct_idx_low, pct_idx_high) - self.__pct_low = sorted_weighted_deltas[pct_idx_low] - self.__pct_high = sorted_weighted_deltas[pct_idx_high] - - - - def __permutation_test(self): - """ - Perform a permutation test and obtain the permutation p-value - based on the permutation data. - """ - import numpy as np - self.__permutations = np.array(self.__effsizedf["permutations"]) - self.__permutations_var = np.array(self.__effsizedf["permutations_var"]) - - THRESHOLD = np.abs(self.__difference) - - all_num = [] - all_denom = [] - - groups = len(self.__permutations) - for i in range(0, len(self.__permutations[0])): - weight = [1/self.__permutations_var[j][i] for j in range(0, groups)] - all_num.append(np.sum([weight[j]*self.__permutations[j][i] for j in range(0, groups)])) - all_denom.append(np.sum(weight)) - - output=[] - for i in range(0, len(all_num)): - output.append(all_num[i]/all_denom[i]) - - self.__permutations_weighted_delta = np.array(output) - - count = sum(np.abs(self.__permutations_weighted_delta)>THRESHOLD) - self.__pvalue_permutation = count/self.__permutation_count - - - - def __repr__(self, header=True, sigfig=3): - from .__init__ import __version__ - import datetime as dt - import numpy as np - - from .misc_tools import print_greeting - - is_paired = self.__dabest_obj.is_paired - - PAIRED_STATUS = {'baseline' : 'paired', - 'sequential' : 'paired', - 'None' : 'unpaired' - } - - first_line = {"paired_status": PAIRED_STATUS[str(is_paired)]} - - - out1 = "The weighted-average {paired_status} mean differences ".format(**first_line) - - base_string_fmt = "{:." + str(sigfig) + "}" - if "." 
in str(self.__ci): - ci_width = base_string_fmt.format(self.__ci) - else: - ci_width = str(self.__ci) - - ci_out = {"es" : base_string_fmt.format(self.__difference), - "ci" : ci_width, - "bca_low" : base_string_fmt.format(self.__bca_low), - "bca_high" : base_string_fmt.format(self.__bca_high)} - - out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) - out = out1 + out2 - - if header is True: - out = print_greeting() + "\n" + "\n" + out - - - pval_rounded = base_string_fmt.format(self.pvalue_permutation) - - - p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) - p2 = "calculated for legacy purposes only. " - pvalue = p1 + p2 - - - bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) - bs2 = "the confidence interval is bias-corrected and accelerated." - bs = bs1 + bs2 - - pval_def1 = "Any p-value reported is the probability of observing the" + \ - "effect size (or greater),\nassuming the null hypothesis of" + \ - "zero difference is true." - pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ - "control and test labels were performed." - pval_def = pval_def1 + pval_def2 - - - return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) - - - def to_dict(self): - """ - Returns all attributes of the `dabest.MiniMetaDelta` object as a - dictionary. - """ - # Only get public (user-facing) attributes. - attrs = [a for a in dir(self) - if not a.startswith(("_", "to_dict"))] - out = {} - for a in attrs: - out[a] = getattr(self, a) - return out - - - @property - def ci(self): - """ - Returns the width of the confidence interval, in percent. - """ - return self.__ci - - - @property - def alpha(self): - """ - Returns the significance level of the statistical test as a float - between 0 and 1. 
- """ - return self.__alpha - - - @property - def bias_correction(self): - return self.__bias_correction - - - @property - def bootstraps(self): - ''' - Return the bootstrapped differences from all the experiment groups. - ''' - return self.__bootstraps - - - @property - def jackknives(self): - return self.__jackknives - - - @property - def acceleration_value(self): - return self.__acceleration_value - - - @property - def bca_low(self): - """ - The bias-corrected and accelerated confidence interval lower limit. - """ - return self.__bca_low - - - @property - def bca_high(self): - """ - The bias-corrected and accelerated confidence interval upper limit. - """ - return self.__bca_high - - - @property - def bca_interval_idx(self): - return self.__bca_interval_idx - - - @property - def control(self): - ''' - Return the names of the control groups from all the experiment - groups in order. - ''' - return self.__control - - - @property - def test(self): - ''' - Return the names of the test groups from all the experiment - groups in order. - ''' - return self.__test - - @property - def control_N(self): - ''' - Return the sizes of the control groups from all the experiment - groups in order. - ''' - return self.__control_N - - - @property - def test_N(self): - ''' - Return the sizes of the test groups from all the experiment - groups in order. - ''' - return self.__test_N - - - @property - def control_var(self): - ''' - Return the estimated population variances of the control groups - from all the experiment groups in order. Here the population - variance is estimated from the sample variance. - ''' - return self.__control_var - - - @property - def test_var(self): - ''' - Return the estimated population variances of the control groups - from all the experiment groups in order. Here the population - variance is estimated from the sample variance. 
- ''' - return self.__test_var - - - @property - def group_var(self): - ''' - Return the pooled group variances of all the experiment groups - in order. - ''' - return self.__group_var - - - @property - def bootstraps_weighted_delta(self): - ''' - Return the weighted-average mean differences calculated from the bootstrapped - deltas and weights across the experiment groups, where the weights are - the inverse of the pooled group variances. - ''' - return self.__bootstraps_weighted_delta - - - @property - def difference(self): - ''' - Return the weighted-average delta calculated from the raw data. - ''' - return self.__difference - - - @property - def pct_interval_idx (self): - return self.__pct_interval_idx - - - @property - def pct_low(self): - """ - The percentile confidence interval lower limit. - """ - return self.__pct_low - - - @property - def pct_high(self): - """ - The percentile confidence interval lower limit. - """ - return self.__pct_high - - - @property - def pvalue_permutation(self): - try: - return self.__pvalue_permutation - except AttributeError: - self.__permutation_test() - return self.__pvalue_permutation - - - @property - def permutation_count(self): - """ - The number of permuations taken. - """ - return self.__permutation_count - - - @property - def permutations(self): - ''' - Return the mean differences of permutations obtained during - the permutation test for each experiment group. - ''' - try: - return self.__permutations - except AttributeError: - self.__permutation_test() - return self.__permutations - - - @property - def permutations_var(self): - ''' - Return the pooled group variances of permutations obtained during - the permutation test for each experiment group. 
- ''' - try: - return self.__permutations_var - except AttributeError: - self.__permutation_test() - return self.__permutations_var - - - @property - def permutations_weighted_delta(self): - ''' - Return the weighted-average deltas of permutations obtained - during the permutation test. - ''' - try: - return self.__permutations_weighted_delta - except AttributeError: - self.__permutation_test() - return self.__permutations_weighted_delta - - - -# %% ../nbs/API/class.ipynb 37 -class TwoGroupsEffectSize(object): - - """ - A class to compute and store the results of bootstrapped - mean differences between two groups. - - Compute the effect size between two groups. - - Parameters - ---------- - control : array-like - test : array-like - These should be numerical iterables. - effect_size : string. - Any one of the following are accepted inputs: - 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta' - is_paired : string, default None - resamples : int, default 5000 - The number of bootstrap resamples to be taken for the calculation - of the confidence interval limits. - permutation_count : int, default 5000 - The number of permutations (reshuffles) to perform for the - computation of the permutation p-value - ci : float, default 95 - The confidence interval width. The default of 95 produces 95% - confidence intervals. - random_seed : int, default 12345 - `random_seed` is used to seed the random number generator during - bootstrap resampling. This ensures that the confidence intervals - reported are replicable. - - Returns - ------- - A :py:class:`TwoGroupEffectSize` object: - `difference` : float - The effect size of the difference between the control and the test. - `effect_size` : string - The type of effect size reported. - `is_paired` : string - The type of repeated-measures experiment. - `ci` : float - Returns the width of the confidence interval, in percent. 
- `alpha` : float - Returns the significance level of the statistical test as a float between 0 and 1. - `resamples` : int - The number of resamples performed during the bootstrap procedure. - `bootstraps` : numpy ndarray - The generated bootstraps of the effect size. - `random_seed` : int - The number used to initialise the numpy random seed generator, ie.`seed_value` from `numpy.random.seed(seed_value)` is returned. - `bca_low, bca_high` : float - The bias-corrected and accelerated confidence interval lower limit and upper limits, respectively. - `pct_low, pct_high` : float - The percentile confidence interval lower limit and upper limits, respectively. - """ - - def __init__(self, control, test, effect_size, - proportional=False, - is_paired=None, ci=95, - resamples=5000, - permutation_count=5000, - random_seed=12345): - - - import numpy as np - from numpy import array, isnan, isinf - from numpy import sort as npsort - from numpy.random import choice, seed - - import scipy.stats as spstats - - # import statsmodels.stats.power as power - import statsmodels - - from string import Template - import warnings - - from ._stats_tools import effsize as es - from ._stats_tools import confint_2group_diff as ci2g - - - self.__EFFECT_SIZE_DICT = {"mean_diff" : "mean difference", - "median_diff" : "median difference", - "cohens_d" : "Cohen's d", - "cohens_h" : "Cohen's h", - "hedges_g" : "Hedges' g", - "cliffs_delta" : "Cliff's delta", - "delta_g" : "deltas' g"} - - - kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()] - if effect_size not in kosher_es: - err1 = "The effect size '{}'".format(effect_size) - err2 = "is not one of {}".format(kosher_es) - raise ValueError(" ".join([err1, err2])) - - if effect_size == "cliffs_delta" and is_paired: - err1 = "`paired` is not None; therefore Cliff's delta is not defined." 
- raise ValueError(err1) - - if proportional==True and effect_size not in ['mean_diff','cohens_h']: - err1 = "`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined." - raise ValueError(err1) - - if proportional==True and (np.isin(control, [0, 1]).all() == False or np.isin(test, [0, 1]).all() == False): - err1 = "`proportional` is True; Only accept binary data consisting of 0 and 1." - raise ValueError(err1) - - # Convert to numpy arrays for speed. - # NaNs are automatically dropped. - control = array(control) - test = array(test) - control = control[~isnan(control)] - test = test[~isnan(test)] - - self.__effect_size = effect_size - self.__control = control - self.__test = test - self.__is_paired = is_paired - self.__resamples = resamples - self.__permutation_count = permutation_count - self.__random_seed = random_seed - self.__ci = ci - self.__alpha = ci2g._compute_alpha_from_ci(ci) - - self.__difference = es.two_group_difference( - control, test, is_paired, effect_size) - - self.__jackknives = ci2g.compute_meandiff_jackknife( - control, test, is_paired, effect_size) - - self.__acceleration_value = ci2g._calc_accel(self.__jackknives) - - bootstraps = ci2g.compute_bootstrapped_diff( - control, test, is_paired, effect_size, - resamples, random_seed) - self.__bootstraps = bootstraps - - sorted_bootstraps = npsort(self.__bootstraps) - # Added in v0.2.6. - # Raises a UserWarning if there are any infiinities in the bootstraps. - num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)]) - - if num_infinities > 0: - warn_msg = "There are {} bootstrap(s) that are not defined. "\ - "This is likely due to smaple sample sizes. "\ - "The values in a bootstrap for a group will be more likely "\ - "to be all equal, with a resulting variance of zero. "\ - "The computation of Cohen's d and Hedges' g thus "\ - "involved a division by zero. 
" - warnings.warn(warn_msg.format(num_infinities), - category=UserWarning) - - self.__bias_correction = ci2g.compute_meandiff_bias_correction( - self.__bootstraps, self.__difference) - - # Compute BCa intervals. - bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( - self.__bias_correction, self.__acceleration_value, - self.__resamples, ci) - - self.__bca_interval_idx = (bca_idx_low, bca_idx_high) - - if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): - self.__bca_low = sorted_bootstraps[bca_idx_low] - self.__bca_high = sorted_bootstraps[bca_idx_high] - - err1 = "The $lim_type limit of the interval" - err2 = "was in the $loc 10 values." - err3 = "The result should be considered unstable." - err_temp = Template(" ".join([err1, err2, err3])) - - if bca_idx_low <= 10: - warnings.warn(err_temp.substitute(lim_type="lower", - loc="bottom"), - stacklevel=1) - - if bca_idx_high >= resamples-9: - warnings.warn(err_temp.substitute(lim_type="upper", - loc="top"), - stacklevel=1) - - else: - err1 = "The $lim_type limit of the BCa interval cannot be computed." - err2 = "It is set to the effect size itself." - err3 = "All bootstrap values were likely all the same." - err_temp = Template(" ".join([err1, err2, err3])) - - if isnan(bca_idx_low): - self.__bca_low = self.__difference - warnings.warn(err_temp.substitute(lim_type="lower"), - stacklevel=0) - - if isnan(bca_idx_high): - self.__bca_high = self.__difference - warnings.warn(err_temp.substitute(lim_type="upper"), - stacklevel=0) - - # Compute percentile intervals. - pct_idx_low = int((self.__alpha/2) * resamples) - pct_idx_high = int((1-(self.__alpha/2)) * resamples) - - self.__pct_interval_idx = (pct_idx_low, pct_idx_high) - self.__pct_low = sorted_bootstraps[pct_idx_low] - self.__pct_high = sorted_bootstraps[pct_idx_high] - - # Perform statistical tests. 
- - self.__PermutationTest_result = PermutationTest(control, test, - effect_size, - is_paired, - permutation_count) - - if is_paired and proportional is False: - # Wilcoxon, a non-parametric version of the paired T-test. - wilcoxon = spstats.wilcoxon(control, test) - self.__pvalue_wilcoxon = wilcoxon.pvalue - self.__statistic_wilcoxon = wilcoxon.statistic - - - if effect_size != "median_diff": - # Paired Student's t-test. - paired_t = spstats.ttest_rel(control, test, nan_policy='omit') - self.__pvalue_paired_students_t = paired_t.pvalue - self.__statistic_paired_students_t = paired_t.statistic - - standardized_es = es.cohens_d(control, test, is_paired) - # self.__power = power.tt_solve_power(standardized_es, - # len(control), - # alpha=self.__alpha) - - elif is_paired and proportional is True: - # for binary paired data, use McNemar's test - # References: - # https://en.wikipedia.org/wiki/McNemar%27s_test - from statsmodels.stats.contingency_tables import mcnemar - import pandas as pd - df_temp = pd.DataFrame({'control': control, 'test': test}) - x1 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 0)]) - x2 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 1)]) - x3 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 0)]) - x4 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 1)]) - table = [[x1,x2],[x3,x4]] - _mcnemar = mcnemar(table, exact=True, correction=True) - self.__pvalue_mcnemar = _mcnemar.pvalue - self.__statistic_mcnemar = _mcnemar.statistic - - elif effect_size == "cliffs_delta": - # Let's go with Brunner-Munzel! 
- brunner_munzel = spstats.brunnermunzel(control, test, - nan_policy='omit') - self.__pvalue_brunner_munzel = brunner_munzel.pvalue - self.__statistic_brunner_munzel = brunner_munzel.statistic - - - elif effect_size == "median_diff": - # According to scipy's documentation of the function, - # "The Kruskal-Wallis H-test tests the null hypothesis - # that the population median of all of the groups are equal." - kruskal = spstats.kruskal(control, test, nan_policy='omit') - self.__pvalue_kruskal = kruskal.pvalue - self.__statistic_kruskal = kruskal.statistic - # self.__power = np.nan - - else: # for mean difference, Cohen's d, and Hedges' g. - # Welch's t-test, assumes normality of distributions, - # but does not assume equal variances. - welch = spstats.ttest_ind(control, test, equal_var=False, - nan_policy='omit') - self.__pvalue_welch = welch.pvalue - self.__statistic_welch = welch.statistic - - # Student's t-test, assumes normality of distributions, - # as well as assumption of equal variances. - students_t = spstats.ttest_ind(control, test, equal_var=True, - nan_policy='omit') - self.__pvalue_students_t = students_t.pvalue - self.__statistic_students_t = students_t.statistic - - # Mann-Whitney test: Non parametric, - # does not assume normality of distributions - try: - mann_whitney = spstats.mannwhitneyu(control, test, - alternative='two-sided') - self.__pvalue_mann_whitney = mann_whitney.pvalue - self.__statistic_mann_whitney = mann_whitney.statistic - except ValueError: - # Occurs when the control and test are exactly identical - # in terms of rank (eg. all zeros.) - pass - - - - standardized_es = es.cohens_d(control, test, is_paired = None) - - # The Cohen's h calculation is for binary categorical data - try: - self.__proportional_difference = es.cohens_h(control, test) - except ValueError: - # Occur only when the data consists not only 0's and 1's. 
- pass - # self.__power = power.tt_ind_solve_power(standardized_es, - # len(control), - # alpha=self.__alpha, - # ratio=len(test)/len(control) - # ) - - - - - - - def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3): - - # # Deprecated in v0.3.0; permutation p-values will be reported by default. - # UNPAIRED_ES_TO_TEST = {"mean_diff" : "Mann-Whitney", - # "median_diff" : "Kruskal", - # "cohens_d" : "Mann-Whitney", - # "hedges_g" : "Mann-Whitney", - # "cliffs_delta" : "Brunner-Munzel"} - # - # TEST_TO_PVAL_ATTR = {"Mann-Whitney" : "pvalue_mann_whitney", - # "Kruskal" : "pvalue_kruskal", - # "Brunner-Munzel" : "pvalue_brunner_munzel", - # "Wilcoxon" : "pvalue_wilcoxon"} - - RM_STATUS = {'baseline' : 'for repeated measures against baseline \n', - 'sequential': 'for the sequential design of repeated-measures experiment \n', - 'None' : '' - } - - PAIRED_STATUS = {'baseline' : 'paired', - 'sequential' : 'paired', - 'None' : 'unpaired' - } - - first_line = {"rm_status" : RM_STATUS[str(self.__is_paired)], - "es" : self.__EFFECT_SIZE_DICT[self.__effect_size], - "paired_status": PAIRED_STATUS[str(self.__is_paired)]} - - - out1 = "The {paired_status} {es} {rm_status}".format(**first_line) - - base_string_fmt = "{:." + str(sigfig) + "}" - if "." in str(self.__ci): - ci_width = base_string_fmt.format(self.__ci) - else: - ci_width = str(self.__ci) - - ci_out = {"es" : base_string_fmt.format(self.__difference), - "ci" : ci_width, - "bca_low" : base_string_fmt.format(self.__bca_low), - "bca_high" : base_string_fmt.format(self.__bca_high)} - - out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) - out = out1 + out2 - - # # Deprecated in v0.3.0; permutation p-values will be reported by default. 
- # if self.__is_paired: - # stats_test = "Wilcoxon" - # else: - # stats_test = UNPAIRED_ES_TO_TEST[self.__effect_size] - - - # pval_rounded = base_string_fmt.format(getattr(self, - # TEST_TO_PVAL_ATTR[stats_test]) - # ) - - pval_rounded = base_string_fmt.format(self.pvalue_permutation) - - # # Deprecated in v0.3.0; permutation p-values will be reported by default. - # pvalue = "The two-sided p-value of the {} test is {}.".format(stats_test, - # pval_rounded) - - # pvalue = "The two-sided p-value of the {} test is {}.".format(stats_test, - # pval_rounded) - - - p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) - p2 = "calculated for legacy purposes only. " - pvalue = p1 + p2 - - bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) - bs2 = "the confidence interval is bias-corrected and accelerated." - bs = bs1 + bs2 - - pval_def1 = "Any p-value reported is the probability of observing the" + \ - "effect size (or greater),\nassuming the null hypothesis of" + \ - "zero difference is true." - pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ - "control and test labels were performed." - pval_def = pval_def1 + pval_def2 - - if show_resample_count and define_pval: - return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) - elif show_resample_count is False and define_pval is True: - return "{}\n{}\n\n{}".format(out, pvalue, pval_def) - elif show_resample_count is True and define_pval is False: - return "{}\n{}\n\n{}".format(out, pvalue, bs) - else: - return "{}\n{}".format(out, pvalue) - - - - def to_dict(self): - """ - Returns the attributes of the `dabest.TwoGroupEffectSize` object as a - dictionary. - """ - # Only get public (user-facing) attributes. - attrs = [a for a in dir(self) - if not a.startswith(("_", "to_dict"))] - out = {} - for a in attrs: - out[a] = getattr(self, a) - return out - - - @property - def difference(self): - """ - Returns the difference between the control and the test. 
- """ - return self.__difference - - @property - def effect_size(self): - """ - Returns the type of effect size reported. - """ - return self.__EFFECT_SIZE_DICT[self.__effect_size] - - @property - def is_paired(self): - return self.__is_paired - - @property - def ci(self): - """ - Returns the width of the confidence interval, in percent. - """ - return self.__ci - - @property - def alpha(self): - """ - Returns the significance level of the statistical test as a float - between 0 and 1. - """ - return self.__alpha - - @property - def resamples(self): - """ - The number of resamples performed during the bootstrap procedure. - """ - return self.__resamples - - @property - def bootstraps(self): - """ - The generated bootstraps of the effect size. - """ - return self.__bootstraps - - @property - def random_seed(self): - """ - The number used to initialise the numpy random seed generator, ie. - `seed_value` from `numpy.random.seed(seed_value)` is returned. - """ - return self.__random_seed - - @property - def bca_interval_idx(self): - return self.__bca_interval_idx - - @property - def bca_low(self): - """ - The bias-corrected and accelerated confidence interval lower limit. - """ - return self.__bca_low - - @property - def bca_high(self): - """ - The bias-corrected and accelerated confidence interval upper limit. - """ - return self.__bca_high - - @property - def pct_interval_idx(self): - return self.__pct_interval_idx - - @property - def pct_low(self): - """ - The percentile confidence interval lower limit. - """ - return self.__pct_low - - @property - def pct_high(self): - """ - The percentile confidence interval lower limit. 
- """ - return self.__pct_high - - - - @property - def pvalue_brunner_munzel(self): - from numpy import nan as npnan - try: - return self.__pvalue_brunner_munzel - except AttributeError: - return npnan - - @property - def statistic_brunner_munzel(self): - from numpy import nan as npnan - try: - return self.__statistic_brunner_munzel - except AttributeError: - return npnan - - - - @property - def pvalue_wilcoxon(self): - from numpy import nan as npnan - try: - return self.__pvalue_wilcoxon - except AttributeError: - return npnan - - @property - def statistic_wilcoxon(self): - from numpy import nan as npnan - try: - return self.__statistic_wilcoxon - except AttributeError: - return npnan - - @property - def pvalue_mcnemar(self): - from numpy import nan as npnan - try: - return self.__pvalue_mcnemar - except AttributeError: - return npnan - - @property - def statistic_mcnemar(self): - from numpy import nan as npnan - try: - return self.__statistic_mcnemar - except AttributeError: - return npnan - - - - @property - def pvalue_paired_students_t(self): - from numpy import nan as npnan - try: - return self.__pvalue_paired_students_t - except AttributeError: - return npnan - - @property - def statistic_paired_students_t(self): - from numpy import nan as npnan - try: - return self.__statistic_paired_students_t - except AttributeError: - return npnan - - - - @property - def pvalue_kruskal(self): - from numpy import nan as npnan - try: - return self.__pvalue_kruskal - except AttributeError: - return npnan - - @property - def statistic_kruskal(self): - from numpy import nan as npnan - try: - return self.__statistic_kruskal - except AttributeError: - return npnan - - - - @property - def pvalue_welch(self): - from numpy import nan as npnan - try: - return self.__pvalue_welch - except AttributeError: - return npnan - - @property - def statistic_welch(self): - from numpy import nan as npnan - try: - return self.__statistic_welch - except AttributeError: - return npnan - - - - 
@property - def pvalue_students_t(self): - from numpy import nan as npnan - try: - return self.__pvalue_students_t - except AttributeError: - return npnan - - @property - def statistic_students_t(self): - from numpy import nan as npnan - try: - return self.__statistic_students_t - except AttributeError: - return npnan - - - - @property - def pvalue_mann_whitney(self): - from numpy import nan as npnan - try: - return self.__pvalue_mann_whitney - except AttributeError: - return npnan - - - - @property - def statistic_mann_whitney(self): - from numpy import nan as npnan - try: - return self.__statistic_mann_whitney - except AttributeError: - return npnan - - # Introduced in v0.3.0. - @property - def pvalue_permutation(self): - return self.__PermutationTest_result.pvalue - - # - # - @property - def permutation_count(self): - """ - The number of permuations taken. - """ - return self.__PermutationTest_result.permutation_count - - - @property - def permutations(self): - return self.__PermutationTest_result.permutations - - - @property - def permutations_var(self): - return self.__PermutationTest_result.permutations_var - - - @property - def proportional_difference(self): - from numpy import nan as npnan - try: - return self.__proportional_difference - except AttributeError: - return npnan - - -# %% ../nbs/API/class.ipynb 41 -class EffectSizeDataFrame(object): - """A class that generates and stores the results of bootstrapped effect - sizes for several comparisons.""" - - def __init__(self, dabest, effect_size, - is_paired, ci=95, proportional=False, - resamples=5000, - permutation_count=5000, - random_seed=12345, - x1_level=None, x2=None, - delta2=False, experiment_label=None, - mini_meta=False): - """ - Parses the data from a Dabest object, enabling plotting and printing - capability for the effect size of interest. 
- """ - - self.__dabest_obj = dabest - self.__effect_size = effect_size - self.__is_paired = is_paired - self.__ci = ci - self.__resamples = resamples - self.__permutation_count = permutation_count - self.__random_seed = random_seed - self.__proportional = proportional - self.__x1_level = x1_level - self.__experiment_label = experiment_label - self.__x2 = x2 - self.__delta2 = delta2 - self.__mini_meta = mini_meta - - - def __pre_calc(self): - import pandas as pd - from .misc_tools import print_greeting, get_varname - from ._stats_tools import confint_2group_diff as ci2g - - idx = self.__dabest_obj.idx - dat = self.__dabest_obj._plot_data - xvar = self.__dabest_obj._xvar - yvar = self.__dabest_obj._yvar - - out = [] - reprs = [] - - if self.__delta2==True: - mixed_data = [] - for j, current_tuple in enumerate(idx): - if self.__is_paired != "sequential": - cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() - - for ix, tname in enumerate(current_tuple[1:]): - if self.__is_paired == "sequential": - cname = current_tuple[ix] - control = dat[dat[xvar] == cname][yvar].copy() - test = dat[dat[xvar] == tname][yvar].copy() - mixed_data.append(control) - mixed_data.append(test) - bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(mixed_data[0], mixed_data[1], mixed_data[2], mixed_data[3], - self.__is_paired, self.__resamples, self.__random_seed) - - - for j, current_tuple in enumerate(idx): - if self.__is_paired!="sequential": - cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() - - for ix, tname in enumerate(current_tuple[1:]): - if self.__is_paired == "sequential": - cname = current_tuple[ix] - control = dat[dat[xvar] == cname][yvar].copy() - test = dat[dat[xvar] == tname][yvar].copy() - - result = TwoGroupsEffectSize(control, test, - self.__effect_size, - self.__proportional, - self.__is_paired, - self.__ci, - self.__resamples, - self.__permutation_count, - self.__random_seed) - r_dict = result.to_dict() - 
r_dict["control"] = cname - r_dict["test"] = tname - r_dict["control_N"] = int(len(control)) - r_dict["test_N"] = int(len(test)) - out.append(r_dict) - if j == len(idx)-1 and ix == len(current_tuple)-2: - if self.__delta2 and self.__effect_size in ["mean_diff","delta_g"]: - resamp_count = False - def_pval = False - elif self.__mini_meta and self.__effect_size == "mean_diff": - resamp_count = False - def_pval = False - else: - resamp_count = True - def_pval = True - else: - resamp_count = False - def_pval = False - - text_repr = result.__repr__(show_resample_count=resamp_count, - define_pval=def_pval) - - to_replace = "between {} and {} is".format(cname, tname) - text_repr = text_repr.replace("is", to_replace, 1) - - reprs.append(text_repr) - - - self.__for_print = "\n\n".join(reprs) - - out_ = pd.DataFrame(out) - - columns_in_order = ['control', 'test', 'control_N', 'test_N', - 'effect_size', 'is_paired', - 'difference', 'ci', - - 'bca_low', 'bca_high', 'bca_interval_idx', - 'pct_low', 'pct_high', 'pct_interval_idx', - - 'bootstraps', 'resamples', 'random_seed', - - 'permutations', 'pvalue_permutation', 'permutation_count', 'permutations_var', - - 'pvalue_welch', - 'statistic_welch', - - 'pvalue_students_t', - 'statistic_students_t', - - 'pvalue_mann_whitney', - 'statistic_mann_whitney', - - 'pvalue_brunner_munzel', - 'statistic_brunner_munzel', - - 'pvalue_wilcoxon', - 'statistic_wilcoxon', - - 'pvalue_mcnemar', - 'statistic_mcnemar', - - 'pvalue_paired_students_t', - 'statistic_paired_students_t', - - 'pvalue_kruskal', - 'statistic_kruskal', - 'proportional_difference' - ] - self.__results = out_.reindex(columns=columns_in_order) - self.__results.dropna(axis="columns", how="all", inplace=True) - - # Add the is_paired column back when is_paired is None - if self.is_paired is None: - self.__results.insert(5, 'is_paired', self.__results.apply(lambda _: None, axis=1)) - - # Create and compute the delta-delta statistics - if self.__delta2 is True: - self.__delta_delta 
= DeltaDelta(self, - self.__permutation_count, - bootstraps_delta_delta, - self.__ci) - reprs.append(self.__delta_delta.__repr__(header=False)) - elif self.__delta2 is True and self.__effect_size not in ["mean_diff", "delta_g"]: - self.__delta_delta = "Delta-delta is not supported for {}.".format(self.__effect_size) - else: - self.__delta_delta = "`delta2` is False; delta-delta is therefore not calculated." - - # Create and compute the weighted average statistics - if self.__mini_meta is True and self.__effect_size == "mean_diff": - self.__mini_meta_delta = MiniMetaDelta(self, - self.__permutation_count, - self.__ci) - reprs.append(self.__mini_meta_delta.__repr__(header=False)) - elif self.__mini_meta is True and self.__effect_size != "mean_diff": - self.__mini_meta_delta = "Weighted delta is not supported for {}.".format(self.__effect_size) - else: - self.__mini_meta_delta = "`mini_meta` is False; weighted delta is therefore not calculated." - - - varname = get_varname(self.__dabest_obj) - lastline = "To get the results of all valid statistical tests, " +\ - "use `{}.{}.statistical_tests`".format(varname, self.__effect_size) - reprs.append(lastline) - - reprs.insert(0, print_greeting()) - - self.__for_print = "\n\n".join(reprs) - - - def __repr__(self): - try: - return self.__for_print - except AttributeError: - self.__pre_calc() - return self.__for_print - - - - def __calc_lqrt(self): - import lqrt - import pandas as pd - - rnd_seed = self.__random_seed - db_obj = self.__dabest_obj - dat = db_obj._plot_data - xvar = db_obj._xvar - yvar = db_obj._yvar - delta2 = self.__delta2 - - - out = [] - - for j, current_tuple in enumerate(db_obj.idx): - if self.__is_paired != "sequential": - cname = current_tuple[0] - control = dat[dat[xvar] == cname][yvar].copy() - - for ix, tname in enumerate(current_tuple[1:]): - if self.__is_paired == "sequential": - cname = current_tuple[ix] - control = dat[dat[xvar] == cname][yvar].copy() - test = dat[dat[xvar] == tname][yvar].copy() - 
- if self.__is_paired: - # Refactored here in v0.3.0 for performance issues. - lqrt_result = lqrt.lqrtest_rel(control, test, - random_state=rnd_seed) - - out.append({"control": cname, "test": tname, - "control_N": int(len(control)), - "test_N": int(len(test)), - "pvalue_paired_lqrt": lqrt_result.pvalue, - "statistic_paired_lqrt": lqrt_result.statistic - }) - - else: - # Likelihood Q-Ratio test: - lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, - random_state=rnd_seed, - equal_var=True) - - - lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, - random_state=rnd_seed, - equal_var=False) - - out.append({"control": cname, "test": tname, - "control_N": int(len(control)), - "test_N": int(len(test)), - - "pvalue_lqrt_equal_var" : lqrt_equal_var_result.pvalue, - "statistic_lqrt_equal_var" : lqrt_equal_var_result.statistic, - "pvalue_lqrt_unequal_var" : lqrt_unequal_var_result.pvalue, - "statistic_lqrt_unequal_var" : lqrt_unequal_var_result.statistic, - }) - self.__lqrt_results = pd.DataFrame(out) - - - def plot(self, color_col=None, - - raw_marker_size=6, es_marker_size=9, - - swarm_label=None, contrast_label=None, delta2_label=None, - swarm_ylim=None, contrast_ylim=None, delta2_ylim=None, - - custom_palette=None, swarm_desat=0.5, halfviolin_desat=1, - halfviolin_alpha=0.8, - - face_color = None, - #bar plot - bar_label=None, bar_desat=0.5, bar_width = 0.5,bar_ylim = None, - # error bar of proportion plot - ci=None, ci_type='bca', err_color=None, - - float_contrast=True, - show_pairs=True, - show_delta2=True, - show_mini_meta=True, - group_summaries=None, - group_summaries_offset=0.1, - - fig_size=None, - dpi=100, - ax=None, - - contrast_show_es = False, - es_sf = 2, - es_fontsize = 10, - - contrast_show_deltas = True, - - gridkey_rows=None, - gridkey_merge_pairs = False, - gridkey_show_Ns = True, - gridkey_show_es = True, - - swarmplot_kwargs=None, - barplot_kwargs=None, - violinplot_kwargs=None, - slopegraph_kwargs=None, - sankey_kwargs=None, - 
reflines_kwargs=None, - group_summary_kwargs=None, - legend_kwargs=None, - title=None, fontsize_title = 16, - fontsize_rawxlabel = 12,fontsize_rawylabel = 12,fontsize_contrastxlabel = 12, fontsize_contrastylabel = 12, - fontsize_delta2label = 12): - - """ - Creates an estimation plot for the effect size of interest. - - - Parameters - ---------- - color_col : string, default None - Column to be used for colors. - raw_marker_size : float, default 6 - The diameter (in points) of the marker dots plotted in the - swarmplot. - es_marker_size : float, default 9 - The size (in points) of the effect size points on the difference - axes. - swarm_label, contrast_label, delta2_label : strings, default None - Set labels for the y-axis of the swarmplot and the contrast plot, - respectively. If `swarm_label` is not specified, it defaults to - "value", unless a column name was passed to `y`. If - `contrast_label` is not specified, it defaults to the effect size - being plotted. If `delta2_label` is not specifed, it defaults to - "delta - delta" - swarm_ylim, contrast_ylim, delta2_ylim : tuples, default None - The desired y-limits of the raw data (swarmplot) axes, the - difference axes and the delta-delta axes respectively, as a tuple. - These will be autoscaled to sensible values if they are not - specified. The delta2 axes and contrast axes should have the same - limits for y. When `show_delta2` is True, if both of the `contrast_ylim` - and `delta2_ylim` are not None, then they must be specified with the - same values; when `show_delta2` is True and only one of them is specified, - then the other will automatically be assigned with the same value. - Specifying `delta2_ylim` does not have any effect when `show_delta2` is - False. - custom_palette : dict, list, or matplotlib color palette, default None - This keyword accepts a dictionary with {'group':'color'} pairings, - a list of RGB colors, or a specified matplotlib palette. This - palette will be used to color the swarmplot. 
If `color_col` is not - specified, then each group will be colored in sequence according - to the default palette currently used by matplotlib. - Please take a look at the seaborn commands `color_palette` - and `cubehelix_palette` to generate a custom palette. Both - these functions generate a list of RGB colors. - See: - https://seaborn.pydata.org/generated/seaborn.color_palette.html - https://seaborn.pydata.org/generated/seaborn.cubehelix_palette.html - The named colors of matplotlib can be found here: - https://matplotlib.org/examples/color/named_colors.html - swarm_desat : float, default 1 - Decreases the saturation of the colors in the swarmplot by the - desired proportion. Uses `seaborn.desaturate()` to acheive this. - halfviolin_desat : float, default 0.5 - Decreases the saturation of the colors of the half-violin bootstrap - curves by the desired proportion. Uses `seaborn.desaturate()` to - acheive this. - halfviolin_alpha : float, default 0.8 - The alpha (transparency) level of the half-violin bootstrap curves. - float_contrast : boolean, default True - Whether or not to display the halfviolin bootstrapped difference - distribution alongside the raw data. - show_pairs : boolean, default True - If the data is paired, whether or not to show the raw data as a - swarmplot, or as slopegraph, with a line joining each pair of - observations. - show_delta2, show_mini_meta : boolean, default True - If delta-delta or mini-meta delta is calculated, whether or not to - show the delta-delta plot or mini-meta plot. - group_summaries : ['mean_sd', 'median_quartiles', 'None'], default None. - Plots the summary statistics for each group. If 'mean_sd', then - the mean and standard deviation of each group is plotted as a - notched line beside each group. If 'median_quantiles', then the - median and 25th and 75th percentiles of each group is plotted - instead. If 'None', the summaries are not shown. 
- group_summaries_offset : float, default 0.1 - If group summaries are displayed, they will be offset from the raw - data swarmplot groups by this value. - fig_size : tuple, default None - The desired dimensions of the figure as a (length, width) tuple. - dpi : int, default 100 - The dots per inch of the resulting figure. - ax : matplotlib.Axes, default None - Provide an existing Axes for the plots to be created. If no Axes is - specified, a new matplotlib Figure will be created. - gridkey_rows : list, default None - Provide a list of row labels for the gridkey. The supplied idx is - checked against the row labels to determine whether the corresponding - cell should be populated or not. - swarmplot_kwargs : dict, default None - Pass any keyword arguments accepted by the seaborn `swarmplot` - command here, as a dict. If None, the following keywords are - passed to sns.swarmplot : {'size':`raw_marker_size`}. - violinplot_kwargs : dict, default None - Pass any keyword arguments accepted by the matplotlib ` - pyplot.violinplot` command here, as a dict. If None, the following - keywords are passed to violinplot : {'widths':0.5, 'vert':True, - 'showextrema':False, 'showmedians':False}. - slopegraph_kwargs : dict, default None - This will change the appearance of the lines used to join each pair - of observations when `show_pairs=True`. Pass any keyword arguments - accepted by matplotlib `plot()` function here, as a dict. - If None, the following keywords are - passed to plot() : {'linewidth':1, 'alpha':0.5}. - sankey_kwargs: dict, default None - Whis will change the appearance of the sankey diagram used to depict - paired proportional data when `show_pairs=True` and `proportional=True`. - Pass any keyword arguments accepted by plot_tools.sankeydiag() function - here, as a dict. 
If None, the following keywords are passed to sankey diagram: - {"width": 0.5, "align": "center", "alpha": 0.4, "bar_width": 0.1, "rightColor": False} - reflines_kwargs : dict, default None - This will change the appearance of the zero reference lines. Pass - any keyword arguments accepted by the matplotlib Axes `hlines` - command here, as a dict. If None, the following keywords are - passed to Axes.hlines : {'linestyle':'solid', 'linewidth':0.75, - 'zorder':2, 'color' : default y-tick color}. - group_summary_kwargs : dict, default None - Pass any keyword arguments accepted by the matplotlib.lines.Line2D - command here, as a dict. This will change the appearance of the - vertical summary lines for each group, if `group_summaries` is not - 'None'. If None, the following keywords are passed to - matplotlib.lines.Line2D : {'lw':2, 'alpha':1, 'zorder':3}. - legend_kwargs : dict, default None - Pass any keyword arguments accepted by the matplotlib Axes - `legend` command here, as a dict. If None, the following keywords - are passed to matplotlib.Axes.legend : {'loc':'upper left', - 'frameon':False}. - title : string, default None - Title for the plot. If None, no title will be displayed. Pass any - keyword arguments accepted by the matplotlib.pyplot.suptitle `t` command here, - as a string. - fontsize_title : float or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}, default 'large' - Font size for the plot title. If a float, the fontsize in points. The - string values denote sizes relative to the default font size. Pass any keyword arguments accepted - by the matplotlib.pyplot.suptitle `fontsize` command here, as a string. - fontsize_rawxlabel : float, default 12 - Font size for the raw axes xlabel. - fontsize_rawylabel : float, default 12 - Font size for the raw axes ylabel. - fontsize_contrastxlabel : float, default 12 - Font size for the contrast axes xlabel. 
- fontsize_contrastylabel : float, default 12 - Font size for the contrast axes ylabel. - fontsize_delta2label : float, default 12 - Font size for the delta-delta axes ylabel. - - - Returns - ------- - A :class:`matplotlib.figure.Figure` with 2 Axes, if ``ax = None``. - - The first axes (accessible with ``FigName.axes[0]``) contains the rawdata swarmplot; the second axes (accessible with ``FigName.axes[1]``) has the bootstrap distributions and effect sizes (with confidence intervals) plotted on it. - - If ``ax`` is specified, the rawdata swarmplot is accessed at ``ax`` - itself, while the effect size axes is accessed at ``ax.contrast_axes``. - See the last example below. - - - - """ - - from .plotter import EffectSizeDataFramePlotter - - if hasattr(self, "results") is False: - self.__pre_calc() - - if self.__delta2: - color_col = self.__x2 - - # if self.__proportional: - # raw_marker_size = 0.01 - - # Modification incurred due to update of Seaborn - ci = ('ci', ci) if ci is not None else None - - all_kwargs = locals() - del all_kwargs["self"] - - out = EffectSizeDataFramePlotter(self, **all_kwargs) - - return out - - - @property - def proportional(self): - """ - Returns the proportional parameter - class. - """ - return self.__proportional - - @property - def results(self): - """Prints all pairwise comparisons nicely.""" - try: - return self.__results - except AttributeError: - self.__pre_calc() - return self.__results - - - - @property - def statistical_tests(self): - results_df = self.results - - # Select only the statistics and p-values. 
- stats_columns = [c for c in results_df.columns - if c.startswith("statistic") or c.startswith("pvalue")] - - default_cols = ['control', 'test', 'control_N', 'test_N', - 'effect_size', 'is_paired', - 'difference', 'ci', 'bca_low', 'bca_high'] - - cols_of_interest = default_cols + stats_columns - - return results_df[cols_of_interest] - - - @property - def _for_print(self): - return self.__for_print - - @property - def _plot_data(self): - return self.__dabest_obj._plot_data - - @property - def idx(self): - return self.__dabest_obj.idx - - @property - def xvar(self): - return self.__dabest_obj._xvar - - @property - def yvar(self): - return self.__dabest_obj._yvar - - @property - def is_paired(self): - return self.__is_paired - - @property - def ci(self): - """ - The width of the confidence interval being produced, in percent. - """ - return self.__ci - - @property - def x1_level(self): - return self.__x1_level - - - @property - def x2(self): - return self.__x2 - - - @property - def experiment_label(self): - return self.__experiment_label - - - @property - def delta2(self): - return self.__delta2 - - - @property - def resamples(self): - """ - The number of resamples (with replacement) during bootstrap resampling." - """ - return self.__resamples - - @property - def random_seed(self): - """ - The seed used by `numpy.seed()` for bootstrap resampling. - """ - return self.__random_seed - - @property - def effect_size(self): - """The type of effect size being computed.""" - return self.__effect_size - - @property - def dabest_obj(self): - """ - Returns the `dabest` object that invoked the current EffectSizeDataFrame - class. - """ - return self.__dabest_obj - - @property - def proportional(self): - """ - Returns the proportional parameter - class. - """ - return self.__proportional - - @property - def lqrt(self): - """Returns all pairwise Lq-Likelihood Ratio Type test results - as a pandas DataFrame. 
- - For more information on LqRT tests, see https://arxiv.org/abs/1911.11922 - """ - try: - return self.__lqrt_results - except AttributeError: - self.__calc_lqrt() - return self.__lqrt_results - - - @property - def mini_meta(self): - """ - Returns the mini_meta boolean parameter. - """ - return self.__mini_meta - - - @property - def mini_meta_delta(self): - """ - Returns the mini_meta results. - """ - try: - return self.__mini_meta_delta - except AttributeError: - self.__pre_calc() - return self.__mini_meta_delta - - - @property - def delta_delta(self): - """ - Returns the mini_meta results. - """ - try: - return self.__delta_delta - except AttributeError: - self.__pre_calc() - return self.__delta_delta - - - -# %% ../nbs/API/class.ipynb 59 -class PermutationTest: - """ - A class to compute and report permutation tests. - - Parameters - ---------- - control : array-like - test : array-like - These should be numerical iterables. - effect_size : string. - Any one of the following are accepted inputs: - 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', 'delta_g" or 'cliffs_delta' - is_paired : string, default None - permutation_count : int, default 10000 - The number of permutations (reshuffles) to perform. - random_seed : int, default 12345 - `random_seed` is used to seed the random number generator during - bootstrap resampling. This ensures that the generated permutations - are replicable. - - Returns - ------- - A :py:class:`PermutationTest` object: - `difference`:float - The effect size of the difference between the control and the test. - `effect_size`:string - The type of effect size reported. - - - """ - - def __init__(self, control:np.array, - test:np.array, # These should be numerical iterables. - effect_size:str, # Any one of the following are accepted inputs: 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta' - is_paired:str=None, - permutation_count:int=5000, # The number of permutations (reshuffles) to perform. 
- random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable. - **kwargs): - - import numpy as np - from numpy.random import PCG64, RandomState - from ._stats_tools.effsize import two_group_difference - from ._stats_tools.confint_2group_diff import calculate_group_var - - - self.__permutation_count = permutation_count - - # Run Sanity Check. - if is_paired and len(control) != len(test): - raise ValueError("The two arrays do not have the same length.") - - # Initialise random number generator. - # rng = np.random.default_rng(seed=random_seed) - rng = RandomState(PCG64(random_seed)) - - # Set required constants and variables - control = np.array(control) - test = np.array(test) - - control_sample = control.copy() - test_sample = test.copy() - - BAG = np.array([*control, *test]) - CONTROL_LEN = int(len(control)) - EXTREME_COUNT = 0. - THRESHOLD = np.abs(two_group_difference(control, test, - is_paired, effect_size)) - self.__permutations = [] - self.__permutations_var = [] - - for i in range(int(permutation_count)): - - if is_paired: - # Select which control-test pairs to swap. - random_idx = rng.choice(CONTROL_LEN, - rng.randint(0, CONTROL_LEN+1), - replace=False) - - # Perform swap. - for i in random_idx: - _placeholder = control_sample[i] - control_sample[i] = test_sample[i] - test_sample[i] = _placeholder - - else: - # Shuffle the bag and assign to control and test groups. - # NB. rng.shuffle didn't produce replicable results... 
- shuffled = rng.permutation(BAG) - control_sample = shuffled[:CONTROL_LEN] - test_sample = shuffled[CONTROL_LEN:] - - - es = two_group_difference(control_sample, test_sample, - False, effect_size) - - var = calculate_group_var(np.var(control_sample, ddof=1), - CONTROL_LEN, - np.var(test_sample, ddof=1), - len(test_sample)) - self.__permutations.append(es) - self.__permutations_var.append(var) - - if np.abs(es) > THRESHOLD: - EXTREME_COUNT += 1. - - self.__permutations = np.array(self.__permutations) - self.__permutations_var = np.array(self.__permutations_var) - - self.pvalue = EXTREME_COUNT / permutation_count - - - def __repr__(self): - return("{} permutations were taken. The p-value is {}.".format(self.permutation_count, - self.pvalue)) - - - @property - def permutation_count(self): - """ - The number of permuations taken. - """ - return self.__permutation_count - - - @property - def permutations(self): - """ - The effect sizes of all the permutations in a list. - """ - return self.__permutations - - - @property - def permutations_var(self): - """ - The experiment group variance of all the permutations in a list. - """ - return self.__permutations_var - diff --git a/dabest/_dabest_object.py b/dabest/_dabest_object.py new file mode 100644 index 00000000..c1d6be8f --- /dev/null +++ b/dabest/_dabest_object.py @@ -0,0 +1,661 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/dabest_object.ipynb. + +# %% auto 0 +__all__ = ['Dabest'] + +# %% ../nbs/API/dabest_object.ipynb 4 +# Import standard data science libraries +from numpy import array, repeat, random, issubdtype, number +import pandas as pd +from scipy.stats import norm +from scipy.stats import randint + + +# %% ../nbs/API/dabest_object.ipynb 6 +class Dabest(object): + + """ + Class for estimation statistics and plots. 
+ """ + + def __init__( + self, + data, + idx, + x, + y, + paired, + id_col, + ci, + resamples, + random_seed, + proportional, + delta2, + experiment, + experiment_label, + x1_level, + mini_meta, + ): + """ + Parses and stores pandas DataFrames in preparation for estimation + statistics. You should not be calling this class directly; instead, + use `dabest.load()` to parse your DataFrame prior to analysis. + """ + + self.__delta2 = delta2 + self.__experiment = experiment + self.__ci = ci + self.__input_data = data + self.__output_data = data.copy() + self.__id_col = id_col + self.__is_paired = paired + self.__resamples = resamples + self.__random_seed = random_seed + self.__proportional = proportional + self.__mini_meta = mini_meta + + # Check if it is a valid mini_meta case + if self.__mini_meta: + # Only mini_meta calculation but not proportional and delta-delta function + if self.__proportional: + err0 = "`proportional` and `mini_meta` cannot be True at the same time." + raise ValueError(err0) + if self.__delta2: + err0 = "`delta` and `mini_meta` cannot be True at the same time." + raise ValueError(err0) + + # Check if the columns stated are valid + # TODO instead of traversing twice idx you can traverse only once + # and break the loop if the condition is not satisfied? + # TODO What if the type is not str and not tuple,list? missing raise Error + if all([isinstance(i, str) for i in idx]): + if len(pd.unique([t for t in idx]).tolist()) != 2: + err0 = "`mini_meta` is True, but `idx` ({})".format(idx) + err1 = "does not contain exactly 2 columns." + raise ValueError(err0 + err1) + + if all([isinstance(i, (tuple, list)) for i in idx]): + all_idx_lengths = [len(t) for t in idx] + if (array(all_idx_lengths) != 2).any(): + err1 = "`mini_meta` is True, but some idx " + err2 = "in {} does not consist only of two groups.".format(idx) + raise ValueError(err1 + err2) + + # TODO can you have True mini_meta and delta2 at the same time? 
+ # Check if this is a 2x2 ANOVA case and x & y are valid columns + # Create experiment_label and x1_level + if self.__delta2: + # TODO Wrap the errors in a separate function called check_errors() + if self.__proportional: + err0 = "`proportional` and `delta` cannot be True at the same time." + raise ValueError(err0) + + # idx should not be specified + if idx: + err0 = "`idx` should not be specified when `delta2` is True.".format( + len(x) + ) + raise ValueError(err0) + + # Check if x is valid + # TODO if x is None is fine?? + if len(x) != 2: + err0 = "`delta2` is True but the number of variables indicated by `x` is {}.".format( + len(x) + ) + raise ValueError(err0) + + for i in x: + if i not in self.__output_data.columns: + err = "{0} is not a column in `data`. Please check.".format(i) + raise IndexError(err) + + # Check if y is valid + if not y: + err0 = "`delta2` is True but `y` is not indicated." + raise ValueError(err0) + + if y not in self.__output_data.columns: + err = "{0} is not a column in `data`. Please check.".format(y) + raise IndexError(err) + + # Check if experiment is valid + if experiment not in self.__output_data.columns: + err = "{0} is not a column in `data`. Please check.".format(experiment) + raise IndexError(err) + + # Check if experiment_label is valid and create experiment when needed + if experiment_label: + if len(experiment_label) != 2: + err0 = "`experiment_label` does not have a length of 2." + raise ValueError(err0) + + for i in experiment_label: + if i not in self.__output_data[experiment].unique(): + err = "{0} is not an element in the column `{1}` of `data`. Please check.".format( + i, experiment + ) + raise IndexError(err) + else: + experiment_label = self.__output_data[experiment].unique() + + # Check if x1_level is valid + if x1_level: + if len(x1_level) != 2: + err0 = "`x1_level` does not have a length of 2." 
+ raise ValueError(err0) + + for i in x1_level: + if i not in self.__output_data[x[0]].unique(): + err = "{0} is not an element in the column `{1}` of `data`. Please check.".format( + i, experiment + ) + raise IndexError(err) + + else: + x1_level = self.__output_data[x[0]].unique() + + # TODO what if experiment is None? + elif experiment: + experiment_label = self.__output_data[experiment].unique() + x1_level = self.__output_data[x[0]].unique() + self.__experiment_label = experiment_label + self.__x1_level = x1_level + + # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case + if idx is None and x is not None and y is not None: + # Add a length check for unique values in the first element in list x, + # if the length is greater than 2, force delta2 to be False + # Should be removed if delta2 for situations other than 2x2 is supported + if len(self.__output_data[x[0]].unique()) > 2 and x1_level is None: + self.__delta2 = False + # stop the loop if delta2 is False + + # add a new column which is a combination of experiment and the first variable + new_col_name = experiment + x[0] + while new_col_name in self.__output_data.columns: + new_col_name += "_" + + self.__output_data[new_col_name] = ( + self.__output_data[x[0]].astype(str) + + " " + + self.__output_data[experiment].astype(str) + ) + + # create idx and record the first and second x variable + idx = [] + for i in list(map(lambda x: str(x), experiment_label)): + temp = [] + for j in list(map(lambda x: str(x), x1_level)): + temp.append(j + " " + i) + idx.append(temp) + + self.__idx = idx + self.__x1 = x[0] + self.__x2 = x[1] + x = new_col_name + else: + self.__idx = idx + self.__x1 = None + self.__x2 = None + + # Determine the kind of estimation plot we need to produce. + if all([isinstance(i, (str, int, float)) for i in idx]): + # flatten out idx. 
+ all_plot_groups = pd.unique([t for t in idx]).tolist() + if len(idx) > len(all_plot_groups): + err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again." + raise ValueError(err0) + + # We need to re-wrap this idx inside another tuple so as to + # easily loop thru each pairwise group later on. + self.__idx = (idx,) + + elif all([isinstance(i, (tuple, list)) for i in idx]): + all_plot_groups = pd.unique([tt for t in idx for tt in t]).tolist() + + actual_groups_given = sum([len(i) for i in idx]) + + if actual_groups_given > len(all_plot_groups): + err0 = "Groups are repeated across tuples," + err1 = " or a tuple has repeated groups in it." + err2 = " Please remove any duplicates and try again." + raise ValueError(err0 + err1 + err2) + + else: # mix of string and tuple? + err = "There seems to be a problem with the idx you " "entered--{}.".format( + idx + ) + raise ValueError(err) + + # Check if there is a typo on paired + if self.__is_paired and self.__is_paired not in ("baseline", "sequential"): + err = "{} assigned for `paired` is not valid.".format(self.__is_paired) + raise ValueError(err) + + # Determine the type of data: wide or long. + if x is None and y is not None: + err = "You have only specified `y`. Please also specify `x`." + raise ValueError(err) + + if x is not None and y is None: + err = "You have only specified `x`. Please also specify `y`." + raise ValueError(err) + + self.__plot_data = self.get_plot_data(x, y, all_plot_groups) + self.__all_plot_groups = all_plot_groups + + # Check if `id_col` is valid + if self.__is_paired: + if id_col is None: + err = "`id_col` must be specified if `paired` is assigned with a not NoneType value." + raise IndexError(err) + + if id_col not in self.__plot_data.columns: + err = "{} is not a column in `data`. 
".format(id_col) + raise IndexError(err) + + self._compute_effectsize_dfs() + + def __repr__(self): + from .__init__ import __version__ + from .misc_tools import print_greeting + + greeting_header = print_greeting() + + RM_STATUS = { + "baseline": "for repeated measures against baseline \n", + "sequential": "for the sequential design of repeated-measures experiment \n", + "None": "", + } + + PAIRED_STATUS = {"baseline": "Paired e", "sequential": "Paired e", "None": "E"} + + first_line = { + "rm_status": RM_STATUS[str(self.__is_paired)], + "paired_status": PAIRED_STATUS[str(self.__is_paired)], + } + + s1 = "{paired_status}ffect size(s) {rm_status}".format(**first_line) + s2 = "with {}% confidence intervals will be computed for:".format(self.__ci) + desc_line = s1 + s2 + + out = [greeting_header + "\n\n" + desc_line] + + comparisons = [] + + if self.__is_paired == "sequential": + for j, current_tuple in enumerate(self.__idx): + for ix, test_name in enumerate(current_tuple[1:]): + control_name = current_tuple[ix] + comparisons.append("{} minus {}".format(test_name, control_name)) + else: + for j, current_tuple in enumerate(self.__idx): + control_name = current_tuple[0] + + for ix, test_name in enumerate(current_tuple[1:]): + comparisons.append("{} minus {}".format(test_name, control_name)) + + if self.__delta2: + comparisons.append( + "{} minus {} (only for mean difference)".format( + self.__experiment_label[1], self.__experiment_label[0] + ) + ) + + if self.__mini_meta: + comparisons.append("weighted delta (only for mean difference)") + + for j, g in enumerate(comparisons): + out.append("{}. {}".format(j + 1, g)) + + resamples_line1 = "\n{} resamples ".format(self.__resamples) + resamples_line2 = "will be used to generate the effect size bootstraps." 
+ out.append(resamples_line1 + resamples_line2) + + return "\n".join(out) + + @property + def mean_diff(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the mean difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()` + + """ + return self.__mean_diff + + @property + def median_diff(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. + + """ + return self.__median_diff + + @property + def cohens_d(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `d`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. + + """ + return self.__cohens_d + + @property + def cohens_h(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `h`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `directional` argument in `dabest.load()`. + + """ + return self.__cohens_h + + @property + def hedges_g(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. + + """ + return self.__hedges_g + + @property + def cliffs_delta(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. 
+ + """ + return self.__cliffs_delta + + @property + def delta_g(self): + """ + Returns an :py:class:`EffectSizeDataFrame` for deltas' g, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`. + """ + return self.__delta_g + + @property + def input_data(self): + """ + Returns the pandas DataFrame that was passed to `dabest.load()`. + When `delta2` is True, a new column is added to support the + function. The name of this new column is indicated by `x`. + """ + return self.__input_data + + @property + def idx(self): + """ + Returns the order of categories that was passed to `dabest.load()`. + """ + return self.__idx + + @property + def x1(self): + """ + Returns the first variable declared in x when it is a delta-delta + case; returns None otherwise. + """ + return self.__x1 + + @property + def x1_level(self): + """ + Returns the levels of first variable declared in x when it is a + delta-delta case; returns None otherwise. + """ + return self.__x1_level + + @property + def x2(self): + """ + Returns the second variable declared in x when it is a delta-delta + case; returns None otherwise. + """ + return self.__x2 + + @property + def experiment(self): + """ + Returns the column name of experiment labels that was passed to + `dabest.load()` when it is a delta-delta case; returns None otherwise. + """ + return self.__experiment + + @property + def experiment_label(self): + """ + Returns the experiment labels in order that was passed to `dabest.load()` + when it is a delta-delta case; returns None otherwise. + """ + return self.__experiment_label + + @property + def delta2(self): + """ + Returns the boolean parameter indicating if this is a delta-delta + situation. + """ + return self.__delta2 + + @property + def is_paired(self): + """ + Returns the type of repeated-measures experiment. 
+ """ + return self.__is_paired + + @property + def id_col(self): + """ + Returns the id column declared to `dabest.load()`. + """ + return self.__id_col + + @property + def ci(self): + """ + The width of the desired confidence interval. + """ + return self.__ci + + @property + def resamples(self): + """ + The number of resamples used to generate the bootstrap. + """ + return self.__resamples + + @property + def random_seed(self): + """ + The number used to initialise the numpy random seed generator, ie. + `seed_value` from `numpy.random.seed(seed_value)` is returned. + """ + return self.__random_seed + + @property + def x(self): + """ + Returns the x column that was passed to `dabest.load()`, if any. + When `delta2` is True, `x` returns the name of the new column created + for the delta-delta situation. To retrieve the 2 variables passed into + `x` when `delta2` is True, please call `x1` and `x2` instead. + """ + return self.__x + + @property + def y(self): + """ + Returns the y column that was passed to `dabest.load()`, if any. + """ + return self.__y + + @property + def _xvar(self): + """ + Returns the xvar in dabest.plot_data. + """ + return self.__xvar + + @property + def _yvar(self): + """ + Returns the yvar in dabest.plot_data. + """ + return self.__yvar + + @property + def _plot_data(self): + """ + Returns the pandas DataFrame used to produce the estimation stats/plots. + """ + return self.__plot_data + + @property + def proportional(self): + """ + Returns the proportional parameter class. + """ + return self.__proportional + + @property + def mini_meta(self): + """ + Returns the mini_meta boolean parameter. + """ + return self.__mini_meta + + @property + def _all_plot_groups(self): + """ + Returns the all plot groups, as indicated via the `idx` keyword. + """ + return self.__all_plot_groups + + def get_plot_data(self, x, y, all_plot_groups): + """ + Function to prepare some attributes for plotting + """ + + # Identify the type of data that was passed in. 
+ if x is not None and y is not None: + # Assume we have a long dataset. + # check both x and y are column names in data. + if x not in self.__output_data.columns: + err = "{0} is not a column in `data`. Please check.".format(x) + raise IndexError(err) + if y not in self.__output_data.columns: + err = "{0} is not a column in `data`. Please check.".format(y) + raise IndexError(err) + + # check y is numeric. + if not issubdtype(self.__output_data[y].dtype, number): + err = "{0} is a column in `data`, but it is not numeric.".format(y) + raise ValueError(err) + + # check all the idx can be found in self.__output_data[x] + for g in all_plot_groups: + if g not in self.__output_data[x].unique(): + err0 = '"{0}" is not a group in the column `{1}`.'.format(g, x) + err1 = " Please check `idx` and try again." + raise IndexError(err0 + err1) + + # Select only rows where the value in the `x` column + # is found in `idx`. + plot_data = self.__output_data[ + self.__output_data.loc[:, x].isin(all_plot_groups) + ].copy() + + # Assign attributes + self.__x = x + self.__y = y + self.__xvar = x + self.__yvar = y + + elif x is None and y is None: + # Assume we have a wide dataset. + # Assign attributes appropriately. + self.__x = None + self.__y = None + self.__xvar = "group" + self.__yvar = "value" + + # First, check we have all columns in the dataset. + for g in all_plot_groups: + if g not in self.__output_data.columns: + err0 = '"{0}" is not a column in `data`.'.format(g) + err1 = " Please check `idx` and try again." + raise IndexError(err0 + err1) + + set_all_columns = set(self.__output_data.columns.tolist()) + set_all_plot_groups = set(all_plot_groups) + id_vars = set_all_columns.difference(set_all_plot_groups) + + plot_data = pd.melt( + self.__output_data, + id_vars=id_vars, + value_vars=all_plot_groups, + value_name=self.__yvar, + var_name=self.__xvar, + ) + + # Added in v0.2.7. 
+ plot_data.dropna(axis=0, how="any", subset=[self.__yvar], inplace=True) + + # TODO these comments should not be in the code but on the release notes of the package version + # Lines 131 to 140 added in v0.2.3. + # Fixes a bug that jammed up when the xvar column was already + # a pandas Categorical. Now we check for this and act appropriately. + if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype): + plot_data[self.__xvar].cat.remove_unused_categories(inplace=True) + plot_data[self.__xvar].cat.reorder_categories( + all_plot_groups, ordered=True, inplace=True + ) + else: + plot_data.loc[:, self.__xvar] = pd.Categorical( + plot_data[self.__xvar], categories=all_plot_groups, ordered=True + ) + + return plot_data + + def _compute_effectsize_dfs(self): + ''' + Function to compute all attributes based on EffectSizeDataFrame. + It returns nothing. + ''' + from ._effsize_objects import EffectSizeDataFrame + + effectsize_df_kwargs = dict( + ci=self.__ci, + is_paired=self.__is_paired, + random_seed=self.__random_seed, + resamples=self.__resamples, + proportional=self.__proportional, + delta2=self.__delta2, + experiment_label=self.__experiment_label, + x1_level=self.__x1_level, + x2=self.__x2, + mini_meta=self.__mini_meta, + ) + + self.__mean_diff = EffectSizeDataFrame( + self, "mean_diff", **effectsize_df_kwargs + ) + + self.__median_diff = EffectSizeDataFrame( + self, "median_diff", **effectsize_df_kwargs + ) + + self.__cohens_d = EffectSizeDataFrame(self, "cohens_d", **effectsize_df_kwargs) + + self.__cohens_h = EffectSizeDataFrame(self, "cohens_h", **effectsize_df_kwargs) + + self.__hedges_g = EffectSizeDataFrame(self, "hedges_g", **effectsize_df_kwargs) + + self.__delta_g = EffectSizeDataFrame(self, "delta_g", **effectsize_df_kwargs) + + if not self.__is_paired: + self.__cliffs_delta = EffectSizeDataFrame( + self, "cliffs_delta", **effectsize_df_kwargs + ) + else: + self.__cliffs_delta = ( + "The data is paired; Cliff's delta is therefore undefined." 
+ ) diff --git a/dabest/_delta_objects.py b/dabest/_delta_objects.py new file mode 100644 index 00000000..fa455637 --- /dev/null +++ b/dabest/_delta_objects.py @@ -0,0 +1,802 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/delta_objects.ipynb. + +# %% auto 0 +__all__ = ['DeltaDelta', 'MiniMetaDelta'] + +# %% ../nbs/API/delta_objects.ipynb 5 +from scipy.stats import norm +import pandas as pd +import numpy as np +from numpy import sort as npsort +from numpy import isnan +from string import Template +import warnings +import datetime as dt + +# %% ../nbs/API/delta_objects.ipynb 6 +class DeltaDelta(object): + """ + A class to compute and store the delta-delta statistics for experiments with a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, 1 and 2. The data is divided into two pairs of two groups, and a primary delta is first calculated as the mean difference between each of the pairs: + + + $$\Delta_{1} = \overline{X}_{A_{2}, B_{1}} - \overline{X}_{A_{1}, B_{1}}$$ + + $$\Delta_{2} = \overline{X}_{A_{2}, B_{2}} - \overline{X}_{A_{1}, B_{2}}$$ + + + where $\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\Delta$ is the mean difference between two samples. 
+ + A delta-delta value is then calculated as the mean difference between the two primary deltas: + + + $$\Delta_{\Delta} = \Delta_{2} - \Delta_{1}$$ + + and a deltas' g value is calculated as the mean difference between the two primary deltas divided by + the standard deviation of the delta-delta value, which is calculated from a pooled variance of the 4 samples: + + $$\Delta_{g} = \frac{\Delta_{\Delta}}{s_{\Delta_{\Delta}}}$$ + + $$s_{\Delta_{\Delta}} = \sqrt{\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$ + + where $s$ is the standard deviation and $n$ is the sample size. + + + """ + + def __init__( + self, effectsizedataframe, permutation_count, bootstraps_delta_delta, ci=95 + ): + from ._stats_tools import effsize as es + from ._stats_tools import confint_1group as ci1g + from ._stats_tools import confint_2group_diff as ci2g + + self.__effsizedf = effectsizedataframe.results + self.__dabest_obj = effectsizedataframe.dabest_obj + self.__ci = ci + self.__resamples = effectsizedataframe.resamples + self.__effect_size = effectsizedataframe.effect_size + self.__alpha = ci2g._compute_alpha_from_ci(ci) + self.__permutation_count = permutation_count + self.__bootstraps = np.array(self.__effsizedf["bootstraps"]) + self.__control = self.__dabest_obj.experiment_label[0] + self.__test = self.__dabest_obj.experiment_label[1] + + # Compute the bootstrap delta-delta or deltas' g and the true dela-delta based on the raw data + if self.__effect_size == "mean_diff": + self.__bootstraps_delta_delta = bootstraps_delta_delta[2] + self.__difference = ( + self.__effsizedf["difference"][1] - self.__effsizedf["difference"][0] + ) + else: + self.__bootstraps_delta_delta = bootstraps_delta_delta[0] + self.__difference = bootstraps_delta_delta[1] + + sorted_delta_delta = 
npsort(self.__bootstraps_delta_delta) + + self.__bias_correction = ci2g.compute_meandiff_bias_correction( + self.__bootstraps_delta_delta, self.__difference + ) + + self.__jackknives = np.array( + ci1g.compute_1group_jackknife(self.__bootstraps_delta_delta, np.mean) + ) + + self.__acceleration_value = ci2g._calc_accel(self.__jackknives) + + # Compute BCa intervals. + bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( + self.__bias_correction, self.__acceleration_value, self.__resamples, ci + ) + + self.__bca_interval_idx = (bca_idx_low, bca_idx_high) + + if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): + self.__bca_low = sorted_delta_delta[bca_idx_low] + self.__bca_high = sorted_delta_delta[bca_idx_high] + + err1 = "The $lim_type limit of the interval" + err2 = "was in the $loc 10 values." + err3 = "The result should be considered unstable." + err_temp = Template(" ".join([err1, err2, err3])) + + if bca_idx_low <= 10: + warnings.warn( + err_temp.substitute(lim_type="lower", loc="bottom"), stacklevel=1 + ) + + if bca_idx_high >= self.__resamples - 9: + warnings.warn( + err_temp.substitute(lim_type="upper", loc="top"), stacklevel=1 + ) + + else: + err1 = "The $lim_type limit of the BCa interval cannot be computed." + err2 = "It is set to the effect size itself." + err3 = "All bootstrap values were likely all the same." + err_temp = Template(" ".join([err1, err2, err3])) + + if isnan(bca_idx_low): + self.__bca_low = self.__difference + warnings.warn(err_temp.substitute(lim_type="lower"), stacklevel=0) + + if isnan(bca_idx_high): + self.__bca_high = self.__difference + warnings.warn(err_temp.substitute(lim_type="upper"), stacklevel=0) + + # Compute percentile intervals. 
+ pct_idx_low = int((self.__alpha / 2) * self.__resamples) + pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples) + + self.__pct_interval_idx = (pct_idx_low, pct_idx_high) + self.__pct_low = sorted_delta_delta[pct_idx_low] + self.__pct_high = sorted_delta_delta[pct_idx_high] + + def __permutation_test(self): + """ + Perform a permutation test and obtain the permutation p-value + based on the permutation data. + """ + self.__permutations = np.array(self.__effsizedf["permutations"]) + + THRESHOLD = np.abs(self.__difference) + + self.__permutations_delta_delta = np.array( + self.__permutations[1] - self.__permutations[0] + ) + + count = sum(np.abs(self.__permutations_delta_delta) > THRESHOLD) + self.__pvalue_permutation = count / self.__permutation_count + + def __repr__(self, header=True, sigfig=3): + from .misc_tools import print_greeting + + first_line = {"control": self.__control, "test": self.__test} + + if self.__effect_size == "mean_diff": + out1 = "The delta-delta between {control} and {test} ".format(**first_line) + else: + out1 = "The deltas' g between {control} and {test} ".format(**first_line) + + base_string_fmt = "{:." + str(sigfig) + "}" + if "." in str(self.__ci): + ci_width = base_string_fmt.format(self.__ci) + else: + ci_width = str(self.__ci) + + ci_out = { + "es": base_string_fmt.format(self.__difference), + "ci": ci_width, + "bca_low": base_string_fmt.format(self.__bca_low), + "bca_high": base_string_fmt.format(self.__bca_high), + } + + out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) + out = out1 + out2 + + if header is True: + out = print_greeting() + "\n" + "\n" + out + + pval_rounded = base_string_fmt.format(self.pvalue_permutation) + + p1 = "The p-value of the two-sided permutation t-test is {}, ".format( + pval_rounded + ) + p2 = "calculated for legacy purposes only. 
" + pvalue = p1 + p2 + + bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) + bs2 = "the confidence interval is bias-corrected and accelerated." + bs = bs1 + bs2 + + pval_def1 = ( + "Any p-value reported is the probability of observing the " + + "effect size (or greater),\nassuming the null hypothesis of " + + "zero difference is true." + ) + pval_def2 = ( + "\nFor each p-value, 5000 reshuffles of the " + + "control and test labels were performed." + ) + pval_def = pval_def1 + pval_def2 + + return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) + + def to_dict(self): + """ + Returns the attributes of the `DeltaDelta` object as a + dictionary. + """ + # Only get public (user-facing) attributes. + attrs = [a for a in dir(self) if not a.startswith(("_", "to_dict"))] + out = {} + for a in attrs: + out[a] = getattr(self, a) + return out + + @property + def ci(self): + """ + Returns the width of the confidence interval, in percent. + """ + return self.__ci + + @property + def alpha(self): + """ + Returns the significance level of the statistical test as a float + between 0 and 1. + """ + return self.__alpha + + @property + def bias_correction(self): + return self.__bias_correction + + @property + def bootstraps(self): + """ + Return the bootstrapped deltas from all the experiment groups. + """ + return self.__bootstraps + + @property + def jackknives(self): + return self.__jackknives + + @property + def acceleration_value(self): + return self.__acceleration_value + + @property + def bca_low(self): + """ + The bias-corrected and accelerated confidence interval lower limit. + """ + return self.__bca_low + + @property + def bca_high(self): + """ + The bias-corrected and accelerated confidence interval upper limit. + """ + return self.__bca_high + + @property + def bca_interval_idx(self): + return self.__bca_interval_idx + + @property + def control(self): + """ + Return the name of the control experiment group. 
+ """ + return self.__control + + @property + def test(self): + """ + Return the name of the test experiment group. + """ + return self.__test + + @property + def bootstraps_delta_delta(self): + """ + Return the delta-delta values calculated from the bootstrapped + deltas. + """ + return self.__bootstraps_delta_delta + + @property + def difference(self): + """ + Return the delta-delta value calculated based on the raw data. + """ + return self.__difference + + @property + def pct_interval_idx(self): + return self.__pct_interval_idx + + @property + def pct_low(self): + """ + The percentile confidence interval lower limit. + """ + return self.__pct_low + + @property + def pct_high(self): + """ + The percentile confidence interval lower limit. + """ + return self.__pct_high + + @property + def pvalue_permutation(self): + try: + return self.__pvalue_permutation + except AttributeError: + self.__permutation_test() + return self.__pvalue_permutation + + @property + def permutation_count(self): + """ + The number of permuations taken. + """ + return self.__permutation_count + + @property + def permutations(self): + """ + Return the mean differences of permutations obtained during + the permutation test for each experiment group. + """ + try: + return self.__permutations + except AttributeError: + self.__permutation_test() + return self.__permutations + + @property + def permutations_delta_delta(self): + """ + Return the delta-delta values of permutations obtained + during the permutation test. + """ + try: + return self.__permutations_delta_delta + except AttributeError: + self.__permutation_test() + return self.__permutations_delta_delta + +# %% ../nbs/API/delta_objects.ipynb 10 +class MiniMetaDelta(object): + """ + A class to compute and store the weighted delta. + A weighted delta is calculated if the argument ``mini_meta=True`` is passed during ``dabest.load()``. 
+ + """ + + def __init__(self, effectsizedataframe, permutation_count, + ci=95): + from ._stats_tools import effsize as es + from ._stats_tools import confint_1group as ci1g + from ._stats_tools import confint_2group_diff as ci2g + + self.__effsizedf = effectsizedataframe.results + self.__dabest_obj = effectsizedataframe.dabest_obj + self.__ci = ci + self.__resamples = effectsizedataframe.resamples + self.__alpha = ci2g._compute_alpha_from_ci(ci) + self.__permutation_count = permutation_count + self.__bootstraps = np.array(self.__effsizedf["bootstraps"]) + self.__control = np.array(self.__effsizedf["control"]) + self.__test = np.array(self.__effsizedf["test"]) + self.__control_N = np.array(self.__effsizedf["control_N"]) + self.__test_N = np.array(self.__effsizedf["test_N"]) + + + idx = self.__dabest_obj.idx + dat = self.__dabest_obj._plot_data + xvar = self.__dabest_obj._xvar + yvar = self.__dabest_obj._yvar + + # compute the variances of each control group and each test group + control_var=[] + test_var=[] + for j, current_tuple in enumerate(idx): + cname = current_tuple[0] + control = dat[dat[xvar] == cname][yvar].copy() + control_var.append(np.var(control, ddof=1)) + + tname = current_tuple[1] + test = dat[dat[xvar] == tname][yvar].copy() + test_var.append(np.var(test, ddof=1)) + self.__control_var = np.array(control_var) + self.__test_var = np.array(test_var) + + # Compute pooled group variances for each pair of experiment groups + # based on the raw data + self.__group_var = ci2g.calculate_group_var(self.__control_var, + self.__control_N, + self.__test_var, + self.__test_N) + + # Compute the weighted average mean differences of the bootstrap data + # using the pooled group variances of the raw data as the inverse of + # weights + self.__bootstraps_weighted_delta = ci2g.calculate_weighted_delta( + self.__group_var, + self.__bootstraps, + self.__resamples) + + # Compute the weighted average mean difference based on the raw data + self.__difference = 
es.weighted_delta(self.__effsizedf["difference"], + self.__group_var) + + sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta) + + + self.__bias_correction = ci2g.compute_meandiff_bias_correction( + self.__bootstraps_weighted_delta, self.__difference) + + self.__jackknives = np.array(ci1g.compute_1group_jackknife( + self.__bootstraps_weighted_delta, + np.mean)) + + self.__acceleration_value = ci2g._calc_accel(self.__jackknives) + + # Compute BCa intervals. + bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( + self.__bias_correction, self.__acceleration_value, + self.__resamples, ci) + + self.__bca_interval_idx = (bca_idx_low, bca_idx_high) + + if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): + self.__bca_low = sorted_weighted_deltas[bca_idx_low] + self.__bca_high = sorted_weighted_deltas[bca_idx_high] + + err1 = "The $lim_type limit of the interval" + err2 = "was in the $loc 10 values." + err3 = "The result should be considered unstable." + err_temp = Template(" ".join([err1, err2, err3])) + + if bca_idx_low <= 10: + warnings.warn(err_temp.substitute(lim_type="lower", + loc="bottom"), + stacklevel=1) + + if bca_idx_high >= self.__resamples-9: + warnings.warn(err_temp.substitute(lim_type="upper", + loc="top"), + stacklevel=1) + + else: + err1 = "The $lim_type limit of the BCa interval cannot be computed." + err2 = "It is set to the effect size itself." + err3 = "All bootstrap values were likely all the same." + err_temp = Template(" ".join([err1, err2, err3])) + + if isnan(bca_idx_low): + self.__bca_low = self.__difference + warnings.warn(err_temp.substitute(lim_type="lower"), + stacklevel=0) + + if isnan(bca_idx_high): + self.__bca_high = self.__difference + warnings.warn(err_temp.substitute(lim_type="upper"), + stacklevel=0) + + # Compute percentile intervals. 
+ pct_idx_low = int((self.__alpha/2) * self.__resamples) + pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples) + + self.__pct_interval_idx = (pct_idx_low, pct_idx_high) + self.__pct_low = sorted_weighted_deltas[pct_idx_low] + self.__pct_high = sorted_weighted_deltas[pct_idx_high] + + + + def __permutation_test(self): + """ + Perform a permutation test and obtain the permutation p-value + based on the permutation data. + """ + self.__permutations = np.array(self.__effsizedf["permutations"]) + self.__permutations_var = np.array(self.__effsizedf["permutations_var"]) + + THRESHOLD = np.abs(self.__difference) + + all_num = [] + all_denom = [] + + groups = len(self.__permutations) + for i in range(0, len(self.__permutations[0])): + weight = [1/self.__permutations_var[j][i] for j in range(0, groups)] + all_num.append(np.sum([weight[j]*self.__permutations[j][i] for j in range(0, groups)])) + all_denom.append(np.sum(weight)) + + output=[] + for i in range(0, len(all_num)): + output.append(all_num[i]/all_denom[i]) + + self.__permutations_weighted_delta = np.array(output) + + count = sum(np.abs(self.__permutations_weighted_delta)>THRESHOLD) + self.__pvalue_permutation = count/self.__permutation_count + + + + def __repr__(self, header=True, sigfig=3): + from .misc_tools import print_greeting + + is_paired = self.__dabest_obj.is_paired + + PAIRED_STATUS = {'baseline' : 'paired', + 'sequential' : 'paired', + 'None' : 'unpaired' + } + + first_line = {"paired_status": PAIRED_STATUS[str(is_paired)]} + + + out1 = "The weighted-average {paired_status} mean differences ".format(**first_line) + + base_string_fmt = "{:." + str(sigfig) + "}" + if "." 
in str(self.__ci): + ci_width = base_string_fmt.format(self.__ci) + else: + ci_width = str(self.__ci) + + ci_out = {"es" : base_string_fmt.format(self.__difference), + "ci" : ci_width, + "bca_low" : base_string_fmt.format(self.__bca_low), + "bca_high" : base_string_fmt.format(self.__bca_high)} + + out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) + out = out1 + out2 + + if header is True: + out = print_greeting() + "\n" + "\n" + out + + + pval_rounded = base_string_fmt.format(self.pvalue_permutation) + + + p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded) + p2 = "calculated for legacy purposes only. " + pvalue = p1 + p2 + + + bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) + bs2 = "the confidence interval is bias-corrected and accelerated." + bs = bs1 + bs2 + + pval_def1 = "Any p-value reported is the probability of observing the" + \ + "effect size (or greater),\nassuming the null hypothesis of" + \ + "zero difference is true." + pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \ + "control and test labels were performed." + pval_def = pval_def1 + pval_def2 + + + return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) + + + def to_dict(self): + """ + Returns all attributes of the `dabest.MiniMetaDelta` object as a + dictionary. + """ + # Only get public (user-facing) attributes. + attrs = [a for a in dir(self) + if not a.startswith(("_", "to_dict"))] + out = {} + for a in attrs: + out[a] = getattr(self, a) + return out + + + @property + def ci(self): + """ + Returns the width of the confidence interval, in percent. + """ + return self.__ci + + + @property + def alpha(self): + """ + Returns the significance level of the statistical test as a float + between 0 and 1. 
+ """ + return self.__alpha + + + @property + def bias_correction(self): + return self.__bias_correction + + + @property + def bootstraps(self): + ''' + Return the bootstrapped differences from all the experiment groups. + ''' + return self.__bootstraps + + + @property + def jackknives(self): + return self.__jackknives + + + @property + def acceleration_value(self): + return self.__acceleration_value + + + @property + def bca_low(self): + """ + The bias-corrected and accelerated confidence interval lower limit. + """ + return self.__bca_low + + + @property + def bca_high(self): + """ + The bias-corrected and accelerated confidence interval upper limit. + """ + return self.__bca_high + + + @property + def bca_interval_idx(self): + return self.__bca_interval_idx + + + @property + def control(self): + ''' + Return the names of the control groups from all the experiment + groups in order. + ''' + return self.__control + + + @property + def test(self): + ''' + Return the names of the test groups from all the experiment + groups in order. + ''' + return self.__test + + @property + def control_N(self): + ''' + Return the sizes of the control groups from all the experiment + groups in order. + ''' + return self.__control_N + + + @property + def test_N(self): + ''' + Return the sizes of the test groups from all the experiment + groups in order. + ''' + return self.__test_N + + + @property + def control_var(self): + ''' + Return the estimated population variances of the control groups + from all the experiment groups in order. Here the population + variance is estimated from the sample variance. + ''' + return self.__control_var + + + @property + def test_var(self): + ''' + Return the estimated population variances of the control groups + from all the experiment groups in order. Here the population + variance is estimated from the sample variance. 
+ ''' + return self.__test_var + + + @property + def group_var(self): + ''' + Return the pooled group variances of all the experiment groups + in order. + ''' + return self.__group_var + + + @property + def bootstraps_weighted_delta(self): + ''' + Return the weighted-average mean differences calculated from the bootstrapped + deltas and weights across the experiment groups, where the weights are + the inverse of the pooled group variances. + ''' + return self.__bootstraps_weighted_delta + + + @property + def difference(self): + ''' + Return the weighted-average delta calculated from the raw data. + ''' + return self.__difference + + + @property + def pct_interval_idx (self): + return self.__pct_interval_idx + + + @property + def pct_low(self): + """ + The percentile confidence interval lower limit. + """ + return self.__pct_low + + + @property + def pct_high(self): + """ + The percentile confidence interval lower limit. + """ + return self.__pct_high + + + @property + def pvalue_permutation(self): + try: + return self.__pvalue_permutation + except AttributeError: + self.__permutation_test() + return self.__pvalue_permutation + + + @property + def permutation_count(self): + """ + The number of permuations taken. + """ + return self.__permutation_count + + + @property + def permutations(self): + ''' + Return the mean differences of permutations obtained during + the permutation test for each experiment group. + ''' + try: + return self.__permutations + except AttributeError: + self.__permutation_test() + return self.__permutations + + + @property + def permutations_var(self): + ''' + Return the pooled group variances of permutations obtained during + the permutation test for each experiment group. 
+ ''' + try: + return self.__permutations_var + except AttributeError: + self.__permutation_test() + return self.__permutations_var + + + @property + def permutations_weighted_delta(self): + ''' + Return the weighted-average deltas of permutations obtained + during the permutation test. + ''' + try: + return self.__permutations_weighted_delta + except AttributeError: + self.__permutation_test() + return self.__permutations_weighted_delta + + diff --git a/dabest/_effsize_objects.py b/dabest/_effsize_objects.py new file mode 100644 index 00000000..3bf10723 --- /dev/null +++ b/dabest/_effsize_objects.py @@ -0,0 +1,1468 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/API/effsize_objects.ipynb. + +# %% auto 0 +__all__ = ['TwoGroupsEffectSize', 'EffectSizeDataFrame', 'PermutationTest'] + +# %% ../nbs/API/effsize_objects.ipynb 5 +import pandas as pd +import lqrt +from scipy.stats import norm +from numpy import array, isnan, isinf, repeat, random, isin, abs, var +from numpy import sort as npsort +from numpy import nan as npnan +from numpy.random import PCG64, RandomState +from statsmodels.stats.contingency_tables import mcnemar +import warnings +from string import Template +import scipy.stats as spstats + +# %% ../nbs/API/effsize_objects.ipynb 6 +class TwoGroupsEffectSize(object): + + """ + A class to compute and store the results of bootstrapped + mean differences between two groups. + + Compute the effect size between two groups. + + Parameters + ---------- + control : array-like + test : array-like + These should be numerical iterables. + effect_size : string. + Any one of the following are accepted inputs: + 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta' + is_paired : string, default None + resamples : int, default 5000 + The number of bootstrap resamples to be taken for the calculation + of the confidence interval limits. 
+ permutation_count : int, default 5000 + The number of permutations (reshuffles) to perform for the + computation of the permutation p-value + ci : float, default 95 + The confidence interval width. The default of 95 produces 95% + confidence intervals. + random_seed : int, default 12345 + `random_seed` is used to seed the random number generator during + bootstrap resampling. This ensures that the confidence intervals + reported are replicable. + + Returns + ------- + A :py:class:`TwoGroupEffectSize` object: + `difference` : float + The effect size of the difference between the control and the test. + `effect_size` : string + The type of effect size reported. + `is_paired` : string + The type of repeated-measures experiment. + `ci` : float + Returns the width of the confidence interval, in percent. + `alpha` : float + Returns the significance level of the statistical test as a float between 0 and 1. + `resamples` : int + The number of resamples performed during the bootstrap procedure. + `bootstraps` : numpy ndarray + The generated bootstraps of the effect size. + `random_seed` : int + The number used to initialise the numpy random seed generator, ie.`seed_value` from `numpy.random.seed(seed_value)` is returned. + `bca_low, bca_high` : float + The bias-corrected and accelerated confidence interval lower limit and upper limits, respectively. + `pct_low, pct_high` : float + The percentile confidence interval lower limit and upper limits, respectively. 
+ """ + + def __init__( + self, + control, + test, + effect_size, + proportional=False, + is_paired=None, + ci=95, + resamples=5000, + permutation_count=5000, + random_seed=12345, + ): + from ._stats_tools import confint_2group_diff as ci2g + from ._stats_tools import effsize as es + + self.__EFFECT_SIZE_DICT = { + "mean_diff": "mean difference", + "median_diff": "median difference", + "cohens_d": "Cohen's d", + "cohens_h": "Cohen's h", + "hedges_g": "Hedges' g", + "cliffs_delta": "Cliff's delta", + "delta_g": "deltas' g", + } + + self.__is_paired = is_paired + self.__resamples = resamples + self.__effect_size = effect_size + self.__random_seed = random_seed + self.__ci = ci + self.__proportional = proportional + self._check_errors(control, test) + + # Convert to numpy arrays for speed. + # NaNs are automatically dropped. + control = array(control) + test = array(test) + self.__control = control[~isnan(control)] + self.__test = test[~isnan(test)] + self.__permutation_count = permutation_count + + self.__alpha = ci2g._compute_alpha_from_ci(self.__ci) + + self.__difference = es.two_group_difference( + self.__control, self.__test, self.__is_paired, self.__effect_size + ) + + self.__jackknives = ci2g.compute_meandiff_jackknife( + self.__control, self.__test, self.__is_paired, self.__effect_size + ) + + self.__acceleration_value = ci2g._calc_accel(self.__jackknives) + + bootstraps = ci2g.compute_bootstrapped_diff( + self.__control, + self.__test, + self.__is_paired, + self.__effect_size, + self.__resamples, + self.__random_seed, + ) + self.__bootstraps = bootstraps + + sorted_bootstraps = npsort(self.__bootstraps) + # Added in v0.2.6. + # Raises a UserWarning if there are any infiinities in the bootstraps. + num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)]) + + if num_infinities > 0: + warn_msg = ( + "There are {} bootstrap(s) that are not defined. " + "This is likely due to smaple sample sizes. 
" + "The values in a bootstrap for a group will be more likely " + "to be all equal, with a resulting variance of zero. " + "The computation of Cohen's d and Hedges' g thus " + "involved a division by zero. " + ) + warnings.warn(warn_msg.format(num_infinities), category=UserWarning) + + self.__bias_correction = ci2g.compute_meandiff_bias_correction( + self.__bootstraps, self.__difference + ) + + self._compute_bca_intervals(sorted_bootstraps) + + # Compute percentile intervals. + pct_idx_low = int((self.__alpha / 2) * self.__resamples) + pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples) + + self.__pct_interval_idx = (pct_idx_low, pct_idx_high) + self.__pct_low = sorted_bootstraps[pct_idx_low] + self.__pct_high = sorted_bootstraps[pct_idx_high] + + self._perform_statistical_test() + + def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3): + RM_STATUS = { + "baseline": "for repeated measures against baseline \n", + "sequential": "for the sequential design of repeated-measures experiment \n", + "None": "", + } + + PAIRED_STATUS = { + "baseline": "paired", + "sequential": "paired", + "None": "unpaired", + } + + first_line = { + "rm_status": RM_STATUS[str(self.__is_paired)], + "es": self.__EFFECT_SIZE_DICT[self.__effect_size], + "paired_status": PAIRED_STATUS[str(self.__is_paired)], + } + + out1 = "The {paired_status} {es} {rm_status}".format(**first_line) + + base_string_fmt = "{:." + str(sigfig) + "}" + if "." 
in str(self.__ci): + ci_width = base_string_fmt.format(self.__ci) + else: + ci_width = str(self.__ci) + + ci_out = { + "es": base_string_fmt.format(self.__difference), + "ci": ci_width, + "bca_low": base_string_fmt.format(self.__bca_low), + "bca_high": base_string_fmt.format(self.__bca_high), + } + + out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out) + out = out1 + out2 + + pval_rounded = base_string_fmt.format(self.pvalue_permutation) + + p1 = "The p-value of the two-sided permutation t-test is {}, ".format( + pval_rounded + ) + p2 = "calculated for legacy purposes only. " + pvalue = p1 + p2 + + bs1 = "{} bootstrap samples were taken; ".format(self.__resamples) + bs2 = "the confidence interval is bias-corrected and accelerated." + bs = bs1 + bs2 + + pval_def1 = ( + "Any p-value reported is the probability of observing the" + + "effect size (or greater),\nassuming the null hypothesis of" + + "zero difference is true." + ) + pval_def2 = ( + "\nFor each p-value, 5000 reshuffles of the " + + "control and test labels were performed." + ) + pval_def = pval_def1 + pval_def2 + + if show_resample_count and define_pval: + return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def) + elif not show_resample_count and define_pval: + return "{}\n{}\n\n{}".format(out, pvalue, pval_def) + elif show_resample_count and ~define_pval: + return "{}\n{}\n\n{}".format(out, pvalue, bs) + else: + return "{}\n{}".format(out, pvalue) + + def _check_errors(self, control, test): + ''' + Function to check configuration errors for the given control and test data. + ''' + kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()] + if self.__effect_size not in kosher_es: + err1 = "The effect size '{}'".format(self.__effect_size) + err2 = "is not one of {}".format(kosher_es) + raise ValueError(" ".join([err1, err2])) + + if self.__effect_size == "cliffs_delta" and self.__is_paired: + err1 = "`paired` is not None; therefore Cliff's delta is not defined." 
+ raise ValueError(err1) + + if self.__proportional and self.__effect_size not in ["mean_diff", "cohens_h"]: + err1 = "`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined." + raise ValueError(err1) + + if self.__proportional and ( + isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False + ): + err1 = ( + "`proportional` is True; Only accept binary data consisting of 0 and 1." + ) + raise ValueError(err1) + + def _compute_bca_intervals(self, sorted_bootstraps): + ''' + Function to compute the bca intervals given the sorted bootstraps. + ''' + from ._stats_tools import confint_2group_diff as ci2g + + # Compute BCa intervals. + bca_idx_low, bca_idx_high = ci2g.compute_interval_limits( + self.__bias_correction, + self.__acceleration_value, + self.__resamples, + self.__ci, + ) + + self.__bca_interval_idx = (bca_idx_low, bca_idx_high) + + if ~isnan(bca_idx_low) and ~isnan(bca_idx_high): + self.__bca_low = sorted_bootstraps[bca_idx_low] + self.__bca_high = sorted_bootstraps[bca_idx_high] + + err1 = "The $lim_type limit of the interval" + err2 = "was in the $loc 10 values." + err3 = "The result should be considered unstable." + err_temp = Template(" ".join([err1, err2, err3])) + + if bca_idx_low <= 10: + warnings.warn( + err_temp.substitute(lim_type="lower", loc="bottom"), stacklevel=1 + ) + + if bca_idx_high >= self.__resamples - 9: + warnings.warn( + err_temp.substitute(lim_type="upper", loc="top"), stacklevel=1 + ) + + else: + # TODO improve error handling, separate file with error messages? + err1 = "The $lim_type limit of the BCa interval cannot be computed." + err2 = "It is set to the effect size itself." + err3 = "All bootstrap values were likely all the same." 
+ err_temp = Template(" ".join([err1, err2, err3])) + + if isnan(bca_idx_low): + self.__bca_low = self.__difference + warnings.warn(err_temp.substitute(lim_type="lower"), stacklevel=0) + + if isnan(bca_idx_high): + self.__bca_high = self.__difference + warnings.warn(err_temp.substitute(lim_type="upper"), stacklevel=0) + + def _perform_statistical_test(self): + ''' + Function to complete the statistical tests + ''' + from ._stats_tools import effsize as es + + # Perform statistical tests. + self.__PermutationTest_result = PermutationTest( + self.__control, + self.__test, + self.__effect_size, + self.__is_paired, + self.__permutation_count, + ) + + if self.__is_paired and not self.__proportional: + # Wilcoxon, a non-parametric version of the paired T-test. + wilcoxon = spstats.wilcoxon(self.__control, self.__test) + self.__pvalue_wilcoxon = wilcoxon.pvalue + self.__statistic_wilcoxon = wilcoxon.statistic + + if self.__effect_size != "median_diff": + # Paired Student's t-test. + paired_t = spstats.ttest_rel( + self.__control, self.__test, nan_policy="omit" + ) + self.__pvalue_paired_students_t = paired_t.pvalue + self.__statistic_paired_students_t = paired_t.statistic + + elif self.__is_paired and self.__proportional: + # for binary paired data, use McNemar's test + # References: + # https://en.wikipedia.org/wiki/McNemar%27s_test + + df_temp = pd.DataFrame({"control": self.__control, "test": self.__test}) + x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)]) + x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)]) + x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)]) + x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)]) + table = [[x1, x2], [x3, x4]] + _mcnemar = mcnemar(table, exact=True, correction=True) + self.__pvalue_mcnemar = _mcnemar.pvalue + self.__statistic_mcnemar = _mcnemar.statistic + + elif self.__effect_size == "cliffs_delta": + # Let's go with Brunner-Munzel! 
+ brunner_munzel = spstats.brunnermunzel( + self.__control, self.__test, nan_policy="omit" + ) + self.__pvalue_brunner_munzel = brunner_munzel.pvalue + self.__statistic_brunner_munzel = brunner_munzel.statistic + + elif self.__effect_size == "median_diff": + # According to scipy's documentation of the function, + # "The Kruskal-Wallis H-test tests the null hypothesis + # that the population median of all of the groups are equal." + kruskal = spstats.kruskal(self.__control, self.__test, nan_policy="omit") + self.__pvalue_kruskal = kruskal.pvalue + self.__statistic_kruskal = kruskal.statistic + + else: # for mean difference, Cohen's d, and Hedges' g. + # Welch's t-test, assumes normality of distributions, + # but does not assume equal variances. + welch = spstats.ttest_ind( + self.__control, self.__test, equal_var=False, nan_policy="omit" + ) + self.__pvalue_welch = welch.pvalue + self.__statistic_welch = welch.statistic + + # Student's t-test, assumes normality of distributions, + # as well as assumption of equal variances. + students_t = spstats.ttest_ind( + self.__control, self.__test, equal_var=True, nan_policy="omit" + ) + self.__pvalue_students_t = students_t.pvalue + self.__statistic_students_t = students_t.statistic + + # Mann-Whitney test: Non parametric, + # does not assume normality of distributions + try: + mann_whitney = spstats.mannwhitneyu( + self.__control, self.__test, alternative="two-sided" + ) + self.__pvalue_mann_whitney = mann_whitney.pvalue + self.__statistic_mann_whitney = mann_whitney.statistic + except ValueError: + # TODO At least print some warning? + # Occurs when the control and test are exactly identical + # in terms of rank (eg. all zeros.) + pass + + standardized_es = es.cohens_d(self.__control, self.__test, is_paired=None) + + # The Cohen's h calculation is for binary categorical data + try: + self.__proportional_difference = es.cohens_h( + self.__control, self.__test + ) + except ValueError: + # TODO At least print some warning? 
+ # Occur only when the data consists not only 0's and 1's. + pass + + def to_dict(self): + """ + Returns the attributes of the `dabest.TwoGroupEffectSize` object as a + dictionary. + """ + # Only get public (user-facing) attributes. + attrs = [a for a in dir(self) if not a.startswith(("_", "to_dict"))] + out = {} + for a in attrs: + out[a] = getattr(self, a) + return out + + @property + def difference(self): + """ + Returns the difference between the control and the test. + """ + return self.__difference + + @property + def effect_size(self): + """ + Returns the type of effect size reported. + """ + return self.__EFFECT_SIZE_DICT[self.__effect_size] + + @property + def is_paired(self): + return self.__is_paired + + @property + def proportional(self): + return self.__proportional + + @property + def ci(self): + """ + Returns the width of the confidence interval, in percent. + """ + return self.__ci + + @property + def alpha(self): + """ + Returns the significance level of the statistical test as a float + between 0 and 1. + """ + return self.__alpha + + @property + def resamples(self): + """ + The number of resamples performed during the bootstrap procedure. + """ + return self.__resamples + + @property + def bootstraps(self): + """ + The generated bootstraps of the effect size. + """ + return self.__bootstraps + + @property + def random_seed(self): + """ + The number used to initialise the numpy random seed generator, ie. + `seed_value` from `numpy.random.seed(seed_value)` is returned. + """ + return self.__random_seed + + @property + def bca_interval_idx(self): + return self.__bca_interval_idx + + @property + def bca_low(self): + """ + The bias-corrected and accelerated confidence interval lower limit. + """ + return self.__bca_low + + @property + def bca_high(self): + """ + The bias-corrected and accelerated confidence interval upper limit. 
+ """ + return self.__bca_high + + @property + def pct_interval_idx(self): + return self.__pct_interval_idx + + @property + def pct_low(self): + """ + The percentile confidence interval lower limit. + """ + return self.__pct_low + + @property + def pct_high(self): + """ + The percentile confidence interval lower limit. + """ + return self.__pct_high + + @property + def pvalue_brunner_munzel(self): + try: + return self.__pvalue_brunner_munzel + except AttributeError: + return npnan + + @property + def statistic_brunner_munzel(self): + try: + return self.__statistic_brunner_munzel + except AttributeError: + return npnan + + @property + def pvalue_wilcoxon(self): + try: + return self.__pvalue_wilcoxon + except AttributeError: + return npnan + + @property + def statistic_wilcoxon(self): + try: + return self.__statistic_wilcoxon + except AttributeError: + return npnan + + @property + def pvalue_mcnemar(self): + try: + return self.__pvalue_mcnemar + except AttributeError: + return npnan + + @property + def statistic_mcnemar(self): + try: + return self.__statistic_mcnemar + except AttributeError: + return npnan + + @property + def pvalue_paired_students_t(self): + # TODO Missing docstring + try: + return self.__pvalue_paired_students_t + except AttributeError: + return npnan + + @property + def statistic_paired_students_t(self): + # TODO Missing docstring + try: + return self.__statistic_paired_students_t + except AttributeError: + return npnan + + @property + def pvalue_kruskal(self): + # TODO Missing docstring + try: + return self.__pvalue_kruskal + except AttributeError: + return npnan + + @property + def statistic_kruskal(self): + # TODO Missing docstring + try: + return self.__statistic_kruskal + except AttributeError: + return npnan + + @property + def pvalue_welch(self): + # TODO Missing docstring + try: + return self.__pvalue_welch + except AttributeError: + return npnan + + @property + def statistic_welch(self): + # TODO Missing docstring + try: + return 
@property
def proportional_difference(self):
    """
    Cohen's h (difference between two proportions, for binary data).
    Returns NaN when it was not computed for this comparison.
    """
    try:
        value = self.__proportional_difference
    except AttributeError:
        # cohens_h raised during __init__ (data was not all 0s and 1s),
        # so the attribute was never set.
        return npnan
    return value
+ """ + + self.__dabest_obj = dabest + self.__effect_size = effect_size + self.__is_paired = is_paired + self.__ci = ci + self.__resamples = resamples + self.__permutation_count = permutation_count + self.__random_seed = random_seed + self.__proportional = proportional + self.__x1_level = x1_level + self.__experiment_label = experiment_label + self.__x2 = x2 + self.__delta2 = delta2 + self.__mini_meta = mini_meta + + + def __pre_calc(self): + from .misc_tools import print_greeting, get_varname + from ._stats_tools import confint_2group_diff as ci2g + from ._delta_objects import MiniMetaDelta, DeltaDelta + + idx = self.__dabest_obj.idx + dat = self.__dabest_obj._plot_data + xvar = self.__dabest_obj._xvar + yvar = self.__dabest_obj._yvar + + out = [] + reprs = [] + + if self.__delta2: + mixed_data = [] + for j, current_tuple in enumerate(idx): + if self.__is_paired != "sequential": + cname = current_tuple[0] + control = dat[dat[xvar] == cname][yvar].copy() + + for ix, tname in enumerate(current_tuple[1:]): + if self.__is_paired == "sequential": + cname = current_tuple[ix] + control = dat[dat[xvar] == cname][yvar].copy() + test = dat[dat[xvar] == tname][yvar].copy() + mixed_data.append(control) + mixed_data.append(test) + bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(mixed_data[0], mixed_data[1], mixed_data[2], mixed_data[3], + self.__is_paired, self.__resamples, self.__random_seed) + + + for j, current_tuple in enumerate(idx): + if self.__is_paired!="sequential": + cname = current_tuple[0] + control = dat[dat[xvar] == cname][yvar].copy() + + for ix, tname in enumerate(current_tuple[1:]): + if self.__is_paired == "sequential": + cname = current_tuple[ix] + control = dat[dat[xvar] == cname][yvar].copy() + test = dat[dat[xvar] == tname][yvar].copy() + + result = TwoGroupsEffectSize(control, test, + self.__effect_size, + self.__proportional, + self.__is_paired, + self.__ci, + self.__resamples, + self.__permutation_count, + self.__random_seed) + r_dict = 
result.to_dict() + r_dict["control"] = cname + r_dict["test"] = tname + r_dict["control_N"] = int(len(control)) + r_dict["test_N"] = int(len(test)) + out.append(r_dict) + if j == len(idx)-1 and ix == len(current_tuple)-2: + if self.__delta2 and self.__effect_size in ["mean_diff","delta_g"]: + resamp_count = False + def_pval = False + elif self.__mini_meta and self.__effect_size == "mean_diff": + resamp_count = False + def_pval = False + else: + resamp_count = True + def_pval = True + else: + resamp_count = False + def_pval = False + + text_repr = result.__repr__(show_resample_count=resamp_count, + define_pval=def_pval) + + to_replace = "between {} and {} is".format(cname, tname) + text_repr = text_repr.replace("is", to_replace, 1) + + reprs.append(text_repr) + + + self.__for_print = "\n\n".join(reprs) + + out_ = pd.DataFrame(out) + + columns_in_order = ['control', 'test', 'control_N', 'test_N', + 'effect_size', 'is_paired', + 'difference', 'ci', + + 'bca_low', 'bca_high', 'bca_interval_idx', + 'pct_low', 'pct_high', 'pct_interval_idx', + + 'bootstraps', 'resamples', 'random_seed', + + 'permutations', 'pvalue_permutation', 'permutation_count', 'permutations_var', + + 'pvalue_welch', + 'statistic_welch', + + 'pvalue_students_t', + 'statistic_students_t', + + 'pvalue_mann_whitney', + 'statistic_mann_whitney', + + 'pvalue_brunner_munzel', + 'statistic_brunner_munzel', + + 'pvalue_wilcoxon', + 'statistic_wilcoxon', + + 'pvalue_mcnemar', + 'statistic_mcnemar', + + 'pvalue_paired_students_t', + 'statistic_paired_students_t', + + 'pvalue_kruskal', + 'statistic_kruskal', + 'proportional_difference' + ] + self.__results = out_.reindex(columns=columns_in_order) + self.__results.dropna(axis="columns", how="all", inplace=True) + + # Add the is_paired column back when is_paired is None + if self.is_paired is None: + self.__results.insert(5, 'is_paired', self.__results.apply(lambda _: None, axis=1)) + + # Create and compute the delta-delta statistics + if self.__delta2: + 
def __repr__(self):
    """
    Return the printable comparison report, computing it on first access
    and reusing the cached text thereafter.
    """
    try:
        report = self.__for_print
    except AttributeError:
        # Results not yet computed; run the bootstrap pipeline once,
        # which populates self.__for_print as a side effect.
        self.__pre_calc()
        report = self.__for_print
    return report
here in v0.3.0 for performance issues. + lqrt_result = lqrt.lqrtest_rel(control, test, + random_state=rnd_seed) + + out.append({"control": cname, "test": tname, + "control_N": int(len(control)), + "test_N": int(len(test)), + "pvalue_paired_lqrt": lqrt_result.pvalue, + "statistic_paired_lqrt": lqrt_result.statistic + }) + + else: + # Likelihood Q-Ratio test: + lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, + random_state=rnd_seed, + equal_var=True) + + + lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, + random_state=rnd_seed, + equal_var=False) + + out.append({"control": cname, "test": tname, + "control_N": int(len(control)), + "test_N": int(len(test)), + + "pvalue_lqrt_equal_var" : lqrt_equal_var_result.pvalue, + "statistic_lqrt_equal_var" : lqrt_equal_var_result.statistic, + "pvalue_lqrt_unequal_var" : lqrt_unequal_var_result.pvalue, + "statistic_lqrt_unequal_var" : lqrt_unequal_var_result.statistic, + }) + self.__lqrt_results = pd.DataFrame(out) + + + def plot(self, color_col=None, + + raw_marker_size=6, es_marker_size=9, + + swarm_label=None, contrast_label=None, delta2_label=None, + swarm_ylim=None, contrast_ylim=None, delta2_ylim=None, + + custom_palette=None, swarm_desat=0.5, halfviolin_desat=1, + halfviolin_alpha=0.8, + + face_color = None, + #bar plot + bar_label=None, bar_desat=0.5, bar_width = 0.5,bar_ylim = None, + # error bar of proportion plot + ci=None, ci_type='bca', err_color=None, + + float_contrast=True, + show_pairs=True, + show_delta2=True, + show_mini_meta=True, + group_summaries=None, + group_summaries_offset=0.1, + + fig_size=None, + dpi=100, + ax=None, + + contrast_show_es = False, + es_sf = 2, + es_fontsize = 10, + + contrast_show_deltas = True, + + gridkey_rows=None, + gridkey_merge_pairs = False, + gridkey_show_Ns = True, + gridkey_show_es = True, + + swarmplot_kwargs=None, + barplot_kwargs=None, + violinplot_kwargs=None, + slopegraph_kwargs=None, + sankey_kwargs=None, + reflines_kwargs=None, + 
group_summary_kwargs=None, + legend_kwargs=None, + title=None, fontsize_title = 16, + fontsize_rawxlabel = 12,fontsize_rawylabel = 12,fontsize_contrastxlabel = 12, fontsize_contrastylabel = 12, + fontsize_delta2label = 12): + + """ + Creates an estimation plot for the effect size of interest. + + + Parameters + ---------- + color_col : string, default None + Column to be used for colors. + raw_marker_size : float, default 6 + The diameter (in points) of the marker dots plotted in the + swarmplot. + es_marker_size : float, default 9 + The size (in points) of the effect size points on the difference + axes. + swarm_label, contrast_label, delta2_label : strings, default None + Set labels for the y-axis of the swarmplot and the contrast plot, + respectively. If `swarm_label` is not specified, it defaults to + "value", unless a column name was passed to `y`. If + `contrast_label` is not specified, it defaults to the effect size + being plotted. If `delta2_label` is not specifed, it defaults to + "delta - delta" + swarm_ylim, contrast_ylim, delta2_ylim : tuples, default None + The desired y-limits of the raw data (swarmplot) axes, the + difference axes and the delta-delta axes respectively, as a tuple. + These will be autoscaled to sensible values if they are not + specified. The delta2 axes and contrast axes should have the same + limits for y. When `show_delta2` is True, if both of the `contrast_ylim` + and `delta2_ylim` are not None, then they must be specified with the + same values; when `show_delta2` is True and only one of them is specified, + then the other will automatically be assigned with the same value. + Specifying `delta2_ylim` does not have any effect when `show_delta2` is + False. + custom_palette : dict, list, or matplotlib color palette, default None + This keyword accepts a dictionary with {'group':'color'} pairings, + a list of RGB colors, or a specified matplotlib palette. This + palette will be used to color the swarmplot. 
If `color_col` is not + specified, then each group will be colored in sequence according + to the default palette currently used by matplotlib. + Please take a look at the seaborn commands `color_palette` + and `cubehelix_palette` to generate a custom palette. Both + these functions generate a list of RGB colors. + See: + https://seaborn.pydata.org/generated/seaborn.color_palette.html + https://seaborn.pydata.org/generated/seaborn.cubehelix_palette.html + The named colors of matplotlib can be found here: + https://matplotlib.org/examples/color/named_colors.html + swarm_desat : float, default 1 + Decreases the saturation of the colors in the swarmplot by the + desired proportion. Uses `seaborn.desaturate()` to acheive this. + halfviolin_desat : float, default 0.5 + Decreases the saturation of the colors of the half-violin bootstrap + curves by the desired proportion. Uses `seaborn.desaturate()` to + acheive this. + halfviolin_alpha : float, default 0.8 + The alpha (transparency) level of the half-violin bootstrap curves. + float_contrast : boolean, default True + Whether or not to display the halfviolin bootstrapped difference + distribution alongside the raw data. + show_pairs : boolean, default True + If the data is paired, whether or not to show the raw data as a + swarmplot, or as slopegraph, with a line joining each pair of + observations. + show_delta2, show_mini_meta : boolean, default True + If delta-delta or mini-meta delta is calculated, whether or not to + show the delta-delta plot or mini-meta plot. + group_summaries : ['mean_sd', 'median_quartiles', 'None'], default None. + Plots the summary statistics for each group. If 'mean_sd', then + the mean and standard deviation of each group is plotted as a + notched line beside each group. If 'median_quantiles', then the + median and 25th and 75th percentiles of each group is plotted + instead. If 'None', the summaries are not shown. 
+ group_summaries_offset : float, default 0.1 + If group summaries are displayed, they will be offset from the raw + data swarmplot groups by this value. + fig_size : tuple, default None + The desired dimensions of the figure as a (length, width) tuple. + dpi : int, default 100 + The dots per inch of the resulting figure. + ax : matplotlib.Axes, default None + Provide an existing Axes for the plots to be created. If no Axes is + specified, a new matplotlib Figure will be created. + gridkey_rows : list, default None + Provide a list of row labels for the gridkey. The supplied idx is + checked against the row labels to determine whether the corresponding + cell should be populated or not. + swarmplot_kwargs : dict, default None + Pass any keyword arguments accepted by the seaborn `swarmplot` + command here, as a dict. If None, the following keywords are + passed to sns.swarmplot : {'size':`raw_marker_size`}. + violinplot_kwargs : dict, default None + Pass any keyword arguments accepted by the matplotlib ` + pyplot.violinplot` command here, as a dict. If None, the following + keywords are passed to violinplot : {'widths':0.5, 'vert':True, + 'showextrema':False, 'showmedians':False}. + slopegraph_kwargs : dict, default None + This will change the appearance of the lines used to join each pair + of observations when `show_pairs=True`. Pass any keyword arguments + accepted by matplotlib `plot()` function here, as a dict. + If None, the following keywords are + passed to plot() : {'linewidth':1, 'alpha':0.5}. + sankey_kwargs: dict, default None + Whis will change the appearance of the sankey diagram used to depict + paired proportional data when `show_pairs=True` and `proportional=True`. + Pass any keyword arguments accepted by plot_tools.sankeydiag() function + here, as a dict. 
If None, the following keywords are passed to sankey diagram: + {"width": 0.5, "align": "center", "alpha": 0.4, "bar_width": 0.1, "rightColor": False} + reflines_kwargs : dict, default None + This will change the appearance of the zero reference lines. Pass + any keyword arguments accepted by the matplotlib Axes `hlines` + command here, as a dict. If None, the following keywords are + passed to Axes.hlines : {'linestyle':'solid', 'linewidth':0.75, + 'zorder':2, 'color' : default y-tick color}. + group_summary_kwargs : dict, default None + Pass any keyword arguments accepted by the matplotlib.lines.Line2D + command here, as a dict. This will change the appearance of the + vertical summary lines for each group, if `group_summaries` is not + 'None'. If None, the following keywords are passed to + matplotlib.lines.Line2D : {'lw':2, 'alpha':1, 'zorder':3}. + legend_kwargs : dict, default None + Pass any keyword arguments accepted by the matplotlib Axes + `legend` command here, as a dict. If None, the following keywords + are passed to matplotlib.Axes.legend : {'loc':'upper left', + 'frameon':False}. + title : string, default None + Title for the plot. If None, no title will be displayed. Pass any + keyword arguments accepted by the matplotlib.pyplot.suptitle `t` command here, + as a string. + fontsize_title : float or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}, default 'large' + Font size for the plot title. If a float, the fontsize in points. The + string values denote sizes relative to the default font size. Pass any keyword arguments accepted + by the matplotlib.pyplot.suptitle `fontsize` command here, as a string. + fontsize_rawxlabel : float, default 12 + Font size for the raw axes xlabel. + fontsize_rawylabel : float, default 12 + Font size for the raw axes ylabel. + fontsize_contrastxlabel : float, default 12 + Font size for the contrast axes xlabel. 
@property
def results(self):
    """Prints all pairwise comparisons nicely."""
    try:
        table = self.__results
    except AttributeError:
        # First access: run the bootstrap computations, which populate
        # self.__results as a side effect.
        self.__pre_calc()
        table = self.__results
    return table
@property
def resamples(self):
    """
    The number of resamples (with replacement) during bootstrap resampling.
    """
    # Fixed: docstring previously ended with a stray double-quote.
    return self.__resamples
@property
def delta_delta(self):
    """
    Returns the delta-delta results.
    """
    # Fixed: docstring previously said "mini_meta results" — a copy-paste
    # error from the sibling `mini_meta_delta` property.
    try:
        return self.__delta_delta
    except AttributeError:
        # Not computed yet; __pre_calc populates it as a side effect.
        self.__pre_calc()
        return self.__delta_delta
+ random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable. + **kwargs): + from ._stats_tools.effsize import two_group_difference + from ._stats_tools.confint_2group_diff import calculate_group_var + + + self.__permutation_count = permutation_count + + # Run Sanity Check. + if is_paired and len(control) != len(test): + raise ValueError("The two arrays do not have the same length.") + + # Initialise random number generator. + # rng = random.default_rng(seed=random_seed) + rng = RandomState(PCG64(random_seed)) + + # Set required constants and variables + control = array(control) + test = array(test) + + control_sample = control.copy() + test_sample = test.copy() + + BAG = array([*control, *test]) + CONTROL_LEN = int(len(control)) + EXTREME_COUNT = 0. + THRESHOLD = abs(two_group_difference(control, test, + is_paired, effect_size)) + self.__permutations = [] + self.__permutations_var = [] + + for i in range(int(self.__permutation_count)): + if is_paired: + # Select which control-test pairs to swap. + random_idx = rng.choice(CONTROL_LEN, + rng.randint(0, CONTROL_LEN+1), + replace=False) + + # Perform swap. + for i in random_idx: + _placeholder = control_sample[i] + control_sample[i] = test_sample[i] + test_sample[i] = _placeholder + + else: + # Shuffle the bag and assign to control and test groups. + # NB. rng.shuffle didn't produce replicable results... + shuffled = rng.permutation(BAG) + control_sample = shuffled[:CONTROL_LEN] + test_sample = shuffled[CONTROL_LEN:] + + + es = two_group_difference(control_sample, test_sample, + False, effect_size) + + group_var = calculate_group_var(var(control_sample, ddof=1), + CONTROL_LEN, + var(test_sample, ddof=1), + len(test_sample)) + self.__permutations.append(es) + self.__permutations_var.append(group_var) + + if abs(es) > THRESHOLD: + EXTREME_COUNT += 1. 
def create_bootstrap_indexes(array, resamples=5000, random_seed=12345):
    """Given an array-like, returns a generator of bootstrap indexes
    to be used for resampling.

    Parameters
    ----------
    array : array-like
        The data to be resampled; only its length is used.
    resamples : int, default 5000
        Number of bootstrap index arrays to generate.
    random_seed : int, default 12345
        Seeds the random number generator so that the produced
        index sequences are replicable.

    Returns
    -------
    Generator yielding `resamples` arrays, each of `len(array)` indexes
    drawn with replacement from `range(len(array))`.
    """
    rng = RandomState(PCG64(random_seed))
    indexes = range(len(array))
    # The unused loop variable is conventionally named `_`.
    return (rng.choice(indexes, len(indexes), replace=True) for _ in range(resamples))
def compute_1group_bias_correction(x, bootstraps, func, *args, **kwargs):
    """
    Compute the bias-correction factor (z0) for a BCa confidence interval:
    the standard-normal quantile of the proportion of bootstrap replicates
    that fall below the observed statistic func(x).
    """
    observed = func(x, *args, **kwargs)
    n_below = sum(bootstraps < observed)
    fraction_below = n_below / len(bootstraps)
    return norm.ppf(fraction_below)
+ resamples: int = 5000, # The number of bootstrap resamples to be taken of func(x). + alpha: float = 0.05, # Denotes the likelihood that the confidence interval produced _does not_ include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced. + random_seed: int = 12345, # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable. + sort_bootstraps: bool = True, + *args, + **kwargs +): """ Given an array-like x, returns func(x), and a bootstrap confidence interval of func(x). @@ -101,11 +99,10 @@ def summary_ci_1group(x:np.array,# An numerical iterable. """ from . import confint_2group_diff as ci2g - from numpy import sort as npsort - boots = compute_1group_bootstraps(x, func, resamples=resamples, - random_seed=random_seed, - *args, **kwargs) + boots = compute_1group_bootstraps( + x, func, resamples=resamples, random_seed=random_seed, *args, **kwargs + ) bias = compute_1group_bias_correction(x, boots, func) jk = compute_1group_jackknife(x, func, *args, **kwargs) @@ -126,10 +123,13 @@ def summary_ci_1group(x:np.array,# An numerical iterable. 
def create_jackknife_indexes(data):
    """
    Given an array-like with length N, return a generator of jackknife
    index arrays: the i-th yielded array is [0, 1, ..., N-1] with index i
    removed.

    Parameters
    ----------
    data : array-like
        Only its length is used.

    Returns
    -------
    Generator that yields all jackknife bootstrap samples.
    """
    all_indexes = arange(0, len(data))
    for left_out in all_indexes:
        yield delete(all_indexes, left_out)
""" - from numpy import arange index_range = arange(0, len(data)) return (index_range for i in index_range) - def _create_two_group_jackknife_indexes(x0, x1, is_paired): """Creates the jackknife bootstrap for 2 groups.""" if is_paired and len(x0) == len(x1): - out = list(zip([j for j in create_jackknife_indexes(x0)], - [i for i in create_jackknife_indexes(x1)] - ) - ) + out = list( + zip( + [j for j in create_jackknife_indexes(x0)], + [i for i in create_jackknife_indexes(x1)], + ) + ) else: - jackknife_c = list(zip([j for j in create_jackknife_indexes(x0)], - [i for i in create_repeated_indexes(x1)] - ) - ) - - jackknife_t = list(zip([i for i in create_repeated_indexes(x0)], - [j for j in create_jackknife_indexes(x1)] - ) - ) + jackknife_c = list( + zip( + [j for j in create_jackknife_indexes(x0)], + [i for i in create_repeated_indexes(x1)], + ) + ) + + jackknife_t = list( + zip( + [i for i in create_repeated_indexes(x0)], + [j for j in create_jackknife_indexes(x1)], + ) + ) out = jackknife_c + jackknife_t del jackknife_c del jackknife_t @@ -68,7 +77,6 @@ def _create_two_group_jackknife_indexes(x0, x1, is_paired): return out - def compute_meandiff_jackknife(x0, x1, is_paired, effect_size): """ Given two arrays, returns the jackknife for their effect size. 
@@ -83,46 +91,37 @@ def compute_meandiff_jackknife(x0, x1, is_paired, effect_size): x0_shuffled = x0[j[0]] x1_shuffled = x1[j[1]] - es = __es.two_group_difference(x0_shuffled, x1_shuffled, - is_paired, effect_size) + es = __es.two_group_difference(x0_shuffled, x1_shuffled, is_paired, effect_size) out.append(es) return out - def _calc_accel(jack_dist): - from numpy import mean as npmean - from numpy import sum as npsum - from numpy import errstate - jack_mean = npmean(jack_dist) - numer = npsum((jack_mean - jack_dist)**3) - denom = 6.0 * (npsum((jack_mean - jack_dist)**2) ** 1.5) + numer = npsum((jack_mean - jack_dist) ** 3) + denom = 6.0 * (npsum((jack_mean - jack_dist) ** 2) ** 1.5) - with errstate(invalid='ignore'): + with errstate(invalid="ignore"): # does not raise warning if invalid division encountered. return numer / denom -def compute_bootstrapped_diff(x0, x1, is_paired, effect_size, - resamples=5000, random_seed=12345): +def compute_bootstrapped_diff( + x0, x1, is_paired, effect_size, resamples=5000, random_seed=12345 +): """Bootstraps the effect_size for 2 groups.""" - + from . import effsize as __es - import numpy as np - from numpy.random import PCG64, RandomState - - # rng = RandomState(default_rng(random_seed)) + rng = RandomState(PCG64(random_seed)) out = np.repeat(np.nan, resamples) x0_len = len(x0) x1_len = len(x1) - + for i in range(int(resamples)): - if is_paired: if x0_len != x1_len: raise ValueError("The two arrays do not have the same length.") @@ -132,45 +131,27 @@ def compute_bootstrapped_diff(x0, x1, is_paired, effect_size, else: x0_sample = rng.choice(x0, x0_len, replace=True) x1_sample = rng.choice(x1, x1_len, replace=True) - - out[i] = __es.two_group_difference(x0_sample, x1_sample, - is_paired, effect_size) - - # check whether there are any infinities in the bootstrap, - # which likely indicates the sample sizes are too small as - # the computation of Cohen's d and Hedges' g necessitated - # a division by zero. - # Added in v0.2.6. 
- - # num_infinities = len(out[np.isinf(out)]) - # print(num_infinities) - # if num_infinities > 0: - # warn_msg = "There are {} bootstraps that are not defined. "\ - # "This is likely due to smaple sample sizes. "\ - # "The values in a bootstrap for a group will be more likely "\ - # "to be all equal, with a resulting variance of zero. "\ - # "The computation of Cohen's d and Hedges' g will therefore "\ - # "involved a division by zero. " - # warnings.warn(warn_msg.format(num_infinities), category="UserWarning") - + + out[i] = __es.two_group_difference(x0_sample, x1_sample, is_paired, effect_size) + return out -def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1 - x2:np.ndarray,# Test group 1 - x3:np.ndarray,# Control group 2 - x4:np.ndarray,# Test group 2 - is_paired:str=None, - resamples:int=5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits. - random_seed:int=12345# `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable. - )->tuple: # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta + +def compute_delta2_bootstrapped_diff( + x1: np.ndarray, # Control group 1 + x2: np.ndarray, # Test group 1 + x3: np.ndarray, # Control group 2 + x4: np.ndarray, # Test group 2 + is_paired: str = None, + resamples: int = 5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits. + random_seed: int = 12345, # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable. +) -> ( + tuple +): # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta """ Bootstraps the effect size deltas' g. 
- - """ - import numpy as np - import pandas as pd - from numpy.random import PCG64, RandomState + """ rng = RandomState(PCG64(random_seed)) x1_len = len(x1) @@ -180,11 +161,15 @@ def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1 out_delta_g = np.repeat(np.nan, resamples) deltadelta = np.repeat(np.nan, resamples) - n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2= x1_len, x2_len, x3_len, x4_len + n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2 = x1_len, x2_len, x3_len, x4_len s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4) - sd_numerator = ((n_a2_b1 - 1) * s_a2_b1 ** 2 + (n_a1_b1 - 1) * s_a1_b1 ** 2 + (n_a2_b2 - 1) * s_a2_b2 ** 2 + ( - n_a1_b2 - 1) * s_a1_b2 ** 2) + sd_numerator = ( + (n_a2_b1 - 1) * s_a2_b1**2 + + (n_a1_b1 - 1) * s_a1_b1**2 + + (n_a2_b2 - 1) * s_a2_b2**2 + + (n_a1_b2 - 1) * s_a1_b2**2 + ) sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1) pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator) @@ -192,46 +177,58 @@ def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1 if is_paired: if (x1_len != x2_len) or (x3_len != x4_len): raise ValueError("The two arrays do not have the same length.") - df_paired_1 = pd.DataFrame({ - 'value': np.concatenate([x1, x3]), - 'array_id': np.repeat(['x1','x3'], [x1_len, x3_len]) - }) - df_paired_2 = pd.DataFrame({ - 'value': np.concatenate([x2, x4]), - 'array_id': np.repeat(['x2','x4'], [x1_len, x3_len]) - }) - x_sample_index = rng.choice(len(df_paired_1), len(df_paired_1), replace=True) + df_paired_1 = pd.DataFrame( + { + "value": np.concatenate([x1, x3]), + "array_id": np.repeat(["x1", "x3"], [x1_len, x3_len]), + } + ) + df_paired_2 = pd.DataFrame( + { + "value": np.concatenate([x2, x4]), + "array_id": np.repeat(["x2", "x4"], [x1_len, x3_len]), + } + ) + x_sample_index = rng.choice( + len(df_paired_1), len(df_paired_1), replace=True + ) x_sample_1 = df_paired_1.loc[x_sample_index] x_sample_2 = df_paired_2.loc[x_sample_index] - x1_sample = 
x_sample_1[x_sample_1['array_id'] == 'x1']['value'] - x2_sample = x_sample_2[x_sample_2['array_id'] == 'x2']['value'] - x3_sample = x_sample_1[x_sample_1['array_id'] == 'x3']['value'] - x4_sample = x_sample_2[x_sample_2['array_id'] == 'x4']['value'] + x1_sample = x_sample_1[x_sample_1["array_id"] == "x1"]["value"] + x2_sample = x_sample_2[x_sample_2["array_id"] == "x2"]["value"] + x3_sample = x_sample_1[x_sample_1["array_id"] == "x3"]["value"] + x4_sample = x_sample_2[x_sample_2["array_id"] == "x4"]["value"] else: - df = pd.DataFrame({ - 'value': np.concatenate([x1, x2, x3, x4]), - 'array_id': np.repeat(['x1', 'x2', 'x3', 'x4'], [x1_len, x2_len, x3_len, x4_len]) - }) - x_sample_index = rng.choice(len(df),len(df), replace=True) + df = pd.DataFrame( + { + "value": np.concatenate([x1, x2, x3, x4]), + "array_id": np.repeat( + ["x1", "x2", "x3", "x4"], [x1_len, x2_len, x3_len, x4_len] + ), + } + ) + x_sample_index = rng.choice(len(df), len(df), replace=True) x_sample = df.loc[x_sample_index] - x1_sample = x_sample[x_sample['array_id'] == 'x1']['value'] - x2_sample = x_sample[x_sample['array_id'] == 'x2']['value'] - x3_sample = x_sample[x_sample['array_id'] == 'x3']['value'] - x4_sample = x_sample[x_sample['array_id'] == 'x4']['value'] + x1_sample = x_sample[x_sample["array_id"] == "x1"]["value"] + x2_sample = x_sample[x_sample["array_id"] == "x2"]["value"] + x3_sample = x_sample[x_sample["array_id"] == "x3"]["value"] + x4_sample = x_sample[x_sample["array_id"] == "x4"]["value"] - delta_1 = np.mean(x2_sample)-np.mean(x1_sample) - delta_2 = np.mean(x4_sample)-np.mean(x3_sample) + delta_1 = np.mean(x2_sample) - np.mean(x1_sample) + delta_2 = np.mean(x4_sample) - np.mean(x3_sample) delta_delta = delta_2 - delta_1 deltadelta[i] = delta_delta - out_delta_g[i] = delta_delta/pooled_sample_sd - delta_g = ((np.mean(x4)-np.mean(x3)) - (np.mean(x2)-np.mean(x1))) / pooled_sample_sd + out_delta_g[i] = delta_delta / pooled_sample_sd + delta_g = ( + (np.mean(x4) - np.mean(x3)) - 
(np.mean(x2) - np.mean(x1)) + ) / pooled_sample_sd return out_delta_g, delta_g, deltadelta - -def compute_meandiff_bias_correction(bootstraps, #An numerical iterable, comprising bootstrap resamples of the effect size. - effsize # The effect size for the original sample. - ): #The bias correction value for the given bootstraps and effect size. +def compute_meandiff_bias_correction( + bootstraps, # An numerical iterable, comprising bootstrap resamples of the effect size. + effsize, # The effect size for the original sample. +): # The bias correction value for the given bootstraps and effect size. """ Computes the bias correction required for the BCa method of confidence interval construction. @@ -243,22 +240,18 @@ def compute_meandiff_bias_correction(bootstraps, #An numerical iterable, compris and effect size. """ - from scipy.stats import norm - from numpy import array - B = array(bootstraps) + B = np.array(bootstraps) prop_less_than_es = sum(B < effsize) / len(B) return norm.ppf(prop_less_than_es) - def _compute_alpha_from_ci(ci): if ci < 0 or ci > 100: raise ValueError("`ci` must be a number between 0 and 100.") - return (100. - ci) / 100. - + return (100.0 - ci) / 100.0 def _compute_quantile(z, bias, acceleration): @@ -268,15 +261,12 @@ def _compute_quantile(z, bias, acceleration): return bias + (numer / denom) - def compute_interval_limits(bias, acceleration, n_boots, ci=95): """ Returns the indexes of the interval limits for a given bootstrap. Supply the bias, acceleration factor, and number of bootstraps. 
""" - from scipy.stats import norm - from numpy import isnan, nan alpha = _compute_alpha_from_ci(ci) @@ -286,7 +276,7 @@ def compute_interval_limits(bias, acceleration, n_boots, ci=95): z_low = norm.ppf(alpha_low) z_high = norm.ppf(alpha_high) - kws = {'bias': bias, 'acceleration': acceleration} + kws = {"bias": bias, "acceleration": acceleration} low = _compute_quantile(z_low, **kws) high = _compute_quantile(z_high, **kws) @@ -299,18 +289,17 @@ def compute_interval_limits(bias, acceleration, n_boots, ci=95): return low, high -def calculate_group_var(control_var, control_N,test_var, test_N): - return control_var/control_N + test_var/test_N +def calculate_group_var(control_var, control_N, test_var, test_N): + return control_var / control_N + test_var / test_N def calculate_weighted_delta(group_var, differences, resamples): - ''' + """ Compute the weighted deltas. - ''' - import numpy as np + """ - weight = 1/group_var + weight = 1 / group_var denom = np.sum(weight) num = np.sum(weight[i] * differences[i] for i in range(0, len(weight))) - return num/denom + return num / denom diff --git a/dabest/_stats_tools/effsize.py b/dabest/_stats_tools/effsize.py index cf9e81b4..b5d0a6ee 100644 --- a/dabest/_stats_tools/effsize.py +++ b/dabest/_stats_tools/effsize.py @@ -3,6 +3,9 @@ # %% ../../nbs/API/effsize.ipynb 4 from __future__ import annotations import numpy as np +import warnings +from scipy.special import gamma +from scipy.stats import mannwhitneyu # %% auto 0 __all__ = ['two_group_difference', 'func_difference', 'cohens_d', 'cohens_h', 'hedges_g', 'cliffs_delta', 'weighted_delta'] @@ -56,8 +59,7 @@ def two_group_difference(control:list|tuple|np.ndarray, #Accepts lists, tuples, median of `test`. 
""" - import numpy as np - import warnings + if effect_size == "mean_diff": return func_difference(control, test, np.mean, is_paired) @@ -100,13 +102,12 @@ def func_difference(control:list|tuple|np.ndarray, # NaNs are automatically disc Applies func to `control` and `test`, and then returns the difference. """ - import numpy as np # Convert to numpy arrays for speed. # NaNs are automatically dropped. - if control.__class__ != np.ndarray: + if ~isinstance(control, np.ndarray): control = np.array(control) - if test.__class__ != np.ndarray: + if ~isinstance(test, np.ndarray): test = np.array(test) if is_paired: @@ -178,13 +179,12 @@ def cohens_d(control:list|tuple|np.ndarray, - https://en.wikipedia.org/wiki/Bessel%27s_correction - https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation """ - import numpy as np # Convert to numpy arrays for speed. # NaNs are automatically dropped. - if control.__class__ != np.ndarray: + if ~isinstance(control, np.ndarray): control = np.array(control) - if test.__class__ != np.ndarray: + if ~isinstance(test, np.ndarray): test = np.array(test) control = control[~np.isnan(control)] test = test[~np.isnan(test)] @@ -226,9 +226,7 @@ def cohens_h(control:list|tuple|np.ndarray, and a dict for mapping the 0s and 1s to the actual labels, e.g.{1: "Smoker", 0: "Non-smoker"} ''' - import numpy as np np.seterr(divide='ignore', invalid='ignore') - import pandas as pd # Check whether dataframe contains only 0s and 1s. if np.isin(control, [0, 1]).all() == False or np.isin(test, [0, 1]).all() == False: @@ -237,10 +235,10 @@ def cohens_h(control:list|tuple|np.ndarray, # Convert to numpy arrays for speed. # NaNs are automatically dropped. # Aligned with cohens_d calculation. 
- if control.__class__ != np.ndarray: + if ~isinstance(control, np.ndarray): control = np.array(control) - if test.__class__ != np.ndarray: - test = np.array(test) + if ~isinstance(test, np.ndarray): + test = np.array(test) control = control[~np.isnan(control)] test = test[~np.isnan(test)] @@ -266,13 +264,12 @@ def hedges_g(control:list|tuple|np.ndarray, See [here](https://en.wikipedia.org/wiki/Effect_size#Hedges'_g) """ - import numpy as np # Convert to numpy arrays for speed. # NaNs are automatically dropped. - if control.__class__ != np.ndarray: + if ~isinstance(control, np.ndarray): control = np.array(control) - if test.__class__ != np.ndarray: + if ~isinstance(test, np.ndarray): test = np.array(test) control = control[~np.isnan(control)] test = test[~np.isnan(test)] @@ -291,14 +288,13 @@ def cliffs_delta(control:list|tuple|np.ndarray, Computes Cliff's delta for 2 samples. See [here](https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data) """ - import numpy as np - from scipy.stats import mannwhitneyu + # Convert to numpy arrays for speed. # NaNs are automatically dropped. - if control.__class__ != np.ndarray: + if ~isinstance(control, np.ndarray): control = np.array(control) - if test.__class__ != np.ndarray: + if ~isinstance(test, np.ndarray): test = np.array(test) c = control[~np.isnan(control)] @@ -311,54 +307,31 @@ def cliffs_delta(control:list|tuple|np.ndarray, U, _ = mannwhitneyu(t, c, alternative='two-sided') cliffs_delta = ((2 * U) / (control_n * test_n)) - 1 - # more = 0 - # less = 0 - # - # for i, c in enumerate(control): - # for j, t in enumerate(test): - # if t > c: - # more += 1 - # elif t < c: - # less += 1 - # - # cliffs_delta = (more - less) / (control_n * test_n) return cliffs_delta # %% ../../nbs/API/effsize.ipynb 11 def _compute_standardizers(control, test): - from numpy import mean, var, sqrt, nan + # TODO missing docstring # For calculation of correlation; not currently used. 
# from scipy.stats import pearsonr control_n = len(control) test_n = len(test) - control_mean = mean(control) - test_mean = mean(test) + control_var = np.var(control, ddof=1) # use N-1 to compute the variance. + test_var = np.var(test, ddof=1) - control_var = var(control, ddof=1) # use N-1 to compute the variance. - test_var = var(test, ddof=1) - - control_std = sqrt(control_var) - test_std = sqrt(test_var) # For unpaired 2-groups standardized mean difference. - pooled = sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) / + pooled = np.sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) / (control_n + test_n - 2) ) # For paired standardized mean difference. - average = sqrt((control_var + test_var) / 2) - - # if len(control) == len(test): - # corr = pearsonr(control, test)[0] - # std_diff = sqrt(control_var + test_var - (2 * corr * control_std * test_std)) - # std_diff_corrected = std_diff / (sqrt(2 * (1 - corr))) - # return pooled, average, std_diff_corrected - # - # else: + average = np.sqrt((control_var + test_var) / 2) + return pooled, average # indent if you implement above code chunk. # %% ../../nbs/API/effsize.ipynb 12 @@ -377,16 +350,12 @@ def _compute_hedges_correction_factor(n1, ISBN 0-12-336380-2. """ - from scipy.special import gamma - from numpy import sqrt, isinf - import warnings - df = n1 + n2 - 2 numer = gamma(df / 2) denom0 = gamma((df - 1) / 2) - denom = sqrt(df / 2) * denom0 + denom = np.sqrt(df / 2) * denom0 - if isinf(numer) or isinf(denom): + if np.isinf(numer) or np.isinf(denom): # occurs when df is too large. # Apply Hedges and Olkin's approximation. df_sum = n1 + n2 @@ -404,7 +373,6 @@ def weighted_delta(difference, group_var): Compute the weighted deltas where the weight is the inverse of the pooled group difference. 
''' - import numpy as np weight = np.true_divide(1, group_var) return np.sum(difference*weight)/np.sum(weight) diff --git a/dabest/misc_tools.py b/dabest/misc_tools.py index 4b2617ef..c581c5db 100644 --- a/dabest/misc_tools.py +++ b/dabest/misc_tools.py @@ -4,9 +4,13 @@ __all__ = ['merge_two_dicts', 'unpack_and_add', 'print_greeting', 'get_varname'] # %% ../nbs/API/misc_tools.ipynb 4 -def merge_two_dicts(x:dict, - y:dict - )->dict:#A dictionary containing a union of all keys in both original dicts. +import datetime as dt +from numpy import repeat + +# %% ../nbs/API/misc_tools.ipynb 5 +def merge_two_dicts( + x: dict, y: dict +) -> dict: # A dictionary containing a union of all keys in both original dicts. """ Given two dicts, merge them into a new dict as a shallow copy. Any overlapping keys in `y` will override the values in `x`. @@ -20,24 +24,20 @@ def merge_two_dicts(x:dict, return z - def unpack_and_add(l, c): """Convenience function to allow me to add to an existing list without altering that list.""" t = [a for a in l] t.append(c) - return(t) - + return t def print_greeting(): from .__init__ import __version__ - import datetime as dt - import numpy as np line1 = "DABEST v{}".format(__version__) - header = "".join(np.repeat("=", len(line1))) - spacer = "".join(np.repeat(" ", len(line1))) + header = "".join(repeat("=", len(line1))) + spacer = "".join(repeat(" ", len(line1))) now = dt.datetime.now() if 0 < now.hour < 12: @@ -53,9 +53,7 @@ def print_greeting(): def get_varname(obj): - matching_vars = [k for k,v in globals().items() if v is obj] + matching_vars = [k for k, v in globals().items() if v is obj] if len(matching_vars) > 0: return matching_vars[0] - else: - return "" - + return "" diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py index 44a07f8e..a8dc5331 100644 --- a/dabest/plot_tools.py +++ b/dabest/plot_tools.py @@ -14,25 +14,23 @@ import seaborn as sns import numpy as np import itertools +import matplotlib.lines as mlines # %% 
../nbs/API/plot_tools.ipynb 5 -def halfviolin(v, half='right', fill_color='k', alpha=1, - line_color='k', line_width=0): - import numpy as np - - for b in v['bodies']: +def halfviolin(v, half="right", fill_color="k", alpha=1, line_color="k", line_width=0): + for b in v["bodies"]: V = b.get_paths()[0].vertices mean_vertical = np.mean(V[:, 0]) mean_horizontal = np.mean(V[:, 1]) - if half == 'right': + if half == "right": V[:, 0] = np.clip(V[:, 0], mean_vertical, np.inf) - elif half == 'left': + elif half == "left": V[:, 0] = np.clip(V[:, 0], -np.inf, mean_vertical) - elif half == 'bottom': + elif half == "bottom": V[:, 1] = np.clip(V[:, 1], -np.inf, mean_horizontal) - elif half == 'top': + elif half == "top": V[:, 1] = np.clip(V[:, 1], mean_horizontal, np.inf) b.set_color(fill_color) @@ -41,70 +39,50 @@ def halfviolin(v, half='right', fill_color='k', alpha=1, b.set_linewidth(line_width) - -# def align_yaxis(ax1, v1, ax2, v2): -# """adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1""" -# # Taken from -# # http://stackoverflow.com/questions/7630778/ -# # matplotlib-align-origin-of-right-axis-with-specific-left-axis-value -# _, y1 = ax1.transData.transform((0, v1)) -# _, y2 = ax2.transData.transform((0, v2)) -# inv = ax2.transData.inverted() -# _, dy = inv.transform((0, 0)) - inv.transform((0, y1-y2)) -# miny, maxy = ax2.get_ylim() -# ax2.set_ylim(miny+dy, maxy+dy) -# -# -# -# def rotate_ticks(axes, angle=45, alignment='right'): -# for tick in axes.get_xticklabels(): -# tick.set_rotation(angle) -# tick.set_horizontalalignment(alignment) - - - def get_swarm_spans(coll): """ Given a matplotlib Collection, will obtain the x and y spans for the collection. Will return None if this fails. """ - import numpy as np x, y = np.array(coll.get_offsets()).T try: return x.min(), x.max(), y.min(), y.max() except ValueError: return None -def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. - x:str, #x column to be plotted. 
- y:str, # y column to be plotted. - type:str='mean_sd', # Choose from ['mean_sd', 'median_quartiles']. Plots the summary statistics for each group. If 'mean_sd', then the mean and standard deviation of each group is plotted as a gapped line. If 'median_quantiles', then the median and 25th and 75th percentiles of each group is plotted instead. - offset:float=0.2, #Give a single float (that will be used as the x-offset of all gapped lines), or an iterable containing the list of x-offsets. - ax=None, #If a matplotlib Axes object is specified, the gapped lines will be plotted in order on this axes. If None, the current axes (plt.gca()) is used. - line_color="black", # The color of the gapped lines. - gap_width_percent=1, # The width of the gap in the gapped lines, as a percentage of the y-axis span. - pos:list=[0, 1],#The positions of the error bars for the sankey_error_bar method. - method:str='gapped_lines', #The method to use for drawing the error bars. Options are: 'gapped_lines', 'proportional_error_bar', and 'sankey_error_bar'. - **kwargs:dict - ): - ''' + +def error_bar( + data: pd.DataFrame, # This DataFrame should be in 'long' format. + x: str, # x column to be plotted. + y: str, # y column to be plotted. + type: str = "mean_sd", # Choose from ['mean_sd', 'median_quartiles']. Plots the summary statistics for each group. If 'mean_sd', then the mean and standard deviation of each group is plotted as a gapped line. If 'median_quantiles', then the median and 25th and 75th percentiles of each group is plotted instead. + offset: float = 0.2, # Give a single float (that will be used as the x-offset of all gapped lines), or an iterable containing the list of x-offsets. + ax=None, # If a matplotlib Axes object is specified, the gapped lines will be plotted in order on this axes. If None, the current axes (plt.gca()) is used. + line_color="black", # The color of the gapped lines. 
+ gap_width_percent=1, # The width of the gap in the gapped lines, as a percentage of the y-axis span. + pos: list = [ + 0, + 1, + ], # The positions of the error bars for the sankey_error_bar method. + method: str = "gapped_lines", # The method to use for drawing the error bars. Options are: 'gapped_lines', 'proportional_error_bar', and 'sankey_error_bar'. + **kwargs: dict, +): + """ Function to plot the standard deviations as vertical errorbars. The mean is a gap defined by negative space. This function combines the functionality of gapped_lines(), proportional_error_bar(), and sankey_error_bar(). - ''' - import numpy as np - import pandas as pd - import matplotlib.pyplot as plt - import matplotlib.lines as mlines + """ if gap_width_percent < 0 or gap_width_percent > 100: raise ValueError("`gap_width_percent` must be between 0 and 100.") - if method not in ['gapped_lines', 'proportional_error_bar', 'sankey_error_bar']: - raise ValueError("Invalid `method`. Must be one of 'gapped_lines', \ - 'proportional_error_bar', or 'sankey_error_bar'.") + if method not in ["gapped_lines", "proportional_error_bar", "sankey_error_bar"]: + raise ValueError( + "Invalid `method`. Must be one of 'gapped_lines', \ + 'proportional_error_bar', or 'sankey_error_bar'." + ) if ax is None: ax = plt.gca() @@ -113,14 +91,14 @@ def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. gap_width = ax_yspan * gap_width_percent / 100 keys = kwargs.keys() - if 'clip_on' not in keys: - kwargs['clip_on'] = False + if "clip_on" not in keys: + kwargs["clip_on"] = False - if 'zorder' not in keys: - kwargs['zorder'] = 5 + if "zorder" not in keys: + kwargs["zorder"] = 5 - if 'lw' not in keys: - kwargs['lw'] = 2. + if "lw" not in keys: + kwargs["lw"] = 2.0 if isinstance(data[x].dtype, pd.CategoricalDtype): group_order = pd.unique(data[x]).categories @@ -129,8 +107,10 @@ def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. 
means = data.groupby(x)[y].mean().reindex(index=group_order) - if method in ['proportional_error_bar', 'sankey_error_bar']: - g = lambda x: np.sqrt((np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x))) + if method in ["proportional_error_bar", "sankey_error_bar"]: + g = lambda x: np.sqrt( + (np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x)) + ) sd = data.groupby(x)[y].apply(g) else: sd = data.groupby(x)[y].std().reindex(index=group_order) @@ -139,20 +119,20 @@ def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. upper_sd = means + sd if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any(): - kwargs['clip_on'] = True + kwargs["clip_on"] = True medians = data.groupby(x)[y].median().reindex(index=group_order) - quantiles = data.groupby(x)[y].quantile([0.25, 0.75]) \ - .unstack() \ - .reindex(index=group_order) + quantiles = ( + data.groupby(x)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order) + ) lower_quartiles = quantiles[0.25] upper_quartiles = quantiles[0.75] - if type == 'mean_sd': + if type == "mean_sd": central_measures = means lows = lower_sd highs = upper_sd - elif type == 'median_quartiles': + elif type == "median_quartiles": central_measures = medians lows = lower_quartiles highs = upper_quartiles @@ -179,13 +159,12 @@ def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. err2 = "{} offset(s) were supplied in `offset`.".format(len_offset) raise ValueError(err1 + err2) - kwargs['zorder'] = kwargs['zorder'] + kwargs["zorder"] = kwargs["zorder"] for xpos, central_measure in enumerate(central_measures): - - kwargs['color'] = custom_palette[xpos] + kwargs["color"] = custom_palette[xpos] - if method == 'sankey_error_bar': + if method == "sankey_error_bar": _xpos = pos[xpos] + offset[xpos] else: _xpos = xpos + offset[xpos] @@ -193,36 +172,37 @@ def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format. 
low = lows[xpos] high = highs[xpos] if low == high == central_measure: - low_to_mean = mlines.Line2D([_xpos, _xpos], - [low, central_measure], - **kwargs) + low_to_mean = mlines.Line2D( + [_xpos, _xpos], [low, central_measure], **kwargs + ) ax.add_line(low_to_mean) - - mean_to_high = mlines.Line2D([_xpos, _xpos], - [central_measure, high], - **kwargs) + + mean_to_high = mlines.Line2D( + [_xpos, _xpos], [central_measure, high], **kwargs + ) ax.add_line(mean_to_high) else: - low_to_mean = mlines.Line2D([_xpos, _xpos], - [low, central_measure - gap_width], - **kwargs) + low_to_mean = mlines.Line2D( + [_xpos, _xpos], [low, central_measure - gap_width], **kwargs + ) ax.add_line(low_to_mean) - - mean_to_high = mlines.Line2D([_xpos, _xpos], - [central_measure + gap_width, high], - **kwargs) + + mean_to_high = mlines.Line2D( + [_xpos, _xpos], [central_measure + gap_width, high], **kwargs + ) ax.add_line(mean_to_high) - - -def check_data_matches_labels(labels,#list of input labels - data, #Pandas Series of input data - side:str # 'left' or 'right' on the sankey diagram - ): - ''' - Function to check that the labels and data match in the sankey diagram. + + +def check_data_matches_labels( + labels, # list of input labels + data, # Pandas Series of input data + side: str, # 'left' or 'right' on the sankey diagram +): + """ + Function to check that the labels and data match in the sankey diagram. And enforce labels and data to be lists. Raises an exception if the labels and data do not match. 
- ''' + """ if len(labels) > 0: if isinstance(data, list): data = set(data) @@ -238,12 +218,18 @@ def check_data_matches_labels(labels,#list of input labels msg += "Data: " + ",".join(data) raise Exception(f"{side} labels and data do not match.{msg}") - + def normalize_dict(nested_dict, target): val = {} for key in nested_dict.keys(): - val[key] = np.sum([nested_dict[sub_key][key] for sub_key in nested_dict.keys() if key in nested_dict[sub_key]]) - + val[key] = np.sum( + [ + nested_dict[sub_key][key] + for sub_key in nested_dict.keys() + if key in nested_dict[sub_key] + ] + ) + for key, value in nested_dict.items(): if isinstance(value, dict): for subkey in value.keys(): @@ -251,66 +237,68 @@ def normalize_dict(nested_dict, target): if subkey in val.keys(): if val[subkey] != 0: # Address the problem when one of the label have zero value - value[subkey] = value[subkey] * target[subkey]['right']/val[subkey] + value[subkey] = ( + value[subkey] * target[subkey]["right"] / val[subkey] + ) else: value[subkey] = 0 else: - value[subkey] = target[subkey]['right'] + value[subkey] = target[subkey]["right"] return nested_dict -def width_determine(labels, data, pos='left'): +def width_determine(labels, data, pos="left"): widths_norm = defaultdict() for i, label in enumerate(labels): myD = {} myD[pos] = data[data[pos] == label][pos + "Weight"].sum() if len(labels) != 1: if i == 0: - myD['bottom'] = 0 + myD["bottom"] = 0 myD[pos] -= 0.01 - myD['top'] = myD[pos] + myD["top"] = myD[pos] elif i == len(labels) - 1: myD[pos] -= 0.01 - myD['bottom'] = 1 - myD[pos] - myD['top'] = 1 + myD["bottom"] = 1 - myD[pos] + myD["top"] = 1 else: myD[pos] -= 0.02 - myD['bottom'] = widths_norm[labels[i - 1]]['top'] + 0.02 - myD['top'] = myD['bottom'] + myD[pos] - topEdge = myD['top'] + myD["bottom"] = widths_norm[labels[i - 1]]["top"] + 0.02 + myD["top"] = myD["bottom"] + myD[pos] else: - myD['bottom'] = 0 - myD['top'] = 1 + myD["bottom"] = 0 + myD["top"] = 1 widths_norm[label] = myD return 
widths_norm -def single_sankey(left:np.array,# data on the left of the diagram - right:np.array, # data on the right of the diagram, len(left) == len(right) - xpos:float=0, # the starting point on the x-axis - leftWeight:np.array=None, #weights for the left labels, if None, all weights are 1 - rightWeight:np.array=None, #weights for the right labels, if None, all weights are corresponding leftWeight - colorDict:dict=None, #input format: {'label': 'color'} - leftLabels:list=None, #labels for the left side of the diagram. The diagram will be sorted by these labels. - rightLabels:list=None, #labels for the right side of the diagram. The diagram will be sorted by these labels. - ax=None, #matplotlib axes to be drawn on - flow:bool=True, #if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison - sankey:bool=True, #if True, draw the sankey diagram, else draw barplot - width=0.5, - alpha=0.65, - bar_width=0.2, - error_bar_on:bool=True, #if True, draw error bar for each group comparison - strip_on:bool=True, #if True, draw strip for each group comparison - one_sankey:bool=False, #if True, only draw one sankey diagram - rightColor:bool=False, #if True, each strip of the diagram will be colored according to the corresponding left labels - align:bool='center'# if 'center', the diagram will be centered on each xtick, if 'edge', the diagram will be aligned with the left edge of each xtick - ): - - ''' + +def single_sankey( + left: np.array, # data on the left of the diagram + right: np.array, # data on the right of the diagram, len(left) == len(right) + xpos: float = 0, # the starting point on the x-axis + leftWeight: np.array = None, # weights for the left labels, if None, all weights are 1 + rightWeight: np.array = None, # weights for the right labels, if None, all weights are corresponding leftWeight + colorDict: dict = None, # input format: {'label': 'color'} + leftLabels: list = None, # labels for the left side of the diagram. 
The diagram will be sorted by these labels. + rightLabels: list = None, # labels for the right side of the diagram. The diagram will be sorted by these labels. + ax=None, # matplotlib axes to be drawn on + flow: bool = True, # if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison + sankey: bool = True, # if True, draw the sankey diagram, else draw barplot + width=0.5, + alpha=0.65, + bar_width=0.2, + error_bar_on: bool = True, # if True, draw error bar for each group comparison + strip_on: bool = True, # if True, draw strip for each group comparison + one_sankey: bool = False, # if True, only draw one sankey diagram + rightColor: bool = False, # if True, each strip of the diagram will be colored according to the corresponding left labels + align: bool = "center", # if 'center', the diagram will be centered on each xtick, if 'edge', the diagram will be aligned with the left edge of each xtick +): + """ Make a single Sankey diagram showing proportion flow from left to right Original code from: https://github.com/anazalea/pySankey Changes are added to normalize each diagram's height to be 1 - ''' + """ # Initiating values if ax is None: @@ -335,26 +323,35 @@ def single_sankey(left:np.array,# data on the left of the diagram left.reset_index(drop=True, inplace=True) if isinstance(right, pd.Series): right.reset_index(drop=True, inplace=True) - dataFrame = pd.DataFrame({'left': left, 'right': right, 'leftWeight': leftWeight, - 'rightWeight': rightWeight}, index=range(len(left))) - - if dataFrame[['left', 'right']].isnull().any(axis=None): - raise Exception('Sankey graph does not support null values.') + dataFrame = pd.DataFrame( + { + "left": left, + "right": right, + "leftWeight": leftWeight, + "rightWeight": rightWeight, + }, + index=range(len(left)), + ) + + if dataFrame[["left", "right"]].isnull().any(axis=None): + raise Exception("Sankey graph does not support null values.") # Identify all labels that appear 'left' or 'right' - 
allLabels = pd.Series(np.sort(np.r_[dataFrame.left.unique(), dataFrame.right.unique()])[::-1]).unique() + allLabels = pd.Series( + np.sort(np.r_[dataFrame.left.unique(), dataFrame.right.unique()])[::-1] + ).unique() # Identify left labels if len(leftLabels) == 0: leftLabels = pd.Series(np.sort(dataFrame.left.unique())[::-1]).unique() else: - check_data_matches_labels(leftLabels, dataFrame['left'], 'left') + check_data_matches_labels(leftLabels, dataFrame["left"], "left") # Identify right labels if len(rightLabels) == 0: rightLabels = pd.Series(np.sort(dataFrame.right.unique())[::-1]).unique() else: - check_data_matches_labels(leftLabels, dataFrame['right'], 'right') + check_data_matches_labels(leftLabels, dataFrame["right"], "right") # If no colorDict given, make one if colorDict is None: @@ -363,31 +360,33 @@ def single_sankey(left:np.array,# data on the left of the diagram colorPalette = sns.color_palette(palette, len(allLabels)) for i, label in enumerate(allLabels): colorDict[label] = colorPalette[i] - fail_color = {0:"grey"} + fail_color = {0: "grey"} colorDict.update(fail_color) else: missing = [label for label in allLabels if label not in colorDict.keys()] if missing: msg = "The palette parameter is missing values for the following labels : " - msg += '{}'.format(', '.join(missing)) + msg += "{}".format(", ".join(missing)) raise ValueError(msg) if align not in ("center", "edge"): - err = '{} assigned for `align` is not valid.'.format(align) + err = "{} assigned for `align` is not valid.".format(align) raise ValueError(err) if align == "center": try: leftpos = xpos - width / 2 except TypeError as e: - raise TypeError(f'the dtypes of parameters x ({xpos.dtype}) ' - f'and width ({width.dtype}) ' - f'are incompatible') from e - else: + raise TypeError( + f"the dtypes of parameters x ({xpos.dtype}) " + f"and width ({width.dtype}) " + f"are incompatible" + ) from e + else: leftpos = xpos # Combine left and right arrays to have a pandas.DataFrame in the 'long' 
format - left_series = pd.Series(left, name='values').to_frame().assign(groups='left') - right_series = pd.Series(right, name='values').to_frame().assign(groups='right') + left_series = pd.Series(left, name="values").to_frame().assign(groups="left") + right_series = pd.Series(right, name="values").to_frame().assign(groups="right") concatenated_df = pd.concat([left_series, right_series], ignore_index=True) # Determine positions of left label patches and total widths @@ -395,53 +394,57 @@ def single_sankey(left:np.array,# data on the left of the diagram leftWidths_norm = defaultdict() for i, leftLabel in enumerate(leftLabels): myD = {} - myD['left'] = (dataFrame[dataFrame.left == leftLabel].leftWeight.sum()/ \ - dataFrame.leftWeight.sum()) + myD["left"] = ( + dataFrame[dataFrame.left == leftLabel].leftWeight.sum() + / dataFrame.leftWeight.sum() + ) if len(leftLabels) != 1: if i == 0: - myD['bottom'] = 0 - myD['left'] -= 0.01 - myD['top'] = myD['left'] + myD["bottom"] = 0 + myD["left"] -= 0.01 + myD["top"] = myD["left"] elif i == len(leftLabels) - 1: - myD['left'] -= 0.01 - myD['bottom'] = 1 - myD['left'] - myD['top'] = 1 + myD["left"] -= 0.01 + myD["bottom"] = 1 - myD["left"] + myD["top"] = 1 else: - myD['left'] -= 0.02 - myD['bottom'] = leftWidths_norm[leftLabels[i - 1]]['top'] + 0.02 - myD['top'] = myD['bottom'] + myD['left'] - topEdge = myD['top'] + myD["left"] -= 0.02 + myD["bottom"] = leftWidths_norm[leftLabels[i - 1]]["top"] + 0.02 + myD["top"] = myD["bottom"] + myD["left"] + topEdge = myD["top"] else: - myD['bottom'] = 0 - myD['top'] = 1 - myD['left'] = 1 + myD["bottom"] = 0 + myD["top"] = 1 + myD["left"] = 1 leftWidths_norm[leftLabel] = myD # Determine positions of right label patches and total widths rightWidths_norm = defaultdict() for i, rightLabel in enumerate(rightLabels): myD = {} - myD['right'] = (dataFrame[dataFrame.right == rightLabel].rightWeight.sum()/ \ - dataFrame.rightWeight.sum()) + myD["right"] = ( + dataFrame[dataFrame.right == 
rightLabel].rightWeight.sum() + / dataFrame.rightWeight.sum() + ) if len(rightLabels) != 1: if i == 0: - myD['bottom'] = 0 - myD['right'] -= 0.01 - myD['top'] = myD['right'] + myD["bottom"] = 0 + myD["right"] -= 0.01 + myD["top"] = myD["right"] elif i == len(rightLabels) - 1: - myD['right'] -= 0.01 - myD['bottom'] = 1 - myD['right'] - myD['top'] = 1 + myD["right"] -= 0.01 + myD["bottom"] = 1 - myD["right"] + myD["top"] = 1 else: - myD['right'] -= 0.02 - myD['bottom'] = rightWidths_norm[rightLabels[i - 1]]['top'] + 0.02 - myD['top'] = myD['bottom'] + myD['right'] - topEdge = myD['top'] + myD["right"] -= 0.02 + myD["bottom"] = rightWidths_norm[rightLabels[i - 1]]["top"] + 0.02 + myD["top"] = myD["bottom"] + myD["right"] + topEdge = myD["top"] else: - myD['bottom'] = 0 - myD['top'] = 1 - myD['right'] = 1 - rightWidths_norm[rightLabel] = myD + myD["bottom"] = 0 + myD["top"] = 1 + myD["right"] = 1 + rightWidths_norm[rightLabel] = myD # Total width of the graph xMax = width @@ -458,19 +461,29 @@ def single_sankey(left:np.array,# data on the left of the diagram if (flow == False and sankey == True) or one_sankey: for rightLabel in rightLabels: ax.fill_between( - [xMax + leftpos + (-bar_width * xMax * 0.5), leftpos + xMax + (bar_width * xMax * 0.5)], + [ + xMax + leftpos + (-bar_width * xMax * 0.5), + leftpos + xMax + (bar_width * xMax * 0.5), + ], 2 * [rightWidths_norm[rightLabel]["bottom"]], 2 * [rightWidths_norm[rightLabel]["top"]], color=colorDict[rightLabel], - alpha=0.99 + alpha=0.99, ) # Plot error bars if error_bar_on and strip_on: - error_bar(concatenated_df, x='groups', y='values', ax=ax, offset=0, gap_width_percent=2, - method="sankey_error_bar", - pos=[leftpos, leftpos + xMax]) - + error_bar( + concatenated_df, + x="groups", + y="values", + ax=ax, + offset=0, + gap_width_percent=2, + method="sankey_error_bar", + pos=[leftpos, leftpos + xMax], + ) + # Determine widths of individual strips, all widths are normalized to 1 ns_l = defaultdict() ns_r = defaultdict() 
@@ -482,96 +495,121 @@ def single_sankey(left:np.array,# data on the left of the diagram for rightLabel in rightLabels: leftDict[rightLabel] = dataFrame[ (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel) - ].leftWeight.sum() - + ].leftWeight.sum() + rightDict[rightLabel] = dataFrame[ (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel) - ].rightWeight.sum() - factorleft = leftWidths_norm[leftLabel]['left']/sum(leftDict.values()) - leftDict_norm = {k: v*factorleft for k, v in leftDict.items()} + ].rightWeight.sum() + factorleft = leftWidths_norm[leftLabel]["left"] / sum(leftDict.values()) + leftDict_norm = {k: v * factorleft for k, v in leftDict.items()} ns_l_norm[leftLabel] = leftDict_norm ns_r[leftLabel] = rightDict - + # ns_r should be using a different way of normalization to fit the right side # It is normalized using the value with the same key in each sub-dictionary ns_r_norm = normalize_dict(ns_r, rightWidths_norm) - + # Plot strips - if sankey == True and strip_on == True: + if sankey and strip_on: for leftLabel, rightLabel in itertools.product(leftLabels, rightLabels): labelColor = leftLabel if rightColor: labelColor = rightLabel - if len(dataFrame[(dataFrame.left == leftLabel) & (dataFrame.right == rightLabel)]) > 0: + if ( + len( + dataFrame[ + (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel) + ] + ) + > 0 + ): # Create array of y values for each strip, half at left value, # half at right, convolve - ys_d = np.array(50 * [leftWidths_norm[leftLabel]['bottom']] + \ - 50 * [rightWidths_norm[rightLabel]['bottom']]) - ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode='valid') - ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode='valid') - ys_u = np.array(50 * [leftWidths_norm[leftLabel]['bottom'] + ns_l_norm[leftLabel][rightLabel]] + \ - 50 * [rightWidths_norm[rightLabel]['bottom'] + ns_r_norm[leftLabel][rightLabel]]) - ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode='valid') - ys_u = np.convolve(ys_u, 0.05 * 
np.ones(20), mode='valid') + ys_d = np.array( + 50 * [leftWidths_norm[leftLabel]["bottom"]] + + 50 * [rightWidths_norm[rightLabel]["bottom"]] + ) + ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode="valid") + ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode="valid") + ys_u = np.array( + 50 + * [ + leftWidths_norm[leftLabel]["bottom"] + + ns_l_norm[leftLabel][rightLabel] + ] + + 50 + * [ + rightWidths_norm[rightLabel]["bottom"] + + ns_r_norm[leftLabel][rightLabel] + ] + ) + ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode="valid") + ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode="valid") # Update bottom edges at each label so next strip starts at the right place - leftWidths_norm[leftLabel]['bottom'] += ns_l_norm[leftLabel][rightLabel] - rightWidths_norm[rightLabel]['bottom'] += ns_r_norm[leftLabel][rightLabel] + leftWidths_norm[leftLabel]["bottom"] += ns_l_norm[leftLabel][rightLabel] + rightWidths_norm[rightLabel]["bottom"] += ns_r_norm[leftLabel][ + rightLabel + ] ax.fill_between( - np.linspace(leftpos + (bar_width * xMax * 0.5), \ - leftpos + xMax - (bar_width * xMax * 0.5), len(ys_d)), \ - ys_d, ys_u, alpha=alpha, - color=colorDict[labelColor], edgecolor='none' + np.linspace( + leftpos + (bar_width * xMax * 0.5), + leftpos + xMax - (bar_width * xMax * 0.5), + len(ys_d), + ), + ys_d, + ys_u, + alpha=alpha, + color=colorDict[labelColor], + edgecolor="none", ) - -def sankeydiag(data:pd.DataFrame, - xvar:str, # x column to be plotted. - yvar:str, # y column to be plotted. - left_idx:str, #the value in column xvar that is on the left side of each sankey diagram - right_idx:str, #the value in column xvar that is on the right side of each sankey diagram, if len(left_idx) == 1, it will be broadcasted to the same length as right_idx, otherwise it should have the same length as right_idx - leftLabels:list=None, #labels for the left side of the diagram. The diagram will be sorted by these labels. - rightLabels:list=None, #labels for the right side of the diagram. 
The diagram will be sorted by these labels. - palette:str|dict=None, - ax=None, #matplotlib axes to be drawn on - flow:bool=True, #if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison - sankey:bool=True, #if True, draw the sankey diagram, else draw barplot - one_sankey:bool=False,# determined by the driver function on plotter.py, if True, draw the sankey diagram across the whole raw data axes - width:float=0.4, # the width of each sankey diagram - rightColor:bool=False,#if True, each strip of the diagram will be colored according to the corresponding left labels - align:str='center', #the alignment of each sankey diagram, can be 'center' or 'left' - alpha:float=0.65, #the transparency of each strip - **kwargs): - ''' + + +def sankeydiag( + data: pd.DataFrame, + xvar: str, # x column to be plotted. + yvar: str, # y column to be plotted. + left_idx: str, # the value in column xvar that is on the left side of each sankey diagram + right_idx: str, # the value in column xvar that is on the right side of each sankey diagram, if len(left_idx) == 1, it will be broadcasted to the same length as right_idx, otherwise it should have the same length as right_idx + leftLabels: list = None, # labels for the left side of the diagram. The diagram will be sorted by these labels. + rightLabels: list = None, # labels for the right side of the diagram. The diagram will be sorted by these labels. 
+ palette: str | dict = None, + ax=None, # matplotlib axes to be drawn on + flow: bool = True, # if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison + sankey: bool = True, # if True, draw the sankey diagram, else draw barplot + one_sankey: bool = False, # determined by the driver function on plotter.py, if True, draw the sankey diagram across the whole raw data axes + width: float = 0.4, # the width of each sankey diagram + rightColor: bool = False, # if True, each strip of the diagram will be colored according to the corresponding left labels + align: str = "center", # the alignment of each sankey diagram, can be 'center' or 'left' + alpha: float = 0.65, # the transparency of each strip + **kwargs, +): + """ Read in melted pd.DataFrame, and draw multiple sankey diagram on a single axes using the value in column yvar according to the value in column xvar left_idx in the column xvar is on the left side of each sankey diagram right_idx in the column xvar is on the right side of each sankey diagram - ''' - - import numpy as np - import pandas as pd - import seaborn as sns - import matplotlib.pyplot as plt + """ if "width" in kwargs: width = kwargs["width"] if "align" in kwargs: align = kwargs["align"] - + if "alpha" in kwargs: alpha = kwargs["alpha"] - + if "rightColor" in kwargs: rightColor = kwargs["rightColor"] - + if "bar_width" in kwargs: bar_width = kwargs["bar_width"] - + if "sankey" in kwargs: sankey = kwargs["sankey"] - + if "flow" in kwargs: flow = kwargs["flow"] @@ -579,7 +617,7 @@ def sankeydiag(data:pd.DataFrame, ax = plt.gca() allLabels = pd.Series(np.sort(data[yvar].unique())[::-1]).unique() - + # Check if all the elements in left_idx and right_idx are in xvar column unique_xvar = data[xvar].unique() if not all(elem in unique_xvar for elem in left_idx): @@ -591,7 +629,7 @@ def sankeydiag(data:pd.DataFrame, # For baseline comparison, broadcast left_idx to the same length as right_idx # so that the left of sankey 
diagram will be the same - # For sequential comparison, left_idx and right_idx can have anything different + # For sequential comparison, left_idx and right_idx can have anything different # but should have the same length if len(left_idx) == 1: broadcasted_left = np.broadcast_to(left_idx, len(right_idx)) @@ -603,8 +641,7 @@ def sankeydiag(data:pd.DataFrame, if isinstance(palette, dict): if not all(key in allLabels for key in palette.keys()): raise ValueError(f"keys in palette should be in {yvar} column") - else: - plot_palette = palette + plot_palette = palette elif isinstance(palette, str): plot_palette = {} colorPalette = sns.color_palette(palette, len(allLabels)) @@ -614,38 +651,75 @@ def sankeydiag(data:pd.DataFrame, plot_palette = None # Create a strip_on list to determine whether to draw the strip during repeated measures - strip_on = [int(right not in broadcasted_left[:i]) for i, right in enumerate(right_idx)] + strip_on = [ + int(right not in broadcasted_left[:i]) for i, right in enumerate(right_idx) + ] draw_idx = list(zip(broadcasted_left, right_idx)) for i, (left, right) in enumerate(draw_idx): if one_sankey == False: if flow == True: width = 1 - align = 'edge' - sankey = False if i == len(draw_idx)-1 else sankey # Remove last strip in flow - error_bar_on = False if i == len(draw_idx)-1 and flow else True # Remove last error_bar in flow + align = "edge" + sankey = ( + False if i == len(draw_idx) - 1 else sankey + ) # Remove last strip in flow + error_bar_on = ( + False if i == len(draw_idx) - 1 and flow else True + ) # Remove last error_bar in flow bar_width = 0.4 if sankey == False and flow == False else bar_width - single_sankey(data[data[xvar]==left][yvar], data[data[xvar]==right][yvar], - xpos=xpos, ax=ax, colorDict=plot_palette, width=width, - leftLabels=leftLabels, rightLabels=rightLabels, strip_on=strip_on[i], - rightColor=rightColor, bar_width=bar_width, sankey=sankey, - error_bar_on=error_bar_on, flow=flow, align=align, alpha=alpha) + 
single_sankey( + data[data[xvar] == left][yvar], + data[data[xvar] == right][yvar], + xpos=xpos, + ax=ax, + colorDict=plot_palette, + width=width, + leftLabels=leftLabels, + rightLabels=rightLabels, + strip_on=strip_on[i], + rightColor=rightColor, + bar_width=bar_width, + sankey=sankey, + error_bar_on=error_bar_on, + flow=flow, + align=align, + alpha=alpha, + ) xpos += 1 else: xpos = 0 width = 1 if sankey == False: bar_width = 0.5 - single_sankey(data[data[xvar]==left][yvar], data[data[xvar]==right][yvar], - xpos=xpos, ax=ax, colorDict=plot_palette, width=width, - leftLabels=leftLabels, rightLabels=rightLabels, - rightColor=rightColor, bar_width=bar_width, sankey=sankey, - one_sankey=one_sankey, flow=False, align='edge', alpha=alpha) - -# Now only draw vs xticks for two-column sankey diagram - if one_sankey == False or (sankey and not flow): - sankey_ticks = [f"{left}" for left in broadcasted_left] if flow \ - else [f"{left}\n v.s.\n{right}" for left, right in zip(broadcasted_left, right_idx)] + single_sankey( + data[data[xvar] == left][yvar], + data[data[xvar] == right][yvar], + xpos=xpos, + ax=ax, + colorDict=plot_palette, + width=width, + leftLabels=leftLabels, + rightLabels=rightLabels, + rightColor=rightColor, + bar_width=bar_width, + sankey=sankey, + one_sankey=one_sankey, + flow=False, + align="edge", + alpha=alpha, + ) + + # Now only draw vs xticks for two-column sankey diagram + if ~one_sankey or (sankey and not flow): + sankey_ticks = ( + [f"{left}" for left in broadcasted_left] + if flow + else [ + f"{left}\n v.s.\n{right}" + for left, right in zip(broadcasted_left, right_idx) + ] + ) ax.get_xaxis().set_ticks(np.arange(len(right_idx))) ax.get_xaxis().set_ticklabels(sankey_ticks) else: diff --git a/dabest/plotter.py b/dabest/plotter.py index a1cf2929..7bcdb136 100644 --- a/dabest/plotter.py +++ b/dabest/plotter.py @@ -4,14 +4,24 @@ __all__ = ['EffectSizeDataFramePlotter'] # %% ../nbs/API/plotter.ipynb 4 -def EffectSizeDataFramePlotter(EffectSizeDataFrame, 
**plot_kwargs): +import numpy as np +import seaborn as sns +import matplotlib +import matplotlib.pyplot as plt +import pandas as pd +import warnings +import logging + +# %% ../nbs/API/plotter.ipynb 5 +# TODO refactor function name +def EffectSizeDataFramePlotter(effectsize_df, **plot_kwargs): """ Custom function that creates an estimation plot from an EffectSizeDataFrame. Keywords -------- Parameters ---------- - EffectSizeDataFrame + effectsize_df A `dabest` EffectSizeDataFrame object. plot_kwargs color_col=None @@ -44,31 +54,28 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): fontsize_contrastxlabel=12, fontsize_contrastylabel=12, fontsize_delta2label=12 """ - - import numpy as np - import seaborn as sns - import matplotlib - import matplotlib.pyplot as plt - import pandas as pd - import warnings - warnings.filterwarnings('ignore', 'This figure includes Axes that are not compatible with tight_layout') - from .misc_tools import merge_two_dicts from .plot_tools import halfviolin, get_swarm_spans, error_bar, sankeydiag - from ._stats_tools.effsize import _compute_standardizers, _compute_hedges_correction_factor + from ._stats_tools.effsize import ( + _compute_standardizers, + _compute_hedges_correction_factor, + ) + + warnings.filterwarnings( + "ignore", "This figure includes Axes that are not compatible with tight_layout" + ) - import logging # Have to disable logging of warning when get_legend_handles_labels() # tries to get from slopegraph. logging.disable(logging.WARNING) # Save rcParams that I will alter, so I can reset back. 
original_rcParams = {} - _changed_rcParams = ['axes.grid'] + _changed_rcParams = ["axes.grid"] for parameter in _changed_rcParams: original_rcParams[parameter] = plt.rcParams[parameter] - plt.rcParams['axes.grid'] = False + plt.rcParams["axes.grid"] = False ytick_color = plt.rcParams["ytick.color"] face_color = plot_kwargs["face_color"] @@ -76,18 +83,18 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if plot_kwargs["face_color"] is None: face_color = "white" - dabest_obj = EffectSizeDataFrame.dabest_obj - plot_data = EffectSizeDataFrame._plot_data - xvar = EffectSizeDataFrame.xvar - yvar = EffectSizeDataFrame.yvar - is_paired = EffectSizeDataFrame.is_paired - delta2 = EffectSizeDataFrame.delta2 - mini_meta = EffectSizeDataFrame.mini_meta - effect_size = EffectSizeDataFrame.effect_size - proportional = EffectSizeDataFrame.proportional + dabest_obj = effectsize_df.dabest_obj + plot_data = effectsize_df._plot_data + xvar = effectsize_df.xvar + yvar = effectsize_df.yvar + is_paired = effectsize_df.is_paired + delta2 = effectsize_df.delta2 + mini_meta = effectsize_df.mini_meta + effect_size = effectsize_df.effect_size + proportional = effectsize_df.proportional all_plot_groups = dabest_obj._all_plot_groups - idx = dabest_obj.idx + idx = dabest_obj.idx if effect_size not in ["mean_diff", "delta_g"] or not delta2: show_delta2 = False @@ -105,16 +112,16 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): # Disable Gardner-Altman plotting if any of the idxs comprise of more than # two groups or if it is a delta-delta plot. 
- float_contrast = plot_kwargs["float_contrast"] - effect_size_type = EffectSizeDataFrame.effect_size + float_contrast = plot_kwargs["float_contrast"] + effect_size_type = effectsize_df.effect_size if len(idx) > 1 or len(idx[0]) > 2: float_contrast = False - if effect_size_type in ['cliffs_delta']: + if effect_size_type in ["cliffs_delta"]: float_contrast = False if show_delta2 or show_mini_meta: - float_contrast = False + float_contrast = False if not is_paired: show_pairs = False @@ -122,12 +129,13 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): show_pairs = plot_kwargs["show_pairs"] # Set default kwargs first, then merge with user-dictated ones. - default_swarmplot_kwargs = {'size': plot_kwargs["raw_marker_size"]} + default_swarmplot_kwargs = {"size": plot_kwargs["raw_marker_size"]} if plot_kwargs["swarmplot_kwargs"] is None: swarmplot_kwargs = default_swarmplot_kwargs else: - swarmplot_kwargs = merge_two_dicts(default_swarmplot_kwargs, - plot_kwargs["swarmplot_kwargs"]) + swarmplot_kwargs = merge_two_dicts( + default_swarmplot_kwargs, plot_kwargs["swarmplot_kwargs"] + ) # Barplot kwargs default_barplot_kwargs = {"estimator": np.mean, "errorbar": plot_kwargs["ci"]} @@ -135,87 +143,105 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if plot_kwargs["barplot_kwargs"] is None: barplot_kwargs = default_barplot_kwargs else: - barplot_kwargs = merge_two_dicts(default_barplot_kwargs, - plot_kwargs["barplot_kwargs"]) + barplot_kwargs = merge_two_dicts( + default_barplot_kwargs, plot_kwargs["barplot_kwargs"] + ) # Sankey Diagram kwargs - default_sankey_kwargs = {"width": 0.4, "align": "center", - "sankey":True, "flow":True, - "alpha": 0.4, "rightColor": False, - "bar_width":0.2} + default_sankey_kwargs = { + "width": 0.4, + "align": "center", + "sankey": True, + "flow": True, + "alpha": 0.4, + "rightColor": False, + "bar_width": 0.2, + } if plot_kwargs["sankey_kwargs"] is None: sankey_kwargs = default_sankey_kwargs else: - 
sankey_kwargs = merge_two_dicts(default_sankey_kwargs, - plot_kwargs["sankey_kwargs"]) + sankey_kwargs = merge_two_dicts( + default_sankey_kwargs, plot_kwargs["sankey_kwargs"] + ) # We also need to extract the `sankey` and `flow` from the kwargs for plotter.py # to use for varying different kinds of paired proportional plots # We also don't want to pop the parameter from the kwargs - sankey = sankey_kwargs['sankey'] - flow = sankey_kwargs['flow'] + sankey = sankey_kwargs["sankey"] + flow = sankey_kwargs["flow"] # Violinplot kwargs. - default_violinplot_kwargs = {'widths':0.5, 'vert':True, - 'showextrema':False, 'showmedians':False} + default_violinplot_kwargs = { + "widths": 0.5, + "vert": True, + "showextrema": False, + "showmedians": False, + } if plot_kwargs["violinplot_kwargs"] is None: violinplot_kwargs = default_violinplot_kwargs else: - violinplot_kwargs = merge_two_dicts(default_violinplot_kwargs, - plot_kwargs["violinplot_kwargs"]) + violinplot_kwargs = merge_two_dicts( + default_violinplot_kwargs, plot_kwargs["violinplot_kwargs"] + ) # slopegraph kwargs. - default_slopegraph_kwargs = {'linewidth':1, 'alpha':0.5} + default_slopegraph_kwargs = {"linewidth": 1, "alpha": 0.5} if plot_kwargs["slopegraph_kwargs"] is None: slopegraph_kwargs = default_slopegraph_kwargs else: - slopegraph_kwargs = merge_two_dicts(default_slopegraph_kwargs, - plot_kwargs["slopegraph_kwargs"]) + slopegraph_kwargs = merge_two_dicts( + default_slopegraph_kwargs, plot_kwargs["slopegraph_kwargs"] + ) # Zero reference-line kwargs. 
- default_reflines_kwargs = {'linestyle':'solid', 'linewidth':0.75, - 'zorder': 2, - 'color': ytick_color} + default_reflines_kwargs = { + "linestyle": "solid", + "linewidth": 0.75, + "zorder": 2, + "color": ytick_color, + } if plot_kwargs["reflines_kwargs"] is None: reflines_kwargs = default_reflines_kwargs else: - reflines_kwargs = merge_two_dicts(default_reflines_kwargs, - plot_kwargs["reflines_kwargs"]) + reflines_kwargs = merge_two_dicts( + default_reflines_kwargs, plot_kwargs["reflines_kwargs"] + ) # Legend kwargs. - default_legend_kwargs = {'loc': 'upper left', 'frameon': False} + default_legend_kwargs = {"loc": "upper left", "frameon": False} if plot_kwargs["legend_kwargs"] is None: legend_kwargs = default_legend_kwargs else: - legend_kwargs = merge_two_dicts(default_legend_kwargs, - plot_kwargs["legend_kwargs"]) - - -################################################### GRIDKEY WIP - extracting arguments - + legend_kwargs = merge_two_dicts( + default_legend_kwargs, plot_kwargs["legend_kwargs"] + ) + + ################################################### GRIDKEY WIP - extracting arguments + gridkey_rows = plot_kwargs["gridkey_rows"] gridkey_merge_pairs = plot_kwargs["gridkey_merge_pairs"] gridkey_show_Ns = plot_kwargs["gridkey_show_Ns"] gridkey_show_es = plot_kwargs["gridkey_show_es"] - - if gridkey_rows == None: + + if gridkey_rows is None: gridkey_show_Ns = False gridkey_show_es = False - -################################################### END GRIDKEY WIP - extracting arguments + + ################################################### END GRIDKEY WIP - extracting arguments # Group summaries kwargs. 
- gs_default = {'mean_sd', 'median_quartiles', None} + gs_default = {"mean_sd", "median_quartiles", None} if plot_kwargs["group_summaries"] not in gs_default: - raise ValueError('group_summaries must be one of' - ' these: {}.'.format(gs_default) ) + raise ValueError( + "group_summaries must be one of" " these: {}.".format(gs_default) + ) - default_group_summary_kwargs = {'zorder': 3, 'lw': 2, - 'alpha': 1} + default_group_summary_kwargs = {"zorder": 3, "lw": 2, "alpha": 1} if plot_kwargs["group_summary_kwargs"] is None: group_summary_kwargs = default_group_summary_kwargs else: - group_summary_kwargs = merge_two_dicts(default_group_summary_kwargs, - plot_kwargs["group_summary_kwargs"]) + group_summary_kwargs = merge_two_dicts( + default_group_summary_kwargs, plot_kwargs["group_summary_kwargs"] + ) # Create color palette that will be shared across subplots. color_col = plot_kwargs["color_col"] @@ -241,35 +267,24 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if custom_pal is None: unsat_colors = sns.color_palette(n_colors=n_groups) else: - if isinstance(custom_pal, dict): - groups_in_palette = {k: v for k,v in custom_pal.items() - if k in color_groups} - - # # check that all the keys in custom_pal are found in the - # # color column. - # col_grps = {k for k in color_groups} - # pal_grps = {k for k in custom_pal.keys()} - # not_in_pal = pal_grps.difference(col_grps) - # if len(not_in_pal) > 0: - # err1 = 'The custom palette keys {} '.format(not_in_pal) - # err2 = 'are not found in `{}`. Please check.'.format(color_col) - # errstring = (err1 + err2) - # raise IndexError(errstring) + groups_in_palette = { + k: v for k, v in custom_pal.items() if k in color_groups + } names = groups_in_palette.keys() unsat_colors = groups_in_palette.values() elif isinstance(custom_pal, list): - unsat_colors = custom_pal[0: n_groups] + unsat_colors = custom_pal[0:n_groups] elif isinstance(custom_pal, str): # check it is in the list of matplotlib palettes. 
if custom_pal in plt.colormaps(): unsat_colors = sns.color_palette(custom_pal, n_groups) else: - err1 = 'The specified `custom_palette` {}'.format(custom_pal) - err2 = ' is not a matplotlib palette. Please check.' + err1 = "The specified `custom_palette` {}".format(custom_pal) + err2 = " is not a matplotlib palette. Please check." raise ValueError(err1 + err2) if custom_pal is None and color_col is None: @@ -299,159 +314,165 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): plot_palette_sankey = custom_pal # Infer the figsize. - fig_size = plot_kwargs["fig_size"] + fig_size = plot_kwargs["fig_size"] if fig_size is None: all_groups_count = np.sum([len(i) for i in dabest_obj.idx]) # Increase the width for delta-delta graph if show_delta2 or show_mini_meta: all_groups_count += 2 - if is_paired and show_pairs is True and proportional is False: + if is_paired and show_pairs and proportional is False: frac = 0.75 else: frac = 1 - if float_contrast is True: + if float_contrast: height_inches = 4 each_group_width_inches = 2.5 * frac else: height_inches = 6 each_group_width_inches = 1.5 * frac - width_inches = (each_group_width_inches * all_groups_count) + width_inches = each_group_width_inches * all_groups_count fig_size = (width_inches, height_inches) # Initialise the figure. - # sns.set(context="talk", style='ticks') - init_fig_kwargs = dict(figsize=fig_size, dpi=plot_kwargs["dpi"] - ,tight_layout=True) + init_fig_kwargs = dict(figsize=fig_size, dpi=plot_kwargs["dpi"], tight_layout=True) width_ratios_ga = [2.5, 1] - -###################### GRIDKEY HSPACE ALTERATION + + ###################### GRIDKEY HSPACE ALTERATION # Sets hspace for cummings plots if gridkey is shown. - if gridkey_rows != None: + if gridkey_rows is not None: h_space_cummings = 0.1 else: h_space_cummings = 0.3 - - -###################### END GRIDKEY HSPACE ALTERATION - + + ###################### END GRIDKEY HSPACE ALTERATION + if plot_kwargs["ax"] is not None: # New in v0.2.6. 
# Use inset axes to create the estimation plot inside a single axes. # Author: Adam L Nekimken. (PR #73) - inset_contrast = True rawdata_axes = plot_kwargs["ax"] ax_position = rawdata_axes.get_position() # [[x0, y0], [x1, y1]] - + fig = rawdata_axes.get_figure() fig.patch.set_facecolor(face_color) - - if float_contrast is True: + + if float_contrast: axins = rawdata_axes.inset_axes( - [1, 0, - width_ratios_ga[1]/width_ratios_ga[0], 1]) + [1, 0, width_ratios_ga[1] / width_ratios_ga[0], 1] + ) rawdata_axes.set_position( # [l, b, w, h] - [ax_position.x0, - ax_position.y0, - (ax_position.x1 - ax_position.x0) * (width_ratios_ga[0] / - sum(width_ratios_ga)), - (ax_position.y1 - ax_position.y0)]) + [ + ax_position.x0, + ax_position.y0, + (ax_position.x1 - ax_position.x0) + * (width_ratios_ga[0] / sum(width_ratios_ga)), + (ax_position.y1 - ax_position.y0), + ] + ) contrast_axes = axins else: axins = rawdata_axes.inset_axes([0, -1 - h_space_cummings, 1, 1]) - plot_height = ((ax_position.y1 - ax_position.y0) / - (2 + h_space_cummings)) + plot_height = (ax_position.y1 - ax_position.y0) / (2 + h_space_cummings) rawdata_axes.set_position( - [ax_position.x0, - ax_position.y0 + (1 + h_space_cummings) * plot_height, - (ax_position.x1 - ax_position.x0), - plot_height]) - - # If the contrast axes are NOT floating, create lists to store - # raw ylims and raw tick intervals, so that I can normalize - # their ylims later. - contrast_ax_ylim_low = list() - contrast_ax_ylim_high = list() - contrast_ax_ylim_tickintervals = list() + [ + ax_position.x0, + ax_position.y0 + (1 + h_space_cummings) * plot_height, + (ax_position.x1 - ax_position.x0), + plot_height, + ] + ) + contrast_axes = axins rawdata_axes.contrast_axes = axins else: - inset_contrast = False # Here, we hardcode some figure parameters. 
- if float_contrast is True: + if float_contrast: fig, axx = plt.subplots( - ncols=2, - gridspec_kw={"width_ratios": width_ratios_ga, - "wspace": 0}, - **init_fig_kwargs) + ncols=2, + gridspec_kw={"width_ratios": width_ratios_ga, "wspace": 0}, + **init_fig_kwargs + ) fig.patch.set_facecolor(face_color) else: - fig, axx = plt.subplots(nrows=2, - gridspec_kw={"hspace": h_space_cummings}, - **init_fig_kwargs) + fig, axx = plt.subplots( + nrows=2, gridspec_kw={"hspace": h_space_cummings}, **init_fig_kwargs + ) fig.patch.set_facecolor(face_color) - # If the contrast axes are NOT floating, create lists to store - # raw ylims and raw tick intervals, so that I can normalize - # their ylims later. - contrast_ax_ylim_low = list() - contrast_ax_ylim_high = list() - contrast_ax_ylim_tickintervals = list() - + # Title title = plot_kwargs["title"] fontsize_title = plot_kwargs["fontsize_title"] if title is not None: fig.suptitle(title, fontsize=fontsize_title) - rawdata_axes = axx[0] + rawdata_axes = axx[0] contrast_axes = axx[1] rawdata_axes.set_frame_on(False) contrast_axes.set_frame_on(False) - redraw_axes_kwargs = {'colors' : ytick_color, - 'facecolors' : ytick_color, - 'lw' : 1, - 'zorder' : 10, - 'clip_on' : False} + redraw_axes_kwargs = { + "colors": ytick_color, + "facecolors": ytick_color, + "lw": 1, + "zorder": 10, + "clip_on": False, + } swarm_ylim = plot_kwargs["swarm_ylim"] if swarm_ylim is not None: rawdata_axes.set_ylim(swarm_ylim) - one_sankey = False if is_paired is not None else False # Flag to indicate if only one sankey is plotted. - two_col_sankey = True if proportional == True and one_sankey == False and sankey == True and flow == False else False + one_sankey = ( + False if is_paired is not None else False + ) # Flag to indicate if only one sankey is plotted. 
+ two_col_sankey = ( + True if proportional and not one_sankey and sankey and not flow else False + ) - if show_pairs is True: + if show_pairs: # Determine temp_idx based on is_paired and proportional conditions if is_paired == "baseline": - idx_pairs = [(control, test) for i in idx for control, test in zip([i[0]] * (len(i) - 1), i[1:])] + idx_pairs = [ + (control, test) + for i in idx + for control, test in zip([i[0]] * (len(i) - 1), i[1:]) + ] temp_idx = idx if not proportional else idx_pairs else: - idx_pairs = [(control, test) for i in idx for control, test in zip(i[:-1], i[1:])] + idx_pairs = [ + (control, test) for i in idx for control, test in zip(i[:-1], i[1:]) + ] temp_idx = idx if not proportional else idx_pairs # Determine temp_all_plot_groups based on proportional condition plot_groups = [item for i in temp_idx for item in i] temp_all_plot_groups = all_plot_groups if not proportional else plot_groups - - if proportional==False: - # Plot the raw data as a slopegraph. - # Pivot the long (melted) data. + + if not proportional: + # Plot the raw data as a slopegraph. + # Pivot the long (melted) data. if color_col is None: pivot_values = [yvar] else: pivot_values = [yvar, color_col] - pivoted_plot_data = pd.pivot(data=plot_data, index=dabest_obj.id_col, - columns=xvar, values=pivot_values) + pivoted_plot_data = pd.pivot( + data=plot_data, + index=dabest_obj.id_col, + columns=xvar, + values=pivot_values, + ) x_start = 0 for ii, current_tuple in enumerate(temp_idx): - current_pair = pivoted_plot_data.loc[:, pd.MultiIndex.from_product([pivot_values, current_tuple])].dropna() + current_pair = pivoted_plot_data.loc[ + :, pd.MultiIndex.from_product([pivot_values, current_tuple]) + ].dropna() grp_count = len(current_tuple) # Iterate through the data for the current tuple. 
for ID, observation in current_pair.iterrows(): @@ -459,136 +480,174 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): y_points = observation[yvar].tolist() if color_col is None: - slopegraph_kwargs['color'] = ytick_color + slopegraph_kwargs["color"] = ytick_color else: color_key = observation[color_col][0] - if isinstance(color_key, (str, np.int64, np.float64)) == True: - slopegraph_kwargs['color'] = plot_palette_raw[color_key] - slopegraph_kwargs['label'] = color_key + if isinstance(color_key, (str, np.int64, np.float64)): + slopegraph_kwargs["color"] = plot_palette_raw[color_key] + slopegraph_kwargs["label"] = color_key - rawdata_axes.plot(x_points, y_points, **slopegraph_kwargs) + rawdata_axes.plot(x_points, y_points, **slopegraph_kwargs) - x_start = x_start + grp_count - - ##################### DELTA PTS ON CONTRAST PLOT WIP + + ##################### DELTA PTS ON CONTRAST PLOT WIP contrast_show_deltas = plot_kwargs["contrast_show_deltas"] - - if is_paired == None: + + if is_paired is None: contrast_show_deltas = False - - if contrast_show_deltas == True: - - trans = plt.gca().transData - + + if contrast_show_deltas: delta_plot_data_temp = plot_data.copy() delta_id_col = dabest_obj.id_col - if color_col != None: - delta_plot_data = delta_plot_data_temp[[xvar, yvar, delta_id_col, color_col]] - deltapts_args = {"hue" : color_col, - "palette" : plot_palette_raw, - "marker" : "^", - "alpha" : 0.5} - + if color_col is not None: + delta_plot_data = delta_plot_data_temp[ + [xvar, yvar, delta_id_col, color_col] + ] + deltapts_args = { + "hue": color_col, + "palette": plot_palette_raw, + "marker": "^", + "alpha": 0.5, + } + else: delta_plot_data = delta_plot_data_temp[[xvar, yvar, delta_id_col]] - deltapts_args = {"color" : "k", - "marker" : "^", - "alpha" : 0.5} - + deltapts_args = {"color": "k", "marker": "^", "alpha": 0.5} + final_deltas = pd.DataFrame() for i in idx: for j in i: if i.index(j) != 0: - temp_df_exp = 
delta_plot_data[delta_plot_data[xvar].str.contains(j)].reset_index(drop=True) + temp_df_exp = delta_plot_data[ + delta_plot_data[xvar].str.contains(j) + ].reset_index(drop=True) if is_paired == "baseline": - temp_df_cont = delta_plot_data[delta_plot_data[xvar].str.contains(i[0])].reset_index(drop=True) + temp_df_cont = delta_plot_data[ + delta_plot_data[xvar].str.contains(i[0]) + ].reset_index(drop=True) elif is_paired == "sequential": - temp_df_cont = delta_plot_data[delta_plot_data[xvar].str.contains(i[i.index(j) - 1])].reset_index(drop=True) + temp_df_cont = delta_plot_data[ + delta_plot_data[xvar].str.contains( + i[i.index(j) - 1] + ) + ].reset_index(drop=True) delta_df = temp_df_exp.copy() delta_df[yvar] = temp_df_exp[yvar] - temp_df_cont[yvar] - final_deltas = pd.concat([final_deltas, delta_df]) - - + final_deltas = pd.concat([final_deltas, delta_df]) + # Plot the raw data as a swarmplot. - deltapts_plot = sns.swarmplot(data=final_deltas, x=xvar, y=yvar, - ax=contrast_axes, - order=all_plot_groups, - zorder=2, - **deltapts_args) + deltapts_plot = sns.swarmplot( + data=final_deltas, + x=xvar, + y=yvar, + ax=contrast_axes, + order=all_plot_groups, + zorder=2, + **deltapts_args + ) contrast_axes.legend().set_visible(False) - - ##################### DELTA PTS ON CONTRAST PLOT END - + ##################### DELTA PTS ON CONTRAST PLOT END + # Set the tick labels, because the slopegraph plotting doesn't. rawdata_axes.set_xticks(np.arange(0, len(temp_all_plot_groups))) rawdata_axes.set_xticklabels(temp_all_plot_groups) - + else: # Plot the raw data as a set of Sankey Diagrams aligned like barplot. 
group_summaries = plot_kwargs["group_summaries"] if group_summaries is None: group_summaries = "mean_sd" err_color = plot_kwargs["err_color"] - if err_color == None: + if err_color is None: err_color = "black" - if show_pairs is True: + if show_pairs: sankey_control_group = [] sankey_test_group = [] # Design for Sankey Flow Diagram - sankey_idx = [(control, test) for i in idx for control, test in zip(i[:], (i[1:]+(i[0],)))]\ - if flow is True else temp_idx + sankey_idx = ( + [ + (control, test) + for i in idx + for control, test in zip(i[:], (i[1:] + (i[0],))) + ] + if flow + else temp_idx + ) for i in sankey_idx: sankey_control_group.append(i[0]) - sankey_test_group.append(i[1]) + sankey_test_group.append(i[1]) if len(temp_all_plot_groups) == 2: - one_sankey = True - sankey_control_group.pop(); sankey_test_group.pop() # Remove the last element from two lists + one_sankey = True + sankey_control_group.pop() + sankey_test_group.pop() # Remove the last element from two lists # two_col_sankey = True if proportional == True and one_sankey == False and sankey == True and flow == False else False # Replace the paired proportional plot with sankey diagram - sankeyplot = sankeydiag(plot_data, xvar=xvar, yvar=yvar, - left_idx=sankey_control_group, - right_idx=sankey_test_group, - palette=plot_palette_sankey, - ax=rawdata_axes, - one_sankey=one_sankey, - **sankey_kwargs) - + sankeyplot = sankeydiag( + plot_data, + xvar=xvar, + yvar=yvar, + left_idx=sankey_control_group, + right_idx=sankey_test_group, + palette=plot_palette_sankey, + ax=rawdata_axes, + one_sankey=one_sankey, + **sankey_kwargs + ) + else: - if proportional==False: + if not proportional: # Plot the raw data as a swarmplot. 
- rawdata_plot = sns.swarmplot(data=plot_data, x=xvar, y=yvar, - ax=rawdata_axes, - order=all_plot_groups, hue=color_col, - palette=plot_palette_raw, zorder=1, - **swarmplot_kwargs) + rawdata_plot = sns.swarmplot( + data=plot_data, + x=xvar, + y=yvar, + ax=rawdata_axes, + order=all_plot_groups, + hue=color_col, + palette=plot_palette_raw, + zorder=1, + **swarmplot_kwargs + ) else: # Plot the raw data as a barplot. - bar1_df = pd.DataFrame({xvar: all_plot_groups, 'proportion': np.ones(len(all_plot_groups))}) - bar1 = sns.barplot(data=bar1_df, x=xvar, y="proportion", - ax=rawdata_axes, - order=all_plot_groups, - linewidth=2, facecolor=(1, 1, 1, 0), edgecolor=bar_color, - zorder=1) - bar2 = sns.barplot(data=plot_data, x=xvar, y=yvar, - ax=rawdata_axes, - order=all_plot_groups, - palette=plot_palette_bar, - zorder=1, - **barplot_kwargs) + bar1_df = pd.DataFrame( + {xvar: all_plot_groups, "proportion": np.ones(len(all_plot_groups))} + ) + bar1 = sns.barplot( + data=bar1_df, + x=xvar, + y="proportion", + ax=rawdata_axes, + order=all_plot_groups, + linewidth=2, + facecolor=(1, 1, 1, 0), + edgecolor=bar_color, + zorder=1, + ) + bar2 = sns.barplot( + data=plot_data, + x=xvar, + y=yvar, + ax=rawdata_axes, + order=all_plot_groups, + palette=plot_palette_bar, + zorder=1, + **barplot_kwargs + ) # adjust the width of bars bar_width = plot_kwargs["bar_width"] for bar in bar1.patches: x = bar.get_x() width = bar.get_width() - centre = x + width / 2. - bar.set_x(centre - bar_width / 2.) + centre = x + width / 2.0 + bar.set_x(centre - bar_width / 2.0) bar.set_width(bar_width) # Plot the gapped line summaries, if this is not a Cumming plot. @@ -597,7 +656,7 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if group_summaries is None: group_summaries = "mean_sd" - if group_summaries is not None and proportional==False: + if group_summaries is not None and not proportional: # Create list to gather xspans. 
xspans = [] line_colors = [] @@ -610,33 +669,42 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): # we have got a None, so skip and move on. pass - if bootstraps_color_by_group is True: + if bootstraps_color_by_group: line_colors.append(plot_palette_raw[all_plot_groups[jj]]) if len(line_colors) != len(all_plot_groups): line_colors = ytick_color - error_bar(plot_data, x=xvar, y=yvar, - # Hardcoded offset... - offset=xspans + np.array(plot_kwargs["group_summaries_offset"]), - line_color=line_colors, - gap_width_percent=1.5, - type=group_summaries, ax=rawdata_axes, - method="gapped_lines", - **group_summary_kwargs) - - if group_summaries is not None and proportional == True: - + error_bar( + plot_data, + x=xvar, + y=yvar, + # Hardcoded offset... + offset=xspans + np.array(plot_kwargs["group_summaries_offset"]), + line_color=line_colors, + gap_width_percent=1.5, + type=group_summaries, + ax=rawdata_axes, + method="gapped_lines", + **group_summary_kwargs + ) + + if group_summaries is not None and proportional: err_color = plot_kwargs["err_color"] - if err_color == None: + if err_color is None: err_color = "black" - error_bar(plot_data, x=xvar, y=yvar, - offset=0, - line_color=err_color, - gap_width_percent=1.5, - type=group_summaries, ax=rawdata_axes, - method="proportional_error_bar", - **group_summary_kwargs) + error_bar( + plot_data, + x=xvar, + y=yvar, + offset=0, + line_color=err_color, + gap_width_percent=1.5, + type=group_summaries, + ax=rawdata_axes, + method="proportional_error_bar", + **group_summary_kwargs + ) # Add the counts to the rawdata axes xticks. 
counts = plot_data.groupby(xvar).count()[yvar] @@ -646,7 +714,7 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): for xticklab in rawdata_axes.xaxis.get_ticklabels(): t = xticklab.get_text() if t.rfind("\n") != -1: - te = t[t.rfind("\n") + len("\n"):] + te = t[t.rfind("\n") + len("\n") :] N = str(counts.loc[te]) te = t else: @@ -655,13 +723,13 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): ticks_with_counts.append("{}\nN = {}".format(te, N)) - if plot_kwargs['fontsize_rawxlabel'] is not None: - fontsize_rawxlabel = plot_kwargs['fontsize_rawxlabel'] - rawdata_axes.set_xticklabels(ticks_with_counts,fontsize=fontsize_rawxlabel) + if plot_kwargs["fontsize_rawxlabel"] is not None: + fontsize_rawxlabel = plot_kwargs["fontsize_rawxlabel"] + rawdata_axes.set_xticklabels(ticks_with_counts, fontsize=fontsize_rawxlabel) # Save the handles and labels for the legend. handles, labels = rawdata_axes.get_legend_handles_labels() - legend_labels = [l for l in labels] + legend_labels = [l for l in labels] legend_handles = [h for h in handles] if bootstraps_color_by_group is False: rawdata_axes.legend().set_visible(False) @@ -672,11 +740,11 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): # Plot effect sizes and bootstraps. # Take note of where the `control` groups are. 
- if is_paired == "baseline" and show_pairs == True: + if is_paired == "baseline" and show_pairs: if two_col_sankey: ticks_to_skip = [] - ticks_to_plot = np.arange(0, len(temp_all_plot_groups)/2).tolist() - ticks_to_start_twocol_sankey = np.cumsum([len(i)-1 for i in idx]).tolist() + ticks_to_plot = np.arange(0, len(temp_all_plot_groups) / 2).tolist() + ticks_to_start_twocol_sankey = np.cumsum([len(i) - 1 for i in idx]).tolist() ticks_to_start_twocol_sankey.pop() ticks_to_start_twocol_sankey.insert(0, 0) else: @@ -685,60 +753,63 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist() ticks_to_skip.insert(0, 0) # Then obtain the ticks where we have to plot the effect sizes. - ticks_to_plot = [t for t in range(0, len(all_plot_groups)) - if t not in ticks_to_skip] + ticks_to_plot = [ + t for t in range(0, len(all_plot_groups)) if t not in ticks_to_skip + ] ticks_to_skip_contrast = np.cumsum([(len(t)) for t in idx])[:-1].tolist() ticks_to_skip_contrast.insert(0, 0) else: if two_col_sankey: ticks_to_skip = [len(sankey_control_group)] # Then obtain the ticks where we have to plot the effect sizes. - ticks_to_plot = [t for t in range(0, len(temp_idx)) - if t not in ticks_to_skip] + ticks_to_plot = [ + t for t in range(0, len(temp_idx)) if t not in ticks_to_skip + ] ticks_to_skip = [] - ticks_to_start_twocol_sankey = np.cumsum([len(i)-1 for i in idx]).tolist() + ticks_to_start_twocol_sankey = np.cumsum([len(i) - 1 for i in idx]).tolist() ticks_to_start_twocol_sankey.pop() ticks_to_start_twocol_sankey.insert(0, 0) else: ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist() ticks_to_skip.insert(0, 0) # Then obtain the ticks where we have to plot the effect sizes. 
- ticks_to_plot = [t for t in range(0, len(all_plot_groups)) - if t not in ticks_to_skip] + ticks_to_plot = [ + t for t in range(0, len(all_plot_groups)) if t not in ticks_to_skip + ] # Plot the bootstraps, then the effect sizes and CIs. - es_marker_size = plot_kwargs["es_marker_size"] + es_marker_size = plot_kwargs["es_marker_size"] halfviolin_alpha = plot_kwargs["halfviolin_alpha"] ci_type = plot_kwargs["ci_type"] - results = EffectSizeDataFrame.results + results = effectsize_df.results contrast_xtick_labels = [] - for j, tick in enumerate(ticks_to_plot): - current_group = results.test[j] - current_control = results.control[j] + current_group = results.test[j] + current_control = results.control[j] current_bootstrap = results.bootstraps[j] - current_effsize = results.difference[j] + current_effsize = results.difference[j] if ci_type == "bca": - current_ci_low = results.bca_low[j] - current_ci_high = results.bca_high[j] + current_ci_low = results.bca_low[j] + current_ci_high = results.bca_high[j] else: - current_ci_low = results.pct_low[j] - current_ci_high = results.pct_high[j] - + current_ci_low = results.pct_low[j] + current_ci_high = results.pct_high[j] # Create the violinplot. # New in v0.2.6: drop negative infinities before plotting. - v = contrast_axes.violinplot(current_bootstrap[~np.isinf(current_bootstrap)], - positions=[tick], - **violinplot_kwargs) + v = contrast_axes.violinplot( + current_bootstrap[~np.isinf(current_bootstrap)], + positions=[tick], + **violinplot_kwargs + ) # Turn the violinplot into half, and color it the same as the swarmplot. # Do this only if the color column is not specified. # Ideally, the alpha (transparency) fo the violin plot should be # less than one so the effect size and CIs are visible. 
- if bootstraps_color_by_group is True: + if bootstraps_color_by_group: fc = plot_palette_contrast[current_group] else: fc = "grey" @@ -746,96 +817,110 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): halfviolin(v, fill_color=fc, alpha=halfviolin_alpha) # Plot the effect size. - contrast_axes.plot([tick], current_effsize, marker='o', - color=ytick_color, - markersize=es_marker_size) - -################## SHOW ES ON CONTRAST PLOT WIP + contrast_axes.plot( + [tick], + current_effsize, + marker="o", + color=ytick_color, + markersize=es_marker_size, + ) + + ################## SHOW ES ON CONTRAST PLOT WIP contrast_show_es = plot_kwargs["contrast_show_es"] - es_sf = plot_kwargs['es_sf'] - es_fontsize = plot_kwargs['es_fontsize'] - - if gridkey_show_es == True: + es_sf = plot_kwargs["es_sf"] + es_fontsize = plot_kwargs["es_fontsize"] + + if gridkey_show_es: contrast_show_es = False - - effsize_for_print = current_effsize - - printed_es = np.format_float_positional(effsize_for_print, - precision=es_sf, - sign=True, - trim= 'k', - min_digits = es_sf) - if contrast_show_es == True: + + printed_es = np.format_float_positional( + effsize_for_print, precision=es_sf, sign=True, trim="k", min_digits=es_sf + ) + if contrast_show_es: if effsize_for_print < 0: textoffset = 10 else: textoffset = 15 - contrast_axes.annotate(text=printed_es, - xy = (tick, effsize_for_print), - xytext = (-textoffset-len(printed_es)*es_fontsize/2,-es_fontsize/2), - textcoords = "offset points", - **{ "fontsize" : es_fontsize }) - -################## SHOW ES ON CONTRAST PLOT END - - # Plot the confidence interval. 
- contrast_axes.plot([tick, tick], - [current_ci_low, current_ci_high], - linestyle="-", - color=ytick_color, - linewidth=group_summary_kwargs['lw']) + contrast_axes.annotate( + text=printed_es, + xy=(tick, effsize_for_print), + xytext=( + -textoffset - len(printed_es) * es_fontsize / 2, + -es_fontsize / 2, + ), + textcoords="offset points", + **{"fontsize": es_fontsize} + ) + + ################## SHOW ES ON CONTRAST PLOT END - contrast_xtick_labels.append("{}\nminus\n{}".format(current_group, - current_control)) + # Plot the confidence interval. + contrast_axes.plot( + [tick, tick], + [current_ci_low, current_ci_high], + linestyle="-", + color=ytick_color, + linewidth=group_summary_kwargs["lw"], + ) + + contrast_xtick_labels.append( + "{}\nminus\n{}".format(current_group, current_control) + ) # Plot mini-meta violin if show_mini_meta or show_delta2: if show_mini_meta: - mini_meta_delta = EffectSizeDataFrame.mini_meta_delta - data = mini_meta_delta.bootstraps_weighted_delta - difference = mini_meta_delta.difference + mini_meta_delta = effectsize_df.mini_meta_delta + data = mini_meta_delta.bootstraps_weighted_delta + difference = mini_meta_delta.difference if ci_type == "bca": - ci_low = mini_meta_delta.bca_low - ci_high = mini_meta_delta.bca_high + ci_low = mini_meta_delta.bca_low + ci_high = mini_meta_delta.bca_high else: - ci_low = mini_meta_delta.pct_low - ci_high = mini_meta_delta.pct_high - else: - delta_delta = EffectSizeDataFrame.delta_delta - data = delta_delta.bootstraps_delta_delta - difference = delta_delta.difference + ci_low = mini_meta_delta.pct_low + ci_high = mini_meta_delta.pct_high + else: + delta_delta = effectsize_df.delta_delta + data = delta_delta.bootstraps_delta_delta + difference = delta_delta.difference if ci_type == "bca": - ci_low = delta_delta.bca_low - ci_high = delta_delta.bca_high + ci_low = delta_delta.bca_low + ci_high = delta_delta.bca_high else: - ci_low = delta_delta.pct_low - ci_high = delta_delta.pct_high - #Create the 
violinplot. - #New in v0.2.6: drop negative infinities before plotting. - position = max(rawdata_axes.get_xticks())+2 - v = contrast_axes.violinplot(data[~np.isinf(data)], - positions=[position], - **violinplot_kwargs) + ci_low = delta_delta.pct_low + ci_high = delta_delta.pct_high + # Create the violinplot. + # New in v0.2.6: drop negative infinities before plotting. + position = max(rawdata_axes.get_xticks()) + 2 + v = contrast_axes.violinplot( + data[~np.isinf(data)], positions=[position], **violinplot_kwargs + ) fc = "grey" halfviolin(v, fill_color=fc, alpha=halfviolin_alpha) # Plot the effect size. - contrast_axes.plot([position], difference, marker='o', - color=ytick_color, - markersize=es_marker_size) + contrast_axes.plot( + [position], + difference, + marker="o", + color=ytick_color, + markersize=es_marker_size, + ) # Plot the confidence interval. - contrast_axes.plot([position, position], - [ci_low, ci_high], - linestyle="-", - color=ytick_color, - linewidth=group_summary_kwargs['lw']) + contrast_axes.plot( + [position, position], + [ci_low, ci_high], + linestyle="-", + color=ytick_color, + linewidth=group_summary_kwargs["lw"], + ) if show_mini_meta: - contrast_xtick_labels.extend(["","Weighted delta"]) + contrast_xtick_labels.extend(["", "Weighted delta"]) elif effect_size == "delta_g": contrast_xtick_labels.extend(["", "deltas' g"]) else: @@ -847,22 +932,22 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): contrast_axes.set_xticks(rawdata_axes.get_xticks()) else: temp = rawdata_axes.get_xticks() - temp = np.append(temp, [max(temp)+1, max(temp)+2]) + temp = np.append(temp, [max(temp) + 1, max(temp) + 2]) contrast_axes.set_xticks(temp) - if show_pairs is True: + if show_pairs: max_x = contrast_axes.get_xlim()[1] rawdata_axes.set_xlim(-0.375, max_x) - if float_contrast is True: + if float_contrast: contrast_axes.set_xlim(0.5, 1.5) elif show_delta2 or show_mini_meta: # Increase the xlim of raw data by 2 temp = rawdata_axes.get_xlim() if 
show_pairs: - rawdata_axes.set_xlim(temp[0], temp[1]+0.25) + rawdata_axes.set_xlim(temp[0], temp[1] + 0.25) else: - rawdata_axes.set_xlim(temp[0], temp[1]+2) + rawdata_axes.set_xlim(temp[0], temp[1] + 2) contrast_axes.set_xlim(rawdata_axes.get_xlim()) else: contrast_axes.set_xlim(rawdata_axes.get_xlim()) @@ -871,55 +956,67 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): for t in ticks_to_skip: contrast_xtick_labels.insert(t, "") - if plot_kwargs['fontsize_contrastxlabel'] is not None: - fontsize_contrastxlabel = plot_kwargs['fontsize_contrastxlabel'] + if plot_kwargs["fontsize_contrastxlabel"] is not None: + fontsize_contrastxlabel = plot_kwargs["fontsize_contrastxlabel"] - contrast_axes.set_xticklabels(contrast_xtick_labels,fontsize=fontsize_contrastxlabel) + contrast_axes.set_xticklabels( + contrast_xtick_labels, fontsize=fontsize_contrastxlabel + ) if bootstraps_color_by_group is False: legend_labels_unique = np.unique(legend_labels) unique_idx = np.unique(legend_labels, return_index=True)[1] - legend_handles_unique = (pd.Series(legend_handles, dtype="object").loc[unique_idx]).tolist() + legend_handles_unique = ( + pd.Series(legend_handles, dtype="object").loc[unique_idx] + ).tolist() if len(legend_handles_unique) > 0: - if float_contrast is True: + if float_contrast: axes_with_legend = contrast_axes - if show_pairs is True: + if show_pairs: bta = (1.75, 1.02) else: bta = (1.5, 1.02) else: axes_with_legend = rawdata_axes - if show_pairs is True: - bta = (1.02, 1.) + if show_pairs: + bta = (1.02, 1.0) else: - bta = (1.,1.) 
- leg = axes_with_legend.legend(legend_handles_unique, - legend_labels_unique, - bbox_to_anchor=bta, - **legend_kwargs) - if show_pairs is True: + bta = (1.0, 1.0) + leg = axes_with_legend.legend( + legend_handles_unique, + legend_labels_unique, + bbox_to_anchor=bta, + **legend_kwargs + ) + if show_pairs: for line in leg.get_lines(): line.set_linewidth(3.0) og_ylim_raw = rawdata_axes.get_ylim() og_xlim_raw = rawdata_axes.get_xlim() - if float_contrast is True: + if float_contrast: # For Gardner-Altman plots only. # Normalize ylims and despine the floating contrast axes. # Check that the effect size is within the swarm ylims. - if effect_size_type in ["mean_diff", "cohens_d", "hedges_g","cohens_h"]: - control_group_summary = plot_data.groupby(xvar)\ - .mean(numeric_only=True).loc[current_control, yvar] - test_group_summary = plot_data.groupby(xvar)\ - .mean(numeric_only=True).loc[current_group, yvar] + if effect_size_type in ["mean_diff", "cohens_d", "hedges_g", "cohens_h"]: + control_group_summary = ( + plot_data.groupby(xvar) + .mean(numeric_only=True) + .loc[current_control, yvar] + ) + test_group_summary = ( + plot_data.groupby(xvar).mean(numeric_only=True).loc[current_group, yvar] + ) elif effect_size_type == "median_diff": - control_group_summary = plot_data.groupby(xvar)\ - .median().loc[current_control, yvar] - test_group_summary = plot_data.groupby(xvar)\ - .median().loc[current_group, yvar] + control_group_summary = ( + plot_data.groupby(xvar).median().loc[current_control, yvar] + ) + test_group_summary = ( + plot_data.groupby(xvar).median().loc[current_group, yvar] + ) if swarm_ylim is None: swarm_ylim = rawdata_axes.get_ylim() @@ -927,7 +1024,7 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): _, contrast_xlim_max = contrast_axes.get_xlim() difference = float(results.difference[0]) - + if effect_size_type in ["mean_diff", "median_diff"]: # Align 0 of contrast_axes to reference group mean of rawdata_axes. 
# If the effect size is positive, shift the contrast axis up. @@ -945,48 +1042,53 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): og_ylim_contrast = rawdata_axes.get_ylim() - np.array(control_group_summary) contrast_axes.set_ylim(og_ylim_contrast) - contrast_axes.set_xlim(contrast_xlim_max-1, contrast_xlim_max) + contrast_axes.set_xlim(contrast_xlim_max - 1, contrast_xlim_max) - elif effect_size_type in ["cohens_d", "hedges_g","cohens_h"]: + elif effect_size_type in ["cohens_d", "hedges_g", "cohens_h"]: if is_paired: which_std = 1 else: which_std = 0 temp_control = plot_data[plot_data[xvar] == current_control][yvar] - temp_test = plot_data[plot_data[xvar] == current_group][yvar] - + temp_test = plot_data[plot_data[xvar] == current_group][yvar] + stds = _compute_standardizers(temp_control, temp_test) if is_paired: pooled_sd = stds[1] else: pooled_sd = stds[0] - - if effect_size_type == 'hedges_g': - gby_count = plot_data.groupby(xvar).count() + + if effect_size_type == "hedges_g": + gby_count = plot_data.groupby(xvar).count() len_control = gby_count.loc[current_control, yvar] - len_test = gby_count.loc[current_group, yvar] - - hg_correction_factor = _compute_hedges_correction_factor(len_control, len_test) - + len_test = gby_count.loc[current_group, yvar] + + hg_correction_factor = _compute_hedges_correction_factor( + len_control, len_test + ) + ylim_scale_factor = pooled_sd / hg_correction_factor elif effect_size_type == "cohens_h": - ylim_scale_factor = (np.mean(temp_test)-np.mean(temp_control)) / difference + ylim_scale_factor = ( + np.mean(temp_test) - np.mean(temp_control) + ) / difference else: ylim_scale_factor = pooled_sd - - scaled_ylim = ((rawdata_axes.get_ylim() - control_group_summary) / ylim_scale_factor).tolist() + + scaled_ylim = ( + (rawdata_axes.get_ylim() - control_group_summary) / ylim_scale_factor + ).tolist() contrast_axes.set_ylim(scaled_ylim) og_ylim_contrast = scaled_ylim - contrast_axes.set_xlim(contrast_xlim_max-1, 
contrast_xlim_max) + contrast_axes.set_xlim(contrast_xlim_max - 1, contrast_xlim_max) if one_sankey is None: # Draw summary lines for control and test groups.. for jj, axx in enumerate([rawdata_axes, contrast_axes]): - # Draw effect size line. if jj == 0: ref = control_group_summary @@ -996,66 +1098,74 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): elif jj == 1: ref = 0 diff = ref + difference - effsize_line_start = contrast_xlim_max-1.1 + effsize_line_start = contrast_xlim_max - 1.1 xlimlow, xlimhigh = axx.get_xlim() # Draw reference line. - axx.hlines(ref, # y-coordinates - 0, xlimhigh, # x-coordinates, start and end. - **reflines_kwargs) - + axx.hlines( + ref, # y-coordinates + 0, + xlimhigh, # x-coordinates, start and end. + **reflines_kwargs + ) + # Draw effect size line. - axx.hlines(diff, - effsize_line_start, xlimhigh, - **reflines_kwargs) - else: + axx.hlines(diff, effsize_line_start, xlimhigh, **reflines_kwargs) + else: ref = 0 diff = ref + difference effsize_line_start = contrast_xlim_max - 0.9 xlimlow, xlimhigh = contrast_axes.get_xlim() # Draw reference line. - contrast_axes.hlines(ref, # y-coordinates - effsize_line_start, xlimhigh, # x-coordinates, start and end. - **reflines_kwargs) - + contrast_axes.hlines( + ref, # y-coordinates + effsize_line_start, + xlimhigh, # x-coordinates, start and end. + **reflines_kwargs + ) + # Draw effect size line. - contrast_axes.hlines(diff, - effsize_line_start, xlimhigh, - **reflines_kwargs) - rawdata_axes.set_xlim(og_xlim_raw) # to align the axis + contrast_axes.hlines(diff, effsize_line_start, xlimhigh, **reflines_kwargs) + rawdata_axes.set_xlim(og_xlim_raw) # to align the axis # Despine appropriately. - sns.despine(ax=rawdata_axes, bottom=True) + sns.despine(ax=rawdata_axes, bottom=True) sns.despine(ax=contrast_axes, left=True, right=False) # Insert break between the rawdata axes and the contrast axes # by re-drawing the x-spine. 
- rawdata_axes.hlines(og_ylim_raw[0], # yindex - rawdata_axes.get_xlim()[0], 1.3, # xmin, xmax - **redraw_axes_kwargs) + rawdata_axes.hlines( + og_ylim_raw[0], # yindex + rawdata_axes.get_xlim()[0], + 1.3, # xmin, xmax + **redraw_axes_kwargs + ) rawdata_axes.set_ylim(og_ylim_raw) - contrast_axes.hlines(contrast_axes.get_ylim()[0], - contrast_xlim_max-0.8, contrast_xlim_max, - **redraw_axes_kwargs) - + contrast_axes.hlines( + contrast_axes.get_ylim()[0], + contrast_xlim_max - 0.8, + contrast_xlim_max, + **redraw_axes_kwargs + ) else: # For Cumming Plots only. # Set custom contrast_ylim, if it was specified. - if plot_kwargs['contrast_ylim'] is not None or (plot_kwargs['delta2_ylim'] is not None and show_delta2): - - if plot_kwargs['contrast_ylim'] is not None: - custom_contrast_ylim = plot_kwargs['contrast_ylim'] - if plot_kwargs['delta2_ylim'] is not None and show_delta2: - custom_delta2_ylim = plot_kwargs['delta2_ylim'] - if custom_contrast_ylim!=custom_delta2_ylim: + if plot_kwargs["contrast_ylim"] is not None or ( + plot_kwargs["delta2_ylim"] is not None and show_delta2 + ): + if plot_kwargs["contrast_ylim"] is not None: + custom_contrast_ylim = plot_kwargs["contrast_ylim"] + if plot_kwargs["delta2_ylim"] is not None and show_delta2: + custom_delta2_ylim = plot_kwargs["delta2_ylim"] + if custom_contrast_ylim != custom_delta2_ylim: err1 = "Please check if `contrast_ylim` and `delta2_ylim` are assigned" err2 = "with same values." raise ValueError(err1 + err2) else: - custom_delta2_ylim = plot_kwargs['delta2_ylim'] + custom_delta2_ylim = plot_kwargs["delta2_ylim"] custom_contrast_ylim = custom_delta2_ylim if len(custom_contrast_ylim) != 2: @@ -1065,8 +1175,8 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if effect_size_type == "cliffs_delta": # Ensure the ylims for a cliffs_delta plot never exceed [-1, 1]. 
- l = plot_kwargs['contrast_ylim'][0] - h = plot_kwargs['contrast_ylim'][1] + l = plot_kwargs["contrast_ylim"][0] + h = plot_kwargs["contrast_ylim"][1] low = -1 if l < -1 else l high = 1 if h > 1 else h contrast_axes.set_ylim(low, high) @@ -1083,228 +1193,237 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): if contrast_ylim_low < 0 < contrast_ylim_high: contrast_axes.axhline(y=0, **reflines_kwargs) - if is_paired == "baseline" and show_pairs == True: + if is_paired == "baseline" and show_pairs: if two_col_sankey: - rightend_ticks_raw = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey) + rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array( + ticks_to_start_twocol_sankey + ) elif proportional and is_paired is not None: - rightend_ticks_raw = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip) - else: - rightend_ticks_raw = np.array([len(i)-1 for i in temp_idx]) + np.array(ticks_to_skip) + rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array( + ticks_to_skip + ) + else: + rightend_ticks_raw = np.array( + [len(i) - 1 for i in temp_idx] + ) + np.array(ticks_to_skip) for ax in [rawdata_axes]: sns.despine(ax=ax, bottom=True) - + ylim = ax.get_ylim() xlim = ax.get_xlim() - redraw_axes_kwargs['y'] = ylim[0] - + redraw_axes_kwargs["y"] = ylim[0] + if two_col_sankey: for k, start_tick in enumerate(ticks_to_start_twocol_sankey): end_tick = rightend_ticks_raw[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) - else: + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) + else: for k, start_tick in enumerate(ticks_to_skip): end_tick = rightend_ticks_raw[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) ax.set_ylim(ylim) - del redraw_axes_kwargs['y'] - - if proportional == False: - temp_length = [(len(i)-1) for i in idx] + del redraw_axes_kwargs["y"] + + if not proportional: + 
temp_length = [(len(i) - 1) for i in idx] else: - temp_length = [(len(i)-1)*2-1 for i in idx] + temp_length = [(len(i) - 1) * 2 - 1 for i in idx] if two_col_sankey: - rightend_ticks_contrast = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey) + rightend_ticks_contrast = np.array( + [len(i) - 2 for i in idx] + ) + np.array(ticks_to_start_twocol_sankey) elif proportional and is_paired is not None: - rightend_ticks_contrast = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip) - else: - rightend_ticks_contrast = np.array(temp_length) + np.array(ticks_to_skip_contrast) + rightend_ticks_contrast = np.array( + [len(i) - 1 for i in idx] + ) + np.array(ticks_to_skip) + else: + rightend_ticks_contrast = np.array(temp_length) + np.array( + ticks_to_skip_contrast + ) for ax in [contrast_axes]: sns.despine(ax=ax, bottom=True) - + ylim = ax.get_ylim() xlim = ax.get_xlim() - redraw_axes_kwargs['y'] = ylim[0] - + redraw_axes_kwargs["y"] = ylim[0] + if two_col_sankey: for k, start_tick in enumerate(ticks_to_start_twocol_sankey): end_tick = rightend_ticks_contrast[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) else: for k, start_tick in enumerate(ticks_to_skip_contrast): end_tick = rightend_ticks_contrast[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) - + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) + ax.set_ylim(ylim) - del redraw_axes_kwargs['y'] + del redraw_axes_kwargs["y"] else: # Compute the end of each x-axes line. 
if two_col_sankey: - rightend_ticks = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey) + rightend_ticks = np.array([len(i) - 2 for i in idx]) + np.array( + ticks_to_start_twocol_sankey + ) else: - rightend_ticks = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip) - + rightend_ticks = np.array([len(i) - 1 for i in idx]) + np.array( + ticks_to_skip + ) + for ax in [rawdata_axes, contrast_axes]: sns.despine(ax=ax, bottom=True) - + ylim = ax.get_ylim() xlim = ax.get_xlim() - redraw_axes_kwargs['y'] = ylim[0] - + redraw_axes_kwargs["y"] = ylim[0] + if two_col_sankey: for k, start_tick in enumerate(ticks_to_start_twocol_sankey): end_tick = rightend_ticks[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) else: for k, start_tick in enumerate(ticks_to_skip): end_tick = rightend_ticks[k] - ax.hlines(xmin=start_tick, xmax=end_tick, - **redraw_axes_kwargs) - + ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs) + ax.set_ylim(ylim) - del redraw_axes_kwargs['y'] + del redraw_axes_kwargs["y"] - if show_delta2 is True or show_mini_meta is True: + if show_delta2 or show_mini_meta: ylim = contrast_axes.get_ylim() - redraw_axes_kwargs['y'] = ylim[0] + redraw_axes_kwargs["y"] = ylim[0] x_ticks = contrast_axes.get_xticks() - contrast_axes.hlines(xmin=x_ticks[-2], xmax=x_ticks[-1], - **redraw_axes_kwargs) - del redraw_axes_kwargs['y'] + contrast_axes.hlines(xmin=x_ticks[-2], xmax=x_ticks[-1], **redraw_axes_kwargs) + del redraw_axes_kwargs["y"] # Set raw axes y-label. 
- swarm_label = plot_kwargs['swarm_label'] + swarm_label = plot_kwargs["swarm_label"] if swarm_label is None and yvar is None: swarm_label = "value" elif swarm_label is None and yvar is not None: swarm_label = yvar - bar_label = plot_kwargs['bar_label'] + bar_label = plot_kwargs["bar_label"] if bar_label is None and effect_size_type != "cohens_h": bar_label = "proportion of success" elif bar_label is None and effect_size_type == "cohens_h": bar_label = "value" # Place contrast axes y-label. - contrast_label_dict = {'mean_diff': "mean difference", - 'median_diff': "median difference", - 'cohens_d': "Cohen's d", - 'hedges_g': "Hedges' g", - 'cliffs_delta': "Cliff's delta", - 'cohens_h': "Cohen's h", - 'delta_g': "mean difference"} - - if proportional == True and effect_size_type != "cohens_h": + contrast_label_dict = { + "mean_diff": "mean difference", + "median_diff": "median difference", + "cohens_d": "Cohen's d", + "hedges_g": "Hedges' g", + "cliffs_delta": "Cliff's delta", + "cohens_h": "Cohen's h", + "delta_g": "mean difference", + } + + if proportional and effect_size_type != "cohens_h": default_contrast_label = "proportion difference" elif effect_size_type == "delta_g": default_contrast_label = "Hedges' g" else: - default_contrast_label = contrast_label_dict[EffectSizeDataFrame.effect_size] + default_contrast_label = contrast_label_dict[effectsize_df.effect_size] - - if plot_kwargs['contrast_label'] is None: + if plot_kwargs["contrast_label"] is None: if is_paired: contrast_label = "paired\n{}".format(default_contrast_label) else: contrast_label = default_contrast_label contrast_label = contrast_label.capitalize() else: - contrast_label = plot_kwargs['contrast_label'] + contrast_label = plot_kwargs["contrast_label"] - if plot_kwargs['fontsize_rawylabel'] is not None: - fontsize_rawylabel = plot_kwargs['fontsize_rawylabel'] - if plot_kwargs['fontsize_contrastylabel'] is not None: - fontsize_contrastylabel = plot_kwargs['fontsize_contrastylabel'] - if 
plot_kwargs['fontsize_delta2label'] is not None: - fontsize_delta2label = plot_kwargs['fontsize_delta2label'] + if plot_kwargs["fontsize_rawylabel"] is not None: + fontsize_rawylabel = plot_kwargs["fontsize_rawylabel"] + if plot_kwargs["fontsize_contrastylabel"] is not None: + fontsize_contrastylabel = plot_kwargs["fontsize_contrastylabel"] + if plot_kwargs["fontsize_delta2label"] is not None: + fontsize_delta2label = plot_kwargs["fontsize_delta2label"] - contrast_axes.set_ylabel(contrast_label,fontsize = fontsize_contrastylabel) - if float_contrast is True: + contrast_axes.set_ylabel(contrast_label, fontsize=fontsize_contrastylabel) + if float_contrast: contrast_axes.yaxis.set_label_position("right") # Set the rawdata axes labels appropriately - if proportional == False: - rawdata_axes.set_ylabel(swarm_label,fontsize = fontsize_rawylabel) + if not proportional: + rawdata_axes.set_ylabel(swarm_label, fontsize=fontsize_rawylabel) else: - rawdata_axes.set_ylabel(bar_label,fontsize = fontsize_rawylabel) + rawdata_axes.set_ylabel(bar_label, fontsize=fontsize_rawylabel) rawdata_axes.set_xlabel("") # Because we turned the axes frame off, we also need to draw back # the y-spine for both axes. 
- if float_contrast==False: + if not float_contrast: rawdata_axes.set_xlim(contrast_axes.get_xlim()) og_xlim_raw = rawdata_axes.get_xlim() - rawdata_axes.vlines(og_xlim_raw[0], - og_ylim_raw[0], og_ylim_raw[1], - **redraw_axes_kwargs) + rawdata_axes.vlines( + og_xlim_raw[0], og_ylim_raw[0], og_ylim_raw[1], **redraw_axes_kwargs + ) og_xlim_contrast = contrast_axes.get_xlim() - if float_contrast is True: + if float_contrast: xpos = og_xlim_contrast[1] else: xpos = og_xlim_contrast[0] og_ylim_contrast = contrast_axes.get_ylim() - contrast_axes.vlines(xpos, - og_ylim_contrast[0], og_ylim_contrast[1], - **redraw_axes_kwargs) - - - if show_delta2 is True: - if plot_kwargs['delta2_label'] is not None: - delta2_label = plot_kwargs['delta2_label'] - elif effect_size == "mean_diff" : + contrast_axes.vlines( + xpos, og_ylim_contrast[0], og_ylim_contrast[1], **redraw_axes_kwargs + ) + + if show_delta2: + if plot_kwargs["delta2_label"] is not None: + delta2_label = plot_kwargs["delta2_label"] + elif effect_size == "mean_diff": delta2_label = "delta - delta" else: delta2_label = "deltas' g" delta2_axes = contrast_axes.twinx() delta2_axes.set_frame_on(False) - delta2_axes.set_ylabel(delta2_label, fontsize = fontsize_delta2label) + delta2_axes.set_ylabel(delta2_label, fontsize=fontsize_delta2label) og_xlim_delta = contrast_axes.get_xlim() og_ylim_delta = contrast_axes.get_ylim() delta2_axes.set_ylim(og_ylim_delta) - delta2_axes.vlines(og_xlim_delta[1], - og_ylim_delta[0], og_ylim_delta[1], - **redraw_axes_kwargs) + delta2_axes.vlines( + og_xlim_delta[1], og_ylim_delta[0], og_ylim_delta[1], **redraw_axes_kwargs + ) + ################################################### GRIDKEY MAIN CODE WIP -################################################### GRIDKEY MAIN CODE WIP - - #if gridkey_rows is None, skip everything here + # if gridkey_rows is None, skip everything here if gridkey_rows is not None: - # Raise error if there are more than 2 items in any idx and gridkey_merge_pairs is True 
and is_paired is not None - if gridkey_merge_pairs is True and is_paired is not None: + if gridkey_merge_pairs and is_paired is not None: for i in idx: if len(i) > 2: - warnings.warn("gridkey_merge_pairs=True only works if all idx in tuples have only two items. gridkey_merge_pairs has automatically been set to False") + warnings.warn( + "gridkey_merge_pairs=True only works if all idx in tuples have only two items. gridkey_merge_pairs has automatically been set to False" + ) gridkey_merge_pairs = False break - elif gridkey_merge_pairs is True and is_paired is None: - warnings.warn("gridkey_merge_pairs=True is only applicable for paired data.") + elif gridkey_merge_pairs and is_paired is None: + warnings.warn( + "gridkey_merge_pairs=True is only applicable for paired data." + ) gridkey_merge_pairs = False - + # Checks for gridkey_merge_pairs and is_paired; if both are true, "merges" the gridkey per pair - if gridkey_merge_pairs is True and is_paired is not None: + if gridkey_merge_pairs and is_paired is not None: groups_for_gridkey = [] for i in idx: groups_for_gridkey.append(i[1]) else: groups_for_gridkey = all_plot_groups - - + # raise errors if gridkey_rows is not a list, or if the list is empty if isinstance(gridkey_rows, list) is False: raise TypeError("gridkey_rows must be a list.") elif len(gridkey_rows) == 0: warnings.warn("gridkey_rows is an empty list.") - - + # raise Warning if an item in gridkey_rows is not contained in any idx for i in gridkey_rows: in_idx = 0 @@ -1313,93 +1432,101 @@ def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs): in_idx += 1 if in_idx == 0: if is_paired is not None: - warnings.warn(i + " is not in any idx. Please check. Alternatively, merging gridkey pairs may not be suitable for your data; try passing gridkey_merge_pairs=False.") + warnings.warn( + i + + " is not in any idx. Please check. Alternatively, merging gridkey pairs may not be suitable for your data; try passing gridkey_merge_pairs=False." 
+ ) else: - warnings.warn(i + " is not in any idx. Please check.") - - + warnings.warn(i + " is not in any idx. Please check.") + # Populate table: checks if idx for each column contains rowlabel name # IF so, marks that element as present w black dot, or space if not present - table_cellcols = [] + table_cellcols = [] for i in gridkey_rows: thisrow = [] for q in groups_for_gridkey: if str(i) in q: - thisrow.append(u"\u25CF") + thisrow.append("\u25CF") else: thisrow.append("") table_cellcols.append(thisrow) - - + # Adds a row for Ns with the Ns values - if gridkey_show_Ns == True: + if gridkey_show_Ns: gridkey_rows.append("Ns") list_of_Ns = [] for i in groups_for_gridkey: list_of_Ns.append(str(counts.loc[i])) table_cellcols.append(list_of_Ns) - # Adds a row for effectsizes with effectsize values - if gridkey_show_es == True: - gridkey_rows.append(u"\u0394") + if gridkey_show_es: + gridkey_rows.append("\u0394") effsize_list = [] results_list = results.test.to_list() - + # get the effect size, append + or -, 2 dec places for i in enumerate(groups_for_gridkey): if i[1] in results_list: - curr_esval = results.loc[results["test"] == i[1]]["difference"].iloc[0] - curr_esval_str = np.format_float_positional(curr_esval, - precision=es_sf, - sign=True, - trim= 'k', - min_digits = es_sf) + curr_esval = results.loc[results["test"] == i[1]][ + "difference" + ].iloc[0] + curr_esval_str = np.format_float_positional( + curr_esval, + precision=es_sf, + sign=True, + trim="k", + min_digits=es_sf, + ) effsize_list.append(curr_esval_str) else: effsize_list.append("-") - + table_cellcols.append(effsize_list) - + # If Gardner-Altman plot, plot on raw data and not contrast axes - if float_contrast == True: + if float_contrast: axes_ploton = rawdata_axes else: axes_ploton = contrast_axes - + # Account for extended x axis in case of show_delta2 or show_mini_meta x_groups_for_width = len(groups_for_gridkey) - if show_delta2 is True or show_mini_meta is True: - x_groups_for_width += 2 + if 
show_delta2 or show_mini_meta: + x_groups_for_width += 2 gridkey_width = len(groups_for_gridkey) / x_groups_for_width - - gridkey = axes_ploton.table(cellText = table_cellcols, - rowLabels = gridkey_rows, - cellLoc = "center", - bbox = [0, -len(gridkey_rows)*0.1-0.05, gridkey_width, len(gridkey_rows)*0.1], - **{"alpha" : 0.5}) - + + gridkey = axes_ploton.table( + cellText=table_cellcols, + rowLabels=gridkey_rows, + cellLoc="center", + bbox=[ + 0, + -len(gridkey_rows) * 0.1 - 0.05, + gridkey_width, + len(gridkey_rows) * 0.1, + ], + **{"alpha": 0.5} + ) + # modifies row label cells for cell in gridkey._cells: if cell[1] == -1: gridkey._cells[cell].visible_edges = "open" - gridkey._cells[cell].set_text_props(**{ "ha" : "right" }) - + gridkey._cells[cell].set_text_props(**{"ha": "right"}) + # turns off both x axes rawdata_axes.get_xaxis().set_visible(False) contrast_axes.get_xaxis().set_visible(False) - - ####################################################### END GRIDKEY MAIN CODE WIP - - - - + + ####################################################### END GRIDKEY MAIN CODE WIP + # Make sure no stray ticks appear! - rawdata_axes.xaxis.set_ticks_position('bottom') - rawdata_axes.yaxis.set_ticks_position('left') - contrast_axes.xaxis.set_ticks_position('bottom') + rawdata_axes.xaxis.set_ticks_position("bottom") + rawdata_axes.yaxis.set_ticks_position("left") + contrast_axes.xaxis.set_ticks_position("bottom") if float_contrast is False: - contrast_axes.yaxis.set_ticks_position('left') + contrast_axes.yaxis.set_ticks_position("left") # Reset rcParams. 
for parameter in _changed_rcParams: diff --git a/nbs/API/bootstrap.ipynb b/nbs/API/bootstrap.ipynb index fe9d2c48..a89ea5af 100644 --- a/nbs/API/bootstrap.ipynb +++ b/nbs/API/bootstrap.ipynb @@ -18,7 +18,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| default_exp _bootstrap_tools" + "# | default_exp _bootstrap_tools" ] }, { @@ -28,10 +28,11 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "from __future__ import division\n", "from nbdev.showdoc import *\n", "import nbdev\n", + "\n", "nbdev.nbdev_export()" ] }, @@ -42,8 +43,14 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", - "import numpy as np" + "# |export\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from scipy.stats import norm\n", + "from scipy.stats import ttest_1samp, ttest_ind, ttest_rel\n", + "from scipy.stats import mannwhitneyu, wilcoxon, norm\n", + "import warnings" ] }, { @@ -53,11 +60,11 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", + "# |export\n", "class bootstrap:\n", - " '''\n", - " Computes the summary statistic and a bootstrapped confidence interval. \n", - " \n", + " \"\"\"\n", + " Computes the summary statistic and a bootstrapped confidence interval.\n", + "\n", " Returns\n", " -------\n", " An `bootstrap` object reporting the summary statistics, percentile CIs, bias-corrected and accelerated (BCa) CIs, and the settings used:\n", @@ -94,85 +101,77 @@ " `pvalue_mann_whitney`: float\n", " Two-sided p-value obtained from scipy.stats.mannwhitneyu. If a single array was given (x1 only), returns 'NIL'. The Mann-Whitney U-test is a nonparametric unpaired test of the null hypothesis that x1 and x2 are from the same distribution. See \n", "\n", - " '''\n", - " def __init__(self, \n", - " x1:np.array, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. 
NaNs are automatically discarded.\n", - " x2:np.array=None, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded.\n", - " paired:bool=False, # Whether or not x1 and x2 are paired samples. If 'paired' is None then the data will not be treated as paired data in the subsequent calculations. If 'paired' is 'baseline', then in each tuple of x, other groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with the previous group (as control).\n", - " statfunction:callable=np.mean,#The summary statistic called on data.\n", - " smoothboot:bool=False,#Taken from seaborn.algorithms.bootstrap. If True, performs a smoothed bootstrap (draws samples from a kernel destiny estimate).\n", - " alpha_level:float=0.05,#Denotes the likelihood that the confidence interval produced does not include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced.\n", - " reps:int=5000 # Number of bootstrap iterations to perform.\n", - " ):\n", - "\n", - " import numpy as np\n", - " import pandas as pd\n", - " import seaborn as sns\n", - "\n", - " from scipy.stats import norm\n", - " from numpy.random import randint\n", - " from scipy.stats import ttest_1samp, ttest_ind, ttest_rel\n", - " from scipy.stats import mannwhitneyu, wilcoxon, norm\n", - " import warnings\n", + " \"\"\"\n", "\n", + " def __init__(\n", + " self,\n", + " x1: np.array, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. NaNs are automatically discarded.\n", + " x2: np.array = None, # The data in a one-dimensional array form. Only x1 is required. If x2 is given, the bootstrapped summary difference between the two groups (x2-x1) is computed. 
NaNs are automatically discarded.\n", + "        paired: bool = False,  # Whether or not x1 and x2 are paired samples. If 'paired' is None then the data will not be treated as paired data in the subsequent calculations. If 'paired' is 'baseline', then in each tuple of x, other groups will be paired up with the first group (as control). If 'paired' is 'sequential', then in each tuple of x, each group will be paired up with the previous group (as control).\n", + "        stat_function: callable = np.mean,  # The summary statistic called on data.\n", + "        smoothboot: bool = False,  # Taken from seaborn.algorithms.bootstrap. If True, performs a smoothed bootstrap (draws samples from a kernel density estimate).\n", + "        alpha_level: float = 0.05,  # Denotes the likelihood that the confidence interval produced does not include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced.\n", + "        reps: int = 5000,  # Number of bootstrap iterations to perform.\n", + "    ):\n", + "        # Turn to pandas series.\n", + "        x1 = pd.Series(x1).dropna()\n", + "        diff = False\n", "\n", - "        # Initialise statfunction\n", - "        if statfunction == None:\n", - "            statfunction = np.mean\n", + "        # Initialise stat_function\n", + "        if stat_function is None:\n", + "            stat_function = np.mean\n", "\n", "        # Compute two-sided alphas.\n", - "        if alpha_level > 1.
or alpha_level < 0.:\n", + " if alpha_level > 1.0 or alpha_level < 0.0:\n", " raise ValueError(\"alpha_level must be between 0 and 1.\")\n", - " alphas = np.array([alpha_level/2., 1-alpha_level/2.])\n", + " alphas = np.array([alpha_level / 2.0, 1 - alpha_level / 2.0])\n", "\n", - " sns_bootstrap_kwargs = {'func': statfunction,\n", - " 'n_boot': reps,\n", - " 'smooth': smoothboot}\n", + " sns_bootstrap_kwargs = {\n", + " \"func\": stat_function,\n", + " \"n_boot\": reps,\n", + " \"smooth\": smoothboot,\n", + " }\n", "\n", " if paired:\n", " # check x2 is not None:\n", " if x2 is None:\n", - " raise ValueError('Please specify x2.')\n", + " raise ValueError(\"Please specify x2.\")\n", " else:\n", " x2 = pd.Series(x2).dropna()\n", " if len(x1) != len(x2):\n", - " raise ValueError('x1 and x2 are not the same length.')\n", - "\n", - " if (x2 is None) or (paired is not None) :\n", + " raise ValueError(\"x1 and x2 are not the same length.\")\n", "\n", + " if (x2 is None) or (paired is not None):\n", " if x2 is None:\n", " tx = x1\n", " paired = False\n", " ttest_single = ttest_1samp(x1, 0)[1]\n", - " ttest_2_ind = 'NIL'\n", - " ttest_2_paired = 'NIL'\n", - " wilcoxonresult = 'NIL'\n", + " ttest_2_ind = \"NIL\"\n", + " ttest_2_paired = \"NIL\"\n", + " wilcoxonresult = \"NIL\"\n", "\n", - " elif paired is not None:\n", + " # elif paired is not None:\n", + " else: # only two options to enter here\n", " diff = True\n", " tx = x2 - x1\n", - " ttest_single = 'NIL'\n", - " ttest_2_ind = 'NIL'\n", + " ttest_single = \"NIL\"\n", + " ttest_2_ind = \"NIL\"\n", " ttest_2_paired = ttest_rel(x1, x2)[1]\n", " wilcoxonresult = wilcoxon(x1, x2)[1]\n", - " mannwhitneyresult = 'NIL'\n", + " mannwhitneyresult = \"NIL\"\n", "\n", " # Turns data into array, then tuple.\n", " tdata = (tx,)\n", "\n", " # The value of the statistic function applied\n", " # just to the actual data.\n", - " summ_stat = statfunction(*tdata)\n", + " summ_stat = stat_function(*tdata)\n", " statarray = 
sns.algorithms.bootstrap(tx, **sns_bootstrap_kwargs)\n", " statarray.sort()\n", "\n", " # Get Percentile indices\n", - " pct_low_high = np.round((reps-1) * alphas)\n", - " pct_low_high = np.nan_to_num(pct_low_high).astype('int')\n", - "\n", + " pct_low_high = np.round((reps - 1) * alphas)\n", + " pct_low_high = np.nan_to_num(pct_low_high).astype(\"int\")\n", "\n", " elif x2 is not None and paired is None:\n", " diff = True\n", @@ -184,42 +183,45 @@ " tdata = exp_statarray - ref_statarray\n", " statarray = tdata.copy()\n", " statarray.sort()\n", - " tdata = (tdata, ) # Note tuple form.\n", + " tdata = (tdata,) # Note tuple form.\n", "\n", " # The difference as one would calculate it.\n", - " summ_stat = statfunction(x2) - statfunction(x1)\n", + " summ_stat = stat_function(x2) - stat_function(x1)\n", "\n", " # Get Percentile indices\n", - " pct_low_high = np.round((reps-1) * alphas)\n", - " pct_low_high = np.nan_to_num(pct_low_high).astype('int')\n", + " pct_low_high = np.round((reps - 1) * alphas)\n", + " pct_low_high = np.nan_to_num(pct_low_high).astype(\"int\")\n", "\n", " # Statistical tests.\n", - " ttest_single='NIL'\n", - " ttest_2_ind = ttest_ind(x1,x2)[1]\n", - " ttest_2_paired='NIL'\n", - " mannwhitneyresult = mannwhitneyu(x1, x2, alternative='two-sided')[1]\n", - " wilcoxonresult = 'NIL'\n", + " ttest_single = \"NIL\"\n", + " ttest_2_ind = ttest_ind(x1, x2)[1]\n", + " ttest_2_paired = \"NIL\"\n", + " mannwhitneyresult = mannwhitneyu(x1, x2, alternative=\"two-sided\")[1]\n", + " wilcoxonresult = \"NIL\"\n", "\n", " # Get Bias-Corrected Accelerated indices convenience function invoked.\n", - " bca_low_high = bca(tdata, alphas, statarray,\n", - " statfunction, summ_stat, reps)\n", + " bca_low_high = bca(tdata, alphas, statarray, stat_function, summ_stat, reps)\n", "\n", " # Warnings for unstable or extreme indices.\n", " for ind in [pct_low_high, bca_low_high]:\n", - " if np.any(ind == 0) or np.any(ind == reps-1):\n", - " warnings.warn(\"Some values used 
extremal samples;\"\n", - " \" results are probably unstable.\")\n", - " elif np.any(ind<10) or np.any(ind>=reps-10):\n", - " warnings.warn(\"Some values used top 10 low/high samples;\"\n", - " \" results may be unstable.\")\n", + " if np.any(ind == 0) or np.any(ind == reps - 1):\n", + " warnings.warn(\n", + " \"Some values used extremal samples;\"\n", + " \" results are probably unstable.\"\n", + " )\n", + " elif np.any(ind < 10) or np.any(ind >= reps - 10):\n", + " warnings.warn(\n", + " \"Some values used top 10 low/high samples;\"\n", + " \" results may be unstable.\"\n", + " )\n", "\n", " self.summary = summ_stat\n", " self.is_paired = paired\n", " self.is_difference = diff\n", - " self.statistic = str(statfunction)\n", + " self.statistic = str(stat_function)\n", " self.n_reps = reps\n", "\n", - " self.ci = (1-alpha_level)*100\n", + " self.ci = (1 - alpha_level) * 100\n", " self.stat_array = np.array(statarray)\n", "\n", " self.pct_ci_low = statarray[pct_low_high[0]]\n", @@ -236,33 +238,33 @@ " self.pvalue_wilcoxon = wilcoxonresult\n", " self.pvalue_mann_whitney = mannwhitneyresult\n", "\n", - " self.results = {'stat_summary': self.summary,\n", - " 'is_difference': diff,\n", - " 'is_paired': paired,\n", - " 'bca_ci_low': self.bca_ci_low,\n", - " 'bca_ci_high': self.bca_ci_high,\n", - " 'ci': self.ci\n", - " }\n", + " self.results = {\n", + " \"stat_summary\": self.summary,\n", + " \"is_difference\": diff,\n", + " \"is_paired\": paired,\n", + " \"bca_ci_low\": self.bca_ci_low,\n", + " \"bca_ci_high\": self.bca_ci_high,\n", + " \"ci\": self.ci,\n", + " }\n", "\n", " def __repr__(self):\n", - " import numpy as np\n", - "\n", - " if 'mean' in self.statistic:\n", - " stat = 'mean'\n", - " elif 'median' in self.statistic:\n", - " stat = 'median'\n", + " if \"mean\" in self.statistic:\n", + " stat = \"mean\"\n", + " elif \"median\" in self.statistic:\n", + " stat = \"median\"\n", " else:\n", " stat = self.statistic\n", "\n", - " diff_types = {'sequential': 'paired', 
'baseline': 'paired', None: 'unpaired'}\n", + " diff_types = {\"sequential\": \"paired\", \"baseline\": \"paired\", None: \"unpaired\"}\n", " if self.is_difference:\n", - " a = 'The {} {} difference is {}.'.format(diff_types[self.is_paired],\n", - " stat, self.summary)\n", + " a = \"The {} {} difference is {}.\".format(\n", + " diff_types[self.is_paired], stat, self.summary\n", + " )\n", " else:\n", - " a = 'The {} is {}.'.format(stat, self.summary)\n", + " a = \"The {} is {}.\".format(stat, self.summary)\n", "\n", - " b = '[{} CI: {}, {}]'.format(self.ci, self.bca_ci_low, self.bca_ci_high)\n", - " return '\\n'.join([a, b])" + " b = \"[{} CI: {}, {}]\".format(self.ci, self.bca_ci_low, self.bca_ci_high)\n", + " return \"\\n\".join([a, b])" ] }, { @@ -272,7 +274,7 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", + "# |export\n", "def jackknife_indexes(data):\n", " # Taken without modification from scikits.bootstrap package.\n", " \"\"\"\n", @@ -283,49 +285,43 @@ " For a given set of data Y, the jackknife sample J[i] is defined as the\n", " data set Y with the ith data point deleted.\n", " \"\"\"\n", - " import numpy as np\n", "\n", - " base = np.arange(0,len(data))\n", - " return (np.delete(base,i) for i in base)\n", + " base = np.arange(0, len(data))\n", + " return (np.delete(base, i) for i in base)\n", "\n", - "def bca(data, alphas, statarray, statfunction, ostat, reps):\n", - " '''\n", + "\n", + "def bca(data, alphas, statarray, stat_function, ostat, reps):\n", + " \"\"\"\n", " Subroutine called to calculate the BCa statistics.\n", " Borrowed heavily from scikits.bootstrap code.\n", - " '''\n", - " import warnings\n", - "\n", - " import numpy as np\n", - " import pandas as pd\n", - " import seaborn as sns\n", - "\n", - " from scipy.stats import norm\n", - " from numpy.random import randint\n", + " \"\"\"\n", "\n", " # The bias correction value.\n", - " z0 = norm.ppf( ( 1.0*np.sum(statarray < ostat, axis = 0) ) / reps )\n", + " z0 = norm.ppf((1.0 * 
np.sum(statarray < ostat, axis=0)) / reps)\n", "\n", " # Statistics of the jackknife distribution\n", " jackindexes = jackknife_indexes(data[0])\n", - " jstat = [statfunction(*(x[indexes] for x in data))\n", - " for indexes in jackindexes]\n", - " jmean = np.mean(jstat,axis = 0)\n", + " jstat = [stat_function(*(x[indexes] for x in data)) for indexes in jackindexes]\n", + " jmean = np.mean(jstat, axis=0)\n", "\n", " # Acceleration value\n", - " a = np.divide(np.sum( (jmean - jstat)**3, axis = 0 ),\n", - " ( 6.0 * np.sum( (jmean - jstat)**2, axis = 0)**1.5 )\n", - " )\n", + " a = np.divide(\n", + " np.sum((jmean - jstat) ** 3, axis=0),\n", + " (6.0 * np.sum((jmean - jstat) ** 2, axis=0) ** 1.5),\n", + " )\n", " if np.any(np.isnan(a)):\n", " nanind = np.nonzero(np.isnan(a))\n", - " warnings.warn(\"Some acceleration values were undefined.\"\n", - " \"This is almost certainly because all values\"\n", - " \"for the statistic were equal. Affected\"\n", - " \"confidence intervals will have zero width and\"\n", - " \"may be inaccurate (indexes: {})\".format(nanind))\n", - " zs = z0 + norm.ppf(alphas).reshape(alphas.shape+(1,)*z0.ndim)\n", - " avals = norm.cdf(z0 + zs/(1-a*zs))\n", - " nvals = np.round((reps-1)*avals)\n", - " nvals = np.nan_to_num(nvals).astype('int')\n", + " warnings.warn(\n", + " \"Some acceleration values were undefined.\"\n", + " \"This is almost certainly because all values\"\n", + " \"for the statistic were equal. 
Affected\"\n", + " \"confidence intervals will have zero width and\"\n", + " \"may be inaccurate (indexes: {})\".format(nanind)\n", + " )\n", + " zs = z0 + norm.ppf(alphas).reshape(alphas.shape + (1,) * z0.ndim)\n", + " avals = norm.cdf(z0 + zs / (1 - a * zs))\n", + " nvals = np.round((reps - 1) * avals)\n", + " nvals = np.nan_to_num(nvals).astype(\"int\")\n", "\n", " return nvals" ] diff --git a/nbs/API/class.ipynb b/nbs/API/class.ipynb deleted file mode 100644 index ff6d2398..00000000 --- a/nbs/API/class.ipynb +++ /dev/null @@ -1,4243 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "ed122c74", - "metadata": {}, - "source": [ - "# Class\n", - "\n", - "> Several classes for estimating statistics and generating plots.\n", - "\n", - "- order: 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fb97d9b1", - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp _classes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d5d586f", - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "from __future__ import annotations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dcd32470", - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "from nbdev.showdoc import *\n", - "import nbdev\n", - "nbdev.nbdev_export()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d3c6f47a", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "from scipy.stats import norm\n", - "import pandas as pd\n", - "from scipy.stats import randint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "204a64b4", - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "import dabest" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "350b12c1", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class Dabest(object):\n", - "\n", - " 
\"\"\"\n", - " Class for estimation statistics and plots.\n", - " \"\"\"\n", - "\n", - " def __init__(self, data, idx, x, y, paired, id_col, ci, \n", - " resamples, random_seed, proportional, delta2, \n", - " experiment, experiment_label, x1_level, mini_meta):\n", - "\n", - " \"\"\"\n", - " Parses and stores pandas DataFrames in preparation for estimation\n", - " statistics. You should not be calling this class directly; instead,\n", - " use `dabest.load()` to parse your DataFrame prior to analysis.\n", - " \"\"\"\n", - "\n", - " # Import standard data science libraries.\n", - " import numpy as np\n", - " import pandas as pd\n", - " import seaborn as sns\n", - "\n", - " self.__delta2 = delta2\n", - " self.__experiment = experiment\n", - " self.__ci = ci\n", - " self.__data = data\n", - " self.__id_col = id_col\n", - " self.__is_paired = paired\n", - " self.__resamples = resamples\n", - " self.__random_seed = random_seed\n", - " self.__proportional = proportional\n", - " self.__mini_meta = mini_meta \n", - "\n", - " # Make a copy of the data, so we don't make alterations to it.\n", - " data_in = data.copy()\n", - " # data_in.reset_index(inplace=True)\n", - " # data_in_index_name = data_in.index.name\n", - "\n", - "\n", - " # Check if it is a valid mini_meta case\n", - " if mini_meta is True:\n", - "\n", - " # Only mini_meta calculation but not proportional and delta-delta function\n", - " if proportional is True:\n", - " err0 = '`proportional` and `mini_meta` cannot be True at the same time.'\n", - " raise ValueError(err0)\n", - " elif delta2 is True:\n", - " err0 = '`delta` and `mini_meta` cannot be True at the same time.'\n", - " raise ValueError(err0)\n", - " \n", - " # Check if the columns stated are valid\n", - " if all([isinstance(i, str) for i in idx]):\n", - " if len(pd.unique([t for t in idx]).tolist())!=2:\n", - " err0 = '`mini_meta` is True, but `idx` ({})'.format(idx) \n", - " err1 = 'does not contain exactly 2 columns.'\n", - " raise ValueError(err0 + 
err1)\n", - " elif all([isinstance(i, (tuple, list)) for i in idx]):\n", - " all_idx_lengths = [len(t) for t in idx]\n", - " if (np.array(all_idx_lengths) != 2).any():\n", - " err1 = \"`mini_meta` is True, but some idx \"\n", - " err2 = \"in {} does not consist only of two groups.\".format(idx)\n", - " raise ValueError(err1 + err2)\n", - " \n", - "\n", - "\n", - " # Check if this is a 2x2 ANOVA case and x & y are valid columns\n", - " # Create experiment_label and x1_level\n", - " if delta2 is True:\n", - " if proportional is True:\n", - " err0 = '`proportional` and `delta` cannot be True at the same time.'\n", - " raise ValueError(err0)\n", - " # idx should not be specified\n", - " if idx:\n", - " err0 = '`idx` should not be specified when `delta2` is True.'.format(len(x))\n", - " raise ValueError(err0)\n", - "\n", - " # Check if x is valid\n", - " if len(x) != 2:\n", - " err0 = '`delta2` is True but the number of variables indicated by `x` is {}.'.format(len(x))\n", - " raise ValueError(err0)\n", - " else:\n", - " for i in x:\n", - " if i not in data_in.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(i)\n", - " raise IndexError(err)\n", - "\n", - " # Check if y is valid\n", - " if not y:\n", - " err0 = '`delta2` is True but `y` is not indicated.'\n", - " raise ValueError(err0)\n", - " elif y not in data_in.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(y)\n", - " raise IndexError(err)\n", - "\n", - " # Check if experiment is valid\n", - " if experiment not in data_in.columns:\n", - " err = '{0} is not a column in `data`. 
Please check.'.format(experiment)\n", - " raise IndexError(err)\n", - "\n", - " # Check if experiment_label is valid and create experiment when needed\n", - " if experiment_label:\n", - " if len(experiment_label) != 2:\n", - " err0 = '`experiment_label` does not have a length of 2.'\n", - " raise ValueError(err0)\n", - " else: \n", - " for i in experiment_label:\n", - " if i not in data_in[experiment].unique():\n", - " err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment)\n", - " raise IndexError(err)\n", - " else:\n", - " experiment_label = data_in[experiment].unique()\n", - "\n", - " # Check if x1_level is valid\n", - " if x1_level:\n", - " if len(x1_level) != 2:\n", - " err0 = '`x1_level` does not have a length of 2.'\n", - " raise ValueError(err0)\n", - " else: \n", - " for i in x1_level:\n", - " if i not in data_in[x[0]].unique():\n", - " err = '{0} is not an element in the column `{1}` of `data`. Please check.'.format(i, experiment)\n", - " raise IndexError(err)\n", - "\n", - " else:\n", - " x1_level = data_in[x[0]].unique() \n", - " elif experiment is not None:\n", - " experiment_label = data_in[experiment].unique()\n", - " x1_level = data_in[x[0]].unique() \n", - " self.__experiment_label = experiment_label\n", - " self.__x1_level = x1_level\n", - "\n", - "\n", - " # # Check if idx is specified\n", - " # if delta2 is False and not idx:\n", - " # err = '`idx` is not a column in `data`. 
Please check.'\n", - " # raise IndexError(err)\n", - "\n", - "\n", - " # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case\n", - " if idx is None and x is not None and y is not None:\n", - " # Add a length check for unique values in the first element in list x, \n", - " # if the length is greater than 2, force delta2 to be False\n", - " # Should be removed if delta2 for situations other than 2x2 is supported\n", - " if len(data_in[x[0]].unique()) > 2 and x1_level is None:\n", - " delta2 = False\n", - " self.__delta2 = delta2\n", - " # stop the loop if delta2 is False\n", - " \n", - " # add a new column which is a combination of experiment and the first variable\n", - " new_col_name = experiment+x[0]\n", - " while new_col_name in data_in.columns:\n", - " new_col_name += \"_\"\n", - " data_in[new_col_name] = data_in[x[0]].astype(str) + \" \" + data_in[experiment].astype(str)\n", - "\n", - " #create idx and record the first and second x variable \n", - " idx = []\n", - " for i in list(map(lambda x: str(x), experiment_label)):\n", - " temp = []\n", - " for j in list(map(lambda x: str(x), x1_level)):\n", - " temp.append(j + \" \" + i)\n", - " idx.append(temp)\n", - " \n", - " self.__idx = idx\n", - " self.__x1 = x[0]\n", - " self.__x2 = x[1]\n", - " x = new_col_name\n", - " else:\n", - " self.__idx = idx\n", - " self.__x1 = None\n", - " self.__x2 = None\n", - "\n", - "\n", - "\n", - " # Determine the kind of estimation plot we need to produce.\n", - " if all([isinstance(i, (str, int, float)) for i in idx]):\n", - " # flatten out idx.\n", - " all_plot_groups = pd.unique([t for t in idx]).tolist()\n", - " if len(idx) > len(all_plot_groups):\n", - " err0 = '`idx` contains duplicated groups. 
Please remove any duplicates and try again.'\n", - " raise ValueError(err0)\n", - " \n", - " # We need to re-wrap this idx inside another tuple so as to\n", - " # easily loop thru each pairwise group later on.\n", - " self.__idx = (idx,)\n", - "\n", - " elif all([isinstance(i, (tuple, list)) for i in idx]):\n", - " all_plot_groups = pd.unique([tt for t in idx for tt in t]).tolist()\n", - " \n", - " actual_groups_given = sum([len(i) for i in idx])\n", - " \n", - " if actual_groups_given > len(all_plot_groups):\n", - " err0 = 'Groups are repeated across tuples,'\n", - " err1 = ' or a tuple has repeated groups in it.'\n", - " err2 = ' Please remove any duplicates and try again.'\n", - " raise ValueError(err0 + err1 + err2)\n", - "\n", - " else: # mix of string and tuple?\n", - " err = 'There seems to be a problem with the idx you '\\\n", - " 'entered--{}.'.format(idx)\n", - " raise ValueError(err)\n", - "\n", - " # Having parsed the idx, check if it is a kosher paired plot,\n", - " # if so stated.\n", - " #if paired is True:\n", - " # all_idx_lengths = [len(t) for t in self.__idx]\n", - " # if (np.array(all_idx_lengths) != 2).any():\n", - " # err1 = \"`is_paired` is True, but some idx \"\n", - " # err2 = \"in {} does not consist only of two groups.\".format(idx)\n", - " # raise ValueError(err1 + err2)\n", - "\n", - " # Check if there is a typo on paired\n", - " if paired is not None:\n", - " if paired not in (\"baseline\", \"sequential\"):\n", - " err = '{} assigned for `paired` is not valid.'.format(paired)\n", - " raise ValueError(err)\n", - "\n", - "\n", - " # Determine the type of data: wide or long.\n", - " if x is None and y is not None:\n", - " err = 'You have only specified `y`. Please also specify `x`.'\n", - " raise ValueError(err)\n", - "\n", - " elif y is None and x is not None:\n", - " err = 'You have only specified `x`. 
Please also specify `y`.'\n", - " raise ValueError(err)\n", - "\n", - " # Identify the type of data that was passed in.\n", - " elif x is not None and y is not None:\n", - " # Assume we have a long dataset.\n", - " # check both x and y are column names in data.\n", - " if x not in data_in.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(x)\n", - " raise IndexError(err)\n", - " if y not in data_in.columns:\n", - " err = '{0} is not a column in `data`. Please check.'.format(y)\n", - " raise IndexError(err)\n", - "\n", - " # check y is numeric.\n", - " if not np.issubdtype(data_in[y].dtype, np.number):\n", - " err = '{0} is a column in `data`, but it is not numeric.'.format(y)\n", - " raise ValueError(err)\n", - "\n", - " # check all the idx can be found in data_in[x]\n", - " for g in all_plot_groups:\n", - " if g not in data_in[x].unique():\n", - " err0 = '\"{0}\" is not a group in the column `{1}`.'.format(g, x)\n", - " err1 = \" Please check `idx` and try again.\"\n", - " raise IndexError(err0 + err1)\n", - "\n", - " # Select only rows where the value in the `x` column \n", - " # is found in `idx`.\n", - " plot_data = data_in[data_in.loc[:, x].isin(all_plot_groups)].copy()\n", - " \n", - " # plot_data.drop(\"index\", inplace=True, axis=1)\n", - "\n", - " # Assign attributes\n", - " self.__x = x\n", - " self.__y = y\n", - " self.__xvar = x\n", - " self.__yvar = y\n", - "\n", - " elif x is None and y is None:\n", - " # Assume we have a wide dataset.\n", - " # Assign attributes appropriately.\n", - " self.__x = None\n", - " self.__y = None\n", - " self.__xvar = \"group\"\n", - " self.__yvar = \"value\"\n", - "\n", - " # First, check we have all columns in the dataset.\n", - " for g in all_plot_groups:\n", - " if g not in data_in.columns:\n", - " err0 = '\"{0}\" is not a column in `data`.'.format(g)\n", - " err1 = \" Please check `idx` and try again.\"\n", - " raise IndexError(err0 + err1)\n", - " \n", - " set_all_columns = 
set(data_in.columns.tolist())\n", - " set_all_plot_groups = set(all_plot_groups)\n", - " id_vars = set_all_columns.difference(set_all_plot_groups)\n", - "\n", - " plot_data = pd.melt(data_in,\n", - " id_vars=id_vars,\n", - " value_vars=all_plot_groups,\n", - " value_name=self.__yvar,\n", - " var_name=self.__xvar)\n", - " \n", - " # Added in v0.2.7.\n", - " # remove any NA rows.\n", - " plot_data.dropna(axis=0, how='any', subset=[self.__yvar], inplace=True)\n", - "\n", - " \n", - " # Lines 131 to 140 added in v0.2.3.\n", - " # Fixes a bug that jammed up when the xvar column was already \n", - " # a pandas Categorical. Now we check for this and act appropriately.\n", - " if isinstance(plot_data[self.__xvar].dtype, \n", - " pd.CategoricalDtype) is True:\n", - " plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)\n", - " plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, \n", - " ordered=True, \n", - " inplace=True)\n", - " else:\n", - " plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],\n", - " categories=all_plot_groups,\n", - " ordered=True)\n", - " \n", - " # # The line below was added in v0.2.4, removed in v0.2.5.\n", - " # plot_data.dropna(inplace=True)\n", - " \n", - " self.__plot_data = plot_data\n", - " \n", - " self.__all_plot_groups = all_plot_groups\n", - "\n", - "\n", - " # Sanity check that all idxs are paired, if so desired.\n", - " #if paired is True:\n", - " # if id_col is None:\n", - " # err = \"`id_col` must be specified if `is_paired` is set to True.\"\n", - " # raise IndexError(err)\n", - " # elif id_col not in plot_data.columns:\n", - " # err = \"{} is not a column in `data`. 
\".format(id_col)\n", - " # raise IndexError(err)\n", - "\n", - " # Check if `id_col` is valid\n", - " if paired:\n", - " if id_col is None:\n", - " err = \"`id_col` must be specified if `paired` is assigned with a not NoneType value.\"\n", - " raise IndexError(err)\n", - " elif id_col not in plot_data.columns:\n", - " err = \"{} is not a column in `data`. \".format(id_col)\n", - " raise IndexError(err)\n", - "\n", - " EffectSizeDataFrame_kwargs = dict(ci=ci, is_paired=paired,\n", - " random_seed=random_seed,\n", - " resamples=resamples,\n", - " proportional=proportional, \n", - " delta2=delta2, \n", - " experiment_label=self.__experiment_label,\n", - " x1_level=self.__x1_level,\n", - " x2=self.__x2,\n", - " mini_meta = mini_meta)\n", - "\n", - " self.__mean_diff = EffectSizeDataFrame(self, \"mean_diff\",\n", - " **EffectSizeDataFrame_kwargs)\n", - "\n", - " self.__median_diff = EffectSizeDataFrame(self, \"median_diff\",\n", - " **EffectSizeDataFrame_kwargs)\n", - "\n", - " self.__cohens_d = EffectSizeDataFrame(self, \"cohens_d\",\n", - " **EffectSizeDataFrame_kwargs)\n", - "\n", - " self.__cohens_h = EffectSizeDataFrame(self, \"cohens_h\",\n", - " **EffectSizeDataFrame_kwargs) \n", - "\n", - " self.__hedges_g = EffectSizeDataFrame(self, \"hedges_g\",\n", - " **EffectSizeDataFrame_kwargs)\n", - " \n", - " self.__delta_g = EffectSizeDataFrame(self, \"delta_g\",\n", - " **EffectSizeDataFrame_kwargs)\n", - "\n", - " if not paired:\n", - " self.__cliffs_delta = EffectSizeDataFrame(self, \"cliffs_delta\",\n", - " **EffectSizeDataFrame_kwargs)\n", - " else:\n", - " self.__cliffs_delta = \"The data is paired; Cliff's delta is therefore undefined.\"\n", - "\n", - "\n", - " def __repr__(self):\n", - " from .__init__ import __version__\n", - " import datetime as dt\n", - " import numpy as np\n", - "\n", - " from .misc_tools import print_greeting\n", - "\n", - " # Removed due to the deprecation of is_paired\n", - " #if self.__is_paired:\n", - " # es = \"Paired e\"\n", - " 
#else:\n", - " # es = \"E\"\n", - "\n", - " greeting_header = print_greeting()\n", - "\n", - " RM_STATUS = {'baseline' : 'for repeated measures against baseline \\n', \n", - " 'sequential': 'for the sequential design of repeated-measures experiment \\n',\n", - " 'None' : ''\n", - " }\n", - "\n", - " PAIRED_STATUS = {'baseline' : 'Paired e', \n", - " 'sequential' : 'Paired e',\n", - " 'None' : 'E'\n", - " }\n", - "\n", - " first_line = {\"rm_status\" : RM_STATUS[str(self.__is_paired)],\n", - " \"paired_status\": PAIRED_STATUS[str(self.__is_paired)]}\n", - "\n", - " s1 = \"{paired_status}ffect size(s) {rm_status}\".format(**first_line)\n", - " s2 = \"with {}% confidence intervals will be computed for:\".format(self.__ci)\n", - " desc_line = s1 + s2\n", - "\n", - " out = [greeting_header + \"\\n\\n\" + desc_line]\n", - "\n", - " comparisons = []\n", - "\n", - " if self.__is_paired == 'sequential':\n", - " for j, current_tuple in enumerate(self.__idx):\n", - " for ix, test_name in enumerate(current_tuple[1:]):\n", - " control_name = current_tuple[ix]\n", - " comparisons.append(\"{} minus {}\".format(test_name, control_name))\n", - " else:\n", - " for j, current_tuple in enumerate(self.__idx):\n", - " control_name = current_tuple[0]\n", - "\n", - " for ix, test_name in enumerate(current_tuple[1:]):\n", - " comparisons.append(\"{} minus {}\".format(test_name, control_name))\n", - "\n", - " if self.__delta2 is True:\n", - " comparisons.append(\"{} minus {} (only for mean difference)\".format(self.__experiment_label[1], self.__experiment_label[0]))\n", - " \n", - " if self.__mini_meta is True:\n", - " comparisons.append(\"weighted delta (only for mean difference)\")\n", - "\n", - " for j, g in enumerate(comparisons):\n", - " out.append(\"{}. 
{}\".format(j+1, g))\n", - "\n", - " resamples_line1 = \"\\n{} resamples \".format(self.__resamples)\n", - " resamples_line2 = \"will be used to generate the effect size bootstraps.\"\n", - " out.append(resamples_line1 + resamples_line2)\n", - "\n", - " return \"\\n\".join(out)\n", - "\n", - "\n", - " # def __variable_name(self):\n", - " # return [k for k,v in locals().items() if v is self]\n", - " #\n", - " # @property\n", - " # def variable_name(self):\n", - " # return self.__variable_name()\n", - " \n", - " @property\n", - " def mean_diff(self):\n", - " \"\"\"\n", - " Returns an :py:class:`EffectSizeDataFrame` for the mean difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`\n", - "\n", - " \"\"\"\n", - " return self.__mean_diff\n", - " \n", - " \n", - " @property \n", - " def median_diff(self):\n", - " \"\"\"\n", - " Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", - "\n", - " \"\"\"\n", - " return self.__median_diff\n", - " \n", - " \n", - " @property\n", - " def cohens_d(self):\n", - " \"\"\"\n", - " Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `d`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", - "\n", - " \"\"\"\n", - " return self.__cohens_d\n", - " \n", - " \n", - " @property\n", - " def cohens_h(self):\n", - " \"\"\"\n", - " Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `h`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `directional` argument in `dabest.load()`.\n", - "\n", - " \"\"\"\n", - " return self.__cohens_h\n", - "\n", - "\n", - " @property \n", - " def 
hedges_g(self):\n", - " \"\"\"\n", - " Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", - "\n", - " \"\"\"\n", - " return self.__hedges_g\n", - " \n", - " \n", - " @property \n", - " def cliffs_delta(self):\n", - " \"\"\"\n", - " Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", - "\n", - " \"\"\"\n", - " return self.__cliffs_delta\n", - "\n", - " @property\n", - " def delta_g(self):\n", - " \"\"\"\n", - " Returns an :py:class:`EffectSizeDataFrame` for deltas' g, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", - " \"\"\"\n", - " return self.__delta_g\n", - "\n", - " @property\n", - " def data(self):\n", - " \"\"\"\n", - " Returns the pandas DataFrame that was passed to `dabest.load()`.\n", - " When `delta2` is True, a new column is added to support the \n", - " function. 
The name of this new column is indicated by `x`.\n", - " \"\"\"\n", - " return self.__data\n", - "\n", - "\n", - " @property\n", - " def idx(self):\n", - " \"\"\"\n", - " Returns the order of categories that was passed to `dabest.load()`.\n", - " \"\"\"\n", - " return self.__idx\n", - " \n", - "\n", - " @property\n", - " def x1(self):\n", - " \"\"\"\n", - " Returns the first variable declared in x when it is a delta-delta\n", - " case; returns None otherwise.\n", - " \"\"\"\n", - " return self.__x1\n", - "\n", - "\n", - " @property\n", - " def x1_level(self):\n", - " \"\"\"\n", - " Returns the levels of first variable declared in x when it is a \n", - " delta-delta case; returns None otherwise.\n", - " \"\"\"\n", - " return self.__x1_level\n", - "\n", - "\n", - " @property\n", - " def x2(self):\n", - " \"\"\"\n", - " Returns the second variable declared in x when it is a delta-delta\n", - " case; returns None otherwise.\n", - " \"\"\"\n", - " return self.__x2\n", - "\n", - "\n", - " @property\n", - " def experiment(self):\n", - " \"\"\"\n", - " Returns the column name of experiment labels that was passed to \n", - " `dabest.load()` when it is a delta-delta case; returns None otherwise.\n", - " \"\"\"\n", - " return self.__experiment\n", - " \n", - "\n", - " @property\n", - " def experiment_label(self):\n", - " \"\"\"\n", - " Returns the experiment labels in order that was passed to `dabest.load()`\n", - " when it is a delta-delta case; returns None otherwise.\n", - " \"\"\"\n", - " return self.__experiment_label\n", - "\n", - "\n", - " @property\n", - " def delta2(self):\n", - " \"\"\"\n", - " Returns the boolean parameter indicating if this is a delta-delta \n", - " situation.\n", - " \"\"\"\n", - " return self.__delta2\n", - "\n", - "\n", - " @property\n", - " def is_paired(self):\n", - " \"\"\"\n", - " Returns the type of repeated-measures experiment.\n", - " \"\"\"\n", - " return self.__is_paired\n", - "\n", - "\n", - " @property\n", - " def id_col(self):\n", - 
" \"\"\"\n", - " Returns the id column declared to `dabest.load()`.\n", - " \"\"\"\n", - " return self.__id_col\n", - "\n", - "\n", - " @property\n", - " def ci(self):\n", - " \"\"\"\n", - " The width of the desired confidence interval.\n", - " \"\"\"\n", - " return self.__ci\n", - "\n", - "\n", - " @property\n", - " def resamples(self):\n", - " \"\"\"\n", - " The number of resamples used to generate the bootstrap.\n", - " \"\"\"\n", - " return self.__resamples\n", - "\n", - "\n", - " @property\n", - " def random_seed(self):\n", - " \"\"\"\n", - " The number used to initialise the numpy random seed generator, ie.\n", - " `seed_value` from `numpy.random.seed(seed_value)` is returned.\n", - " \"\"\"\n", - " return self.__random_seed\n", - "\n", - "\n", - " @property\n", - " def x(self):\n", - " \"\"\"\n", - " Returns the x column that was passed to `dabest.load()`, if any.\n", - " When `delta2` is True, `x` returns the name of the new column created \n", - " for the delta-delta situation. 
To retrieve the 2 variables passed into \n", - " `x` when `delta2` is True, please call `x1` and `x2` instead.\n", - " \"\"\"\n", - " return self.__x\n", - "\n", - "\n", - " @property\n", - " def y(self):\n", - " \"\"\"\n", - " Returns the y column that was passed to `dabest.load()`, if any.\n", - " \"\"\"\n", - " return self.__y\n", - "\n", - "\n", - " @property\n", - " def _xvar(self):\n", - " \"\"\"\n", - " Returns the xvar in dabest.plot_data.\n", - " \"\"\"\n", - " return self.__xvar\n", - "\n", - "\n", - " @property\n", - " def _yvar(self):\n", - " \"\"\"\n", - " Returns the yvar in dabest.plot_data.\n", - " \"\"\"\n", - " return self.__yvar\n", - "\n", - "\n", - " @property\n", - " def _plot_data(self):\n", - " \"\"\"\n", - " Returns the pandas DataFrame used to produce the estimation stats/plots.\n", - " \"\"\"\n", - " return self.__plot_data\n", - "\n", - " \n", - " @property\n", - " def proportional(self):\n", - " \"\"\"\n", - " Returns the proportional parameter class.\n", - " \"\"\"\n", - " return self.__proportional\n", - "\n", - " \n", - " @property\n", - " def mini_meta(self):\n", - " \"\"\"\n", - " Returns the mini_meta boolean parameter.\n", - " \"\"\"\n", - " return self.__mini_meta\n", - "\n", - "\n", - " @property\n", - " def _all_plot_groups(self):\n", - " \"\"\"\n", - " Returns the all plot groups, as indicated via the `idx` keyword.\n", - " \"\"\"\n", - " return self.__all_plot_groups" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c86c0487", - "metadata": {}, - "source": [ - "#### Example: mean_diff" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6d07d58b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DABEST v2023.03.29\n", - "==================\n", - " \n", - "Good afternoon!\n", - "The current time is Tue Apr 18 14:47:26 2023.\n", - "\n", - "The unpaired mean difference between control and test is 0.5 [95%CI -0.0412, 1.0].\n", - "The p-value of the two-sided 
permutation t-test is 0.0758, calculated for legacy purposes only. \n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing theeffect size (or greater),\n", - "assuming the null hypothesis ofzero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", - "\n", - "To get the results of all valid statistical tests, use `.mean_diff.statistical_tests`" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "control = norm.rvs(loc=0, size=30, random_state=12345)\n", - "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", - "my_df = pd.DataFrame({\"control\": control,\n", - " \"test\": test})\n", - "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", - "my_dabest_object.mean_diff" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "cf5ca0a0", - "metadata": {}, - "source": [ - "This is simply the mean of the control group subtracted from\n", - "the mean of the test group.\n", - "\n", - "$$\\text{Mean difference} = \\overline{x}_{Test} - \\overline{x}_{Control}$$\n", - "\n", - "where $\\overline{x}$ is the mean for the group $x$." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "8b3b146c", - "metadata": {}, - "source": [ - "#### Example: median_diff" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e9b8635", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jacobluke/opt/anaconda3/envs/dabest-nbdev/lib/python3.8/site-packages/dabest/_stats_tools/effsize.py:72: UserWarning: Using median as the statistic in bootstrapping may result in a biased estimate and cause problems with BCa confidence intervals. 
Consider using a different statistic, such as the mean.\n", - "When plotting, please consider using percetile confidence intervals by specifying `ci_type='percentile'`. For detailed information, refer to https://github.com/ACCLAB/DABEST-python/issues/129 \n", - "\n", - " warnings.warn(message=mes1+mes2, category=UserWarning)\n" - ] - }, - { - "data": { - "text/plain": [ - "DABEST v2023.03.29\n", - "==================\n", - " \n", - "Good afternoon!\n", - "The current time is Tue Apr 18 14:47:28 2023.\n", - "\n", - "The unpaired median difference between control and test is 0.5 [95%CI -0.0758, 0.991].\n", - "The p-value of the two-sided permutation t-test is 0.103, calculated for legacy purposes only. \n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing theeffect size (or greater),\n", - "assuming the null hypothesis ofzero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", - "\n", - "To get the results of all valid statistical tests, use `.median_diff.statistical_tests`" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "control = norm.rvs(loc=0, size=30, random_state=12345)\n", - "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", - "my_df = pd.DataFrame({\"control\": control,\n", - " \"test\": test})\n", - "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", - "my_dabest_object.median_diff" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "838b2978", - "metadata": {}, - "source": [ - "\n", - "This is the median difference between the control group and the test group.\n", - "\n", - "If the comparison(s) are unpaired, median_diff is computed with the following equation:\n", - "\n", - "\n", - "$$\\text{Median difference} = \\widetilde{x}_{Test} - \\widetilde{x}_{Control}$$\n", - "\n", 
- "where $\\widetilde{x}$ is the median for the group $x$.\n", - "\n", - "If the comparison(s) are paired, median_diff is computed with the following equation:\n", - "\n", - "$$\\text{Median difference} = \\widetilde{x}_{Test - Control}$$\n", - " \n", - "\n", - "##### Things to note\n", - "\n", - "Using median difference as the statistic in bootstrapping may result in a biased estimate and cause problems with BCa confidence intervals. Consider using mean difference instead. \n", - "\n", - "When plotting, consider using percentile confidence intervals instead of BCa confidence intervals by specifying `ci_type = 'percentile'` in .plot(). \n", - "\n", - "For detailed information, please refer to [Issue 129](https://github.com/ACCLAB/DABEST-python/issues/129). \n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a5324d21", - "metadata": {}, - "source": [ - "#### Example: cohens_d" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "748b5c60", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DABEST v2023.03.29\n", - "==================\n", - " \n", - "Good afternoon!\n", - "The current time is Tue Apr 18 14:47:29 2023.\n", - "\n", - "The unpaired Cohen's d between control and test is 0.471 [95%CI -0.0843, 0.976].\n", - "The p-value of the two-sided permutation t-test is 0.0758, calculated for legacy purposes only. 
\n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing theeffect size (or greater),\n", - "assuming the null hypothesis ofzero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", - "\n", - "To get the results of all valid statistical tests, use `.cohens_d.statistical_tests`" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "control = norm.rvs(loc=0, size=30, random_state=12345)\n", - "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", - "my_df = pd.DataFrame({\"control\": control,\n", - " \"test\": test})\n", - "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", - "my_dabest_object.cohens_d" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "6f66579c", - "metadata": {}, - "source": [ - "\n", - "Cohen's `d` is simply the mean of the control group subtracted from\n", - "the mean of the test group.\n", - "\n", - "If `paired` is None, then the comparison(s) are unpaired; \n", - "otherwise the comparison(s) are paired.\n", - "\n", - "If the comparison(s) are unpaired, Cohen's `d` is computed with the following equation:\n", - "\n", - "\n", - "$$d = \\frac{\\overline{x}_{Test} - \\overline{x}_{Control}} {\\text{pooled standard deviation}}$$\n", - "\n", - "\n", - "For paired comparisons, Cohen's d is given by\n", - "\n", - "$$d = \\frac{\\overline{x}_{Test} - \\overline{x}_{Control}} {\\text{average standard deviation}}$$\n", - "\n", - "where $\\overline{x}$ is the mean of the respective group of observations, ${Var}_{x}$ denotes the variance of that group,\n", - "\n", - "\n", - "$$\\text{pooled standard deviation} = \\sqrt{ \\frac{(n_{control} - 1) * {Var}_{control} + (n_{test} - 1) * {Var}_{test} } {n_{control} + n_{test} - 2} }$$\n", - "\n", - "and\n", - "\n", - "\n", - "$$\\text{average 
standard deviation} = \\sqrt{ \\frac{{Var}_{control} + {Var}_{test}} {2}}$$\n", - "\n", - "The sample variance (and standard deviation) uses N-1 degrees of freedoms.\n", - "This is an application of [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction), and yields the unbiased sample variance.\n", - "\n", - "References:\n", - "\n", - "\n", - " \n", - "\n", - " \n", - "" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "40f4eff9", - "metadata": {}, - "source": [ - "#### Example: cohens_h" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f713781c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DABEST v2023.03.29\n", - "==================\n", - " \n", - "Good afternoon!\n", - "The current time is Tue Apr 18 14:47:30 2023.\n", - "\n", - "The unpaired Cohen's h between control and test is 0.0 [95%CI -0.613, 0.429].\n", - "The p-value of the two-sided permutation t-test is 0.799, calculated for legacy purposes only. 
\n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing theeffect size (or greater),\n", - "assuming the null hypothesis ofzero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", - "\n", - "To get the results of all valid statistical tests, use `.cohens_h.statistical_tests`" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "control = randint.rvs(0, 2, size=30, random_state=12345)\n", - "test = randint.rvs(0, 2, size=30, random_state=12345)\n", - "my_df = pd.DataFrame({\"control\": control,\n", - " \"test\": test})\n", - "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", - "my_dabest_object.cohens_h" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "9e3e57bd", - "metadata": {}, - "source": [ - "Cohen's *h* uses the information of proportion in the control and test groups to calculate the distance between two proportions.\n", - "\n", - "It can be used to describe the difference between two proportions as \"small\", \"medium\", or \"large\".\n", - "\n", - "It can be used to determine if the difference between two proportions is \"meaningful\".\n", - "\n", - "A directional Cohen's *h* is computed with the following equation:\n", - "\n", - "\n", - "$$h = 2 * \\arcsin{\\sqrt{proportion_{Test}}} - 2 * \\arcsin{\\sqrt{proportion_{Control}}}$$\n", - "\n", - "For a non-directional Cohen's *h*, the equation is:\n", - "\n", - "$$h = |2 * \\arcsin{\\sqrt{proportion_{Test}}} - 2 * \\arcsin{\\sqrt{proportion_{Control}}}|$$\n", - "\n", - "References:\n", - "\n", - "" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "970fb3b2", - "metadata": {}, - "source": [ - "#### Example: hedges_g" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "26960f9e", - "metadata": {}, - 
"outputs": [ - { - "data": { - "text/plain": [ - "DABEST v2023.03.29\n", - "==================\n", - " \n", - "Good afternoon!\n", - "The current time is Tue Apr 18 14:47:32 2023.\n", - "\n", - "The unpaired Hedges' g between control and test is 0.465 [95%CI -0.0832, 0.963].\n", - "The p-value of the two-sided permutation t-test is 0.0758, calculated for legacy purposes only. \n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing theeffect size (or greater),\n", - "assuming the null hypothesis ofzero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", - "\n", - "To get the results of all valid statistical tests, use `.hedges_g.statistical_tests`" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "control = norm.rvs(loc=0, size=30, random_state=12345)\n", - "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", - "my_df = pd.DataFrame({\"control\": control,\n", - " \"test\": test})\n", - "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", - "my_dabest_object.hedges_g" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "66c8a83a", - "metadata": {}, - "source": [ - "Hedges' `g` is `cohens_d` corrected for bias via multiplication with the following correction factor:\n", - " \n", - "$$\\frac{ \\Gamma( \\frac{a} {2} )} {\\sqrt{ \\frac{a} {2} } \\times \\Gamma( \\frac{a - 1} {2} )}$$\n", - "\n", - "where\n", - "\n", - "$$a = {n}_{control} + {n}_{test} - 2$$\n", - "\n", - "and $\\Gamma(x)$ is the [Gamma function](https://en.wikipedia.org/wiki/Gamma_function).\n", - "\n", - "\n", - "\n", - "References:\n", - "\n", - "\n", - " \n", - "" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b1cf0080", - "metadata": {}, - "source": [ - "#### Example: cliffs_delta" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "id": "dce86c76", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DABEST v2023.03.29\n", - "==================\n", - " \n", - "Good afternoon!\n", - "The current time is Tue Apr 18 14:47:40 2023.\n", - "\n", - "The unpaired Cliff's delta between control and test is 0.28 [95%CI -0.0244, 0.533].\n", - "The p-value of the two-sided permutation t-test is 0.061, calculated for legacy purposes only. \n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing theeffect size (or greater),\n", - "assuming the null hypothesis ofzero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", - "\n", - "To get the results of all valid statistical tests, use `.cliffs_delta.statistical_tests`" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "control = norm.rvs(loc=0, size=30, random_state=12345)\n", - "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", - "my_df = pd.DataFrame({\"control\": control,\n", - " \"test\": test})\n", - "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", - "my_dabest_object.cliffs_delta" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "9661ab37", - "metadata": {}, - "source": [ - "Cliff's delta is a measure of ordinal dominance, ie. how often the values from the test sample are larger than values from the control sample.\n", - "\n", - "$$\\text{Cliff's delta} = \\frac{\\#({x}_{test} > {x}_{control}) - \\#({x}_{test} < {x}_{control})} {{n}_{Test} \\times {n}_{Control}}$$\n", - " \n", - " \n", - "where $\\#$ denotes the number of times a value from the test sample exceeds (or is lesser than) values in the control sample. 
\n", - " \n", - "Cliff's delta ranges from -1 to 1; it can also be thought of as a measure of the degree of overlap between the two samples. An attractive aspect of this effect size is that it does not make an assumptions about the underlying distributions that the samples were drawn from. \n", - "\n", - "References:\n", - "\n", - "\n", - " \n", - "" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "bd341f7c", - "metadata": {}, - "source": [ - "#### Example: delta_g" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9abb53c1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DABEST v2023.02.14\n", - "==================\n", - " \n", - "Good morning!\n", - "The current time is Tue May 16 01:11:14 2023.\n", - "\n", - "The unpaired deltas' g between W Placebo and M Placebo is 0.793 [95%CI 0.553, 1.1].\n", - "The p-value of the two-sided permutation t-test is 0.0, calculated for legacy purposes only. \n", - "\n", - "The unpaired deltas' g between W Drug and M Drug is 0.528 [95%CI 0.286, 0.765].\n", - "The p-value of the two-sided permutation t-test is 0.0, calculated for legacy purposes only. \n", - "\n", - "The deltas' g between Placebo and Drug is -0.651 [95%CI -1.6, 0.217].\n", - "The p-value of the two-sided permutation t-test is 0.0, calculated for legacy purposes only. 
\n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing the effect size (or greater),\n", - "assuming the null hypothesis of zero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", - "\n", - "To get the results of all valid statistical tests, use `.delta_g.statistical_tests`" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.random.seed(12345) # Fix the seed so the results are replicable.\n", - "N=20\n", - "y = norm.rvs(loc=3, scale=0.4, size=N*4)\n", - "y[N:2*N] = y[N:2*N]+1\n", - "y[2*N:3*N] = y[2*N:3*N]-0.5\n", - "t1 = np.repeat('Placebo', N*2).tolist()\n", - "t2 = np.repeat('Drug', N*2).tolist()\n", - "treatment = t1 + t2\n", - "rep = []\n", - "for i in range(N*2):\n", - " rep.append('Rep1')\n", - " rep.append('Rep2')\n", - "wt = np.repeat('W', N).tolist()\n", - "mt = np.repeat('M', N).tolist()\n", - "wt2 = np.repeat('W', N).tolist()\n", - "mt2 = np.repeat('M', N).tolist()\n", - "genotype = wt + mt + wt2 + mt2\n", - "id = list(range(0, N*2))\n", - "id_col = id + id\n", - "df_delta2 = pd.DataFrame({'ID' : id_col,\n", - " 'Rep' : rep,\n", - " 'Genotype' : genotype,\n", - " 'Treatment': treatment,\n", - " 'Y' : y})\n", - "unpaired_delta2 = dabest.load(data = df_delta2, x = [\"Genotype\", \"Genotype\"], y = \"Y\", delta2 = True, experiment = \"Treatment\")\n", - "unpaired_delta2.delta_g" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "8d41dad3", - "metadata": {}, - "source": [ - "Deltas' g is an effect size that only applied on experiments with a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, 1 and 2, which calculates `hedges_g` for delta-delta statistics.\n", - "\n", - "\n", - " $$\\Delta_{1} = \\overline{X}_{A_{2}, B_{1}} - \\overline{X}_{A_{1}, 
class DeltaDelta(object):
    r"""
    Compute and store the delta-delta statistics for experiments with a
    2-by-2 arrangement where two independent variables, A and B, each have
    two categorical values, 1 and 2. The data is divided into two pairs of
    two groups, and a primary delta is first calculated as the mean
    difference between each of the pairs:

    $$\Delta_{1} = \overline{X}_{A_{2}, B_{1}} - \overline{X}_{A_{1}, B_{1}}$$

    $$\Delta_{2} = \overline{X}_{A_{2}, B_{2}} - \overline{X}_{A_{1}, B_{2}}$$

    where $\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i
    and B = j, and $\Delta$ is the mean difference between two samples.

    A delta-delta value is then calculated as the mean difference between
    the two primary deltas:

    $$\Delta_{\Delta} = \Delta_{2} - \Delta_{1}$$

    and a deltas' g value is calculated as the mean difference between the
    two primary deltas divided by the standard deviation of the delta-delta
    value, which is calculated from a pooled variance of the 4 samples:

    $$\Delta_{g} = \frac{\Delta_{\Delta}}{s_{\Delta_{\Delta}}}$$

    $$s_{\Delta_{\Delta}} = \sqrt{\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$

    where $s$ is the standard deviation and $n$ is the sample size.

    Parameters
    ----------
    effectsizedataframe : object
        EffectSizeDataFrame-like object; its ``results``, ``dabest_obj``,
        ``resamples`` and ``effect_size`` attributes are read here.
    permutation_count : int
        Number of permutations used to compute the legacy p-value.
    bootstraps_delta_delta : sequence
        Pre-computed bootstrap material. For ``mean_diff``, index 2 holds the
        bootstrapped delta-delta array; otherwise index 0 holds the bootstraps
        and index 1 the raw-data deltas' g value.
    ci : float, default 95
        Width of the confidence interval, in percent.
    """

    def __init__(self, effectsizedataframe, permutation_count,
                 bootstraps_delta_delta, ci=95):

        import numpy as np
        from numpy import sort as npsort
        from numpy import isnan
        from ._stats_tools import confint_1group as ci1g
        from ._stats_tools import confint_2group_diff as ci2g

        from string import Template
        import warnings

        self.__effsizedf = effectsizedataframe.results
        self.__dabest_obj = effectsizedataframe.dabest_obj
        self.__ci = ci
        self.__resamples = effectsizedataframe.resamples
        self.__effect_size = effectsizedataframe.effect_size
        self.__alpha = ci2g._compute_alpha_from_ci(ci)
        self.__permutation_count = permutation_count
        self.__bootstraps = np.array(self.__effsizedf["bootstraps"])
        self.__control = self.__dabest_obj.experiment_label[0]
        self.__test = self.__dabest_obj.experiment_label[1]

        # Compute the bootstrap delta-delta or deltas' g, and the true
        # delta-delta based on the raw data.
        if self.__effect_size == "mean_diff":
            self.__bootstraps_delta_delta = bootstraps_delta_delta[2]
            self.__difference = self.__effsizedf["difference"][1] - self.__effsizedf["difference"][0]
        else:
            self.__bootstraps_delta_delta = bootstraps_delta_delta[0]
            self.__difference = bootstraps_delta_delta[1]

        sorted_delta_delta = npsort(self.__bootstraps_delta_delta)

        self.__bias_correction = ci2g.compute_meandiff_bias_correction(
            self.__bootstraps_delta_delta, self.__difference)

        self.__jackknives = np.array(ci1g.compute_1group_jackknife(
            self.__bootstraps_delta_delta,
            np.mean))

        self.__acceleration_value = ci2g._calc_accel(self.__jackknives)

        # Compute BCa intervals.
        bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(
            self.__bias_correction, self.__acceleration_value,
            self.__resamples, ci)

        self.__bca_interval_idx = (bca_idx_low, bca_idx_high)

        # `not isnan(...)` rather than `~isnan(...)`: same truth value for
        # numpy booleans, but unambiguous if a plain Python bool ever appears.
        if not isnan(bca_idx_low) and not isnan(bca_idx_high):
            self.__bca_low = sorted_delta_delta[bca_idx_low]
            self.__bca_high = sorted_delta_delta[bca_idx_high]

            err1 = "The $lim_type limit of the interval"
            err2 = "was in the $loc 10 values."
            err3 = "The result should be considered unstable."
            err_temp = Template(" ".join([err1, err2, err3]))

            # Interval limits landing in the extreme tails of the sorted
            # bootstraps indicate an unstable estimate.
            if bca_idx_low <= 10:
                warnings.warn(err_temp.substitute(lim_type="lower",
                                                  loc="bottom"),
                              stacklevel=1)

            if bca_idx_high >= self.__resamples - 9:
                warnings.warn(err_temp.substitute(lim_type="upper",
                                                  loc="top"),
                              stacklevel=1)

        else:
            # NaN limits mean the BCa interval could not be computed (all
            # bootstrap values were likely identical); fall back to the
            # effect size itself.
            err1 = "The $lim_type limit of the BCa interval cannot be computed."
            err2 = "It is set to the effect size itself."
            err3 = "All bootstrap values were likely all the same."
            err_temp = Template(" ".join([err1, err2, err3]))

            if isnan(bca_idx_low):
                self.__bca_low = self.__difference
                warnings.warn(err_temp.substitute(lim_type="lower"),
                              stacklevel=0)

            if isnan(bca_idx_high):
                self.__bca_high = self.__difference
                warnings.warn(err_temp.substitute(lim_type="upper"),
                              stacklevel=0)

        # Compute percentile intervals.
        pct_idx_low = int((self.__alpha / 2) * self.__resamples)
        pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples)

        self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
        self.__pct_low = sorted_delta_delta[pct_idx_low]
        self.__pct_high = sorted_delta_delta[pct_idx_high]

    def __permutation_test(self):
        """
        Perform a permutation test and obtain the permutation p-value
        based on the permutation data.
        """
        import numpy as np
        self.__permutations = np.array(self.__effsizedf["permutations"])

        THRESHOLD = np.abs(self.__difference)

        self.__permutations_delta_delta = np.array(
            self.__permutations[1] - self.__permutations[0])

        # Two-sided p-value: fraction of permuted delta-deltas at least as
        # extreme (in absolute value) as the observed difference.
        count = sum(np.abs(self.__permutations_delta_delta) > THRESHOLD)
        self.__pvalue_permutation = count / self.__permutation_count

    def __repr__(self, header=True, sigfig=3):
        """
        Return a human-readable summary of the delta-delta (or deltas' g)
        effect size, its BCa confidence interval and the legacy permutation
        p-value. `sigfig` controls the number of significant figures shown.
        """
        from .misc_tools import print_greeting

        first_line = {"control": self.__control,
                      "test": self.__test}

        if self.__effect_size == "mean_diff":
            out1 = "The delta-delta between {control} and {test} ".format(**first_line)
        else:
            out1 = "The deltas' g between {control} and {test} ".format(**first_line)

        base_string_fmt = "{:." + str(sigfig) + "}"
        if "." in str(self.__ci):
            ci_width = base_string_fmt.format(self.__ci)
        else:
            ci_width = str(self.__ci)

        ci_out = {"es": base_string_fmt.format(self.__difference),
                  "ci": ci_width,
                  "bca_low": base_string_fmt.format(self.__bca_low),
                  "bca_high": base_string_fmt.format(self.__bca_high)}

        out2 = "is {es} [{ci}%CI {bca_low}, {bca_high}].".format(**ci_out)
        out = out1 + out2

        if header is True:
            out = print_greeting() + "\n" + "\n" + out

        # Accessing the public property lazily runs the permutation test
        # the first time it is needed.
        pval_rounded = base_string_fmt.format(self.pvalue_permutation)

        p1 = "The p-value of the two-sided permutation t-test is {}, ".format(pval_rounded)
        p2 = "calculated for legacy purposes only. "
        pvalue = p1 + p2

        bs1 = "{} bootstrap samples were taken; ".format(self.__resamples)
        bs2 = "the confidence interval is bias-corrected and accelerated."
        bs = bs1 + bs2

        pval_def1 = "Any p-value reported is the probability of observing the " + \
                    "effect size (or greater),\nassuming the null hypothesis of " + \
                    "zero difference is true."
        pval_def2 = "\nFor each p-value, 5000 reshuffles of the " + \
                    "control and test labels were performed."
        pval_def = pval_def1 + pval_def2

        return "{}\n{}\n\n{}\n{}".format(out, pvalue, bs, pval_def)

    def to_dict(self):
        """
        Returns the attributes of the `DeltaDelta` object as a
        dictionary.
        """
        # Only get public (user-facing) attributes.
        attrs = [a for a in dir(self)
                 if not a.startswith(("_", "to_dict"))]
        out = {}
        for a in attrs:
            out[a] = getattr(self, a)
        return out

    @property
    def ci(self):
        """
        Returns the width of the confidence interval, in percent.
        """
        return self.__ci

    @property
    def alpha(self):
        """
        Returns the significance level of the statistical test as a float
        between 0 and 1.
        """
        return self.__alpha

    @property
    def bias_correction(self):
        return self.__bias_correction

    @property
    def bootstraps(self):
        '''
        Return the bootstrapped deltas from all the experiment groups.
        '''
        return self.__bootstraps

    @property
    def jackknives(self):
        return self.__jackknives

    @property
    def acceleration_value(self):
        return self.__acceleration_value

    @property
    def bca_low(self):
        """
        The bias-corrected and accelerated confidence interval lower limit.
        """
        return self.__bca_low

    @property
    def bca_high(self):
        """
        The bias-corrected and accelerated confidence interval upper limit.
        """
        return self.__bca_high

    @property
    def bca_interval_idx(self):
        return self.__bca_interval_idx

    @property
    def control(self):
        '''
        Return the name of the control experiment group.
        '''
        return self.__control

    @property
    def test(self):
        '''
        Return the name of the test experiment group.
        '''
        return self.__test

    @property
    def bootstraps_delta_delta(self):
        '''
        Return the delta-delta values calculated from the bootstrapped
        deltas.
        '''
        return self.__bootstraps_delta_delta

    @property
    def difference(self):
        '''
        Return the delta-delta value calculated based on the raw data.
        '''
        return self.__difference

    @property
    def pct_interval_idx(self):
        return self.__pct_interval_idx

    @property
    def pct_low(self):
        """
        The percentile confidence interval lower limit.
        """
        return self.__pct_low

    @property
    def pct_high(self):
        """
        The percentile confidence interval upper limit.
        """
        return self.__pct_high

    @property
    def pvalue_permutation(self):
        # Lazily computed: the permutation test runs on first access only.
        try:
            return self.__pvalue_permutation
        except AttributeError:
            self.__permutation_test()
            return self.__pvalue_permutation

    @property
    def permutation_count(self):
        """
        The number of permutations taken.
        """
        return self.__permutation_count

    @property
    def permutations(self):
        '''
        Return the mean differences of permutations obtained during
        the permutation test for each experiment group.
        '''
        try:
            return self.__permutations
        except AttributeError:
            self.__permutation_test()
            return self.__permutations

    @property
    def permutations_delta_delta(self):
        '''
        Return the delta-delta values of permutations obtained
        during the permutation test.
        '''
        try:
            return self.__permutations_delta_delta
        except AttributeError:
            self.__permutation_test()
            return self.__permutations_delta_delta
- ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a5905b79", - "metadata": {}, - "source": [ - "#### Example: delta-delta" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "088f734b", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0UAAAIaCAYAAADvKOYjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAACRpUlEQVR4nOzdd3xT9f4/8NdJ2qZ779JJoVBoKVBAZhmyRQREULwM/eJVcV0FwauI/mS411VRuQoOEFGGslQEyihSRlsoo4zSUigt3SMdaZuc3x9cIqGDpk16muT1fDz6kJzP55zziulp8s75nM8RRFEUQUREREREZKFkUgcgIiIiIiKSEosiIiIiIiKyaCyKiIiIiIjIorEoIiIiIiIii8aiiIiIiIiILBqLIiIiIiIismgsioiIiIiIyKKxKCIiIiIiIovGooiIiIiIiCyaRRVFOTk5eO2115CTkyN1FCIiIiIiaicsrih6/fXXWRQREREREZGWRRVFREREREREt2NRREREREREFo1FEVE7oa6pRlXRNdSpKqWOQkRERGRRrKQOQGTp1DXVyNz9Na6f3A1NbTUEuTU8IwcjbOT/wdreRep4RERERGaPRRGRhERRxJkfX0Pp5dS/l6lrkZ+6BxXXLyHmkQ8gs7KRMCERERGR+WNRRCShkktJOgXRrSrzMnFh24dQlRZAU1cD56Du8O8zAbauPm2ckoiIiMi8sSgiklDRxaNNtuef2qf9tzLnAq6n/I5uD74B5w5djB2NiIiIyGJwogUiKYmiXt3Vqkpc2PahcbIQERERWSgWRUQScgvvo/c6VQVXUJ6dZoQ0RERERJaJRRGRhNw69oZzYDe916upKDVCGiIiIiLLxKKISEKCICBy+mvw7TUWMmvFjWVyK8gVDk2sJIODV3AbJSQiIiIyf5xogUhiVgp7hI97CiEjHkFNeSFsHFxRcO4vXNz2UYP93Tv3g62bbxunJCIiIjJfJnOm6LXXXoMgCDo/XbpwBi4yH1YKe9h7BsLKzgm+MaMQ0H8KIOgeos6B3dD5nmclSkhERESWrCIvE1n71+HSH18ia/86VORlGnV/s2fPhiAIePzxx+u1zZs3D4IgYPbs2QbZl0mdKerWrRv+/PNP7WMrK5OKT6SX0BGPwK/3PShMS4C6VgWX4O5wCeoudSwiIiKyMFVF13D+1/dRfvUsIMggCDKIogZZ+9fCqUNXdL73edi5+xtl34GBgVi/fj0++OAD2NnZAQCqq6uxbt06BAUFGWw/JlVVWFlZwdeXw4bIcti6eiPgrklSxyAiIiILVVV0DSe+/hfqVJU3FogaiKJG216efQ4nvv4XejzygVEKo169eiE9PR2bNm3CjBkzAACbNm1CUFAQQkNDDbYfkxk+BwAXLlyAv78/wsLCMGPGDGRlZTXZX6VSoaysTPujVCrbKCkRERERkek7/+v7NwqiWwohHaIGdapKnP/1faNleOSRR7B69Wrt46+//hpz5swx6D5Mpijq168f1qxZg99++w0rV65ERkYGBg8ejPLy8kbXWbFiBVxcXLQ/cXFxbZiYiIiIiMh0VeRl3hgy11hBdJOoQfnVs0a7xujhhx/GwYMHcfnyZVy+fBkJCQl4+OGHDboPkxk+N3bsWO2/o6Oj0a9fPwQHB2PDhg149NFHG1znpZdewvPPP699nJKSwsKIiIiIiKgZCtMO3Zj06
U5FEQAIMhSmHYKDd4jBc3h5eWH8+PFYs2YNRFHE+PHj4enpadB9mExRdDtXV1d07twZFy9ebLSPQqGAQqHQPnZ0dGyLaEREREREJq+uWqmdVOFOBEGGumrjXaryyCOP4KmnngIAfPrppwbfvskMn7udUqlEeno6/Pz8pI5CRERERGR2rGwdm1UQAYAoamBla7wTEGPGjEFNTQ1qa2sxevRog2/fZIqi+fPnY9++fcjMzMShQ4cwadIkyOVyPPjgg1JHIyIiIiIyOx5dBjRv6BwAiJob/Y1ELpfj7NmzOHPmDORyucG3bzLD565evYoHH3wQhYWF8PLywqBBg3D48GF4eXlJHY2IiIiIyOw4eIfAqUNXlGefa7o4EmRwCogwyvVEt3J2djbatgVRFEWjbb2dSUpKQu/evXH8+HH06tVL6jhEWjXKIuSd3ANVWT7s3APgHTUMVnZOUsciIiIiC6dzn6KGCiNBBiuFvdHuU9RWTOZMEZG5yjsVjwtbP4CortMuuxz/LbpM+TfcOrJ4JyIiIunYufujxyMf4Pyv79+YnluQ/T35gqiBU0AEOt/7vEkXRACLIiJJVRVl48Kv70PUqHWWq2uqcPbnZQjoPwUl6cegrq2BS1A3+PedaPJ/dIiIiMi02Ln7o8fsd1GRl4nCtEOoq1bCytYRHl0GGH3IXFthUUQkodyknfUKops0tdW4sn+t9nFlXgbyTu5G5PTX4RLUra0iEhEREQG4cY2RuRRBtzOZ2eeIzFFlYbZe/dU1Vbi4/WMjpSEiIiKyTCyKiCSkcPLQe52qwqsou5pmhDRERERElolFEZGEfGJadvOxuqoyAychIiIislwsiogk5OTfCcFD/6HfSoIM9l4hRslDREREZIk40QKRxAIHTYdraAxyk3+HqqwAdu7+sHZ0R1b8tw3294joD1tX7zZOSURERGS+WBQRtQNOAV3gFNBFZ5lYV4Orh37SmZ3OJaQHOt3zbFvHIyIiIjJrHD5H1E4FD/0HYp/6GmGjH0fI8DnoMec9RD28HFa2DlJHIyIiIjKqzz//HE5OTqir+/vm9kqlEtbW1hg6dKhO3/j4eAiCgPT09Bbvj2eKiNoxhbMn/PtMkDoGERERETKuFeDgyYtQVqngaKfAoOhwhPp7GmVfw4YNg1KpxLFjx3DXXXcBAA4cOABfX18kJiaiuroatra2AIC9e/ciKCgIHTt2bPH+WBQRWbjY2Fjk5ubC19cXx44dkzoOERERtTPZ+SV4e93vOJOZA5lMgEwQoBFFfPv7YXQL9cOCB0cjwMvVoPuMiIiAn58f4uPjtUVRfHw8Jk6ciD179uDw4cPaM0bx8fEYNmxYq/bH4XNEFi43NxfZ2dnIzc2VOgoRERG1M9n5JXj6wx+QlnXjc4JGI6JOrYFGIwIAzl7OxdMf/oDs/BKD73vYsGHYu3ev9vHevXsxdOhQxMXFaZdXVVUhMTGRRRERERERERnH2+t+R0V1jbYIup1GI6Kiugbv/PCHwfc9bNgwJCQkoK6uDuXl5UhOTkZcXByGDBmC+Ph4AMBff/0FlUrFooiIiIiIiAwv41oBzmTmNFoQ3aTRiDidcQ0Z1woMuv+hQ4eioqICR48exYEDB9C5c2d4eXkhLi5Oe11RfHw8wsLCEBQU1Kp98ZoiIiIiIiKq5+DJi5DJhDsWRQAgkwk4mHrRoBMvhIeHo0OHDti7dy+Ki4sRFxcHAPD390dgYCAOHTqEvXv3Yvjw4a3eF88UERERERFRPcoqFWSC0Ky+MkGAslJl8AzDhg1DfHw84uPjdabiHjJkCHbu3IkjR460eugcwKKIiIiIiIga4GingEa881kiANCIIhztFQbPMGzYMBw8eBApKSnaM0UAEBcXhy+++AI1NTUsiojMjbqmCtWledDU1UodhYiIiCzcoOjwZg2dA25cVzQoOtzgGYYNG4aqqiqEh4fDx8dHuzwuLg7l5eXaqbtbi9cUEbUDNRUlyPjzKxScO
QBRXQsrW0f49ByN4Lh/QGZlLXU8IiIiskCh/p6IDPFDWlZuk8WRTCaga7AfQv0MfyPXkJAQiA2crQoODm5weUvxTBGRxNQ1VUj9bhHyU/dAVN84Q1RXrUT2XxuRtmmFxOmIiIjIkr340Gg42NpAJmv42iKZTICDrQ0WPDiqjZMZFosiIonlndyNqoIrDbYVnU9E2dWzbZyIiIiI6IYAL1f857kH0TX4xhA1mUyAlVymLZK6BvvhP889iAAvVwlTth6HzxFJrPB8YpPtRecT4dyhaxulISIiItIV4OWKD595ABnXCnAw9SKUlSo42iswKDrcKEPmpMCiiEhqdxgPqyrLR8mlZDj6d4KVrWMbhSIiIiLSFervadD7ELUnLIqIJOYWHouSjORG2/NPxSP/VDxk1gr49R6PkOGzIcjkbZiQiIiIyLzxmiIiifn0GAmFq88d+2lqVcg+vAkZu1e3QSoiIiIiy8GiiEhiVrYOiP7HW3DvfBcg/O+QbOLu0bnHd6C2qryN0hERERGZPw6fI2oHFC5eiHxgMWoqSlBbUYLkL59qtK+mToXyq2fh3qlvGyYkIiIiMl8siojaERsHV9g4uEJmbQNNrarRfjIrRRumIiIiIjJvHD5H1A55dhnUaJu1gyucg7q1YRoiIiIi88aiiKgdChz8IKzsnRtoERAy4hHI5DzJS0RERGQoLIqI2iE7dz/0mP0evLoPhSC3BgA4d4hE5LRX4RM9QuJ0REREROaFXzcTtUMadS3Ks89Bo66Fa1gvuIbGwKfH3bBS2EsdjYiIiMjssCgiamfqVJU4ve4VlGef0y4rvpCIa0e2IOofb8LWxVvCdERERETmh8PniNqZy/Hf6RREN6lKriN9xycSJCIiIiIybyyKiNoRjboOeSd3N9penJ4EVWl+GyYiIiIiMn8siojaEXVNFdSqiiZ6iFCVFzS5jRplEWrKiwwbjIiIiMiM8ZoionbESmEPawdX1FaUNNxBEHAl4UfYOLrDu/twuAR31zYVXTyGrH3fQZlzEQDg4NsRwXEPw71T3zZITkRERGS6eKaIqB0RZHL4xoxuvIMoovjCUVxP/h2p3y3Ehe0fAwCKLh7FmR9f1xZEAFCRm44zG95A4bm/mtynr68vAgIC4Ovra5DnQERERGRqeKaIqJ0JHPIglHkZKL5w5I59ryf/DpfgaGT/9TMgaup3EDW4HP8dPCL6N7qNY8eOtSYuERERkcljUUTUzsjk1ug2bQlKL6ei8NxfqCrMRnF644XLtSO/ouJ6RqPtlfmXUV2cC1s3ngkiIiIiagiHzxG1Uy7BUQgb9Rjcwns32a/mDhMvAIAoioaKRURERGR2TLYoevPNNyEIAp577jmpoxDdUV11Ba4d/RXnf3kfl/74UnvtT52qEjnHd+DSrv8iO3ELaitLddarrSiFIGv6hK6dRwfYewY12d7UWaLY2Fh06NABsbGxejwjIiIiIvNhksPnjh49ii+++ALR0dFSRyG6I2XOBZz64VXUVZZpl1078gs8ugxESUaKzhTcl/d+g073Pg8nv0649McXKLp47H/XCgkAGj7b49d7HAABaRtXNNgnaMhDEASh0Xy5ubnIzs5u4bMjIiIiMn0md6ZIqVRixowZWLVqFdzc3KSOQ9QkUaPG2Z+X6xRENxWmJdS7J5Gmrgbnt7yDE2vmo+jCkVsmT2i4IPLvOxHOQd1RXZILl9AYWNk5a9ts3fzR+b4F8OoWZ7DnQ0RERGSOTO5M0bx58zB+/HjcfffdWLp0qdRxiJpUnH4cqtI8vdYRNWrUVhQ32u7euR9s3fzg3X0YVGX5OPafOdDU1fzdQWaFkBFzENB3YpNniIiIiIjoBpMqitavX4+kpCQcPXq0Wf1VKhVUKpX2sVKpNFY0ogZVF+cafJtO/hEIHDQNqvJCnFgzH6K6VreDpg6Xd6+GZ8QA2Lp6G3z/RERERObGZIbPXblyBc8++yzWrl0LW1vbZq2zYsUKuLi4a
H/i4jiMiNqWwghFSUVeJq6f+BPZhzfXL4j+R9TU4XrK7wbfNxEREZE5EkQTmat3y5YtmDRpEuRyuXaZWq2GIAiQyWRQqVQ6bUD9M0UpKSmIi4vD8ePH0atXrzbLbmxqjQaJpzOQlpULB1sFhvXqDG835zuvSEYnatQ4+p9HmjVttqF5Rg5Gl8mL7tivQ4cOyM7ORkBAAK5evdoGyYiIiIjaF5MZPjdixAikpqbqLJszZw66dOmChQsX1iuIAEChUEChUGgfOzo6Gj1nW7teVIZ/f7kFWdeLtMu+3pGAmaPvwoxR/SRMRgAgyOToMuUlnF7/KtTVupMqOAd2Q9mVM7h9EoWAfpNRkHZQ72uRbqdw9mzV+kRERESWwmSKIicnJ3Tv3l1nmYODAzw8POottyT/b812nYIIADQaEWt2/oVQP08MiOooUTK6yblDF8Q+uQrXU3ZBmXsRVraO8I4aDufASChz05F7fAeUuemorSxDXbUS+Wf2wTU0BrUVpSjJSIaoUQOC7JaZ6JpDgE+PUUZ7TkSWTlWaj8ILiRDVtXANiYGDT6jUkYiIqBVMpiii+s5kXsP5K9cbbd9yIIVFUTthbe+CDgPur7fc0bcjPLsNQV7qHu0McmpVBfJO7oaNozt6zPkAMhsFklb+U4+9CQgd+X+w92r8hq5E1HIZf36F7MQtOl9UuHe+CxGTFkBu3bxrXomIqH0x6aIoPj5e6giSyswtukN7YRsloda49NvnulNq/0+NsgjZiZsQcd8CyKxtoamtbnQbHl0GQlOngq2LD3x6joajL4thImPIObYN2Yc31VtedP4wLv3+JTrd84wEqYiIqLVMuiiydO5O9k23Ozu0URJqKeX1S6gsyGq0veDsQXj3GAnnoG4oST/eYB9bN390mfIS70lE1AayE39ptC0vdQ9Chs+Ctb1LGyYiIiJDMJkpuam+Pl1C4OHSeOEzum9kG6ahllCrKptsF9V1OL325UYLIkFujY5jnmBBRNQG6lSVqC6+1mi7qK5FZf6VNkxERESGwqLIhMnlMrz40GjY2tQ/4denSzAmDIyWIBXpw8E7FDJrxZ073kJuYwdrB1d4Rg5Bj9nvwq2j+UwvT9Seya0Vdzxereyd2igNEREZEosiE9ercxC+WPAwpgzthYggHwR4uqKDlytq6tTYdigVVaqGb+5J7YOVrQN8e43Vax1Ro0avxz9Hl8kL4egXbqRkRHQ7QSaHV7fGbwLu6BcOB6/gNkxERESGwqLIDPh7umJM3264XlSG7IISXM0vwYmLV/Hppng89/GPKK9s/AJ9kl7I8Dnw7TkGgqz+vbYaoqmrQfnVs0ZORUQNCR76D9i6+ddbLlc4oOPYeRIkIiIiQ+BEC2bigw1/okRZVW/5pWsF+GbnX3hqyjAJUlFzyORWCB//NAIHP4jSyychyK1x/tcPINapGl/HSr8hd0RkGDaO7ujxyPvIObYdhecOoa5KCYWLNzy6DoSDd4jU8YiIqIV4psgMXMkrwpnMnEbbdx07C7VGnxt/khQUzp7wjhoOr8jB8Ioc1Gg/a0c3OAd1a8NkRHQrazsneEcNBwCoSq+jLCsVGb9/jiMfzULh+cMSpyMiopZgUWQGisqansGssroGqpq6NkpDhhA0+MFGpvUVEDr8EcjkPMlLJBVR1ODMj6+hIjddZ3ldVRnSNq5ARf5liZIREVFLsSgyA4HebpDLGn8pfdycYKewbsNE1Fq2bn6Inv0evKKGQ2ZlA0CAc2A3RE5/Dd7Rw6WOR2TRitOPo7KRwkdU1yHn6NY2TkRERK3Fr5vNgLuzA4bEdMLepHMNtk8cHMP72JggO3c/REx8AZj4AkRRA0HgdxhE7YHy2oWm23OabiciovaHRZGZePb+4SgsVeJkerbO8rF3dceUON7HxtQJggwadS0KzhxESUYyBJkVPCLuglt4LIslojZmZevYqnYiImp/WBSZCQc7Bd57aipOpl9F0vkrsJbLMCg6HMG+H
lJHIwOoURbh1Pcvo7IgS7vsesrvcAnpgchpSyDX8wawRNRynpGDkbH7K4jqhq/V9IriEFciIlPDosjMRHfsgOiOHaSOQQZ2ccenOgXRTaWZJ5C1fy1CRzwiQSoiy2Tj6IbQEY/i0h9f1Gtz6xgL7+5D2z4UERG1CosionZOVVaAogtHGm2/nrILIcNmNfvmr0TUev5974W9VxCuHf0VFdczYe3gAp8ed8MnZjSPRSIiE8SiiKidU5XmAWLj95mqqypDnaoS1nZObZiKiFxDY+AaGiN1DCIiMgAWRUTtQGnWaVw/sQs15YWw9wyEb6+xsPcMBAAoXLwBQdZoYWRl5wQrhX1bxiUiIiIyKyyKiCSWuecbXD20Qfu45FISco5tQ6cJ/4JXtyGoLrkOR79OUF5reMp1nx4jWzVcx9fXV+e/RERERJaGRRGRhEovp+oURDeJGjUubP0AGX9+jdqKov8tFQCIOv2cg6IQFDejVRmOHTvWqvWJiIiITJ3RiqJr167B39/fWJsnMgu5KX802iZq1LcURMDNgsjOowOcAiLgEdEf7p368qJuIolUFmQh59h2VORdho2DC7x73A338D5SxyIiohYwWlHUrVs3fPrpp3jooYeMtQsik1dTXqj3OqrSfETPfpcTKxBJqODMAZzb8i5Ezd/3Kio4exA+MaPQ6Z5nJUxGREQtITPWhpctW4Z//vOfmDp1KoqKiu68ApEFsnMP0HsdTZ0K5VfPGiENETVHXXUFzm/9UKcguul6yh8oPPeXBKmIiKg1jFYUPfnkkzh58iQKCwsRGRmJrVu3GmtXRCbLr/e4GzPL6UlmpTBCGiJqjoIz+6GprW60valhsURE1D4ZdaKF0NBQ7NmzB5988gkmT56Mrl27wspKd5dJSUnGjEDUrjn4hCJ83FNI3/kpRI1au1yQW0FU1/8WGgCsHd3gHNStrSIS0W1qlMVNt7dgWCwREUnL6LPPXb58GZs2bYKbmxsmTpxYrygisnS+PUfDLTwWeSd3a+9T5NQhEqfWvYy6yrLbegsIHf4IZHIeR0RSsfvfPcQaY+8Z1EZJiIjIUIz6yWrVqlV44YUXcPfdd+P06dPw8vIy5u6ITJbCyQOBAx/QWdZj9nvI2r8OhWkJ0NTVwDmwGzoMnMrZrYgk5hHRHzZOnqgpL2igVYBf7D1tnomIiFrHaEXRmDFjcOTIEXzyySeYOXOmsXZDZLbs3P0Rcd98iOILgKjh1NtmoLqmFgdOXERRWQWCfNzRNzIEcpnRLu0kI5HJrRA57VWcWf8aapR/TyQkyOQIG/1POHfoImE6IiJqCaMVRWq1GidPnkSHDh2MtQsiiyAIAiCwIDJ1f526hLfX/Q5llUq7zNfdGf/v/+5FqJ+nhMmoJRx9OyL2qa9QcOYgKvIyYO3gCu/uw2Dj5C51NCIiagFBFEVR6hBtJSkpCb1798bx48fRq1cvqeO0StL5LGxNOIlrBSXwdnPGuLu6o3/3MKljkYGJooiK65egqauBg08o5Na2UkeiFriaX4zH3v4etXXqem1ero5Y8/Js2PB6SyIiIsnwXdgErf0jEWt2/n0fjEvXCnD49CVMHtITT0yKkzAZGVLRhSPI2PVfVBVlAwDktg7w7zMRQUMeunH2iEzG1oMnGyyIACC/RIn9KRdwd2zXNk5FREREN3Ewu4nJul6kUxDdatP+ZJy6dM1o+46NjUWHDh0QGxtrtH3QDaVZp3H2p6XagggA1NUVuHJgHS7HfythMmqJ9Gv5TbdnN91ORERExsUzRSZm19EzTbav23UEHQM8YSWXY3CPcIT5G27Gv9zcXGRnZ9+5I7Xa1YQfde5bdKtrR35Fh/73w8rWoY1TUUs5O9g12e5yh3aipsTGxiI3Nxe+vr44duyY1HGIiEwSiyITU6KsarL9aFomjqZlAgC+/yMRI/t0xfzpoyCTcbiVKSnJSGm0TVNbjbKrZzg1twkZ1acrDpy40GCbTCZgeG/OVkYtx
y+siIhaj8PnTExHPc/87Dp6Fj/tPW6kNGQsgqzp7ytkd2in9qVfZChGNnLN0GMTBsPbzamNExEREdGtWBSZmJF9usLJXr8ZyH45eAIWNMmgWfCI6N9om7W9C5yDurdhGmotQRCw4KFR+Pc/xqJX5yAE+7hjcHQ43p03BVOGmvZMmEREROaAXzebGAc7BZY9NhGvfb0NRWUVzVonv6QcVapa2NvaGDkdGUrg4OkoSj8KdXX91zh46D8gs7KWIBW1hiAIGNYrAsN6RUgdhYiIiG7DosgEdQ32w/eLH0FC6kVkF5TCWibDqm0HG+3vYGsDhQ1falNi7xmI6Fnv4HL8dyg6nwiIGjj4dkTggKnwjBwsdTwiIiIis8JPyibK2kqOoT3//sb50OlLOJ3R8HTcI/tEQi7jSElT4+AVjMipr0BTVwONug5WCnupIxERERGZJX5SNhPPT7sb7k71PzSHB3hh1ti7JEhEhiKzsmFBRERERGREPFNkJoJ83PHFiw9j+1+nkHw+C1ZyGQZHd8LdsV05dI6IiIiIqAn8tGxGXB3tMWNkX8wY2bdeW51ajb1J57EnKQ2V1TXoGuyLewf1gL+na9sHJTIzJy5ewfkreXCyt8Xg6HA42CmkjkRERER6YFFkYtQaDY6cyUROYQl83V3QLzIUcnnToyBr6uqweNWvSDqfpV12JjMH2/9Kxf/7v4no2SnQ2LGJzFJBiRKvfvUrLlzN0y77dNNezJs8DGP6dZMwGREREemDRZEJOZd1Hf9vzTbkFZdrl3m6OGLx7HGIDPFvdL1fD57UKYhuqq6pw9trf8f3ix+5Y2FFRPW9tnqrTkEE3DiuPvjxT3TwckX3sIBmbUcURahq62BrozvVesa1AlRUqxDi5wFHO/3uT0ZERETNx6LIRFRUqfDvLzejrKJaZ3lBqRL//uIXTBwcjUOpl6CsUqFLsC+mxPVC97AbhdLvR043ut2CUiWOn7+Mvl1DjZqfyNycunQN57KuN9imEUVs2pd8x6KoolqF735PxB9HTqO8UgVvNydMGBiNLkG++GzzPmTkFAAAbG2sMKZfdzx272BYW8kN/lyo/RBFEaWZJ6Eqy4Otmx9ceKNmIqI2YTJF0cqVK7Fy5UpkZmYCALp164ZXX30VY8eOlTZYG/nj6Jl6BdFNFdUqrNt1VPv44MmLOJSajgUPjcLdsV1RXFbZ5LaL7tBORPVduNpwQXTT8fNZ+McbX0MQBPSLDMX9Q3vBx91Z215TV4dFKzch7ZbCKq+4HF9tS4BMEKARRe3y6po6bDmQgkpVDRY8OMrwT8ZClF1NQ3H6cQgyGdw794OjT5ikeVRlBajMvwxre2c4+nWCMucC0ja9jeriv2+vYO8VjC5TXoK9J4c5ExEZk8kURR06dMCbb76JTp06QRRFfPPNN5g4cSKSk5PRrZv5j92/cCXvzp1uoRFFfLopHoOiwxHs646T6dmN9g3182htPCKL4+Jg12R7ZXUNKqtrAABbDqRgb9I5vPfU/Qj2vXG87Tl+TqcgutWtBdGt/jx6FjNH36VTXNGdqWurkfbzChSnH9Muy9r3Pby6xaHTvf9CScYJKK+dh9zWAV6Rg2Hj6A4AqK0sRXVJHmwc3aBw9jRYnjpVJS5u/w8Kzh4ERA0AwM6jA2qURVCrdL+kqsy/jFNrX0HvJ7+A3JpDKImIjMVkiqIJEyboPF62bBlWrlyJw4cPW0RR5GSv/5uhskqFLftT0KmDd6NFUddgX0QE+bY2HpHF6d+9I+xtbbSFz52UVlThi1/2Y/k/JwEAEk5e1HufGlFE8oUrnMRBTxm7vtIpiG7KP70PxRkpqKss1S7L/PNrBA6ejqrCbBScPQBRXQdAgFvH3ug4dh5sXb1bnSdt4wqUXErSWVZVeLXR/jXlBcg/tQ++PUe3et9ERNQwkymKbqVWq/HTTz+hoqIC/fv3b7SfSqWCSqXSPlYqlW0Rzyjuju2KTfuT9V7vq+0JjbaF+Hlg8
ezxrYlFZLHsFNZ4dupwvLX2d2g0DZ/Zud2xtMt4ZdUvKK+sxvXishbtl9cU6aeuugJ5qbsbb7+lIAIAUVOHrH3f39ZLRHH6MaR+twg9H/tE52bKpZdTUZGXCWsHV7h36gu5df3p2FXlhSg8mwB1TRWs7JzrFUTNUZ6dxqKIiMiITKooSk1NRf/+/VFdXQ1HR0ds3rwZkZGRjfZfsWIFXn/99TZMaDydAr0xZWgvbIzX/830VnY21hg/IAox4YHo0zUEMplgoIRElmd4ry4I8HTF5v0pOH/lOmysrZCend9ofxFA4pmMFu9PYW2FPl1CGm2PjY1Fbm4ufH19cexY/TMjlqi6JBeaWtWdOzaDqvQ68k78Cf++96K6JA9nf3oDFdcvadut7JzQ6Z5n4RHx95d1WQd+wJUDP0DUqFu1b/kthRgRERmeSc3DHBERgZSUFCQmJuKJJ57ArFmzcObMmUb7v/TSSygtLdX+7Nu3rw3TGt7jE4fg1Tn3oFfnIPh5uCCmUyBmjr4LMqH5hU1VTS28XJ3Qr1soCyIiA4gI8sWih8fg65dm4ePnpt3xWqPmsGpkivwH7+4DZ4fGh9Lm5uYiOzsbubm5rc5gLqwdXAEY7m9d8aXjEEUNzqxfolMQAUBdVTnSNr2JivzLAID8MweQte/7VhdEAODdfVirt0FERI0zqTNFNjY2CA8PBwD07t0bR48exUcffYQvvviiwf4KhQIKxd9DGRwdHdskpzENjg7H4OhwnWW+Hs74dFM8Kpp5bcPpjGuYHNfTGPGILJqNlRUmDu6Bb3873KL1ZTIBA7p3xIyRffFzfBL2n7iA2jo1gnzcMXVYb15L1AIKJw+4hvVs0ZC1hgiCDMXpx1FZUP/ebwAgquuQc3Qrwsc9hWuJWwyyT9/e4+DoF37njkRE1GImVRTdTqPR6FwzZOpq69TYm3QO8SnnUV1Ti+iwANwzMBqeLk0XcyP7RGJQdCfsP3Eel64VIONaAZIvXGm0v63CutE2ImqdGSP7oVRZha2HTjb7WqNRfSMxMrYrArxc4eXqBABY9PAYzH9wJGpq1bC3tTFmZLPXccyTSP1uIWrKC3UbBJl29rfmsnZ0R/6p+Cb7KHMuAAAqrus3VNLOIxC+vcYiL3U3VGX5sHPzh2/vcfCJHqHXdoiISH8mUxS99NJLGDt2LIKCglBeXo5169YhPj4ev//+u9TRDKK6phb//nILUm+ZJS41PRu/JpzAin9Ohlwm4Ni5y5DLZOjfPQwdvNy0/URRxI97jmHzvmRUqu58tmhoz84tyujr66vzXyKqTyYT8NSUYZg2IhbH0i5DEASkpmfjj6OND/WNCgtATKf696GxksthJefECq1l5+6HnnP/g9yk31CcfhwQZPCI6AcbJ0+c/+U9iOpanf5W9s6oq2x4Iozryb/dcX9Wtje+yLK2d4aqrPFrzBQu3lCV5kFu6wDvqBEIGvwgrO2dEdBvoh7Pjn+biYgMwWSKory8PMycORM5OTlwcXFBdHQ0fv/9d4wcOVLqaAbx057jOgXRTeWVKiz47GdUqf5+01619QDG3RWFZ+4fDplMwHe/H8baPxKbtZ/B0eGIjQhuUUZeuE3UfF6uTugc6IO/Tl+CrY0VZDKhwTNHro72GNYzQoKElsXa3gWBg6YhcNA0neUO3sHIOboN5dfOw8rWAV7dh8Kj6yBcTdiA3OTfbhRHep5R8owcjMJzf8HBt2OjRZHC1Qex8/4LiCIEWesKX/5tJiJqPZMpir766iupIxjVb0dON9p2a0EEAKIIbP8rFX4eLrh3UA9s3Nf4VN0KaysIAuDt5ozx/aMwcXAPCMKND2fHzmUip7AMfh7OiI3gTHREhqJWa/DWut+xN+mcznJBECDecmNWNyd7vPF/90JhYzJ/is2OvWcQOo59st7ykGGzEBz3MEoyT+L0uleavz3vUGTsXg11deO3gJBZK9DpnucgCDJDzgFBREStwHfidqKorELvdX45m
IIuQb5N3jxSVVuHwT3CkZlTiP0nLsDWxgqh/p5Y/u1O5Bb9PTzE190Zi2ePR+dAnxblJ6K/ffdHYr2CCLgx1DUqLADdwvwR5O2OuJhOsLHmn+H2SpDJUV3U8I2vb5JZKWDt4AprBxc4deiKnCO/4sbk67qs7JygcPaCc2Ak/PveCzv3ACOlJiKiluC7cTsR5OOOS9cK9Fonv0SJWvWdp3o9cOLi//5VjNMZ12All6FOrTsUJLeoDP/+YgvWvDwLjnYNT/nLe6AQ3VltnRrbEk422n7hah6Wzp3IyRNMhNym6fsDCbL/Da0TNSjNSEFDBREA1FUp0WPOe0Yphvi3mYio9UzqPkXm7L7BMXqvY2MtR/dQf/i4Oem13u0F0U2lFVX440jjF4PzHihEd1ZcXonSiqpG26trapFbVNqGiag13Dv3g8xa0Wi7uqYKqrJ8KHMuovJ/9ydqmIjyq/XPHhoC/zYTEbUei6J2Yuxd3TFpSAxuvw+rrU3j02c72Cqw4LOfEeLnAUGPG7g25VzWdYNsh8hSOdkrYN3EjHEyQYCrY9NnH6j9sLJ1QMiIRwyyLbmi9Tf2JSIi4+DwuXbkyUlDMWFgNPalXEC1qhZRHQPg6eqIhZ9tavCb5+LyShSXVwK4cdYo0MsN6dcKYCWXIcDLDZdzC+utcyeO9g0PnSOi5rFT2GBwj3DsaeCaIgCQy2V48LX/ItDbDRMHx+CeAVEG+1KDjMM/9h7Yuvog+/BmKHMuQhBkqKsu12sbVnbOcOvY20gJiYiotVgUtTOB3u54eFQ/nWWfz5+BXxJO4HjaZRSXV6KgtP6sRjW1ahSVV6JzBy+cv5qPrBYURAAwoneXFq1HRH977N4hSMu6jmsFJfXaautuXAd4+XoRPv55DzJyCvDM/cPbOCHpyz28D9zD+wAAsvavQ9b+tc1fWZAhbNRjkFnxOjIiovaKw+dMgKerIx4dPxCfvfAQbKwbH5ZTXF6J81dv3BOj4Ut9mzZhYDQiQ/xamJKIbvJwccBnzz+Ix+4djKiOAQjwcm2079aEk8jMadmXGCQNR/+mb4Dt4NsRClefG2eHOvVF1MPL4d65H/JS9yLn+A5UXM9oo6RERNRcPFNkYm4Ol2spe4UNnp06HBCAbYdSkVtYBl8PZ9wzIArDe/EsEZGhONgpMHVYb0wd1huvrPoF2fkljfbdl3IeIX792y4ctYiqvBBVhdlQuHrD3iu44YkVBBlqKkqgrlLC3isQnl0HobIgC6fXvwZNbbW2m2tYL3SZvBBWto5t+AyIiKgxLIpMTIivB85e1m+GIQHAI/cMhLuTAwZFh2unAmYRRNQ2VLV1rWonadVVlePijk9QkHboxvTbABx9O8HeKwiV+VnafoJMDlGjRm35jTN/ypyLuPDr+w1us+RSEs7/8h4ipy0x/hMgIqI74vA5EzNpSE+91xFxY8rvUX0jeW8UIglEhzV9b5rojryRZ3sliiJOr38NBWcPagsiAFDmXkBtVTkipy1Bx7FPwrf3PRA1d75v3K2KLhxFVWHTN4clIqK2waLIxAzrFYEZo/pBJtOdrer2x7fqFurf5NTeRGRc4wdEwdmh4ZkdOwZ4oW/X0HrLK6pUSDyTgaNpmVDV8EySVEouJaE8O63BtlplMSryMuHXezxUJS25R5CI8pwLrQtIREQGweFzJmj22P4Y268b9p+4gCpVDbqF+uN81nV8veNQvb4yQcCMUX0lSElEN7k7O+CtJybj7bV/ICOnQLu8d0QQFs4YrfOlhiiK+Oa3w9gYn4TqmloAN+59NHN0f9w3JKato1u8kswTTbdnpMA3ZjTqVBUt2r6VrUOL1iMiIsNiUWSifNydMXXY3/e86B0RDJlMhg17j6Gs4sbFvL7uzpg7YTD6dAmRKCUR3RQe4I0vX3wY57Kuo7BMiSBvd3TwdqvX74c/j2LtH4k6y8orVfh0czwc7BQY2adrW0UmADJ502fZy66cQeIHDwGC/gMvrB1c4
Rqq/5BoIiIyPBZFZmTaiFjEdgnGrmNnobCSY8LAHvB05cxGRMakqqlDysUrqFOr0T00AC6Odk32jwjyAeDTYFtNbR027ktqdN0fdx9lUdTGPLoMwJWD6xttF9W1//uHptE+gpUNxLoa3WUyOTqOfRIyOd+GiYjaA/41NhM1tXV4a+3v2H/i7/HpG/YexwPDYzFn3AAJkxGZr60JJ7F6xyGUV944O2ttJceEAdF4bOJgyGX6nzm4nFukPdPbYPv1IpQoK+HqaN9gu6+vr85/qfUcfTvCK2o48lP3tGh9l5Ae6DjmSRSmHUT+6f1Qqyrh1KErAu6aBKc73O+IiIjaDosiM/HZ5n06BREA1Kk1WLfrCLxdnTB+QJREyYjM076U8/j4Z90PyrV1amzanwxrKzn+b8KgZm3nan4xftpzHEfOZkKtafxsA3DjGkEbq8b/bB87dqxZ+yT9dJ7wHBy8gpFzbBtUZfmQWdlAc9uZn1vZeQUjaPCDsPcKgoNXMADAftB0BA6a3laRiYhITyyKzEBZRRX+OHqm0faN+5JYFBEZ2Po/jzba9svBE7CxkuNaYSncnR0wqk8kQvw86vW7eDUP8z/9GRXVjX/AvlVsl2BOqy8BQSZHhwH3I6D/FGhqq5GduAVZ+75vtL/cWgGvyMFtmJCIiFqLRZEZyMgpRG1d4/fHuJJXjCpVDewU/DBFZAhVqlpczM5vtL26phbf3TJZwk97j+OR8QMwYWA09iadR1FZBYJ83LE14WSzCyJ7Wxs8Mn5gq7NTywmCALmNHdw69m6yKHIL69WGqYiIyBBYFJkBZ/uG739yk8LaSmfITVlFFfYmn0dxeSVCfT0wMLojrORyY8ckMhvWVjJYyWWoUzc93O1WX28/hO9/T0RNE19g3M5KLoNcJsOAqI6YMbIvgn3rn226VWxsLHJzc+Hr68uhdEbk5N8ZbuF9UHyx/tlCawdX+MWOlyAVERG1BosiMxDq74mOAV5Ib+Sb60HR4RAhAgB2HT2DD3/ajZravz+Yebk6YdnciQj192yTvESmzkoux6DocMQnn9drPX0KIgD4+qVZ8PNwaXb/3NxcZGdn67UP0k9VYTYqC6+gw4CpUDh5IC91j/b6Ilt3f8ht7HHq+5fh4NsR/n0nwsm/k8SJiYioOVgUmYln7h+Olz7fjEqV7lAcmSBg9/E0HDqVjj5dQnAw9SI0GlGnT35JOV5e9Qu+fWU2zxgRNdOssf2RfP4KSiuqjLJ9DxcHeLs6GWXbpL8aZRHO//I+SjKStcscfEIR+dAbEAQBmbvXoPzq39d2VhZkIf/0PkRMfAFe3YdKkJiIiPSh/5yx1C5Fhvjh0xcexISB0QjwcoWDnQIAoBFvFEBVqlrsP3GhXkF0U35JORJS09ssL5Gp6+Dlhv88Nx1j+nWDg60NFNZWCPMz3NnWKXG9IJfzT3R7IIoanFr3qk5BBAAV1zOQ9vMylF85q1MQ/b2iBhd3fgp1jXEK55t8fX0REBDAqdiJiFqBZ4rMSAcvNzxz/3BcupaPf76zVu/1M3IKERfTeDvvgUKky8/TBS9MH4kXpo8EAFRUq/DQa1/VO2N7JzJB0H6BYS2XY1JcDO4fyov124uiC0dQmZfRYFtdZRmuHdvW6LpqVSUKz/0F76jhxorH68eIiAyARZEZSjjZsjM+bk4N3xDyJr7xEjXNwVaBl2eNxf9bvR2q2rpmrSOXyfDJ89ORca0QgnBj2u3Gbs5K0ijLOtVke11lWZPttXdoJyIi6bEoMlFnL+dgz/FzqKhWoWuwL+6O7aqdcrvuDjeAbIiNtRzDevLu6kSt1bdrKNa8PBu/JZ7G5dxCuDnZY3TfbthyIAW/JZ7W6SsIwLzJQxEe4I3wAG+JEtOdyKwUTbZb2TqgRqlqtN3BJ8zQkYiIyMBYFJmgDzfsxva/UrWPdx09i+//OII3H5+EUD9P9OociHW7jjR7ezKZgGfvHwFnBztjxCWyOJ4ujnh4VD/Uq
dX4cc9xvLLqFxSUKuFgawM3J3vYKWwQ4uuBCYOi0TXYT+q4dAeekYNx5eD6RtudOnRF4blDgFj/mk0H345wDYk2ZjwiIjIAFkUmZtfRMzoF0U1FZRV4Y812TB/RBykXr8DdyR5F5ZX1+rk52uHRCYOQkJqO4rIKhPh5YuKgHgjvwG+piQxt6Tc7dCYwqaiuQUV1DWI6BeKFB0dCLuNECqbAwTsEvr3GIjdpZ4PthWkJ//uXAODvwsjBJxSRUxcbPyAREbUaiyITszXhZKNtV/KK8c4Pf+gsu/UtOqpjAJ6bOgJBPu4Y3beb8UISEU5cvNrojI4pF67gr1OXMCg6vI1TUUt1HDsPDj6hyDm6DZWFVyHIZBDVt183JkKQWyOg70S4hvWES0gPCIIgSV4iItIPiyITk1NYqld/EcDIPl3x8Kh+8Pd0NUomIqrvwIkLd2xnUWQ6BEGAX+/x8Os9HiWZJ3Dq+3832E9U10KjqYNraEzbBiQiolZhUWRifNydUaLU754XR89exvzpo4yUiMhy1dTVYc/xczh48iJq69To2SkQ4/p3h7ODHWrr1HdYt+l2ar/Ksk63qp2IiNofFkUmZnz/KJzLuq7XOiXKSlTX1MLe1sZIqYgsT0W1CotWbkLaLcdj0vksbD6QgnfnTUFMp0DsONz4VM49OwVq/63RiDh3JRc1tXXo1MGHx2o7J7Oxbbrduul2IiJqf1gUmZgx/bohNT0bu46dbfY6ro52sLWxNmIqIsvz3e+JOgXRTUVlFXhv/Z94d94UBP/hjsvXi+r18XV3xt2xXQEAB05exJe/7Edu0Y172dgrbDBxcA/MHjsAMhmvR2mPPLsOQubu1YDY8O0PvLoNaeNERETUWiyKTIwgCHhxxmiM7heJ3cfPoaJKhS7BvjhyNgMpF642uM7Yu7rzwxWRAWk0Iv440vgQqdMZ1/Dqf38FBMDFwQ6lFX8PeXVxsIOjnQIrt+xD50BvfLIpHhrN3zOWVapq8MOfRyGKIh69Z5BRnwe1jK2LNwIHTMWVhB/rtTkFRMA7eoQEqYiIqDVYFJmoHuGB6BH+9/CboT07Y8FnG5GdX6LTr2enQDw8ql8bpyMybzV1dSivbPxmnQBwNO2yzmMfNydcLy5HaUUVSiuqcDE7v97NXG+15cAJTB/RBw52Td84lKQRPGwm7DwDce3IL6jIy4C1vSt8etyNDgPuh9yarxkRkalhUWQmvFyd8MX8h7E3+RySz2fBykqOQVHh6BcZyrNERAZma2MNbzcn5BWXN3ud63r0BYDqmlqcuZyDPl1C9ExHbcU7ahi8o4ZJHYOIiAyARZEZUdhYYUy/bhjTj/cgIjK2ewf2wH+3HTTqPmys5EbdPhEREd3A26kTEbXA/cN6YUTvLkbbvruTPbqF+htt+0RERPQ3nikiImoBuUyGRQ+PwZS4Xjhw8gJq69SoU2uw5UBKq7ctCMCj9wyClZxnioiIiNoCiyIiolboFOiNToHeAIAqVQ3ik8/pdYNlJ3tbxIR3wF+nL6FOrUHXYF88eHdf9O8eZqzIREREdBsWRUREBmKnsMHSufdhyde/orC0QrvcwdYG4/pH4deDJ6CqrdMud3Oyxxv/dy8ignyhVmug1mhgY93yP8u+vr46/yUiIqLmEURRFO/czTwkJSWhd+/eOH78OHr16iV1HCIyU7V1ahxKTcfV/GJ4ujpiSI/OsFNYo6yiGnuS0lBYVoEgb3fExXRqVRFEREREhsF3YyIiA7O2kiOuZ+d6y50dbHHvwB5Q1dbBTmEtQTIiIiJqiMkURStWrMCmTZuQlpYGOzs7DBgwAG+99RYiIiKkjkZEdEcVVSp8+9th/HH0DJRVKni7OeHegT1w/7BekMs4ESgREZGUTOadeN++fZg3bx4OHz6MXbt2oba2FqNGjUJFRcWdVyYiklBNbR1eXLkJm/YnQ1mlAgDkFZfjv9sO4t0fdkmcjoiIiEzmTNFvv/2m83jNmjXw9vbG8
ePHMWTIEIlSERHd2e7jaTh/5XqDbX8eO4spQ3siPMC7jVMRERHRTSZzpuh2paWlAAB3d3eJkxARNe3AyYtNt59oup2IiIiMy2TOFN1Ko9Hgueeew8CBA9G9e/dG+6lUKqhUKu1jpVLZFvGIiHTU1alb1U5ERETGZZJniubNm4dTp05h/fr1TfZbsWIFXFxctD9xcXFtlJCI6G89OwfeoT2ojZIQERFRQ0yuKHrqqaewbds27N27Fx06dGiy70svvYTS0lLtz759+9ooJRHR38bdFQV3J/sG27oG+6J3BIsiIiIiKZlMUSSKIp566ils3rwZe/bsQWho6B3XUSgUcHZ21v44Ojq2QVIiIl0ujnZ4Z9796Bbqr10mkwkY3KMTls69D4IgSJiOiIiITOaaonnz5mHdunX45Zdf4OTkhNzcXACAi4sL7OzsJE5HRNS0IB93fPjMA7iaV4zCMiUCvNzg6cIvaoiIiNoDQRRFUeoQzdHYN6mrV6/G7Nmzm7WNpKQk9O7dG8ePH0evXr0MmI6IiIiIiEyVyZwpMpHajYiIiIiITIzJXFNERERERERkDCyKiIiIiIjIopnM8DkiIlNXqqzC7uNpKCqrQJCPO+JiOkNhwz/DREREUuO7MRFRG9hzPA3v/bgLNbVq7bJVWw/ijf+7F12CfSVMRkRERBw+R0RkZJk5hXh73R86BREAlCgrsfi/v6C6plaiZERERASwKCIiMrpfE05ArdE02FairEJ88vk2TkRERES3YlFERGRkWdeLmmzPzC1soyRERETUEBZFRERG5uZk32S7h7NDGyUhIiKihrAoIiIysjH9ujXaZiWXYUTvLm2YhoiIiG7HooiIyMh6RwTjvsEx9ZbLBAHPTh0Bd54pIiIikhSn5CYiagPzJg9F38gQ/Hb4NAr/d5+iewdGI7yDt9TRiIiILB6LIiKiNtKnSwj6dAmROgYRERHdhsPniIiIiIjIorEoIiIiIiIii8aiiIiIiIiILBqvKTJTOTk5yMnJkToGGYifnx/8/PykjkEGwuPT/PAYJSIybRZVFPn5+WHJkiVm/8alUqnw4IMPYt++fVJHIQOJi4vD77//DoVCIXUUaiUen+aJxygRkWkTRFEUpQ5BhlVWVgYXFxfs27cPjo6OUsehVlIqlYiLi0NpaSmcnZ2ljkOtxOPT/PAYJSIyfRZ1psjSxMTE8A3aDJSVlUkdgYyAx6f54DFKRGT6ONECERERERFZNBZFRERERERk0VgUmSGFQoElS5bwgl8zwdfTvPD1ND98TYmITB8nWiAiIiIiIovGM0VERERERGTRWBQREREREZFFY1FEREREREQWjUWRCYqPj4cgCCgpKWmzfc6ePRv33Xdfm+3PkgiCgC1btrTZ/tasWQNXV9c22x8RERFRe8eiyEA+//xzODk5oa6uTrtMqVTC2toaQ4cO1el7s6hJT09vcFuvvfYaBEGAIAiwsrJCSEgI/vWvf0GpVBrzKVADZs+eDUEQ8Pjjj9drmzdvHgRBwOzZsxtd/+ZrffPHx8cHU6ZMwaVLl4yYmlrKmMexp6cnhgwZgg8//BAqlcqYT4NuY8jjWCaTwcXFBT179sSLL76InJwcIyYnIqK2wqLIQIYNGwalUoljx45plx04cAC+vr5ITExEdXW1dvnevXsRFBSEjh07Nrq9bt26IScnB5mZmXjrrbfw5Zdf4oUXXjDqc6CGBQYGYv369aiqqtIuq66uxrp16xAUFNSsbZw7dw7Xrl3DTz/9hNOnT2PChAlQq9XGikwtZKzjOCsrC3v37sXUqVOxYsUKDBgwAOXl5Y2uV1NTY5gnRFqGPI6PHj2KhQsX4s8//0T37t2Rmpra6Dp8LYmITAOLIgOJiIiAn58f4uPjtcvi4+MxceJEhIaG4vDhwzrLhw0b1uT2rKys4Ovriw4dOmDatGmYMWMGfv311wb7FhYW4sEHH0RAQADs7e0RFRWFH374QaePRqPB22+/jfDwc
CgUCgQFBWHZsmXa9itXruCBBx6Aq6sr3N3dMXHiRGRmZtbb1+uvvw4vLy84Ozvj8ccf13nDV6lUeOaZZ+Dt7Q1bW1sMGjQIR48ebfJ5moJevXohMDAQmzZt0i7btGkTgoKC0LNnz2Ztw9vbG35+fhgyZAheffVVnDlzBhcvXmyw78KFC9G5c2fY29sjLCwMixcvRm1trU6frVu3ok+fPrC1tYWnpycmTZqkbVOpVJg/fz4CAgLg4OCAfv366fxe3rRlyxZ06tQJtra2GD16NK5cuaLTvnLlSnTs2BE2NjaIiIjAd99916znasqMdRz7+/sjKioKTz/9NPbt24dTp07hrbfe0vYLCQnBG2+8gZkzZ8LZ2RmPPfZYg8NkU1JSIAiCzrG5atUqBAYGwt7eHpMmTcL777/P4ZENMNRx7Ovri86dO2P69OlISEiAl5cXnnjiCW2fm0ONly1bBn9/f0RERABoeJisq6sr1qxZo3186NAhxMTEwNbWFrGxsdiyZQsEQUBKSkqLnzcRETUPiyIDGjZsGPbu3at9vHfvXgwdOhRxcXHa5VVVVUhMTLzjh6nb2dnZNfqNY3V1NXr37o3t27fj1KlTeOyxx/CPf/wDR44c0fZ56aWX8Oabb2Lx4sU4c+YM1q1bBx8fHwBAbW0tRo8eDScnJxw4cAAJCQlwdHTEmDFjdPa5e/dunD17FvHx8fjhhx+wadMmvP7669r2F198ERs3bsQ333yDpKQkhIeHY/To0SgqKtLrubZHjzzyCFavXq19/PXXX2POnDkt2padnR2Axr9BdnJywpo1a3DmzBl89NFHWLVqFT744ANt+/bt2zFp0iSMGzcOycnJ2L17N/r27attf+qpp/DXX39h/fr1OHnyJKZOnYoxY8bgwoUL2j6VlZVYtmwZvv32WyQkJKCkpATTp0/Xtm/evBnPPvssXnjhBZw6dQr//Oc/MWfOHJ3fb3NlzOMYALp06YKxY8fqfDgHgHfffRc9evRAcnIyFi9e3KxtJSQk4PHHH8ezzz6LlJQUjBw5UufLDtJlyOMYuHEsP/7440hISEBeXp52+e7du3Hu3Dns2rUL27Zta9a2ysrKMGHCBERFRSEpKQlvvPEGFi5c2OJsRESkJ5EMZtWqVaKDg4NYW1srlpWViVZWVmJeXp64bt06cciQIaIoiuLu3btFAOLly5cb3c6SJUvEHj16aB8fO3ZM9PT0FO+//35RFEVx7969IgCxuLi40W2MHz9efOGFF0RRFMWysjJRoVCIq1atarDvd999J0ZERIgajUa7TKVSiXZ2duLvv/8uiqIozpo1S3R3dxcrKiq0fVauXCk6OjqKarVaVCqVorW1tbh27Vpte01Njejv7y++/fbbjeZs72bNmiVOnDhRzMvLExUKhZiZmSlmZmaKtra2Yn5+vjhx4kRx1qxZja5/+2t17do1ccCAAWJAQICoUqlEURRFAOLmzZsb3cY777wj9u7dW/u4f//+4owZMxrse/nyZVEul4vZ2dk6y0eMGCG+9NJLoiiK4urVq0UA4uHDh7XtZ8+eFQGIiYmJoiiK4oABA8S5c+fqbGPq1KniuHHjGs1pLox1HN9q4cKFop2dnfZxcHCweN999+n0aeg4T05OFgGIGRkZoiiK4rRp08Tx48frrDdjxgzRxcWl+U/YAhj6OL7Vzp07dY6dWbNmiT4+Ptrj+6aGjnMXFxdx9erVoije+Hvq4eEhVlVVadtXrVolAhCTk5Nb8rSJiEgPVpJUYmZq6NChqKiowNGjR1FcXIzOnTvDy8sLcXFxmDNnDqqrqxEfH4+wsLA7jmFPTU2Fo6Mj1Go1ampqMH78eHzyyScN9lWr1Vi+fDk2bNiA7Oxs1NTUQKVSwd7eHgBw9uxZqFQqjBgxosH1T5w4gYsXL8LJyUlneXV1tc5F5D169NBuEwD69+8PpVKJK1euoLS0FLW1tRg4cKC23draGn379sXZs2eb/h9nAry8vDB+/
HisWbMGoihi/Pjx8PT0bPb6HTp0gCiKqKysRI8ePbBx40bY2Ng02PfHH3/Exx9/jPT0dCiVStTV1cHZ2VnbnpKSgrlz5za4bmpqKtRqNTp37qyzXKVSwcPDQ/vYysoKffr00T7u0qULXF1dcfbsWe1r9thjj+lsY+DAgfjoo4+a/ZxNlSGP48aIoghBEHSWxcbG6r2dc+fO6QydBIC+ffs2++yEpWntcdwQURQBQOf1jIqKavT4bsy5c+cQHR0NW1tb7bJbzwATEZFxsSgyoPDwcHTo0AF79+5FcXEx4uLiAAD+/v4IDAzEoUOHsHfvXgwfPvyO24qIiMCvv/4KKysr+Pv7N/kG+8477+Cjjz7Chx9+iKioKDg4OOC5557TDs+6OVyrMUqlEr1798batWvrtXl5ed0xq6V45JFH8NRTTwEAPv30U73WPXDgAJydneHt7V2v+LzVX3/9hRkzZuD111/H6NGj4eLigvXr1+O9997T9mnq9VQqlZDL5Th+/DjkcrlOm6Ojo16ZLZUhj+PGnD17FqGhoTrLHBwcdB7LZDdGN9/80A2g3rVlpL/WHMcNufmlT0hIiHbZ7a8lcKNouvW1BPh6EhG1J7ymyMCGDRuG+Ph4xMfH60zhO2TIEOzcuRNHjhxp1nUINjY2CA8PR0hIyB2/cUxISMDEiRPx8MMPo0ePHggLC8P58+e17Z06dYKdnR12797d4Pq9evXChQsX4O3tjfDwcJ0fFxcXbb8TJ07ozNx0+PBhODo6IjAwUHtBfkJCgra9trYWR48eRWRk5B2frym4eY3VzWuw9BEaGoqOHTs2WRABNy60Dg4Oxssvv4zY2Fh06tQJly9f1ukTHR3d6GvZs2dPqNVq5OXl1XstfX19tf3q6up0Zlg7d+4cSkpK0LVrVwBA165ddV5L4Mbvmbm8lndiqOO4IWlpafjtt98wZcqUJvvd/ELi1imfb7/gPiIiot5kJuYwuYkxteY4vl1VVRW+/PJLDBky5I5fIHl5eem8lhcuXEBlZaX2cUREBFJTU3Wma+drSUTUdlgUGdiwYcNw8OBBpKSkaL9hBoC4uDh88cUXqKmpafGHqcZ06tQJu3btwqFDh3D27Fn885//xPXr17Xttra2WLhwIV588UV8++23SE9Px+HDh/HVV18BAGbMmAFPT09MnDgRBw4cQEZGBuLj4/HMM8/g6tWr2u3U1NTg0UcfxZkzZ7Bjxw4sWbIETz31FGQyGRwcHPDEE09gwYIF+O2333DmzBnMnTsXlZWVePTRRw36fKUil8tx9uxZnDlzpt5ZGEPp1KkTsrKysH79eqSnp+Pjjz/G5s2bdfosWbIEP/zwA5YsWYKzZ88iNTVVO5NZ586dMWPGDMycORObNm1CRkYGjhw5ghUrVmD79u3abVhbW+Ppp59GYmIijh8/jtmzZ+Ouu+7SDtdZsGAB1qxZg5UrV+LChQt4//33sWnTJsyfP98oz7u9MdRxXFdXh9zcXFy7dg2pqan4z3/+g7i4OMTExGDBggVNrhseHo7AwEC89tpruHDhArZv365zxhAAnn76aezYsQPvv/8+Lly4gC+++AI7d+6sNzSP/taa4zgvLw+5ubm4cOEC1q9fj4EDB6KgoAArV66847rDhw/HJ598guTkZBw7dgyPP/44rK2tte0PPfQQNBoNHnvsMZw9exa///473n33XQDg60lE1BYkvJ7JLGVkZIgAxC5duugsz8zMFAGIERERd9xGUxdoi2L9i34LCwvFiRMnio6OjqK3t7f4yiuviDNnzhQnTpyoXUetVotLly4Vg4ODRWtrazEoKEhcvny5tj0nJ0ecOXOm6OnpKSoUCjEsLEycO3euWFpaKori3xcqv/rqq6KHh4fo6Ogozp07V6yurtZuo6qqSnz66ae12xg4cKB45MiRZvxfa79uPu/GtOYC7Ztw2wXYCxYs0P4/njZtmvjBBx/Uu3B+48aNYkxMjGhjYyN6enqKkydP1
rbV1NSIr776qhgSEiJaW1uLfn5+4qRJk8STJ0+KonhjogUXFxdx48aNYlhYmKhQKMS777673qQBn332mRgWFiZaW1uLnTt3Fr/99ttGn4O5MdRxDEAEIMrlctHd3V0cNGiQ+MEHH+gcN6J4Y6KFDz74oN42Dh48KEZFRYm2trbi4MGDxZ9++klnogVRFMUvv/xSDAgIEO3s7MT77rtPXLp0qejr69ui522uDHUcAxAFQRCdnJzEHj16iAsWLBBzcnKata/s7Gxx1KhRooODg9ipUydxx44dOhMtiKIoJiQkiNHR0aKNjY3Yu3dvcd26dSIAMS0tTc9nTERE+hJE8bZBzkREZLLmzp2LtLQ0HDhwQOoo1Epr167FnDlzUFpaesdrQ4mIqHU40QIRkQl79913MXLkSDg4OGDnzp345ptv8Nlnn0kdi1rg22+/RVhYGAICAnDixAksXLgQDzzwAAsiIqI2wKKIiMiEHTlyBG+//TbKy8sRFhaGjz/+GP/3f/8ndSxqgdzcXLz66qvIzc2Fn58fpk6dypvxEhG1EQ6fIyIiIiIii8bZ54iIiIiIyKKxKCIiIiIiIovGokhCs2fPhiAIePPNN3WWb9myxaj3pSgqKsLTTz+NiIgI2NnZISgoCM888wxKS0t1+mVlZWH8+PGwt7eHt7c3FixYgLq6OqPlMnV8Pc0LX0/zw9eUiIgaw6JIYra2tnjrrbdQXFzcZvu8du0arl27hnfffRenTp3CmjVr8Ntvv+ncZFWtVmP8+PGoqanBoUOH8M0332DNmjV49dVX2yynKeLraV74epofvqZERNQgaW+TZNlmzZol3nPPPWKXLl3EBQsWaJdv3rxZbOuXZsOGDaKNjY1YW1sriqIo7tixQ5TJZGJubq62z8qVK0VnZ2dRpVK1aTZTwdfTvPD1ND98TYmIqDE8UyQxuVyO5cuX4z//+Q+uXr3a7PXGjh0LR0fHRn+6deumV47S0lI4OzvDyurGLO1//fUXoqKi4OPjo+0zevRolJWV4fTp03pt25Lw9TQvfD3ND19TIiJqCO9T1A5MmjQJMTExWLJkCb766qtmrfPf//4XVVVVjbZbW1s3e/8FBQV444038Nhjj2mX5ebm6rw5A9A+zs3Nbfa2LRFfT/PC19P88DUlIqLbsShqJ9566y0MHz4c8+fPb1b/gIAAg+y3rKwM48ePR2RkJF577TWDbJP4epobvp7mh68pERHdisPn2okhQ4Zg9OjReOmll5rV3xBDOcrLyzFmzBg4OTlh8+bNOt90+vr64vr16zr9bz729fXV45lZJr6e5oWvp/nha0pERLfimaJ25M0330RMTAwiIiLu2Le1QznKysowevRoKBQK/Prrr7C1tdVp79+/P5YtW4a8vDx4e3sDAHbt2gVnZ2dERkY249kQX0/zwtfT/PA1JSKim1gUtSNRUVGYMWMGPv744zv2bc1QjrKyMowaNQqVlZX4/vvvUVZWhrKyMgCAl5cX5HI5Ro0ahcjISPzjH//A22+/jdzcXLzyyiuYN28eFApFi/dtSfh6mhe+nuaHrykREWlJPf2dJZs1a5Y4ceJEnWUZGRmijY2NUaeH3bt3rwigwZ+MjAxtv8zMTHHs2LGinZ2d6OnpKb7wwgva6WOpPr6e5oWvp/nha0pERI0RRFEU26b8IiIiIiIian840QIREREREVk0FkVERERERGTRWBQREREREZFFY1FEREREREQWjUURERERERFZNBZFRERERERk0VgUERERERGRRWNRREREREREFo1FERERERERWTQWRUREREREZNFYFBERERERkUVjUURERERERBaNRREREREREVk0FkVERERERGTRWBQREREREZFFY1FEREREREQWjUURERERERFZNBZFRERERERk0VgUERERERGRRWNRREREREREFo1FERERERERWTQWRUREREREZNEsqijKycnBa6+9hpycHKmjEBEREREZBD/jtp7FF
UWvv/46f2GIiIiIyGzwM27rWVRRREREREREdDsWRUREREREZNFYFBERERERkUVjUURERERERBaNRREREREREVk0FkVERERERGTRWBQREREREZFFY1FEZAKqq6uljkBERERktlgUEZmA/Px8qSMQERERmS0WRUQmoKamBjU1NVLHICIiIjJLLIqITERZWZnUEYiIiIjMEosiIhNRXFwsdQQiIiIis8SiiMhE8LoiIiIiIuNgUURkIi5fvgxRFKWOQURERGR2WBQRmYjy8nJkZWVJHYOIiIjI7LAoIjIhx44d49kiIiIiIgNjUURkQgoLC3Hu3DmpYxARERGZFRZFRO1cbGwsBg4ciGXLlgEAEhMTUV5eLnEqIiIiIvPBooioncvNzcX169e19ylSqVT4448/oFKpJE5GREREZB5YFBGZoMLCQuzYsQNVVVVSRyEiIiIyeSyKiExUfn4+fvnlFxQWFkodhYiIiMiksSgiMmFlZWXYsmULTp48CY1GI3UcIiIiIpPEoojIxKnVahw+fBibN29Gdna21HGIiIiITA6LIiIzUVhYiO3bt2P79u3Iy8uTOg4RERGRybCSOgARGVZ2djays7MRHByM2NhYeHh4SB2JiIiIqF1jUURkpi5fvozLly8jNDQUPXv2hKenp9SRiIiIiNolFkVEZi4jIwMZGRnw8/ND165dERISAisrHvpEREREN/GTEVE7lpWVhcrKSgBATU0NioqK4O7u3qJt5eTkICcnB9bW1ggJCUF4eDgCAgIgk/HSQiIiIrJsLIqI2qEjR47gjTfewPbt2yGKIgCgsrIS//73vxEVFYXx48cjJCSkRduura3FhQsXcOHCBdjZ2SEiIgLdunWDg4ODAZ8BERERkelgUUTUzmzatAnTpk2DKIragugmURRx6tQpnDp1CnPnzkWvXr1ata+qqiqkpKTg1KlT6NevH7p169aq7RERERGZIo6bIWpHjhw5gmnTpkGtVkOtVjfYR6PRQKPRYNWqVcjMzDTIfuvq6pCQkID09HSDbI+IiIjIlLAoImpHli5d2uAZosbs2LHDoPs/ePAgysvLDbpNIiIiovaORRFRO5GVlYVt27Y1eobodhqNBidPnkRRUZHBMqhUKmzduhXFxcUG2yYRERFRe8eiiKid2L17d7PPEN0kiiLS0tIMmkOpVOKXX35Bfn6+QbdLRERE1F6xKCJqJ8rLy/WeHlsQBFRXVxs8S01NDVJSUgy+XSIiIqL2iEURUTvh5OQEjUaj1zqiKMLW1tbgWaytrREVFWXw7RIRERG1R5ySm6idGDFiBARB0GsInSAI6NKli0FzeHt7Y9iwYXBxcTHodomIiIjaK54pImongoKCcM8990Aulzerv0wmQ3R0NNzd3Q2yf7lcjr59++Lee+9lQUREREQWhUURUTuyePFiCIIAQRCa1X/cuHEG2a+vry8mT56MmJgYva9rIiIiIjJ1/PRD1I706dMHP/74I+RyeaNnjGQyGWQyGR577DGEhIS0an+urq64++67MWHCBLi5ubVqW0RERESmitcUEbUzkydPxqFDh/DGG29g27ZtOtcYCYKAqKgojBs3rlUFkbu7O3r27ImwsLBmn5UiIiIiMlcsiojaoT59+uDXX39FVlYWYmJiUFxcDHt7eyxevLhV1xB5eXmhV69eCAoKYjFERERE9D8mNXxu//79mDBhAvz9/SEIArZs2SJ1JCKjCgoKgr29PQDAxsamxQWRi4sLRo4cifvuuw/BwcEsiIiIiCwcP1frMqmiqKKiAj169MCnn34qdRQik2BtbY277roL999/P0JDQ1kMEREREQB+rr5di4bPpaenY/Xq1UhPT8dHH30Eb29v7Ny5E0FBQejWrZuhM2qNHTsWY8eONdr2icyFIAjo3Lkz+vTpoz3TRERERHQTP1fr0vtM0b59+xAVFYXExERs2rQJSqUSAHDixAksWbLE4AFbQ6VSoaysTPtzMyuROQsNDcX99
9+PuLg4FkREREQWRKlU6nz2ValUUkcyGXoXRYsWLcLSpUuxa9cu2NjYaJcPHz4chw8fNmi41lqxYgVcXFy0P3FxcVJHIjIKQRAQHh6OqVOnYuTIkZxem4iIyALFxcXpfPZdsWKF1JFMht7D51JTU7Fu3bp6y729vVFQUGCQUIby0ksv4fnnn9c+TklJYWFEZic4OBh9+/ZlIURERGTh9u3bh5iYGO1jhUIhXRgTo3dR5OrqipycHISGhuosT05ORkBAgMGCGYJCodD5ZXB0dJQwDZFh2draYvDgwfWORSIiIrJMjo6OcHZ2ljqGSdJ7+Nz06dOxcOFC5ObmQhAEaDQaJCQkYP78+Zg5c6YxMhLRbTw8PDB58mQWREREREQGoPeZouXLl2PevHkIDAyEWq1GZGQk1Go1HnroIbzyyivGyKilVCpx8eJF7eOMjAykpKTA3d0dQUFBRt03kVR8fX1RV1enPevp5eWF8ePH61zTR0RERKQPfq7WJYiiKLZkxStXriA1NRVKpRI9e/ZEp06dDJ2tnvj4eAwbNqze8lmzZmHNmjV3XD8pKQm9e/fG8ePH0atXLyMkJDKO9PR07N69G9bW1pg6dSqHghIREZFWSz7jtvZztblp0X2KACAwMBCBgYGGzHJHQ4cORQtrOCKzEBERwYKIiIiIWo2fq3XpfU3RlClT8NZbb9Vb/vbbb2Pq1KkGCUVEDeM1RERERESGp3dRtH//fowbN67e8rFjx2L//v0GCUVE9dnY2MDHx0fqGERERERmR++iSKlUNniBt7W1NcrKygwSiojq69ChA2QyvQ9ZIiIiIroDvT9hRUVF4ccff6y3fP369YiMjDRIKCKqLzg4WOoIRERERGZJ74kWFi9ejMmTJyM9PR3Dhw8HAOzevRs//PADfvrpJ4MHJKIbOHSOiIiIyDj0LoomTJiALVu2YPny5fj5559hZ2eH6Oho/Pnnn4iLizNGRiKLJwgCZ50jIiIik1ZdXY2amhqdZc7OzhKl0dWiKbnHjx+P8ePHGzoLETXC1taW1xMRERGRyamsrMSLL76IDRs2oLCwsF67Wq2WIFV9Lf6UVVNTg6tXryIrK0vnh4gMz9bWVuoIRNSEuro6qSMQEbVLCxYswJ49e7By5UooFAr897//xeuvvw5/f398++23UsfT0vtM0YULF/DII4/g0KFDOstFUYQgCO2m2iMyJ+3l1DIRNayyspLHKRFRA7Zu3Ypvv/0WQ4cOxZw5czB48GCEh4cjODgYa9euxYwZM6SOCKAFRdHs2bNhZWWFbdu2wc/PD4IgGCMXEd3CyqpFI12JqI0olUoWRUREDSgqKkJYWBiAG1/yFhUVAQAGDRqEJ554QspoOvT+pJWSkoLjx4+jS5cuxshDRERkcgoKCuDv7y91DCKidicsLAwZGRkICgpCly5dsGHDBvTt2xdbt26Fq6ur1PG09L6mKDIyEgUFBcbIQkREZJKuXLkCURSljkFE1O7MmTMHJ06cAAAsWrQIn376KWxtbfGvf/0LCxYskDjd3/Q+U/TWW2/hxRdfxPLlyxEVFQVra2uddg4fICIiS6NUKpGZmYnQ0FCpoxARtSv/+te/tP++++67kZaWhuPHjyM8PBzR0dESJtOld1F09913AwBGjBihs5wTLRARkSVLTk5GSEgIr7UlIrrFt99+i2nTpkGhUAAAgoODERwcjJqaGnz77beYOXOmxAlv0Lso2rt3rzFyEBERmbSCggJcu3YNAQEBUkchImo35syZgzFjxsDb21tneXl5OebMmWO6RVFcXJwxchAREZmk2NhYZGRkwNHRER4eHrjvvvsgl8uljkVE1C7cHE12u6tXr8LFxUWCRA1r0Ty/Bw4cwBdffIFLly7hp59+QkBAAL777juEhoZi0KBBhs5IRETUbuXm5qKoqAgajQaFhYVISEjA4MGDOYyOiCxaz549IQgCBEHAiBEjdG4volarkZGRgTFjxkiYUJfeRdHGj
Rvxj3/8AzNmzEBSUhJUKhUAoLS0FMuXL8eOHTsMHpKIiMhUpKWloaqqCgMHDoSjo6PUcYiIJHHfffcBuHE7n9GjR+v8PbSxsUFISAimTJkiUbr69C6Kli5dis8//xwzZ87E+vXrtcsHDhyIpUuXGjQcERGRKbp8+TKuXr2KTp06oXv37nB3d5c6EhFRm1qyZAkAICQkBNOmTYOtra3EiZqmd1F07tw5DBkypN5yFxcXlJSUGCITERGRyVOr1UhLS0NaWhr8/PwQExODDh06cFgdEVmUWbNmSR2hWfQuinx9fXHx4kWEhIToLD948CDCwsIMlYuIiMhs5OTkICcnBwEBARgyZAicnJykjkREZDRubm7N/gKoqKjIyGmaR++iaO7cuXj22Wfx9ddfQxAEXLt2DX/99Rfmz5+PxYsXGyMjERGRWcjOzsbGjRvRv39/dO7cmWeNiMgsffjhh1JH0JveRdGiRYug0WgwYsQIVFZWYsiQIVAoFJg/fz6efvppY2QkIiIyGzU1Ndi3bx9SU1PRvXt3dOzYEdbW1lLHIiIyGFMZMncrvYoitVqNhIQEzJs3DwsWLMDFixehVCoRGRnJGXaIiMjiZGVlobKyEsCNYqeoqKjZkyoUFRVh//79OHToEIKCghAaGooOHTpo7/pORGQu0tPTsXr1aqSnp+Ojjz6Ct7c3du7ciaCgIHTr1k3qeAAAmT6d5XI5Ro0aheLiYtjY2CAyMhJ9+/ZlQURERBblyJEjmDBhAkJCQlBcXAwAqKysxL///W98+umnyMzMbPa26urqcOnSJezevRvfffcdtm7dipSUFBQVFUEURSM9AyKitrFv3z5ERUUhMTERmzZtglKpBACcOHFCO0Nde6D38Lnu3bvj0qVLCA0NNUYeIiKidm3Tpk2YNm0aRFGsV7SIoohTp07h1KlTmDt3Lnr16qXXtjUajXZShiNHjsDR0REhISGIiIiAh4eHIZ8GEVGbWLRoEZYuXYrnn39eZ5KZ4cOH45NPPpEwmS69zhQBN+5TNH/+fGzbtg05OTkoKyvT+SEiIjJXR44cwbRp06BWq6FWqxvso9FooNFosGrVKr3OGDVEqVTi1KlT2LhxI3777TftDdOJiExFamoqJk2aVG+5t7c3CgoKJEjUML2LonHjxuHEiRO499570aFDB7i5ucHNzQ2urq5wc3MzRkYiIqJ2YenSpQ2eIWrMjh07DLbvrKwsHDhwwGDbIyJqC66ursjJyam3PDk5GQEBARIkapjew+f27t1rjBxERETtWlZWFrZt29bsgkij0eDkyZN6Tb5wJ5cuXcK5c+cQERFhkO0RERnb9OnTsXDhQvz0008QBAEajQYJCQmYP38+Zs6cKXU8Lb2Lori4OGPkICIiatd2796t98QHoigiLS0NAwYMMFiO/fv3QyaToVOnTgbbJhGRsSxfvhzz5s1DYGAg1Go1IiMjoVar8dBDD+GVV16ROp6W3sPnAODAgQN4+OGHMWDAAGRnZwMAvvvuOxw8eNCg4YiIiNqL8vJyyGT6vW0KgoDq6mqD5hBFEfv379dOBU5E1J7Z2Nhg1apVSE9Px7Zt2/D9998jLS0N3333HeRyudTxtPQuijZu3IjRo0fDzs4OSUlJ2os+S0tLsXz5coMHJCIiag+cnJyg0Wj0WkcURdja2hopERGR6QgKCsK4cePwwAMPtMsz3XoPn1u6dCk+//xzzJw5E+vXr9cuHzhwIJYuXWrQcERERO3FiBEjIAiCXkPoBEFAly5dDJrD2toaw4cPh729vUG3S0RkKM8//3yz+77//vtGTNJ8ehdF586dw5AhQ+otd3FxQUlJiSEyERERtTtBQUG45557sGPHjkan476VTCZDVFSUwSZZAAAvLy8MGzYMrq6uBtsmEZGhJScn6zxOSkpCXV2ddpKY8+fPQy6Xo3fv3lLEa5DeRZGvry8uXryIkJAQneUHDx5EWFiYoXIRERG1O4sXL8bOnTubfcZo3
LhxBtmvlZUVevXqhejoaL2vayIiamu3zlb9/vvvw8nJCd9884329j3FxcWYM2cOBg8eLFXEevT+yzp37lw8++yzSExMhCAIuHbtGtauXYv58+fjiSeeMEZGIiKidqFPnz748ccfIZfLG71AWCaTQSaT4bHHHqv3BaK+BEFAREQEpk2bhpiYGBZERGRy3nvvPaxYsULnfqZubm5YunQp3nvvPQmT6dL7TNGiRYug0WgwYsQIVFZWYsiQIVAoFJg/fz6efvppY2QkIiJqNyZPnoxDhw7hjTfeqHffIkEQEBUVhXHjxrWqILKxsUHnzp0RFRUFJycnA6QmIpJGWVkZ8vPz6y3Pz89HeXm5BIka1qyi6OTJk+jevTtkMhkEQcDLL7+MBQsW4OLFi1AqlYiMjISjo6OxsxIREbULffr0wa+//oqsrCzExMSguLgY9vb2WLx4cYuvIRIEAf7+/ujUqRNCQ0NhbW1t4NRERG1v0qRJmDNnDt577z307dsXAJCYmIgFCxZg8uTJEqf7W7OKop49eyInJwfe3t4ICwvD0aNH4eHhgcjISGPnIyIiareCgoJgb2+P4uJi2NjYtKggcnd3R+fOndGxY0c4ODgYISURkXQ+//xzzJ8/Hw899BBqa2sB3LhO8tFHH8U777wjcbq/NasocnV1RUZGBry9vZGZman3fRqIiIjob1ZWVujYsSMiIyPh6ekJQRCkjkREZBT29vb47LPP8M477yA9PR0A2uWXQM0qiqZMmYK4uDj4+flBEATExsY2eoHppUuXDBqQiIjIXDg6OiIyMhJdu3aFQqGQOg4RUZtxcHBAdHS01DEa1ayi6Msvv8TkyZNx8eJFPPPMM5g7dy4v/CQiImomb29vREVFITQ0lDPIERG1Q82eaGHUqFEYM2YMjh8/jmeffZZFERER0R14e3ujb9++2pEWRETUPuk90cK+fftQU1Nj7FxEREQmSy6Xo3///ujatSuLISIiE9Csc/g3J1oAIPlEC59++ilCQkJga2uLfv364ciRI5JlISIi8vX1hbu7O5ydnQHcGDc/ceJEREZGsiAionaNn6v/ZlITLfz44494/vnn8fnnn6Nfv3748MMPMXr0aJw7dw7e3t5G2y8REVFjjh07hh9//BGlpaVwdHTEhAkTOMSciNo9KT5XOzs7IyUlBWFhYUbZfmuY1EQL77//PubOnYs5c+YAuDHv+fbt2/H1119j0aJFbZ6HiIjoJrlcjjFjxrAgIiKTIMXnalEUjbJdQ2hWUQQAY8aMAQDJJlqoqanB8ePH8dJLL2mXyWQy3H333fjrr78aXEelUkGlUmkfK5VKAEBdXZ325lFEREStVVdXh+7du8PJyYnvL0TU5urq6gDc+KxbVlamXa5QKBqc/r8ln6vNXbOLoptWr15tjBx3VFBQALVaDR8fH53lPj4+SEtLa3CdFStW4PXXX6+3vF+/fkbJSEREREQklbi4OJ3HS5YswWuvvVavX0s+VxvCww8/rL3+sr1pVlE0efJkrFmzBs7Ozpg8eXKTfTdt2mSQYIbw0ksv4fnnn9c+TklJQVxcHBITE9GzZ08JkxERkTk5fPgw7rrrLqljEJGFSk5ORr9+/bBv3z7ExMRol7e3m0SvXLlS6giNalZR5OLiop1Bx8XFxaiBGuPp6Qm5XI7r16/rLL9+/Tp8fX0bXOf2U4aOjo4AACsrK1hbWxsvLBERWRRfX1++rxCRZKysbnykd3R0bNaZmJZ8rjZ3zSqKbh0yJ9XwORsbG/Tu3Ru7d+/GfffdBwDQaDTYvXs3nnrqKUkyERERAX9/6UZEZAr4ubo+va8pktLzzz+PWbNmITY2Fn379sWHH36IiooK7awZREREUrC1tZU6AhGRXvi5WleziqKePXs2+wZ0SUlJrQrUlGnTpiE/Px+vvvoqcnNzERMTg99++63eRWJERERtiUUREZkafq7W1ayi6OZpNQCorq7GZ599hsjISPTv3x/AjQtMT
58+jSeffNIoIW/11FNPWexpPSIiap9sbGykjkBEpLe2/lxdXV2NmpoanWXtZTa6ZhVFS5Ys0f77//7v//DMM8/gjTfeqNfnypUrhk1HRERkAmQymdQRiIjapcrKSrz44ovYsGEDCgsL67Wr1WoJUtWn91/xn376CTNnzqy3/OGHH8bGjRsNEoqIiIiIiEzfggULsGfPHqxcuRIKhQL//e9/8frrr8Pf3x/ffvut1PG09C6K7OzskJCQUG95QkICx1QTEREREZHW1q1b8dlnn2HKlCmwsrLC4MGD8corr2D58uVYu3at1PG09J597rnnnsMTTzyBpKQk9O3bFwCQmJiIr7/+GosXLzZ4QCIiovZOFMVmT0hERGRJioqKEBYWBuDG9UNFRUUAgEGDBuGJJ56QMpoOvYuiRYsWISwsDB999BG+//57AEDXrl2xevVqPPDAAwYPSERE1N6Joih1BCKidiksLAwZGRkICgpCly5dsGHDBvTt2xdbt26Fq6ur1PG0WnSfogceeIAFEBER0f/I5XKpIxARtUtz5szBiRMnEBcXh0WLFmHChAn45JNPUFtbi/fff1/qeFomdfNWIiIiIiIyHf/617+0/7777ruRlpaG48ePIzw8HNHR0RIm08U5RImIiIjIbNXW1kodwaJ9++23UKlU2sfBwcGYPHkyunTpYtqzzxERERERmYrbbxZKbWvOnDkoLS2tt7y8vBxz5syRIFHDWBQRERERkdniRCjSamx2zqtXr8LFxUWCRA3jNUVEREREZLZYFEmjZ8+eEAQBgiBgxIgRsLL6u+xQq9XIyMjAmDFjJEyoS++iSK1WY82aNdi9ezfy8vKg0Wh02vfs2WOwcERERERErcFriqRx3333AQBSUlIwevRoODo6attsbGwQEhKCKVOmSJSuPr2LomeffRZr1qzB+PHj0b17d96sjoiIiIjarVsv8qe2s2TJEgBASEgIpk2bBltbW4kTNU3vomj9+vXYsGEDxo0bZ4w8REREREQGU11d3eh1LWR8s2bNkjpCs+hdFNnY2CA8PNwYWYiIiIiIDEqtVkOlUrX7MxXmxM3NrdlFaFFRkZHTNI/eRdELL7yAjz76CJ988gkrbiIiIiJq98rKylgUtaEPP/xQ6gh607soOnjwIPbu3YudO3eiW7dusLa21mnftGmTwcIREREREbVWQUEBvL29pY5hMUxlyNyt9C6KXF1dMWnSJGNkISIiIiIyuKysLERGRkodw2Klp6dj9erVSE9Px0cffQRvb2/s3LkTQUFB6Natm9TxALSgKFq9erUxchARERERGcWVK1dQWlrarm4Wain27duHsWPHYuDAgdi/fz+WLVsGb29vnDhxAl999RV+/vlnqSMCAGRSByAiIiIiMiZRFJGYmCh1DIu0aNEiLF26FLt27YKNjY12+fDhw3H48GEJk+nS+0wRAPz888/YsGEDsrKyUFNTo9OWlJRkkGBERERERIaSmZmJS5cuISwsTOooFiU1NRXr1q2rt9zb2xsFBQUSJGqY3meKPv74Y8yZMwc+Pj5ITk5G37594eHhgUuXLmHs2LHGyEhEREREpLfY2FgMGjQIy5YtAwAcOHAAFRUVEqeyLK6ursjJyam3PDk5GQEBARIkapjeRdFnn32GL7/8Ev/5z39gY2ODF198Ebt27cIzzzyD0tJSY2QkIiIiItJbbm4url+/jrKyMgCASqXC7t27odFoJE5mOaZPn46FCxciNzcXgiBAo9EgISEB8+fPx8yZM6WOp6V3UZSVlYUBAwYAAOzs7FBeXg4A+Mc//oEffvjBsOmIiIiIiAwoNzcXBw8ehCiKUkexCMuXL0eXLl0QGBgIpVKJyMhIDBkyBAMGDMArr7widTwtvYsiX19f7Z1ng4KCtBdIZWRk8JeLiIiIiNq9tLQ0HDt2jJ9d24CNjQ1WrVqF9PR0bNu2Dd9//z3S0tLw3XffQS6XSx1PS++JFoYPH45ff/0VPXv2x
Jw5c/Cvf/0LP//8M44dO4bJkycbIyMRERERkUElJydDEAT07t0bgiBIHcfsBQUFISgoSOoYjdK7KPryyy+14zDnzZsHDw8PHDp0CPfeey/++c9/GjwgEREREZExJCUlQa1Wo2/fviyMDOj5559vdt/333/fiEmaT++iSCaTQSb7e9Td9OnTMX36dIOGIiIiIiJqCydOnEB1dTUGDRrUroZzmbLk5GSdx0lJSairq0NERAQA4Pz585DL5ejdu7cU8RrUovsUHThwAF988QXS09Px888/IyAgAN999x1CQ0MxaNAgQ2ckIiIiIjKac+fOobi4GMOHD4ezs7PUcUze3r17tf9+//334eTkhG+++QZubm4AgOLiYsyZMweDBw+WKmI9ek+0sHHjRowePRp2dnZITk6GSqUCAJSWlmL58uUGD0hEREREZGx5eXn4+eefcfLkSU7ZbUDvvfceVqxYoS2IAMDNzQ1Lly7Fe++9J2EyXXoXRUuXLsXnn3+OVatWwdraWrt84MCBSEpKMmg4IiIiIqK2UldXh8OHD2Pz5s24fv261HHMQllZGfLz8+stz8/P197apz3Quyg6d+4chgwZUm+5i4sLSkpKDJGJiIiIiEgyhYWF+OWXX7B//37tqChqmUmTJmHOnDnYtGkTrl69iqtXr2Ljxo149NFH29XM1XpfU+Tr64uLFy8iJCREZ/nBgwcRFhZmqFxERERERJJKS0vD5cuXMWDAAISFhXGGuhb4/PPPMX/+fDz00EOora0FAFhZWeHRRx/FO++8I3G6v+l9pmju3Ll49tlnkZiYCEEQcO3aNaxduxbz58/HE088YYyMRERERESSqKqqwu7du/HHH3+goqJC6jgmx97eHp999hkKCwuRnJyM5ORkFBUV4bPPPoODg4PU8bT0PlO0aNEiaDQajBgxApWVlRgyZAgUCgXmz5+Pp59+2hgZiYiIiIgkdfnyZeTm5mLAgAEIDw/nWSM9OTg4IDo6WuoYjdK7KBIEAS+//DIWLFiAixcvQqlUIjIyEo6OjsbIR0RERESkt6ysLFRWVgIAampqUFRUBHd391ZtU6VSYe/evcjMzNSeGCDzoPfwuZtsbGwQGRmJvn37siAiIiIionbhyJEjmDBhAkJCQlBcXAwAqKysxL///W98+umnyMzMbPU+MjIysHnzZu32yfQ1+0zRI4880qx+X3/9dYvDEBERERG11KZNmzBt2jSIoghRFHXaRFHEqVOncOrUKcydOxe9evVq1b7Kysrw66+/Yty4cfDy8mrVtkh6zT5TtGbNGuzduxclJSUoLi5u9IeIiIiIqK0dOXIE06ZNg1qthlqtbrCPRqOBRqPBqlWrDHLGSKVSYceOHfwMbAaafaboiSeewA8//ICMjAzMmTMHDz/8cKvHZRIRERERGcLSpUsbPEPUmB07duDJJ59s9X5vFkb33nsvnJycWr09kkazzxR9+umnyMnJwYsvvoitW7ciMDAQDzzwAH7//fdm//IRERERERlaVlYWtm3b1ugZottpNBqcPHkSRUVFBtl/RUUFtm3bhrKyMoNsj9qeXhMtKBQKPPjgg9i1axfOnDmDbt264cknn0RISAiUSqWxMhIRERERNWr37t16f0kviiLS0tIMlqG8vBxbt25FSUmJwbZJbafFs8/JZDIIggBRFJtdlRMRERERGVp5eTlkMv0+1gqCgOrqaoPmqKiowI4dO6BSqQy6XTI+vX57VCoVfvjhB4wcORKdO3dGamoqPvnkE2RlZRl9Wu5ly5ZhwIABsLe3h6urq1H3RURERESmw8nJCRqNRq91RFGEra2twbMolUqcPHnS4NuViqV8Bm92UfTkk0/Cz88Pb775Ju655x5cuXIFP/30E8aNG6d3Zd4SNTU1mDp1Kp544gmj74uIiIiITMeIESMgCIJe6wiCgC5duhgljznNRmcpn8GbPfvc559/jqCgIISFhWHfvn3Yt29fg/02bdpksHC3ev311
wHcmBqciIiIiOimoKAg3HPPPdixY0ezLuuQyWSIiooy2kzKPj4+RtmuFCzlM3izi6KZM2fqXYETEREREbWFxYsXY+fOndpr3u9k3LhxRskRHByM7t27G2XbZDzNLopMsTpUqVQ6F7pxhjwiIiIi89SnTx/8+OOPmDZtWqMTgd285OOxxx5DSEiIQfevUCgQGxuLyMhIyU4kKJVKnWnBFQoFFAqFJFlMjfEvBmrCokWLIAhCkz+tmSpxxYoVcHFx0f7ExcUZMD0RERERtSeTJ0/GoUOHMG7cuHqFiSAIiIqKwsKFC9GzZ0+D7dPa2hq9evXCgw8+iG7dukk6siouLk7ns++KFSsa7Gfsz+CmSBAlvPNqfn4+CgsLm+wTFhYGGxsb7eM1a9bgueeea9Yc8LefKUpJSUFcXByOHz+OXr16tTg3EREREbVvWVlZiImJQXFxMezt7bF48WKDXkMkk8nQrVs39OzZ0yiz2OkjKSkJvXv3xr59+xATE6Nd3tiZImN/BjdFzR4+ZwxeXl7w8vIy2vZv/0Uw9rThRERERNQ+BAUFwd7eHsXFxbCxsTFoQeTj44MhQ4bAzc3NYNs0BEdHRzg7O9+xn7E/g5siSYsifWRlZaGoqAhZWVlQq9VISUkBAISHh7PYISIiIiKjs7a2Rp8+fSQfJteWLOUzuMkURa+++iq++eYb7eObY0H37t2LoUOHSpSKiIiIiCxBWFgY7rrrLrMqBJrDUj6Dm0xRtGbNGpOcAY+IiIiITJePjw/69esHX19fqaNIwlI+g5tMUURERERE1FZcXFzQt29fhISEWMxQOUvGooiIiIiI6H+sra0RGxuLbt26ae9rROaPRREREREREW7MWDd48GA4ODhIHYXaGIsiIiIiIrJoMpkMd911l0XNKke6WBQRERERkcVSKBQYNWoU/Pz8pI5CEmJRREREREQWycHBAePGjWt3N2GltseiiIiIiIgsjqOjIyZMmAAnJyepo1A7wCk1iIiIiMii2NnZYfz48SyISItFERERERFZDJlMhpEjR8LFxUXqKNSOcPgcEREREZklX19f1NXVQaFQaJf1798fvr6+Eqai9ohFERERERGZpWPHjuHixYvYs2cPACA0NBSRkZESp6L2iMPniIiIiMjs2djYYNCgQbwPETWIRRERERERmb3IyEjY2dlJHYPaKRZFRERERGT2OnfuLHUEasdYFBERERGRWXN1dYWrq6vUMagdY1FERERERGatQ4cOUkegdo5FERERERGZNR8fH6kjUDvHooiIiIiIzJq7u7vUEaidY1FERERERGZLEAQ4OztLHYPaORZFRERERGS2bG1tIZfLpY5B7RyLIiIiIiIyW7a2tlJHIBPAooiIiIiIzBbPElFzsCgiIiIiIrMlCILUEcgEsCgiIiIiIrMlk/HjLt0Zf0uIiIiIyGxx+Bw1B4siIiIiIjJbLIqoOVgUEREREZHZ4vA5ag7+lhARERGR2eJEC9QcLIqIiIiIyGxx+Bw1B4siIiIiIjJbPFNEzcGiiIiIiIiILBqLIiIiIiIismgsioiIiIiIyKKxKCIiIiIiIovGooiIiIiIiCwaiyIiIiIiIrJoVlIHIOPIyclBTk6O1DHIQPz8/ODn5yd1DDIQHp/mh8eoeeExal54fFJzWFRR5OfnhyVLlpj9gaFSqfDggw9i3759UkchA4mLi8Pvv/8OhUIhdRRqJR6f5onHqPngMWp+LOH4tJTPuMYkiKIoSh2CDKusrAwuLi7Yt28fHB0dpY5DraRUKhEXF4fS0lI4OztLHYdaicen+eExal54jJoXHp/UXBZ1psjSxMTE8A+AGSgrK5M6AhkBj0/zwWPUPPEYNQ88Pqm5ONECERERERFZNBZFRERERERk0VgUmSGFQoElS5aY9QWFloSvp3nh62l++JqaF76e5oWvJzUXJ1ogIiIiIiKLxjNFRERERERk0VgUERERERGRRWNRREREREREFo1FEZHEB
EHAli1b2mx/a9asgaura5vtj4iIaOjQoXjuueea1bet36dee+01xMTENLt/ZmYmBEFASkqK0TJR22NRRNSE2bNnQxAEPP744/Xa5s2bB0EQMHv27EbXj4+PhyAI2h8fHx9MmTIFly5dMmJqIrqVIY9jmUwGFxcX9OzZEy+++CJycnKMmJyIAP2LlrZ2829ESUmJ1FGoFVgUEd1BYGAg1q9fj6qqKu2y6upqrFu3DkFBQc3axrlz53Dt2jX89NNPOH36NCZMmAC1Wm2syER0G0Mex0ePHsXChQvx559/onv37khNTW10nZqamlZnJyIi42NRRHQHvXr1QmBgIDZt2qRdtmnTJgQFBaFnz57N2oa3tzf8/PwwZMgQvPrqqzhz5gwuXrzYYN+FCxeic+fOsLe3R1hYGBYvXoza2lqdPlu3bkWfPn1ga2sLT09PTJo0SdumUqkwf/58BAQEwMHBAf369UN8fHy9/WzZsgWdOnWCra0tRo8ejStXrui0r1y5Eh07doSNjQ0iIiLw3XffNeu5ErVHhjqOfX190blzZ0yfPh0JCQnw8vLCE088oe0ze/Zs3HfffVi2bBn8/f0REREBoOFhsq6urlizZo328aFDhxATEwNbW1vExsZiy5YtHKJDJqmiogIzZ86Eo6Mj/Pz88N577+m0N/d9CrgxlO7111/HiRMntGdsbx4377//PqKiouDg4IDAwEA8+eSTUCqVd8z35ptvwsfHB05OTnj00UdRXV1dr89///tfdO3aFba2tujSpQs+++yzBreVmZmJYcOGAQDc3Nx0zjz/9ttvGDRoEFxdXeHh4YF77rkH6enpd8xH0mBRRNQMjzzyCFavXq19/PXXX2POnDkt2padnR2Axr9BdnJywpo1a3DmzBl89NFHWLVqFT744ANt+/bt2zFp0iSMGzcOycnJ2L17N/r27attf+qpp/DXX39h/fr1OHnyJKZOnYoxY8bgwoUL2j6VlZVYtmwZvv32WyQkJKCkpATTp0/Xtm/evBnPPvssXnjhBZw6dQr//Oc/MWfOHOzdu7dFz5moPTDkcQzcOJYff/xxJCQkIC8vT7t89+7dOHfuHHbt2oVt27Y1a1tlZWWYMGECoqKikJSUhDfeeAMLFy5scTYiKS1YsAD79u3DL7/8gj/++APx8fFISkrStjfnfeqmadOm4YUXXkC3bt2Qk5ODnJwcTJs2DQAgk8nw8ccf4/Tp0/jmm2+wZ88evPjii01m27BhA1577TUsX74cx44dg5+fX72CZ+3atXj11VexbNkynD17FsuXL8fixYvxzTff1NteYGAgNm7cCODG2eScnBx89NFHAG4Uh88//zyOHTuG3bt3QyaTYdKkSdBoNPr9D6W2IRJRo2bNmiVOnDhRzMvLExUKhZiZmSlmZmaKtra2Yn5+vjhx4kRx1qxZja6/d+9eEYBYXFwsiqIoXrt2TRwwYIAYEBAgqlQqURRFEYC4efPmRrfxzjvviL1799Y+7t+/vzhjxowG+16+fFmUy+Vidna2zvIRI0aIL730kiiKorh69WoRgHj48GFt+9mzZ0UAYmJioiiKojhgwABx7ty5OtuYOnWqOG7cuEZzErVXhj6Ob7Vz506dY2fWrFmij4+P9vi+qaHj3MXFRVy9erUoiqK4cuVK0cPDQ6yqqtK2r1q1SgQgJicnt+RpE0mivLxctLGxETds2KBdVlhYKNrZ2YnPPvtss9+nXFxctG1LliwRe/Toccd9//TTT6KHh0eTffr37y8++eSTOsv69euns/2OHTuK69at0+nzxhtviP379xdFURQzMjJ0js2m/kbcKj8/XwQgpqam3vG5UNuzkqgWIzIpXl5eGD9+PNasWQNRFDF+/Hh4eno2e/0OHTpAFEVUVlaiR48e2LhxI2xsbBrs++OPP+Ljjz9Geno6lEol6urq4OzsrG1PSUnB3LlzG1w3NTUVarUanTt31lmuUqng4eGhfWxlZYU+ffpoH3fp0
gWurq44e/Ys+vbti7Nnz+Kxxx7T2cbAgQO1334RmaLWHscNEUURwI3hcTdFRUU1enw35ty5c4iOjoatra122a1ngIlMRXp6OmpqatCvXz/tMnd3d+1Q0ua+TzXHn3/+iRUrViAtLQ1lZWWoq6tDdXU1KisrYW9vD0dHR23fhx9+GJ9//jnOnj1bb9KV/v37a0dCVFRUID09HY8++qjOe21dXR1cXFz0ynfhwgW8+uqrSExMREFBgfYMUVZWFrp3767Xtsj4WBQRNdMjjzyCp556CgDw6aef6rXugQMH4OzsDG9vbzg5OTXa76+//sKMGTPw+uuvY/To0XBxccH69et1xmPfHH7XEKVSCblcjuPHj0Mul+u03frmQGSpWnMcN+Ts2bMAgJCQEO0yBweHev0EQdAWUDfdfq0gkSUw1PtUZmYm7rnnHjzxxBNYtmwZ3N3dcfDgQTz66KOoqamBvb29zvV4t365eKd8ALBq1Sqdwg5Avbx3MmHCBAQHB2PVqlXw9/eHRqNB9+7dOQFLO8WiiKiZxowZg5qaGgiCgNGjR+u1bmhoaLPuuXDo0CEEBwfj5Zdf1i67fPmyTp/o6Gjs3r27wWshevbsCbVajby8PAwePLjR/dTV1eHYsWPab6LPnTuHkpISdO3aFQDQtWtXJCQkYNasWdp1EhISEBkZecfnQNSeteY4vl1VVRW+/PJLDBkyBF5eXk329fLy0pm++8KFC6isrNQ+joiIwPfffw+VSgWFQgEAOHr0aKvyEUmhY8eOsLa2RmJionZmx+LiYpw/fx5xcXHNfp+6lY2NTb0ZW48fPw6NRoP33nsPMtmNS+Q3bNig0yc8PLzetrp27YrExETMnDlTu+zw4cPaf/v4+MDf3x+XLl3CjBkzmp0PgE7GwsJCnDt3DqtWrdI+z4MHDzZreyQNFkVEzSSXy7XfCuv7bVFzderUCVlZWVi/fj369OmD7du3Y/PmzTp9lixZghEjRqBjx46YPn066urqsGPHDu2sdTNmzMDMmTPx3nvvoWfPnsjPz8fu3bsRHR2N8ePHAwCsra3x9NNP4+OPP4aVlRWeeuop3HXXXdoiacGCBXjggQfQs2dP3H333di6dSs2bdqEP//80yjPm6ittOY4zsvLQ3V1NcrLy3H8+HG8/fbbKCgo0JnRrjHDhw/HJ598gv79+0OtVmPhwoWwtrbWtj/00EN4+eWX8dhjj2HRokXIysrCu+++C0B3aB5Re+fo6IhHH30UCxYsgIeHB7y9vfHyyy9rC5fmvk/dKiQkBBkZGUhJSUGHDh3g5OSE8PBw1NbW4j//+Q8mTJiAhIQEfP7553fM9+yzz2L27NmIjY3FwIEDsXbtWpw+fRphYWHaPq+//jqeeeYZuLi4YMyYMVCpVDh27BiKi4vx/PPP19tmcHAwBEHAtm3bMG7cONjZ2cHNzQ0eHh748ssv4efnh6ysLCxa9P/bu/uYpq64D+DfastLWxS0iEVRLFR8CdSpM5kuBUTFxSXgCxqj+DKjk0RQ55DHRBHd3JQ55rLF6ciSiUGSJfKHUdAY1k6G8S0K6kTnfJ/iTNQpqNAKv+ePPdzHzoJso4L2+0lMuOeee+45TX7S3+Wcc//nP3yy5HEduqKJqJNrXqDdkv+yQLsZ/rYAOzMzU3r27Cl6vV5mzJghX3zxhcuCUxGR3bt3y7Bhw8THx0cMBoNMmTJFOedwOCQ7O1vCw8NFo9GI0WiUyZMny+nTp0Xk/xew7t69W0wmk/j6+sq4cePk2rVrLvfYunWrmEwm0Wg0MnDgQCkoKGhxDESdWXvFMQBRqVQSEBAgFotFMjMzpaampk33unnzpkyYMEF0Op2YzWYpKSlx2WhBRKSiokJiYmLEx8dHRowYIbt27RIAcv78+X84YqKOVVtbK7NnzxatVishISGSm5srsbGxsnTpUhFp+++pZvX19TJ16lQJDAwUAErc5OXlidFoFH9/f0lMTJSCgoI2bXiwYcMGM
RgMotfrZe7cubJy5crnNnIoLCxUfs8GBQWJ1WqV4uJiEXl+owURkfXr10vv3r1FpVIp/58cPHhQBg8eLL6+vhITEyN2u/2FmytRx1GJ/G2SMxEREXW4wsJCzJ8/Hw8ePGh1LSEREf13nD5HRETUCRQUFMBkMqFPnz6oqqpCVlYWpk+fzoSIiOglYFJERETUCdy+fRvZ2dm4ffs2jEYjUlJSsGHDho7uFhGRV+D0OSIiIiIi8mpdOroDREREREREHYlJEVEnYLfboVKp8Oeff3Z0V4jIDcYoEdHrjdPniDoBh8OBe/fuISQkhO8kIeqEGKNERK83JkVEREREROTVOH2OyAPi4uKQnp6OZcuWISgoCCEhIcjPz8ejR48wf/585W3cpaWlAJ6fmvP9998jMDAQBw4cwODBg6HX6zFx4kTU1NS43GPZsmUu901OTsa8efOU461bt8JsNsPPzw8hISGYNm2ap4dO9EpgjBIR0bOYFBF5yI4dO2AwGHDs2DGkp6cjLS0NKSkpGD16NE6ePIkJEyYgNTUVjx8/dnv948ePsXnzZuzcuROHDh3C9evX8eGHH7b5/idOnEBGRgbWr1+PCxcuYP/+/bBare01PKJXHmOUiIiaMSki8hCLxYLVq1fDbDZj1apV8PPzg8FgwMKFC2E2m5GdnY27d+/i9OnTbq93Op3Ytm0bRo4cieHDh2PJkiUoKytr8/2vX78OnU6Hd999F/3798cbb7yBjIyM9hoe0SuPMUpERM2YFBF5SExMjPJz165d0bNnT0RHRytlISEhAIA7d+64vV6r1SIiIkI5NhqNLdZ1Z/z48ejfvz9MJhNSU1NRWFjY4hNvIm/EGCUiomZMiog8RKPRuByrVCqXsuYdrJqamtp8/bP7onTp0gV/3yfF6XQqPwcEBODkyZMoKiqC0WhEdnY2LBYLtxQm+j+MUSIiasakiOgVFRwc7LKou7GxEWfPnnWpo1arMW7cOOTm5uL06dO4evUqfvzxx5fdVSKvxBglInp1qDu6A0T074wdOxYffPAB9u3bh4iICOTl5bk8Yd67dy8uX74Mq9WKoKAglJSUoKmpCVFRUR3XaSIvwhglInp1MCkiekW99957qKqqwpw5c6BWq7F8+XLEx8cr5wMDA1FcXIycnBzU19fDbDajqKgIQ4cO7cBeE3kPxigR0auDL28lIiIiIiKvxjVFRERERETk1ZgUERERERGRV2NSREREREREXo1JEREREREReTUmRUQdzG63Q6VSvdQXNs6bNw/Jyckv7X5EREREnRmTIiI3tm3bhoCAADx9+lQpq6urg0ajQVxcnEvd5qTm0qVLbtvKycmBSqWCSqWCWq1GeHg4li9fjrq6Ok8OgYjg2Vg2GAywWq3YsmULGhoaPDkMIiLyMCZFRG7Ex8ejrq4OJ06cUMrKy8vRu3dvHD16FPX19Uq5zWZDv379EBER0WJ7Q4cORU1NDa5evYpNmzbh22+/xYoVKzw6BiLyXCxfv34dNpsNKSkp+PTTTzF69GjU1ta2eJ3D4WifARERkUcwKSJyIyoqCkajEXa7XSmz2+1ISkrCgAEDcOTIEZfyZ1/I6I5arUbv3r3Rt29fzJgxA7NmzcKePXvc1r179y5mzpyJPn36QKvVIjo6GkVFRS51mpqakJubi8jISPj6+qJfv37YsGGDcv7GjRuYPn06AgMD0aNHDyQlJeHq1avP3WvdunUIDg5Gt27dsHjxYpcvbg0NDcjIyECvXr3g5+eHt99+G8ePH291nESdjadiOTQ0FNHR0UhPT8dPP/2Es2fPYtOmTUq98PBwfPTRR5gzZw66deuGRYsWuZ0qW1lZCZVK5RKf+fn5CAsLg1arxeTJk5GXl4fAwMD/+lEQEVErmBQRtSA+Ph42m005ttlsiIuLQ2xsrFL+5MkTHD169IVfpP7O39+/xSfH9fX1GDFiBPbt24ezZ89i0aJFSE1NxbFjx5Q6q1atwsaNG7FmzRqcO
3cOu3btQkhICADA6XQiMTERAQEBKC8vR0VFBfR6PSZOnOhyz7KyMlRXV8Nut6OoqAjFxcVYt26dcn7lypXYvXs3duzYgZMnTyIyMhKJiYm4d+/ePxorUUfzZCwDwKBBg/DOO++guLjYpXzz5s2wWCw4deoU1qxZ06a2KioqsHjxYixduhSVlZUYP368ywMPIiLyECEit/Lz80Wn04nT6ZSHDx+KWq2WO3fuyK5du8RqtYqISFlZmQCQa9eutdjO2rVrxWKxKMcnTpwQg8Eg06ZNExERm80mAOT+/fsttjFp0iRZsWKFiIg8fPhQfH19JT8/323dnTt3SlRUlDQ1NSllDQ0N4u/vLwcOHBARkblz50qPHj3k0aNHSp1vvvlG9Hq9NDY2Sl1dnWg0GiksLFTOOxwOCQ0Nldzc3Bb7SdQZeSqWn5WVlSX+/v7Kcf/+/SU5OdmljrtYP3XqlACQK1euiIjIjBkzZNKkSS7XzZo1S7p37972ARMR0T+m7tCMjKgTi4uLw6NHj3D8+HHcv38fAwcORHBwMGJjYzF//nzU19fDbrfDZDKhX79+rbZ15swZ6PV6NDY2wuFwYNKkSfj666/d1m1sbMQnn3yCH374ATdv3oTD4UBDQwO0Wi0AoLq6Gg0NDUhISHB7fVVVFX777TcEBAS4lNfX17ssILdYLEqbAPDWW2+hrq4ON27cwIMHD+B0OjFmzBjlvEajwahRo1BdXd36B0fUybRnLLdERKBSqVzKRo4c+Y/buXDhAiZPnuxSNmrUKOzdu/df9YuIiNqGSRFRCyIjI9G3b1/YbDbcv38fsbGxAIDQ0FCEhYXh8OHDsNlsGDt27AvbioqKwp49e6BWqxEaGgofH58W63722Wf48ssvsWXLFkRHR0On02HZsmXK1Dd/f/9W71VXV4cRI0agsLDwuXPBwcEv7CvR66Y9Y7kl1dXVGDBggEuZTqdzOe7S5a8Z6yKilDmdzn99TyIiaj9cU0TUivj4eNjtdtjtdpfte61WK0pLS3Hs2LE2rUHw8fFBZGQkwsPDW02IgL/WFCQlJWH27NmwWCwwmUz49ddflfNmsxn+/v4oKytze/3w4cNx8eJF9OrVC5GRkS7/unfvrtSrqqrCkydPlOMjR45Ar9cjLCwMERER8PHxQUVFhXLe6XTi+PHjGDJkyAvHS9TZtFcsu3P+/Hns378fU6dObbVe80OJmpoapayystKlTlRU1HMbmnCDEyIiz2NSRNSK+Ph4/Pzzz6isrFSeLgNAbGwstm/fDofD8a+/SLXEbDbj4MGDOHz4MKqrq/H+++/jjz/+UM77+fkhKysLK1euREFBAS5duoQjR47gu+++AwDMmjULBoMBSUlJKC8vx5UrV2C325GRkYHff/9dacfhcGDBggU4d+4cSkpKsHbtWixZsgRdunSBTqdDWloaMjMzsX//fpw7dw4LFy7E48ePsWDBgnYdL9HL0F6x/PTpU9y+fRu3bt3CmTNn8NVXXyE2NhbDhg1DZmZmq9dGRkYiLCwMOTk5uHjxIvbt24fPP//cpU56ejpKSkqQl5eHixcvYvv27SgtLX1uah4REbUvTp8jakV8fDyePHmCQYMGKbu7AX99kaqtrVW2+21Pq1evxuXLl5GYmAitVotFixYhOTkZDx48UOqsWbMGarUa2dnZuHXrFoxGIxYvXgwA0Gq1OHToELKysjBlyhTU1taiT58+SEhIQLdu3ZQ2EhISYDabYbVa0dDQgJkzZyInJ0c5v3HjRjQ1NSE1NRW1tbUYOXIkDhw4gKCgoHYdL9HL0F6x/Msvv8BoNKJr167o3r07hgwZglWrViEtLQ2+vr6tXqvRaFBUVIS0tDTExMTgzTffxMcff4yUlBSlzpgxY7Bt2zasW7cOq1evRmJiIpYvX97iGkQiImofKnl2cjMRERF1KgsXLsT58+dRXl7e0V0hInpt8S9FREREncjmzZsxfvx46HQ6lJaWYseOHdi6dWtHd
4uI6LXGvxQRERF1ItOnT4fdbkdtbS1MJhPS09OV6bFEROQZTIqIiIiIiMircfc5IiIiIiLyakyKiIiIiIjIqzEpIiIiIiIir8akiIiIiIiIvBqTIiIiIiIi8mpMioiIiIiIyKsxKSIiIiIiIq/GpIiIiIiIiLwakyIiIiIiIvJq/wvnnQ3fkVlEQgAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "np.random.seed(9999) # Fix the seed so the results are replicable.\n", - "N = 20\n", - "# Create samples\n", - "y = norm.rvs(loc=3, scale=0.4, size=N*4)\n", - "y[N:2*N] = y[N:2*N]+1\n", - "y[2*N:3*N] = y[2*N:3*N]-0.5\n", - "# Add a `Treatment` column\n", - "t1 = np.repeat('Placebo', N*2).tolist()\n", - "t2 = np.repeat('Drug', N*2).tolist()\n", - "treatment = t1 + t2 \n", - "# Add a `Rep` column as the first variable for the 2 replicates of experiments done\n", - "rep = []\n", - "for i in range(N*2):\n", - " rep.append('Rep1')\n", - " rep.append('Rep2')\n", - "# Add a `Genotype` column as the second variable\n", - "wt = np.repeat('W', N).tolist()\n", - "mt = np.repeat('M', N).tolist()\n", - "wt2 = np.repeat('W', N).tolist()\n", - "mt2 = np.repeat('M', N).tolist()\n", - "genotype = wt + mt + wt2 + mt2\n", - "# Add an `id` column for paired data plotting.\n", - "id = list(range(0, N*2))\n", - "id_col = id + id \n", - "# Combine all columns into a DataFrame.\n", - "df_delta2 = pd.DataFrame({'ID' : id_col,\n", - " 'Rep' : rep,\n", - " 'Genotype' : genotype, \n", - " 'Treatment': treatment,\n", - " 'Y' : y\n", - " })\n", - "unpaired_delta2 = dabest.load(data = df_delta2, x = [\"Genotype\", \"Genotype\"], y = \"Y\", delta2 = True, experiment = \"Treatment\")\n", - "unpaired_delta2.mean_diff.plot();" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "24c4b036", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class MiniMetaDelta(object):\n", - " \"\"\"\n", - " A class to compute and store the weighted delta.\n", - " A weighted delta is calculated if the argument ``mini_meta=True`` is passed during ``dabest.load()``.\n", - " \n", - " \"\"\"\n", - "\n", - " def __init__(self, effectsizedataframe, permutation_count,\n", - " ci=95):\n", - "\n", - " import numpy as np\n", - " from numpy import sort as npsort\n", - " from numpy import sqrt, isinf, 
isnan\n", - " from ._stats_tools import effsize as es\n", - " from ._stats_tools import confint_1group as ci1g\n", - " from ._stats_tools import confint_2group_diff as ci2g\n", - "\n", - "\n", - " from string import Template\n", - " import warnings\n", - " \n", - " self.__effsizedf = effectsizedataframe.results\n", - " self.__dabest_obj = effectsizedataframe.dabest_obj\n", - " self.__ci = ci\n", - " self.__resamples = effectsizedataframe.resamples\n", - " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", - " self.__permutation_count = permutation_count\n", - " self.__bootstraps = np.array(self.__effsizedf[\"bootstraps\"])\n", - " self.__control = np.array(self.__effsizedf[\"control\"])\n", - " self.__test = np.array(self.__effsizedf[\"test\"])\n", - " self.__control_N = np.array(self.__effsizedf[\"control_N\"])\n", - " self.__test_N = np.array(self.__effsizedf[\"test_N\"])\n", - "\n", - "\n", - " idx = self.__dabest_obj.idx\n", - " dat = self.__dabest_obj._plot_data\n", - " xvar = self.__dabest_obj._xvar\n", - " yvar = self.__dabest_obj._yvar\n", - "\n", - " # compute the variances of each control group and each test group\n", - " control_var=[]\n", - " test_var=[]\n", - " for j, current_tuple in enumerate(idx):\n", - " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " control_var.append(np.var(control, ddof=1))\n", - "\n", - " tname = current_tuple[1]\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", - " test_var.append(np.var(test, ddof=1))\n", - " self.__control_var = np.array(control_var)\n", - " self.__test_var = np.array(test_var)\n", - "\n", - " # Compute pooled group variances for each pair of experiment groups\n", - " # based on the raw data\n", - " self.__group_var = ci2g.calculate_group_var(self.__control_var, \n", - " self.__control_N,\n", - " self.__test_var, \n", - " self.__test_N)\n", - "\n", - " # Compute the weighted average mean differences of the bootstrap data\n", - " # using the pooled group 
variances of the raw data as the inverse of \n", - " # weights\n", - " self.__bootstraps_weighted_delta = ci2g.calculate_weighted_delta(\n", - " self.__group_var, \n", - " self.__bootstraps, \n", - " self.__resamples)\n", - "\n", - " # Compute the weighted average mean difference based on the raw data\n", - " self.__difference = es.weighted_delta(self.__effsizedf[\"difference\"],\n", - " self.__group_var)\n", - "\n", - " sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta)\n", - "\n", - "\n", - " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", - " self.__bootstraps_weighted_delta, self.__difference)\n", - " \n", - " self.__jackknives = np.array(ci1g.compute_1group_jackknife(\n", - " self.__bootstraps_weighted_delta, \n", - " np.mean))\n", - "\n", - " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", - "\n", - " # Compute BCa intervals.\n", - " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", - " self.__bias_correction, self.__acceleration_value,\n", - " self.__resamples, ci)\n", - " \n", - " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", - "\n", - " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", - " self.__bca_low = sorted_weighted_deltas[bca_idx_low]\n", - " self.__bca_high = sorted_weighted_deltas[bca_idx_high]\n", - "\n", - " err1 = \"The $lim_type limit of the interval\"\n", - " err2 = \"was in the $loc 10 values.\"\n", - " err3 = \"The result should be considered unstable.\"\n", - " err_temp = Template(\" \".join([err1, err2, err3]))\n", - "\n", - " if bca_idx_low <= 10:\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", - " loc=\"bottom\"),\n", - " stacklevel=1)\n", - "\n", - " if bca_idx_high >= self.__resamples-9:\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", - " loc=\"top\"),\n", - " stacklevel=1)\n", - "\n", - " else:\n", - " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", - " err2 = \"It is set to the effect size 
itself.\"\n", - " err3 = \"All bootstrap values were likely all the same.\"\n", - " err_temp = Template(\" \".join([err1, err2, err3]))\n", - "\n", - " if isnan(bca_idx_low):\n", - " self.__bca_low = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", - " stacklevel=0)\n", - "\n", - " if isnan(bca_idx_high):\n", - " self.__bca_high = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", - " stacklevel=0)\n", - "\n", - " # Compute percentile intervals.\n", - " pct_idx_low = int((self.__alpha/2) * self.__resamples)\n", - " pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples)\n", - "\n", - " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", - " self.__pct_low = sorted_weighted_deltas[pct_idx_low]\n", - " self.__pct_high = sorted_weighted_deltas[pct_idx_high]\n", - " \n", - " \n", - "\n", - " def __permutation_test(self):\n", - " \"\"\"\n", - " Perform a permutation test and obtain the permutation p-value\n", - " based on the permutation data.\n", - " \"\"\"\n", - " import numpy as np\n", - " self.__permutations = np.array(self.__effsizedf[\"permutations\"])\n", - " self.__permutations_var = np.array(self.__effsizedf[\"permutations_var\"])\n", - "\n", - " THRESHOLD = np.abs(self.__difference)\n", - "\n", - " all_num = []\n", - " all_denom = []\n", - "\n", - " groups = len(self.__permutations)\n", - " for i in range(0, len(self.__permutations[0])):\n", - " weight = [1/self.__permutations_var[j][i] for j in range(0, groups)]\n", - " all_num.append(np.sum([weight[j]*self.__permutations[j][i] for j in range(0, groups)]))\n", - " all_denom.append(np.sum(weight))\n", - " \n", - " output=[]\n", - " for i in range(0, len(all_num)):\n", - " output.append(all_num[i]/all_denom[i])\n", - " \n", - " self.__permutations_weighted_delta = np.array(output)\n", - "\n", - " count = sum(np.abs(self.__permutations_weighted_delta)>THRESHOLD)\n", - " self.__pvalue_permutation = count/self.__permutation_count\n", - 
"\n", - "\n", - "\n", - " def __repr__(self, header=True, sigfig=3):\n", - " from .__init__ import __version__\n", - " import datetime as dt\n", - " import numpy as np\n", - "\n", - " from .misc_tools import print_greeting\n", - " \n", - " is_paired = self.__dabest_obj.is_paired\n", - "\n", - " PAIRED_STATUS = {'baseline' : 'paired', \n", - " 'sequential' : 'paired',\n", - " 'None' : 'unpaired'\n", - " }\n", - "\n", - " first_line = {\"paired_status\": PAIRED_STATUS[str(is_paired)]}\n", - " \n", - "\n", - " out1 = \"The weighted-average {paired_status} mean differences \".format(**first_line)\n", - " \n", - " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", - " if \".\" in str(self.__ci):\n", - " ci_width = base_string_fmt.format(self.__ci)\n", - " else:\n", - " ci_width = str(self.__ci)\n", - " \n", - " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", - " \"ci\" : ci_width,\n", - " \"bca_low\" : base_string_fmt.format(self.__bca_low),\n", - " \"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", - " \n", - " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", - " out = out1 + out2\n", - "\n", - " if header is True:\n", - " out = print_greeting() + \"\\n\" + \"\\n\" + out\n", - "\n", - "\n", - " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", - "\n", - " \n", - " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(pval_rounded)\n", - " p2 = \"calculated for legacy purposes only. 
\"\n", - " pvalue = p1 + p2\n", - "\n", - "\n", - " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", - " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", - " bs = bs1 + bs2\n", - "\n", - " pval_def1 = \"Any p-value reported is the probability of observing the\" + \\\n", - " \"effect size (or greater),\\nassuming the null hypothesis of\" + \\\n", - " \"zero difference is true.\"\n", - " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", - " \"control and test labels were performed.\"\n", - " pval_def = pval_def1 + pval_def2\n", - "\n", - "\n", - " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", - "\n", - "\n", - " def to_dict(self):\n", - " \"\"\"\n", - " Returns all attributes of the `dabest.MiniMetaDelta` object as a\n", - " dictionary.\n", - " \"\"\"\n", - " # Only get public (user-facing) attributes.\n", - " attrs = [a for a in dir(self)\n", - " if not a.startswith((\"_\", \"to_dict\"))]\n", - " out = {}\n", - " for a in attrs:\n", - " out[a] = getattr(self, a)\n", - " return out\n", - "\n", - "\n", - " @property\n", - " def ci(self):\n", - " \"\"\"\n", - " Returns the width of the confidence interval, in percent.\n", - " \"\"\"\n", - " return self.__ci\n", - "\n", - "\n", - " @property\n", - " def alpha(self):\n", - " \"\"\"\n", - " Returns the significance level of the statistical test as a float\n", - " between 0 and 1.\n", - " \"\"\"\n", - " return self.__alpha\n", - "\n", - "\n", - " @property\n", - " def bias_correction(self):\n", - " return self.__bias_correction\n", - "\n", - "\n", - " @property\n", - " def bootstraps(self):\n", - " '''\n", - " Return the bootstrapped differences from all the experiment groups.\n", - " '''\n", - " return self.__bootstraps\n", - "\n", - "\n", - " @property\n", - " def jackknives(self):\n", - " return self.__jackknives\n", - "\n", - "\n", - " @property\n", - " def acceleration_value(self):\n", - " return self.__acceleration_value\n", - 
"\n", - "\n", - " @property\n", - " def bca_low(self):\n", - " \"\"\"\n", - " The bias-corrected and accelerated confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__bca_low\n", - "\n", - "\n", - " @property\n", - " def bca_high(self):\n", - " \"\"\"\n", - " The bias-corrected and accelerated confidence interval upper limit.\n", - " \"\"\"\n", - " return self.__bca_high\n", - "\n", - "\n", - " @property\n", - " def bca_interval_idx(self):\n", - " return self.__bca_interval_idx\n", - "\n", - "\n", - " @property\n", - " def control(self):\n", - " '''\n", - " Return the names of the control groups from all the experiment \n", - " groups in order.\n", - " '''\n", - " return self.__control\n", - "\n", - "\n", - " @property\n", - " def test(self):\n", - " '''\n", - " Return the names of the test groups from all the experiment \n", - " groups in order.\n", - " '''\n", - " return self.__test\n", - " \n", - " @property\n", - " def control_N(self):\n", - " '''\n", - " Return the sizes of the control groups from all the experiment \n", - " groups in order.\n", - " '''\n", - " return self.__control_N\n", - "\n", - "\n", - " @property\n", - " def test_N(self):\n", - " '''\n", - " Return the sizes of the test groups from all the experiment \n", - " groups in order.\n", - " '''\n", - " return self.__test_N\n", - "\n", - "\n", - " @property\n", - " def control_var(self):\n", - " '''\n", - " Return the estimated population variances of the control groups \n", - " from all the experiment groups in order. Here the population \n", - " variance is estimated from the sample variance. \n", - " '''\n", - " return self.__control_var\n", - "\n", - "\n", - " @property\n", - " def test_var(self):\n", - " '''\n", - " Return the estimated population variances of the control groups \n", - " from all the experiment groups in order. Here the population \n", - " variance is estimated from the sample variance. 
\n", - " '''\n", - " return self.__test_var\n", - "\n", - " \n", - " @property\n", - " def group_var(self):\n", - " '''\n", - " Return the pooled group variances of all the experiment groups \n", - " in order. \n", - " '''\n", - " return self.__group_var\n", - "\n", - "\n", - " @property\n", - " def bootstraps_weighted_delta(self):\n", - " '''\n", - " Return the weighted-average mean differences calculated from the bootstrapped \n", - " deltas and weights across the experiment groups, where the weights are \n", - " the inverse of the pooled group variances.\n", - " '''\n", - " return self.__bootstraps_weighted_delta\n", - "\n", - "\n", - " @property\n", - " def difference(self):\n", - " '''\n", - " Return the weighted-average delta calculated from the raw data.\n", - " '''\n", - " return self.__difference\n", - "\n", - "\n", - " @property\n", - " def pct_interval_idx (self):\n", - " return self.__pct_interval_idx \n", - "\n", - "\n", - " @property\n", - " def pct_low(self):\n", - " \"\"\"\n", - " The percentile confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__pct_low\n", - "\n", - "\n", - " @property\n", - " def pct_high(self):\n", - " \"\"\"\n", - " The percentile confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__pct_high\n", - "\n", - "\n", - " @property\n", - " def pvalue_permutation(self):\n", - " try:\n", - " return self.__pvalue_permutation\n", - " except AttributeError:\n", - " self.__permutation_test()\n", - " return self.__pvalue_permutation\n", - " \n", - "\n", - " @property\n", - " def permutation_count(self):\n", - " \"\"\"\n", - " The number of permuations taken.\n", - " \"\"\"\n", - " return self.__permutation_count\n", - "\n", - " \n", - " @property\n", - " def permutations(self):\n", - " '''\n", - " Return the mean differences of permutations obtained during\n", - " the permutation test for each experiment group.\n", - " '''\n", - " try:\n", - " return self.__permutations\n", - " except AttributeError:\n", - 
" self.__permutation_test()\n", - " return self.__permutations\n", - "\n", - "\n", - " @property\n", - " def permutations_var(self):\n", - " '''\n", - " Return the pooled group variances of permutations obtained during\n", - " the permutation test for each experiment group.\n", - " '''\n", - " try:\n", - " return self.__permutations_var\n", - " except AttributeError:\n", - " self.__permutation_test()\n", - " return self.__permutations_var\n", - "\n", - " \n", - " @property\n", - " def permutations_weighted_delta(self):\n", - " '''\n", - " Return the weighted-average deltas of permutations obtained \n", - " during the permutation test.\n", - " '''\n", - " try:\n", - " return self.__permutations_weighted_delta\n", - " except AttributeError:\n", - " self.__permutation_test()\n", - " return self.__permutations_weighted_delta\n", - "\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "ae5bac56", - "metadata": {}, - "source": [ - "The weighted delta is calcuated as follows:\n", - "\n", - "$$\\theta_{\\text{weighted}} = \\frac{\\Sigma\\hat{\\theta_{i}}w_{i}}{{\\Sigma}w_{i}}$$\n", - "\n", - "where:\n", - "\n", - "$$\\hat{\\theta_{i}} = \\text{Mean difference for replicate }i$$\n", - "\n", - "\n", - "$$w_{i} = \\text{Weight for replicate }i = \\frac{1}{s_{i}^2} $$\n", - "\n", - "$$s_{i}^2 = \\text{Pooled variance for replicate }i = \\frac{(n_{test}-1)s_{test}^2+(n_{control}-1)s_{control}^2}{n_{test}+n_{control}-2}$$\n", - "\n", - "$$n = \\text{sample size and }s^2 = \\text{variance for control/test.}$$\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "dc1239ee", - "metadata": {}, - "source": [ - "#### Example: mini-meta-delta" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e144ed50", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DABEST v2023.03.29\n", - "==================\n", - " \n", - "Good afternoon!\n", - "The current time is Tue Apr 18 14:47:44 2023.\n", - "\n", - "The 
weighted-average unpaired mean differences is 0.0336 [95%CI -0.137, 0.228].\n", - "The p-value of the two-sided permutation t-test is 0.736, calculated for legacy purposes only. \n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing theeffect size (or greater),\n", - "assuming the null hypothesis ofzero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed." - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Ns = 20\n", - "c1 = norm.rvs(loc=3, scale=0.4, size=Ns)\n", - "c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", - "c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", - "t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)\n", - "t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)\n", - "t3 = norm.rvs(loc=3, scale=0.75, size=Ns)\n", - "my_df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1,\n", - " 'Control 2' : c2, 'Test 2' : t2,\n", - " 'Control 3' : c3, 'Test 3' : t3})\n", - "my_dabest_object = dabest.load(my_df, idx=((\"Control 1\", \"Test 1\"), (\"Control 2\", \"Test 2\"), (\"Control 3\", \"Test 3\")), mini_meta=True)\n", - "my_dabest_object.mean_diff.mini_meta_delta" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "669285cb", - "metadata": {}, - "source": [ - "As of version 2023.02.14, weighted delta can only be calculated for mean difference, and not for standardized measures such as Cohen's *d*.\n", - "\n", - "Details about the calculated weighted delta are accessed as attributes of the ``mini_meta_delta`` class. 
See the `minimetadelta` for details on usage.\n", - "\n", - "Refer to Chapter 10 of the Cochrane handbook for further information on meta-analysis: \n", - "https://training.cochrane.org/handbook/current/chapter-10\n", - "\t\t" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6017e0d4", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class TwoGroupsEffectSize(object):\n", - "\n", - " \"\"\"\n", - " A class to compute and store the results of bootstrapped\n", - " mean differences between two groups.\n", - " \n", - " Compute the effect size between two groups.\n", - "\n", - " Parameters\n", - " ----------\n", - " control : array-like\n", - " test : array-like\n", - " These should be numerical iterables.\n", - " effect_size : string.\n", - " Any one of the following are accepted inputs:\n", - " 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'\n", - " is_paired : string, default None\n", - " resamples : int, default 5000\n", - " The number of bootstrap resamples to be taken for the calculation\n", - " of the confidence interval limits.\n", - " permutation_count : int, default 5000\n", - " The number of permutations (reshuffles) to perform for the \n", - " computation of the permutation p-value\n", - " ci : float, default 95\n", - " The confidence interval width. The default of 95 produces 95%\n", - " confidence intervals.\n", - " random_seed : int, default 12345\n", - " `random_seed` is used to seed the random number generator during\n", - " bootstrap resampling. 
This ensures that the confidence intervals\n", - " reported are replicable.\n", - "\n", - " Returns\n", - " -------\n", - " A :py:class:`TwoGroupEffectSize` object:\n", - " `difference` : float\n", - " The effect size of the difference between the control and the test.\n", - " `effect_size` : string\n", - " The type of effect size reported.\n", - " `is_paired` : string\n", - " The type of repeated-measures experiment.\n", - " `ci` : float\n", - " Returns the width of the confidence interval, in percent.\n", - " `alpha` : float\n", - " Returns the significance level of the statistical test as a float between 0 and 1.\n", - " `resamples` : int\n", - " The number of resamples performed during the bootstrap procedure.\n", - " `bootstraps` : numpy ndarray\n", - " The generated bootstraps of the effect size.\n", - " `random_seed` : int\n", - " The number used to initialise the numpy random seed generator, ie.`seed_value` from `numpy.random.seed(seed_value)` is returned.\n", - " `bca_low, bca_high` : float\n", - " The bias-corrected and accelerated confidence interval lower limit and upper limits, respectively.\n", - " `pct_low, pct_high` : float\n", - " The percentile confidence interval lower limit and upper limits, respectively.\n", - " \"\"\"\n", - "\n", - " def __init__(self, control, test, effect_size,\n", - " proportional=False,\n", - " is_paired=None, ci=95,\n", - " resamples=5000, \n", - " permutation_count=5000, \n", - " random_seed=12345):\n", - "\n", - " \n", - " import numpy as np\n", - " from numpy import array, isnan, isinf\n", - " from numpy import sort as npsort\n", - " from numpy.random import choice, seed\n", - "\n", - " import scipy.stats as spstats\n", - "\n", - " # import statsmodels.stats.power as power\n", - " import statsmodels\n", - "\n", - " from string import Template\n", - " import warnings\n", - " \n", - " from ._stats_tools import effsize as es\n", - " from ._stats_tools import confint_2group_diff as ci2g\n", - "\n", - "\n", - " 
self.__EFFECT_SIZE_DICT = {\"mean_diff\" : \"mean difference\",\n", - " \"median_diff\" : \"median difference\",\n", - " \"cohens_d\" : \"Cohen's d\",\n", - " \"cohens_h\" : \"Cohen's h\",\n", - " \"hedges_g\" : \"Hedges' g\",\n", - " \"cliffs_delta\" : \"Cliff's delta\",\n", - " \"delta_g\" : \"deltas' g\"}\n", - "\n", - "\n", - " kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()]\n", - " if effect_size not in kosher_es:\n", - " err1 = \"The effect size '{}'\".format(effect_size)\n", - " err2 = \"is not one of {}\".format(kosher_es)\n", - " raise ValueError(\" \".join([err1, err2]))\n", - "\n", - " if effect_size == \"cliffs_delta\" and is_paired:\n", - " err1 = \"`paired` is not None; therefore Cliff's delta is not defined.\"\n", - " raise ValueError(err1)\n", - "\n", - " if proportional==True and effect_size not in ['mean_diff','cohens_h']:\n", - " err1 = \"`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined.\"\n", - " raise ValueError(err1)\n", - "\n", - " if proportional==True and (np.isin(control, [0, 1]).all() == False or np.isin(test, [0, 1]).all() == False):\n", - " err1 = \"`proportional` is True; Only accept binary data consisting of 0 and 1.\"\n", - " raise ValueError(err1)\n", - "\n", - " # Convert to numpy arrays for speed.\n", - " # NaNs are automatically dropped.\n", - " control = array(control)\n", - " test = array(test)\n", - " control = control[~isnan(control)]\n", - " test = test[~isnan(test)]\n", - "\n", - " self.__effect_size = effect_size\n", - " self.__control = control\n", - " self.__test = test\n", - " self.__is_paired = is_paired\n", - " self.__resamples = resamples\n", - " self.__permutation_count = permutation_count\n", - " self.__random_seed = random_seed\n", - " self.__ci = ci\n", - " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", - "\n", - " self.__difference = es.two_group_difference(\n", - " control, test, is_paired, effect_size)\n", - " \n", - " self.__jackknives = 
ci2g.compute_meandiff_jackknife(\n", - " control, test, is_paired, effect_size)\n", - "\n", - " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", - "\n", - " bootstraps = ci2g.compute_bootstrapped_diff(\n", - " control, test, is_paired, effect_size,\n", - " resamples, random_seed)\n", - " self.__bootstraps = bootstraps\n", - " \n", - " sorted_bootstraps = npsort(self.__bootstraps)\n", - " # Added in v0.2.6.\n", - " # Raises a UserWarning if there are any infiinities in the bootstraps.\n", - " num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)])\n", - " \n", - " if num_infinities > 0:\n", - " warn_msg = \"There are {} bootstrap(s) that are not defined. \"\\\n", - " \"This is likely due to smaple sample sizes. \"\\\n", - " \"The values in a bootstrap for a group will be more likely \"\\\n", - " \"to be all equal, with a resulting variance of zero. \"\\\n", - " \"The computation of Cohen's d and Hedges' g thus \"\\\n", - " \"involved a division by zero. \"\n", - " warnings.warn(warn_msg.format(num_infinities), \n", - " category=UserWarning)\n", - "\n", - " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", - " self.__bootstraps, self.__difference)\n", - "\n", - " # Compute BCa intervals.\n", - " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", - " self.__bias_correction, self.__acceleration_value,\n", - " self.__resamples, ci)\n", - "\n", - " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", - "\n", - " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", - " self.__bca_low = sorted_bootstraps[bca_idx_low]\n", - " self.__bca_high = sorted_bootstraps[bca_idx_high]\n", - "\n", - " err1 = \"The $lim_type limit of the interval\"\n", - " err2 = \"was in the $loc 10 values.\"\n", - " err3 = \"The result should be considered unstable.\"\n", - " err_temp = Template(\" \".join([err1, err2, err3]))\n", - "\n", - " if bca_idx_low <= 10:\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", - " 
loc=\"bottom\"),\n", - " stacklevel=1)\n", - "\n", - " if bca_idx_high >= resamples-9:\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", - " loc=\"top\"),\n", - " stacklevel=1)\n", - "\n", - " else:\n", - " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", - " err2 = \"It is set to the effect size itself.\"\n", - " err3 = \"All bootstrap values were likely all the same.\"\n", - " err_temp = Template(\" \".join([err1, err2, err3]))\n", - "\n", - " if isnan(bca_idx_low):\n", - " self.__bca_low = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", - " stacklevel=0)\n", - "\n", - " if isnan(bca_idx_high):\n", - " self.__bca_high = self.__difference\n", - " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", - " stacklevel=0)\n", - "\n", - " # Compute percentile intervals.\n", - " pct_idx_low = int((self.__alpha/2) * resamples)\n", - " pct_idx_high = int((1-(self.__alpha/2)) * resamples)\n", - "\n", - " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", - " self.__pct_low = sorted_bootstraps[pct_idx_low]\n", - " self.__pct_high = sorted_bootstraps[pct_idx_high]\n", - "\n", - " # Perform statistical tests.\n", - " \n", - " self.__PermutationTest_result = PermutationTest(control, test, \n", - " effect_size, \n", - " is_paired,\n", - " permutation_count)\n", - " \n", - " if is_paired and proportional is False:\n", - " # Wilcoxon, a non-parametric version of the paired T-test.\n", - " wilcoxon = spstats.wilcoxon(control, test)\n", - " self.__pvalue_wilcoxon = wilcoxon.pvalue\n", - " self.__statistic_wilcoxon = wilcoxon.statistic\n", - " \n", - " \n", - " if effect_size != \"median_diff\":\n", - " # Paired Student's t-test.\n", - " paired_t = spstats.ttest_rel(control, test, nan_policy='omit')\n", - " self.__pvalue_paired_students_t = paired_t.pvalue\n", - " self.__statistic_paired_students_t = paired_t.statistic\n", - "\n", - " standardized_es = es.cohens_d(control, test, is_paired)\n", 
- " # self.__power = power.tt_solve_power(standardized_es,\n", - " # len(control),\n", - " # alpha=self.__alpha)\n", - "\n", - " elif is_paired and proportional is True:\n", - " # for binary paired data, use McNemar's test\n", - " # References:\n", - " # https://en.wikipedia.org/wiki/McNemar%27s_test\n", - " from statsmodels.stats.contingency_tables import mcnemar\n", - " import pandas as pd\n", - " df_temp = pd.DataFrame({'control': control, 'test': test})\n", - " x1 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 0)])\n", - " x2 = len(df_temp[(df_temp['control'] == 0)&(df_temp['test'] == 1)])\n", - " x3 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 0)])\n", - " x4 = len(df_temp[(df_temp['control'] == 1)&(df_temp['test'] == 1)])\n", - " table = [[x1,x2],[x3,x4]]\n", - " _mcnemar = mcnemar(table, exact=True, correction=True)\n", - " self.__pvalue_mcnemar = _mcnemar.pvalue\n", - " self.__statistic_mcnemar = _mcnemar.statistic\n", - "\n", - " elif effect_size == \"cliffs_delta\":\n", - " # Let's go with Brunner-Munzel!\n", - " brunner_munzel = spstats.brunnermunzel(control, test,\n", - " nan_policy='omit')\n", - " self.__pvalue_brunner_munzel = brunner_munzel.pvalue\n", - " self.__statistic_brunner_munzel = brunner_munzel.statistic\n", - "\n", - "\n", - " elif effect_size == \"median_diff\":\n", - " # According to scipy's documentation of the function,\n", - " # \"The Kruskal-Wallis H-test tests the null hypothesis\n", - " # that the population median of all of the groups are equal.\"\n", - " kruskal = spstats.kruskal(control, test, nan_policy='omit')\n", - " self.__pvalue_kruskal = kruskal.pvalue\n", - " self.__statistic_kruskal = kruskal.statistic\n", - " # self.__power = np.nan\n", - "\n", - " else: # for mean difference, Cohen's d, and Hedges' g.\n", - " # Welch's t-test, assumes normality of distributions,\n", - " # but does not assume equal variances.\n", - " welch = spstats.ttest_ind(control, test, equal_var=False,\n", - " 
nan_policy='omit')\n", - " self.__pvalue_welch = welch.pvalue\n", - " self.__statistic_welch = welch.statistic\n", - "\n", - " # Student's t-test, assumes normality of distributions,\n", - " # as well as assumption of equal variances.\n", - " students_t = spstats.ttest_ind(control, test, equal_var=True,\n", - " nan_policy='omit')\n", - " self.__pvalue_students_t = students_t.pvalue\n", - " self.__statistic_students_t = students_t.statistic\n", - "\n", - " # Mann-Whitney test: Non parametric,\n", - " # does not assume normality of distributions\n", - " try:\n", - " mann_whitney = spstats.mannwhitneyu(control, test, \n", - " alternative='two-sided')\n", - " self.__pvalue_mann_whitney = mann_whitney.pvalue\n", - " self.__statistic_mann_whitney = mann_whitney.statistic\n", - " except ValueError:\n", - " # Occurs when the control and test are exactly identical\n", - " # in terms of rank (eg. all zeros.)\n", - " pass\n", - " \n", - " \n", - "\n", - " standardized_es = es.cohens_d(control, test, is_paired = None)\n", - " \n", - " # The Cohen's h calculation is for binary categorical data\n", - " try:\n", - " self.__proportional_difference = es.cohens_h(control, test)\n", - " except ValueError:\n", - " # Occur only when the data consists not only 0's and 1's.\n", - " pass\n", - " # self.__power = power.tt_ind_solve_power(standardized_es,\n", - " # len(control),\n", - " # alpha=self.__alpha,\n", - " # ratio=len(test)/len(control)\n", - " # )\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - " def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):\n", - " \n", - " # # Deprecated in v0.3.0; permutation p-values will be reported by default.\n", - " # UNPAIRED_ES_TO_TEST = {\"mean_diff\" : \"Mann-Whitney\",\n", - " # \"median_diff\" : \"Kruskal\",\n", - " # \"cohens_d\" : \"Mann-Whitney\",\n", - " # \"hedges_g\" : \"Mann-Whitney\",\n", - " # \"cliffs_delta\" : \"Brunner-Munzel\"}\n", - " # \n", - " # TEST_TO_PVAL_ATTR = {\"Mann-Whitney\" : 
\"pvalue_mann_whitney\",\n", - " # \"Kruskal\" : \"pvalue_kruskal\",\n", - " # \"Brunner-Munzel\" : \"pvalue_brunner_munzel\",\n", - " # \"Wilcoxon\" : \"pvalue_wilcoxon\"}\n", - " \n", - " RM_STATUS = {'baseline' : 'for repeated measures against baseline \\n', \n", - " 'sequential': 'for the sequential design of repeated-measures experiment \\n',\n", - " 'None' : ''\n", - " }\n", - "\n", - " PAIRED_STATUS = {'baseline' : 'paired', \n", - " 'sequential' : 'paired',\n", - " 'None' : 'unpaired'\n", - " }\n", - "\n", - " first_line = {\"rm_status\" : RM_STATUS[str(self.__is_paired)],\n", - " \"es\" : self.__EFFECT_SIZE_DICT[self.__effect_size],\n", - " \"paired_status\": PAIRED_STATUS[str(self.__is_paired)]}\n", - " \n", - "\n", - " out1 = \"The {paired_status} {es} {rm_status}\".format(**first_line)\n", - " \n", - " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", - " if \".\" in str(self.__ci):\n", - " ci_width = base_string_fmt.format(self.__ci)\n", - " else:\n", - " ci_width = str(self.__ci)\n", - " \n", - " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", - " \"ci\" : ci_width,\n", - " \"bca_low\" : base_string_fmt.format(self.__bca_low),\n", - " \"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", - " \n", - " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", - " out = out1 + out2\n", - " \n", - " # # Deprecated in v0.3.0; permutation p-values will be reported by default.\n", - " # if self.__is_paired:\n", - " # stats_test = \"Wilcoxon\"\n", - " # else:\n", - " # stats_test = UNPAIRED_ES_TO_TEST[self.__effect_size]\n", - " \n", - " \n", - " # pval_rounded = base_string_fmt.format(getattr(self,\n", - " # TEST_TO_PVAL_ATTR[stats_test])\n", - " # )\n", - " \n", - " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", - " \n", - " # # Deprecated in v0.3.0; permutation p-values will be reported by default.\n", - " # pvalue = \"The two-sided p-value of the {} test is {}.\".format(stats_test,\n", - " # 
pval_rounded)\n", - " \n", - " # pvalue = \"The two-sided p-value of the {} test is {}.\".format(stats_test,\n", - " # pval_rounded)\n", - " \n", - " \n", - " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(pval_rounded)\n", - " p2 = \"calculated for legacy purposes only. \"\n", - " pvalue = p1 + p2\n", - " \n", - " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", - " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", - " bs = bs1 + bs2\n", - "\n", - " pval_def1 = \"Any p-value reported is the probability of observing the\" + \\\n", - " \"effect size (or greater),\\nassuming the null hypothesis of\" + \\\n", - " \"zero difference is true.\"\n", - " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", - " \"control and test labels were performed.\"\n", - " pval_def = pval_def1 + pval_def2\n", - "\n", - " if show_resample_count and define_pval:\n", - " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", - " elif show_resample_count is False and define_pval is True:\n", - " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, pval_def)\n", - " elif show_resample_count is True and define_pval is False:\n", - " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, bs)\n", - " else:\n", - " return \"{}\\n{}\".format(out, pvalue)\n", - "\n", - "\n", - "\n", - " def to_dict(self):\n", - " \"\"\"\n", - " Returns the attributes of the `dabest.TwoGroupEffectSize` object as a\n", - " dictionary.\n", - " \"\"\"\n", - " # Only get public (user-facing) attributes.\n", - " attrs = [a for a in dir(self)\n", - " if not a.startswith((\"_\", \"to_dict\"))]\n", - " out = {}\n", - " for a in attrs:\n", - " out[a] = getattr(self, a)\n", - " return out\n", - "\n", - "\n", - " @property\n", - " def difference(self):\n", - " \"\"\"\n", - " Returns the difference between the control and the test.\n", - " \"\"\"\n", - " return self.__difference\n", - "\n", - " @property\n", - " def 
effect_size(self):\n", - " \"\"\"\n", - " Returns the type of effect size reported.\n", - " \"\"\"\n", - " return self.__EFFECT_SIZE_DICT[self.__effect_size]\n", - "\n", - " @property\n", - " def is_paired(self):\n", - " return self.__is_paired\n", - "\n", - " @property\n", - " def ci(self):\n", - " \"\"\"\n", - " Returns the width of the confidence interval, in percent.\n", - " \"\"\"\n", - " return self.__ci\n", - "\n", - " @property\n", - " def alpha(self):\n", - " \"\"\"\n", - " Returns the significance level of the statistical test as a float\n", - " between 0 and 1.\n", - " \"\"\"\n", - " return self.__alpha\n", - "\n", - " @property\n", - " def resamples(self):\n", - " \"\"\"\n", - " The number of resamples performed during the bootstrap procedure.\n", - " \"\"\"\n", - " return self.__resamples\n", - "\n", - " @property\n", - " def bootstraps(self):\n", - " \"\"\"\n", - " The generated bootstraps of the effect size.\n", - " \"\"\"\n", - " return self.__bootstraps\n", - "\n", - " @property\n", - " def random_seed(self):\n", - " \"\"\"\n", - " The number used to initialise the numpy random seed generator, ie.\n", - " `seed_value` from `numpy.random.seed(seed_value)` is returned.\n", - " \"\"\"\n", - " return self.__random_seed\n", - "\n", - " @property\n", - " def bca_interval_idx(self):\n", - " return self.__bca_interval_idx\n", - "\n", - " @property\n", - " def bca_low(self):\n", - " \"\"\"\n", - " The bias-corrected and accelerated confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__bca_low\n", - "\n", - " @property\n", - " def bca_high(self):\n", - " \"\"\"\n", - " The bias-corrected and accelerated confidence interval upper limit.\n", - " \"\"\"\n", - " return self.__bca_high\n", - "\n", - " @property\n", - " def pct_interval_idx(self):\n", - " return self.__pct_interval_idx\n", - "\n", - " @property\n", - " def pct_low(self):\n", - " \"\"\"\n", - " The percentile confidence interval lower limit.\n", - " \"\"\"\n", - " return 
self.__pct_low\n", - "\n", - " @property\n", - " def pct_high(self):\n", - " \"\"\"\n", - " The percentile confidence interval lower limit.\n", - " \"\"\"\n", - " return self.__pct_high\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_brunner_munzel(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__pvalue_brunner_munzel\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_brunner_munzel(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__statistic_brunner_munzel\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_wilcoxon(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__pvalue_wilcoxon\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_wilcoxon(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__statistic_wilcoxon\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def pvalue_mcnemar(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__pvalue_mcnemar\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_mcnemar(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__statistic_mcnemar\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_paired_students_t(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__pvalue_paired_students_t\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_paired_students_t(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__statistic_paired_students_t\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def 
pvalue_kruskal(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__pvalue_kruskal\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_kruskal(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__statistic_kruskal\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_welch(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__pvalue_welch\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_welch(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__statistic_welch\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_students_t(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__pvalue_students_t\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - " @property\n", - " def statistic_students_t(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__statistic_students_t\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def pvalue_mann_whitney(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__pvalue_mann_whitney\n", - " except AttributeError:\n", - " return npnan\n", - "\n", - "\n", - "\n", - " @property\n", - " def statistic_mann_whitney(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__statistic_mann_whitney\n", - " except AttributeError:\n", - " return npnan\n", - " \n", - " # Introduced in v0.3.0.\n", - " @property\n", - " def pvalue_permutation(self):\n", - " return self.__PermutationTest_result.pvalue\n", - " \n", - " # \n", - " # \n", - " @property\n", - " def permutation_count(self):\n", - " \"\"\"\n", - " The number of permuations taken.\n", - " \"\"\"\n", - " 
return self.__PermutationTest_result.permutation_count\n", - "\n", - " \n", - " @property\n", - " def permutations(self):\n", - " return self.__PermutationTest_result.permutations\n", - "\n", - " \n", - " @property\n", - " def permutations_var(self):\n", - " return self.__PermutationTest_result.permutations_var\n", - "\n", - "\n", - " @property\n", - " def proportional_difference(self):\n", - " from numpy import nan as npnan\n", - " try:\n", - " return self.__proportional_difference\n", - " except AttributeError:\n", - " return npnan\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d72ccb04", - "metadata": {}, - "source": [ - "#### Example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5d8a7a87", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "The unpaired mean difference is -0.253 [95%CI -0.78, 0.25].\n", - "The p-value of the two-sided permutation t-test is 0.348, calculated for legacy purposes only. \n", - "\n", - "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", - "Any p-value reported is the probability of observing theeffect size (or greater),\n", - "assuming the null hypothesis ofzero difference is true.\n", - "For each p-value, 5000 reshuffles of the control and test labels were performed." 
- ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.random.seed(12345)\n", - "control = norm.rvs(loc=0, size=30)\n", - "test = norm.rvs(loc=0.5, size=30)\n", - "effsize = dabest.TwoGroupsEffectSize(control, test, \"mean_diff\")\n", - "effsize" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72a4c93e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'alpha': 0.05,\n", - " 'bca_high': 0.24951887238295106,\n", - " 'bca_interval_idx': (125, 4875),\n", - " 'bca_low': -0.7801782111071534,\n", - " 'bootstraps': array([-0.3649424 , -0.45018155, -0.56034412, ..., -0.49805581,\n", - " -0.25334475, -0.55206229]),\n", - " 'ci': 95,\n", - " 'difference': -0.25315417702752846,\n", - " 'effect_size': 'mean difference',\n", - " 'is_paired': None,\n", - " 'pct_high': 0.24951887238295106,\n", - " 'pct_interval_idx': (125, 4875),\n", - " 'pct_low': -0.7801782111071534,\n", - " 'permutation_count': 5000,\n", - " 'permutations': array([ 0.17221029, 0.03112419, -0.13911387, ..., -0.38007941,\n", - " 0.30261507, -0.09073054]),\n", - " 'permutations_var': array([0.07201642, 0.07251104, 0.07219407, ..., 0.07003705, 0.07094885,\n", - " 0.07238581]),\n", - " 'proportional_difference': nan,\n", - " 'pvalue_brunner_munzel': nan,\n", - " 'pvalue_kruskal': nan,\n", - " 'pvalue_mann_whitney': 0.5201446121616038,\n", - " 'pvalue_mcnemar': nan,\n", - " 'pvalue_paired_students_t': nan,\n", - " 'pvalue_permutation': 0.3484,\n", - " 'pvalue_students_t': 0.34743913903372836,\n", - " 'pvalue_welch': 0.3474493875548964,\n", - " 'pvalue_wilcoxon': nan,\n", - " 'random_seed': 12345,\n", - " 'resamples': 5000,\n", - " 'statistic_brunner_munzel': nan,\n", - " 'statistic_kruskal': nan,\n", - " 'statistic_mann_whitney': 494.0,\n", - " 'statistic_mcnemar': nan,\n", - " 'statistic_paired_students_t': nan,\n", - " 'statistic_students_t': 0.9472545159069105,\n", - " 'statistic_welch': 
0.9472545159069105,\n", - " 'statistic_wilcoxon': nan}" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "effsize.to_dict() " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eb366b18", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class EffectSizeDataFrame(object):\n", - " \"\"\"A class that generates and stores the results of bootstrapped effect\n", - " sizes for several comparisons.\"\"\"\n", - "\n", - " def __init__(self, dabest, effect_size,\n", - " is_paired, ci=95, proportional=False,\n", - " resamples=5000, \n", - " permutation_count=5000,\n", - " random_seed=12345, \n", - " x1_level=None, x2=None, \n", - " delta2=False, experiment_label=None,\n", - " mini_meta=False):\n", - " \"\"\"\n", - " Parses the data from a Dabest object, enabling plotting and printing\n", - " capability for the effect size of interest.\n", - " \"\"\"\n", - "\n", - " self.__dabest_obj = dabest\n", - " self.__effect_size = effect_size\n", - " self.__is_paired = is_paired\n", - " self.__ci = ci\n", - " self.__resamples = resamples\n", - " self.__permutation_count = permutation_count\n", - " self.__random_seed = random_seed\n", - " self.__proportional = proportional\n", - " self.__x1_level = x1_level\n", - " self.__experiment_label = experiment_label \n", - " self.__x2 = x2\n", - " self.__delta2 = delta2 \n", - " self.__mini_meta = mini_meta\n", - "\n", - "\n", - " def __pre_calc(self):\n", - " import pandas as pd\n", - " from .misc_tools import print_greeting, get_varname\n", - " from ._stats_tools import confint_2group_diff as ci2g\n", - "\n", - " idx = self.__dabest_obj.idx\n", - " dat = self.__dabest_obj._plot_data\n", - " xvar = self.__dabest_obj._xvar\n", - " yvar = self.__dabest_obj._yvar\n", - "\n", - " out = []\n", - " reprs = []\n", - " \n", - " if self.__delta2==True:\n", - " mixed_data = []\n", - " for j, current_tuple in enumerate(idx):\n", - " if self.__is_paired 
!= \"sequential\":\n", - " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - "\n", - " for ix, tname in enumerate(current_tuple[1:]):\n", - " if self.__is_paired == \"sequential\":\n", - " cname = current_tuple[ix]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", - " mixed_data.append(control)\n", - " mixed_data.append(test)\n", - " bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(mixed_data[0], mixed_data[1], mixed_data[2], mixed_data[3],\n", - " self.__is_paired, self.__resamples, self.__random_seed)\n", - "\n", - "\n", - " for j, current_tuple in enumerate(idx):\n", - " if self.__is_paired!=\"sequential\":\n", - " cname = current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - "\n", - " for ix, tname in enumerate(current_tuple[1:]):\n", - " if self.__is_paired == \"sequential\":\n", - " cname = current_tuple[ix]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", - "\n", - " result = TwoGroupsEffectSize(control, test,\n", - " self.__effect_size,\n", - " self.__proportional,\n", - " self.__is_paired,\n", - " self.__ci,\n", - " self.__resamples,\n", - " self.__permutation_count,\n", - " self.__random_seed)\n", - " r_dict = result.to_dict()\n", - " r_dict[\"control\"] = cname\n", - " r_dict[\"test\"] = tname\n", - " r_dict[\"control_N\"] = int(len(control))\n", - " r_dict[\"test_N\"] = int(len(test))\n", - " out.append(r_dict)\n", - " if j == len(idx)-1 and ix == len(current_tuple)-2:\n", - " if self.__delta2 and self.__effect_size in [\"mean_diff\",\"delta_g\"]:\n", - " resamp_count = False\n", - " def_pval = False\n", - " elif self.__mini_meta and self.__effect_size == \"mean_diff\":\n", - " resamp_count = False\n", - " def_pval = False\n", - " else:\n", - " resamp_count = True\n", - " def_pval = True\n", - " else:\n", - " resamp_count = False\n", - " def_pval = False\n", - 
"\n", - " text_repr = result.__repr__(show_resample_count=resamp_count,\n", - " define_pval=def_pval)\n", - "\n", - " to_replace = \"between {} and {} is\".format(cname, tname)\n", - " text_repr = text_repr.replace(\"is\", to_replace, 1)\n", - "\n", - " reprs.append(text_repr)\n", - "\n", - "\n", - " self.__for_print = \"\\n\\n\".join(reprs)\n", - "\n", - " out_ = pd.DataFrame(out)\n", - "\n", - " columns_in_order = ['control', 'test', 'control_N', 'test_N',\n", - " 'effect_size', 'is_paired',\n", - " 'difference', 'ci',\n", - "\n", - " 'bca_low', 'bca_high', 'bca_interval_idx',\n", - " 'pct_low', 'pct_high', 'pct_interval_idx',\n", - " \n", - " 'bootstraps', 'resamples', 'random_seed',\n", - " \n", - " 'permutations', 'pvalue_permutation', 'permutation_count', 'permutations_var',\n", - " \n", - " 'pvalue_welch',\n", - " 'statistic_welch',\n", - "\n", - " 'pvalue_students_t',\n", - " 'statistic_students_t',\n", - "\n", - " 'pvalue_mann_whitney',\n", - " 'statistic_mann_whitney',\n", - "\n", - " 'pvalue_brunner_munzel',\n", - " 'statistic_brunner_munzel',\n", - "\n", - " 'pvalue_wilcoxon',\n", - " 'statistic_wilcoxon',\n", - "\n", - " 'pvalue_mcnemar',\n", - " 'statistic_mcnemar',\n", - "\n", - " 'pvalue_paired_students_t',\n", - " 'statistic_paired_students_t',\n", - "\n", - " 'pvalue_kruskal',\n", - " 'statistic_kruskal',\n", - " 'proportional_difference'\n", - " ]\n", - " self.__results = out_.reindex(columns=columns_in_order)\n", - " self.__results.dropna(axis=\"columns\", how=\"all\", inplace=True)\n", - " \n", - " # Add the is_paired column back when is_paired is None\n", - " if self.is_paired is None:\n", - " self.__results.insert(5, 'is_paired', self.__results.apply(lambda _: None, axis=1))\n", - " \n", - " # Create and compute the delta-delta statistics\n", - " if self.__delta2 is True:\n", - " self.__delta_delta = DeltaDelta(self,\n", - " self.__permutation_count,\n", - " bootstraps_delta_delta,\n", - " self.__ci)\n", - " 
reprs.append(self.__delta_delta.__repr__(header=False))\n", - " elif self.__delta2 is True and self.__effect_size not in [\"mean_diff\", \"delta_g\"]:\n", - " self.__delta_delta = \"Delta-delta is not supported for {}.\".format(self.__effect_size)\n", - " else:\n", - " self.__delta_delta = \"`delta2` is False; delta-delta is therefore not calculated.\"\n", - "\n", - " # Create and compute the weighted average statistics\n", - " if self.__mini_meta is True and self.__effect_size == \"mean_diff\":\n", - " self.__mini_meta_delta = MiniMetaDelta(self,\n", - " self.__permutation_count,\n", - " self.__ci)\n", - " reprs.append(self.__mini_meta_delta.__repr__(header=False))\n", - " elif self.__mini_meta is True and self.__effect_size != \"mean_diff\":\n", - " self.__mini_meta_delta = \"Weighted delta is not supported for {}.\".format(self.__effect_size)\n", - " else:\n", - " self.__mini_meta_delta = \"`mini_meta` is False; weighted delta is therefore not calculated.\"\n", - " \n", - " \n", - " varname = get_varname(self.__dabest_obj)\n", - " lastline = \"To get the results of all valid statistical tests, \" +\\\n", - " \"use `{}.{}.statistical_tests`\".format(varname, self.__effect_size)\n", - " reprs.append(lastline)\n", - "\n", - " reprs.insert(0, print_greeting())\n", - "\n", - " self.__for_print = \"\\n\\n\".join(reprs)\n", - "\n", - "\n", - " def __repr__(self):\n", - " try:\n", - " return self.__for_print\n", - " except AttributeError:\n", - " self.__pre_calc()\n", - " return self.__for_print\n", - " \n", - " \n", - " \n", - " def __calc_lqrt(self):\n", - " import lqrt\n", - " import pandas as pd\n", - " \n", - " rnd_seed = self.__random_seed\n", - " db_obj = self.__dabest_obj\n", - " dat = db_obj._plot_data\n", - " xvar = db_obj._xvar\n", - " yvar = db_obj._yvar\n", - " delta2 = self.__delta2\n", - " \n", - "\n", - " out = []\n", - "\n", - " for j, current_tuple in enumerate(db_obj.idx):\n", - " if self.__is_paired != \"sequential\":\n", - " cname = 
current_tuple[0]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - "\n", - " for ix, tname in enumerate(current_tuple[1:]):\n", - " if self.__is_paired == \"sequential\":\n", - " cname = current_tuple[ix]\n", - " control = dat[dat[xvar] == cname][yvar].copy()\n", - " test = dat[dat[xvar] == tname][yvar].copy()\n", - " \n", - " if self.__is_paired: \n", - " # Refactored here in v0.3.0 for performance issues.\n", - " lqrt_result = lqrt.lqrtest_rel(control, test, \n", - " random_state=rnd_seed)\n", - " \n", - " out.append({\"control\": cname, \"test\": tname, \n", - " \"control_N\": int(len(control)), \n", - " \"test_N\": int(len(test)),\n", - " \"pvalue_paired_lqrt\": lqrt_result.pvalue,\n", - " \"statistic_paired_lqrt\": lqrt_result.statistic\n", - " })\n", - "\n", - " else:\n", - " # Likelihood Q-Ratio test:\n", - " lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, \n", - " random_state=rnd_seed,\n", - " equal_var=True)\n", - " \n", - " \n", - " lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, \n", - " random_state=rnd_seed,\n", - " equal_var=False)\n", - " \n", - " out.append({\"control\": cname, \"test\": tname, \n", - " \"control_N\": int(len(control)), \n", - " \"test_N\": int(len(test)),\n", - " \n", - " \"pvalue_lqrt_equal_var\" : lqrt_equal_var_result.pvalue,\n", - " \"statistic_lqrt_equal_var\" : lqrt_equal_var_result.statistic,\n", - " \"pvalue_lqrt_unequal_var\" : lqrt_unequal_var_result.pvalue,\n", - " \"statistic_lqrt_unequal_var\" : lqrt_unequal_var_result.statistic,\n", - " }) \n", - " self.__lqrt_results = pd.DataFrame(out)\n", - "\n", - "\n", - " def plot(self, color_col=None,\n", - "\n", - " raw_marker_size=6, es_marker_size=9,\n", - "\n", - " swarm_label=None, contrast_label=None, delta2_label=None,\n", - " swarm_ylim=None, contrast_ylim=None, delta2_ylim=None,\n", - "\n", - " custom_palette=None, swarm_desat=0.5, halfviolin_desat=1,\n", - " halfviolin_alpha=0.8, \n", - "\n", - " face_color = None,\n", - " #bar plot\n", - 
" bar_label=None, bar_desat=0.5, bar_width = 0.5,bar_ylim = None,\n", - " # error bar of proportion plot\n", - " ci=None, ci_type='bca', err_color=None,\n", - "\n", - " float_contrast=True,\n", - " show_pairs=True,\n", - " show_delta2=True,\n", - " show_mini_meta=True,\n", - " group_summaries=None,\n", - " group_summaries_offset=0.1,\n", - "\n", - " fig_size=None,\n", - " dpi=100,\n", - " ax=None,\n", - " \n", - " contrast_show_es = False,\n", - " es_sf = 2,\n", - " es_fontsize = 10,\n", - " \n", - " contrast_show_deltas = True,\n", - " \n", - " gridkey_rows=None,\n", - " gridkey_merge_pairs = False,\n", - " gridkey_show_Ns = True,\n", - " gridkey_show_es = True,\n", - "\n", - " swarmplot_kwargs=None,\n", - " barplot_kwargs=None,\n", - " violinplot_kwargs=None,\n", - " slopegraph_kwargs=None,\n", - " sankey_kwargs=None,\n", - " reflines_kwargs=None,\n", - " group_summary_kwargs=None,\n", - " legend_kwargs=None,\n", - " title=None, fontsize_title = 16,\n", - " fontsize_rawxlabel = 12,fontsize_rawylabel = 12,fontsize_contrastxlabel = 12, fontsize_contrastylabel = 12,\n", - " fontsize_delta2label = 12):\n", - "\n", - " \"\"\"\n", - " Creates an estimation plot for the effect size of interest.\n", - " \n", - "\n", - " Parameters\n", - " ----------\n", - " color_col : string, default None\n", - " Column to be used for colors.\n", - " raw_marker_size : float, default 6\n", - " The diameter (in points) of the marker dots plotted in the\n", - " swarmplot.\n", - " es_marker_size : float, default 9\n", - " The size (in points) of the effect size points on the difference\n", - " axes.\n", - " swarm_label, contrast_label, delta2_label : strings, default None\n", - " Set labels for the y-axis of the swarmplot and the contrast plot,\n", - " respectively. If `swarm_label` is not specified, it defaults to\n", - " \"value\", unless a column name was passed to `y`. If\n", - " `contrast_label` is not specified, it defaults to the effect size\n", - " being plotted. 
If `delta2_label` is not specifed, it defaults to \n", - " \"delta - delta\"\n", - " swarm_ylim, contrast_ylim, delta2_ylim : tuples, default None\n", - " The desired y-limits of the raw data (swarmplot) axes, the\n", - " difference axes and the delta-delta axes respectively, as a tuple. \n", - " These will be autoscaled to sensible values if they are not \n", - " specified. The delta2 axes and contrast axes should have the same \n", - " limits for y. When `show_delta2` is True, if both of the `contrast_ylim`\n", - " and `delta2_ylim` are not None, then they must be specified with the \n", - " same values; when `show_delta2` is True and only one of them is specified,\n", - " then the other will automatically be assigned with the same value.\n", - " Specifying `delta2_ylim` does not have any effect when `show_delta2` is\n", - " False. \n", - " custom_palette : dict, list, or matplotlib color palette, default None\n", - " This keyword accepts a dictionary with {'group':'color'} pairings,\n", - " a list of RGB colors, or a specified matplotlib palette. This\n", - " palette will be used to color the swarmplot. If `color_col` is not\n", - " specified, then each group will be colored in sequence according\n", - " to the default palette currently used by matplotlib.\n", - " Please take a look at the seaborn commands `color_palette`\n", - " and `cubehelix_palette` to generate a custom palette. Both\n", - " these functions generate a list of RGB colors.\n", - " See:\n", - " https://seaborn.pydata.org/generated/seaborn.color_palette.html\n", - " https://seaborn.pydata.org/generated/seaborn.cubehelix_palette.html\n", - " The named colors of matplotlib can be found here:\n", - " https://matplotlib.org/examples/color/named_colors.html\n", - " swarm_desat : float, default 1\n", - " Decreases the saturation of the colors in the swarmplot by the\n", - " desired proportion. 
Uses `seaborn.desaturate()` to acheive this.\n", - " halfviolin_desat : float, default 0.5\n", - " Decreases the saturation of the colors of the half-violin bootstrap\n", - " curves by the desired proportion. Uses `seaborn.desaturate()` to\n", - " acheive this.\n", - " halfviolin_alpha : float, default 0.8\n", - " The alpha (transparency) level of the half-violin bootstrap curves. \n", - " float_contrast : boolean, default True\n", - " Whether or not to display the halfviolin bootstrapped difference\n", - " distribution alongside the raw data.\n", - " show_pairs : boolean, default True\n", - " If the data is paired, whether or not to show the raw data as a\n", - " swarmplot, or as slopegraph, with a line joining each pair of\n", - " observations.\n", - " show_delta2, show_mini_meta : boolean, default True\n", - " If delta-delta or mini-meta delta is calculated, whether or not to \n", - " show the delta-delta plot or mini-meta plot.\n", - " group_summaries : ['mean_sd', 'median_quartiles', 'None'], default None.\n", - " Plots the summary statistics for each group. If 'mean_sd', then\n", - " the mean and standard deviation of each group is plotted as a\n", - " notched line beside each group. If 'median_quantiles', then the\n", - " median and 25th and 75th percentiles of each group is plotted\n", - " instead. If 'None', the summaries are not shown.\n", - " group_summaries_offset : float, default 0.1\n", - " If group summaries are displayed, they will be offset from the raw\n", - " data swarmplot groups by this value. \n", - " fig_size : tuple, default None\n", - " The desired dimensions of the figure as a (length, width) tuple.\n", - " dpi : int, default 100\n", - " The dots per inch of the resulting figure.\n", - " ax : matplotlib.Axes, default None\n", - " Provide an existing Axes for the plots to be created. 
If no Axes is\n", - " specified, a new matplotlib Figure will be created.\n", - " gridkey_rows : list, default None\n", - " Provide a list of row labels for the gridkey. The supplied idx is\n", - " checked against the row labels to determine whether the corresponding\n", - " cell should be populated or not.\n", - " swarmplot_kwargs : dict, default None\n", - " Pass any keyword arguments accepted by the seaborn `swarmplot`\n", - " command here, as a dict. If None, the following keywords are\n", - " passed to sns.swarmplot : {'size':`raw_marker_size`}.\n", - " violinplot_kwargs : dict, default None\n", - " Pass any keyword arguments accepted by the matplotlib `\n", - " pyplot.violinplot` command here, as a dict. If None, the following\n", - " keywords are passed to violinplot : {'widths':0.5, 'vert':True,\n", - " 'showextrema':False, 'showmedians':False}.\n", - " slopegraph_kwargs : dict, default None\n", - " This will change the appearance of the lines used to join each pair\n", - " of observations when `show_pairs=True`. Pass any keyword arguments\n", - " accepted by matplotlib `plot()` function here, as a dict.\n", - " If None, the following keywords are\n", - " passed to plot() : {'linewidth':1, 'alpha':0.5}.\n", - " sankey_kwargs: dict, default None\n", - " Whis will change the appearance of the sankey diagram used to depict\n", - " paired proportional data when `show_pairs=True` and `proportional=True`. \n", - " Pass any keyword arguments accepted by plot_tools.sankeydiag() function\n", - " here, as a dict. If None, the following keywords are passed to sankey diagram:\n", - " {\"width\": 0.5, \"align\": \"center\", \"alpha\": 0.4, \"bar_width\": 0.1, \"rightColor\": False}\n", - " reflines_kwargs : dict, default None\n", - " This will change the appearance of the zero reference lines. Pass\n", - " any keyword arguments accepted by the matplotlib Axes `hlines`\n", - " command here, as a dict. 
If None, the following keywords are\n", - " passed to Axes.hlines : {'linestyle':'solid', 'linewidth':0.75,\n", - " 'zorder':2, 'color' : default y-tick color}.\n", - " group_summary_kwargs : dict, default None\n", - " Pass any keyword arguments accepted by the matplotlib.lines.Line2D\n", - " command here, as a dict. This will change the appearance of the\n", - " vertical summary lines for each group, if `group_summaries` is not\n", - " 'None'. If None, the following keywords are passed to\n", - " matplotlib.lines.Line2D : {'lw':2, 'alpha':1, 'zorder':3}.\n", - " legend_kwargs : dict, default None\n", - " Pass any keyword arguments accepted by the matplotlib Axes\n", - " `legend` command here, as a dict. If None, the following keywords\n", - " are passed to matplotlib.Axes.legend : {'loc':'upper left',\n", - " 'frameon':False}.\n", - " title : string, default None\n", - " Title for the plot. If None, no title will be displayed. Pass any\n", - " keyword arguments accepted by the matplotlib.pyplot.suptitle `t` command here,\n", - " as a string.\n", - " fontsize_title : float or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}, default 'large'\n", - " Font size for the plot title. If a float, the fontsize in points. The\n", - " string values denote sizes relative to the default font size. 
Pass any keyword arguments accepted\n", - " by the matplotlib.pyplot.suptitle `fontsize` command here, as a string.\n", - " fontsize_rawxlabel : float, default 12\n", - " Font size for the raw axes xlabel.\n", - " fontsize_rawylabel : float, default 12\n", - " Font size for the raw axes ylabel.\n", - " fontsize_contrastxlabel : float, default 12\n", - " Font size for the contrast axes xlabel.\n", - " fontsize_contrastylabel : float, default 12\n", - " Font size for the contrast axes ylabel.\n", - " fontsize_delta2label : float, default 12\n", - " Font size for the delta-delta axes ylabel.\n", - "\n", - "\n", - " Returns\n", - " -------\n", - " A :class:`matplotlib.figure.Figure` with 2 Axes, if ``ax = None``.\n", - " \n", - " The first axes (accessible with ``FigName.axes[0]``) contains the rawdata swarmplot; the second axes (accessible with ``FigName.axes[1]``) has the bootstrap distributions and effect sizes (with confidence intervals) plotted on it.\n", - " \n", - " If ``ax`` is specified, the rawdata swarmplot is accessed at ``ax`` \n", - " itself, while the effect size axes is accessed at ``ax.contrast_axes``.\n", - " See the last example below.\n", - " \n", - "\n", - "\n", - " \"\"\"\n", - "\n", - " from .plotter import EffectSizeDataFramePlotter\n", - "\n", - " if hasattr(self, \"results\") is False:\n", - " self.__pre_calc()\n", - "\n", - " if self.__delta2:\n", - " color_col = self.__x2\n", - "\n", - " # if self.__proportional:\n", - " # raw_marker_size = 0.01\n", - "\n", - " # Modification incurred due to update of Seaborn\n", - " ci = ('ci', ci) if ci is not None else None\n", - " \n", - " all_kwargs = locals()\n", - " del all_kwargs[\"self\"]\n", - "\n", - " out = EffectSizeDataFramePlotter(self, **all_kwargs)\n", - "\n", - " return out\n", - "\n", - "\n", - " @property\n", - " def proportional(self):\n", - " \"\"\"\n", - " Returns the proportional parameter\n", - " class.\n", - " \"\"\"\n", - " return self.__proportional\n", - "\n", - " @property\n", - 
" def results(self):\n", - " \"\"\"Prints all pairwise comparisons nicely.\"\"\"\n", - " try:\n", - " return self.__results\n", - " except AttributeError:\n", - " self.__pre_calc()\n", - " return self.__results\n", - "\n", - "\n", - "\n", - " @property\n", - " def statistical_tests(self):\n", - " results_df = self.results\n", - "\n", - " # Select only the statistics and p-values.\n", - " stats_columns = [c for c in results_df.columns\n", - " if c.startswith(\"statistic\") or c.startswith(\"pvalue\")]\n", - "\n", - " default_cols = ['control', 'test', 'control_N', 'test_N',\n", - " 'effect_size', 'is_paired',\n", - " 'difference', 'ci', 'bca_low', 'bca_high']\n", - "\n", - " cols_of_interest = default_cols + stats_columns\n", - "\n", - " return results_df[cols_of_interest]\n", - "\n", - "\n", - " @property\n", - " def _for_print(self):\n", - " return self.__for_print\n", - "\n", - " @property\n", - " def _plot_data(self):\n", - " return self.__dabest_obj._plot_data\n", - "\n", - " @property\n", - " def idx(self):\n", - " return self.__dabest_obj.idx\n", - "\n", - " @property\n", - " def xvar(self):\n", - " return self.__dabest_obj._xvar\n", - "\n", - " @property\n", - " def yvar(self):\n", - " return self.__dabest_obj._yvar\n", - "\n", - " @property\n", - " def is_paired(self):\n", - " return self.__is_paired\n", - "\n", - " @property\n", - " def ci(self):\n", - " \"\"\"\n", - " The width of the confidence interval being produced, in percent.\n", - " \"\"\"\n", - " return self.__ci\n", - "\n", - " @property\n", - " def x1_level(self):\n", - " return self.__x1_level\n", - "\n", - "\n", - " @property\n", - " def x2(self):\n", - " return self.__x2\n", - "\n", - "\n", - " @property\n", - " def experiment_label(self):\n", - " return self.__experiment_label\n", - " \n", - "\n", - " @property\n", - " def delta2(self):\n", - " return self.__delta2\n", - " \n", - "\n", - " @property\n", - " def resamples(self):\n", - " \"\"\"\n", - " The number of resamples (with 
replacement) during bootstrap resampling.\"\n", - " \"\"\"\n", - " return self.__resamples\n", - "\n", - " @property\n", - " def random_seed(self):\n", - " \"\"\"\n", - " The seed used by `numpy.seed()` for bootstrap resampling.\n", - " \"\"\"\n", - " return self.__random_seed\n", - "\n", - " @property\n", - " def effect_size(self):\n", - " \"\"\"The type of effect size being computed.\"\"\"\n", - " return self.__effect_size\n", - "\n", - " @property\n", - " def dabest_obj(self):\n", - " \"\"\"\n", - " Returns the `dabest` object that invoked the current EffectSizeDataFrame\n", - " class.\n", - " \"\"\"\n", - " return self.__dabest_obj\n", - "\n", - " @property\n", - " def proportional(self):\n", - " \"\"\"\n", - " Returns the proportional parameter\n", - " class.\n", - " \"\"\"\n", - " return self.__proportional\n", - " \n", - " @property\n", - " def lqrt(self):\n", - " \"\"\"Returns all pairwise Lq-Likelihood Ratio Type test results \n", - " as a pandas DataFrame.\n", - " \n", - " For more information on LqRT tests, see https://arxiv.org/abs/1911.11922\n", - " \"\"\"\n", - " try:\n", - " return self.__lqrt_results\n", - " except AttributeError:\n", - " self.__calc_lqrt()\n", - " return self.__lqrt_results\n", - " \n", - " \n", - " @property\n", - " def mini_meta(self):\n", - " \"\"\"\n", - " Returns the mini_meta boolean parameter.\n", - " \"\"\"\n", - " return self.__mini_meta\n", - "\n", - " \n", - " @property\n", - " def mini_meta_delta(self):\n", - " \"\"\"\n", - " Returns the mini_meta results.\n", - " \"\"\"\n", - " try:\n", - " return self.__mini_meta_delta\n", - " except AttributeError:\n", - " self.__pre_calc()\n", - " return self.__mini_meta_delta\n", - "\n", - " \n", - " @property\n", - " def delta_delta(self):\n", - " \"\"\"\n", - " Returns the mini_meta results.\n", - " \"\"\"\n", - " try:\n", - " return self.__delta_delta\n", - " except AttributeError:\n", - " self.__pre_calc()\n", - " return self.__delta_delta\n", - "\n" - ] - }, - { - 
"attachments": {}, - "cell_type": "markdown", - "id": "0e1b8353", - "metadata": {}, - "source": [ - "#### Example: plot\n", - "\n", - "Create a Gardner-Altman estimation plot for the mean difference." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a151b86", - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(9999) # Fix the seed so the results are replicable.\n", - "# pop_size = 10000 # Size of each population.\n", - "Ns = 20 # The number of samples taken from each population\n", - "\n", - "# Create samples\n", - "c1 = norm.rvs(loc=3, scale=0.4, size=Ns)\n", - "c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", - "c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", - "\n", - "t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)\n", - "t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)\n", - "t3 = norm.rvs(loc=3, scale=0.75, size=Ns)\n", - "t4 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", - "t5 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", - "t6 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", - "\n", - "\n", - "# Add a `gender` column for coloring the data.\n", - "females = np.repeat('Female', Ns/2).tolist()\n", - "males = np.repeat('Male', Ns/2).tolist()\n", - "gender = females + males\n", - "\n", - "# Add an `id` column for paired data plotting.\n", - "id_col = pd.Series(range(1, Ns+1))\n", - "\n", - "# Combine samples and gender into a DataFrame.\n", - "df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1,\n", - " 'Control 2' : c2, 'Test 2' : t2,\n", - " 'Control 3' : c3, 'Test 3' : t3,\n", - " 'Test 4' : t4, 'Test 5' : t5, 'Test 6' : t6,\n", - " 'Gender' : gender, 'ID' : id_col\n", - " })\n", - "my_data = dabest.load(df, idx=(\"Control 1\", \"Test 1\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "91d15864", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeoAAAGGCAYAAAC0W8IbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABYW0lEQVR4nO3deVhUZfsH8O/MAMO+ryKLuICIbJoKmrvikkv6M1vVMnrfUrNMS99yz6U9ezO3NLLULDUzc8lIMHcUSVQkRRQVEJUdZJs5vz94nZxgEIZhzjB8P9c1V87znPOcm0a555zznOeWCIIggIiIiAySVOwAiIiISDMmaiIiIgPGRE1ERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERERAaMiZqIiMiAMVETEREZsBaXqLOysrBgwQJkZWWJHQoRUYvG38f10yIT9cKFC/kXg4hIZPx9XD8tLlETERE1J0zUREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQEzETsAImpapXeu4+bxH5F/NQlSmSmc/CPQqtsomFk7iB0aEdUDEzWRESu8fgHnNs+FsrJM1Xbj6A+4fS4OwZM+hNzWWcToiKg+eOmbyIhd3vuFWpK+r7zwNq7FfSNCRETUUEzUREaqJOcqSnPSNfbfuXAIglKhx4iISBtM1ERGqupecZ39yqoKKKsq9BQNEWmLiZrISFm6+kBqYqax38KpNWRmFnqMiIi0wURNZKRMLWzgGjJQY79njzF6jIaItMVETWTE/Aa9BJdOfQFIVG0SmQm8Hn0K7mFRosVFRPVnMIl6+fLlkEgkeO211zRuExMTA4lEovYyNzfXX5BEzYzUxBT+j89Cl1fWou3QqWj/2Gvo9upG+PR5VuzQiKieDOI56oSEBKxZswbBwcEP3dbW1hapqamq9xKJpI6tiQgALBxbwcKxldhhEJEWRD+jLi4uxjPPPIN169bBweHhKyVJJBK4u7urXm5ubnqIkoiISByiJ+opU6Zg+PDhGDhQ86SXBxUXF8PHxwdeXl4YNWoUzp8/X+f25eXlKCwsVL2Ki+t+ZIWIiMiQiHrp+7vvvkNiYiISEhLqtb2/vz82bNiA4OBgFBQU4MMPP0RkZCTOnz+P1q1b17rPsmXLsHDhQl2GTUREpDeinVFfv34d06dPx6ZNm+o9ISwiIgITJkxAaGgo+vTpgx07dsDFxQVr1qzRuM+cOXNQUFCgesXHx+vqRyAiImpyop1Rnz59Gjk5OQgPD1e1KRQKHDp0CJ9//jnKy8shk8nqHMPU1BRhYWG4fPmyxm3kcjnkcrnqvbW1deODJyIi0hPREvWAAQOQnJys1vb8888jICAAb7311kOTNFCd2JOTkzFs2LCmCpOIiEhUoiVqGxsbBAUFqbVZWVnByclJ1T5hwgR4enpi2bJlAIBFixahR48eaNeuHfLz8/HBBx/g2rVrePHFF/UePxERkT4YxHPUmmRkZEAq/fs2el5eHqKjo5GdnQ0HBwd06dIFR48eRWBgoIhREhERNR2DStRxcXF1vv/kk0/wySef6C8gIiIikYn+HDURERFpxkRNRERkwJioiYio2Th06BBGjBiBVq1aQSKRYOfOnXVuHxcXV6OYk0QiQXZ2tn4C1gEmaiIiajZKSkoQEhKClStXNmi/1NRUZGVlqV6urq5NFKHuGdRkMiJqGkpFFUpvX4NUZgpLF2+xwyHS2tChQzF06NAG7+fq6gp7e3vdB6QHTNRERi7z5C5cP/o9KovzAAAWTq3hO+AFOHXoLnJkRNWKi4tRWFioev/PFSV1ITQ0FOXl5QgKCsKCBQvQs2dPnY7flHjpm8iI3Tz+I678ukaVpAHg3t0bSPnhXeRfOSNiZER/69OnD+zs7FSv+4tc6YKHhwdWr16N7du3Y/v27fDy8kLfvn2RmJios2M0NZ5RExkpZVUlrh/9vvZOQYmMw9/B3i9Mv0ER1SI+Ph6
hoaGq97o8m/b394e/v7/qfWRkJNLS0vDJJ5/gm2++0dlxmhITNZGRKs66hKrSQo39hRnnoKgsg8y0ftXriJqKtbU1bG1t9Xa8bt264fDhw3o7XmPx0jeRkZJIH/LPWyKFRMJfAdTyJCUlwcPDQ+ww6o1n1ERGytqjPcxsnFBRdLfWfge/cEhNzPQcFVHjFBcXq5U2Tk9PR1JSEhwdHeHt7Y05c+bg5s2b2LhxIwDg008/RZs2bdCpUyeUlZXhyy+/xO+//45ff/1VrB+hwZioiYyURCqDT98JuPRzzfXxpSZm8O79tAhRETXOqVOn0K9fP9X7GTNmAAAmTpyImJgYZGVlISMjQ9VfUVGBN954Azdv3oSlpSWCg4Px22+/qY1h6CSCIAhiB6FPiYmJ6NKlC06fPo3w8HCxwyFqcndSDiPjj+9QmpMOALDzDYFP3wmwbR0gcmTU0vH3cf3wjJrIyDl37AXnjr1QWVIAiUwGE3NrsUMiogZgoiZqIUyt7MQOgYi0wCmfREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQFjoiYiIjJgTNREREQGjImaiIjIgDFRExERGTAmaiIiIgPGRE1ERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERERAaMiZqIiMiAMVETEREZMCZqIiIiA8ZETUREZMCYqImIiAwYEzUREZEBY6ImIiIyYEzUREREBoyJmoiIyIAxURMRERkwE7EDIKKmVZaXhZsnd6Hg6p+QyEzg5B8Bj66PwdTCRuzQiKgemKiJjFhR5l84t+kdKMpLVG0l2WnIORuL4Invw8zaUcToiKg+eOmbyIhd3vO5WpK+rywvC9fivhUhIiJqKCZqIiNVejsDJdlpGvtvn4+DoFToMSIi0gYTNZGRqiwtrLNfWVkOZVWFnqIhIm0xURMZKUsXL0hkphr7zR1bQWZmoceIiEgbTNRERsrU0g6unftp7PfsNkqP0RCRtpioiYyYX9S/4Ogfod4okcKzxxh4dH1MnKCIqEH4eBaREZOZmiNw3DsoybmK/PQzkMhM4dShB+S2zmKHRkT1xERN1AJYufrCytVX7DCISAsGc+l7+fLlkEgkeO211+rc7ocffkBAQADMzc3RuXNn7NmzRz8BEhERicAgEnVCQgLWrFmD4ODgOrc7evQonnrqKUyePBlnzpzB6NGjMXr0aJw7d05PkRIREemX6Im6uLgYzzzzDNatWwcHB4c6t12xYgWGDBmCWbNmoWPHjli8eDHCw8Px+eef6ylaIiIi/RI9UU+ZMgXDhw/HwIEDH7rtsWPHamwXFRWFY8eONVV4REREohJ1Mtl3332HxMREJCQk1Gv77OxsuLm5qbW5ubkhOztb4z7l5eUoLy9XvS8uLtYuWCIiIhGIlqivX7+O6dOn48CBAzA3N2+y4yxbtgwLFy5ssvGJiIiakmiXvk+fPo2cnByEh4fDxMQEJiYmiI+Px2effQYTExMoFDWLBbi7u+PWrVtqbbdu3YK7u7vG48yZMwcFBQWqV3x8vM5/FiIioqYi2hn1gAEDkJycrNb2/PPPIyAgAG+99RZkMlmNfSIiIhAbG6v2CNeBAwcQERFRY9v75HI55HK56r21tXXjgyciItIT0RK1jY0NgoKC1NqsrKzg5OSkap8wYQI8PT2xbNkyAMD06dPRp08ffPTRRxg+fDi+++47nDp1CmvXrtV7/ERERPog+qzvumRkZCArK0v1PjIyEps3b8batWsREhKCbdu2YefOnTUSPhERkbEwqCVE4+Li6nwPAOPGjcO4ceP0ExAREZHIDPqMmoiIqKVjoiYiIjJgTNRERNRsHDp0CCNGjECrVq0gkUiwc+fOh+4TFxeH8PBwyOVytGvXDjExMU0epy4Z1D1qajo3b+dj1+E/kXr9Fmws5OjfJQC9Q9tDJuV3NSJqPkpKShASEoIXXngBY8aMeej26enpGD58OP79739
j06ZNiI2NxYsvvggPDw9ERUXpIeLGY6JuARIuXsWCDT+jovLvRWSOX0jH74mpWPD8Y5DJmKyJqHkYOnQohg4dWu/tV69ejTZt2uCjjz4CAHTs2BGHDx/GJ5980mwSNX9DG7nKKgXe3/SrWpK+7/j5K9hznCVCich4GUMxJyZqI3fiQjryi0s19v968rweoyEiqqm4uBiFhYWq14OFlBpLUzGnwsJC3Lt3T2fHaUpM1EYur6ikzv67hZqTOBGRPvTp0wd2dnaq1/3VKKka71EbOR93pzr7fd0d9RQJEVHt4uPjERoaqnr/YH2GxtJUzMnW1hYWFhY6O05TYqI2csFtW8OvlTOuZN6ptX/Uo6H6DYiMXtL66agozoOZtQNCJ68QOxzjVFEKmFmKHYXOWFtbw9bWtknGjoiIwJ49e9TaHlbMydDw0ncLsOD5EfB0sVdrk0oleGF4JLoHthEnKDJaFcV5qCi6i4riPLFDMWKC2AGIpri4GElJSUhKSgJQ/fhVUlISMjIyAFSXNp4wYYJq+3//+9+4cuUK3nzzTVy8eBFffPEFvv/+e7z++utNEl9aWhreeecdPPXUU8jJyQEA7N27F+fPaz8fiGfULYCHsx3WvzUBx85fQer1W7A2l6NfuD9cHWzEDo2IqEFOnTqFfv36qd7PmDEDADBx4kTExMQgKytLlbQBoE2bNvjll1/w+uuvY8WKFWjdujW+/PLLJnk0Kz4+HkOHDkXPnj1x6NAhLFmyBK6urvjzzz+xfv16bNu2TatxmahbCJlMil7B7dAruJ3YoRARaa1v374QBM1XFGpbdaxv3744c+ZME0ZVbfbs2Xj33XcxY8YM2Nj8fSLUv39/fP7551qPy0vfRETNjaAUOwKqRXJyMh5//PEa7a6urrhzp/Z5QvXBRE1E1NwwURske3t7ZGVl1Wg/c+YMPD09tR6XiZqIqLlR1lxpkMT35JNP4q233kJ2djYkEgmUSiWOHDmCmTNnqk1waygmaiKi5qaqHKjjPi2JY+nSpQgICICXlxeKi4sRGBiI3r17IzIyEu+8847W43IyGRFRc6OsAhSVgImZ2JHQA8zMzLBu3TrMmzcPycnJKC4uRlhYGNq3b9+ocZmoiYiao4piwIQrCxoiLy8veHl56Ww8XvomImqOygrEjoD+YezYsXjvvfdqtL///vsYN26c1uMyURMRNUdl+WJHQP9w6NAhDBs2rEb70KFDcejQIa3H5aXvFkIQBJxKvYa/Mm7B2tIcvUPaw8HGeNYKJmpxSnPFjoD+obi4GGZmNecNmJqaorCwUOtxmahbgJy8Iryz7iekZ/39wP2anYcQPbIXHu8dJmJkRKS10rtiR0D/0LlzZ2zduhXz5s1Ta//uu+8QGBio9bhM1C3Awq92qyVpAKhUKPDFj/HwdnNEF38fkSIjIq0V33r4NqRXc+fOxZgxY5CWlob+/fsDAGJjY7Flyxb88MMPWo/LRG3kzl3JxF/XNf+D3nkoiYmaqDkqrLkCFolrxIgR2LlzJ5YuXYpt27bBwsICwcHB+O2339CnTx+tx2WiNnLpWbfr7E/TUKeaiAxc4U2xI6BaDB8+HMOHD9fpmEzURs7Oqu4JY/bWFnqKhIh0qvAmoFQCUj68Y2gqKiqQk5MDpVJ9TXZvb2+txmOiNnI9OrWBrZU5CkvKau0f/Ij2ExyISESKyupkba+7hTWocS5duoQXXngBR48eVWsXBAESiQQKhXZrtDNRGzkzUxO88eQgvBuzB5X/+EsS3sEbwyM7ixQZETVa7hUmagMyadIkmJiYYPfu3fDw8IBEItHJuEzULUBkUFt88cbT2PnHGaRm3IK1hRwDunbEwK4BMJHJxA6PiLR15y/AT/tJSqRbSUlJOH36NAICAnQ6LhN1C+Hr4YTXnhgodhhEpEs5KWJHQA8IDAzEnTu6n6DLWQhERM1VzgVAUSV2FPQ/7733Ht58803ExcXh7t27KCwsVHtpi2fURETNVeW
96mTtESx2JARg4MDqq5YDBgxQa+dkMiKiluz6CSZqA3Hw4MEmGZeJmoioObt2FOgWLXYUBDRq9bG68B41EVEz0rVrV7Tu1A1dlyZWN+ReAfKvixsUqfzxxx949tlnERkZiZs3q1eP++abb3D48GGtx2SiJiJqRrKzs3EzMxvZhRV/N16JEy0e+tv27dsRFRUFCwsLJCYmory8HABQUFCApUuXaj0uEzURUXP31z5AEMSOosV79913sXr1aqxbtw6mpqaq9p49eyIxMVHrcZmoiYiau4IbwE3tEwHpRmpqKnr37l2j3c7ODvn5+VqPy0RNRGQMkjaJHUGL5+7ujsuXL9doP3z4MPz8/LQel4maiMgY3DwNXE8QO4oWLTo6GtOnT8eJEycgkUiQmZmJTZs2YebMmXj55Ze1HpePZxERGYujnwFj1wMmZmJH0iLNnj0bSqUSAwYMQGlpKXr37g25XI6ZM2di2rRpWo/LM2oiImORnwGc+UbsKFokhUKBP/74A1OmTEFubi7OnTuH48eP4/bt21i8eHGjxuYZNRGRMUnaBPg+Crh0EDuSFkUmk2Hw4MFISUmBvb09AgMDdTY2z6iJiIyJUgHELQUUlWJH0uIEBQXhypUrOh+XiZqIyNjkpgOJG8WOosV59913MXPmTOzevRtZWVmsnkVERHVI2gS06QM4txM7khZj2LBhAICRI0dCIpGo2lk9i4iIalIqgPjlwOjVgIy/6vXB4KpnXb58GWlpaejduzcsLCxU3xiIiMhA3LkEJMYAj7wodiQtgsFUz7p79y4GDhyIDh06YNiwYcjKygIATJ48GW+88YbOAyQiokY48y1w47TYUbQYBlE96/XXX4eJiQkyMjJgaWmpah8/fjz27dundSBERNQEBAGIXQgUZokdidEzmOpZv/76K9577z20bt1arb19+/a4du2a1oEQUbXS2xlI3fkBjr3/fzj63lhc3L4MxdlpYodFzVlZAbB/DlBeLHYkRs1gqmeVlJSonUnfl5ubC7lc3qCxVq1aheDgYNja2sLW1hYRERHYu3evxu1jYmIgkUjUXubm5g39EYgMVnF2Gv78agZun4uDouIelJVluJNyGGdjZqEg47zY4VFzlpsO/DYfUFSJHYnRMpjqWY8++ig2bvz7+TyJRAKlUon3338f/fr1a9BYrVu3xvLly3H69GmcOnUK/fv3x6hRo3D+vOZfSLa2tsjKylK9eBZPxuRq7AYoKu7VaFdWlSP9ty/1GouyqgK3zx/CzZM/If/KGQisd9z83TgFHP6YtaubSFNVz2rwrO/3338fAwYMwKlTp1BRUYE333wT58+fR25uLo4cOdKgsUaMGKH2fsmSJVi1ahWOHz+OTp061bqPRCKBu7t7Q8MmMniVpYXIT/9TY39x5l8oy78Fc3u3eo9ZUZKP7MR9KMw4B6mpGZwDe8O5Yy9IH/K4Tu6lk/hr18eoulekarNw9kLgE3Nh4ehZ7+OTAbr4C2DvDYQ8KXYkRud+9awNGzaoqmcdO3YMM2fOxNy5c7Uet8GJOigoCH/99Rc+//xz2NjYoLi4GGPGjMGUKVPg4eGhdSAKhQI//PADSkpKEBERoXG74uJi+Pj4QKlUIjw8HEuXLtWY1ImaE2VlOYC6z3QUlWX1Hq/41hWc2/Q2qkr/XhEp968TuHVmHwKfXAiZae23qu7dvYmUbUsh/GMJynt3ruP85nno8spaSKSyesdBBujEGsDRD/DqJnYkRqWpqmdp9Ry1nZ0d3n77ba0P+qDk5GRERESgrKwM1tbW+PHHHzUuZu7v748NGzYgODgYBQUF+PDDDxEZGYnz58/XmNx2X3l5uWrmHVCd6IkMkZmtE+T2bijPv1Vrv6mVPWRyK2Sf2Q+lohL2PsGwdPHWON5fP32slqTvK7iWjJvHtsO799O17pd5aneNJH1fWX427qYeh3PHnvX4ichgCUogdhEwZh1gq/0JFgFnz55FUFAQpFIpJBIJ3n7
7bcyaNQuXL19GcXExAgMDYW1t3ahjNDhRHzp0qM7+2m6k18Xf3x9JSUkoKCjAtm3bMHHiRMTHx9earCMiItTOtiMjI9GxY0esWbNGYxmxZcuWYeHChQ2KiUgMEokUrSP+D2l7V9bab+Xqi9MrJ0N4YDKQU0AkOox6AzJT9UmVRZmXUJqTrvFYt5J+hXfvp1F6OwO3z8ejqrwUtq0D4BTQEyVZNe+xPag46zITtTEoL6qeXDbyc9avboSwsDBkZWXB1dUVfn5+SEhIgJOTk06rZzU4Ufft27dG24MrkjV0LVMzMzO0a1e9Fm2XLl2QkJCAFStWYM2aNQ/d19TUFGFhYbXevL9vzpw5mDFjhup9UlJSk60eQ9RYHl2GoaqsGDeOfK+aVCY1lcPeNwS5l07W2P7uxaNIk1uhw4jX1Noriu/WeZzyoru4cuBLZJ74UdWWlbAL5g4ekNu51rmviUXjzg7IgNxOBY5+BvSeKXYkzZa9vT3S09Ph6uqKq1evQqlU6vwYDU7UeXl5au8rKytx5swZzJ07F0uWLGl0QEqlUu1SdV0UCgWSk5NVC6HXRi6Xqz021thLEERNzavnE/Do+hgKriUDghJ2Pp1x9us3NW5/O/kgfPtNhJm1g6rNwqn2W0H3mVk7qCXp+8rysuqcESyRyuDSiV90jUrKz4BbEOA/ROxImqWxY8eiT58+8PDwgEQiQdeuXSGT1T6HQ9sSmA1O1HZ2djXaBg0aBDMzM8yYMQOnT9d/qbo5c+Zg6NCh8Pb2RlFRETZv3oy4uDjs378fADBhwgR4enpi2bJlAIBFixahR48eaNeuHfLz8/HBBx/g2rVrePFFrmNLxsVEbgmnDt0BAEpFJUpva34MUVBWoTTnqlqitnRqDfs2ochPT6p1H0kds77L8rNh59O5+ovCP/j0mwi5rXM9fwpqNv74CHBuDzi1FTuSZmft2rUYM2YMLl++jFdffRXR0dGwsbHR6TF0VlLFzc0NqampDdonJycHEyZMQFZWFuzs7BAcHIz9+/dj0KBBAICMjAxIpX8/6p2Xl4fo6GhkZ2fDwcEBXbp0wdGjR3V6L4DI0EikJpCZWdT6fPV9tV2O7jDyDZzbMhelOVfV2t3DhuB2St3rDrsED4BL5/7I+fM3VBTnwtLFG626joC9X5hWPwMZOEUFcGAeMPZLwNRC7GialbNnz2Lw4MEYMmQITp8+jenTp4ufqM+ePav2XhAEZGVlYfny5QgNDW3QWOvXr6+zPy4uTu39J598gk8++aRBxyBq7iQSCVyC+iI7sfZV+yydvaFUVOH6ke8hNTGFk39PmNu7wszGEWEvfobcSyeRl5aIynuFsPfpDLfQwSi8eRGlZZqfgDC3dYF9m1C4hw5uqh+LDE3BDeD4KuDRGQ/fllQenEwWHx+PiooKnR+jwYk6NDQUEomkxipFPXr0wIYNG3QWGBH9zbv3M8hP/xNleZlq7VJTOSQmpjgb8/dkoPTfNsCzx+NoM+AFQCJB4fXzyDn7G5RVFbibchjX4r+FnW9IjTPt+8zt3WHnG9yUPw4Zqgs/Af7DANcAsSNpNgxyMll6uvojH1KpFC4uLlxzm6gJmVk7IOT5j5B1ajfupByGsqoC9r4hKCvIQf6Vfyz2Lyhx89h2WDh6oiwvGzePq08aq7pXhLsph2Ht3g7F2epPTMjMrdBh9CxIJA1eXZiMRcI6YPhHYkfRbBjkZDIfHx+tDkREjWNqaQvv3k+rFiopL7iNhM9f0Lj9zRM/oqLwjsZ+paBEwP+9jdvn46AovwcbzwB4hA+FmY2jzmMn3cjIyEBpaSkAoLRCiYzcMng76vgk6cYpoOAmYGe4S8WuXLkSH3zwAbKzsxESEoL//ve/6Nat9lXWYmJi8Pzzz6u1yeVylJXVf5W/uhjMZLLPPvus3gO++uqrWgdDRPVXeuda9QpTGty7c73u/W9dgYNfOJwDInUdGunYyZMnsXjxYvzyyy+q2455pVXwffs
kHuvsiLnDfPCIrw6Tw/UTgN0Y3Y2nQ1u3bsWMGTOwevVqdO/eHZ9++imioqKQmpoKV9fa1wCwtbVVm+z84NofujBkSPWjbaJOJqvvBC6JRMJETaQnppY1H5V8kExuCUV5qcZ+idQEEg2X6Mhw7NixA+PHj4cgCDXmBgkCsOdcLvaey8PW6I4YE6ajR+dyUnQzThP4+OOPER0drTpLXr16NX755Rds2LABs2fPrnUffRVz+uqrr5pk3Hol6n/elyYi8Vl7tIeli4/GZ6zdQgYh93ICynIza+138u8Bqcy01r7GuP8894PPdZN2Tp48ifHjx0OhUGgsM6pQAhIIGL8uBUffDNXNmXVB3VdjxFJRUYHTp09jzpw5qjapVIqBAwfi2LFjGvdrymJOY8aMQUxMDGxtbTFmTN1XIXbs2KHVMXT2HDUR6V/7x17Fuc1za5w5W7q2gfejT8HeLwwp378LQVml1m9iYQvvvs81SUyhk1c0ybgt0bvvvlvrmfQ/CQAECHh3zzX89EpQ4w9cWPuXu6ZSXFyMwsK/C8j8c0XJ++7cuQOFQgE3N/VSr25ubrh48WKtY2tTzKkh7OzsVJfSa1sQTBe0StQ3btzArl27kJGRUeOZsY8//lgngRHRw9l4BiAs+nNkJvyMgmtnITUxg3PHXnAPi4LMzAKO7R5B5+eW4frRH5B/5QykMhM4BfSEV6/xsHBsJXb4VIeMjAzs3r37oUn6PoUS+Dk5VzcTzMoKgLJCwNy2cePU0z/rL8yfPx8LFizQydjaFHNqiAcvd4t66ftBsbGxGDlyJPz8/HDx4kUEBQXh6tWrEAQB4eHhTREjEdXB3N4NfoM0L6Nr6xWITuPn6zGi5k+hUDTJ87ANsX///non6fsEAfj1Qh4mRrg9fOOHyU4BPJv2d3pVVfWVnvj4eLUFs2o7mwYAZ2dnyGQy3LqlXgr21q1b9b4HXZ9iToamwYl6zpw5mDlzJhYuXAgbGxts374drq6ueOaZZ1Qz34iImrPFixc32/K40d9eQvS3l3QwUg8djFE/1tbWsLV9+Nm7mZkZunTpgtjYWIwePRpAdSGn2NhYTJ06tV7Hqk8xp4YICwur9yzyxMTEh29UiwYn6pSUFGzZsqV6ZxMT3Lt3D9bW1li0aBFGjRqFl19+WatAiIgMxdy5c/H222+LGkNMTAxeeumlBu+37tn2ujmjdukIjK69NrqunDlzBt27d2/QPjNmzMDEiRPRtWtXdOvWDZ9++ilKSkpUs8D1Xczp/hcGACgrK8MXX3yBwMBA1eX248eP4/z583jllVe0PkaDE7WVlZXqvrSHhwfS0tJUs+fu3NG8uAIRia8s/xZKbl2BiYUtbL0Cdf48qbGQyWQaV5fSl6ioqFqXa66LRAIMDnSAqUwHK8vlXQKU5YC86UoDm5g0fJrU+PHjcfv2bcybNw/Z2dkIDQ3Fvn37VBPM9F3Maf78v28rvfjii3j11Vdr3PueP38+rl/Xfia9RGjgTZDRo0dj+PDhiI6OxsyZM/HTTz9h0qRJ2LFjBxwcHPDbb79pHYw+JCYmokuXLjh9+jTvqVOLUVVWjEu7V+Bu6nHVIinmDq3Qbvg02HNdb4M1cuRI7NmzBwqF4qHbyqTA8CBH3cz6vu/x1YBrR92N9w/G9vvYzs4Op06dQvv27dXaL126hK5du6KgoECrcRv8tevjjz9WXapYuHAhBgwYgK1bt8LX1/eh1bCISBwp25bi7sWjaiuZleVl4sLWBSi9e0Onx0paPx0nV0xA0vrpOh23JZo7dy4kEslDr3xIAEggwTvDdLzEs0L3laCMmYWFBY4cOVKj/ciRI42qh9Hg6w5Lly7Fs88+C6D6Mvjq1au1PjgRNb2im6kouPpnrX3KynJknvwJ7YZO0dnxKorzUFF0V2fjtWSPPPIItm7dqlqZrLYza5m0Okl/H91Rt8uIAoCt4a73bYhee+01vPzyy0hMTFStPX7ixAls2LABc+fO1Xr
cBifq27dvY8iQIXBxccGTTz6JZ599FiEhIVoHQE2jorIKPxw8jX0nzuNuQQm83BwwqlcIhkV0Fjs00rOC6+fr7C/MqLufxDVmzBgcPXoUixcvrvFctURSfbn7HV2v9Q0ArUIBKx0tSdpCzJ49G35+flixYgW+/fZbAEDHjh3x1Vdf4YknntB63AYn6p9++gl5eXn44YcfsHnzZnz88ccICAjAM888g6effhq+vr5aB0O6UaVQ4O21O5F0+e9Lmlcy7+CT72ORlnkH08b2a5LjvvLRZuQVlcLBxhJfvPF0kxyDGk5mWvszqfdJH9JP4nvkkUdUi0yFhoYiLy8PDpYmSHonXPfVs+4Lm9A04xq5J554olFJuTZaTQ10cHDASy+9hLi4OFy7dg2TJk3CN998g3bt2uk0ONLOoaRLakn6QbsO/4lr2U1zWTKvqBR3CoqRV6S5EATpR9HNi7hxdBsyE3bBppU/JFLN38mdAx/VY2TUGN7e3rC0tAQAWJpJmy5J+/QEWndpmrGpwRq11ndlZSVOnTqFEydO4OrVqzXWXyVxxCX9VXf/mb8wcWhEndtQ81RVVoKUbUvU7klLpDLYtA6o9RK3lVsbuIdxoSJ6gIk50JNVEA2JVmfUBw8eRHR0NNzc3DBp0iTY2tpi9+7duHFDt7NHSTtl5ZV191fU3U/N16VfVtSYOCYoFSjMOA/38CGwbtUBEqkMplb28IwYi87PLYeJ3FKkaMkgdZkI2DR9SUiqvwafUXt6eiI3NxdDhgzB2rVrMWLECI3rspI4Ovt54swlzQ/XB/mxGIMxKivIwd2Lmkv9Fd1MRVj053qMiJodBx+g8zixo6B/aHCiXrBgAcaNGwd7e/smCId0YXhkZ/z4RxKKSstq9Pl6OKFHJz8RoqKmVppzVe056X8quVVdPIerkZFGvWYATVCjnBqnwYk6Ojq6KeIgHXK0tcJ7/x6D9zbtw7Vbuar2IL9WGNM7DBnZuWjTio9dGBtTy7qLGphY2jBJk2adx1U/kkVaUygUiImJQWxsLHJycmpUYPv999+1GrdRk8nIcLX3csWXsyfgwtUs3MotwNFz6Th2Lg2LYn4BAPi1csa0sf15GdyI2HgGwMKpNe5pWGnMtfMAPUdEzYZLANCt4QVASN306dMRExOD4cOHIygoSGdfjJmojVygrwd2Hf4TcWdS1dqvZN7BnDU/YuWMp+Dt5ihSdKRr7R+bjvNb5kFRcU+t3dLVF66dB6CiKBdmNvy86QGWjsDgdwETM7Ejafa+++47fP/99zoroXkfE7WRu3k7H78nXqy1r6yiEtviEjFj/EA9R0VNxdYrEKEvfobMhJ9RcC0ZMlMzyO3dUZSZiqQvq+v12ngGwKffRBbjIMDMChj6AWDtInYkRsHMzKxJ1hPRQS00MmRJl66jrvpoZ/7K0F8wpBcWjq3QNupfCH/pc7iFDMKd8/Eoz8tW9RfdvIjzW+ai8PoFEaMk0cnMgKilgDMXqtKVN954AytWrGhQadL64Bm1kTMxqfu7mInINXep6SgVVbh2aFOtfYKiChmHNiPomXf1HBUZBIkUGLSQk8d07PDhwzh48CD27t2LTp06wdRUfQb9jh07tBqXidrIdQ9sA1OZDJUa6tn2Cm6r54hIX4qzLqOyOE9jf356EpRVFZDy3mTL03cO4BMpdhRGx97eHo8//rjOx2WiNnL21pZ4cmBXfLP/RI0+VwcbjOkTJkJUpB8Pu/wm6PwSHTUDEVOADoPFjsIoffXVV00yLhN1CzBhSASc7W2w7eBpXM/Jg5mpDH1CO2DS0Eg42FiJHR41EWuPdjC1skdlSX6t/Xa+IQ+trEVGpvM4IFi3lZ2o6TFRtxDDegRhWI8glJZVwMxUxnvTLYBUZgqvR5/ClX2ravRJpDJ4P8pSpC1K2/5Aj1fEjsLobdu2Dd9//z0yMjJQUVGh1peYmKjVmJz13cJYmpsxSbcgrbo+hnbDX4Xc7u/KdlbubRH45EL
Y+QSJGBnplWeX6vvSUv7Kb0qfffYZnn/+ebi5ueHMmTPo1q0bnJyccOXKFQwdOlTrcXlGTWTk3MOi4BY6CGW5WZDITGBuz3K0LYpLABc00ZMvvvgCa9euxVNPPYWYmBi8+eab8PPzw7x585Cbm/vwATTg1yuiFkAikcLCyZNJuqVx9AOGfQCYsZSpPmRkZCAysno2vYWFBYqKigAAzz33HLZs2aL1uEzURETGyLYVMOxDwLzuYi2kO+7u7qozZ29vbxw/fhwAkJ6e3qgnLJioiYiMjdym+kzayknsSFqU/v37Y9euXQCA559/Hq+//joGDRqE8ePHN+r5at6jJiIyJvdXHbNrLXYkLc7atWtVpS2nTJkCJycnHD16FCNHjsS//vUvrcdloiYiMiahT1fP8ia9k0qlkD4ws/7JJ5/Ek08+2fhxGz0CEREZBrvWQJdJYkfRov3xxx949tlnERERgZs3bwIAvvnmGxw+fFjrMZmoiQgAuJyoMejxCiAzffh21CS2b9+OqKgoWFhY4MyZMygvLwcAFBQUYOnSpVqPy0RN1IJVFOchbd8qHPvwCRxZMgJJ61/D7fOHxA6LtOESwEIbInv33XexevVqrFu3Tq1yVs+ePbVelQzgPWqiZk9QKpB76STuXDwKQVEJO58QuHbuB5mZeZ37VZYW4OzXb6IsL1PVVpx1Cak/vofywttoHTG2qUMnXQp9GpBIxI6iRUtNTUXv3r1rtNvZ2SE/P1/rcZmojUxpWQViT1/Etey7cLC1wqCuAXB14HOUxkpZVYELWxciPz1J1Xbnwh+4cewHdH52OcztXTXum3lyl1qSflBG/Ca4h0XBxNxa1yFTI7m7uwOCAu4mxX832rYCfB8VLygCUP3ZXL58Gb6+vmrthw8fhp+fn9bjMlEbkfPpmZj75S4UlZap2jbuO4Ypj/fFyF4h4gVGTeb64a1qSfq+8vxbuLT7U3R+dikEQYncSwkouPonJDITOPlHwLZ1R9xJ0Ty5RVlVjty/TsI1uH8TRk/aOHXqFJB/Hdj67N+NgaO5jrcBiI6OxvTp07FhwwZIJBJkZmbi2LFjmDlzJubOnav1uEzURqK8ogoLNvyslqQBQKkU8PmOg/D3doO/t7tI0VFTEAQB2Wf2aewvuPonijL/QtrelSjOuqxqv3lsO5w79oKysrzO8ZVVdfeTgZCaAP5DxI6CAMyePRtKpRIDBgxAaWkpevfuDblcjpkzZ2LatGlaj8uvYEYiLikV+cX3au0TBGDXkbMa9714LRs/HzmL+KS/UF5R1VQhko4pq8o11pq+L/3Al2pJ+r47KYdhYmFTx54S2PkENy5A0g+fCMDcTuwoCIBEIsHbb7+N3NxcnDt3DsePH8ft27exePHiRo3LM2ojceN2ft39OXk12nILS7Ao5hecT//7PqWNpRzT/28A+oR10HWIpGMyU3OYWTuiolhTVR4JCq9f0Lh/edFdSGSmEBSVNfqcAiJh4eSpo0ipSfn1EzsC+gczMzMEBgbqbDwmaiPhbFf3pJ/a+hds+Bkp17LV2opKy7Hs231wd7KDvzcrLRk69/ChyDi0qdY+61btUZz5l8Z9q0oLEDDuHVw98CXK8qv/HkikMrh06ou2w15pknhJxyRSwKub2FG0eC+88EK9ttuwYYNW4zNRG4n+4f748uc/UKbh0vXQHkFq789dyayRpO9TKJX48dAZzH6W970MXeueT6A4+zJy/zqh1m7h1BptBr2E5K9natzXxNwaTh26w6lDDxTdSEFVeQms3Pwgt2Ehh2bDxb+6AAeJKiYmBj4+PggLC2uShYOYqI2EjaU5Zj0VhWXf7kWVQqnW1yu4HfYeP4dv9h+Hp4s9RvQMRmpG7Un6vtSMW00ZLumIVGaCwCfmIT/9T9y5eBjKqkrYtwmFc8eekMpMYefTGQXXkmvd1zVkICSS6mkqtl66u0xHeuTWSewICMDLL7+MLVu2ID09Hc8//zyeffZZODo66mx
8Jmoj0ju0Pfw8nbH7aHL1c9Q2lqisVCAu6e/LnxeuZuG3UynoF+5f51jWFmZNHS7pkH2bENi3qfkIXrvhryL5mzmoKLqj1m7j6Q/v3s/oKzxqKs6cS2IIVq5ciY8//hg7duzAhg0bMGfOHAwfPhyTJ0/G4MGDIWnkQjSizvpetWoVgoODYWtrC1tbW0RERGDv3r117vPDDz8gICAA5ubm6Ny5M/bs2aOnaJuH1i4O+Peo3lj2r8cxrEeQWpK+TxCAg6dTYWaq+XvagC4dmzJM0hMLx1YI/9dKtBn0IhzadYWjfwQ6jHwDnZ97DyZyS7HDo8ay9xE7AvofuVyOp556CgcOHMCFCxfQqVMnvPLKK/D19UVxcfHDB6iDqIm6devWWL58OU6fPo1Tp06hf//+GDVqFM6fP1/r9kePHsVTTz2FyZMn48yZMxg9ejRGjx6Nc+fO6Tny5mH/Sc0zfgUA4R28al1xsFObVjXuaVPzZWJuDc/uj6PTkwsROO4duAb3h9SEhRuMAmtOGySpVAqJRAJBEKBQKBo/ng5i0tqIESMwbNgwtG/fHh06dMCSJUtgbW2N48eP17r9ihUrMGTIEMyaNQsdO3bE4sWLER4ejs8//1zPkTcPuYUldfa72Nvgg1fGIjLIDy72NvBr5YyXRj6K9/49BnIz3hUhMmjmtoCcS7waivLycmzZsgWDBg1Chw4dkJycjM8//xwZGRmwtm7c52Qwv40VCgV++OEHlJSUICIiotZtjh07hhkzZqi1RUVFYefOnXqIsPnxdXfCyZSrGvt93J0Q0s4LIe289BcUEemGTSuxI6D/eeWVV/Ddd9/By8sLL7zwArZs2QJnZ2edjS96ok5OTkZERATKyspgbW2NH3/8UeOD4tnZ2XBzU3+2183NDdnZmmcwl5eXq2qCAmj0vYLmZHhkZ/x4KAmVtVx6sbGUY2DXABGiIjFUlZWg6GYKJFJT2HoHQsqaxc2fDdc5MBSrV6+Gt7c3/Pz8EB8fj/j4+Fq327Fjh1bji56o/f39kZSUhIKCAmzbtg0TJ05EfHy8zlZ1WbZsGRYuXKiTsZqbVs72+M+EoXhv036UVfy9+pSdlQUWvDACVuZyEaMjfRAEJa4d3IjMhF2qtb1Nrezh03cC3MOiRI6OGsWaa/cbigkTJjR6ZnddRE/UZmZmaNeuHQCgS5cuSEhIwIoVK7BmzZoa27q7u+PWLfXne2/dulVd9k2DOXPmqF0uT0pKQp8+fXQUveHrFdwOoe1b42BiKv66ngOlIKBLB2+0b625/CEZj4z4Tbhx9Ae1tsqSfFz+5TOYWFjDOaCnSJFRo1nz37ChiImJadLxRU/U/6RUKtUuVT8oIiICsbGxeO2111RtBw4c0HhPG6ieMi+X/33m2Nib+s2RUing8Nk0JP6VAQD49eQF2FjKMXVsP/QP5+VvY6WoKENmwi6N/TeO/MBE3ZxZuYgdAemJqIl6zpw5GDp0KLy9vVFUVITNmzcjLi4O+/fvB1B9OcHT0xPLli0DAEyfPh19+vTBRx99hOHDh+O7777DqVOnsHbtWjF/DIO3+Os9SLp0Xa2tqLQc723aD3dHWwT6clKKMSrOToOivFRzf9YlKCrKIDMz12NUpDOWXOq1pRD18aycnBxMmDAB/v7+GDBgABISErB//34MGjQIAJCRkYGsrCzV9pGRkdi8eTPWrl2LkJAQbNu2DTt37kRQEJ/51eTS9ZwaSfo+pVLA9rgzeo6I9EVmWvfqchKpDBKZTE/RkM6xtGWLIeoZ9fr16+vsj4uLq9E2btw4jBs3rokiMj4XH7Km98P6qfmycm8HcwcPlOVl1drv2KEHZ383Z2ZWYkdAeiLqGTU1PauHrNnNmd/GSyKRoM2gF6tLIf6DzNwKPn2eFSEq0hlTC7EjID1hojZyEZ38YCnXnKz7d6m7OAc1b04deqDzs0th7xcOSKSQmpjBpVNfhEz6CJYu3mKHR40h45fsloKJ2shZyM3
w8uN9al3Tu31rV4zqVbPiEhkXO5/OCHp6MXr+ZxciZ/8I/8dnwdKZq9E1a1IZIG25v75XrlwJX19fmJubo3v37jh58mSd2zf3Yk4t95NuQYZ074QPXhmLiCA/ONpawdvNEc8Pi8RHU/8PFnWcbTeUg40lnO2s4WDDqkyGqCkXZCA9k7TcSYBbt27FjBkzMH/+fCQmJiIkJARRUVHIycmpdXtjKOYkEQRBEDsIfUpMTESXLl1w+vRphIeHix0OkdE5uWICKoruwszGCd2mbxQ7HONUcgew0t1a0mLR5vdx9+7d8cgjj6iKMSmVSnh5eWHatGmYPXt2je3Hjx+PkpIS7N69W9XWo0cPhIaGYvXq1br5QZoYz6iJiJqbWiYItgQVFRU4ffo0Bg4cqGqTSqUYOHAgjh07Vus+x44dU9seqC7mpGl7Q2RwK5OR+O6VVyD29EVcybwDB2tLDHykIzyc+MwmtRwKhQJKpVLsMDSrqgIqKx++nYGrqqoCUF0sqbCwUNX+zxUl77tz5w4UCkWtxZkuXrxY6zG0KeZkaJioW6DcwhIcO3cFlVUKhHXwgo/73yscpWZk4+21P6Gg5J6q7dtfTyB65KP4v768VUAtw+LFi1tsMR8x/LP+wvz587FgwQJxgjFATNQtTMzeY9gam4Aqxd9nC48Gt8NbzwyBVCrBvPU/qyVpAFAKAtb8dAj+Xm7o3NZT3yET6d3cuXPx9ttvix2GZuVFgNxG7Cga7cyZM+jevTvi4+MRGhqqaq/tbBoAnJ2dIZPJGlScSZtiToaGidoI3cjJQ+zpiyi+V44OXm7oE9YeZiYm2Hv8HDb9eqLG9n+cvQxryziEtfdCbmGJxnF3HfmTiZpaBJlMBpkhL68qyAHT5r+qnIlJdQqytraGra3tQ7c3MzNDly5dEBsbi9GjRwOonkwWGxuLqVOn1rqPNsWcDA0TtZH5as9RbPntJB6cyx+z9yiW/etxbItL1LjfbwkpsLGsewGFGzl5ugqTjJiZtYPaf6kJtNDJZAAwY8YMTJw4EV27dkW3bt3w6aefoqSkBM8//zwA4yzmxERtRA6fvYzNB2o++J+TV4T5G36uM9FWKhR42IN6zvZ1X2p75aPNyCsqhYONJb544+l6xUzGJ3TyCrFDaAFa7jPx48ePx+3btzFv3jxkZ2cjNDQU+/btU00Yy8jIgPSBxWDuF3N655138J///Aft27dvdsWcmKiNyM9Hzmrsu5GTB1MTGSqrFBq36dm5LX45lozSsopa+4f1qPsvdl5RKe4UFNcvWCLSXgs+owaAqVOnarzUbYzFnFr2p21krj/k0nQHL1eNfX6tnNGpTSvMeXYITE1q3psb1SsEEUF+jY6RiHSghSfqloZn1EbE2c4Kt/OLNPYP69EZeUWlyLxToNZuITfFhCE9ELP3GNJu3kaPTm1gIpOiuLQc9taWiOoeiJB2XBuayGBwOdgWhYnaiAzpEYSUa7U/xO9kZwV/HzeM698F59IykXbzNioVCoS194K/txuWf7sPZRVVavuM6hWCqWP76SN0aoR7uZm4cWwb8i6fAgA4tOuK1hFjYeHIGfrGi4m6JWGiNiJR3QKRmHoN8UmX1NrNzUzh7miL6Pe+UU0Yc7SxxKvj+qOrvy+eWvhljSQNAD8d/hNhHbzQs3M7fYRPWii+dQXJ38yGouzvx+pundmPOymH0fm55bB2q75dUVGUi4KMZEhkJrBvEwYTOQunNGs8o25RmKiNiEwqxdsThmFg13TVc9T+Xm44l56JPy/fUNs2t6gU7369B08P6oai0jKNY+45do6J2oClH/hSLUnfpygrQfqBLxH09GKk7V+NW2f2Q1BWTySUmVnAu88z8Oz+uL7DJV1hom5RmKiNjEQiQY9OfujRqfpM6krmbWyq5ZEtAKhSKHH47OU6x8up4543iauiOA8FVzXP9C+4ehZp+1YhO3GvWrui4h7SD3wJU0t7uHbmrQ0iQ8epg0bufHpWnf0
Pe5zK09leh9GQLikq7gGo6+F3AbfOxmrsvXFsu85jIj1pWdWJWzwmaiNnaW5WZ7+DtSWc7aw19o/sGaLrkEhH5HauMK1j9S8Tc2sIVbU/Ew8ApTnp/0v2RGTImKiNXI9ObWBupnlN4H5dArDghcdgZ2Wh1i6RAJOGRiDc37upQyQtSWUmaPXISI39Lg+5rC2RmUAia/7rRRMZO96jNjIVVVXIySuCjYU57KwtYGUuR/SIXvjv9oM1tvX1cMLoXiGwspBj4zvP47dTKbiSeRu2VhYY9EhHeLk6ivATUEO0jhyHqntFyEzYBUFRPXNf8r8E7jvgBeRfOYN7d2/Uuq9zQE9IZfwV0CwJAieUtSD8V2okFAolNu4/jp+PnEVRaRmkEgm6BbbBy6N7Y2SvELg52mJ7XCJSr9+CtYUcA7oE4In+XWBlUV2Iw9LcDCN78TJ3cyORSNBm4GR4RoxF/pXqoiv2fuEws7IHALSN+jfOb10IQVGptp+plT28+zyn73BJZ3iPuiVhojYSH209gAMJKar3SkHA8fNXcOnGLax642l0D2yD7oFtRIyQmpKZlT1cO/ev0W7vF4aQSR/gxrHtyL9yBhKZCZwDesIz8v9gbqd5SVkycIISgAGX4SSdYqI2AtdzcvHbqZRa++4WlOCnw2cxaWjzqb1KumXt0R4BY2aLHQbpEmd9tyicTGYETly4Wue/2xPnrwAABEFAYUkZKiprrkJGRESGiWfULYAA4Ocjf2Jb3Blk3smHqUyGXiHtMHl4T7g52oodHhE1lJS/ulsSnlEbge6BvnX2m5uZ4rNtB5F5Jx8AUKlQ4GBiKl777HvcLai5/CQRGTgpf3W3JPy0jYCXqyMGdu1Ya5+DjSVSNVTUulNQjB2HEpsyNCIiaiQmaiMx88lBeGrgI7CxrH7cSiqRoEdgGzzeOwxVSqXG/Y4kp+krRCIi0gJvdBgJmUyKF4b3xLODuyMnrwjWlnLYW1vil6PJde6nUGhO4kREJD4maiNjZmqC1q5/r/8c3sEbEonmpzm6cIlQIiKDxkvfRs7D2Q4Du9R+/9pCboqxfcP1HBERETUEz6hbgNfHD4S1pRx7j59HWUX1UpLtW7ti6th+XM+7BagqK8atPw8gPz0JEpkpnP0j4dzpUUhZkIOoWWCibgFMTWR45fG+mDgkAtdu5cLaQg5vNyboluBebhaSv5mNiqI7qrbc1GPIOr0bnZ5+FyZySxGjI6L64KXvFsTKQo5AXw8m6Rbk8i8r1JL0fUU3U5FxaLMIERFRQzFRExmpe7lZKLimedZ/zp8HIAic9U9k6JioiYxURdHdOvuryoqhqCjTUzREpC0maiIjZe7gDkg0/xM3s3GCzMxCjxERkTaYqImMlNzWGU7+PTT2e3QZBolEoseIiEgbTNRERqzdsKmw9mhXo925Yy+0jhwnQkRE1FB8PIvIiJla2iHkhU+QeykB+elJkMpM4BTQE7atA8QOjYjqiYmayMhJJFI4degOpw7dxQ6FiLTAS99EREQGjImaiIjIgDFRExERGTDeo24h7uQX45djyUi9fgs2FnL07xKAbh19+XgOEZGBY6JuAc6m3cDcdbtQWl6havs9MRX9wv0x+5khkEqZrImIDBUvfRs5hUKJpd/sVUvS9x1MTMWvCRdEiIqIiOqLidrInUy5irsFJRr79x4/p8doiIiooZiojdydgqI6+2/nF+spEiIi0oaoiXrZsmV45JFHYGNjA1dXV4wePRqpqal17hMTEwOJRKL2Mjc311PEzY+ni0Od/a1d7PUTCBERaUXURB0fH48pU6bg+PHjOHDgACorKzF48GCUlGi+VAsAtra2yMrKUr2uXbump4ibn7D2XvBy1ZysR/YK0WM0RETUUKLO+t63b5/a+5iYGLi6uuL06dPo3bu3xv0kEgnc3d2bOjyjIJFIMP+FEZizekeNy9xPDnwEvYJrFmwgIiLDYVCPZxUUFAAAHB0d69yuuLgYPj4+UCq
VCA8Px9KlS9GpUyd9hNgs+bg5IuY/kxB35i+156i93er+/0xEROIzmEStVCrx2muvoWfPnggKCtK4nb+/PzZs2IDg4GAUFBTgww8/RGRkJM6fP4/WrVvX2L68vBzl5eWq98XFLXPylJmpCQZ3C8TgboFih0JERA1gMIl6ypQpOHfuHA4fPlzndhEREYiIiFC9j4yMRMeOHbFmzRosXry4xvbLli3DwoULdR4vERGRPhjE41lTp07F7t27cfDgwVrPiutiamqKsLAwXL58udb+OXPmoKCgQPWKj4/XRchGqbDkHnIL657IR0RE+iXqGbUgCJg2bRp+/PFHxMXFoU2bNg0eQ6FQIDk5GcOGDau1Xy6XQy6Xq95bW1trHa+xunA1E+t3H8HZtJsAAF93JzwzuBv6hvmLHBkREYmaqKdMmYLNmzfjp59+go2NDbKzswEAdnZ2sLCwAABMmDABnp6eWLZsGQBg0aJF6NGjB9q1a4f8/Hx88MEHuHbtGl588UXRfo7m7OK1bMz6YjsqKhWqtqvZd7Fk416UVVRhSHdO0iMiEpOoiXrVqlUAgL59+6q1f/XVV5g0aRIAICMjA1Lp31fo8/LyEB0djezsbDg4OKBLly44evQoAgM5SUobG/cdU0vSD/p67zEM6toRMln97pA42Fiq/ZeIiBpP9EvfDxMXF6f2/pNPPsEnn3zSRBEZr9zCEuw9fg6p12/B2kKOAV0C0LmtJ06lal4s5k5BMVKv30Kgr0e9jvHFG0/rKlwiIvofg5n1TU3nfHom3l67EyVlf1fQOpCQgv7h/njYdyWl8uFfpoiIqOkYxKxvajoKhRJLNu5RS9L3/Z6YitZ1LC9qb22BDt6uTRkeERE9BBO1kUu4eLXOClkyqRRSqaTWvicHPAIzE150IaLmJzc3F8888wxsbW1hb2+PyZMnP3TBq759+9Yo+vTvf/9bTxFrxkRt5G7n113msrSsAosmj4Svu5OqzcnOClPG9MXYvuFNHR4RUZN45plncP78eRw4cAC7d+/GoUOH8NJLLz10v+joaLWiT++//74eoq0bT5eMXCtn+zr7PV3s0T2wDboHtkHGrVxUKRTwcXOq90xvIiJDk5KSgn379iEhIQFdu3YFAPz3v//FsGHD8OGHH6JVq1Ya97W0tDS4ok/8bWzkwjt4w7OOmtMjIoNVf/Z2c4RfKxcmaSJq1o4dOwZ7e3tVkgaAgQMHQiqV4sSJE3Xuu2nTJjg7OyMoKAhz5sxBaWlpU4f7UDyjNnISiQTzn38Ms1f/WGN50P/rG47eoe1FioyIqFpxcTEKCwtV7/+5omRDZWdnw9VVfSKsiYkJHB0dVQtr1ebpp5+Gj48PWrVqhbNnz+Ktt95CamoqduzYoXUsusBE3QK08XBGzH8m4eCZVPyVkQ0rCzkGdumINq2cxQ6NiAh9+vRRez9//nwsWLCgxnazZ8/Ge++9V+dYKSkpWsfx4D3szp07w8PDAwMGDEBaWhratm2r9biNxUTdQljITTGsRxCG9dBcQpSISAzx8fEIDQ1Vvdd0Nv3GG2+oVq3UxM/PD+7u7sjJyVFrr6qqQm5uboPuP3fv3h0AcPnyZSZq0h2FUonLN3KgFAS0b+0KE5lM7JCIiOpkbW0NW1vbh27n4uICFxeXh24XERGB/Px8nD59Gl26dAEA/P7771AqlarkWx9JSUkAAA+P+q3O2FSYqI3IrycvIGbvUdVz0w42lnh6UDeMfjRU3MCIiPSoY8eOGDJkCKKjo7F69WpUVlZi6tSpePLJJ1Uzvm/evIkBAwZg48aN6NatG9LS0rB582YMGzYMTk5OOHv2LF5//XX07t0bwcHBDzli02KiNhLxZ/7CB1t+VWvLKyrFyh1xkEmlGNFT3L9oRET6tGnTJkydOhUDBgyAVCrF2LFj8dlnn6n6KysrkZqaqprVbWZmht9++w2ffvopSkpK4OXlhbFjx+Kdd94R60dQYaI2Et/+qvmRgy2/ncS
wiCDIpHzsiohaBkdHR2zevFljv6+vr1phKC8vL8THx+sjtAbjb24jcLegBFez72rsv51fjIxbuXqMiIiIdIVn1EbApB4LlEglEhxKuoTU69mwsTBHv3B/uDk+fPIGERGJi4naCNhZWyCoTSucS8+std/TxR7z1u9C5p0CVdtXe45iwtAIPDOom77CJCIiLfDSt5F4YXhPmJrUfBRLKpWgqkqplqQBQCkIiNlzFCfOp+srRCIi0gITtZHo3NYTH7wyFuEdvCH5X9XKzn6eePGxXriVV6hxv52Hk/QTIBERaYWXvo1Ipzat8N7LY1BWUQlBEGAhN8PPR/6sc59r2ZxkRkRkyJiojZC5manqzw42VnVu62Rr2dThEBFRI/DSt5HrHtgGDjaak3FU9056jIaIiBqKidrImZrI8ObTUZCb1rx4Ehnkh6HdWaSDiMiQ8dJ3C9A1wAdr33wWPx85i9Trt2BtIceALgHoFdyOq5URERk4JuoWopWzPf41qrfYYRARUQPxdIqIiMiAMVETEREZMCZqIiIiA8ZETUREZMCYqImIiAwYEzUREZEBY6ImIiIyYEzUREREBqzFLniSkpIidghE9D8eHh7w8PAQO4wGy8rKQlZWlthhNFv8PVw/LS5Re3h4oE+fPnj22WfFDoWI/mf+/PlYsGCB2GE02Jo1a7Bw4UKxw2jW+vTp0yy/pOmTRBAEQewg9K0lfwsuLi5Gnz59EB8fD2tra7HDIT0y5M+eZ9T1Z8ifozaa62evTy0yUbdkhYWFsLOzQ0FBAWxtbcUOh/SIn71x4OfY8nAyGRERkQFjoiYiIjJgTNQtjFwux/z58yGXy8UOhfSMn71x4OfY8vAeNRERkQHjGTUREZEBY6ImIiIyYEzUREREBoyJmhokLi4OEokE+fn5YodCRNQiMFGLKDs7G9OmTYOfnx/kcjm8vLwwYsQIxMbG6vQ4ffv2xWuvvabTMeuydu1a9O3bF7a2tkzqjSSRSOp8NWbZTYlEgp07dz50uyVLliAyMhKWlpawt7fX+ngtGT9HaowWt9a3obh69Sp69uwJe3t7fPDBB+jcuTMqKyuxf/9+TJkyBRcvXtRrPIIgQKFQwMSk8X8lSktLMWTIEAwZMgRz5szRQXQt14PLU27duhXz5s1Damqqqk0fS0hWVFRg3LhxiIiIwPr165v8eMaInyM1ikCiGDp0qODp6SkUFxfX6MvLy1P9+dq1a8LIkSMFKysrwcbGRhg3bpyQnZ2t6p8/f74QEhIibNy4UfDx8RFsbW2F8ePHC4WFhYIgCMLEiRMFAGqv9PR04eDBgwIAYc+ePUJ4eLhgamoqHDx4UCgrKxOmTZsmuLi4CHK5XOjZs6dw8uRJ1fHu7/dgjJo0ZFt6uK+++kqws7NTa1u3bp0QEBAgyOVywd/fX1i5cqWqr7y8XJgyZYrg7u4uyOVywdvbW1i6dKkgCILg4+Oj9nfCx8dHq+NTw/FzpIbiGbUIcnNzsW/fPixZsgRWVlY1+u9fllIqlRg1ahSsra0RHx+PqqoqTJkyBePHj0dcXJxq+7S0NOzcuRO7d+9GXl4ennjiCSxfvhxLlizBihUr8NdffyEoKAiLFi0CALi4uODq1asAgNmzZ+PDDz+En58fHBwc8Oabb2L79u34+uuv4ePjg/fffx9RUVG4fPkyHB0dm/p/DTXApk2bMG/ePHz++ecICwvDmTNnEB0dDSsrK0ycOBGfffYZdu3ahe+//x7e3t64fv06rl+/DgBISEiAq6srvvrqKwwZMgQymUzkn6bl4udID8NELYLLly9DEAQEBATUuV1sbCySk5ORnp4OLy8vAMDGjRvRqVMnJCQk4JFHHgFQndBjYmJgY2MDAHjuuecQGxuLJUuWwM7ODmZmZrC0tIS7u3uNYyxatAiDBg0CAJSUlGDVqlWIiYnB0KFDAQDr1q3DgQMHsH79esyaNUtn/w+o8ebPn4+PPvoIY8aMAQC0adMGFy5cwJo
1azBx4kRkZGSgffv26NWrFyQSCXx8fFT7uri4AKj+Uljb3wvSH36O9DCcTCYCoZ6LwaWkpMDLy0uVpAEgMDAQ9vb2agXXfX19VUkaqC4bl5OTU69jdO3aVfXntLQ0VFZWomfPnqo2U1NTdOvWjQXeDUxJSQnS0tIwefJkWFtbq17vvvsu0tLSAACTJk1CUlIS/P398eqrr+LXX38VOWr6J36OVB88oxZB+/btIZFIdDZhzNTUVO29RCKBUqms1761XXonw1dcXAyg+opH9+7d1fruX/4MDw9Heno69u7di99++w1PPPEEBg4ciG3btuk9XqodP0eqD55Ri8DR0RFRUVFYuXIlSkpKavTff5ypY8eOavejAODChQvIz89HYGBgvY9nZmYGhULx0O3atm0LMzMzHDlyRNVWWVmJhISEBh2Pmp6bmxtatWqFK1euoF27dmqvNm3aqLaztbXF+PHjsW7dOmzduhXbt29Hbm4ugOovePX5e0FNh58j1QfPqEWycuVK9OzZE926dcOiRYsQHByMqqoqHDhwAKtWrUJKSgoGDhyIzp0745lnnsGnn36KqqoqvPLKK+jTp4/aJeuH8fX1xYkTJ3D16lVYW1trnBRmZWWFl19+GbNmzYKjoyO8vb3x/vvvo7S0FJMnT6738bKzs5GdnY3Lly8DAJKTk2FjYwNvb29OSNOhhQsX4tVXX4WdnR2GDBmC8vJynDp1Cnl5eZgxYwY+/vhjeHh4ICwsDFKpFD/88APc3d1VkxV9fX0RGxuLnj17Qi6Xw8HBodbjZGRkIDc3FxkZGVAoFEhKSgIAtGvXTi+PFRk7fo70UGJPO2/JMjMzhSlTpgg+Pj6CmZmZ4OnpKYwcOVI4ePCgapv6Pp71oE8++UTtMY3U1FShR48egoWFRY3Hs/756NS9e/eEadOmCc7Ozlo/njV//vwaj4QBEL766ist/i/RfbU9VrNp0yYhNDRUMDMzExwcHITevXsLO3bsEARBENauXSuEhoYKVlZWgq2trTBgwAAhMTFRte+uXbuEdu3aCSYmJnU+1lPbI34A1P6eUv3xc6SGYplLIiIiA8Z71ERERAaMiZqIiMiAMVETEREZMCZqIiIiA8ZETURkgFj7ne5jojZQkyZNgkQiwfLly9Xad+7cCYlE0mTHzc3NxbRp0+Dv7w8LCwt4e3vj1VdfRUFBgdp2GRkZGD58OCwtLeHq6opZs2ahqqqqyeJqSfjZEwBERkYiKysLdnZ2YodCImOiNmDm5uZ47733kJeXp7djZmZmIjMzEx9++CHOnTuHmJgY7Nu3T23BE4VCgeHDh6OiogJHjx7F119/jZiYGMybN09vcRo7fvZkZmYGd3f3Jv1yRs2E2A9yU+0mTpwoPPbYY0JAQIAwa9YsVfuPP/4o6Ptj+/777wUzMzOhsrJSEARB2LNnjyCVStUWXlm1apVga2srlJeX6zU2Y8TP3jj16dNHmDp1qjB9+nTB3t5ecHV1FdauXSsUFxcLkyZNEqytrYW2bdsKe/bsEQSh5uJC9xdK2bdvnxAQECBYWVkJUVFRQmZmptoxpk+frnbcUaNGCRMnTlS9X7lypdCuXTtBLpcLrq6uwtixY5v6R6dG4hm1AZPJZFi6dCn++9//4saNG/Xeb+jQoWqVeP756tSpU4PiKCgogK2tLUxMqlecPXbsGDp37gw3NzfVNlFRUSgsLMT58+cbNDbVjp+9cfr666/h7OyMkydPYtq0aXj55Zcxbtw4REZGIjExEYMHD8Zzzz2H0tLSWvcvLS3Fhx9+iG+++QaHDh1CRkYGZs6cWe/jnzp1Cq+++ioWLVqE1NRU7Nu3D71799bVj0dNhGt9G7jHH38coaGhmD9/PtavX1+vfb788kvcu3dPY/8/q23V5c6dO1i8eDFeeuklVVt2drbaL2oAqvfZ2dn1Hpvqxs/e+ISEhOCdd94BAMyZMwfLly+Hs7MzoqOjAQDz5s3DqlWrcPbs2Vr3r6y
sxOrVq9G2bVsAwNSpU7Fo0aJ6Hz8jIwNWVlZ47LHHYGNjAx8fH4SFhTXyp6KmxkTdDLz33nvo379/vb85e3p66uS4hYWFGD58OAIDA7FgwQKdjEkNw8/euAQHB6v+LJPJ4OTkhM6dO6va7n/pycnJga2tbY39LS0tVUkaaFjteQAYNGgQfHx84OfnhyFDhmDIkCF4/PHHYWlpqc2PQ3rCS9/NQO/evREVFYU5c+bUa3tdXP4sKirCkCFDYGNjgx9//FHtTMzd3R23bt1S2/7+e3d39wb8ZPQw/OyNS2214x9suz9xTFM9+dr2Fx4o1yCVStXeA9Vn4ffZ2NggMTERW7ZsgYeHB+bNm4eQkBA+AmbgeEbdTCxfvhyhoaHw9/d/6LaNvfxZWFiIqKgoyOVy7Nq1C+bm5mr9ERERWLJkCXJycuDq6goAOHDgAGxtbVm3ugnws6f6cnFxQVZWluq9QqHAuXPn0K9fP1WbiYkJBg4ciIEDB2L+/Pmwt7fH77//jjFjxogRMtUDE3Uzcb8u9WefffbQbRtz+bOwsBCDBw9GaWkpvv32WxQWFqKwsBBA9S8BmUyGwYMHIzAwEM899xzef/99ZGdn45133sGUKVMgl8u1PjbVjp891Vf//v0xY8YM/PLLL2jbti0+/vhjtbPl3bt348qVK+jduzccHBywZ88eKJXKen0JJPEwUTcjixYtwtatW5v0GImJiThx4gSA6oLyD0pPT4evry9kMhl2796Nl19+GREREbCyssLEiRMbNKmFGoafPdXHCy+8gD///BMTJkyAiYkJXn/9dbWzaXt7e+zYsQMLFixAWVkZ2rdvjy1btjT4aQDSL9ajJiIiMmCcTEZERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERELQxrXTcvTNRERI2QnZ2NadOmwc/PD3K5HF5eXhgxYgRiY2N1epy+ffvitdde0+mYdVm7di369u0LW1tbJnWRMVETEWnp6tWr6NKlC37//Xd88MEHSE5Oxr59+9CvXz9MmTJF7/EIgoCqqiqdjFVaWoohQ4bgP//5j07Go0YQtcgmEVEzNnToUMHT01MoLi6u0Xe/jrQgCMK1a9eEkSNHClZWVoKNjY0wbtw4tZre8+fPF0JCQoSNGzcKPj4+gq2trTB+/HihsLBQEITqGuUA1F7p6emqmtV79uwRwsPDBVNTU+HgwYNCWVmZMG3aNMHFxUWQy+VCz549hZMnT6qO989a13VpyLbUNHhGTUSkhdzcXOzbtw9TpkyBlZVVjX57e3sA1QU2Ro0ahdzcXMTHx+PAgQO4cuUKxo8fr7Z9Wloadu7cid27d2P37t2Ij4/H8uXLAQArVqxAREQEoqOjkZWVhaysLHh5ean2nT17NpYvX46UlBQEBwfjzTffxPbt2/H1118jMTER7dq1Q1RUFHJzc5vufwg1GS4hSkSkhcuXL0MQBAQEBNS5XWxsLJKTk5Genq5Krhs3bkSnTp2QkJCARx55BEB1Qo+JiYGNjQ0A4LnnnkNsbCyWLFkCOzs7mJmZwdLSstYqZYsWLcKgQYMAACUlJVi1ahViYmIwdOhQAMC6detw4MABrF+/HrNmzdLZ/wPSD55RExFpQajn6sspKSnw8vJSOwMODAyEvb09UlJSVG2+vr6qJA00rNZ0165dVX9OS0tDZWUlevbsqWozNTVFt27d1I5HzQcTNRGRFtq3bw+JRIKLFy/qZLzaak1rqkv9T7VdeifjwURNRKQFR0dHREVFYeXKlSgpKanRf/9xpo4dO+L69eu4fv26qu/ChQvIz89vUA1vMzMzKBSKh27Xtm1bmJmZ4ciRI6q2yspKJCQksGZ4M8VETUSkpZUrV0KhUKBbt27Yvn07Ll26hJSUFHz22WeIiIgAAAwcOFBVUzwxMREnT57EhAkT0KdPH7VL1g/j6+uLEydO4OrVq7hz547Gs20rKyu8/PLLmDVrFvbt24cLFy4gOjoapaWlmDx5cr2
Pl52djaSkJFy+fBkAkJycjKSkJE5IEwETNRGRlvz8/JCYmIh+/frhjTfeQFBQEAYNGoTY2FisWrUKQPUl7J9++gkODg7o3bs3Bg4cCD8/vwbXF585cyZkMhkCAwPh4uKCjIwMjdsuX74cY8eOxXPPPYfw8HBcvnwZ+/fvh4ODQ72Pt3r1aoSFhSE6OhoA0Lt3b4SFhWHXrl0Nipsaj/WoiYiIDBjPqImIiAwYEzUREZEBY6ImIiIyYEzUREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQFjoiYiIjJgTNREREQG7P8BAzIgTT7Tai8AAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig1 = my_data.mean_diff.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a37d4519", - "metadata": {}, - "source": [ - " Create a Gardner-Altman plot for the Hedges' g effect size." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5e9cac0b", - "metadata": {}, - "outputs": [], - "source": [ - "fig2 = my_data.hedges_g.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f40f8fe0", - "metadata": {}, - "source": [ - "Create a Cumming estimation plot for the mean difference." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0e6a68e", - "metadata": {}, - "outputs": [], - "source": [ - "fig3 = my_data.mean_diff.plot(float_contrast=True);" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "1ee59074", - "metadata": {}, - "source": [ - " Create a paired Gardner-Altman plot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "89a19ee0", - "metadata": {}, - "outputs": [], - "source": [ - "my_data_paired = dabest.load(df, idx=(\"Control 1\", \"Test 1\"),\n", - " id_col = \"ID\", paired='baseline')\n", - "fig4 = my_data_paired.mean_diff.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3c37066a", - "metadata": {}, - "source": [ - "Create a multi-group Cumming plot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "896cac2a", - "metadata": {}, - "outputs": [], - "source": [ - "my_multi_groups = dabest.load(df, id_col = \"ID\", \n", - " idx=((\"Control 1\", \"Test 1\"),\n", - " (\"Control 2\", \"Test 2\")))\n", - "fig5 = my_multi_groups.mean_diff.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "de81e2e4", - "metadata": {}, - "source": [ - "Create a shared control Cumming plot." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f7d518b5", - "metadata": {}, - "outputs": [], - "source": [ - "my_shared_control = dabest.load(df, id_col = \"ID\",\n", - " idx=(\"Control 1\", \"Test 1\",\n", - " \"Test 2\", \"Test 3\"))\n", - "fig6 = my_shared_control.mean_diff.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c80ba34f", - "metadata": {}, - "source": [ - "Create a repeated meausures (against baseline) Slopeplot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d46fd3a", - "metadata": {}, - "outputs": [], - "source": [ - "my_rm_baseline = dabest.load(df, id_col = \"ID\", paired = \"baseline\",\n", - " idx=(\"Control 1\", \"Test 1\",\n", - " \"Test 2\", \"Test 3\"))\n", - "fig7 = my_rm_baseline.mean_diff.plot();" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "4eaf4362", - "metadata": {}, - "source": [ - "Create a repeated meausures (sequential) Slopeplot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4b6a3727", - "metadata": {}, - "outputs": [], - "source": [ - "my_rm_sequential = dabest.load(df, id_col = \"ID\", paired = \"sequential\",\n", - " idx=(\"Control 1\", \"Test 1\",\n", - " \"Test 2\", \"Test 3\"))\n", - "fig8 = my_rm_sequential.mean_diff.plot();" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d22bdc4c", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "class PermutationTest:\n", - " \"\"\"\n", - " A class to compute and report permutation tests.\n", - " \n", - " Parameters\n", - " ----------\n", - " control : array-like\n", - " test : array-like\n", - " These should be numerical iterables.\n", - " effect_size : string.\n", - " Any one of the following are accepted inputs:\n", - " 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', 'delta_g\" or 'cliffs_delta'\n", - " is_paired : string, default None\n", - " permutation_count : int, default 10000\n", - " The number of 
permutations (reshuffles) to perform.\n", - " random_seed : int, default 12345\n", - " `random_seed` is used to seed the random number generator during\n", - " bootstrap resampling. This ensures that the generated permutations\n", - " are replicable.\n", - " \n", - " Returns\n", - " -------\n", - " A :py:class:`PermutationTest` object:\n", - " `difference`:float\n", - " The effect size of the difference between the control and the test.\n", - " `effect_size`:string\n", - " The type of effect size reported.\n", - " \n", - " \n", - " \"\"\"\n", - " \n", - " def __init__(self, control:np.array,\n", - " test:np.array, # These should be numerical iterables.\n", - " effect_size:str, # Any one of the following are accepted inputs: 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'\n", - " is_paired:str=None,\n", - " permutation_count:int=5000, # The number of permutations (reshuffles) to perform.\n", - " random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. 
This ensures that the generated permutations are replicable.\n", - " **kwargs):\n", - " \n", - " import numpy as np\n", - " from numpy.random import PCG64, RandomState\n", - " from ._stats_tools.effsize import two_group_difference\n", - " from ._stats_tools.confint_2group_diff import calculate_group_var\n", - " \n", - "\n", - " self.__permutation_count = permutation_count\n", - "\n", - " # Run Sanity Check.\n", - " if is_paired and len(control) != len(test):\n", - " raise ValueError(\"The two arrays do not have the same length.\")\n", - "\n", - " # Initialise random number generator.\n", - " # rng = np.random.default_rng(seed=random_seed)\n", - " rng = RandomState(PCG64(random_seed))\n", - "\n", - " # Set required constants and variables\n", - " control = np.array(control)\n", - " test = np.array(test)\n", - "\n", - " control_sample = control.copy()\n", - " test_sample = test.copy()\n", - "\n", - " BAG = np.array([*control, *test])\n", - " CONTROL_LEN = int(len(control))\n", - " EXTREME_COUNT = 0.\n", - " THRESHOLD = np.abs(two_group_difference(control, test, \n", - " is_paired, effect_size))\n", - " self.__permutations = []\n", - " self.__permutations_var = []\n", - "\n", - " for i in range(int(permutation_count)):\n", - " \n", - " if is_paired:\n", - " # Select which control-test pairs to swap.\n", - " random_idx = rng.choice(CONTROL_LEN,\n", - " rng.randint(0, CONTROL_LEN+1),\n", - " replace=False)\n", - "\n", - " # Perform swap.\n", - " for i in random_idx:\n", - " _placeholder = control_sample[i]\n", - " control_sample[i] = test_sample[i]\n", - " test_sample[i] = _placeholder\n", - " \n", - " else:\n", - " # Shuffle the bag and assign to control and test groups.\n", - " # NB. 
rng.shuffle didn't produce replicable results...\n", - " shuffled = rng.permutation(BAG) \n", - " control_sample = shuffled[:CONTROL_LEN]\n", - " test_sample = shuffled[CONTROL_LEN:]\n", - "\n", - "\n", - " es = two_group_difference(control_sample, test_sample, \n", - " False, effect_size)\n", - " \n", - " var = calculate_group_var(np.var(control_sample, ddof=1), \n", - " CONTROL_LEN, \n", - " np.var(test_sample, ddof=1), \n", - " len(test_sample))\n", - " self.__permutations.append(es)\n", - " self.__permutations_var.append(var)\n", - "\n", - " if np.abs(es) > THRESHOLD:\n", - " EXTREME_COUNT += 1.\n", - "\n", - " self.__permutations = np.array(self.__permutations)\n", - " self.__permutations_var = np.array(self.__permutations_var)\n", - "\n", - " self.pvalue = EXTREME_COUNT / permutation_count\n", - "\n", - "\n", - " def __repr__(self):\n", - " return(\"{} permutations were taken. The p-value is {}.\".format(self.permutation_count, \n", - " self.pvalue))\n", - "\n", - "\n", - " @property\n", - " def permutation_count(self):\n", - " \"\"\"\n", - " The number of permuations taken.\n", - " \"\"\"\n", - " return self.__permutation_count\n", - "\n", - "\n", - " @property\n", - " def permutations(self):\n", - " \"\"\"\n", - " The effect sizes of all the permutations in a list.\n", - " \"\"\"\n", - " return self.__permutations\n", - "\n", - " \n", - " @property\n", - " def permutations_var(self):\n", - " \"\"\"\n", - " The experiment group variance of all the permutations in a list.\n", - " \"\"\"\n", - " return self.__permutations_var\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3214e42a", - "metadata": {}, - "source": [ - "**Notes**:\n", - " \n", - "The basic concept of permutation tests is the same as that behind bootstrapping.\n", - "In an \"exact\" permutation test, all possible resuffles of the control and test \n", - "labels are performed, and the proportion of effect sizes that equal or exceed \n", - "the observed effect size is 
computed. This is the probability, under the null \n", - "hypothesis of zero difference between test and control groups, of observing the\n", - "effect size: the p-value of the Student's t-test.\n", - "\n", - "Exact permutation tests are impractical: computing the effect sizes for all reshuffles quickly exceeds trivial computational loads. A control group and a test group both with 10 observations each would have a total of $20!$ or $2.43 \\times {10}^{18}$ reshuffles.\n", - "Therefore, in practice, \"approximate\" permutation tests are performed, where a sufficient number of reshuffles are performed (5,000 or 10,000), from which the p-value is computed.\n", - "\n", - "More information can be found [here](https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests).\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "cc181ae2", - "metadata": {}, - "source": [ - "#### Example: permutation test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3fc2c6b7", - "metadata": {}, - "outputs": [], - "source": [ - "control = norm.rvs(loc=0, size=30, random_state=12345)\n", - "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", - "perm_test = dabest.PermutationTest(control, test, \n", - " effect_size=\"mean_diff\", \n", - " is_paired=None)\n", - "perm_test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "07a84d5f", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "python3", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/nbs/API/confint_1group.ipynb b/nbs/API/confint_1group.ipynb index 1e547098..4dd133bb 100644 --- a/nbs/API/confint_1group.ipynb +++ b/nbs/API/confint_1group.ipynb @@ -54,7 +54,10 @@ "outputs": [], "source": [ "#|export\n", - "import numpy as np" + "import numpy as np\n", + "from numpy.random import PCG64, RandomState\n", + "from scipy.stats import norm\n", + 
"from numpy import sort as npsort" ] }, { @@ -64,24 +67,19 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", + "# |export\n", "def create_bootstrap_indexes(array, resamples=5000, random_seed=12345):\n", " \"\"\"Given an array-like, returns a generator of bootstrap indexes\n", " to be used for resampling.\n", " \"\"\"\n", - " import numpy as np\n", - " from numpy.random import PCG64, RandomState\n", + "\n", " rng = RandomState(PCG64(random_seed))\n", - " \n", + "\n", " indexes = range(0, len(array))\n", "\n", - " out = (rng.choice(indexes, len(indexes), replace=True)\n", - " for i in range(0, resamples))\n", - " \n", - " # Reset RNG\n", - " # rng = RandomState(MT19937())\n", - " return out\n", + " out = (rng.choice(indexes, len(indexes), replace=True) for i in range(0, resamples))\n", "\n", + " return out\n", "\n", "\n", "def compute_1group_jackknife(x, func, *args, **kwargs):\n", @@ -89,53 +87,53 @@ " Returns the jackknife bootstraps for func(x).\n", " \"\"\"\n", " from . import confint_2group_diff as ci_2g\n", + "\n", " jackknives = [i for i in ci_2g.create_jackknife_indexes(x)]\n", " out = [func(x[j], *args, **kwargs) for j in jackknives]\n", - " del jackknives # memory management.\n", + " del jackknives # memory management.\n", " return out\n", "\n", "\n", - "\n", "def compute_1group_acceleration(jack_dist):\n", " from . 
import confint_2group_diff as ci_2g\n", - " return ci_2g._calc_accel(jack_dist)\n", "\n", + " return ci_2g._calc_accel(jack_dist)\n", "\n", "\n", - "def compute_1group_bootstraps(x, func, resamples=5000, random_seed=12345,\n", - " *args, **kwargs):\n", + "def compute_1group_bootstraps(\n", + " x, func, resamples=5000, random_seed=12345, *args, **kwargs\n", + "):\n", " \"\"\"Bootstraps func(x), with the number of specified resamples.\"\"\"\n", "\n", - " import numpy as np\n", - " \n", " # Create bootstrap indexes.\n", - " boot_indexes = create_bootstrap_indexes(x, resamples=resamples,\n", - " random_seed=random_seed)\n", + " boot_indexes = create_bootstrap_indexes(\n", + " x, resamples=resamples, random_seed=random_seed\n", + " )\n", "\n", " out = [func(x[b], *args, **kwargs) for b in boot_indexes]\n", - " \n", + "\n", " del boot_indexes\n", - " \n", - " return out\n", "\n", + " return out\n", "\n", "\n", "def compute_1group_bias_correction(x, bootstraps, func, *args, **kwargs):\n", - " from scipy.stats import norm\n", " metric = func(x, *args, **kwargs)\n", " prop_boots_less_than_metric = sum(bootstraps < metric) / len(bootstraps)\n", "\n", " return norm.ppf(prop_boots_less_than_metric)\n", "\n", "\n", - "\n", - "def summary_ci_1group(x:np.array,# An numerical iterable.\n", - " func, #The function to be applied to x.\n", - " resamples:int=5000, #The number of bootstrap resamples to be taken of func(x).\n", - " alpha:float=0.05, #Denotes the likelihood that the confidence interval produced _does not_ include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced.\n", - " random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. 
This ensures that the confidence intervals reported are replicable.\n", - " sort_bootstraps:bool=True, \n", - " *args, **kwargs):\n", + "def summary_ci_1group(\n", + " x: np.array, # An numerical iterable.\n", + " func, # The function to be applied to x.\n", + " resamples: int = 5000, # The number of bootstrap resamples to be taken of func(x).\n", + " alpha: float = 0.05, # Denotes the likelihood that the confidence interval produced _does not_ include the true summary statistic. When alpha = 0.05, a 95% confidence interval is produced.\n", + " random_seed: int = 12345, # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.\n", + " sort_bootstraps: bool = True,\n", + " *args,\n", + " **kwargs\n", + "):\n", " \"\"\"\n", " Given an array-like x, returns func(x), and a bootstrap confidence\n", " interval of func(x).\n", @@ -158,11 +156,10 @@ "\n", " \"\"\"\n", " from . import confint_2group_diff as ci2g\n", - " from numpy import sort as npsort\n", "\n", - " boots = compute_1group_bootstraps(x, func, resamples=resamples,\n", - " random_seed=random_seed,\n", - " *args, **kwargs)\n", + " boots = compute_1group_bootstraps(\n", + " x, func, resamples=resamples, random_seed=random_seed, *args, **kwargs\n", + " )\n", " bias = compute_1group_bias_correction(x, boots, func)\n", "\n", " jk = compute_1group_jackknife(x, func, *args, **kwargs)\n", @@ -183,12 +180,16 @@ " del boots\n", " del boots_sorted\n", "\n", - " out = {'summary': func(x), 'func': func,\n", - " 'bca_ci_low': low, 'bca_ci_high': high,\n", - " 'bootstraps': B}\n", + " out = {\n", + " \"summary\": func(x),\n", + " \"func\": func,\n", + " \"bca_ci_low\": low,\n", + " \"bca_ci_high\": high,\n", + " \"bootstraps\": B,\n", + " }\n", "\n", " del B\n", - " return out\n" + " return out" ] }, { diff --git a/nbs/API/confint_2group_diff.ipynb b/nbs/API/confint_2group_diff.ipynb index c2285f8f..0f01ea27 100644 --- 
a/nbs/API/confint_2group_diff.ipynb +++ b/nbs/API/confint_2group_diff.ipynb @@ -54,8 +54,15 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", - "import numpy as np" + "#| export\n", + "import numpy as np\n", + "from numpy import arange, delete, errstate\n", + "from numpy import mean as npmean\n", + "from numpy import sum as npsum\n", + "from numpy.random import PCG64, RandomState\n", + "import pandas as pd\n", + "from scipy.stats import norm\n", + "from numpy import isnan" ] }, { @@ -65,7 +72,7 @@ "metadata": {}, "outputs": [], "source": [ - "#|export\n", + "#| export\n", "def create_jackknife_indexes(data):\n", " \"\"\"\n", " Given an array-like, creates a jackknife bootstrap.\n", @@ -81,43 +88,45 @@ " -------\n", " Generator that yields all jackknife bootstrap samples.\n", " \"\"\"\n", - " from numpy import arange, delete\n", "\n", " index_range = arange(0, len(data))\n", " return (delete(index_range, i) for i in index_range)\n", "\n", "\n", - "\n", "def create_repeated_indexes(data):\n", " \"\"\"\n", " Convenience function. 
Given an array-like with length N,\n", " returns a generator that yields N indexes [0, 1, ..., N].\n", " \"\"\"\n", - " from numpy import arange\n", "\n", " index_range = arange(0, len(data))\n", " return (index_range for i in index_range)\n", "\n", "\n", - "\n", "def _create_two_group_jackknife_indexes(x0, x1, is_paired):\n", " \"\"\"Creates the jackknife bootstrap for 2 groups.\"\"\"\n", "\n", " if is_paired and len(x0) == len(x1):\n", - " out = list(zip([j for j in create_jackknife_indexes(x0)],\n", - " [i for i in create_jackknife_indexes(x1)]\n", - " )\n", - " )\n", + " out = list(\n", + " zip(\n", + " [j for j in create_jackknife_indexes(x0)],\n", + " [i for i in create_jackknife_indexes(x1)],\n", + " )\n", + " )\n", " else:\n", - " jackknife_c = list(zip([j for j in create_jackknife_indexes(x0)],\n", - " [i for i in create_repeated_indexes(x1)]\n", - " )\n", - " )\n", - "\n", - " jackknife_t = list(zip([i for i in create_repeated_indexes(x0)],\n", - " [j for j in create_jackknife_indexes(x1)]\n", - " )\n", - " )\n", + " jackknife_c = list(\n", + " zip(\n", + " [j for j in create_jackknife_indexes(x0)],\n", + " [i for i in create_repeated_indexes(x1)],\n", + " )\n", + " )\n", + "\n", + " jackknife_t = list(\n", + " zip(\n", + " [i for i in create_repeated_indexes(x0)],\n", + " [j for j in create_jackknife_indexes(x1)],\n", + " )\n", + " )\n", " out = jackknife_c + jackknife_t\n", " del jackknife_c\n", " del jackknife_t\n", @@ -125,7 +134,6 @@ " return out\n", "\n", "\n", - "\n", "def compute_meandiff_jackknife(x0, x1, is_paired, effect_size):\n", " \"\"\"\n", " Given two arrays, returns the jackknife for their effect size.\n", @@ -140,46 +148,37 @@ " x0_shuffled = x0[j[0]]\n", " x1_shuffled = x1[j[1]]\n", "\n", - " es = __es.two_group_difference(x0_shuffled, x1_shuffled,\n", - " is_paired, effect_size)\n", + " es = __es.two_group_difference(x0_shuffled, x1_shuffled, is_paired, effect_size)\n", " out.append(es)\n", "\n", " return out\n", "\n", "\n", - "\n", 
"def _calc_accel(jack_dist):\n", - " from numpy import mean as npmean\n", - " from numpy import sum as npsum\n", - " from numpy import errstate\n", - "\n", " jack_mean = npmean(jack_dist)\n", "\n", - " numer = npsum((jack_mean - jack_dist)**3)\n", - " denom = 6.0 * (npsum((jack_mean - jack_dist)**2) ** 1.5)\n", + " numer = npsum((jack_mean - jack_dist) ** 3)\n", + " denom = 6.0 * (npsum((jack_mean - jack_dist) ** 2) ** 1.5)\n", "\n", - " with errstate(invalid='ignore'):\n", + " with errstate(invalid=\"ignore\"):\n", " # does not raise warning if invalid division encountered.\n", " return numer / denom\n", "\n", "\n", - "def compute_bootstrapped_diff(x0, x1, is_paired, effect_size,\n", - " resamples=5000, random_seed=12345):\n", + "def compute_bootstrapped_diff(\n", + " x0, x1, is_paired, effect_size, resamples=5000, random_seed=12345\n", + "):\n", " \"\"\"Bootstraps the effect_size for 2 groups.\"\"\"\n", - " \n", + "\n", " from . import effsize as __es\n", - " import numpy as np\n", - " from numpy.random import PCG64, RandomState\n", - " \n", - " # rng = RandomState(default_rng(random_seed))\n", + "\n", " rng = RandomState(PCG64(random_seed))\n", "\n", " out = np.repeat(np.nan, resamples)\n", " x0_len = len(x0)\n", " x1_len = len(x1)\n", - " \n", + "\n", " for i in range(int(resamples)):\n", - " \n", " if is_paired:\n", " if x0_len != x1_len:\n", " raise ValueError(\"The two arrays do not have the same length.\")\n", @@ -189,45 +188,27 @@ " else:\n", " x0_sample = rng.choice(x0, x0_len, replace=True)\n", " x1_sample = rng.choice(x1, x1_len, replace=True)\n", - " \n", - " out[i] = __es.two_group_difference(x0_sample, x1_sample,\n", - " is_paired, effect_size)\n", - " \n", - " # check whether there are any infinities in the bootstrap,\n", - " # which likely indicates the sample sizes are too small as\n", - " # the computation of Cohen's d and Hedges' g necessitated \n", - " # a division by zero.\n", - " # Added in v0.2.6.\n", - " \n", - " # num_infinities = 
len(out[np.isinf(out)])\n", - " # print(num_infinities)\n", - " # if num_infinities > 0:\n", - " # warn_msg = \"There are {} bootstraps that are not defined. \"\\\n", - " # \"This is likely due to smaple sample sizes. \"\\\n", - " # \"The values in a bootstrap for a group will be more likely \"\\\n", - " # \"to be all equal, with a resulting variance of zero. \"\\\n", - " # \"The computation of Cohen's d and Hedges' g will therefore \"\\\n", - " # \"involved a division by zero. \"\n", - " # warnings.warn(warn_msg.format(num_infinities), category=\"UserWarning\")\n", - " \n", + "\n", + " out[i] = __es.two_group_difference(x0_sample, x1_sample, is_paired, effect_size)\n", + "\n", " return out\n", "\n", - "def compute_delta2_bootstrapped_diff(x1:np.ndarray,# Control group 1\n", - " x2:np.ndarray,# Test group 1\n", - " x3:np.ndarray,# Control group 2\n", - " x4:np.ndarray,# Test group 2\n", - " is_paired:str=None,\n", - " resamples:int=5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.\n", - " random_seed:int=12345# `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.\n", - " )->tuple: # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta\n", + "\n", + "def compute_delta2_bootstrapped_diff(\n", + " x1: np.ndarray, # Control group 1\n", + " x2: np.ndarray, # Test group 1\n", + " x3: np.ndarray, # Control group 2\n", + " x4: np.ndarray, # Test group 2\n", + " is_paired: str = None,\n", + " resamples: int = 5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.\n", + " random_seed: int = 12345, # `random_seed` is used to seed the random number generator during bootstrap resampling. 
This ensures that the confidence intervals reported are replicable.\n", + ") -> (\n", + " tuple\n", + "): # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta\n", " \"\"\"\n", " Bootstraps the effect size deltas' g.\n", - " \n", - " \"\"\"\n", "\n", - " import numpy as np\n", - " import pandas as pd\n", - " from numpy.random import PCG64, RandomState\n", + " \"\"\"\n", "\n", " rng = RandomState(PCG64(random_seed))\n", " x1_len = len(x1)\n", @@ -237,11 +218,15 @@ " out_delta_g = np.repeat(np.nan, resamples)\n", " deltadelta = np.repeat(np.nan, resamples)\n", "\n", - " n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2= x1_len, x2_len, x3_len, x4_len\n", + " n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2 = x1_len, x2_len, x3_len, x4_len\n", " s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4)\n", "\n", - " sd_numerator = ((n_a2_b1 - 1) * s_a2_b1 ** 2 + (n_a1_b1 - 1) * s_a1_b1 ** 2 + (n_a2_b2 - 1) * s_a2_b2 ** 2 + (\n", - " n_a1_b2 - 1) * s_a1_b2 ** 2)\n", + " sd_numerator = (\n", + " (n_a2_b1 - 1) * s_a2_b1**2\n", + " + (n_a1_b1 - 1) * s_a1_b1**2\n", + " + (n_a2_b2 - 1) * s_a2_b2**2\n", + " + (n_a1_b2 - 1) * s_a1_b2**2\n", + " )\n", " sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1)\n", " pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)\n", "\n", @@ -249,46 +234,58 @@ " if is_paired:\n", " if (x1_len != x2_len) or (x3_len != x4_len):\n", " raise ValueError(\"The two arrays do not have the same length.\")\n", - " df_paired_1 = pd.DataFrame({\n", - " 'value': np.concatenate([x1, x3]),\n", - " 'array_id': np.repeat(['x1','x3'], [x1_len, x3_len])\n", - " })\n", - " df_paired_2 = pd.DataFrame({\n", - " 'value': np.concatenate([x2, x4]),\n", - " 'array_id': np.repeat(['x2','x4'], [x1_len, x3_len])\n", - " })\n", - " x_sample_index = rng.choice(len(df_paired_1), len(df_paired_1), replace=True)\n", + " df_paired_1 = pd.DataFrame(\n", + " {\n", + " \"value\": np.concatenate([x1, x3]),\n", 
+ " \"array_id\": np.repeat([\"x1\", \"x3\"], [x1_len, x3_len]),\n", + " }\n", + " )\n", + " df_paired_2 = pd.DataFrame(\n", + " {\n", + " \"value\": np.concatenate([x2, x4]),\n", + " \"array_id\": np.repeat([\"x2\", \"x4\"], [x1_len, x3_len]),\n", + " }\n", + " )\n", + " x_sample_index = rng.choice(\n", + " len(df_paired_1), len(df_paired_1), replace=True\n", + " )\n", " x_sample_1 = df_paired_1.loc[x_sample_index]\n", " x_sample_2 = df_paired_2.loc[x_sample_index]\n", - " x1_sample = x_sample_1[x_sample_1['array_id'] == 'x1']['value']\n", - " x2_sample = x_sample_2[x_sample_2['array_id'] == 'x2']['value']\n", - " x3_sample = x_sample_1[x_sample_1['array_id'] == 'x3']['value']\n", - " x4_sample = x_sample_2[x_sample_2['array_id'] == 'x4']['value']\n", + " x1_sample = x_sample_1[x_sample_1[\"array_id\"] == \"x1\"][\"value\"]\n", + " x2_sample = x_sample_2[x_sample_2[\"array_id\"] == \"x2\"][\"value\"]\n", + " x3_sample = x_sample_1[x_sample_1[\"array_id\"] == \"x3\"][\"value\"]\n", + " x4_sample = x_sample_2[x_sample_2[\"array_id\"] == \"x4\"][\"value\"]\n", " else:\n", - " df = pd.DataFrame({\n", - " 'value': np.concatenate([x1, x2, x3, x4]),\n", - " 'array_id': np.repeat(['x1', 'x2', 'x3', 'x4'], [x1_len, x2_len, x3_len, x4_len])\n", - " })\n", - " x_sample_index = rng.choice(len(df),len(df), replace=True)\n", + " df = pd.DataFrame(\n", + " {\n", + " \"value\": np.concatenate([x1, x2, x3, x4]),\n", + " \"array_id\": np.repeat(\n", + " [\"x1\", \"x2\", \"x3\", \"x4\"], [x1_len, x2_len, x3_len, x4_len]\n", + " ),\n", + " }\n", + " )\n", + " x_sample_index = rng.choice(len(df), len(df), replace=True)\n", " x_sample = df.loc[x_sample_index]\n", - " x1_sample = x_sample[x_sample['array_id'] == 'x1']['value']\n", - " x2_sample = x_sample[x_sample['array_id'] == 'x2']['value']\n", - " x3_sample = x_sample[x_sample['array_id'] == 'x3']['value']\n", - " x4_sample = x_sample[x_sample['array_id'] == 'x4']['value']\n", + " x1_sample = x_sample[x_sample[\"array_id\"] == 
\"x1\"][\"value\"]\n", + " x2_sample = x_sample[x_sample[\"array_id\"] == \"x2\"][\"value\"]\n", + " x3_sample = x_sample[x_sample[\"array_id\"] == \"x3\"][\"value\"]\n", + " x4_sample = x_sample[x_sample[\"array_id\"] == \"x4\"][\"value\"]\n", "\n", - " delta_1 = np.mean(x2_sample)-np.mean(x1_sample)\n", - " delta_2 = np.mean(x4_sample)-np.mean(x3_sample)\n", + " delta_1 = np.mean(x2_sample) - np.mean(x1_sample)\n", + " delta_2 = np.mean(x4_sample) - np.mean(x3_sample)\n", " delta_delta = delta_2 - delta_1\n", " deltadelta[i] = delta_delta\n", - " out_delta_g[i] = delta_delta/pooled_sample_sd\n", - " delta_g = ((np.mean(x4)-np.mean(x3)) - (np.mean(x2)-np.mean(x1))) / pooled_sample_sd\n", + " out_delta_g[i] = delta_delta / pooled_sample_sd\n", + " delta_g = (\n", + " (np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))\n", + " ) / pooled_sample_sd\n", " return out_delta_g, delta_g, deltadelta\n", "\n", "\n", - "\n", - "def compute_meandiff_bias_correction(bootstraps, #An numerical iterable, comprising bootstrap resamples of the effect size.\n", - " effsize # The effect size for the original sample.\n", - " ): #The bias correction value for the given bootstraps and effect size.\n", + "def compute_meandiff_bias_correction(\n", + " bootstraps, # An numerical iterable, comprising bootstrap resamples of the effect size.\n", + " effsize, # The effect size for the original sample.\n", + "): # The bias correction value for the given bootstraps and effect size.\n", " \"\"\"\n", " Computes the bias correction required for the BCa method\n", " of confidence interval construction.\n", @@ -300,22 +297,18 @@ " and effect size.\n", "\n", " \"\"\"\n", - " from scipy.stats import norm\n", - " from numpy import array\n", "\n", - " B = array(bootstraps)\n", + " B = np.array(bootstraps)\n", " prop_less_than_es = sum(B < effsize) / len(B)\n", "\n", " return norm.ppf(prop_less_than_es)\n", "\n", "\n", - "\n", "def _compute_alpha_from_ci(ci):\n", " if ci < 0 or ci > 100:\n", " raise 
ValueError(\"`ci` must be a number between 0 and 100.\")\n", "\n", - " return (100. - ci) / 100.\n", - "\n", + " return (100.0 - ci) / 100.0\n", "\n", "\n", "def _compute_quantile(z, bias, acceleration):\n", @@ -325,15 +318,12 @@ " return bias + (numer / denom)\n", "\n", "\n", - "\n", "def compute_interval_limits(bias, acceleration, n_boots, ci=95):\n", " \"\"\"\n", " Returns the indexes of the interval limits for a given bootstrap.\n", "\n", " Supply the bias, acceleration factor, and number of bootstraps.\n", " \"\"\"\n", - " from scipy.stats import norm\n", - " from numpy import isnan, nan\n", "\n", " alpha = _compute_alpha_from_ci(ci)\n", "\n", @@ -343,7 +333,7 @@ " z_low = norm.ppf(alpha_low)\n", " z_high = norm.ppf(alpha_high)\n", "\n", - " kws = {'bias': bias, 'acceleration': acceleration}\n", + " kws = {\"bias\": bias, \"acceleration\": acceleration}\n", " low = _compute_quantile(z_low, **kws)\n", " high = _compute_quantile(z_high, **kws)\n", "\n", @@ -356,21 +346,20 @@ " return low, high\n", "\n", "\n", - "def calculate_group_var(control_var, control_N,test_var, test_N):\n", - " return control_var/control_N + test_var/test_N\n", + "def calculate_group_var(control_var, control_N, test_var, test_N):\n", + " return control_var / control_N + test_var / test_N\n", "\n", "\n", "def calculate_weighted_delta(group_var, differences, resamples):\n", - " '''\n", + " \"\"\"\n", " Compute the weighted deltas.\n", - " '''\n", - " import numpy as np\n", + " \"\"\"\n", "\n", - " weight = 1/group_var\n", + " weight = 1 / group_var\n", " denom = np.sum(weight)\n", " num = np.sum(weight[i] * differences[i] for i in range(0, len(weight)))\n", "\n", - " return num/denom" + " return num / denom" ] }, { diff --git a/nbs/API/dabest_object.ipynb b/nbs/API/dabest_object.ipynb new file mode 100644 index 00000000..1e85c3a7 --- /dev/null +++ b/nbs/API/dabest_object.ipynb @@ -0,0 +1,1325 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "ed122c74", + 
"metadata": {}, + "source": [ + "# Dabest object\n", + "\n", + "> Main class for estimating statistics and generating plots.\n", + "\n", + "- order: 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb97d9b1", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp _dabest_object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d5d586f", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from __future__ import annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dcd32470", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nbdev.showdoc import *\n", + "import nbdev\n", + "nbdev.nbdev_export()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3c6f47a", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "# Import standard data science libraries\n", + "from numpy import array, repeat, random, issubdtype, number\n", + "import pandas as pd\n", + "from scipy.stats import norm\n", + "from scipy.stats import randint\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "204a64b4", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "import dabest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "350b12c1", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class Dabest(object):\n", + "\n", + " \"\"\"\n", + " Class for estimation statistics and plots.\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " data,\n", + " idx,\n", + " x,\n", + " y,\n", + " paired,\n", + " id_col,\n", + " ci,\n", + " resamples,\n", + " random_seed,\n", + " proportional,\n", + " delta2,\n", + " experiment,\n", + " experiment_label,\n", + " x1_level,\n", + " mini_meta,\n", + " ):\n", + " \"\"\"\n", + " Parses and stores pandas DataFrames in preparation for estimation\n", + " statistics. 
You should not be calling this class directly; instead,\n", + " use `dabest.load()` to parse your DataFrame prior to analysis.\n", + " \"\"\"\n", + "\n", + " self.__delta2 = delta2\n", + " self.__experiment = experiment\n", + " self.__ci = ci\n", + " self.__input_data = data\n", + " self.__output_data = data.copy()\n", + " self.__id_col = id_col\n", + " self.__is_paired = paired\n", + " self.__resamples = resamples\n", + " self.__random_seed = random_seed\n", + " self.__proportional = proportional\n", + " self.__mini_meta = mini_meta\n", + "\n", + " # Check if it is a valid mini_meta case\n", + " if self.__mini_meta:\n", + " # Only mini_meta calculation but not proportional and delta-delta function\n", + " if self.__proportional:\n", + " err0 = \"`proportional` and `mini_meta` cannot be True at the same time.\"\n", + " raise ValueError(err0)\n", + " if self.__delta2:\n", + " err0 = \"`delta` and `mini_meta` cannot be True at the same time.\"\n", + " raise ValueError(err0)\n", + "\n", + " # Check if the columns stated are valid\n", + " # TODO instead of traversing twice idx you can traverse only once\n", + " # and break the loop if the condition is not satisfied?\n", + " # TODO What if the type is not str and not tuple,list? 
missing raise Error\n", + " if all([isinstance(i, str) for i in idx]):\n", + " if len(pd.unique([t for t in idx]).tolist()) != 2:\n", + " err0 = \"`mini_meta` is True, but `idx` ({})\".format(idx)\n", + " err1 = \"does not contain exactly 2 columns.\"\n", + " raise ValueError(err0 + err1)\n", + "\n", + " if all([isinstance(i, (tuple, list)) for i in idx]):\n", + " all_idx_lengths = [len(t) for t in idx]\n", + " if (array(all_idx_lengths) != 2).any():\n", + " err1 = \"`mini_meta` is True, but some idx \"\n", + " err2 = \"in {} does not consist only of two groups.\".format(idx)\n", + " raise ValueError(err1 + err2)\n", + "\n", + " # TODO can you have True mini_meta and delta2 at the same time?\n", + " # Check if this is a 2x2 ANOVA case and x & y are valid columns\n", + " # Create experiment_label and x1_level\n", + " if self.__delta2:\n", + " # TODO Wrap the errors in a separate function called check_errors()\n", + " if self.__proportional:\n", + " err0 = \"`proportional` and `delta` cannot be True at the same time.\"\n", + " raise ValueError(err0)\n", + "\n", + " # idx should not be specified\n", + " if idx:\n", + " err0 = \"`idx` should not be specified when `delta2` is True.\".format(\n", + " len(x)\n", + " )\n", + " raise ValueError(err0)\n", + "\n", + " # Check if x is valid\n", + " # TODO if x is None is fine??\n", + " if len(x) != 2:\n", + " err0 = \"`delta2` is True but the number of variables indicated by `x` is {}.\".format(\n", + " len(x)\n", + " )\n", + " raise ValueError(err0)\n", + "\n", + " for i in x:\n", + " if i not in self.__output_data.columns:\n", + " err = \"{0} is not a column in `data`. Please check.\".format(i)\n", + " raise IndexError(err)\n", + "\n", + " # Check if y is valid\n", + " if not y:\n", + " err0 = \"`delta2` is True but `y` is not indicated.\"\n", + " raise ValueError(err0)\n", + "\n", + " if y not in self.__output_data.columns:\n", + " err = \"{0} is not a column in `data`. 
Please check.\".format(y)\n", + " raise IndexError(err)\n", + "\n", + " # Check if experiment is valid\n", + " if experiment not in self.__output_data.columns:\n", + " err = \"{0} is not a column in `data`. Please check.\".format(experiment)\n", + " raise IndexError(err)\n", + "\n", + " # Check if experiment_label is valid and create experiment when needed\n", + " if experiment_label:\n", + " if len(experiment_label) != 2:\n", + " err0 = \"`experiment_label` does not have a length of 2.\"\n", + " raise ValueError(err0)\n", + "\n", + " for i in experiment_label:\n", + " if i not in self.__output_data[experiment].unique():\n", + " err = \"{0} is not an element in the column `{1}` of `data`. Please check.\".format(\n", + " i, experiment\n", + " )\n", + " raise IndexError(err)\n", + " else:\n", + " experiment_label = self.__output_data[experiment].unique()\n", + "\n", + " # Check if x1_level is valid\n", + " if x1_level:\n", + " if len(x1_level) != 2:\n", + " err0 = \"`x1_level` does not have a length of 2.\"\n", + " raise ValueError(err0)\n", + "\n", + " for i in x1_level:\n", + " if i not in self.__output_data[x[0]].unique():\n", + " err = \"{0} is not an element in the column `{1}` of `data`. 
Please check.\".format(\n", + " i, experiment\n", + " )\n", + " raise IndexError(err)\n", + "\n", + " else:\n", + " x1_level = self.__output_data[x[0]].unique()\n", + "\n", + " # TODO what if experiment is None?\n", + " elif experiment:\n", + " experiment_label = self.__output_data[experiment].unique()\n", + " x1_level = self.__output_data[x[0]].unique()\n", + " self.__experiment_label = experiment_label\n", + " self.__x1_level = x1_level\n", + "\n", + " # create new x & idx and record the second variable if this is a valid 2x2 ANOVA case\n", + " if idx is None and x is not None and y is not None:\n", + " # Add a length check for unique values in the first element in list x,\n", + " # if the length is greater than 2, force delta2 to be False\n", + " # Should be removed if delta2 for situations other than 2x2 is supported\n", + " if len(self.__output_data[x[0]].unique()) > 2 and x1_level is None:\n", + " self.__delta2 = False\n", + " # stop the loop if delta2 is False\n", + "\n", + " # add a new column which is a combination of experiment and the first variable\n", + " new_col_name = experiment + x[0]\n", + " while new_col_name in self.__output_data.columns:\n", + " new_col_name += \"_\"\n", + "\n", + " self.__output_data[new_col_name] = (\n", + " self.__output_data[x[0]].astype(str)\n", + " + \" \"\n", + " + self.__output_data[experiment].astype(str)\n", + " )\n", + "\n", + " # create idx and record the first and second x variable\n", + " idx = []\n", + " for i in list(map(lambda x: str(x), experiment_label)):\n", + " temp = []\n", + " for j in list(map(lambda x: str(x), x1_level)):\n", + " temp.append(j + \" \" + i)\n", + " idx.append(temp)\n", + "\n", + " self.__idx = idx\n", + " self.__x1 = x[0]\n", + " self.__x2 = x[1]\n", + " x = new_col_name\n", + " else:\n", + " self.__idx = idx\n", + " self.__x1 = None\n", + " self.__x2 = None\n", + "\n", + " # Determine the kind of estimation plot we need to produce.\n", + " if all([isinstance(i, (str, int, float)) for i 
in idx]):\n", + " # flatten out idx.\n", + " all_plot_groups = pd.unique([t for t in idx]).tolist()\n", + " if len(idx) > len(all_plot_groups):\n", + " err0 = \"`idx` contains duplicated groups. Please remove any duplicates and try again.\"\n", + " raise ValueError(err0)\n", + "\n", + " # We need to re-wrap this idx inside another tuple so as to\n", + " # easily loop thru each pairwise group later on.\n", + " self.__idx = (idx,)\n", + "\n", + " elif all([isinstance(i, (tuple, list)) for i in idx]):\n", + " all_plot_groups = pd.unique([tt for t in idx for tt in t]).tolist()\n", + "\n", + " actual_groups_given = sum([len(i) for i in idx])\n", + "\n", + " if actual_groups_given > len(all_plot_groups):\n", + " err0 = \"Groups are repeated across tuples,\"\n", + " err1 = \" or a tuple has repeated groups in it.\"\n", + " err2 = \" Please remove any duplicates and try again.\"\n", + " raise ValueError(err0 + err1 + err2)\n", + "\n", + " else: # mix of string and tuple?\n", + " err = \"There seems to be a problem with the idx you \" \"entered--{}.\".format(\n", + " idx\n", + " )\n", + " raise ValueError(err)\n", + "\n", + " # Check if there is a typo on paired\n", + " if self.__is_paired and self.__is_paired not in (\"baseline\", \"sequential\"):\n", + " err = \"{} assigned for `paired` is not valid.\".format(self.__is_paired)\n", + " raise ValueError(err)\n", + "\n", + " # Determine the type of data: wide or long.\n", + " if x is None and y is not None:\n", + " err = \"You have only specified `y`. Please also specify `x`.\"\n", + " raise ValueError(err)\n", + "\n", + " if x is not None and y is None:\n", + " err = \"You have only specified `x`. 
Please also specify `y`.\"\n", + " raise ValueError(err)\n", + "\n", + " self.__plot_data = self.get_plot_data(x, y, all_plot_groups)\n", + " self.__all_plot_groups = all_plot_groups\n", + "\n", + " # Check if `id_col` is valid\n", + " if self.__is_paired:\n", + " if id_col is None:\n", + " err = \"`id_col` must be specified if `paired` is assigned with a not NoneType value.\"\n", + " raise IndexError(err)\n", + "\n", + " if id_col not in self.__plot_data.columns:\n", + " err = \"{} is not a column in `data`. \".format(id_col)\n", + " raise IndexError(err)\n", + "\n", + " self._compute_effectsize_dfs()\n", + "\n", + " def __repr__(self):\n", + " from .__init__ import __version__\n", + " from .misc_tools import print_greeting\n", + "\n", + " greeting_header = print_greeting()\n", + "\n", + " RM_STATUS = {\n", + " \"baseline\": \"for repeated measures against baseline \\n\",\n", + " \"sequential\": \"for the sequential design of repeated-measures experiment \\n\",\n", + " \"None\": \"\",\n", + " }\n", + "\n", + " PAIRED_STATUS = {\"baseline\": \"Paired e\", \"sequential\": \"Paired e\", \"None\": \"E\"}\n", + "\n", + " first_line = {\n", + " \"rm_status\": RM_STATUS[str(self.__is_paired)],\n", + " \"paired_status\": PAIRED_STATUS[str(self.__is_paired)],\n", + " }\n", + "\n", + " s1 = \"{paired_status}ffect size(s) {rm_status}\".format(**first_line)\n", + " s2 = \"with {}% confidence intervals will be computed for:\".format(self.__ci)\n", + " desc_line = s1 + s2\n", + "\n", + " out = [greeting_header + \"\\n\\n\" + desc_line]\n", + "\n", + " comparisons = []\n", + "\n", + " if self.__is_paired == \"sequential\":\n", + " for j, current_tuple in enumerate(self.__idx):\n", + " for ix, test_name in enumerate(current_tuple[1:]):\n", + " control_name = current_tuple[ix]\n", + " comparisons.append(\"{} minus {}\".format(test_name, control_name))\n", + " else:\n", + " for j, current_tuple in enumerate(self.__idx):\n", + " control_name = current_tuple[0]\n", + "\n", + " for 
ix, test_name in enumerate(current_tuple[1:]):\n", + " comparisons.append(\"{} minus {}\".format(test_name, control_name))\n", + "\n", + " if self.__delta2:\n", + " comparisons.append(\n", + " \"{} minus {} (only for mean difference)\".format(\n", + " self.__experiment_label[1], self.__experiment_label[0]\n", + " )\n", + " )\n", + "\n", + " if self.__mini_meta:\n", + " comparisons.append(\"weighted delta (only for mean difference)\")\n", + "\n", + " for j, g in enumerate(comparisons):\n", + " out.append(\"{}. {}\".format(j + 1, g))\n", + "\n", + " resamples_line1 = \"\\n{} resamples \".format(self.__resamples)\n", + " resamples_line2 = \"will be used to generate the effect size bootstraps.\"\n", + " out.append(resamples_line1 + resamples_line2)\n", + "\n", + " return \"\\n\".join(out)\n", + "\n", + " @property\n", + " def mean_diff(self):\n", + " \"\"\"\n", + " Returns an :py:class:`EffectSizeDataFrame` for the mean difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`\n", + "\n", + " \"\"\"\n", + " return self.__mean_diff\n", + "\n", + " @property\n", + " def median_diff(self):\n", + " \"\"\"\n", + " Returns an :py:class:`EffectSizeDataFrame` for the median difference, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", + "\n", + " \"\"\"\n", + " return self.__median_diff\n", + "\n", + " @property\n", + " def cohens_d(self):\n", + " \"\"\"\n", + " Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Cohen's `d`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", + "\n", + " \"\"\"\n", + " return self.__cohens_d\n", + "\n", + " @property\n", + " def cohens_h(self):\n", + " \"\"\"\n", + " Returns an :py:class:`EffectSizeDataFrame` for the standardized mean 
difference Cohen's `h`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `directional` argument in `dabest.load()`.\n", + "\n", + " \"\"\"\n", + " return self.__cohens_h\n", + "\n", + " @property\n", + " def hedges_g(self):\n", + " \"\"\"\n", + " Returns an :py:class:`EffectSizeDataFrame` for the standardized mean difference Hedges' `g`, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", + "\n", + " \"\"\"\n", + " return self.__hedges_g\n", + "\n", + " @property\n", + " def cliffs_delta(self):\n", + " \"\"\"\n", + " Returns an :py:class:`EffectSizeDataFrame` for Cliff's delta, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", + "\n", + " \"\"\"\n", + " return self.__cliffs_delta\n", + "\n", + " @property\n", + " def delta_g(self):\n", + " \"\"\"\n", + " Returns an :py:class:`EffectSizeDataFrame` for deltas' g, its confidence interval, and relevant statistics, for all comparisons as indicated via the `idx` and `paired` argument in `dabest.load()`.\n", + " \"\"\"\n", + " return self.__delta_g\n", + "\n", + " @property\n", + " def input_data(self):\n", + " \"\"\"\n", + " Returns the pandas DataFrame that was passed to `dabest.load()`.\n", + " When `delta2` is True, a new column is added to support the\n", + " function. 
The name of this new column is indicated by `x`.\n", + " \"\"\"\n", + " return self.__input_data\n", + "\n", + " @property\n", + " def idx(self):\n", + " \"\"\"\n", + " Returns the order of categories that was passed to `dabest.load()`.\n", + " \"\"\"\n", + " return self.__idx\n", + "\n", + " @property\n", + " def x1(self):\n", + " \"\"\"\n", + " Returns the first variable declared in x when it is a delta-delta\n", + " case; returns None otherwise.\n", + " \"\"\"\n", + " return self.__x1\n", + "\n", + " @property\n", + " def x1_level(self):\n", + " \"\"\"\n", + " Returns the levels of first variable declared in x when it is a\n", + " delta-delta case; returns None otherwise.\n", + " \"\"\"\n", + " return self.__x1_level\n", + "\n", + " @property\n", + " def x2(self):\n", + " \"\"\"\n", + " Returns the second variable declared in x when it is a delta-delta\n", + " case; returns None otherwise.\n", + " \"\"\"\n", + " return self.__x2\n", + "\n", + " @property\n", + " def experiment(self):\n", + " \"\"\"\n", + " Returns the column name of experiment labels that was passed to\n", + " `dabest.load()` when it is a delta-delta case; returns None otherwise.\n", + " \"\"\"\n", + " return self.__experiment\n", + "\n", + " @property\n", + " def experiment_label(self):\n", + " \"\"\"\n", + " Returns the experiment labels in order that was passed to `dabest.load()`\n", + " when it is a delta-delta case; returns None otherwise.\n", + " \"\"\"\n", + " return self.__experiment_label\n", + "\n", + " @property\n", + " def delta2(self):\n", + " \"\"\"\n", + " Returns the boolean parameter indicating if this is a delta-delta\n", + " situation.\n", + " \"\"\"\n", + " return self.__delta2\n", + "\n", + " @property\n", + " def is_paired(self):\n", + " \"\"\"\n", + " Returns the type of repeated-measures experiment.\n", + " \"\"\"\n", + " return self.__is_paired\n", + "\n", + " @property\n", + " def id_col(self):\n", + " \"\"\"\n", + " Returns the id column declared to 
`dabest.load()`.\n", + " \"\"\"\n", + " return self.__id_col\n", + "\n", + " @property\n", + " def ci(self):\n", + " \"\"\"\n", + " The width of the desired confidence interval.\n", + " \"\"\"\n", + " return self.__ci\n", + "\n", + " @property\n", + " def resamples(self):\n", + " \"\"\"\n", + " The number of resamples used to generate the bootstrap.\n", + " \"\"\"\n", + " return self.__resamples\n", + "\n", + " @property\n", + " def random_seed(self):\n", + " \"\"\"\n", + " The number used to initialise the numpy random seed generator, ie.\n", + " `seed_value` from `numpy.random.seed(seed_value)` is returned.\n", + " \"\"\"\n", + " return self.__random_seed\n", + "\n", + " @property\n", + " def x(self):\n", + " \"\"\"\n", + " Returns the x column that was passed to `dabest.load()`, if any.\n", + " When `delta2` is True, `x` returns the name of the new column created\n", + " for the delta-delta situation. To retrieve the 2 variables passed into\n", + " `x` when `delta2` is True, please call `x1` and `x2` instead.\n", + " \"\"\"\n", + " return self.__x\n", + "\n", + " @property\n", + " def y(self):\n", + " \"\"\"\n", + " Returns the y column that was passed to `dabest.load()`, if any.\n", + " \"\"\"\n", + " return self.__y\n", + "\n", + " @property\n", + " def _xvar(self):\n", + " \"\"\"\n", + " Returns the xvar in dabest.plot_data.\n", + " \"\"\"\n", + " return self.__xvar\n", + "\n", + " @property\n", + " def _yvar(self):\n", + " \"\"\"\n", + " Returns the yvar in dabest.plot_data.\n", + " \"\"\"\n", + " return self.__yvar\n", + "\n", + " @property\n", + " def _plot_data(self):\n", + " \"\"\"\n", + " Returns the pandas DataFrame used to produce the estimation stats/plots.\n", + " \"\"\"\n", + " return self.__plot_data\n", + "\n", + " @property\n", + " def proportional(self):\n", + " \"\"\"\n", + " Returns the proportional parameter class.\n", + " \"\"\"\n", + " return self.__proportional\n", + "\n", + " @property\n", + " def mini_meta(self):\n", + " \"\"\"\n", + " 
Returns the mini_meta boolean parameter.\n", + " \"\"\"\n", + " return self.__mini_meta\n", + "\n", + " @property\n", + " def _all_plot_groups(self):\n", + " \"\"\"\n", + " Returns the all plot groups, as indicated via the `idx` keyword.\n", + " \"\"\"\n", + " return self.__all_plot_groups\n", + "\n", + " def get_plot_data(self, x, y, all_plot_groups):\n", + " \"\"\"\n", + " Function to prepare some attributes for plotting\n", + " \"\"\"\n", + "\n", + " # Identify the type of data that was passed in.\n", + " if x is not None and y is not None:\n", + " # Assume we have a long dataset.\n", + " # check both x and y are column names in data.\n", + " if x not in self.__output_data.columns:\n", + " err = \"{0} is not a column in `data`. Please check.\".format(x)\n", + " raise IndexError(err)\n", + " if y not in self.__output_data.columns:\n", + " err = \"{0} is not a column in `data`. Please check.\".format(y)\n", + " raise IndexError(err)\n", + "\n", + " # check y is numeric.\n", + " if not issubdtype(self.__output_data[y].dtype, number):\n", + " err = \"{0} is a column in `data`, but it is not numeric.\".format(y)\n", + " raise ValueError(err)\n", + "\n", + " # check all the idx can be found in self.__output_data[x]\n", + " for g in all_plot_groups:\n", + " if g not in self.__output_data[x].unique():\n", + " err0 = '\"{0}\" is not a group in the column `{1}`.'.format(g, x)\n", + " err1 = \" Please check `idx` and try again.\"\n", + " raise IndexError(err0 + err1)\n", + "\n", + " # Select only rows where the value in the `x` column\n", + " # is found in `idx`.\n", + " plot_data = self.__output_data[\n", + " self.__output_data.loc[:, x].isin(all_plot_groups)\n", + " ].copy()\n", + "\n", + " # Assign attributes\n", + " self.__x = x\n", + " self.__y = y\n", + " self.__xvar = x\n", + " self.__yvar = y\n", + "\n", + " elif x is None and y is None:\n", + " # Assume we have a wide dataset.\n", + " # Assign attributes appropriately.\n", + " self.__x = None\n", + " self.__y = 
None\n", + " self.__xvar = \"group\"\n", + " self.__yvar = \"value\"\n", + "\n", + " # First, check we have all columns in the dataset.\n", + " for g in all_plot_groups:\n", + " if g not in self.__output_data.columns:\n", + " err0 = '\"{0}\" is not a column in `data`.'.format(g)\n", + " err1 = \" Please check `idx` and try again.\"\n", + " raise IndexError(err0 + err1)\n", + "\n", + " set_all_columns = set(self.__output_data.columns.tolist())\n", + " set_all_plot_groups = set(all_plot_groups)\n", + " id_vars = set_all_columns.difference(set_all_plot_groups)\n", + "\n", + " plot_data = pd.melt(\n", + " self.__output_data,\n", + " id_vars=id_vars,\n", + " value_vars=all_plot_groups,\n", + " value_name=self.__yvar,\n", + " var_name=self.__xvar,\n", + " )\n", + "\n", + " # Added in v0.2.7.\n", + " plot_data.dropna(axis=0, how=\"any\", subset=[self.__yvar], inplace=True)\n", + "\n", + " # TODO these comments should not be in the code but on the release notes of the package version\n", + " # Lines 131 to 140 added in v0.2.3.\n", + " # Fixes a bug that jammed up when the xvar column was already\n", + " # a pandas Categorical. 
Now we check for this and act appropriately.\n", + " if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype):\n", + " plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)\n", + " plot_data[self.__xvar].cat.reorder_categories(\n", + " all_plot_groups, ordered=True, inplace=True\n", + " )\n", + " else:\n", + " plot_data.loc[:, self.__xvar] = pd.Categorical(\n", + " plot_data[self.__xvar], categories=all_plot_groups, ordered=True\n", + " )\n", + "\n", + " return plot_data\n", + "\n", + " def _compute_effectsize_dfs(self):\n", + " '''\n", + " Function to compute all attributes based on EffectSizeDataFrame.\n", + " It returns nothing.\n", + " '''\n", + " from ._effsize_objects import EffectSizeDataFrame\n", + "\n", + " effectsize_df_kwargs = dict(\n", + " ci=self.__ci,\n", + " is_paired=self.__is_paired,\n", + " random_seed=self.__random_seed,\n", + " resamples=self.__resamples,\n", + " proportional=self.__proportional,\n", + " delta2=self.__delta2,\n", + " experiment_label=self.__experiment_label,\n", + " x1_level=self.__x1_level,\n", + " x2=self.__x2,\n", + " mini_meta=self.__mini_meta,\n", + " )\n", + "\n", + " self.__mean_diff = EffectSizeDataFrame(\n", + " self, \"mean_diff\", **effectsize_df_kwargs\n", + " )\n", + "\n", + " self.__median_diff = EffectSizeDataFrame(\n", + " self, \"median_diff\", **effectsize_df_kwargs\n", + " )\n", + "\n", + " self.__cohens_d = EffectSizeDataFrame(self, \"cohens_d\", **effectsize_df_kwargs)\n", + "\n", + " self.__cohens_h = EffectSizeDataFrame(self, \"cohens_h\", **effectsize_df_kwargs)\n", + "\n", + " self.__hedges_g = EffectSizeDataFrame(self, \"hedges_g\", **effectsize_df_kwargs)\n", + "\n", + " self.__delta_g = EffectSizeDataFrame(self, \"delta_g\", **effectsize_df_kwargs)\n", + "\n", + " if not self.__is_paired:\n", + " self.__cliffs_delta = EffectSizeDataFrame(\n", + " self, \"cliffs_delta\", **effectsize_df_kwargs\n", + " )\n", + " else:\n", + " self.__cliffs_delta = (\n", + " \"The data is paired; 
Cliff's delta is therefore undefined.\"\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "c86c0487", + "metadata": {}, + "source": [ + "#### Example: mean_diff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d07d58b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DABEST v2023.03.29\n", + "==================\n", + " \n", + "Good afternoon!\n", + "The current time is Tue Apr 18 14:47:26 2023.\n", + "\n", + "The unpaired mean difference between control and test is 0.5 [95%CI -0.0412, 1.0].\n", + "The p-value of the two-sided permutation t-test is 0.0758, calculated for legacy purposes only. \n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing theeffect size (or greater),\n", + "assuming the null hypothesis ofzero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", + "\n", + "To get the results of all valid statistical tests, use `.mean_diff.statistical_tests`" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "control = norm.rvs(loc=0, size=30, random_state=12345)\n", + "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", + "my_df = pd.DataFrame({\"control\": control,\n", + " \"test\": test})\n", + "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", + "my_dabest_object.mean_diff" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "cf5ca0a0", + "metadata": {}, + "source": [ + "This is simply the mean of the control group subtracted from\n", + "the mean of the test group.\n", + "\n", + "$$\\text{Mean difference} = \\overline{x}_{Test} - \\overline{x}_{Control}$$\n", + "\n", + "where $\\overline{x}$ is the mean for the group $x$." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8b3b146c", + "metadata": {}, + "source": [ + "#### Example: median_diff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e9b8635", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jacobluke/opt/anaconda3/envs/dabest-nbdev/lib/python3.8/site-packages/dabest/_stats_tools/effsize.py:72: UserWarning: Using median as the statistic in bootstrapping may result in a biased estimate and cause problems with BCa confidence intervals. Consider using a different statistic, such as the mean.\n", + "When plotting, please consider using percetile confidence intervals by specifying `ci_type='percentile'`. For detailed information, refer to https://github.com/ACCLAB/DABEST-python/issues/129 \n", + "\n", + " warnings.warn(message=mes1+mes2, category=UserWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "DABEST v2023.03.29\n", + "==================\n", + " \n", + "Good afternoon!\n", + "The current time is Tue Apr 18 14:47:28 2023.\n", + "\n", + "The unpaired median difference between control and test is 0.5 [95%CI -0.0758, 0.991].\n", + "The p-value of the two-sided permutation t-test is 0.103, calculated for legacy purposes only. 
\n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing theeffect size (or greater),\n", + "assuming the null hypothesis ofzero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", + "\n", + "To get the results of all valid statistical tests, use `.median_diff.statistical_tests`" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "control = norm.rvs(loc=0, size=30, random_state=12345)\n", + "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", + "my_df = pd.DataFrame({\"control\": control,\n", + " \"test\": test})\n", + "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", + "my_dabest_object.median_diff" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "838b2978", + "metadata": {}, + "source": [ + "\n", + "This is the median difference between the control group and the test group.\n", + "\n", + "If the comparison(s) are unpaired, median_diff is computed with the following equation:\n", + "\n", + "\n", + "$$\\text{Median difference} = \\widetilde{x}_{Test} - \\widetilde{x}_{Control}$$\n", + "\n", + "where $\\widetilde{x}$ is the median for the group $x$.\n", + "\n", + "If the comparison(s) are paired, median_diff is computed with the following equation:\n", + "\n", + "$$\\text{Median difference} = \\widetilde{x}_{Test - Control}$$\n", + " \n", + "\n", + "##### Things to note\n", + "\n", + "Using median difference as the statistic in bootstrapping may result in a biased estimate and cause problems with BCa confidence intervals. Consider using mean difference instead. \n", + "\n", + "When plotting, consider using percentile confidence intervals instead of BCa confidence intervals by specifying `ci_type = 'percentile'` in .plot(). 
\n", + "\n", + "For detailed information, please refer to [Issue 129](https://github.com/ACCLAB/DABEST-python/issues/129). \n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a5324d21", + "metadata": {}, + "source": [ + "#### Example: cohens_d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "748b5c60", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DABEST v2023.03.29\n", + "==================\n", + " \n", + "Good afternoon!\n", + "The current time is Tue Apr 18 14:47:29 2023.\n", + "\n", + "The unpaired Cohen's d between control and test is 0.471 [95%CI -0.0843, 0.976].\n", + "The p-value of the two-sided permutation t-test is 0.0758, calculated for legacy purposes only. \n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing theeffect size (or greater),\n", + "assuming the null hypothesis ofzero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", + "\n", + "To get the results of all valid statistical tests, use `.cohens_d.statistical_tests`" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "control = norm.rvs(loc=0, size=30, random_state=12345)\n", + "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", + "my_df = pd.DataFrame({\"control\": control,\n", + " \"test\": test})\n", + "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", + "my_dabest_object.cohens_d" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "6f66579c", + "metadata": {}, + "source": [ + "\n", + "Cohen's `d` is simply the mean of the control group subtracted from\n", + "the mean of the test group.\n", + "\n", + "If `paired` is None, then the comparison(s) are unpaired; \n", + "otherwise the comparison(s) are paired.\n", + "\n", + "If the comparison(s) 
are unpaired, Cohen's `d` is computed with the following equation:\n", + "\n", + "\n", + "$$d = \\frac{\\overline{x}_{Test} - \\overline{x}_{Control}} {\\text{pooled standard deviation}}$$\n", + "\n", + "\n", + "For paired comparisons, Cohen's d is given by\n", + "\n", + "$$d = \\frac{\\overline{x}_{Test} - \\overline{x}_{Control}} {\\text{average standard deviation}}$$\n", + "\n", + "where $\\overline{x}$ is the mean of the respective group of observations, ${Var}_{x}$ denotes the variance of that group,\n", + "\n", + "\n", + "$$\\text{pooled standard deviation} = \\sqrt{ \\frac{(n_{control} - 1) * {Var}_{control} + (n_{test} - 1) * {Var}_{test} } {n_{control} + n_{test} - 2} }$$\n", + "\n", + "and\n", + "\n", + "\n", + "$$\\text{average standard deviation} = \\sqrt{ \\frac{{Var}_{control} + {Var}_{test}} {2}}$$\n", + "\n", + "The sample variance (and standard deviation) uses N-1 degrees of freedoms.\n", + "This is an application of [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction), and yields the unbiased sample variance.\n", + "\n", + "References:\n", + "\n", + "\n", + " \n", + "\n", + " \n", + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "40f4eff9", + "metadata": {}, + "source": [ + "#### Example: cohens_h" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f713781c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DABEST v2023.03.29\n", + "==================\n", + " \n", + "Good afternoon!\n", + "The current time is Tue Apr 18 14:47:30 2023.\n", + "\n", + "The unpaired Cohen's h between control and test is 0.0 [95%CI -0.613, 0.429].\n", + "The p-value of the two-sided permutation t-test is 0.799, calculated for legacy purposes only. 
\n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing theeffect size (or greater),\n", + "assuming the null hypothesis ofzero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", + "\n", + "To get the results of all valid statistical tests, use `.cohens_h.statistical_tests`" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "control = randint.rvs(0, 2, size=30, random_state=12345)\n", + "test = randint.rvs(0, 2, size=30, random_state=12345)\n", + "my_df = pd.DataFrame({\"control\": control,\n", + " \"test\": test})\n", + "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", + "my_dabest_object.cohens_h" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9e3e57bd", + "metadata": {}, + "source": [ + "Cohen's *h* uses the information of proportion in the control and test groups to calculate the distance between two proportions.\n", + "\n", + "It can be used to describe the difference between two proportions as \"small\", \"medium\", or \"large\".\n", + "\n", + "It can be used to determine if the difference between two proportions is \"meaningful\".\n", + "\n", + "A directional Cohen's *h* is computed with the following equation:\n", + "\n", + "\n", + "$$h = 2 * \\arcsin{\\sqrt{proportion_{Test}}} - 2 * \\arcsin{\\sqrt{proportion_{Control}}}$$\n", + "\n", + "For a non-directional Cohen's *h*, the equation is:\n", + "\n", + "$$h = |2 * \\arcsin{\\sqrt{proportion_{Test}}} - 2 * \\arcsin{\\sqrt{proportion_{Control}}}|$$\n", + "\n", + "References:\n", + "\n", + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "970fb3b2", + "metadata": {}, + "source": [ + "#### Example: hedges_g" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26960f9e", + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "DABEST v2023.03.29\n", + "==================\n", + " \n", + "Good afternoon!\n", + "The current time is Tue Apr 18 14:47:32 2023.\n", + "\n", + "The unpaired Hedges' g between control and test is 0.465 [95%CI -0.0832, 0.963].\n", + "The p-value of the two-sided permutation t-test is 0.0758, calculated for legacy purposes only. \n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing theeffect size (or greater),\n", + "assuming the null hypothesis ofzero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", + "\n", + "To get the results of all valid statistical tests, use `.hedges_g.statistical_tests`" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "control = norm.rvs(loc=0, size=30, random_state=12345)\n", + "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", + "my_df = pd.DataFrame({\"control\": control,\n", + " \"test\": test})\n", + "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", + "my_dabest_object.hedges_g" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "66c8a83a", + "metadata": {}, + "source": [ + "Hedges' `g` is `cohens_d` corrected for bias via multiplication with the following correction factor:\n", + " \n", + "$$\\frac{ \\Gamma( \\frac{a} {2} )} {\\sqrt{ \\frac{a} {2} } \\times \\Gamma( \\frac{a - 1} {2} )}$$\n", + "\n", + "where\n", + "\n", + "$$a = {n}_{control} + {n}_{test} - 2$$\n", + "\n", + "and $\\Gamma(x)$ is the [Gamma function](https://en.wikipedia.org/wiki/Gamma_function).\n", + "\n", + "\n", + "\n", + "References:\n", + "\n", + "\n", + " \n", + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "b1cf0080", + "metadata": {}, + "source": [ + "#### Example: cliffs_delta" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "dce86c76", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DABEST v2023.03.29\n", + "==================\n", + " \n", + "Good afternoon!\n", + "The current time is Tue Apr 18 14:47:40 2023.\n", + "\n", + "The unpaired Cliff's delta between control and test is 0.28 [95%CI -0.0244, 0.533].\n", + "The p-value of the two-sided permutation t-test is 0.061, calculated for legacy purposes only. \n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing theeffect size (or greater),\n", + "assuming the null hypothesis ofzero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", + "\n", + "To get the results of all valid statistical tests, use `.cliffs_delta.statistical_tests`" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "control = norm.rvs(loc=0, size=30, random_state=12345)\n", + "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", + "my_df = pd.DataFrame({\"control\": control,\n", + " \"test\": test})\n", + "my_dabest_object = dabest.load(my_df, idx=(\"control\", \"test\"))\n", + "my_dabest_object.cliffs_delta" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9661ab37", + "metadata": {}, + "source": [ + "Cliff's delta is a measure of ordinal dominance, ie. how often the values from the test sample are larger than values from the control sample.\n", + "\n", + "$$\\text{Cliff's delta} = \\frac{\\#({x}_{test} > {x}_{control}) - \\#({x}_{test} < {x}_{control})} {{n}_{Test} \\times {n}_{Control}}$$\n", + " \n", + " \n", + "where $\\#$ denotes the number of times a value from the test sample exceeds (or is lesser than) values in the control sample. 
\n", + " \n", + "Cliff's delta ranges from -1 to 1; it can also be thought of as a measure of the degree of overlap between the two samples. An attractive aspect of this effect size is that it does not make an assumptions about the underlying distributions that the samples were drawn from. \n", + "\n", + "References:\n", + "\n", + "\n", + " \n", + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bd341f7c", + "metadata": {}, + "source": [ + "#### Example: delta_g" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9abb53c1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DABEST v2023.02.14\n", + "==================\n", + " \n", + "Good morning!\n", + "The current time is Tue May 16 01:11:14 2023.\n", + "\n", + "The unpaired deltas' g between W Placebo and M Placebo is 0.793 [95%CI 0.553, 1.1].\n", + "The p-value of the two-sided permutation t-test is 0.0, calculated for legacy purposes only. \n", + "\n", + "The unpaired deltas' g between W Drug and M Drug is 0.528 [95%CI 0.286, 0.765].\n", + "The p-value of the two-sided permutation t-test is 0.0, calculated for legacy purposes only. \n", + "\n", + "The deltas' g between Placebo and Drug is -0.651 [95%CI -1.6, 0.217].\n", + "The p-value of the two-sided permutation t-test is 0.0, calculated for legacy purposes only. 
\n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing the effect size (or greater),\n", + "assuming the null hypothesis of zero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed.\n", + "\n", + "To get the results of all valid statistical tests, use `.delta_g.statistical_tests`" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.seed(12345) # Fix the seed so the results are replicable.\n", + "N=20\n", + "y = norm.rvs(loc=3, scale=0.4, size=N*4)\n", + "y[N:2*N] = y[N:2*N]+1\n", + "y[2*N:3*N] = y[2*N:3*N]-0.5\n", + "t1 = repeat('Placebo', N*2).tolist()\n", + "t2 = repeat('Drug', N*2).tolist()\n", + "treatment = t1 + t2\n", + "rep = []\n", + "for i in range(N*2):\n", + " rep.append('Rep1')\n", + " rep.append('Rep2')\n", + "wt = repeat('W', N).tolist()\n", + "mt = repeat('M', N).tolist()\n", + "wt2 = repeat('W', N).tolist()\n", + "mt2 = repeat('M', N).tolist()\n", + "genotype = wt + mt + wt2 + mt2\n", + "id = list(range(0, N*2))\n", + "id_col = id + id\n", + "df_delta2 = pd.DataFrame({'ID' : id_col,\n", + " 'Rep' : rep,\n", + " 'Genotype' : genotype,\n", + " 'Treatment': treatment,\n", + " 'Y' : y})\n", + "unpaired_delta2 = dabest.load(data = df_delta2, x = [\"Genotype\", \"Genotype\"], y = \"Y\", delta2 = True, experiment = \"Treatment\")\n", + "unpaired_delta2.delta_g" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d41dad3", + "metadata": {}, + "source": [ + "Deltas' g is an effect size that only applied on experiments with a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, 1 and 2, which calculates `hedges_g` for delta-delta statistics.\n", + "\n", + "\n", + " $$\\Delta_{1} = \\overline{X}_{A_{2}, B_{1}} - \\overline{X}_{A_{1}, B_{1}}$$\n", + "\n", + " 
$$\\Delta_{2} = \\overline{X}_{A_{2}, B_{2}} - \\overline{X}_{A_{1}, B_{2}}$$\n", + "\n", + "\n", + "where $\\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\\Delta$ is the mean difference between two samples.\n", + "\n", + "A delta-delta value is then calculated as the mean difference between the two primary deltas:\n", + "\n", + "$$\\Delta_{\\Delta} = \\Delta_{2} - \\Delta_{1}$$\n", + "\n", + "and the standard deviation of the delta-delta value is calculated from a pooled variance of the 4 samples:\n", + "\n", + "\n", + "$$s_{\\Delta_{\\Delta}} = \\sqrt{\\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$\n", + "\n", + "where $s$ is the standard deviation and $n$ is the sample size.\n", + "\n", + "A deltas' g value is then calculated as delta-delta value divided by pooled standard deviation $s_{\\Delta_{\\Delta}}$:\n", + "\n", + "\n", + "$\\Delta_{g} = \\frac{\\Delta_{\\Delta}}{s_{\\Delta_{\\Delta}}}$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07a84d5f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nbs/API/delta_objects.ipynb b/nbs/API/delta_objects.ipynb new file mode 100644 index 00000000..e2ab4475 --- /dev/null +++ b/nbs/API/delta_objects.ipynb @@ -0,0 +1,1032 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Delta objects\n", + "\n", + "> Auxiliary delta classes for estimating statistics and generating plots.\n", + "\n", + "- order: 9" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp 
_delta_objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from __future__ import annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nbdev.showdoc import *\n", + "import nbdev\n", + "nbdev.nbdev_export()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "import dabest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "from scipy.stats import norm\n", + "import pandas as pd\n", + "import numpy as np\n", + "from numpy import sort as npsort\n", + "from numpy import isnan\n", + "from string import Template\n", + "import warnings\n", + "import datetime as dt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class DeltaDelta(object):\n", + " \"\"\"\n", + " A class to compute and store the delta-delta statistics for experiments with a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, 1 and 2. 
The data is divided into two pairs of two groups, and a primary delta is first calculated as the mean difference between each of the pairs:\n", + "\n", + "\n", + " $$\\Delta_{1} = \\overline{X}_{A_{2}, B_{1}} - \\overline{X}_{A_{1}, B_{1}}$$\n", + "\n", + " $$\\Delta_{2} = \\overline{X}_{A_{2}, B_{2}} - \\overline{X}_{A_{1}, B_{2}}$$\n", + "\n", + "\n", + " where $\\overline{X}_{A_{i}, B_{j}}$ is the mean of the sample with A = i and B = j, $\\Delta$ is the mean difference between two samples.\n", + "\n", + " A delta-delta value is then calculated as the mean difference between the two primary deltas:\n", + "\n", + "\n", + " $$\\Delta_{\\Delta} = \\Delta_{2} - \\Delta_{1}$$\n", + "\n", + " and a deltas' g value is calculated as the mean difference between the two primary deltas divided by\n", + " the standard deviation of the delta-delta value, which is calculated from a pooled variance of the 4 samples:\n", + "\n", + " $$\\Delta_{g} = \\frac{\\Delta_{\\Delta}}{s_{\\Delta_{\\Delta}}}$$\n", + "\n", + " $$s_{\\Delta_{\\Delta}} = \\sqrt{\\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$\n", + "\n", + " where $s$ is the standard deviation and $n$ is the sample size.\n", + "\n", + "\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self, effectsizedataframe, permutation_count, bootstraps_delta_delta, ci=95\n", + " ):\n", + " from ._stats_tools import effsize as es\n", + " from ._stats_tools import confint_1group as ci1g\n", + " from ._stats_tools import confint_2group_diff as ci2g\n", + "\n", + " self.__effsizedf = effectsizedataframe.results\n", + " self.__dabest_obj = effectsizedataframe.dabest_obj\n", + " self.__ci = ci\n", + " self.__resamples = effectsizedataframe.resamples\n", + " self.__effect_size = effectsizedataframe.effect_size\n", + " 
self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", + " self.__permutation_count = permutation_count\n", + " self.__bootstraps = np.array(self.__effsizedf[\"bootstraps\"])\n", + " self.__control = self.__dabest_obj.experiment_label[0]\n", + " self.__test = self.__dabest_obj.experiment_label[1]\n", + "\n", + " # Compute the bootstrap delta-delta or deltas' g and the true dela-delta based on the raw data\n", + " if self.__effect_size == \"mean_diff\":\n", + " self.__bootstraps_delta_delta = bootstraps_delta_delta[2]\n", + " self.__difference = (\n", + " self.__effsizedf[\"difference\"][1] - self.__effsizedf[\"difference\"][0]\n", + " )\n", + " else:\n", + " self.__bootstraps_delta_delta = bootstraps_delta_delta[0]\n", + " self.__difference = bootstraps_delta_delta[1]\n", + "\n", + " sorted_delta_delta = npsort(self.__bootstraps_delta_delta)\n", + "\n", + " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", + " self.__bootstraps_delta_delta, self.__difference\n", + " )\n", + "\n", + " self.__jackknives = np.array(\n", + " ci1g.compute_1group_jackknife(self.__bootstraps_delta_delta, np.mean)\n", + " )\n", + "\n", + " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", + "\n", + " # Compute BCa intervals.\n", + " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", + " self.__bias_correction, self.__acceleration_value, self.__resamples, ci\n", + " )\n", + "\n", + " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", + "\n", + " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", + " self.__bca_low = sorted_delta_delta[bca_idx_low]\n", + " self.__bca_high = sorted_delta_delta[bca_idx_high]\n", + "\n", + " err1 = \"The $lim_type limit of the interval\"\n", + " err2 = \"was in the $loc 10 values.\"\n", + " err3 = \"The result should be considered unstable.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if bca_idx_low <= 10:\n", + " warnings.warn(\n", + " err_temp.substitute(lim_type=\"lower\", 
loc=\"bottom\"), stacklevel=1\n", + " )\n", + "\n", + " if bca_idx_high >= self.__resamples - 9:\n", + " warnings.warn(\n", + " err_temp.substitute(lim_type=\"upper\", loc=\"top\"), stacklevel=1\n", + " )\n", + "\n", + " else:\n", + " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", + " err2 = \"It is set to the effect size itself.\"\n", + " err3 = \"All bootstrap values were likely all the same.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if isnan(bca_idx_low):\n", + " self.__bca_low = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"lower\"), stacklevel=0)\n", + "\n", + " if isnan(bca_idx_high):\n", + " self.__bca_high = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\"), stacklevel=0)\n", + "\n", + " # Compute percentile intervals.\n", + " pct_idx_low = int((self.__alpha / 2) * self.__resamples)\n", + " pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples)\n", + "\n", + " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", + " self.__pct_low = sorted_delta_delta[pct_idx_low]\n", + " self.__pct_high = sorted_delta_delta[pct_idx_high]\n", + "\n", + " def __permutation_test(self):\n", + " \"\"\"\n", + " Perform a permutation test and obtain the permutation p-value\n", + " based on the permutation data.\n", + " \"\"\"\n", + " self.__permutations = np.array(self.__effsizedf[\"permutations\"])\n", + "\n", + " THRESHOLD = np.abs(self.__difference)\n", + "\n", + " self.__permutations_delta_delta = np.array(\n", + " self.__permutations[1] - self.__permutations[0]\n", + " )\n", + "\n", + " count = sum(np.abs(self.__permutations_delta_delta) > THRESHOLD)\n", + " self.__pvalue_permutation = count / self.__permutation_count\n", + "\n", + " def __repr__(self, header=True, sigfig=3):\n", + " from .misc_tools import print_greeting\n", + "\n", + " first_line = {\"control\": self.__control, \"test\": self.__test}\n", + "\n", + " if self.__effect_size == 
\"mean_diff\":\n", + " out1 = \"The delta-delta between {control} and {test} \".format(**first_line)\n", + " else:\n", + " out1 = \"The deltas' g between {control} and {test} \".format(**first_line)\n", + "\n", + " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", + " if \".\" in str(self.__ci):\n", + " ci_width = base_string_fmt.format(self.__ci)\n", + " else:\n", + " ci_width = str(self.__ci)\n", + "\n", + " ci_out = {\n", + " \"es\": base_string_fmt.format(self.__difference),\n", + " \"ci\": ci_width,\n", + " \"bca_low\": base_string_fmt.format(self.__bca_low),\n", + " \"bca_high\": base_string_fmt.format(self.__bca_high),\n", + " }\n", + "\n", + " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", + " out = out1 + out2\n", + "\n", + " if header is True:\n", + " out = print_greeting() + \"\\n\" + \"\\n\" + out\n", + "\n", + " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", + "\n", + " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(\n", + " pval_rounded\n", + " )\n", + " p2 = \"calculated for legacy purposes only. 
\"\n", + " pvalue = p1 + p2\n", + "\n", + " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", + " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", + " bs = bs1 + bs2\n", + "\n", + " pval_def1 = (\n", + " \"Any p-value reported is the probability of observing the \"\n", + " + \"effect size (or greater),\\nassuming the null hypothesis of \"\n", + " + \"zero difference is true.\"\n", + " )\n", + " pval_def2 = (\n", + " \"\\nFor each p-value, 5000 reshuffles of the \"\n", + " + \"control and test labels were performed.\"\n", + " )\n", + " pval_def = pval_def1 + pval_def2\n", + "\n", + " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", + "\n", + " def to_dict(self):\n", + " \"\"\"\n", + " Returns the attributes of the `DeltaDelta` object as a\n", + " dictionary.\n", + " \"\"\"\n", + " # Only get public (user-facing) attributes.\n", + " attrs = [a for a in dir(self) if not a.startswith((\"_\", \"to_dict\"))]\n", + " out = {}\n", + " for a in attrs:\n", + " out[a] = getattr(self, a)\n", + " return out\n", + "\n", + " @property\n", + " def ci(self):\n", + " \"\"\"\n", + " Returns the width of the confidence interval, in percent.\n", + " \"\"\"\n", + " return self.__ci\n", + "\n", + " @property\n", + " def alpha(self):\n", + " \"\"\"\n", + " Returns the significance level of the statistical test as a float\n", + " between 0 and 1.\n", + " \"\"\"\n", + " return self.__alpha\n", + "\n", + " @property\n", + " def bias_correction(self):\n", + " return self.__bias_correction\n", + "\n", + " @property\n", + " def bootstraps(self):\n", + " \"\"\"\n", + " Return the bootstrapped deltas from all the experiment groups.\n", + " \"\"\"\n", + " return self.__bootstraps\n", + "\n", + " @property\n", + " def jackknives(self):\n", + " return self.__jackknives\n", + "\n", + " @property\n", + " def acceleration_value(self):\n", + " return self.__acceleration_value\n", + "\n", + " @property\n", + " def bca_low(self):\n", + " 
\"\"\"\n", + " The bias-corrected and accelerated confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__bca_low\n", + "\n", + " @property\n", + " def bca_high(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval upper limit.\n", + " \"\"\"\n", + " return self.__bca_high\n", + "\n", + " @property\n", + " def bca_interval_idx(self):\n", + " return self.__bca_interval_idx\n", + "\n", + " @property\n", + " def control(self):\n", + " \"\"\"\n", + " Return the name of the control experiment group.\n", + " \"\"\"\n", + " return self.__control\n", + "\n", + " @property\n", + " def test(self):\n", + " \"\"\"\n", + " Return the name of the test experiment group.\n", + " \"\"\"\n", + " return self.__test\n", + "\n", + " @property\n", + " def bootstraps_delta_delta(self):\n", + " \"\"\"\n", + " Return the delta-delta values calculated from the bootstrapped\n", + " deltas.\n", + " \"\"\"\n", + " return self.__bootstraps_delta_delta\n", + "\n", + " @property\n", + " def difference(self):\n", + " \"\"\"\n", + " Return the delta-delta value calculated based on the raw data.\n", + " \"\"\"\n", + " return self.__difference\n", + "\n", + " @property\n", + " def pct_interval_idx(self):\n", + " return self.__pct_interval_idx\n", + "\n", + " @property\n", + " def pct_low(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_low\n", + "\n", + " @property\n", + " def pct_high(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_high\n", + "\n", + " @property\n", + " def pvalue_permutation(self):\n", + " try:\n", + " return self.__pvalue_permutation\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__pvalue_permutation\n", + "\n", + " @property\n", + " def permutation_count(self):\n", + " \"\"\"\n", + " The number of permuations taken.\n", + " \"\"\"\n", + " return 
self.__permutation_count\n", + "\n", + " @property\n", + " def permutations(self):\n", + " \"\"\"\n", + " Return the mean differences of permutations obtained during\n", + " the permutation test for each experiment group.\n", + " \"\"\"\n", + " try:\n", + " return self.__permutations\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__permutations\n", + "\n", + " @property\n", + " def permutations_delta_delta(self):\n", + " \"\"\"\n", + " Return the delta-delta values of permutations obtained\n", + " during the permutation test.\n", + " \"\"\"\n", + " try:\n", + " return self.__permutations_delta_delta\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__permutations_delta_delta" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "and the standard deviation of the delta-delta value is calculated from a pooled variance of the 4 samples:\n", + "\n", + "\n", + "$$s_{\\Delta_{\\Delta}} = \\sqrt{\\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}}$$\n", + "\n", + "where $s$ is the standard deviation and $n$ is the sample size." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example: delta-delta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0UAAAIaCAYAAADvKOYjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAACRpUlEQVR4nOzdd3xT9f4/8NdJ2qZ779JJoVBoKVBAZhmyRQREULwM/eJVcV0FwauI/mS411VRuQoOEFGGslQEyihSRlsoo4zSUigt3SMdaZuc3x9cIqGDpk16muT1fDz6kJzP55zziulp8s75nM8RRFEUQUREREREZKFkUgcgIiIiIiKSEosiIiIiIiKyaCyKiIiIiIjIorEoIiIiIiIii8aiiIiIiIiILBqLIiIiIiIismgsioiIiIiIyKKxKCIiIiIiIovGooiIiIiIiCyaRRVFOTk5eO2115CTkyN1FCIiIiIiaicsrih6/fXXWRQREREREZGWRRVFREREREREt2NRREREREREFo1FEVE7oa6pRlXRNdSpKqWOQkRERGRRrKQOQGTp1DXVyNz9Na6f3A1NbTUEuTU8IwcjbOT/wdreRep4RERERGaPRRGRhERRxJkfX0Pp5dS/l6lrkZ+6BxXXLyHmkQ8gs7KRMCERERGR+WNRRCShkktJOgXRrSrzMnFh24dQlRZAU1cD56Du8O8zAbauPm2ckoiIiMi8sSgiklDRxaNNtuef2qf9tzLnAq6n/I5uD74B5w5djB2NiIiIyGJwogUiKYmiXt3Vqkpc2PahcbIQERERWSgWRUQScgvvo/c6VQVXUJ6dZoQ0RERERJaJRRGRhNw69oZzYDe916upKDVCGiIiIiLLxKKISEKCICBy+mvw7TUWMmvFjWVyK8gVDk2sJIODV3AbJSQiIiIyf5xogUhiVgp7hI97CiEjHkFNeSFsHFxRcO4vXNz2UYP93Tv3g62bbxunJCIiIjJfJnOm6LXXXoMgCDo/XbpwBi4yH1YKe9h7BsLKzgm+MaMQ0H8KIOgeos6B3dD5nmclSkhERESWrCIvE1n71+HSH18ia/86VORlGnV/s2fPhiAIePzxx+u1zZs3D4IgYPbs2QbZl0mdKerWrRv+/PNP7WMrK5OKT6SX0BGPwK/3PShMS4C6VgWX4O5wCeoudSwiIiKyMFVF13D+1/dRfvUsIMggCDKIogZZ+9fCqUNXdL73edi5+xtl34GBgVi/fj0++OAD2NnZAQCqq6uxbt06BAUFGWw/JlVVWFlZwdeXw4bIcti6eiPgrklSxyAiIiILVVV0DSe+/hfqVJU3FogaiKJG216efQ4nvv4XejzygVEKo169eiE9PR2bNm3CjBkzAACbNm1CUFAQQkNDDbYfkxk+BwAXLlyAv78/wsLCMGPGDGRlZTXZX6VSoaysTPujVCrbKCkRERERkek7/+v7NwqiWwohHaIGdapKnP/1faNleOSRR7B69Wrt46+//hpz5swx6D5Mpijq168f1qxZg99++w0rV65ERkYGBg8ejPLy8kbXWbFiBVxcXLQ/cXFxbZiYiIiIiMh0VeRl3hgy11hBdJOoQfnVs0a7xujhhx/GwYMHcfnyZVy+fBkJCQl4+OGHDboPkxk+N3bsWO2/o6Oj0a9fPwQHB2PDhg149NFHG1znpZdewvPPP699nJKSwsKIiIiIiKgZCtMO3Zj06U5FEQAIMhSmHYKDd4jBc3h5eWH8+PFYs2YNRFHE+PHj4enpadB9mExRdDtXV1
d07twZFy9ebLSPQqGAQqHQPnZ0dGyLaEREREREJq+uWqmdVOFOBEGGumrjXaryyCOP4KmnngIAfPrppwbfvskMn7udUqlEeno6/Pz8pI5CRERERGR2rGwdm1UQAYAoamBla7wTEGPGjEFNTQ1qa2sxevRog2/fZIqi+fPnY9++fcjMzMShQ4cwadIkyOVyPPjgg1JHIyIiIiIyOx5dBjRv6BwAiJob/Y1ELpfj7NmzOHPmDORyucG3bzLD565evYoHH3wQhYWF8PLywqBBg3D48GF4eXlJHY2IiIiIyOw4eIfAqUNXlGefa7o4EmRwCogwyvVEt3J2djbatgVRFEWjbb2dSUpKQu/evXH8+HH06tVL6jhEWjXKIuSd3ANVWT7s3APgHTUMVnZOUsciIiIiC6dzn6KGCiNBBiuFvdHuU9RWTOZMEZG5yjsVjwtbP4CortMuuxz/LbpM+TfcOrJ4JyIiIunYufujxyMf4Pyv79+YnluQ/T35gqiBU0AEOt/7vEkXRACLIiJJVRVl48Kv70PUqHWWq2uqcPbnZQjoPwUl6cegrq2BS1A3+PedaPJ/dIiIiMi02Ln7o8fsd1GRl4nCtEOoq1bCytYRHl0GGH3IXFthUUQkodyknfUKops0tdW4sn+t9nFlXgbyTu5G5PTX4RLUra0iEhEREQG4cY2RuRRBtzOZ2eeIzFFlYbZe/dU1Vbi4/WMjpSEiIiKyTCyKiCSkcPLQe52qwqsou5pmhDRERERElolFEZGEfGJadvOxuqoyAychIiIislwsiogk5OTfCcFD/6HfSoIM9l4hRslDREREZIk40QKRxAIHTYdraAxyk3+HqqwAdu7+sHZ0R1b8tw3294joD1tX7zZOSURERGS+WBQRtQNOAV3gFNBFZ5lYV4Orh37SmZ3OJaQHOt3zbFvHIyIiIjJrHD5H1E4FD/0HYp/6GmGjH0fI8DnoMec9RD28HFa2DlJHIyIiIjKqzz//HE5OTqir+/vm9kqlEtbW1hg6dKhO3/j4eAiCgPT09Bbvj2eKiNoxhbMn/PtMkDoGERERETKuFeDgyYtQVqngaKfAoOhwhPp7GmVfw4YNg1KpxLFjx3DXXXcBAA4cOABfX18kJiaiuroatra2AIC9e/ciKCgIHTt2bPH+WBQRWbjY2Fjk5ubC19cXx44dkzoOERERtTPZ+SV4e93vOJOZA5lMgEwQoBFFfPv7YXQL9cOCB0cjwMvVoPuMiIiAn58f4uPjtUVRfHw8Jk6ciD179uDw4cPaM0bx8fEYNmxYq/bH4XNEFi43NxfZ2dnIzc2VOgoRERG1M9n5JXj6wx+QlnXjc4JGI6JOrYFGIwIAzl7OxdMf/oDs/BKD73vYsGHYu3ev9vHevXsxdOhQxMXFaZdXVVUhMTGRRRERERERERnH2+t+R0V1jbYIup1GI6Kiugbv/PCHwfc9bNgwJCQkoK6uDuXl5UhOTkZcXByGDBmC+Ph4AMBff/0FlUrFooiIiIiIiAwv41oBzmTmNFoQ3aTRiDidcQ0Z1woMuv+hQ4eioqICR48exYEDB9C5c2d4eXkhLi5Oe11RfHw8wsLCEBQU1Kp98ZoiIiIiIiKq5+DJi5DJhDsWRQAgkwk4mHrRoBMvhIeHo0OHDti7dy+Ki4sRFxcHAPD390dgYCAOHTqEvXv3Yvjw4a3eF88UERERERFRPcoqFWSC0Ky+MkGAslJl8AzDhg1DfHw84uPjdabiHjJkCHbu3IkjR460eugcwKKIiIiIiIga4GingEa881kiANCIIhztFQbPMGzYMBw8eBApKSnaM0UAEBcXhy+++AI1NTUsiojMjbqmCtWledDU1UodhYiIiCzcoOjwZg2dA25cVzQoOtzgGYYNG4aqqiqEh4fDx8dHuzwuLg7l5eXaqbtbi9cUEbUDNRUlyPjzKxScOQBRXQsrW0f49ByN4Lh/QGZlLXU8IiIiskCh/p6IDPFDWlZuk8WRTCaga7AfQv
0MfyPXkJAQiA2crQoODm5weUvxTBGRxNQ1VUj9bhHyU/dAVN84Q1RXrUT2XxuRtmmFxOmIiIjIkr340Gg42NpAJmv42iKZTICDrQ0WPDiqjZMZFosiIonlndyNqoIrDbYVnU9E2dWzbZyIiIiI6IYAL1f857kH0TX4xhA1mUyAlVymLZK6BvvhP889iAAvVwlTth6HzxFJrPB8YpPtRecT4dyhaxulISIiItIV4OWKD595ABnXCnAw9SKUlSo42iswKDrcKEPmpMCiiEhqdxgPqyrLR8mlZDj6d4KVrWMbhSIiIiLSFervadD7ELUnLIqIJOYWHouSjORG2/NPxSP/VDxk1gr49R6PkOGzIcjkbZiQiIiIyLzxmiIiifn0GAmFq88d+2lqVcg+vAkZu1e3QSoiIiIiy8GiiEhiVrYOiP7HW3DvfBcg/O+QbOLu0bnHd6C2qryN0hERERGZPw6fI2oHFC5eiHxgMWoqSlBbUYLkL59qtK+mToXyq2fh3qlvGyYkIiIiMl8siojaERsHV9g4uEJmbQNNrarRfjIrRRumIiIiIjJvHD5H1A55dhnUaJu1gyucg7q1YRoiIiIi88aiiKgdChz8IKzsnRtoERAy4hHI5DzJS0RERGQoLIqI2iE7dz/0mP0evLoPhSC3BgA4d4hE5LRX4RM9QuJ0REREROaFXzcTtUMadS3Ks89Bo66Fa1gvuIbGwKfH3bBS2EsdjYiIiMjssCgiamfqVJU4ve4VlGef0y4rvpCIa0e2IOofb8LWxVvCdERERETmh8PniNqZy/Hf6RREN6lKriN9xycSJCIiIiIybyyKiNoRjboOeSd3N9penJ4EVWl+GyYiIiIiMn8siojaEXVNFdSqiiZ6iFCVFzS5jRplEWrKiwwbjIiIiMiM8ZoionbESmEPawdX1FaUNNxBEHAl4UfYOLrDu/twuAR31zYVXTyGrH3fQZlzEQDg4NsRwXEPw71T3zZITkRERGS6eKaIqB0RZHL4xoxuvIMoovjCUVxP/h2p3y3Ehe0fAwCKLh7FmR9f1xZEAFCRm44zG95A4bm/mtynr68vAgIC4Ovra5DnQERERGRqeKaIqJ0JHPIglHkZKL5w5I59ryf/DpfgaGT/9TMgaup3EDW4HP8dPCL6N7qNY8eOtSYuERERkcljUUTUzsjk1ug2bQlKL6ei8NxfqCrMRnF644XLtSO/ouJ6RqPtlfmXUV2cC1s3ngkiIiIiagiHzxG1Uy7BUQgb9Rjcwns32a/mDhMvAIAoioaKRURERGR2TLYoevPNNyEIAp577jmpoxDdUV11Ba4d/RXnf3kfl/74UnvtT52qEjnHd+DSrv8iO3ELaitLddarrSiFIGv6hK6dRwfYewY12d7UWaLY2Fh06NABsbGxejwjIiIiIvNhksPnjh49ii+++ALR0dFSRyG6I2XOBZz64VXUVZZpl1078gs8ugxESUaKzhTcl/d+g073Pg8nv0649McXKLp47H/XCgkAGj7b49d7HAABaRtXNNgnaMhDEASh0Xy5ubnIzs5u4bMjIiIiMn0md6ZIqVRixowZWLVqFdzc3KSOQ9QkUaPG2Z+X6xRENxWmJdS7J5Gmrgbnt7yDE2vmo+jCkVsmT2i4IPLvOxHOQd1RXZILl9AYWNk5a9ts3fzR+b4F8OoWZ7DnQ0RERGSOTO5M0bx58zB+/HjcfffdWLp0qdRxiJpUnH4cqtI8vdYRNWrUVhQ32u7euR9s3fzg3X0YVGX5OPafOdDU1fzdQWaFkBFzENB3YpNniIiIiIjoBpMqitavX4+kpCQcPXq0Wf1VKhVUKpX2sVKpNFY0ogZVF+cafJtO/hEIHDQNqvJCnFgzH6K6VreDpg6Xd6+GZ8QA2Lp6G3z/RERERObGZIbPXblyBc8++yzWrl0LW1vbZq2zYsUKuLi4aH/i4jiMiNqWwghFSUVeJq6f+BPZhzfXL4j+R9TU4XrK7wbfNxEREZE5EkQTma
t3y5YtmDRpEuRyuXaZWq2GIAiQyWRQqVQ6bUD9M0UpKSmIi4vD8ePH0atXrzbLbmxqjQaJpzOQlpULB1sFhvXqDG835zuvSEYnatQ4+p9HmjVttqF5Rg5Gl8mL7tivQ4cOyM7ORkBAAK5evdoGyYiIiIjaF5MZPjdixAikpqbqLJszZw66dOmChQsX1iuIAEChUEChUGgfOzo6Gj1nW7teVIZ/f7kFWdeLtMu+3pGAmaPvwoxR/SRMRgAgyOToMuUlnF7/KtTVupMqOAd2Q9mVM7h9EoWAfpNRkHZQ72uRbqdw9mzV+kRERESWwmSKIicnJ3Tv3l1nmYODAzw8POottyT/b812nYIIADQaEWt2/oVQP08MiOooUTK6yblDF8Q+uQrXU3ZBmXsRVraO8I4aDufASChz05F7fAeUuemorSxDXbUS+Wf2wTU0BrUVpSjJSIaoUQOC7JaZ6JpDgE+PUUZ7TkSWTlWaj8ILiRDVtXANiYGDT6jUkYiIqBVMpiii+s5kXsP5K9cbbd9yIIVFUTthbe+CDgPur7fc0bcjPLsNQV7qHu0McmpVBfJO7oaNozt6zPkAMhsFklb+U4+9CQgd+X+w92r8hq5E1HIZf36F7MQtOl9UuHe+CxGTFkBu3bxrXomIqH0x6aIoPj5e6giSyswtukN7YRsloda49NvnulNq/0+NsgjZiZsQcd8CyKxtoamtbnQbHl0GQlOngq2LD3x6joajL4thImPIObYN2Yc31VtedP4wLv3+JTrd84wEqYiIqLVMuiiydO5O9k23Ozu0URJqKeX1S6gsyGq0veDsQXj3GAnnoG4oST/eYB9bN390mfIS70lE1AayE39ptC0vdQ9Chs+Ctb1LGyYiIiJDMJkpuam+Pl1C4OHSeOEzum9kG6ahllCrKptsF9V1OL325UYLIkFujY5jnmBBRNQG6lSVqC6+1mi7qK5FZf6VNkxERESGwqLIhMnlMrz40GjY2tQ/4denSzAmDIyWIBXpw8E7FDJrxZ073kJuYwdrB1d4Rg5Bj9nvwq2j+UwvT9Seya0Vdzxereyd2igNEREZEosiE9ercxC+WPAwpgzthYggHwR4uqKDlytq6tTYdigVVaqGb+5J7YOVrQN8e43Vax1Ro0avxz9Hl8kL4egXbqRkRHQ7QSaHV7fGbwLu6BcOB6/gNkxERESGwqLIDPh7umJM3264XlSG7IISXM0vwYmLV/Hppng89/GPKK9s/AJ9kl7I8Dnw7TkGgqz+vbYaoqmrQfnVs0ZORUQNCR76D9i6+ddbLlc4oOPYeRIkIiIiQ+BEC2bigw1/okRZVW/5pWsF+GbnX3hqyjAJUlFzyORWCB//NAIHP4jSyychyK1x/tcPINapGl/HSr8hd0RkGDaO7ujxyPvIObYdhecOoa5KCYWLNzy6DoSDd4jU8YiIqIV4psgMXMkrwpnMnEbbdx07C7VGnxt/khQUzp7wjhoOr8jB8Ioc1Gg/a0c3OAd1a8NkRHQrazsneEcNBwCoSq+jLCsVGb9/jiMfzULh+cMSpyMiopZgUWQGisqansGssroGqpq6NkpDhhA0+MFGpvUVEDr8EcjkPMlLJBVR1ODMj6+hIjddZ3ldVRnSNq5ARf5liZIREVFLsSgyA4HebpDLGn8pfdycYKewbsNE1Fq2bn6Inv0evKKGQ2ZlA0CAc2A3RE5/Dd7Rw6WOR2TRitOPo7KRwkdU1yHn6NY2TkRERK3Fr5vNgLuzA4bEdMLepHMNtk8cHMP72JggO3c/REx8AZj4AkRRA0HgdxhE7YHy2oWm23OabiciovaHRZGZePb+4SgsVeJkerbO8rF3dceUON7HxtQJggwadS0KzhxESUYyBJkVPCLuglt4LIslojZmZevYqnYiImp/WBSZCQc7Bd57aipOpl9F0vkrsJbLMCg6HMG+HlJHIwOoURbh1Pcvo7IgS7vsesrvcAnpgchpSyDX8wawRNRynpGDkbH7K4jqhq
/V9IriEFciIlPDosjMRHfsgOiOHaSOQQZ2ccenOgXRTaWZJ5C1fy1CRzwiQSoiy2Tj6IbQEY/i0h9f1Gtz6xgL7+5D2z4UERG1CosionZOVVaAogtHGm2/nrILIcNmNfvmr0TUev5974W9VxCuHf0VFdczYe3gAp8ed8MnZjSPRSIiE8SiiKidU5XmAWLj95mqqypDnaoS1nZObZiKiFxDY+AaGiN1DCIiMgAWRUTtQGnWaVw/sQs15YWw9wyEb6+xsPcMBAAoXLwBQdZoYWRl5wQrhX1bxiUiIiIyKyyKiCSWuecbXD20Qfu45FISco5tQ6cJ/4JXtyGoLrkOR79OUF5reMp1nx4jWzVcx9fXV+e/RERERJaGRRGRhEovp+oURDeJGjUubP0AGX9+jdqKov8tFQCIOv2cg6IQFDejVRmOHTvWqvWJiIiITJ3RiqJr167B39/fWJsnMgu5KX802iZq1LcURMDNgsjOowOcAiLgEdEf7p368qJuIolUFmQh59h2VORdho2DC7x73A338D5SxyIiohYwWlHUrVs3fPrpp3jooYeMtQsik1dTXqj3OqrSfETPfpcTKxBJqODMAZzb8i5Ezd/3Kio4exA+MaPQ6Z5nJUxGREQtITPWhpctW4Z//vOfmDp1KoqKiu68ApEFsnMP0HsdTZ0K5VfPGiENETVHXXUFzm/9UKcguul6yh8oPPeXBKmIiKg1jFYUPfnkkzh58iQKCwsRGRmJrVu3GmtXRCbLr/e4GzPL6UlmpTBCGiJqjoIz+6GprW60valhsURE1D4ZdaKF0NBQ7NmzB5988gkmT56Mrl27wspKd5dJSUnGjEDUrjn4hCJ83FNI3/kpRI1au1yQW0FU1/8WGgCsHd3gHNStrSIS0W1qlMVNt7dgWCwREUnL6LPPXb58GZs2bYKbmxsmTpxYrygisnS+PUfDLTwWeSd3a+9T5NQhEqfWvYy6yrLbegsIHf4IZHIeR0RSsfvfPcQaY+8Z1EZJiIjIUIz6yWrVqlV44YUXcPfdd+P06dPw8vIy5u6ITJbCyQOBAx/QWdZj9nvI2r8OhWkJ0NTVwDmwGzoMnMrZrYgk5hHRHzZOnqgpL2igVYBf7D1tnomIiFrHaEXRmDFjcOTIEXzyySeYOXOmsXZDZLbs3P0Rcd98iOILgKjh1NtmoLqmFgdOXERRWQWCfNzRNzIEcpnRLu0kI5HJrRA57VWcWf8aapR/TyQkyOQIG/1POHfoImE6IiJqCaMVRWq1GidPnkSHDh2MtQsiiyAIAiCwIDJ1f526hLfX/Q5llUq7zNfdGf/v/+5FqJ+nhMmoJRx9OyL2qa9QcOYgKvIyYO3gCu/uw2Dj5C51NCIiagFBFEVR6hBtJSkpCb1798bx48fRq1cvqeO0StL5LGxNOIlrBSXwdnPGuLu6o3/3MKljkYGJooiK65egqauBg08o5Na2UkeiFriaX4zH3v4etXXqem1ero5Y8/Js2PB6SyIiIsnwXdgErf0jEWt2/n0fjEvXCnD49CVMHtITT0yKkzAZGVLRhSPI2PVfVBVlAwDktg7w7zMRQUMeunH2iEzG1oMnGyyIACC/RIn9KRdwd2zXNk5FREREN3Ewu4nJul6kUxDdatP+ZJy6dM1o+46NjUWHDh0QGxtrtH3QDaVZp3H2p6XagggA1NUVuHJgHS7HfythMmqJ9Gv5TbdnN91ORERExsUzRSZm19EzTbav23UEHQM8YSWXY3CPcIT5G27Gv9zcXGRnZ9+5I7Xa1YQfde5bdKtrR35Fh/73w8rWoY1TUUs5O9g12e5yh3aipsTGxiI3Nxe+vr44duyY1HGIiEwSiyITU6KsarL9aFomjqZlAgC+/yMRI/t0xfzpoyCTcbiVKSnJSGm0TVNbjbKrZzg1twkZ1acrDpy40GCbTCZgeG/OVkYtxy+siIhaj8PnTExHPc/87Dp6Fj/tPW6kNGQsgqzp7ytkd2in9qVfZChGNnLN0G
MTBsPbzamNExEREdGtWBSZmJF9usLJXr8ZyH45eAIWNMmgWfCI6N9om7W9C5yDurdhGmotQRCw4KFR+Pc/xqJX5yAE+7hjcHQ43p03BVOGmvZMmEREROaAXzebGAc7BZY9NhGvfb0NRWUVzVonv6QcVapa2NvaGDkdGUrg4OkoSj8KdXX91zh46D8gs7KWIBW1hiAIGNYrAsN6RUgdhYiIiG7DosgEdQ32w/eLH0FC6kVkF5TCWibDqm0HG+3vYGsDhQ1falNi7xmI6Fnv4HL8dyg6nwiIGjj4dkTggKnwjBwsdTwiIiIis8JPyibK2kqOoT3//sb50OlLOJ3R8HTcI/tEQi7jSElT4+AVjMipr0BTVwONug5WCnupIxERERGZJX5SNhPPT7sb7k71PzSHB3hh1ti7JEhEhiKzsmFBRERERGREPFNkJoJ83PHFiw9j+1+nkHw+C1ZyGQZHd8LdsV05dI6IiIiIqAn8tGxGXB3tMWNkX8wY2bdeW51ajb1J57EnKQ2V1TXoGuyLewf1gL+na9sHJTIzJy5ewfkreXCyt8Xg6HA42CmkjkRERER6YFFkYtQaDY6cyUROYQl83V3QLzIUcnnToyBr6uqweNWvSDqfpV12JjMH2/9Kxf/7v4no2SnQ2LGJzFJBiRKvfvUrLlzN0y77dNNezJs8DGP6dZMwGREREemDRZEJOZd1Hf9vzTbkFZdrl3m6OGLx7HGIDPFvdL1fD57UKYhuqq6pw9trf8f3ix+5Y2FFRPW9tnqrTkEE3DiuPvjxT3TwckX3sIBmbUcURahq62BrozvVesa1AlRUqxDi5wFHO/3uT0ZERETNx6LIRFRUqfDvLzejrKJaZ3lBqRL//uIXTBwcjUOpl6CsUqFLsC+mxPVC97AbhdLvR043ut2CUiWOn7+Mvl1DjZqfyNycunQN57KuN9imEUVs2pd8x6KoolqF735PxB9HTqO8UgVvNydMGBiNLkG++GzzPmTkFAAAbG2sMKZfdzx272BYW8kN/lyo/RBFEaWZJ6Eqy4Otmx9ceKNmIqI2YTJF0cqVK7Fy5UpkZmYCALp164ZXX30VY8eOlTZYG/nj6Jl6BdFNFdUqrNt1VPv44MmLOJSajgUPjcLdsV1RXFbZ5LaL7tBORPVduNpwQXTT8fNZ+McbX0MQBPSLDMX9Q3vBx91Z215TV4dFKzch7ZbCKq+4HF9tS4BMEKARRe3y6po6bDmQgkpVDRY8OMrwT8ZClF1NQ3H6cQgyGdw794OjT5ikeVRlBajMvwxre2c4+nWCMucC0ja9jeriv2+vYO8VjC5TXoK9J4c5ExEZk8kURR06dMCbb76JTp06QRRFfPPNN5g4cSKSk5PRrZv5j92/cCXvzp1uoRFFfLopHoOiwxHs646T6dmN9g3182htPCKL4+Jg12R7ZXUNKqtrAABbDqRgb9I5vPfU/Qj2vXG87Tl+TqcgutWtBdGt/jx6FjNH36VTXNGdqWurkfbzChSnH9Muy9r3Pby6xaHTvf9CScYJKK+dh9zWAV6Rg2Hj6A4AqK0sRXVJHmwc3aBw9jRYnjpVJS5u/w8Kzh4ERA0AwM6jA2qURVCrdL+kqsy/jFNrX0HvJ7+A3JpDKImIjMVkiqIJEyboPF62bBlWrlyJw4cPW0RR5GSv/5uhskqFLftT0KmDd6NFUddgX0QE+bY2HpHF6d+9I+xtbbSFz52UVlThi1/2Y/k/JwEAEk5e1HufGlFE8oUrnMRBTxm7vtIpiG7KP70PxRkpqKss1S7L/PNrBA6ejqrCbBScPQBRXQdAgFvH3ug4dh5sXb1bnSdt4wqUXErSWVZVeLXR/jXlBcg/tQ++PUe3et9ERNQwkymKbqVWq/HTTz+hoqIC/fv3b7SfSqWCSqXSPlYqlW0Rzyjuju2KTfuT9V7vq+0JjbaF+Hlg8ezxrYlFZLHsFNZ4dupwvLX2d2g0DZ/Zud2xtMt4ZdUvKK+sxvXishbtl9cU6a
euugJ5qbsbb7+lIAIAUVOHrH3f39ZLRHH6MaR+twg9H/tE52bKpZdTUZGXCWsHV7h36gu5df3p2FXlhSg8mwB1TRWs7JzrFUTNUZ6dxqKIiMiITKooSk1NRf/+/VFdXQ1HR0ds3rwZkZGRjfZfsWIFXn/99TZMaDydAr0xZWgvbIzX/830VnY21hg/IAox4YHo0zUEMplgoIRElmd4ry4I8HTF5v0pOH/lOmysrZCend9ofxFA4pmMFu9PYW2FPl1CGm2PjY1Fbm4ufH19cexY/TMjlqi6JBeaWtWdOzaDqvQ68k78Cf++96K6JA9nf3oDFdcvadut7JzQ6Z5n4RHx95d1WQd+wJUDP0DUqFu1b/kthRgRERmeSc3DHBERgZSUFCQmJuKJJ57ArFmzcObMmUb7v/TSSygtLdX+7Nu3rw3TGt7jE4fg1Tn3oFfnIPh5uCCmUyBmjr4LMqH5hU1VTS28XJ3Qr1soCyIiA4gI8sWih8fg65dm4ePnpt3xWqPmsGpkivwH7+4DZ4fGh9Lm5uYiOzsbubm5rc5gLqwdXAEY7m9d8aXjEEUNzqxfolMQAUBdVTnSNr2JivzLAID8MweQte/7VhdEAODdfVirt0FERI0zqTNFNjY2CA8PBwD07t0bR48exUcffYQvvviiwf4KhQIKxd9DGRwdHdskpzENjg7H4OhwnWW+Hs74dFM8Kpp5bcPpjGuYHNfTGPGILJqNlRUmDu6Bb3873KL1ZTIBA7p3xIyRffFzfBL2n7iA2jo1gnzcMXVYb15L1AIKJw+4hvVs0ZC1hgiCDMXpx1FZUP/ebwAgquuQc3Qrwsc9hWuJWwyyT9/e4+DoF37njkRE1GImVRTdTqPR6FwzZOpq69TYm3QO8SnnUV1Ti+iwANwzMBqeLk0XcyP7RGJQdCfsP3Eel64VIONaAZIvXGm0v63CutE2ImqdGSP7oVRZha2HTjb7WqNRfSMxMrYrArxc4eXqBABY9PAYzH9wJGpq1bC3tTFmZLPXccyTSP1uIWrKC3UbBJl29rfmsnZ0R/6p+Cb7KHMuAAAqrus3VNLOIxC+vcYiL3U3VGX5sHPzh2/vcfCJHqHXdoiISH8mUxS99NJLGDt2LIKCglBeXo5169YhPj4ev//+u9TRDKK6phb//nILUm+ZJS41PRu/JpzAin9Ohlwm4Ni5y5DLZOjfPQwdvNy0/URRxI97jmHzvmRUqu58tmhoz84tyujr66vzXyKqTyYT8NSUYZg2IhbH0i5DEASkpmfjj6OND/WNCgtATKf696GxksthJefECq1l5+6HnnP/g9yk31CcfhwQZPCI6AcbJ0+c/+U9iOpanf5W9s6oq2x4Iozryb/dcX9Wtje+yLK2d4aqrPFrzBQu3lCV5kFu6wDvqBEIGvwgrO2dEdBvoh7Pjn+biYgMwWSKory8PMycORM5OTlwcXFBdHQ0fv/9d4wcOVLqaAbx057jOgXRTeWVKiz47GdUqf5+01619QDG3RWFZ+4fDplMwHe/H8baPxKbtZ/B0eGIjQhuUUZeuE3UfF6uTugc6IO/Tl+CrY0VZDKhwTNHro72GNYzQoKElsXa3gWBg6YhcNA0neUO3sHIOboN5dfOw8rWAV7dh8Kj6yBcTdiA3OTfbhRHep5R8owcjMJzf8HBt2OjRZHC1Qex8/4LiCIEWesKX/5tJiJqPZMpir766iupIxjVb0dON9p2a0EEAKIIbP8rFX4eLrh3UA9s3Nf4VN0KaysIAuDt5ozx/aMwcXAPCMKND2fHzmUip7AMfh7OiI3gTHREhqJWa/DWut+xN+mcznJBECDecmNWNyd7vPF/90JhYzJ/is2OvWcQOo59st7ykGGzEBz3MEoyT+L0uleavz3vUGTsXg11deO3gJBZK9DpnucgCDJDzgFBREStwHfidqKorELvdX45mIIuQb5N3jxSVVuHwT3CkZlTiP0nLsDWxgqh/p5Y/u1O5Bb9PTzE190Zi2ePR+
dAnxblJ6K/ffdHYr2CCLgx1DUqLADdwvwR5O2OuJhOsLHmn+H2SpDJUV3U8I2vb5JZKWDt4AprBxc4deiKnCO/4sbk67qs7JygcPaCc2Ak/PveCzv3ACOlJiKiluC7cTsR5OOOS9cK9Fonv0SJWvWdp3o9cOLi//5VjNMZ12All6FOrTsUJLeoDP/+YgvWvDwLjnYNT/nLe6AQ3VltnRrbEk422n7hah6Wzp3IyRNMhNym6fsDCbL/Da0TNSjNSEFDBREA1FUp0WPOe0Yphvi3mYio9UzqPkXm7L7BMXqvY2MtR/dQf/i4Oem13u0F0U2lFVX440jjF4PzHihEd1ZcXonSiqpG26trapFbVNqGiag13Dv3g8xa0Wi7uqYKqrJ8KHMuovJ/9ydqmIjyq/XPHhoC/zYTEbUei6J2Yuxd3TFpSAxuvw+rrU3j02c72Cqw4LOfEeLnAUGPG7g25VzWdYNsh8hSOdkrYN3EjHEyQYCrY9NnH6j9sLJ1QMiIRwyyLbmi9Tf2JSIi4+DwuXbkyUlDMWFgNPalXEC1qhZRHQPg6eqIhZ9tavCb5+LyShSXVwK4cdYo0MsN6dcKYCWXIcDLDZdzC+utcyeO9g0PnSOi5rFT2GBwj3DsaeCaIgCQy2V48LX/ItDbDRMHx+CeAVEG+1KDjMM/9h7Yuvog+/BmKHMuQhBkqKsu12sbVnbOcOvY20gJiYiotVgUtTOB3u54eFQ/nWWfz5+BXxJO4HjaZRSXV6KgtP6sRjW1ahSVV6JzBy+cv5qPrBYURAAwoneXFq1HRH977N4hSMu6jmsFJfXaautuXAd4+XoRPv55DzJyCvDM/cPbOCHpyz28D9zD+wAAsvavQ9b+tc1fWZAhbNRjkFnxOjIiovaKw+dMgKerIx4dPxCfvfAQbKwbH5ZTXF6J81dv3BOj4Ut9mzZhYDQiQ/xamJKIbvJwccBnzz+Ix+4djKiOAQjwcm2079aEk8jMadmXGCQNR/+mb4Dt4NsRClefG2eHOvVF1MPL4d65H/JS9yLn+A5UXM9oo6RERNRcPFNkYm4Ol2spe4UNnp06HBCAbYdSkVtYBl8PZ9wzIArDe/EsEZGhONgpMHVYb0wd1huvrPoF2fkljfbdl3IeIX792y4ctYiqvBBVhdlQuHrD3iu44YkVBBlqKkqgrlLC3isQnl0HobIgC6fXvwZNbbW2m2tYL3SZvBBWto5t+AyIiKgxLIpMTIivB85e1m+GIQHAI/cMhLuTAwZFh2unAmYRRNQ2VLV1rWonadVVlePijk9QkHboxvTbABx9O8HeKwiV+VnafoJMDlGjRm35jTN/ypyLuPDr+w1us+RSEs7/8h4ipy0x/hMgIqI74vA5EzNpSE+91xFxY8rvUX0jeW8UIglEhzV9b5rojryRZ3sliiJOr38NBWcPagsiAFDmXkBtVTkipy1Bx7FPwrf3PRA1d75v3K2KLhxFVWHTN4clIqK2waLIxAzrFYEZo/pBJtOdrer2x7fqFurf5NTeRGRc4wdEwdmh4ZkdOwZ4oW/X0HrLK6pUSDyTgaNpmVDV8EySVEouJaE8O63BtlplMSryMuHXezxUJS25R5CI8pwLrQtIREQGweFzJmj22P4Y268b9p+4gCpVDbqF+uN81nV8veNQvb4yQcCMUX0lSElEN7k7O+CtJybj7bV/ICOnQLu8d0QQFs4YrfOlhiiK+Oa3w9gYn4TqmloAN+59NHN0f9w3JKato1u8kswTTbdnpMA3ZjTqVBUt2r6VrUOL1iMiIsNiUWSifNydMXXY3/e86B0RDJlMhg17j6Gs4sbFvL7uzpg7YTD6dAmRKCUR3RQe4I0vX3wY57Kuo7BMiSBvd3TwdqvX74c/j2LtH4k6y8orVfh0czwc7BQY2adrW0UmADJ502fZy66cQeIHDwGC/gMvrB1c4Rqq/5BoIiIyPBZFZmTaiFjEdgnGrmNnobCSY8LAHvB05cxGRMakqqlDysUrqF
Or0T00AC6Odk32jwjyAeDTYFtNbR027ktqdN0fdx9lUdTGPLoMwJWD6xttF9W1//uHptE+gpUNxLoa3WUyOTqOfRIyOd+GiYjaA/41NhM1tXV4a+3v2H/i7/HpG/YexwPDYzFn3AAJkxGZr60JJ7F6xyGUV944O2ttJceEAdF4bOJgyGX6nzm4nFukPdPbYPv1IpQoK+HqaN9gu6+vr85/qfUcfTvCK2o48lP3tGh9l5Ae6DjmSRSmHUT+6f1Qqyrh1KErAu6aBKc73O+IiIjaDosiM/HZ5n06BREA1Kk1WLfrCLxdnTB+QJREyYjM076U8/j4Z90PyrV1amzanwxrKzn+b8KgZm3nan4xftpzHEfOZkKtafxsA3DjGkEbq8b/bB87dqxZ+yT9dJ7wHBy8gpFzbBtUZfmQWdlAc9uZn1vZeQUjaPCDsPcKgoNXMADAftB0BA6a3laRiYhITyyKzEBZRRX+OHqm0faN+5JYFBEZ2Po/jzba9svBE7CxkuNaYSncnR0wqk8kQvw86vW7eDUP8z/9GRXVjX/AvlVsl2BOqy8BQSZHhwH3I6D/FGhqq5GduAVZ+75vtL/cWgGvyMFtmJCIiFqLRZEZyMgpRG1d4/fHuJJXjCpVDewU/DBFZAhVqlpczM5vtL26phbf3TJZwk97j+OR8QMwYWA09iadR1FZBYJ83LE14WSzCyJ7Wxs8Mn5gq7NTywmCALmNHdw69m6yKHIL69WGqYiIyBBYFJkBZ/uG739yk8LaSmfITVlFFfYmn0dxeSVCfT0wMLojrORyY8ckMhvWVjJYyWWoUzc93O1WX28/hO9/T0RNE19g3M5KLoNcJsOAqI6YMbIvgn3rn226VWxsLHJzc+Hr68uhdEbk5N8ZbuF9UHyx/tlCawdX+MWOlyAVERG1BosiMxDq74mOAV5Ib+Sb60HR4RAhAgB2HT2DD3/ajZravz+Yebk6YdnciQj192yTvESmzkoux6DocMQnn9drPX0KIgD4+qVZ8PNwaXb/3NxcZGdn67UP0k9VYTYqC6+gw4CpUDh5IC91j/b6Ilt3f8ht7HHq+5fh4NsR/n0nwsm/k8SJiYioOVgUmYln7h+Olz7fjEqV7lAcmSBg9/E0HDqVjj5dQnAw9SI0GlGnT35JOV5e9Qu+fWU2zxgRNdOssf2RfP4KSiuqjLJ9DxcHeLs6GWXbpL8aZRHO//I+SjKStcscfEIR+dAbEAQBmbvXoPzq39d2VhZkIf/0PkRMfAFe3YdKkJiIiPSh/5yx1C5Fhvjh0xcexISB0QjwcoWDnQIAoBFvFEBVqlrsP3GhXkF0U35JORJS09ssL5Gp6+Dlhv88Nx1j+nWDg60NFNZWCPMz3NnWKXG9IJfzT3R7IIoanFr3qk5BBAAV1zOQ9vMylF85q1MQ/b2iBhd3fgp1jXEK55t8fX0REBDAqdiJiFqBZ4rMSAcvNzxz/3BcupaPf76zVu/1M3IKERfTeDvvgUKky8/TBS9MH4kXpo8EAFRUq/DQa1/VO2N7JzJB0H6BYS2XY1JcDO4fyov124uiC0dQmZfRYFtdZRmuHdvW6LpqVSUKz/0F76jhxorH68eIiAyARZEZSjjZsjM+bk4N3xDyJr7xEjXNwVaBl2eNxf9bvR2q2rpmrSOXyfDJ89ORca0QgnBj2u3Gbs5K0ijLOtVke11lWZPttXdoJyIi6bEoMlFnL+dgz/FzqKhWoWuwL+6O7aqdcrvuDjeAbIiNtRzDevLu6kSt1bdrKNa8PBu/JZ7G5dxCuDnZY3TfbthyIAW/JZ7W6SsIwLzJQxEe4I3wAG+JEtOdyKwUTbZb2TqgRqlqtN3BJ8zQkYiIyMBYFJmgDzfsxva/UrWPdx09i+//OII3H5+EUD9P9OociHW7jjR7ezKZgGfvHwFnBztjxCWyOJ4ujnh4VD/UqdX4cc9xvLLqFxSUKuFgawM3J3vYKWwQ4uuBCYOi0TXYT+q4dAeekYNx5eD6Rt
udOnRF4blDgFj/mk0H345wDYk2ZjwiIjIAFkUmZtfRMzoF0U1FZRV4Y812TB/RBykXr8DdyR5F5ZX1+rk52uHRCYOQkJqO4rIKhPh5YuKgHgjvwG+piQxt6Tc7dCYwqaiuQUV1DWI6BeKFB0dCLuNECqbAwTsEvr3GIjdpZ4PthWkJ//uXAODvwsjBJxSRUxcbPyAREbUaiyITszXhZKNtV/KK8c4Pf+gsu/UtOqpjAJ6bOgJBPu4Y3beb8UISEU5cvNrojI4pF67gr1OXMCg6vI1TUUt1HDsPDj6hyDm6DZWFVyHIZBDVt183JkKQWyOg70S4hvWES0gPCIIgSV4iItIPiyITk1NYqld/EcDIPl3x8Kh+8Pd0NUomIqrvwIkLd2xnUWQ6BEGAX+/x8Os9HiWZJ3Dq+3832E9U10KjqYNraEzbBiQiolZhUWRifNydUaLU754XR89exvzpo4yUiMhy1dTVYc/xczh48iJq69To2SkQ4/p3h7ODHWrr1HdYt+l2ar/Ksk63qp2IiNofFkUmZnz/KJzLuq7XOiXKSlTX1MLe1sZIqYgsT0W1CotWbkLaLcdj0vksbD6QgnfnTUFMp0DsONz4VM49OwVq/63RiDh3JRc1tXXo1MGHx2o7J7Oxbbrduul2IiJqf1gUmZgx/bohNT0bu46dbfY6ro52sLWxNmIqIsvz3e+JOgXRTUVlFXhv/Z94d94UBP/hjsvXi+r18XV3xt2xXQEAB05exJe/7Edu0Y172dgrbDBxcA/MHjsAMhmvR2mPPLsOQubu1YDY8O0PvLoNaeNERETUWiyKTIwgCHhxxmiM7heJ3cfPoaJKhS7BvjhyNgMpF642uM7Yu7rzwxWRAWk0Iv440vgQqdMZ1/Dqf38FBMDFwQ6lFX8PeXVxsIOjnQIrt+xD50BvfLIpHhrN3zOWVapq8MOfRyGKIh69Z5BRnwe1jK2LNwIHTMWVhB/rtTkFRMA7eoQEqYiIqDVYFJmoHuGB6BH+9/CboT07Y8FnG5GdX6LTr2enQDw8ql8bpyMybzV1dSivbPxmnQBwNO2yzmMfNydcLy5HaUUVSiuqcDE7v97NXG+15cAJTB/RBw52Td84lKQRPGwm7DwDce3IL6jIy4C1vSt8etyNDgPuh9yarxkRkalhUWQmvFyd8MX8h7E3+RySz2fBykqOQVHh6BcZyrNERAZma2MNbzcn5BWXN3ud63r0BYDqmlqcuZyDPl1C9ExHbcU7ahi8o4ZJHYOIiAyARZEZUdhYYUy/bhjTj/cgIjK2ewf2wH+3HTTqPmys5EbdPhEREd3A26kTEbXA/cN6YUTvLkbbvruTPbqF+htt+0RERPQ3nikiImoBuUyGRQ+PwZS4Xjhw8gJq69SoU2uw5UBKq7ctCMCj9wyClZxnioiIiNoCiyIiolboFOiNToHeAIAqVQ3ik8/pdYNlJ3tbxIR3wF+nL6FOrUHXYF88eHdf9O8eZqzIREREdBsWRUREBmKnsMHSufdhyde/orC0QrvcwdYG4/pH4deDJ6CqrdMud3Oyxxv/dy8ignyhVmug1mhgY93yP8u+vr46/yUiIqLmEURRFO/czTwkJSWhd+/eOH78OHr16iV1HCIyU7V1ahxKTcfV/GJ4ujpiSI/OsFNYo6yiGnuS0lBYVoEgb3fExXRqVRFEREREhsF3YyIiA7O2kiOuZ+d6y50dbHHvwB5Q1dbBTmEtQTIiIiJqiMkURStWrMCmTZuQlpYGOzs7DBgwAG+99RYiIiKkjkZEdEcVVSp8+9th/HH0DJRVKni7OeHegT1w/7BekMs4ESgREZGUTOadeN++fZg3bx4OHz6MXbt2oba2FqNGjUJFRcWdVyYiklBNbR1eXLkJm/YnQ1mlAgDkFZfjv9sO4t0fdkmcjoiIiEzmTNFvv/2m83jNmjXw9vbG8ePHMWTIEIlSERHd2e7jaTh/5XqDbX8eO4spQ3siPMC7jVMRERHRTSZzpuh2pa
WlAAB3d3eJkxARNe3AyYtNt59oup2IiIiMy2TOFN1Ko9Hgueeew8CBA9G9e/dG+6lUKqhUKu1jpVLZFvGIiHTU1alb1U5ERETGZZJniubNm4dTp05h/fr1TfZbsWIFXFxctD9xcXFtlJCI6G89OwfeoT2ojZIQERFRQ0yuKHrqqaewbds27N27Fx06dGiy70svvYTS0lLtz759+9ooJRHR38bdFQV3J/sG27oG+6J3BIsiIiIiKZlMUSSKIp566ils3rwZe/bsQWho6B3XUSgUcHZ21v44Ojq2QVIiIl0ujnZ4Z9796Bbqr10mkwkY3KMTls69D4IgSJiOiIiITOaaonnz5mHdunX45Zdf4OTkhNzcXACAi4sL7OzsJE5HRNS0IB93fPjMA7iaV4zCMiUCvNzg6cIvaoiIiNoDQRRFUeoQzdHYN6mrV6/G7Nmzm7WNpKQk9O7dG8ePH0evXr0MmI6IiIiIiEyVyZwpMpHajYiIiIiITIzJXFNERERERERkDCyKiIiIiIjIopnM8DkiIlNXqqzC7uNpKCqrQJCPO+JiOkNhwz/DREREUuO7MRFRG9hzPA3v/bgLNbVq7bJVWw/ijf+7F12CfSVMRkRERBw+R0RkZJk5hXh73R86BREAlCgrsfi/v6C6plaiZERERASwKCIiMrpfE05ArdE02FairEJ88vk2TkRERES3YlFERGRkWdeLmmzPzC1soyRERETUEBZFRERG5uZk32S7h7NDGyUhIiKihrAoIiIysjH9ujXaZiWXYUTvLm2YhoiIiG7HooiIyMh6RwTjvsEx9ZbLBAHPTh0Bd54pIiIikhSn5CYiagPzJg9F38gQ/Hb4NAr/d5+iewdGI7yDt9TRiIiILB6LIiKiNtKnSwj6dAmROgYRERHdhsPniIiIiIjIorEoIiIiIiIii8aiiIiIiIiILBqvKTJTOTk5yMnJkToGGYifnx/8/PykjkEGwuPT/PAYJSIybRZVFPn5+WHJkiVm/8alUqnw4IMPYt++fVJHIQOJi4vD77//DoVCIXUUaiUen+aJxygRkWkTRFEUpQ5BhlVWVgYXFxfs27cPjo6OUsehVlIqlYiLi0NpaSmcnZ2ljkOtxOPT/PAYJSIyfRZ1psjSxMTE8A3aDJSVlUkdgYyAx6f54DFKRGT6ONECERERERFZNBZFRERERERk0VgUmSGFQoElS5bwgl8zwdfTvPD1ND98TYmITB8nWiAiIiIiIovGM0VERERERGTRWBQREREREZFFY1FEREREREQWjUWRCYqPj4cgCCgpKWmzfc6ePRv33Xdfm+3PkgiCgC1btrTZ/tasWQNXV9c22x8RERFRe8eiyEA+//xzODk5oa6uTrtMqVTC2toaQ4cO1el7s6hJT09vcFuvvfYaBEGAIAiwsrJCSEgI/vWvf0GpVBrzKVADZs+eDUEQ8Pjjj9drmzdvHgRBwOzZsxtd/+ZrffPHx8cHU6ZMwaVLl4yYmlrKmMexp6cnhgwZgg8//BAqlcqYT4NuY8jjWCaTwcXFBT179sSLL76InJwcIyYnIqK2wqLIQIYNGwalUoljx45plx04cAC+vr5ITExEdXW1dvnevXsRFBSEjh07Nrq9bt26IScnB5mZmXjrrbfw5Zdf4oUXXjDqc6CGBQYGYv369aiqqtIuq66uxrp16xAUFNSsbZw7dw7Xrl3DTz/9hNOnT2PChAlQq9XGikwtZKzjOCsrC3v37sXUqVOxYsUKDBgwAOXl5Y2uV1NTY5gnRFqGPI6PHj2KhQsX4s8//0T37t2Rmpra6Dp8LYmITAOLIgOJiIiAn58f4uPjtcvi4+MxceJEhIaG4vDhwzrLhw0b1uT2rKys4Ovriw4dOmDatGmYMWMGfv311wb7FhYW4sEHH0RAQADs7e0RFRWFH374QaePRqPB22+/jfDwcCgUCgQFBWHZsmXa9itXruCBBx6Aq6sr3N3dMXHiRGRmZtbb1+uvvw4vLy84Oz
vj8ccf13nDV6lUeOaZZ+Dt7Q1bW1sMGjQIR48ebfJ5moJevXohMDAQmzZt0i7btGkTgoKC0LNnz2Ztw9vbG35+fhgyZAheffVVnDlzBhcvXmyw78KFC9G5c2fY29sjLCwMixcvRm1trU6frVu3ok+fPrC1tYWnpycmTZqkbVOpVJg/fz4CAgLg4OCAfv366fxe3rRlyxZ06tQJtra2GD16NK5cuaLTvnLlSnTs2BE2NjaIiIjAd99916znasqMdRz7+/sjKioKTz/9NPbt24dTp07hrbfe0vYLCQnBG2+8gZkzZ8LZ2RmPPfZYg8NkU1JSIAiCzrG5atUqBAYGwt7eHpMmTcL777/P4ZENMNRx7Ovri86dO2P69OlISEiAl5cXnnjiCW2fm0ONly1bBn9/f0RERABoeJisq6sr1qxZo3186NAhxMTEwNbWFrGxsdiyZQsEQUBKSkqLnzcRETUPiyIDGjZsGPbu3at9vHfvXgwdOhRxcXHa5VVVVUhMTLzjh6nb2dnZNfqNY3V1NXr37o3t27fj1KlTeOyxx/CPf/wDR44c0fZ56aWX8Oabb2Lx4sU4c+YM1q1bBx8fHwBAbW0tRo8eDScnJxw4cAAJCQlwdHTEmDFjdPa5e/dunD17FvHx8fjhhx+wadMmvP7669r2F198ERs3bsQ333yDpKQkhIeHY/To0SgqKtLrubZHjzzyCFavXq19/PXXX2POnDkt2padnR2Axr9BdnJywpo1a3DmzBl89NFHWLVqFT744ANt+/bt2zFp0iSMGzcOycnJ2L17N/r27attf+qpp/DXX39h/fr1OHnyJKZOnYoxY8bgwoUL2j6VlZVYtmwZvv32WyQkJKCkpATTp0/Xtm/evBnPPvssXnjhBZw6dQr//Oc/MWfOHJ3fb3NlzOMYALp06YKxY8fqfDgHgHfffRc9evRAcnIyFi9e3KxtJSQk4PHHH8ezzz6LlJQUjBw5UufLDtJlyOMYuHEsP/7440hISEBeXp52+e7du3Hu3Dns2rUL27Zta9a2ysrKMGHCBERFRSEpKQlvvPEGFi5c2OJsRESkJ5EMZtWqVaKDg4NYW1srlpWViVZWVmJeXp64bt06cciQIaIoiuLu3btFAOLly5cb3c6SJUvEHj16aB8fO3ZM9PT0FO+//35RFEVx7969IgCxuLi40W2MHz9efOGFF0RRFMWysjJRoVCIq1atarDvd999J0ZERIgajUa7TKVSiXZ2duLvv/8uiqIozpo1S3R3dxcrKiq0fVauXCk6OjqKarVaVCqVorW1tbh27Vpte01Njejv7y++/fbbjeZs72bNmiVOnDhRzMvLExUKhZiZmSlmZmaKtra2Yn5+vjhx4kRx1qxZja5/+2t17do1ccCAAWJAQICoUqlEURRFAOLmzZsb3cY777wj9u7dW/u4f//+4owZMxrse/nyZVEul4vZ2dk6y0eMGCG+9NJLoiiK4urVq0UA4uHDh7XtZ8+eFQGIiYmJoiiK4oABA8S5c+fqbGPq1KniuHHjGs1pLox1HN9q4cKFop2dnfZxcHCweN999+n0aeg4T05OFgGIGRkZoiiK4rRp08Tx48frrDdjxgzRxcWl+U/YAhj6OL7Vzp07dY6dWbNmiT4+Ptrj+6aGjnMXFxdx9erVoije+Hvq4eEhVlVVadtXrVolAhCTk5Nb8rSJiEgPVpJUYmZq6NChqKiowNGjR1FcXIzOnTvDy8sLcXFxmDNnDqqrqxEfH4+wsLA7jmFPTU2Fo6Mj1Go1ampqMH78eHzyyScN9lWr1Vi+fDk2bNiA7Oxs1NTUQKVSwd7eHgBw9uxZqFQqjBgxosH1T5w4gYsXL8LJyUlneXV1tc5F5D169NBuEwD69+8PpVKJK1euoLS0FLW1tRg4cKC23draGn379sXZs2eb/h9nAry8vDB+/HisWbMGoihi/Pjx8PT0bPb6HTp0gCiKqKysRI8ePbBx40bY2Ng02PfHH3/Exx
9/jPT0dCiVStTV1cHZ2VnbnpKSgrlz5za4bmpqKtRqNTp37qyzXKVSwcPDQ/vYysoKffr00T7u0qULXF1dcfbsWe1r9thjj+lsY+DAgfjoo4+a/ZxNlSGP48aIoghBEHSWxcbG6r2dc+fO6QydBIC+ffs2++yEpWntcdwQURQBQOf1jIqKavT4bsy5c+cQHR0NW1tb7bJbzwATEZFxsSgyoPDwcHTo0AF79+5FcXEx4uLiAAD+/v4IDAzEoUOHsHfvXgwfPvyO24qIiMCvv/4KKysr+Pv7N/kG+8477+Cjjz7Chx9+iKioKDg4OOC5557TDs+6OVyrMUqlEr1798batWvrtXl5ed0xq6V45JFH8NRTTwEAPv30U73WPXDgAJydneHt7V2v+LzVX3/9hRkzZuD111/H6NGj4eLigvXr1+O9997T9mnq9VQqlZDL5Th+/DjkcrlOm6Ojo16ZLZUhj+PGnD17FqGhoTrLHBwcdB7LZDdGN9/80A2g3rVlpL/WHMcNufmlT0hIiHbZ7a8lcKNouvW1BPh6EhG1J7ymyMCGDRuG+Ph4xMfH60zhO2TIEOzcuRNHjhxp1nUINjY2CA8PR0hIyB2/cUxISMDEiRPx8MMPo0ePHggLC8P58+e17Z06dYKdnR12797d4Pq9evXChQsX4O3tjfDwcJ0fFxcXbb8TJ07ozNx0+PBhODo6IjAwUHtBfkJCgra9trYWR48eRWRk5B2frym4eY3VzWuw9BEaGoqOHTs2WRABNy60Dg4Oxssvv4zY2Fh06tQJly9f1ukTHR3d6GvZs2dPqNVq5OXl1XstfX19tf3q6up0Zlg7d+4cSkpK0LVrVwBA165ddV5L4Mbvmbm8lndiqOO4IWlpafjtt98wZcqUJvvd/ELi1imfb7/gPiIiot5kJuYwuYkxteY4vl1VVRW+/PJLDBky5I5fIHl5eem8lhcuXEBlZaX2cUREBFJTU3Wma+drSUTUdlgUGdiwYcNw8OBBpKSkaL9hBoC4uDh88cUXqKmpafGHqcZ06tQJu3btwqFDh3D27Fn885//xPXr17Xttra2WLhwIV588UV8++23SE9Px+HDh/HVV18BAGbMmAFPT09MnDgRBw4cQEZGBuLj4/HMM8/g6tWr2u3U1NTg0UcfxZkzZ7Bjxw4sWbIETz31FGQyGRwcHPDEE09gwYIF+O2333DmzBnMnTsXlZWVePTRRw36fKUil8tx9uxZnDlzpt5ZGEPp1KkTsrKysH79eqSnp+Pjjz/G5s2bdfosWbIEP/zwA5YsWYKzZ88iNTVVO5NZ586dMWPGDMycORObNm1CRkYGjhw5ghUrVmD79u3abVhbW+Ppp59GYmIijh8/jtmzZ+Ouu+7SDtdZsGAB1qxZg5UrV+LChQt4//33sWnTJsyfP98oz7u9MdRxXFdXh9zcXFy7dg2pqan4z3/+g7i4OMTExGDBggVNrhseHo7AwEC89tpruHDhArZv365zxhAAnn76aezYsQPvv/8+Lly4gC+++AI7d+6sNzSP/taa4zgvLw+5ubm4cOEC1q9fj4EDB6KgoAArV66847rDhw/HJ598guTkZBw7dgyPP/44rK2tte0PPfQQNBoNHnvsMZw9exa///473n33XQDg60lE1BYkvJ7JLGVkZIgAxC5duugsz8zMFAGIERERd9xGUxdoi2L9i34LCwvFiRMnio6OjqK3t7f4yiuviDNnzhQnTpyoXUetVotLly4Vg4ODRWtrazEoKEhcvny5tj0nJ0ecOXOm6OnpKSoUCjEsLEycO3euWFpaKori3xcqv/rqq6KHh4fo6Ogozp07V6yurtZuo6qqSnz66ae12xg4cKB45MiRZvxfa79uPu/GtOYC7Ztw2wXYCxYs0P4/njZtmvjBBx/Uu3B+48aNYkxMjGhjYyN6enqKkydP1rbV1NSIr776qhgSEiJaW1uLfn5+4qRJk8STJ0+KonhjogUXFxdx48aNYlhYmK
hQKMS777673qQBn332mRgWFiZaW1uLnTt3Fr/99ttGn4O5MdRxDEAEIMrlctHd3V0cNGiQ+MEHH+gcN6J4Y6KFDz74oN42Dh48KEZFRYm2trbi4MGDxZ9++klnogVRFMUvv/xSDAgIEO3s7MT77rtPXLp0qejr69ui522uDHUcAxAFQRCdnJzEHj16iAsWLBBzcnKata/s7Gxx1KhRooODg9ipUydxx44dOhMtiKIoJiQkiNHR0aKNjY3Yu3dvcd26dSIAMS0tTc9nTERE+hJE8bZBzkREZLLmzp2LtLQ0HDhwQOoo1Epr167FnDlzUFpaesdrQ4mIqHU40QIRkQl79913MXLkSDg4OGDnzp345ptv8Nlnn0kdi1rg22+/RVhYGAICAnDixAksXLgQDzzwAAsiIqI2wKKIiMiEHTlyBG+//TbKy8sRFhaGjz/+GP/3f/8ndSxqgdzcXLz66qvIzc2Fn58fpk6dypvxEhG1EQ6fIyIiIiIii8bZ54iIiIiIyKKxKCIiIiIiIovGokhCs2fPhiAIePPNN3WWb9myxaj3pSgqKsLTTz+NiIgI2NnZISgoCM888wxKS0t1+mVlZWH8+PGwt7eHt7c3FixYgLq6OqPlMnV8Pc0LX0/zw9eUiIgaw6JIYra2tnjrrbdQXFzcZvu8du0arl27hnfffRenTp3CmjVr8Ntvv+ncZFWtVmP8+PGoqanBoUOH8M0332DNmjV49dVX2yynKeLraV74epofvqZERNQgaW+TZNlmzZol3nPPPWKXLl3EBQsWaJdv3rxZbOuXZsOGDaKNjY1YW1sriqIo7tixQ5TJZGJubq62z8qVK0VnZ2dRpVK1aTZTwdfTvPD1ND98TYmIqDE8UyQxuVyO5cuX4z//+Q+uXr3a7PXGjh0LR0fHRn+6deumV47S0lI4OzvDyurGLO1//fUXoqKi4OPjo+0zevRolJWV4fTp03pt25Lw9TQvfD3ND19TIiJqCO9T1A5MmjQJMTExWLJkCb766qtmrfPf//4XVVVVjbZbW1s3e/8FBQV444038Nhjj2mX5ebm6rw5A9A+zs3Nbfa2LRFfT/PC19P88DUlIqLbsShqJ9566y0MHz4c8+fPb1b/gIAAg+y3rKwM48ePR2RkJF577TWDbJP4epobvp7mh68pERHdisPn2okhQ4Zg9OjReOmll5rV3xBDOcrLyzFmzBg4OTlh8+bNOt90+vr64vr16zr9bz729fXV45lZJr6e5oWvp/nha0pERLfimaJ25M0330RMTAwiIiLu2Le1QznKysowevRoKBQK/Prrr7C1tdVp79+/P5YtW4a8vDx4e3sDAHbt2gVnZ2dERkY249kQX0/zwtfT/PA1JSKim1gUtSNRUVGYMWMGPv744zv2bc1QjrKyMowaNQqVlZX4/vvvUVZWhrKyMgCAl5cX5HI5Ro0ahcjISPzjH//A22+/jdzcXLzyyiuYN28eFApFi/dtSfh6mhe+nuaHrykREWlJPf2dJZs1a5Y4ceJEnWUZGRmijY2NUaeH3bt3rwigwZ+MjAxtv8zMTHHs2LGinZ2d6OnpKb7wwgva6WOpPr6e5oWvp/nha0pERI0RRFEU26b8IiIiIiIian840QIREREREVk0FkVERERERGTRWBQREREREZFFY1FEREREREQWjUURERERERFZNBZFRERERERk0VgUERERERGRRWNRREREREREFo1FERERERERWTQWRUREREREZNFYFBERERERkUVjUURERERERBaNRREREREREVk0FkVERERERGTRWBQREREREZFFY1FEREREREQWjUURERERERFZNBZFRERERERk0VgUERERERGRRWNRREREREREFo1FERERERERWTQWRUREREREZNEsqijKycnBa6+9hpycHKmjEBEREREZBD/jtp7FFUWvv/46f2GIiIiIyGzwM27rWVRRREREREREdDsWRUREREREZNFYFBERERERkU
VjUURERERERBaNRREREREREVk0FkVERERERGTRWBQREREREZFFY1FEZAKqq6uljkBERERktlgUEZmA/Px8qSMQERERmS0WRUQmoKamBjU1NVLHICIiIjJLLIqITERZWZnUEYiIiIjMEosiIhNRXFwsdQQiIiIis8SiiMhE8LoiIiIiIuNgUURkIi5fvgxRFKWOQURERGR2WBQRmYjy8nJkZWVJHYOIiIjI7LAoIjIhx44d49kiIiIiIgNjUURkQgoLC3Hu3DmpYxARERGZFRZFRO1cbGwsBg4ciGXLlgEAEhMTUV5eLnEqIiIiIvPBooioncvNzcX169e19ylSqVT4448/oFKpJE5GREREZB5YFBGZoMLCQuzYsQNVVVVSRyEiIiIyeSyKiExUfn4+fvnlFxQWFkodhYiIiMiksSgiMmFlZWXYsmULTp48CY1GI3UcIiIiIpPEoojIxKnVahw+fBibN29Gdna21HGIiIiITA6LIiIzUVhYiO3bt2P79u3Iy8uTOg4RERGRybCSOgARGVZ2djays7MRHByM2NhYeHh4SB2JiIiIqF1jUURkpi5fvozLly8jNDQUPXv2hKenp9SRiIiIiNolFkVEZi4jIwMZGRnw8/ND165dERISAisrHvpEREREN/GTEVE7lpWVhcrKSgBATU0NioqK4O7u3qJt5eTkICcnB9bW1ggJCUF4eDgCAgIgk/HSQiIiIrJsLIqI2qEjR47gjTfewPbt2yGKIgCgsrIS//73vxEVFYXx48cjJCSkRduura3FhQsXcOHCBdjZ2SEiIgLdunWDg4ODAZ8BERERkelgUUTUzmzatAnTpk2DKIragugmURRx6tQpnDp1CnPnzkWvXr1ata+qqiqkpKTg1KlT6NevH7p169aq7RERERGZIo6bIWpHjhw5gmnTpkGtVkOtVjfYR6PRQKPRYNWqVcjMzDTIfuvq6pCQkID09HSDbI+IiIjIlLAoImpHli5d2uAZosbs2LHDoPs/ePAgysvLDbpNIiIiovaORRFRO5GVlYVt27Y1eobodhqNBidPnkRRUZHBMqhUKmzduhXFxcUG2yYRERFRe8eiiKid2L17d7PPEN0kiiLS0tIMmkOpVOKXX35Bfn6+QbdLRERE1F6xKCJqJ8rLy/WeHlsQBFRXVxs8S01NDVJSUgy+XSIiIqL2iEURUTvh5OQEjUaj1zqiKMLW1tbgWaytrREVFWXw7RIRERG1R5ySm6idGDFiBARB0GsInSAI6NKli0FzeHt7Y9iwYXBxcTHodomIiIjaK54pImongoKCcM8990Aulzerv0wmQ3R0NNzd3Q2yf7lcjr59++Lee+9lQUREREQWhUURUTuyePFiCIIAQRCa1X/cuHEG2a+vry8mT56MmJgYva9rIiIiIjJ1/PRD1I706dMHP/74I+RyeaNnjGQyGWQyGR577DGEhIS0an+urq64++67MWHCBLi5ubVqW0RERESmitcUEbUzkydPxqFDh/DGG29g27ZtOtcYCYKAqKgojBs3rlUFkbu7O3r27ImwsLBmn5UiIiIiMlcsiojaoT59+uDXX39FVlYWYmJiUFxcDHt7eyxevLhV1xB5eXmhV69eCAoKYjFERERE9D8mNXxu//79mDBhAvz9/SEIArZs2SJ1JCKjCgoKgr29PQDAxsamxQWRi4sLRo4cifvuuw/BwcEsiIiIiCwcP1frMqmiqKKiAj169MCnn34qdRQik2BtbY277roL999/P0JDQ1kMEREREQB+rr5di4bPpaenY/Xq1UhPT8dHH30Eb29v7Ny5E0FBQejWrZuhM2qNHTsWY8eONdr2icyFIAjo3Lkz+vTpoz3TRERERHQTP1fr0vtM0b59+xAVFYXExERs2rQJSqUSAHDixAksWbLE4AFbQ6VSoaysTPtzMyuROQsNDcX999+PuLg4FkREREQWRKlU6nz2ValUUkcyGXoXRYsWLcLSpUuxa9cu2NjYaJcPHz
4chw8fNmi41lqxYgVcXFy0P3FxcVJHIjIKQRAQHh6OqVOnYuTIkZxem4iIyALFxcXpfPZdsWKF1JFMht7D51JTU7Fu3bp6y729vVFQUGCQUIby0ksv4fnnn9c+TklJYWFEZic4OBh9+/ZlIURERGTh9u3bh5iYGO1jhUIhXRgTo3dR5OrqipycHISGhuosT05ORkBAgMGCGYJCodD5ZXB0dJQwDZFh2draYvDgwfWORSIiIrJMjo6OcHZ2ljqGSdJ7+Nz06dOxcOFC5ObmQhAEaDQaJCQkYP78+Zg5c6YxMhLRbTw8PDB58mQWREREREQGoPeZouXLl2PevHkIDAyEWq1GZGQk1Go1HnroIbzyyivGyKilVCpx8eJF7eOMjAykpKTA3d0dQUFBRt03kVR8fX1RV1enPevp5eWF8ePH61zTR0RERKQPfq7WJYiiKLZkxStXriA1NRVKpRI9e/ZEp06dDJ2tnvj4eAwbNqze8lmzZmHNmjV3XD8pKQm9e/fG8ePH0atXLyMkJDKO9PR07N69G9bW1pg6dSqHghIREZFWSz7jtvZztblp0X2KACAwMBCBgYGGzHJHQ4cORQtrOCKzEBERwYKIiIiIWo2fq3XpfU3RlClT8NZbb9Vb/vbbb2Pq1KkGCUVEDeM1RERERESGp3dRtH//fowbN67e8rFjx2L//v0GCUVE9dnY2MDHx0fqGERERERmR++iSKlUNniBt7W1NcrKygwSiojq69ChA2QyvQ9ZIiIiIroDvT9hRUVF4ccff6y3fP369YiMjDRIKCKqLzg4WOoIRERERGZJ74kWFi9ejMmTJyM9PR3Dhw8HAOzevRs//PADfvrpJ4MHJKIbOHSOiIiIyDj0LoomTJiALVu2YPny5fj5559hZ2eH6Oho/Pnnn4iLizNGRiKLJwgCZ50jIiIik1ZdXY2amhqdZc7OzhKl0dWiKbnHjx+P8ePHGzoLETXC1taW1xMRERGRyamsrMSLL76IDRs2oLCwsF67Wq2WIFV9Lf6UVVNTg6tXryIrK0vnh4gMz9bWVuoIRNSEuro6qSMQEbVLCxYswJ49e7By5UooFAr897//xeuvvw5/f398++23UsfT0vtM0YULF/DII4/g0KFDOstFUYQgCO2m2iMyJ+3l1DIRNayyspLHKRFRA7Zu3Ypvv/0WQ4cOxZw5czB48GCEh4cjODgYa9euxYwZM6SOCKAFRdHs2bNhZWWFbdu2wc/PD4IgGCMXEd3CyqpFI12JqI0olUoWRUREDSgqKkJYWBiAG1/yFhUVAQAGDRqEJ554QspoOvT+pJWSkoLjx4+jS5cuxshDRERkcgoKCuDv7y91DCKidicsLAwZGRkICgpCly5dsGHDBvTt2xdbt26Fq6ur1PG09L6mKDIyEgUFBcbIQkREZJKuXLkCURSljkFE1O7MmTMHJ06cAAAsWrQIn376KWxtbfGvf/0LCxYskDjd3/Q+U/TWW2/hxRdfxPLlyxEVFQVra2uddg4fICIiS6NUKpGZmYnQ0FCpoxARtSv/+te/tP++++67kZaWhuPHjyM8PBzR0dESJtOld1F09913AwBGjBihs5wTLRARkSVLTk5GSEgIr7UlIrrFt99+i2nTpkGhUAAAgoODERwcjJqaGnz77beYOXOmxAlv0Lso2rt3rzFyEBERmbSCggJcu3YNAQEBUkchImo35syZgzFjxsDb21tneXl5OebMmWO6RVFcXJwxchAREZmk2NhYZGRkwNHRER4eHrjvvvsgl8uljkVE1C7cHE12u6tXr8LFxUWCRA1r0Ty/Bw4cwBdffIFLly7hp59+QkBAAL777juEhoZi0KBBhs5IRETUbuXm5qKoqAgajQaFhYVISEjA4MGDOYyOiCxaz549IQgCBEHAiBEjdG4volarkZGRgTFjxkiYUJfeRdHGjRvxj3/8AzNmzEBSUhJUKhUAoLS0FMuXL8eOHTsMHpKIiMhUpKWloaqqCgMHDo
Sjo6PUcYiIJHHfffcBuHE7n9GjR+v8PbSxsUFISAimTJkiUbr69C6Kli5dis8//xwzZ87E+vXrtcsHDhyIpUuXGjQcERGRKbp8+TKuXr2KTp06oXv37nB3d5c6EhFRm1qyZAkAICQkBNOmTYOtra3EiZqmd1F07tw5DBkypN5yFxcXlJSUGCITERGRyVOr1UhLS0NaWhr8/PwQExODDh06cFgdEVmUWbNmSR2hWfQuinx9fXHx4kWEhIToLD948CDCwsIMlYuIiMhs5OTkICcnBwEBARgyZAicnJykjkREZDRubm7N/gKoqKjIyGmaR++iaO7cuXj22Wfx9ddfQxAEXLt2DX/99Rfmz5+PxYsXGyMjERGRWcjOzsbGjRvRv39/dO7cmWeNiMgsffjhh1JH0JveRdGiRYug0WgwYsQIVFZWYsiQIVAoFJg/fz6efvppY2QkIiIyGzU1Ndi3bx9SU1PRvXt3dOzYEdbW1lLHIiIyGFMZMncrvYoitVqNhIQEzJs3DwsWLMDFixehVCoRGRnJGXaIiMjiZGVlobKyEsCNYqeoqKjZkyoUFRVh//79OHToEIKCghAaGooOHTpo7/pORGQu0tPTsXr1aqSnp+Ojjz6Ct7c3du7ciaCgIHTr1k3qeAAAmT6d5XI5Ro0aheLiYtjY2CAyMhJ9+/ZlQURERBblyJEjmDBhAkJCQlBcXAwAqKysxL///W98+umnyMzMbPa26urqcOnSJezevRvfffcdtm7dipSUFBQVFUEURSM9AyKitrFv3z5ERUUhMTERmzZtglKpBACcOHFCO0Nde6D38Lnu3bvj0qVLCA0NNUYeIiKidm3Tpk2YNm0aRFGsV7SIoohTp07h1KlTmDt3Lnr16qXXtjUajXZShiNHjsDR0REhISGIiIiAh4eHIZ8GEVGbWLRoEZYuXYrnn39eZ5KZ4cOH45NPPpEwmS69zhQBN+5TNH/+fGzbtg05OTkoKyvT+SEiIjJXR44cwbRp06BWq6FWqxvso9FooNFosGrVKr3OGDVEqVTi1KlT2LhxI3777TftDdOJiExFamoqJk2aVG+5t7c3CgoKJEjUML2LonHjxuHEiRO499570aFDB7i5ucHNzQ2urq5wc3MzRkYiIqJ2YenSpQ2eIWrMjh07DLbvrKwsHDhwwGDbIyJqC66ursjJyam3PDk5GQEBARIkapjew+f27t1rjBxERETtWlZWFrZt29bsgkij0eDkyZN6Tb5wJ5cuXcK5c+cQERFhkO0RERnb9OnTsXDhQvz0008QBAEajQYJCQmYP38+Zs6cKXU8Lb2Lori4OGPkICIiatd2796t98QHoigiLS0NAwYMMFiO/fv3QyaToVOnTgbbJhGRsSxfvhzz5s1DYGAg1Go1IiMjoVar8dBDD+GVV16ROp6W3sPnAODAgQN4+OGHMWDAAGRnZwMAvvvuOxw8eNCg4YiIiNqL8vJyyGT6vW0KgoDq6mqD5hBFEfv379dOBU5E1J7Z2Nhg1apVSE9Px7Zt2/D9998jLS0N3333HeRyudTxtPQuijZu3IjRo0fDzs4OSUlJ2os+S0tLsXz5coMHJCIiag+cnJyg0Wj0WkcURdja2hopERGR6QgKCsK4cePwwAMPtMsz3XoPn1u6dCk+//xzzJw5E+vXr9cuHzhwIJYuXWrQcERERO3FiBEjIAiCXkPoBEFAly5dDJrD2toaw4cPh729vUG3S0RkKM8//3yz+77//vtGTNJ8ehdF586dw5AhQ+otd3FxQUlJiSEyERERtTtBQUG45557sGPHjkan476VTCZDVFSUwSZZAAAvLy8MGzYMrq6uBtsmEZGhJScn6zxOSkpCXV2ddpKY8+fPQy6Xo3fv3lLEa5DeRZGvry8uXryIkJAQneUHDx5EWFiYoXIRERG1O4sXL8bOnTubfcZo3LhxBtmvlZUVevXqhejoaL2vayIiamu3zlb9/vvvw8nJCd9884329j3FxcWYM2
cOBg8eLFXEevT+yzp37lw8++yzSExMhCAIuHbtGtauXYv58+fjiSeeMEZGIiKidqFPnz748ccfIZfLG71AWCaTQSaT4bHHHqv3BaK+BEFAREQEpk2bhpiYGBZERGRy3nvvPaxYsULnfqZubm5YunQp3nvvPQmT6dL7TNGiRYug0WgwYsQIVFZWYsiQIVAoFJg/fz6efvppY2QkIiJqNyZPnoxDhw7hjTfeqHffIkEQEBUVhXHjxrWqILKxsUHnzp0RFRUFJycnA6QmIpJGWVkZ8vPz6y3Pz89HeXm5BIka1qyi6OTJk+jevTtkMhkEQcDLL7+MBQsW4OLFi1AqlYiMjISjo6OxsxIREbULffr0wa+//oqsrCzExMSguLgY9vb2WLx4cYuvIRIEAf7+/ujUqRNCQ0NhbW1t4NRERG1v0qRJmDNnDt577z307dsXAJCYmIgFCxZg8uTJEqf7W7OKop49eyInJwfe3t4ICwvD0aNH4eHhgcjISGPnIyIiareCgoJgb2+P4uJi2NjYtKggcnd3R+fOndGxY0c4ODgYISURkXQ+//xzzJ8/Hw899BBqa2sB3LhO8tFHH8U777wjcbq/NasocnV1RUZGBry9vZGZman3fRqIiIjob1ZWVujYsSMiIyPh6ekJQRCkjkREZBT29vb47LPP8M477yA9PR0A2uWXQM0qiqZMmYK4uDj4+flBEATExsY2eoHppUuXDBqQiIjIXDg6OiIyMhJdu3aFQqGQOg4RUZtxcHBAdHS01DEa1ayi6Msvv8TkyZNx8eJFPPPMM5g7dy4v/CQiImomb29vREVFITQ0lDPIERG1Q82eaGHUqFEYM2YMjh8/jmeffZZFERER0R14e3ujb9++2pEWRETUPuk90cK+fftQU1Nj7FxEREQmSy6Xo3///ujatSuLISIiE9Csc/g3J1oAIPlEC59++ilCQkJga2uLfv364ciRI5JlISIi8vX1hbu7O5ydnQHcGDc/ceJEREZGsiAionaNn6v/ZlITLfz44494/vnn8fnnn6Nfv3748MMPMXr0aJw7dw7e3t5G2y8REVFjjh07hh9//BGlpaVwdHTEhAkTOMSciNo9KT5XOzs7IyUlBWFhYUbZfmuY1EQL77//PubOnYs5c+YAuDHv+fbt2/H1119j0aJFbZ6HiIjoJrlcjjFjxrAgIiKTIMXnalEUjbJdQ2hWUQQAY8aMAQDJJlqoqanB8ePH8dJLL2mXyWQy3H333fjrr78aXEelUkGlUmkfK5VKAEBdXZ325lFEREStVVdXh+7du8PJyYnvL0TU5urq6gDc+KxbVlamXa5QKBqc/r8ln6vNXbOLoptWr15tjBx3VFBQALVaDR8fH53lPj4+SEtLa3CdFStW4PXXX6+3vF+/fkbJSEREREQklbi4OJ3HS5YswWuvvVavX0s+VxvCww8/rL3+sr1pVlE0efJkrFmzBs7Ozpg8eXKTfTdt2mSQYIbw0ksv4fnnn9c+TklJQVxcHBITE9GzZ08JkxERkTk5fPgw7rrrLqljEJGFSk5ORr9+/bBv3z7ExMRol7e3m0SvXLlS6giNalZR5OLiop1Bx8XFxaiBGuPp6Qm5XI7r16/rLL9+/Tp8fX0bXOf2U4aOjo4AACsrK1hbWxsvLBERWRRfX1++rxCRZKysbnykd3R0bNaZmJZ8rjZ3zSqKbh0yJ9XwORsbG/Tu3Ru7d+/GfffdBwDQaDTYvXs3nnrqKUkyERERAX9/6UZEZAr4ubo+va8pktLzzz+PWbNmITY2Fn379sWHH36IiooK7awZREREUrC1tZU6AhGRXvi5WleziqKePXs2+wZ0SUlJrQrUlGnTpiE/Px+vvvoqcnNzERMTg99++63eRWJERERtiUUREZkafq7W1ayi6OZpNQCorq7GZ599hsjISPTv3x/AjQtMT58+jSeffNIoIW/11FNPWexpPSIiap9sbGykjkBEpLe2/lxdXV2NmpoanWXtZT
a6ZhVFS5Ys0f77//7v//DMM8/gjTfeqNfnypUrhk1HRERkAmQymdQRiIjapcrKSrz44ovYsGEDCgsL67Wr1WoJUtWn91/xn376CTNnzqy3/OGHH8bGjRsNEoqIiIiIiEzfggULsGfPHqxcuRIKhQL//e9/8frrr8Pf3x/ffvut1PG09C6K7OzskJCQUG95QkICx1QTEREREZHW1q1b8dlnn2HKlCmwsrLC4MGD8corr2D58uVYu3at1PG09J597rnnnsMTTzyBpKQk9O3bFwCQmJiIr7/+GosXLzZ4QCIiovZOFMVmT0hERGRJioqKEBYWBuDG9UNFRUUAgEGDBuGJJ56QMpoOvYuiRYsWISwsDB999BG+//57AEDXrl2xevVqPPDAAwYPSERE1N6Joih1BCKidiksLAwZGRkICgpCly5dsGHDBvTt2xdbt26Fq6ur1PG0WnSfogceeIAFEBER0f/I5XKpIxARtUtz5szBiRMnEBcXh0WLFmHChAn45JNPUFtbi/fff1/qeFomdfNWIiIiIiIyHf/617+0/7777ruRlpaG48ePIzw8HNHR0RIm08U5RImIiIjIbNXW1kodwaJ9++23UKlU2sfBwcGYPHkyunTpYtqzzxERERERmYrbbxZKbWvOnDkoLS2tt7y8vBxz5syRIFHDWBQRERERkdniRCjSamx2zqtXr8LFxUWCRA3jNUVEREREZLZYFEmjZ8+eEAQBgiBgxIgRsLL6u+xQq9XIyMjAmDFjJEyoS++iSK1WY82aNdi9ezfy8vKg0Wh02vfs2WOwcERERERErcFriqRx3333AQBSUlIwevRoODo6attsbGwQEhKCKVOmSJSuPr2LomeffRZr1qzB+PHj0b17d96sjoiIiIjarVsv8qe2s2TJEgBASEgIpk2bBltbW4kTNU3vomj9+vXYsGEDxo0bZ4w8REREREQGU11d3eh1LWR8s2bNkjpCs+hdFNnY2CA8PNwYWYiIiIiIDEqtVkOlUrX7MxXmxM3NrdlFaFFRkZHTNI/eRdELL7yAjz76CJ988gkrbiIiIiJq98rKylgUtaEPP/xQ6gh607soOnjwIPbu3YudO3eiW7dusLa21mnftGmTwcIREREREbVWQUEBvL29pY5hMUxlyNyt9C6KXF1dMWnSJGNkISIiIiIyuKysLERGRkodw2Klp6dj9erVSE9Px0cffQRvb2/s3LkTQUFB6Natm9TxALSgKFq9erUxchARERERGcWVK1dQWlrarm4Wain27duHsWPHYuDAgdi/fz+WLVsGb29vnDhxAl999RV+/vlnqSMCAGRSByAiIiIiMiZRFJGYmCh1DIu0aNEiLF26FLt27YKNjY12+fDhw3H48GEJk+nS+0wRAPz888/YsGEDsrKyUFNTo9OWlJRkkGBERERERIaSmZmJS5cuISwsTOooFiU1NRXr1q2rt9zb2xsFBQUSJGqY3meKPv74Y8yZMwc+Pj5ITk5G37594eHhgUuXLmHs2LHGyEhEREREpLfY2FgMGjQIy5YtAwAcOHAAFRUVEqeyLK6ursjJyam3PDk5GQEBARIkapjeRdFnn32GL7/8Ev/5z39gY2ODF198Ebt27cIzzzyD0tJSY2QkIiIiItJbbm4url+/jrKyMgCASqXC7t27odFoJE5mOaZPn46FCxciNzcXgiBAo9EgISEB8+fPx8yZM6WOp6V3UZSVlYUBAwYAAOzs7FBeXg4A+Mc//oEffvjBsOmIiIiIiAwoNzcXBw8ehCiKUkexCMuXL0eXLl0QGBgIpVKJyMhIDBkyBAMGDMArr7widTwtvYsiX19f7Z1ng4KCtBdIZWRk8JeLiIiIiNq9tLQ0HDt2jJ9d24CNjQ1WrVqF9PR0bNu2Dd9//z3S0tLw3XffQS6XSx1PS++JFoYPH45ff/0VPXv2xJw5c/Cvf/0LP//8M44dO4bJkycbIyMRERERkUElJydDEAT07t0bgiBIHcfsBQ
UFISgoSOoYjdK7KPryyy+14zDnzZsHDw8PHDp0CPfeey/++c9/GjwgEREREZExJCUlQa1Wo2/fviyMDOj5559vdt/333/fiEmaT++iSCaTQSb7e9Td9OnTMX36dIOGIiIiIiJqCydOnEB1dTUGDRrUroZzmbLk5GSdx0lJSairq0NERAQA4Pz585DL5ejdu7cU8RrUovsUHThwAF988QXS09Px888/IyAgAN999x1CQ0MxaNAgQ2ckIiIiIjKac+fOobi4GMOHD4ezs7PUcUze3r17tf9+//334eTkhG+++QZubm4AgOLiYsyZMweDBw+WKmI9ek+0sHHjRowePRp2dnZITk6GSqUCAJSWlmL58uUGD0hEREREZGx5eXn4+eefcfLkSU7ZbUDvvfceVqxYoS2IAMDNzQ1Lly7Fe++9J2EyXXoXRUuXLsXnn3+OVatWwdraWrt84MCBSEpKMmg4IiIiIqK2UldXh8OHD2Pz5s24fv261HHMQllZGfLz8+stz8/P197apz3Quyg6d+4chgwZUm+5i4sLSkpKDJGJiIiIiEgyhYWF+OWXX7B//37tqChqmUmTJmHOnDnYtGkTrl69iqtXr2Ljxo149NFH29XM1XpfU+Tr64uLFy8iJCREZ/nBgwcRFhZmqFxERERERJJKS0vD5cuXMWDAAISFhXGGuhb4/PPPMX/+fDz00EOora0FAFhZWeHRRx/FO++8I3G6v+l9pmju3Ll49tlnkZiYCEEQcO3aNaxduxbz58/HE088YYyMRERERESSqKqqwu7du/HHH3+goqJC6jgmx97eHp999hkKCwuRnJyM5ORkFBUV4bPPPoODg4PU8bT0PlO0aNEiaDQajBgxApWVlRgyZAgUCgXmz5+Pp59+2hgZiYiIiIgkdfnyZeTm5mLAgAEIDw/nWSM9OTg4IDo6WuoYjdK7KBIEAS+//DIWLFiAixcvQqlUIjIyEo6OjsbIR0RERESkt6ysLFRWVgIAampqUFRUBHd391ZtU6VSYe/evcjMzNSeGCDzoPfwuZtsbGwQGRmJvn37siAiIiIionbhyJEjmDBhAkJCQlBcXAwAqKysxL///W98+umnyMzMbPU+MjIysHnzZu32yfQ1+0zRI4880qx+X3/9dYvDEBERERG11KZNmzBt2jSIoghRFHXaRFHEqVOncOrUKcydOxe9evVq1b7Kysrw66+/Yty4cfDy8mrVtkh6zT5TtGbNGuzduxclJSUoLi5u9IeIiIiIqK0dOXIE06ZNg1qthlqtbrCPRqOBRqPBqlWrDHLGSKVSYceOHfwMbAaafaboiSeewA8//ICMjAzMmTMHDz/8cKvHZRIRERERGcLSpUsbPEPUmB07duDJJ59s9X5vFkb33nsvnJycWr09kkazzxR9+umnyMnJwYsvvoitW7ciMDAQDzzwAH7//fdm//IRERERERlaVlYWtm3b1ugZottpNBqcPHkSRUVFBtl/RUUFtm3bhrKyMoNsj9qeXhMtKBQKPPjgg9i1axfOnDmDbt264cknn0RISAiUSqWxMhIRERERNWr37t16f0kviiLS0tIMlqG8vBxbt25FSUmJwbZJbafFs8/JZDIIggBRFJtdlRMRERERGVp5eTlkMv0+1gqCgOrqaoPmqKiowI4dO6BSqQy6XTI+vX57VCoVfvjhB4wcORKdO3dGamoqPvnkE2RlZRl9Wu5ly5ZhwIABsLe3h6urq1H3RURERESmw8nJCRqNRq91RFGEra2twbMolUqcPHnS4NuViqV8Bm92UfTkk0/Cz88Pb775Ju655x5cuXIFP/30E8aNG6d3Zd4SNTU1mDp1Kp544gmj74uIiIiITMeIESMgCIJe6wiCgC5duhgljznNRmcpn8GbPfvc559/jqCgIISFhWHfvn3Yt29fg/02bdpksHC3ev311wHcmBqciIiIiOimoKAg3HPPPdixY0ezLuuQyWSIiooy2kzKPj4+RtmuFCzlM3
izi6KZM2fqXYETEREREbWFxYsXY+fOndpr3u9k3LhxRskRHByM7t27G2XbZDzNLopMsTpUqVQ6F7pxhjwiIiIi89SnTx/8+OOPmDZtWqMTgd285OOxxx5DSEiIQfevUCgQGxuLyMhIyU4kKJVKnWnBFQoFFAqFJFlMjfEvBmrCokWLIAhCkz+tmSpxxYoVcHFx0f7ExcUZMD0RERERtSeTJ0/GoUOHMG7cuHqFiSAIiIqKwsKFC9GzZ0+D7dPa2hq9evXCgw8+iG7dukk6siouLk7ns++KFSsa7Gfsz+CmSBAlvPNqfn4+CgsLm+wTFhYGGxsb7eM1a9bgueeea9Yc8LefKUpJSUFcXByOHz+OXr16tTg3EREREbVvWVlZiImJQXFxMezt7bF48WKDXkMkk8nQrVs39OzZ0yiz2OkjKSkJvXv3xr59+xATE6Nd3tiZImN/BjdFzR4+ZwxeXl7w8vIy2vZv/0Uw9rThRERERNQ+BAUFwd7eHsXFxbCxsTFoQeTj44MhQ4bAzc3NYNs0BEdHRzg7O9+xn7E/g5siSYsifWRlZaGoqAhZWVlQq9VISUkBAISHh7PYISIiIiKjs7a2Rp8+fSQfJteWLOUzuMkURa+++iq++eYb7eObY0H37t2LoUOHSpSKiIiIiCxBWFgY7rrrLrMqBJrDUj6Dm0xRtGbNGpOcAY+IiIiITJePjw/69esHX19fqaNIwlI+g5tMUURERERE1FZcXFzQt29fhISEWMxQOUvGooiIiIiI6H+sra0RGxuLbt26ae9rROaPRREREREREW7MWDd48GA4ODhIHYXaGIsiIiIiIrJoMpkMd911l0XNKke6WBQRERERkcVSKBQYNWoU/Pz8pI5CEmJRREREREQWycHBAePGjWt3N2GltseiiIiIiIgsjqOjIyZMmAAnJyepo1A7wCk1iIiIiMii2NnZYfz48SyISItFERERERFZDJlMhpEjR8LFxUXqKNSOcPgcEREREZklX19f1NXVQaFQaJf1798fvr6+Eqai9ohFERERERGZpWPHjuHixYvYs2cPACA0NBSRkZESp6L2iMPniIiIiMjs2djYYNCgQbwPETWIRRERERERmb3IyEjY2dlJHYPaKRZFRERERGT2OnfuLHUEasdYFBERERGRWXN1dYWrq6vUMagdY1FERERERGatQ4cOUkegdo5FERERERGZNR8fH6kjUDvHooiIiIiIzJq7u7vUEaidY1FERERERGZLEAQ4OztLHYPaORZFRERERGS2bG1tIZfLpY5B7RyLIiIiIiIyW7a2tlJHIBPAooiIiIiIzBbPElFzsCgiIiIiIrMlCILUEcgEsCgiIiIiIrMlk/HjLt0Zf0uIiIiIyGxx+Bw1B4siIiIiIjJbLIqoOVgUEREREZHZ4vA5ag7+lhARERGR2eJEC9QcLIqIiIiIyGxx+Bw1B4siIiIiIjJbPFNEzcGiiIiIiIiILBqLIiIiIiIismgsioiIiIiIyKKxKCIiIiIiIovGooiIiIiIiCwaiyIiIiIiIrJoVlIHIOPIyclBTk6O1DHIQPz8/ODn5yd1DDIQHp/mh8eoeeExal54fFJzWFRR5OfnhyVLlpj9gaFSqfDggw9i3759UkchA4mLi8Pvv/8OhUIhdRRqJR6f5onHqPngMWp+LOH4tJTPuMYkiKIoSh2CDKusrAwuLi7Yt28fHB0dpY5DraRUKhEXF4fS0lI4OztLHYdaicen+eExal54jJoXHp/UXBZ1psjSxMTE8A+AGSgrK5M6AhkBj0/zwWPUPPEYNQ88Pqm5ONECERERERFZNBZFRERERERk0VgUmSGFQoElS5aY9QWFloSvp3nh62l++JqaF76e5oWvJzUXJ1ogIiIiIiKLxjNFRERERERk0VgUERERERGRRWNRREREREREFo1FEZHEBEHAli1b2mx/a9asgaura5vtj4iIaOjQoXjuueea1bet36dee+01xMTENLt/Zm
YmBEFASkqK0TJR22NRRNSE2bNnQxAEPP744/Xa5s2bB0EQMHv27EbXj4+PhyAI2h8fHx9MmTIFly5dMmJqIrqVIY9jmUwGFxcX9OzZEy+++CJycnKMmJyIAP2LlrZ2829ESUmJ1FGoFVgUEd1BYGAg1q9fj6qqKu2y6upqrFu3DkFBQc3axrlz53Dt2jX89NNPOH36NCZMmAC1Wm2syER0G0Mex0ePHsXChQvx559/onv37khNTW10nZqamlZnJyIi42NRRHQHvXr1QmBgIDZt2qRdtmnTJgQFBaFnz57N2oa3tzf8/PwwZMgQvPrqqzhz5gwuXrzYYN+FCxeic+fOsLe3R1hYGBYvXoza2lqdPlu3bkWfPn1ga2sLT09PTJo0SdumUqkwf/58BAQEwMHBAf369UN8fHy9/WzZsgWdOnWCra0tRo8ejStXrui0r1y5Eh07doSNjQ0iIiLw3XffNeu5ErVHhjqOfX190blzZ0yfPh0JCQnw8vLCE088oe0ze/Zs3HfffVi2bBn8/f0REREBoOFhsq6urlizZo328aFDhxATEwNbW1vExsZiy5YtHKJDJqmiogIzZ86Eo6Mj/Pz88N577+m0N/d9CrgxlO7111/HiRMntGdsbx4377//PqKiouDg4IDAwEA8+eSTUCqVd8z35ptvwsfHB05OTnj00UdRXV1dr89///tfdO3aFba2tujSpQs+++yzBreVmZmJYcOGAQDc3Nx0zjz/9ttvGDRoEFxdXeHh4YF77rkH6enpd8xH0mBRRNQMjzzyCFavXq19/PXXX2POnDkt2padnR2Axr9BdnJywpo1a3DmzBl89NFHWLVqFT744ANt+/bt2zFp0iSMGzcOycnJ2L17N/r27attf+qpp/DXX39h/fr1OHnyJKZOnYoxY8bgwoUL2j6VlZVYtmwZvv32WyQkJKCkpATTp0/Xtm/evBnPPvssXnjhBZw6dQr//Oc/MWfOHOzdu7dFz5moPTDkcQzcOJYff/xxJCQkIC8vT7t89+7dOHfuHHbt2oVt27Y1a1tlZWWYMGECoqKikJSUhDfeeAMLFy5scTYiKS1YsAD79u3DL7/8gj/++APx8fFISkrStjfnfeqmadOm4YUXXkC3bt2Qk5ODnJwcTJs2DQAgk8nw8ccf4/Tp0/jmm2+wZ88evPjii01m27BhA1577TUsX74cx44dg5+fX72CZ+3atXj11VexbNkynD17FsuXL8fixYvxzTff1NteYGAgNm7cCODG2eScnBx89NFHAG4Uh88//zyOHTuG3bt3QyaTYdKkSdBoNPr9D6W2IRJRo2bNmiVOnDhRzMvLExUKhZiZmSlmZmaKtra2Yn5+vjhx4kRx1qxZja6/d+9eEYBYXFwsiqIoXrt2TRwwYIAYEBAgqlQqURRFEYC4efPmRrfxzjvviL1799Y+7t+/vzhjxowG+16+fFmUy+Vidna2zvIRI0aIL730kiiKorh69WoRgHj48GFt+9mzZ0UAYmJioiiKojhgwABx7ty5OtuYOnWqOG7cuEZzErVXhj6Ob7Vz506dY2fWrFmij4+P9vi+qaHj3MXFRVy9erUoiqK4cuVK0cPDQ6yqqtK2r1q1SgQgJicnt+RpE0mivLxctLGxETds2KBdVlhYKNrZ2YnPPvtss9+nXFxctG1LliwRe/Toccd9//TTT6KHh0eTffr37y8++eSTOsv69euns/2OHTuK69at0+nzxhtviP379xdFURQzMjJ0js2m/kbcKj8/XwQgpqam3vG5UNuzkqgWIzIpXl5eGD9+PNasWQNRFDF+/Hh4eno2e/0OHTpAFEVUVlaiR48e2LhxI2xsbBrs++OPP+Ljjz9Geno6lEol6urq4OzsrG1PSUnB3LlzG1w3NTUVarUanTt31lmuUqng4eGhfWxlZYU+ffpoH3fp0gWurq44e/Ys+vbti7Nnz+Kxxx7T2cbAgQO1334RmaLWHscNEUURwI3hcTdFRU
U1enw35ty5c4iOjoatra122a1ngIlMRXp6OmpqatCvXz/tMnd3d+1Q0ua+TzXHn3/+iRUrViAtLQ1lZWWoq6tDdXU1KisrYW9vD0dHR23fhx9+GJ9//jnOnj1bb9KV/v37a0dCVFRUID09HY8++qjOe21dXR1cXFz0ynfhwgW8+uqrSExMREFBgfYMUVZWFrp3767Xtsj4WBQRNdMjjzyCp556CgDw6aef6rXugQMH4OzsDG9vbzg5OTXa76+//sKMGTPw+uuvY/To0XBxccH69et1xmPfHH7XEKVSCblcjuPHj0Mul+u03frmQGSpWnMcN+Ts2bMAgJCQEO0yBweHev0EQdAWUDfdfq0gkSUw1PtUZmYm7rnnHjzxxBNYtmwZ3N3dcfDgQTz66KOoqamBvb29zvV4t365eKd8ALBq1Sqdwg5Avbx3MmHCBAQHB2PVqlXw9/eHRqNB9+7dOQFLO8WiiKiZxowZg5qaGgiCgNGjR+u1bmhoaLPuuXDo0CEEBwfj5Zdf1i67fPmyTp/o6Gjs3r27wWshevbsCbVajby8PAwePLjR/dTV1eHYsWPab6LPnTuHkpISdO3aFQDQtWtXJCQkYNasWdp1EhISEBkZecfnQNSeteY4vl1VVRW+/PJLDBkyBF5eXk329fLy0pm++8KFC6isrNQ+joiIwPfffw+VSgWFQgEAOHr0aKvyEUmhY8eOsLa2RmJionZmx+LiYpw/fx5xcXHNfp+6lY2NTb0ZW48fPw6NRoP33nsPMtmNS+Q3bNig0yc8PLzetrp27YrExETMnDlTu+zw4cPaf/v4+MDf3x+XLl3CjBkzmp0PgE7GwsJCnDt3DqtWrdI+z4MHDzZreyQNFkVEzSSXy7XfCuv7bVFzderUCVlZWVi/fj369OmD7du3Y/PmzTp9lixZghEjRqBjx46YPn066urqsGPHDu2sdTNmzMDMmTPx3nvvoWfPnsjPz8fu3bsRHR2N8ePHAwCsra3x9NNP4+OPP4aVlRWeeuop3HXXXdoiacGCBXjggQfQs2dP3H333di6dSs2bdqEP//80yjPm6ittOY4zsvLQ3V1NcrLy3H8+HG8/fbbKCgo0JnRrjHDhw/HJ598gv79+0OtVmPhwoWwtrbWtj/00EN4+eWX8dhjj2HRokXIysrCu+++C0B3aB5Re+fo6IhHH30UCxYsgIeHB7y9vfHyyy9rC5fmvk/dKiQkBBkZGUhJSUGHDh3g5OSE8PBw1NbW4j//+Q8mTJiAhIQEfP7553fM9+yzz2L27NmIjY3FwIEDsXbtWpw+fRphYWHaPq+//jqeeeYZuLi4YMyYMVCpVDh27BiKi4vx/PPP19tmcHAwBEHAtm3bMG7cONjZ2cHNzQ0eHh748ssv4efnh6ysLCxa9P/bu/uYpq64D+DfastLWxS0iEVRLFR8CdSpM5kuBUTFxSXgCxqj+DKjk0RQ55DHRBHd3JQ55rLF6ciSiUGSJfKHUdAY1k6G8S0K6kTnfJ/iTNQpqNAKv+ePPdzHzoJso4L2+0lMuOeee+45TX7S3+Wcc//nP3yy5HEduqKJqJNrXqDdkv+yQLsZ/rYAOzMzU3r27Cl6vV5mzJghX3zxhcuCUxGR3bt3y7Bhw8THx0cMBoNMmTJFOedwOCQ7O1vCw8NFo9GI0WiUyZMny+nTp0Xk/xew7t69W0wmk/j6+sq4cePk2rVrLvfYunWrmEwm0Wg0MnDgQCkoKGhxDESdWXvFMQBRqVQSEBAgFotFMjMzpaampk33unnzpkyYMEF0Op2YzWYpKSlx2WhBRKSiokJiYmLEx8dHRowYIbt27RIAcv78+X84YqKOVVtbK7NnzxatVishISGSm5srsbGxsnTpUhFp+++pZvX19TJ16lQJDAwUAErc5OXlidFoFH9/f0lMTJSCgoI2bXiwYcMGMRgMotfrZe7cubJy5crnNnIoLCxUfs8GBQWJ1WqV4uJiEXl+owURkfXr10vv3r
1FpVIp/58cPHhQBg8eLL6+vhITEyN2u/2FmytRx1GJ/G2SMxEREXW4wsJCzJ8/Hw8ePGh1LSEREf13nD5HRETUCRQUFMBkMqFPnz6oqqpCVlYWpk+fzoSIiOglYFJERETUCdy+fRvZ2dm4ffs2jEYjUlJSsGHDho7uFhGRV+D0OSIiIiIi8mpdOroDREREREREHYlJEVEnYLfboVKp8Oeff3Z0V4jIDcYoEdHrjdPniDoBh8OBe/fuISQkhO8kIeqEGKNERK83JkVEREREROTVOH2OyAPi4uKQnp6OZcuWISgoCCEhIcjPz8ejR48wf/585W3cpaWlAJ6fmvP9998jMDAQBw4cwODBg6HX6zFx4kTU1NS43GPZsmUu901OTsa8efOU461bt8JsNsPPzw8hISGYNm2ap4dO9EpgjBIR0bOYFBF5yI4dO2AwGHDs2DGkp6cjLS0NKSkpGD16NE6ePIkJEyYgNTUVjx8/dnv948ePsXnzZuzcuROHDh3C9evX8eGHH7b5/idOnEBGRgbWr1+PCxcuYP/+/bBare01PKJXHmOUiIiaMSki8hCLxYLVq1fDbDZj1apV8PPzg8FgwMKFC2E2m5GdnY27d+/i9OnTbq93Op3Ytm0bRo4cieHDh2PJkiUoKytr8/2vX78OnU6Hd999F/3798cbb7yBjIyM9hoe0SuPMUpERM2YFBF5SExMjPJz165d0bNnT0RHRytlISEhAIA7d+64vV6r1SIiIkI5NhqNLdZ1Z/z48ejfvz9MJhNSU1NRWFjY4hNvIm/EGCUiomZMiog8RKPRuByrVCqXsuYdrJqamtp8/bP7onTp0gV/3yfF6XQqPwcEBODkyZMoKiqC0WhEdnY2LBYLtxQm+j+MUSIiasakiOgVFRwc7LKou7GxEWfPnnWpo1arMW7cOOTm5uL06dO4evUqfvzxx5fdVSKvxBglInp1qDu6A0T074wdOxYffPAB9u3bh4iICOTl5bk8Yd67dy8uX74Mq9WKoKAglJSUoKmpCVFRUR3XaSIvwhglInp1MCkiekW99957qKqqwpw5c6BWq7F8+XLEx8cr5wMDA1FcXIycnBzU19fDbDajqKgIQ4cO7cBeE3kPxigR0auDL28lIiIiIiKvxjVFRERERETk1ZgUERERERGRV2NSREREREREXo1JEREREREReTUmRUQdzG63Q6VSvdQXNs6bNw/Jyckv7X5EREREnRmTIiI3tm3bhoCAADx9+lQpq6urg0ajQVxcnEvd5qTm0qVLbtvKycmBSqWCSqWCWq1GeHg4li9fjrq6Ok8OgYjg2Vg2GAywWq3YsmULGhoaPDkMIiLyMCZFRG7Ex8ejrq4OJ06cUMrKy8vRu3dvHD16FPX19Uq5zWZDv379EBER0WJ7Q4cORU1NDa5evYpNmzbh22+/xYoVKzw6BiLyXCxfv34dNpsNKSkp+PTTTzF69GjU1ta2eJ3D4WifARERkUcwKSJyIyoqCkajEXa7XSmz2+1ISkrCgAEDcOTIEZfyZ1/I6I5arUbv3r3Rt29fzJgxA7NmzcKePXvc1r179y5mzpyJPn36QKvVIjo6GkVFRS51mpqakJubi8jISPj6+qJfv37YsGGDcv7GjRuYPn06AgMD0aNHDyQlJeHq1avP3WvdunUIDg5Gt27dsHjxYpcvbg0NDcjIyECvXr3g5+eHt99+G8ePH291nESdjadiOTQ0FNHR0UhPT8dPP/2Es2fPYtOmTUq98PBwfPTRR5gzZw66deuGRYsWuZ0qW1lZCZVK5RKf+fn5CAsLg1arxeTJk5GXl4fAwMD/+lEQEVErmBQRtSA+Ph42m005ttlsiIuLQ2xsrFL+5MkTHD169IVfpP7O39+/xSfH9fX1GDFiBPbt24ezZ89i0aJFSE1NxbFjx5Q6q1atwsaNG7FmzRqcO3cOu3btQkhICADA6XQiMTERAQEBKC8vR0VFBfR6PSZOnOhyz7KyMlRXV8Nut6
OoqAjFxcVYt26dcn7lypXYvXs3duzYgZMnTyIyMhKJiYm4d+/ePxorUUfzZCwDwKBBg/DOO++guLjYpXzz5s2wWCw4deoU1qxZ06a2KioqsHjxYixduhSVlZUYP368ywMPIiLyECEit/Lz80Wn04nT6ZSHDx+KWq2WO3fuyK5du8RqtYqISFlZmQCQa9eutdjO2rVrxWKxKMcnTpwQg8Eg06ZNExERm80mAOT+/fsttjFp0iRZsWKFiIg8fPhQfH19JT8/323dnTt3SlRUlDQ1NSllDQ0N4u/vLwcOHBARkblz50qPHj3k0aNHSp1vvvlG9Hq9NDY2Sl1dnWg0GiksLFTOOxwOCQ0Nldzc3Bb7SdQZeSqWn5WVlSX+/v7Kcf/+/SU5OdmljrtYP3XqlACQK1euiIjIjBkzZNKkSS7XzZo1S7p37972ARMR0T+m7tCMjKgTi4uLw6NHj3D8+HHcv38fAwcORHBwMGJjYzF//nzU19fDbrfDZDKhX79+rbZ15swZ6PV6NDY2wuFwYNKkSfj666/d1m1sbMQnn3yCH374ATdv3oTD4UBDQwO0Wi0AoLq6Gg0NDUhISHB7fVVVFX777TcEBAS4lNfX17ssILdYLEqbAPDWW2+hrq4ON27cwIMHD+B0OjFmzBjlvEajwahRo1BdXd36B0fUybRnLLdERKBSqVzKRo4c+Y/buXDhAiZPnuxSNmrUKOzdu/df9YuIiNqGSRFRCyIjI9G3b1/YbDbcv38fsbGxAIDQ0FCEhYXh8OHDsNlsGDt27AvbioqKwp49e6BWqxEaGgofH58W63722Wf48ssvsWXLFkRHR0On02HZsmXK1Dd/f/9W71VXV4cRI0agsLDwuXPBwcEv7CvR66Y9Y7kl1dXVGDBggEuZTqdzOe7S5a8Z6yKilDmdzn99TyIiaj9cU0TUivj4eNjtdtjtdpfte61WK0pLS3Hs2LE2rUHw8fFBZGQkwsPDW02IgL/WFCQlJWH27NmwWCwwmUz49ddflfNmsxn+/v4oKytze/3w4cNx8eJF9OrVC5GRkS7/unfvrtSrqqrCkydPlOMjR45Ar9cjLCwMERER8PHxQUVFhXLe6XTi+PHjGDJkyAvHS9TZtFcsu3P+/Hns378fU6dObbVe80OJmpoapayystKlTlRU1HMbmnCDEyIiz2NSRNSK+Ph4/Pzzz6isrFSeLgNAbGwstm/fDofD8a+/SLXEbDbj4MGDOHz4MKqrq/H+++/jjz/+UM77+fkhKysLK1euREFBAS5duoQjR47gu+++AwDMmjULBoMBSUlJKC8vx5UrV2C325GRkYHff/9dacfhcGDBggU4d+4cSkpKsHbtWixZsgRdunSBTqdDWloaMjMzsX//fpw7dw4LFy7E48ePsWDBgnYdL9HL0F6x/PTpU9y+fRu3bt3CmTNn8NVXXyE2NhbDhg1DZmZmq9dGRkYiLCwMOTk5uHjxIvbt24fPP//cpU56ejpKSkqQl5eHixcvYvv27SgtLX1uah4REbUvTp8jakV8fDyePHmCQYMGKbu7AX99kaqtrVW2+21Pq1evxuXLl5GYmAitVotFixYhOTkZDx48UOqsWbMGarUa2dnZuHXrFoxGIxYvXgwA0Gq1OHToELKysjBlyhTU1taiT58+SEhIQLdu3ZQ2EhISYDabYbVa0dDQgJkzZyInJ0c5v3HjRjQ1NSE1NRW1tbUYOXIkDhw4gKCgoHYdL9HL0F6x/Msvv8BoNKJr167o3r07hgwZglWrViEtLQ2+vr6tXqvRaFBUVIS0tDTExMTgzTffxMcff4yUlBSlzpgxY7Bt2zasW7cOq1evRmJiIpYvX97iGkQiImofKnl2cjMRERF1KgsXLsT58+dRXl7e0V0hInpt8S9FREREncjmzZsxfvx46HQ6lJaWYseOHdi6dWtHd4uI6LXGvxQRERF1ItOnT4fdbkdtbS1MJhPS09OV6bFEROQZTIqIiIiIiMircf
c5IiIiIiLyakyKiIiIiIjIqzEpIiIiIiIir8akiIiIiIiIvBqTIiIiIiIi8mpMioiIiIiIyKsxKSIiIiIiIq/GpIiIiIiIiLwakyIiIiIiIvJq/wvnnQ3fkVlEQgAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "np.random.seed(9999) # Fix the seed so the results are replicable.\n", + "N = 20\n", + "# Create samples\n", + "y = norm.rvs(loc=3, scale=0.4, size=N*4)\n", + "y[N:2*N] = y[N:2*N]+1\n", + "y[2*N:3*N] = y[2*N:3*N]-0.5\n", + "# Add a `Treatment` column\n", + "t1 = np.repeat('Placebo', N*2).tolist()\n", + "t2 = np.repeat('Drug', N*2).tolist()\n", + "treatment = t1 + t2 \n", + "# Add a `Rep` column as the first variable for the 2 replicates of experiments done\n", + "rep = []\n", + "for i in range(N*2):\n", + " rep.append('Rep1')\n", + " rep.append('Rep2')\n", + "# Add a `Genotype` column as the second variable\n", + "wt = np.repeat('W', N).tolist()\n", + "mt = np.repeat('M', N).tolist()\n", + "wt2 = np.repeat('W', N).tolist()\n", + "mt2 = np.repeat('M', N).tolist()\n", + "genotype = wt + mt + wt2 + mt2\n", + "# Add an `id` column for paired data plotting.\n", + "id = list(range(0, N*2))\n", + "id_col = id + id \n", + "# Combine all columns into a DataFrame.\n", + "df_delta2 = pd.DataFrame({'ID' : id_col,\n", + " 'Rep' : rep,\n", + " 'Genotype' : genotype, \n", + " 'Treatment': treatment,\n", + " 'Y' : y\n", + " })\n", + "unpaired_delta2 = dabest.load(data = df_delta2, x = [\"Genotype\", \"Genotype\"], y = \"Y\", delta2 = True, experiment = \"Treatment\")\n", + "unpaired_delta2.mean_diff.plot();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class MiniMetaDelta(object):\n", + " \"\"\"\n", + " A class to compute and store the weighted delta.\n", + " A weighted delta is calculated if the argument ``mini_meta=True`` is passed during ``dabest.load()``.\n", + " \n", + " \"\"\"\n", + "\n", + " def __init__(self, effectsizedataframe, permutation_count,\n", + " ci=95):\n", + " from ._stats_tools import effsize as es\n", + " from ._stats_tools import confint_1group as ci1g\n", + " from ._stats_tools import 
confint_2group_diff as ci2g\n", + " \n", + " self.__effsizedf = effectsizedataframe.results\n", + " self.__dabest_obj = effectsizedataframe.dabest_obj\n", + " self.__ci = ci\n", + " self.__resamples = effectsizedataframe.resamples\n", + " self.__alpha = ci2g._compute_alpha_from_ci(ci)\n", + " self.__permutation_count = permutation_count\n", + " self.__bootstraps = np.array(self.__effsizedf[\"bootstraps\"])\n", + " self.__control = np.array(self.__effsizedf[\"control\"])\n", + " self.__test = np.array(self.__effsizedf[\"test\"])\n", + " self.__control_N = np.array(self.__effsizedf[\"control_N\"])\n", + " self.__test_N = np.array(self.__effsizedf[\"test_N\"])\n", + "\n", + "\n", + " idx = self.__dabest_obj.idx\n", + " dat = self.__dabest_obj._plot_data\n", + " xvar = self.__dabest_obj._xvar\n", + " yvar = self.__dabest_obj._yvar\n", + "\n", + " # compute the variances of each control group and each test group\n", + " control_var=[]\n", + " test_var=[]\n", + " for j, current_tuple in enumerate(idx):\n", + " cname = current_tuple[0]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + " control_var.append(np.var(control, ddof=1))\n", + "\n", + " tname = current_tuple[1]\n", + " test = dat[dat[xvar] == tname][yvar].copy()\n", + " test_var.append(np.var(test, ddof=1))\n", + " self.__control_var = np.array(control_var)\n", + " self.__test_var = np.array(test_var)\n", + "\n", + " # Compute pooled group variances for each pair of experiment groups\n", + " # based on the raw data\n", + " self.__group_var = ci2g.calculate_group_var(self.__control_var, \n", + " self.__control_N,\n", + " self.__test_var, \n", + " self.__test_N)\n", + "\n", + " # Compute the weighted average mean differences of the bootstrap data\n", + " # using the pooled group variances of the raw data as the inverse of \n", + " # weights\n", + " self.__bootstraps_weighted_delta = ci2g.calculate_weighted_delta(\n", + " self.__group_var, \n", + " self.__bootstraps, \n", + " self.__resamples)\n", + "\n", 
+ " # Compute the weighted average mean difference based on the raw data\n", + " self.__difference = es.weighted_delta(self.__effsizedf[\"difference\"],\n", + " self.__group_var)\n", + "\n", + " sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta)\n", + "\n", + "\n", + " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", + " self.__bootstraps_weighted_delta, self.__difference)\n", + " \n", + " self.__jackknives = np.array(ci1g.compute_1group_jackknife(\n", + " self.__bootstraps_weighted_delta, \n", + " np.mean))\n", + "\n", + " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", + "\n", + " # Compute BCa intervals.\n", + " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", + " self.__bias_correction, self.__acceleration_value,\n", + " self.__resamples, ci)\n", + " \n", + " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", + "\n", + " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", + " self.__bca_low = sorted_weighted_deltas[bca_idx_low]\n", + " self.__bca_high = sorted_weighted_deltas[bca_idx_high]\n", + "\n", + " err1 = \"The $lim_type limit of the interval\"\n", + " err2 = \"was in the $loc 10 values.\"\n", + " err3 = \"The result should be considered unstable.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if bca_idx_low <= 10:\n", + " warnings.warn(err_temp.substitute(lim_type=\"lower\",\n", + " loc=\"bottom\"),\n", + " stacklevel=1)\n", + "\n", + " if bca_idx_high >= self.__resamples-9:\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\",\n", + " loc=\"top\"),\n", + " stacklevel=1)\n", + "\n", + " else:\n", + " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", + " err2 = \"It is set to the effect size itself.\"\n", + " err3 = \"All bootstrap values were likely all the same.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if isnan(bca_idx_low):\n", + " self.__bca_low = self.__difference\n", + " 
warnings.warn(err_temp.substitute(lim_type=\"lower\"),\n", + " stacklevel=0)\n", + "\n", + " if isnan(bca_idx_high):\n", + " self.__bca_high = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\"),\n", + " stacklevel=0)\n", + "\n", + " # Compute percentile intervals.\n", + " pct_idx_low = int((self.__alpha/2) * self.__resamples)\n", + " pct_idx_high = int((1-(self.__alpha/2)) * self.__resamples)\n", + "\n", + " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", + " self.__pct_low = sorted_weighted_deltas[pct_idx_low]\n", + " self.__pct_high = sorted_weighted_deltas[pct_idx_high]\n", + " \n", + " \n", + "\n", + " def __permutation_test(self):\n", + " \"\"\"\n", + " Perform a permutation test and obtain the permutation p-value\n", + " based on the permutation data.\n", + " \"\"\"\n", + " self.__permutations = np.array(self.__effsizedf[\"permutations\"])\n", + " self.__permutations_var = np.array(self.__effsizedf[\"permutations_var\"])\n", + "\n", + " THRESHOLD = np.abs(self.__difference)\n", + "\n", + " all_num = []\n", + " all_denom = []\n", + "\n", + " groups = len(self.__permutations)\n", + " for i in range(0, len(self.__permutations[0])):\n", + " weight = [1/self.__permutations_var[j][i] for j in range(0, groups)]\n", + " all_num.append(np.sum([weight[j]*self.__permutations[j][i] for j in range(0, groups)]))\n", + " all_denom.append(np.sum(weight))\n", + " \n", + " output=[]\n", + " for i in range(0, len(all_num)):\n", + " output.append(all_num[i]/all_denom[i])\n", + " \n", + " self.__permutations_weighted_delta = np.array(output)\n", + "\n", + " count = sum(np.abs(self.__permutations_weighted_delta)>THRESHOLD)\n", + " self.__pvalue_permutation = count/self.__permutation_count\n", + "\n", + "\n", + "\n", + " def __repr__(self, header=True, sigfig=3):\n", + " from .misc_tools import print_greeting\n", + " \n", + " is_paired = self.__dabest_obj.is_paired\n", + "\n", + " PAIRED_STATUS = {'baseline' : 'paired', \n", + " 'sequential' : 
'paired',\n", + " 'None' : 'unpaired'\n", + " }\n", + "\n", + " first_line = {\"paired_status\": PAIRED_STATUS[str(is_paired)]}\n", + " \n", + "\n", + " out1 = \"The weighted-average {paired_status} mean differences \".format(**first_line)\n", + " \n", + " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", + " if \".\" in str(self.__ci):\n", + " ci_width = base_string_fmt.format(self.__ci)\n", + " else:\n", + " ci_width = str(self.__ci)\n", + " \n", + " ci_out = {\"es\" : base_string_fmt.format(self.__difference),\n", + " \"ci\" : ci_width,\n", + " \"bca_low\" : base_string_fmt.format(self.__bca_low),\n", + " \"bca_high\" : base_string_fmt.format(self.__bca_high)}\n", + " \n", + " out2 = \"is {es} [{ci}%CI {bca_low}, {bca_high}].\".format(**ci_out)\n", + " out = out1 + out2\n", + "\n", + " if header is True:\n", + " out = print_greeting() + \"\\n\" + \"\\n\" + out\n", + "\n", + "\n", + " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", + "\n", + " \n", + " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(pval_rounded)\n", + " p2 = \"calculated for legacy purposes only. 
\"\n", + " pvalue = p1 + p2\n", + "\n", + "\n", + " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", + " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", + " bs = bs1 + bs2\n", + "\n", + " pval_def1 = \"Any p-value reported is the probability of observing the\" + \\\n", + " \"effect size (or greater),\\nassuming the null hypothesis of\" + \\\n", + " \"zero difference is true.\"\n", + " pval_def2 = \"\\nFor each p-value, 5000 reshuffles of the \" + \\\n", + " \"control and test labels were performed.\"\n", + " pval_def = pval_def1 + pval_def2\n", + "\n", + "\n", + " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", + "\n", + "\n", + " def to_dict(self):\n", + " \"\"\"\n", + " Returns all attributes of the `dabest.MiniMetaDelta` object as a\n", + " dictionary.\n", + " \"\"\"\n", + " # Only get public (user-facing) attributes.\n", + " attrs = [a for a in dir(self)\n", + " if not a.startswith((\"_\", \"to_dict\"))]\n", + " out = {}\n", + " for a in attrs:\n", + " out[a] = getattr(self, a)\n", + " return out\n", + "\n", + "\n", + " @property\n", + " def ci(self):\n", + " \"\"\"\n", + " Returns the width of the confidence interval, in percent.\n", + " \"\"\"\n", + " return self.__ci\n", + "\n", + "\n", + " @property\n", + " def alpha(self):\n", + " \"\"\"\n", + " Returns the significance level of the statistical test as a float\n", + " between 0 and 1.\n", + " \"\"\"\n", + " return self.__alpha\n", + "\n", + "\n", + " @property\n", + " def bias_correction(self):\n", + " return self.__bias_correction\n", + "\n", + "\n", + " @property\n", + " def bootstraps(self):\n", + " '''\n", + " Return the bootstrapped differences from all the experiment groups.\n", + " '''\n", + " return self.__bootstraps\n", + "\n", + "\n", + " @property\n", + " def jackknives(self):\n", + " return self.__jackknives\n", + "\n", + "\n", + " @property\n", + " def acceleration_value(self):\n", + " return self.__acceleration_value\n", + 
"\n", + "\n", + " @property\n", + " def bca_low(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__bca_low\n", + "\n", + "\n", + " @property\n", + " def bca_high(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval upper limit.\n", + " \"\"\"\n", + " return self.__bca_high\n", + "\n", + "\n", + " @property\n", + " def bca_interval_idx(self):\n", + " return self.__bca_interval_idx\n", + "\n", + "\n", + " @property\n", + " def control(self):\n", + " '''\n", + " Return the names of the control groups from all the experiment \n", + " groups in order.\n", + " '''\n", + " return self.__control\n", + "\n", + "\n", + " @property\n", + " def test(self):\n", + " '''\n", + " Return the names of the test groups from all the experiment \n", + " groups in order.\n", + " '''\n", + " return self.__test\n", + " \n", + " @property\n", + " def control_N(self):\n", + " '''\n", + " Return the sizes of the control groups from all the experiment \n", + " groups in order.\n", + " '''\n", + " return self.__control_N\n", + "\n", + "\n", + " @property\n", + " def test_N(self):\n", + " '''\n", + " Return the sizes of the test groups from all the experiment \n", + " groups in order.\n", + " '''\n", + " return self.__test_N\n", + "\n", + "\n", + " @property\n", + " def control_var(self):\n", + " '''\n", + " Return the estimated population variances of the control groups \n", + " from all the experiment groups in order. Here the population \n", + " variance is estimated from the sample variance. \n", + " '''\n", + " return self.__control_var\n", + "\n", + "\n", + " @property\n", + " def test_var(self):\n", + " '''\n", + " Return the estimated population variances of the control groups \n", + " from all the experiment groups in order. Here the population \n", + " variance is estimated from the sample variance. 
\n", + " '''\n", + " return self.__test_var\n", + "\n", + " \n", + " @property\n", + " def group_var(self):\n", + " '''\n", + " Return the pooled group variances of all the experiment groups \n", + " in order. \n", + " '''\n", + " return self.__group_var\n", + "\n", + "\n", + " @property\n", + " def bootstraps_weighted_delta(self):\n", + " '''\n", + " Return the weighted-average mean differences calculated from the bootstrapped \n", + " deltas and weights across the experiment groups, where the weights are \n", + " the inverse of the pooled group variances.\n", + " '''\n", + " return self.__bootstraps_weighted_delta\n", + "\n", + "\n", + " @property\n", + " def difference(self):\n", + " '''\n", + " Return the weighted-average delta calculated from the raw data.\n", + " '''\n", + " return self.__difference\n", + "\n", + "\n", + " @property\n", + " def pct_interval_idx (self):\n", + " return self.__pct_interval_idx \n", + "\n", + "\n", + " @property\n", + " def pct_low(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_low\n", + "\n", + "\n", + " @property\n", + " def pct_high(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_high\n", + "\n", + "\n", + " @property\n", + " def pvalue_permutation(self):\n", + " try:\n", + " return self.__pvalue_permutation\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__pvalue_permutation\n", + " \n", + "\n", + " @property\n", + " def permutation_count(self):\n", + " \"\"\"\n", + " The number of permuations taken.\n", + " \"\"\"\n", + " return self.__permutation_count\n", + "\n", + " \n", + " @property\n", + " def permutations(self):\n", + " '''\n", + " Return the mean differences of permutations obtained during\n", + " the permutation test for each experiment group.\n", + " '''\n", + " try:\n", + " return self.__permutations\n", + " except AttributeError:\n", + 
" self.__permutation_test()\n", + " return self.__permutations\n", + "\n", + "\n", + " @property\n", + " def permutations_var(self):\n", + " '''\n", + " Return the pooled group variances of permutations obtained during\n", + " the permutation test for each experiment group.\n", + " '''\n", + " try:\n", + " return self.__permutations_var\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__permutations_var\n", + "\n", + " \n", + " @property\n", + " def permutations_weighted_delta(self):\n", + " '''\n", + " Return the weighted-average deltas of permutations obtained \n", + " during the permutation test.\n", + " '''\n", + " try:\n", + " return self.__permutations_weighted_delta\n", + " except AttributeError:\n", + " self.__permutation_test()\n", + " return self.__permutations_weighted_delta\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The weighted delta is calcuated as follows:\n", + "\n", + "$$\\theta_{\\text{weighted}} = \\frac{\\Sigma\\hat{\\theta_{i}}w_{i}}{{\\Sigma}w_{i}}$$\n", + "\n", + "where:\n", + "\n", + "$$\\hat{\\theta_{i}} = \\text{Mean difference for replicate }i$$\n", + "\n", + "\n", + "$$w_{i} = \\text{Weight for replicate }i = \\frac{1}{s_{i}^2} $$\n", + "\n", + "$$s_{i}^2 = \\text{Pooled variance for replicate }i = \\frac{(n_{test}-1)s_{test}^2+(n_{control}-1)s_{control}^2}{n_{test}+n_{control}-2}$$\n", + "\n", + "$$n = \\text{sample size and }s^2 = \\text{variance for control/test.}$$\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example: mini-meta-delta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DABEST v2023.03.29\n", + "==================\n", + " \n", + "Good afternoon!\n", + "The current time is Tue Apr 18 14:47:44 2023.\n", + "\n", + "The weighted-average unpaired mean differences is 0.0336 [95%CI -0.137, 0.228].\n", + "The p-value of the 
two-sided permutation t-test is 0.736, calculated for legacy purposes only. \n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing theeffect size (or greater),\n", + "assuming the null hypothesis ofzero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "Ns = 20\n", + "c1 = norm.rvs(loc=3, scale=0.4, size=Ns)\n", + "c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", + "c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", + "t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)\n", + "t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)\n", + "t3 = norm.rvs(loc=3, scale=0.75, size=Ns)\n", + "my_df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1,\n", + " 'Control 2' : c2, 'Test 2' : t2,\n", + " 'Control 3' : c3, 'Test 3' : t3})\n", + "my_dabest_object = dabest.load(my_df, idx=((\"Control 1\", \"Test 1\"), (\"Control 2\", \"Test 2\"), (\"Control 3\", \"Test 3\")), mini_meta=True)\n", + "my_dabest_object.mean_diff.mini_meta_delta" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As of version 2023.02.14, weighted delta can only be calculated for mean difference, and not for standardized measures such as Cohen's *d*.\n", + "\n", + "Details about the calculated weighted delta are accessed as attributes of the ``mini_meta_delta`` class. 
See the `minimetadelta` for details on usage.\n", + "\n", + "Refer to Chapter 10 of the Cochrane handbook for further information on meta-analysis: \n", + "https://training.cochrane.org/handbook/current/chapter-10\n", + "\t\t" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbs/API/effsize.ipynb b/nbs/API/effsize.ipynb index 45a854e3..bed57a4b 100644 --- a/nbs/API/effsize.ipynb +++ b/nbs/API/effsize.ipynb @@ -55,7 +55,10 @@ "source": [ "#|export\n", "from __future__ import annotations\n", - "import numpy as np" + "import numpy as np\n", + "import warnings\n", + "from scipy.special import gamma\n", + "from scipy.stats import mannwhitneyu" ] }, { @@ -114,8 +117,7 @@ " median of `test`.\n", "\n", " \"\"\"\n", - " import numpy as np\n", - " import warnings\n", + "\n", "\n", " if effect_size == \"mean_diff\":\n", " return func_difference(control, test, np.mean, is_paired)\n", @@ -165,13 +167,12 @@ " Applies func to `control` and `test`, and then returns the difference.\n", " \n", " \"\"\"\n", - " import numpy as np\n", "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", - " if control.__class__ != np.ndarray:\n", + " if ~isinstance(control, np.ndarray):\n", " control = np.array(control)\n", - " if test.__class__ != np.ndarray:\n", + " if ~isinstance(test, np.ndarray):\n", " test = np.array(test)\n", "\n", " if is_paired:\n", @@ -250,13 +251,12 @@ " - https://en.wikipedia.org/wiki/Bessel%27s_correction\n", " - https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation\n", " \"\"\"\n", - " import numpy as np\n", "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", - " if control.__class__ != np.ndarray:\n", + " if ~isinstance(control, np.ndarray):\n", " control = np.array(control)\n", - " if test.__class__ != np.ndarray:\n", + " if ~isinstance(test, np.ndarray):\n", " test = np.array(test)\n", " control = control[~np.isnan(control)]\n", 
" test = test[~np.isnan(test)]\n", @@ -306,9 +306,7 @@ " and a dict for mapping the 0s and 1s to the actual labels, e.g.{1: \"Smoker\", 0: \"Non-smoker\"}\n", " '''\n", "\n", - " import numpy as np\n", " np.seterr(divide='ignore', invalid='ignore')\n", - " import pandas as pd\n", "\n", " # Check whether dataframe contains only 0s and 1s.\n", " if np.isin(control, [0, 1]).all() == False or np.isin(test, [0, 1]).all() == False:\n", @@ -317,10 +315,10 @@ " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", " # Aligned with cohens_d calculation.\n", - " if control.__class__ != np.ndarray:\n", + " if ~isinstance(control, np.ndarray):\n", " control = np.array(control)\n", - " if test.__class__ != np.ndarray:\n", - " test = np.array(test)\n", + " if ~isinstance(test, np.ndarray):\n", + " test = np.array(test)\n", " control = control[~np.isnan(control)]\n", " test = test[~np.isnan(test)]\n", "\n", @@ -353,13 +351,12 @@ " See [here](https://en.wikipedia.org/wiki/Effect_size#Hedges'_g)\n", "\n", " \"\"\"\n", - " import numpy as np\n", "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", - " if control.__class__ != np.ndarray:\n", + " if ~isinstance(control, np.ndarray):\n", " control = np.array(control)\n", - " if test.__class__ != np.ndarray:\n", + " if ~isinstance(test, np.ndarray):\n", " test = np.array(test)\n", " control = control[~np.isnan(control)]\n", " test = test[~np.isnan(test)]\n", @@ -386,14 +383,13 @@ " Computes Cliff's delta for 2 samples.\n", " See [here](https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data)\n", " \"\"\"\n", - " import numpy as np\n", - " from scipy.stats import mannwhitneyu\n", + "\n", "\n", " # Convert to numpy arrays for speed.\n", " # NaNs are automatically dropped.\n", - " if control.__class__ != np.ndarray:\n", + " if ~isinstance(control, np.ndarray):\n", " control = np.array(control)\n", - " if test.__class__ != np.ndarray:\n", + " if ~isinstance(test, 
np.ndarray):\n", " test = np.array(test)\n", "\n", " c = control[~np.isnan(control)]\n", @@ -406,17 +402,6 @@ " U, _ = mannwhitneyu(t, c, alternative='two-sided')\n", " cliffs_delta = ((2 * U) / (control_n * test_n)) - 1\n", "\n", - " # more = 0\n", - " # less = 0\n", - " #\n", - " # for i, c in enumerate(control):\n", - " # for j, t in enumerate(test):\n", - " # if t > c:\n", - " # more += 1\n", - " # elif t < c:\n", - " # less += 1\n", - " #\n", - " # cliffs_delta = (more - less) / (control_n * test_n)\n", "\n", " return cliffs_delta\n" ] @@ -430,37 +415,25 @@ "source": [ "#|export\n", "def _compute_standardizers(control, test):\n", - " from numpy import mean, var, sqrt, nan\n", + " # TODO missing docstring\n", " # For calculation of correlation; not currently used.\n", " # from scipy.stats import pearsonr\n", "\n", " control_n = len(control)\n", " test_n = len(test)\n", "\n", - " control_mean = mean(control)\n", - " test_mean = mean(test)\n", + " control_var = np.var(control, ddof=1) # use N-1 to compute the variance.\n", + " test_var = np.var(test, ddof=1)\n", "\n", - " control_var = var(control, ddof=1) # use N-1 to compute the variance.\n", - " test_var = var(test, ddof=1)\n", - "\n", - " control_std = sqrt(control_var)\n", - " test_std = sqrt(test_var)\n", "\n", " # For unpaired 2-groups standardized mean difference.\n", - " pooled = sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) /\n", + " pooled = np.sqrt(((control_n - 1) * control_var + (test_n - 1) * test_var) /\n", " (control_n + test_n - 2)\n", " )\n", "\n", " # For paired standardized mean difference.\n", - " average = sqrt((control_var + test_var) / 2)\n", - "\n", - " # if len(control) == len(test):\n", - " # corr = pearsonr(control, test)[0]\n", - " # std_diff = sqrt(control_var + test_var - (2 * corr * control_std * test_std))\n", - " # std_diff_corrected = std_diff / (sqrt(2 * (1 - corr)))\n", - " # return pooled, average, std_diff_corrected\n", - " #\n", - " # else:\n", + " average 
= np.sqrt((control_var + test_var) / 2)\n", + "\n", " return pooled, average # indent if you implement above code chunk." ] }, @@ -487,16 +460,12 @@ " ISBN 0-12-336380-2.\n", " \"\"\"\n", "\n", - " from scipy.special import gamma\n", - " from numpy import sqrt, isinf\n", - " import warnings\n", - "\n", " df = n1 + n2 - 2\n", " numer = gamma(df / 2)\n", " denom0 = gamma((df - 1) / 2)\n", - " denom = sqrt(df / 2) * denom0\n", + " denom = np.sqrt(df / 2) * denom0\n", "\n", - " if isinf(numer) or isinf(denom):\n", + " if np.isinf(numer) or np.isinf(denom):\n", " # occurs when df is too large.\n", " # Apply Hedges and Olkin's approximation.\n", " df_sum = n1 + n2\n", @@ -522,7 +491,6 @@ " Compute the weighted deltas where the weight is the inverse of the\n", " pooled group difference.\n", " '''\n", - " import numpy as np\n", "\n", " weight = np.true_divide(1, group_var)\n", " return np.sum(difference*weight)/np.sum(weight)" diff --git a/nbs/API/effsize_objects.ipynb b/nbs/API/effsize_objects.ipynb new file mode 100644 index 00000000..7ca59311 --- /dev/null +++ b/nbs/API/effsize_objects.ipynb @@ -0,0 +1,1891 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Effectsize objects\n", + "\n", + "> The auxiliary classes involved in the computations of bootstrapped effect sizes.\n", + "\n", + "- order: 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp _effsize_objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from __future__ import annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nbdev.showdoc import *\n", + "import nbdev\n", + "nbdev.nbdev_export()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", 
+ "import dabest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import pandas as pd\n", + "import lqrt\n", + "from scipy.stats import norm\n", + "from numpy import array, isnan, isinf, repeat, random, isin, abs, var\n", + "from numpy import sort as npsort\n", + "from numpy import nan as npnan\n", + "from numpy.random import PCG64, RandomState\n", + "from statsmodels.stats.contingency_tables import mcnemar\n", + "import warnings\n", + "from string import Template\n", + "import scipy.stats as spstats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class TwoGroupsEffectSize(object):\n", + "\n", + " \"\"\"\n", + " A class to compute and store the results of bootstrapped\n", + " mean differences between two groups.\n", + "\n", + " Compute the effect size between two groups.\n", + "\n", + " Parameters\n", + " ----------\n", + " control : array-like\n", + " test : array-like\n", + " These should be numerical iterables.\n", + " effect_size : string.\n", + " Any one of the following are accepted inputs:\n", + " 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'\n", + " is_paired : string, default None\n", + " resamples : int, default 5000\n", + " The number of bootstrap resamples to be taken for the calculation\n", + " of the confidence interval limits.\n", + " permutation_count : int, default 5000\n", + " The number of permutations (reshuffles) to perform for the\n", + " computation of the permutation p-value\n", + " ci : float, default 95\n", + " The confidence interval width. The default of 95 produces 95%\n", + " confidence intervals.\n", + " random_seed : int, default 12345\n", + " `random_seed` is used to seed the random number generator during\n", + " bootstrap resampling. 
This ensures that the confidence intervals\n", + " reported are replicable.\n", + "\n", + " Returns\n", + " -------\n", + " A :py:class:`TwoGroupEffectSize` object:\n", + " `difference` : float\n", + " The effect size of the difference between the control and the test.\n", + " `effect_size` : string\n", + " The type of effect size reported.\n", + " `is_paired` : string\n", + " The type of repeated-measures experiment.\n", + " `ci` : float\n", + " Returns the width of the confidence interval, in percent.\n", + " `alpha` : float\n", + " Returns the significance level of the statistical test as a float between 0 and 1.\n", + " `resamples` : int\n", + " The number of resamples performed during the bootstrap procedure.\n", + " `bootstraps` : numpy ndarray\n", + " The generated bootstraps of the effect size.\n", + " `random_seed` : int\n", + " The number used to initialise the numpy random seed generator, ie.`seed_value` from `numpy.random.seed(seed_value)` is returned.\n", + " `bca_low, bca_high` : float\n", + " The bias-corrected and accelerated confidence interval lower limit and upper limits, respectively.\n", + " `pct_low, pct_high` : float\n", + " The percentile confidence interval lower limit and upper limits, respectively.\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " control,\n", + " test,\n", + " effect_size,\n", + " proportional=False,\n", + " is_paired=None,\n", + " ci=95,\n", + " resamples=5000,\n", + " permutation_count=5000,\n", + " random_seed=12345,\n", + " ):\n", + " from ._stats_tools import confint_2group_diff as ci2g\n", + " from ._stats_tools import effsize as es\n", + "\n", + " self.__EFFECT_SIZE_DICT = {\n", + " \"mean_diff\": \"mean difference\",\n", + " \"median_diff\": \"median difference\",\n", + " \"cohens_d\": \"Cohen's d\",\n", + " \"cohens_h\": \"Cohen's h\",\n", + " \"hedges_g\": \"Hedges' g\",\n", + " \"cliffs_delta\": \"Cliff's delta\",\n", + " \"delta_g\": \"deltas' g\",\n", + " }\n", + "\n", + " self.__is_paired = 
is_paired\n", + " self.__resamples = resamples\n", + " self.__effect_size = effect_size\n", + " self.__random_seed = random_seed\n", + " self.__ci = ci\n", + " self.__proportional = proportional\n", + " self._check_errors(control, test)\n", + "\n", + " # Convert to numpy arrays for speed.\n", + " # NaNs are automatically dropped.\n", + " control = array(control)\n", + " test = array(test)\n", + " self.__control = control[~isnan(control)]\n", + " self.__test = test[~isnan(test)]\n", + " self.__permutation_count = permutation_count\n", + "\n", + " self.__alpha = ci2g._compute_alpha_from_ci(self.__ci)\n", + "\n", + " self.__difference = es.two_group_difference(\n", + " self.__control, self.__test, self.__is_paired, self.__effect_size\n", + " )\n", + "\n", + " self.__jackknives = ci2g.compute_meandiff_jackknife(\n", + " self.__control, self.__test, self.__is_paired, self.__effect_size\n", + " )\n", + "\n", + " self.__acceleration_value = ci2g._calc_accel(self.__jackknives)\n", + "\n", + " bootstraps = ci2g.compute_bootstrapped_diff(\n", + " self.__control,\n", + " self.__test,\n", + " self.__is_paired,\n", + " self.__effect_size,\n", + " self.__resamples,\n", + " self.__random_seed,\n", + " )\n", + " self.__bootstraps = bootstraps\n", + "\n", + " sorted_bootstraps = npsort(self.__bootstraps)\n", + " # Added in v0.2.6.\n", + " # Raises a UserWarning if there are any infiinities in the bootstraps.\n", + " num_infinities = len(self.__bootstraps[isinf(self.__bootstraps)])\n", + "\n", + " if num_infinities > 0:\n", + " warn_msg = (\n", + " \"There are {} bootstrap(s) that are not defined. \"\n", + " \"This is likely due to smaple sample sizes. \"\n", + " \"The values in a bootstrap for a group will be more likely \"\n", + " \"to be all equal, with a resulting variance of zero. \"\n", + " \"The computation of Cohen's d and Hedges' g thus \"\n", + " \"involved a division by zero. 
\"\n", + " )\n", + " warnings.warn(warn_msg.format(num_infinities), category=UserWarning)\n", + "\n", + " self.__bias_correction = ci2g.compute_meandiff_bias_correction(\n", + " self.__bootstraps, self.__difference\n", + " )\n", + "\n", + " self._compute_bca_intervals(sorted_bootstraps)\n", + "\n", + " # Compute percentile intervals.\n", + " pct_idx_low = int((self.__alpha / 2) * self.__resamples)\n", + " pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples)\n", + "\n", + " self.__pct_interval_idx = (pct_idx_low, pct_idx_high)\n", + " self.__pct_low = sorted_bootstraps[pct_idx_low]\n", + " self.__pct_high = sorted_bootstraps[pct_idx_high]\n", + "\n", + " self._perform_statistical_test()\n", + "\n", + " def __repr__(self, show_resample_count=True, define_pval=True, sigfig=3):\n", + " RM_STATUS = {\n", + " \"baseline\": \"for repeated measures against baseline \\n\",\n", + " \"sequential\": \"for the sequential design of repeated-measures experiment \\n\",\n", + " \"None\": \"\",\n", + " }\n", + "\n", + " PAIRED_STATUS = {\n", + " \"baseline\": \"paired\",\n", + " \"sequential\": \"paired\",\n", + " \"None\": \"unpaired\",\n", + " }\n", + "\n", + " first_line = {\n", + " \"rm_status\": RM_STATUS[str(self.__is_paired)],\n", + " \"es\": self.__EFFECT_SIZE_DICT[self.__effect_size],\n", + " \"paired_status\": PAIRED_STATUS[str(self.__is_paired)],\n", + " }\n", + "\n", + " out1 = \"The {paired_status} {es} {rm_status}\".format(**first_line)\n", + "\n", + " base_string_fmt = \"{:.\" + str(sigfig) + \"}\"\n", + " if \".\" in str(self.__ci):\n", + " ci_width = base_string_fmt.format(self.__ci)\n", + " else:\n", + " ci_width = str(self.__ci)\n", + "\n", + " ci_out = {\n", + " \"es\": base_string_fmt.format(self.__difference),\n", + " \"ci\": ci_width,\n", + " \"bca_low\": base_string_fmt.format(self.__bca_low),\n", + " \"bca_high\": base_string_fmt.format(self.__bca_high),\n", + " }\n", + "\n", + " out2 = \"is {es} [{ci}%CI {bca_low}, 
{bca_high}].\".format(**ci_out)\n", + " out = out1 + out2\n", + "\n", + " pval_rounded = base_string_fmt.format(self.pvalue_permutation)\n", + "\n", + " p1 = \"The p-value of the two-sided permutation t-test is {}, \".format(\n", + " pval_rounded\n", + " )\n", + " p2 = \"calculated for legacy purposes only. \"\n", + " pvalue = p1 + p2\n", + "\n", + " bs1 = \"{} bootstrap samples were taken; \".format(self.__resamples)\n", + " bs2 = \"the confidence interval is bias-corrected and accelerated.\"\n", + " bs = bs1 + bs2\n", + "\n", + " pval_def1 = (\n", + " \"Any p-value reported is the probability of observing the\"\n", + " + \"effect size (or greater),\\nassuming the null hypothesis of\"\n", + " + \"zero difference is true.\"\n", + " )\n", + " pval_def2 = (\n", + " \"\\nFor each p-value, 5000 reshuffles of the \"\n", + " + \"control and test labels were performed.\"\n", + " )\n", + " pval_def = pval_def1 + pval_def2\n", + "\n", + " if show_resample_count and define_pval:\n", + " return \"{}\\n{}\\n\\n{}\\n{}\".format(out, pvalue, bs, pval_def)\n", + " elif not show_resample_count and define_pval:\n", + " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, pval_def)\n", + " elif show_resample_count and ~define_pval:\n", + " return \"{}\\n{}\\n\\n{}\".format(out, pvalue, bs)\n", + " else:\n", + " return \"{}\\n{}\".format(out, pvalue)\n", + "\n", + " def _check_errors(self, control, test):\n", + " '''\n", + " Function to check configuration errors for the given control and test data.\n", + " '''\n", + " kosher_es = [a for a in self.__EFFECT_SIZE_DICT.keys()]\n", + " if self.__effect_size not in kosher_es:\n", + " err1 = \"The effect size '{}'\".format(self.__effect_size)\n", + " err2 = \"is not one of {}\".format(kosher_es)\n", + " raise ValueError(\" \".join([err1, err2]))\n", + "\n", + " if self.__effect_size == \"cliffs_delta\" and self.__is_paired:\n", + " err1 = \"`paired` is not None; therefore Cliff's delta is not defined.\"\n", + " raise ValueError(err1)\n", + "\n", 
+ " if self.__proportional and self.__effect_size not in [\"mean_diff\", \"cohens_h\"]:\n", + " err1 = \"`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined.\"\n", + " raise ValueError(err1)\n", + "\n", + " if self.__proportional and (\n", + " isin(control, [0, 1]).all() == False or isin(test, [0, 1]).all() == False\n", + " ):\n", + " err1 = (\n", + " \"`proportional` is True; Only accept binary data consisting of 0 and 1.\"\n", + " )\n", + " raise ValueError(err1)\n", + "\n", + " def _compute_bca_intervals(self, sorted_bootstraps):\n", + " '''\n", + " Function to compute the bca intervals given the sorted bootstraps.\n", + " '''\n", + " from ._stats_tools import confint_2group_diff as ci2g\n", + "\n", + " # Compute BCa intervals.\n", + " bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(\n", + " self.__bias_correction,\n", + " self.__acceleration_value,\n", + " self.__resamples,\n", + " self.__ci,\n", + " )\n", + "\n", + " self.__bca_interval_idx = (bca_idx_low, bca_idx_high)\n", + "\n", + " if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):\n", + " self.__bca_low = sorted_bootstraps[bca_idx_low]\n", + " self.__bca_high = sorted_bootstraps[bca_idx_high]\n", + "\n", + " err1 = \"The $lim_type limit of the interval\"\n", + " err2 = \"was in the $loc 10 values.\"\n", + " err3 = \"The result should be considered unstable.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if bca_idx_low <= 10:\n", + " warnings.warn(\n", + " err_temp.substitute(lim_type=\"lower\", loc=\"bottom\"), stacklevel=1\n", + " )\n", + "\n", + " if bca_idx_high >= self.__resamples - 9:\n", + " warnings.warn(\n", + " err_temp.substitute(lim_type=\"upper\", loc=\"top\"), stacklevel=1\n", + " )\n", + "\n", + " else:\n", + " # TODO improve error handling, separate file with error messages?\n", + " err1 = \"The $lim_type limit of the BCa interval cannot be computed.\"\n", + " err2 = \"It is set to the effect size itself.\"\n", 
+ " err3 = \"All bootstrap values were likely all the same.\"\n", + " err_temp = Template(\" \".join([err1, err2, err3]))\n", + "\n", + " if isnan(bca_idx_low):\n", + " self.__bca_low = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"lower\"), stacklevel=0)\n", + "\n", + " if isnan(bca_idx_high):\n", + " self.__bca_high = self.__difference\n", + " warnings.warn(err_temp.substitute(lim_type=\"upper\"), stacklevel=0)\n", + "\n", + " def _perform_statistical_test(self):\n", + " '''\n", + " Function to complete the statistical tests\n", + " '''\n", + " from ._stats_tools import effsize as es\n", + "\n", + " # Perform statistical tests.\n", + " self.__PermutationTest_result = PermutationTest(\n", + " self.__control,\n", + " self.__test,\n", + " self.__effect_size,\n", + " self.__is_paired,\n", + " self.__permutation_count,\n", + " )\n", + "\n", + " if self.__is_paired and not self.__proportional:\n", + " # Wilcoxon, a non-parametric version of the paired T-test.\n", + " wilcoxon = spstats.wilcoxon(self.__control, self.__test)\n", + " self.__pvalue_wilcoxon = wilcoxon.pvalue\n", + " self.__statistic_wilcoxon = wilcoxon.statistic\n", + "\n", + " if self.__effect_size != \"median_diff\":\n", + " # Paired Student's t-test.\n", + " paired_t = spstats.ttest_rel(\n", + " self.__control, self.__test, nan_policy=\"omit\"\n", + " )\n", + " self.__pvalue_paired_students_t = paired_t.pvalue\n", + " self.__statistic_paired_students_t = paired_t.statistic\n", + "\n", + " elif self.__is_paired and self.__proportional:\n", + " # for binary paired data, use McNemar's test\n", + " # References:\n", + " # https://en.wikipedia.org/wiki/McNemar%27s_test\n", + "\n", + " df_temp = pd.DataFrame({\"control\": self.__control, \"test\": self.__test})\n", + " x1 = len(df_temp[(df_temp[\"control\"] == 0) & (df_temp[\"test\"] == 0)])\n", + " x2 = len(df_temp[(df_temp[\"control\"] == 0) & (df_temp[\"test\"] == 1)])\n", + " x3 = len(df_temp[(df_temp[\"control\"] == 1) & 
(df_temp[\"test\"] == 0)])\n", + " x4 = len(df_temp[(df_temp[\"control\"] == 1) & (df_temp[\"test\"] == 1)])\n", + " table = [[x1, x2], [x3, x4]]\n", + " _mcnemar = mcnemar(table, exact=True, correction=True)\n", + " self.__pvalue_mcnemar = _mcnemar.pvalue\n", + " self.__statistic_mcnemar = _mcnemar.statistic\n", + "\n", + " elif self.__effect_size == \"cliffs_delta\":\n", + " # Let's go with Brunner-Munzel!\n", + " brunner_munzel = spstats.brunnermunzel(\n", + " self.__control, self.__test, nan_policy=\"omit\"\n", + " )\n", + " self.__pvalue_brunner_munzel = brunner_munzel.pvalue\n", + " self.__statistic_brunner_munzel = brunner_munzel.statistic\n", + "\n", + " elif self.__effect_size == \"median_diff\":\n", + " # According to scipy's documentation of the function,\n", + " # \"The Kruskal-Wallis H-test tests the null hypothesis\n", + " # that the population median of all of the groups are equal.\"\n", + " kruskal = spstats.kruskal(self.__control, self.__test, nan_policy=\"omit\")\n", + " self.__pvalue_kruskal = kruskal.pvalue\n", + " self.__statistic_kruskal = kruskal.statistic\n", + "\n", + " else: # for mean difference, Cohen's d, and Hedges' g.\n", + " # Welch's t-test, assumes normality of distributions,\n", + " # but does not assume equal variances.\n", + " welch = spstats.ttest_ind(\n", + " self.__control, self.__test, equal_var=False, nan_policy=\"omit\"\n", + " )\n", + " self.__pvalue_welch = welch.pvalue\n", + " self.__statistic_welch = welch.statistic\n", + "\n", + " # Student's t-test, assumes normality of distributions,\n", + " # as well as assumption of equal variances.\n", + " students_t = spstats.ttest_ind(\n", + " self.__control, self.__test, equal_var=True, nan_policy=\"omit\"\n", + " )\n", + " self.__pvalue_students_t = students_t.pvalue\n", + " self.__statistic_students_t = students_t.statistic\n", + "\n", + " # Mann-Whitney test: Non parametric,\n", + " # does not assume normality of distributions\n", + " try:\n", + " mann_whitney = 
spstats.mannwhitneyu(\n", + " self.__control, self.__test, alternative=\"two-sided\"\n", + " )\n", + " self.__pvalue_mann_whitney = mann_whitney.pvalue\n", + " self.__statistic_mann_whitney = mann_whitney.statistic\n", + " except ValueError:\n", + " # TODO At least print some warning?\n", + " # Occurs when the control and test are exactly identical\n", + " # in terms of rank (eg. all zeros.)\n", + " pass\n", + "\n", + " standardized_es = es.cohens_d(self.__control, self.__test, is_paired=None)\n", + "\n", + " # The Cohen's h calculation is for binary categorical data\n", + " try:\n", + " self.__proportional_difference = es.cohens_h(\n", + " self.__control, self.__test\n", + " )\n", + " except ValueError:\n", + " # TODO At least print some warning?\n", + " # Occur only when the data consists not only 0's and 1's.\n", + " pass\n", + "\n", + " def to_dict(self):\n", + " \"\"\"\n", + " Returns the attributes of the `dabest.TwoGroupEffectSize` object as a\n", + " dictionary.\n", + " \"\"\"\n", + " # Only get public (user-facing) attributes.\n", + " attrs = [a for a in dir(self) if not a.startswith((\"_\", \"to_dict\"))]\n", + " out = {}\n", + " for a in attrs:\n", + " out[a] = getattr(self, a)\n", + " return out\n", + "\n", + " @property\n", + " def difference(self):\n", + " \"\"\"\n", + " Returns the difference between the control and the test.\n", + " \"\"\"\n", + " return self.__difference\n", + "\n", + " @property\n", + " def effect_size(self):\n", + " \"\"\"\n", + " Returns the type of effect size reported.\n", + " \"\"\"\n", + " return self.__EFFECT_SIZE_DICT[self.__effect_size]\n", + "\n", + " @property\n", + " def is_paired(self):\n", + " return self.__is_paired\n", + "\n", + " @property\n", + " def proportional(self):\n", + " return self.__proportional\n", + "\n", + " @property\n", + " def ci(self):\n", + " \"\"\"\n", + " Returns the width of the confidence interval, in percent.\n", + " \"\"\"\n", + " return self.__ci\n", + "\n", + " @property\n", + " def 
alpha(self):\n", + " \"\"\"\n", + " Returns the significance level of the statistical test as a float\n", + " between 0 and 1.\n", + " \"\"\"\n", + " return self.__alpha\n", + "\n", + " @property\n", + " def resamples(self):\n", + " \"\"\"\n", + " The number of resamples performed during the bootstrap procedure.\n", + " \"\"\"\n", + " return self.__resamples\n", + "\n", + " @property\n", + " def bootstraps(self):\n", + " \"\"\"\n", + " The generated bootstraps of the effect size.\n", + " \"\"\"\n", + " return self.__bootstraps\n", + "\n", + " @property\n", + " def random_seed(self):\n", + " \"\"\"\n", + " The number used to initialise the numpy random seed generator, ie.\n", + " `seed_value` from `numpy.random.seed(seed_value)` is returned.\n", + " \"\"\"\n", + " return self.__random_seed\n", + "\n", + " @property\n", + " def bca_interval_idx(self):\n", + " return self.__bca_interval_idx\n", + "\n", + " @property\n", + " def bca_low(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__bca_low\n", + "\n", + " @property\n", + " def bca_high(self):\n", + " \"\"\"\n", + " The bias-corrected and accelerated confidence interval upper limit.\n", + " \"\"\"\n", + " return self.__bca_high\n", + "\n", + " @property\n", + " def pct_interval_idx(self):\n", + " return self.__pct_interval_idx\n", + "\n", + " @property\n", + " def pct_low(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_low\n", + "\n", + " @property\n", + " def pct_high(self):\n", + " \"\"\"\n", + " The percentile confidence interval lower limit.\n", + " \"\"\"\n", + " return self.__pct_high\n", + "\n", + " @property\n", + " def pvalue_brunner_munzel(self):\n", + " try:\n", + " return self.__pvalue_brunner_munzel\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_brunner_munzel(self):\n", + " try:\n", + " return 
self.__statistic_brunner_munzel\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def pvalue_wilcoxon(self):\n", + " try:\n", + " return self.__pvalue_wilcoxon\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_wilcoxon(self):\n", + " try:\n", + " return self.__statistic_wilcoxon\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def pvalue_mcnemar(self):\n", + " try:\n", + " return self.__pvalue_mcnemar\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_mcnemar(self):\n", + " try:\n", + " return self.__statistic_mcnemar\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def pvalue_paired_students_t(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__pvalue_paired_students_t\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_paired_students_t(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__statistic_paired_students_t\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def pvalue_kruskal(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__pvalue_kruskal\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_kruskal(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__statistic_kruskal\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def pvalue_welch(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__pvalue_welch\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_welch(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__statistic_welch\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + 
" def pvalue_students_t(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__pvalue_students_t\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_students_t(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__statistic_students_t\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def pvalue_mann_whitney(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__pvalue_mann_whitney\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def statistic_mann_whitney(self):\n", + " # TODO Missing docstring\n", + " try:\n", + " return self.__statistic_mann_whitney\n", + " except AttributeError:\n", + " return npnan\n", + "\n", + " @property\n", + " def pvalue_permutation(self):\n", + " # TODO Missing docstring\n", + " return self.__PermutationTest_result.pvalue\n", + "\n", + " @property\n", + " def permutation_count(self):\n", + " \"\"\"\n", + " The number of permutations taken.\n", + " \"\"\"\n", + " return self.__PermutationTest_result.permutation_count\n", + "\n", + " @property\n", + " def permutations(self):\n", + " return self.__PermutationTest_result.permutations\n", + "\n", + " @property\n", + " def permutations_var(self):\n", + " return self.__PermutationTest_result.permutations_var\n", + "\n", + " @property\n", + " def proportional_difference(self):\n", + " try:\n", + " return self.__proportional_difference\n", + " except AttributeError:\n", + " return npnan" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "The unpaired mean difference is -0.253 [95%CI -0.78, 0.25].\n", + "The p-value of the two-sided permutation t-test is 0.348, calculated for legacy purposes only. 
\n", + "\n", + "5000 bootstrap samples were taken; the confidence interval is bias-corrected and accelerated.\n", + "Any p-value reported is the probability of observing theeffect size (or greater),\n", + "assuming the null hypothesis ofzero difference is true.\n", + "For each p-value, 5000 reshuffles of the control and test labels were performed." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "random.seed(12345)\n", + "control = norm.rvs(loc=0, size=30)\n", + "test = norm.rvs(loc=0.5, size=30)\n", + "effsize = dabest.TwoGroupsEffectSize(control, test, \"mean_diff\")\n", + "effsize" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'alpha': 0.05,\n", + " 'bca_high': 0.24951887238295106,\n", + " 'bca_interval_idx': (125, 4875),\n", + " 'bca_low': -0.7801782111071534,\n", + " 'bootstraps': array([-0.3649424 , -0.45018155, -0.56034412, ..., -0.49805581,\n", + " -0.25334475, -0.55206229]),\n", + " 'ci': 95,\n", + " 'difference': -0.25315417702752846,\n", + " 'effect_size': 'mean difference',\n", + " 'is_paired': None,\n", + " 'pct_high': 0.24951887238295106,\n", + " 'pct_interval_idx': (125, 4875),\n", + " 'pct_low': -0.7801782111071534,\n", + " 'permutation_count': 5000,\n", + " 'permutations': array([ 0.17221029, 0.03112419, -0.13911387, ..., -0.38007941,\n", + " 0.30261507, -0.09073054]),\n", + " 'permutations_var': array([0.07201642, 0.07251104, 0.07219407, ..., 0.07003705, 0.07094885,\n", + " 0.07238581]),\n", + " 'proportional_difference': nan,\n", + " 'pvalue_brunner_munzel': nan,\n", + " 'pvalue_kruskal': nan,\n", + " 'pvalue_mann_whitney': 0.5201446121616038,\n", + " 'pvalue_mcnemar': nan,\n", + " 'pvalue_paired_students_t': nan,\n", + " 'pvalue_permutation': 0.3484,\n", + " 'pvalue_students_t': 0.34743913903372836,\n", + " 'pvalue_welch': 0.3474493875548964,\n", + " 'pvalue_wilcoxon': nan,\n", + " 'random_seed': 12345,\n", + " 'resamples': 
5000,\n", + " 'statistic_brunner_munzel': nan,\n", + " 'statistic_kruskal': nan,\n", + " 'statistic_mann_whitney': 494.0,\n", + " 'statistic_mcnemar': nan,\n", + " 'statistic_paired_students_t': nan,\n", + " 'statistic_students_t': 0.9472545159069105,\n", + " 'statistic_welch': 0.9472545159069105,\n", + " 'statistic_wilcoxon': nan}" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "effsize.to_dict() " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class EffectSizeDataFrame(object):\n", + " \"\"\"A class that generates and stores the results of bootstrapped effect\n", + " sizes for several comparisons.\"\"\"\n", + "\n", + " def __init__(self, dabest, effect_size,\n", + " is_paired, ci=95, proportional=False,\n", + " resamples=5000, \n", + " permutation_count=5000,\n", + " random_seed=12345, \n", + " x1_level=None, x2=None, \n", + " delta2=False, experiment_label=None,\n", + " mini_meta=False):\n", + " \"\"\"\n", + " Parses the data from a Dabest object, enabling plotting and printing\n", + " capability for the effect size of interest.\n", + " \"\"\"\n", + "\n", + " self.__dabest_obj = dabest\n", + " self.__effect_size = effect_size\n", + " self.__is_paired = is_paired\n", + " self.__ci = ci\n", + " self.__resamples = resamples\n", + " self.__permutation_count = permutation_count\n", + " self.__random_seed = random_seed\n", + " self.__proportional = proportional\n", + " self.__x1_level = x1_level\n", + " self.__experiment_label = experiment_label \n", + " self.__x2 = x2\n", + " self.__delta2 = delta2 \n", + " self.__mini_meta = mini_meta\n", + "\n", + "\n", + " def __pre_calc(self):\n", + " from .misc_tools import print_greeting, get_varname\n", + " from ._stats_tools import confint_2group_diff as ci2g\n", + " from ._delta_objects import MiniMetaDelta, DeltaDelta\n", + "\n", + " idx = self.__dabest_obj.idx\n", + " dat = 
self.__dabest_obj._plot_data\n", + " xvar = self.__dabest_obj._xvar\n", + " yvar = self.__dabest_obj._yvar\n", + "\n", + " out = []\n", + " reprs = []\n", + " \n", + " if self.__delta2:\n", + " mixed_data = []\n", + " for j, current_tuple in enumerate(idx):\n", + " if self.__is_paired != \"sequential\":\n", + " cname = current_tuple[0]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + "\n", + " for ix, tname in enumerate(current_tuple[1:]):\n", + " if self.__is_paired == \"sequential\":\n", + " cname = current_tuple[ix]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + " test = dat[dat[xvar] == tname][yvar].copy()\n", + " mixed_data.append(control)\n", + " mixed_data.append(test)\n", + " bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(mixed_data[0], mixed_data[1], mixed_data[2], mixed_data[3],\n", + " self.__is_paired, self.__resamples, self.__random_seed)\n", + "\n", + "\n", + " for j, current_tuple in enumerate(idx):\n", + " if self.__is_paired!=\"sequential\":\n", + " cname = current_tuple[0]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + "\n", + " for ix, tname in enumerate(current_tuple[1:]):\n", + " if self.__is_paired == \"sequential\":\n", + " cname = current_tuple[ix]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + " test = dat[dat[xvar] == tname][yvar].copy()\n", + "\n", + " result = TwoGroupsEffectSize(control, test,\n", + " self.__effect_size,\n", + " self.__proportional,\n", + " self.__is_paired,\n", + " self.__ci,\n", + " self.__resamples,\n", + " self.__permutation_count,\n", + " self.__random_seed)\n", + " r_dict = result.to_dict()\n", + " r_dict[\"control\"] = cname\n", + " r_dict[\"test\"] = tname\n", + " r_dict[\"control_N\"] = int(len(control))\n", + " r_dict[\"test_N\"] = int(len(test))\n", + " out.append(r_dict)\n", + " if j == len(idx)-1 and ix == len(current_tuple)-2:\n", + " if self.__delta2 and self.__effect_size in [\"mean_diff\",\"delta_g\"]:\n", + " resamp_count = False\n", 
+ " def_pval = False\n", + " elif self.__mini_meta and self.__effect_size == \"mean_diff\":\n", + " resamp_count = False\n", + " def_pval = False\n", + " else:\n", + " resamp_count = True\n", + " def_pval = True\n", + " else:\n", + " resamp_count = False\n", + " def_pval = False\n", + "\n", + " text_repr = result.__repr__(show_resample_count=resamp_count,\n", + " define_pval=def_pval)\n", + "\n", + " to_replace = \"between {} and {} is\".format(cname, tname)\n", + " text_repr = text_repr.replace(\"is\", to_replace, 1)\n", + "\n", + " reprs.append(text_repr)\n", + "\n", + "\n", + " self.__for_print = \"\\n\\n\".join(reprs)\n", + "\n", + " out_ = pd.DataFrame(out)\n", + "\n", + " columns_in_order = ['control', 'test', 'control_N', 'test_N',\n", + " 'effect_size', 'is_paired',\n", + " 'difference', 'ci',\n", + "\n", + " 'bca_low', 'bca_high', 'bca_interval_idx',\n", + " 'pct_low', 'pct_high', 'pct_interval_idx',\n", + " \n", + " 'bootstraps', 'resamples', 'random_seed',\n", + " \n", + " 'permutations', 'pvalue_permutation', 'permutation_count', 'permutations_var',\n", + " \n", + " 'pvalue_welch',\n", + " 'statistic_welch',\n", + "\n", + " 'pvalue_students_t',\n", + " 'statistic_students_t',\n", + "\n", + " 'pvalue_mann_whitney',\n", + " 'statistic_mann_whitney',\n", + "\n", + " 'pvalue_brunner_munzel',\n", + " 'statistic_brunner_munzel',\n", + "\n", + " 'pvalue_wilcoxon',\n", + " 'statistic_wilcoxon',\n", + "\n", + " 'pvalue_mcnemar',\n", + " 'statistic_mcnemar',\n", + "\n", + " 'pvalue_paired_students_t',\n", + " 'statistic_paired_students_t',\n", + "\n", + " 'pvalue_kruskal',\n", + " 'statistic_kruskal',\n", + " 'proportional_difference'\n", + " ]\n", + " self.__results = out_.reindex(columns=columns_in_order)\n", + " self.__results.dropna(axis=\"columns\", how=\"all\", inplace=True)\n", + " \n", + " # Add the is_paired column back when is_paired is None\n", + " if self.is_paired is None:\n", + " self.__results.insert(5, 'is_paired', self.__results.apply(lambda _: 
None, axis=1))\n", + " \n", + " # Create and compute the delta-delta statistics\n", + " if self.__delta2:\n", + " self.__delta_delta = DeltaDelta(self,\n", + " self.__permutation_count,\n", + " bootstraps_delta_delta,\n", + " self.__ci)\n", + " reprs.append(self.__delta_delta.__repr__(header=False))\n", + " elif self.__delta2 and self.__effect_size not in [\"mean_diff\", \"delta_g\"]:\n", + " self.__delta_delta = \"Delta-delta is not supported for {}.\".format(self.__effect_size)\n", + " else:\n", + " self.__delta_delta = \"`delta2` is False; delta-delta is therefore not calculated.\"\n", + "\n", + " # Create and compute the weighted average statistics\n", + " if self.__mini_meta and self.__effect_size == \"mean_diff\":\n", + " self.__mini_meta_delta = MiniMetaDelta(self,\n", + " self.__permutation_count,\n", + " self.__ci)\n", + " reprs.append(self.__mini_meta_delta.__repr__(header=False))\n", + " elif self.__mini_meta and self.__effect_size != \"mean_diff\":\n", + " self.__mini_meta_delta = \"Weighted delta is not supported for {}.\".format(self.__effect_size)\n", + " else:\n", + " self.__mini_meta_delta = \"`mini_meta` is False; weighted delta is therefore not calculated.\"\n", + " \n", + " \n", + " varname = get_varname(self.__dabest_obj)\n", + " lastline = \"To get the results of all valid statistical tests, \" +\\\n", + " \"use `{}.{}.statistical_tests`\".format(varname, self.__effect_size)\n", + " reprs.append(lastline)\n", + "\n", + " reprs.insert(0, print_greeting())\n", + "\n", + " self.__for_print = \"\\n\\n\".join(reprs)\n", + "\n", + "\n", + " def __repr__(self):\n", + " try:\n", + " return self.__for_print\n", + " except AttributeError:\n", + " self.__pre_calc()\n", + " return self.__for_print\n", + " \n", + " \n", + " \n", + " def __calc_lqrt(self):\n", + " \n", + " rnd_seed = self.__random_seed\n", + " db_obj = self.__dabest_obj\n", + " dat = db_obj._plot_data\n", + " xvar = db_obj._xvar\n", + " yvar = db_obj._yvar\n", + " delta2 = self.__delta2\n", 
+ " \n", + "\n", + " out = []\n", + "\n", + " for j, current_tuple in enumerate(db_obj.idx):\n", + " if self.__is_paired != \"sequential\":\n", + " cname = current_tuple[0]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + "\n", + " for ix, tname in enumerate(current_tuple[1:]):\n", + " if self.__is_paired == \"sequential\":\n", + " cname = current_tuple[ix]\n", + " control = dat[dat[xvar] == cname][yvar].copy()\n", + " test = dat[dat[xvar] == tname][yvar].copy()\n", + " \n", + " if self.__is_paired: \n", + " # Refactored here in v0.3.0 for performance issues.\n", + " lqrt_result = lqrt.lqrtest_rel(control, test, \n", + " random_state=rnd_seed)\n", + " \n", + " out.append({\"control\": cname, \"test\": tname, \n", + " \"control_N\": int(len(control)), \n", + " \"test_N\": int(len(test)),\n", + " \"pvalue_paired_lqrt\": lqrt_result.pvalue,\n", + " \"statistic_paired_lqrt\": lqrt_result.statistic\n", + " })\n", + "\n", + " else:\n", + " # Likelihood Q-Ratio test:\n", + " lqrt_equal_var_result = lqrt.lqrtest_ind(control, test, \n", + " random_state=rnd_seed,\n", + " equal_var=True)\n", + " \n", + " \n", + " lqrt_unequal_var_result = lqrt.lqrtest_ind(control, test, \n", + " random_state=rnd_seed,\n", + " equal_var=False)\n", + " \n", + " out.append({\"control\": cname, \"test\": tname, \n", + " \"control_N\": int(len(control)), \n", + " \"test_N\": int(len(test)),\n", + " \n", + " \"pvalue_lqrt_equal_var\" : lqrt_equal_var_result.pvalue,\n", + " \"statistic_lqrt_equal_var\" : lqrt_equal_var_result.statistic,\n", + " \"pvalue_lqrt_unequal_var\" : lqrt_unequal_var_result.pvalue,\n", + " \"statistic_lqrt_unequal_var\" : lqrt_unequal_var_result.statistic,\n", + " }) \n", + " self.__lqrt_results = pd.DataFrame(out)\n", + "\n", + "\n", + " def plot(self, color_col=None,\n", + "\n", + " raw_marker_size=6, es_marker_size=9,\n", + "\n", + " swarm_label=None, contrast_label=None, delta2_label=None,\n", + " swarm_ylim=None, contrast_ylim=None, delta2_ylim=None,\n", + 
"\n", + " custom_palette=None, swarm_desat=0.5, halfviolin_desat=1,\n", + " halfviolin_alpha=0.8, \n", + "\n", + " face_color = None,\n", + " #bar plot\n", + " bar_label=None, bar_desat=0.5, bar_width = 0.5,bar_ylim = None,\n", + " # error bar of proportion plot\n", + " ci=None, ci_type='bca', err_color=None,\n", + "\n", + " float_contrast=True,\n", + " show_pairs=True,\n", + " show_delta2=True,\n", + " show_mini_meta=True,\n", + " group_summaries=None,\n", + " group_summaries_offset=0.1,\n", + "\n", + " fig_size=None,\n", + " dpi=100,\n", + " ax=None,\n", + " \n", + " contrast_show_es = False,\n", + " es_sf = 2,\n", + " es_fontsize = 10,\n", + " \n", + " contrast_show_deltas = True,\n", + " \n", + " gridkey_rows=None,\n", + " gridkey_merge_pairs = False,\n", + " gridkey_show_Ns = True,\n", + " gridkey_show_es = True,\n", + "\n", + " swarmplot_kwargs=None,\n", + " barplot_kwargs=None,\n", + " violinplot_kwargs=None,\n", + " slopegraph_kwargs=None,\n", + " sankey_kwargs=None,\n", + " reflines_kwargs=None,\n", + " group_summary_kwargs=None,\n", + " legend_kwargs=None,\n", + " title=None, fontsize_title = 16,\n", + " fontsize_rawxlabel = 12,fontsize_rawylabel = 12,fontsize_contrastxlabel = 12, fontsize_contrastylabel = 12,\n", + " fontsize_delta2label = 12):\n", + "\n", + " \"\"\"\n", + " Creates an estimation plot for the effect size of interest.\n", + " \n", + "\n", + " Parameters\n", + " ----------\n", + " color_col : string, default None\n", + " Column to be used for colors.\n", + " raw_marker_size : float, default 6\n", + " The diameter (in points) of the marker dots plotted in the\n", + " swarmplot.\n", + " es_marker_size : float, default 9\n", + " The size (in points) of the effect size points on the difference\n", + " axes.\n", + " swarm_label, contrast_label, delta2_label : strings, default None\n", + " Set labels for the y-axis of the swarmplot and the contrast plot,\n", + " respectively. 
If `swarm_label` is not specified, it defaults to\n", + " \"value\", unless a column name was passed to `y`. If\n", + " `contrast_label` is not specified, it defaults to the effect size\n", + " being plotted. If `delta2_label` is not specifed, it defaults to \n", + " \"delta - delta\"\n", + " swarm_ylim, contrast_ylim, delta2_ylim : tuples, default None\n", + " The desired y-limits of the raw data (swarmplot) axes, the\n", + " difference axes and the delta-delta axes respectively, as a tuple. \n", + " These will be autoscaled to sensible values if they are not \n", + " specified. The delta2 axes and contrast axes should have the same \n", + " limits for y. When `show_delta2` is True, if both of the `contrast_ylim`\n", + " and `delta2_ylim` are not None, then they must be specified with the \n", + " same values; when `show_delta2` is True and only one of them is specified,\n", + " then the other will automatically be assigned with the same value.\n", + " Specifying `delta2_ylim` does not have any effect when `show_delta2` is\n", + " False. \n", + " custom_palette : dict, list, or matplotlib color palette, default None\n", + " This keyword accepts a dictionary with {'group':'color'} pairings,\n", + " a list of RGB colors, or a specified matplotlib palette. This\n", + " palette will be used to color the swarmplot. If `color_col` is not\n", + " specified, then each group will be colored in sequence according\n", + " to the default palette currently used by matplotlib.\n", + " Please take a look at the seaborn commands `color_palette`\n", + " and `cubehelix_palette` to generate a custom palette. 
Both\n", + " these functions generate a list of RGB colors.\n", + " See:\n", + " https://seaborn.pydata.org/generated/seaborn.color_palette.html\n", + " https://seaborn.pydata.org/generated/seaborn.cubehelix_palette.html\n", + " The named colors of matplotlib can be found here:\n", + " https://matplotlib.org/examples/color/named_colors.html\n", + " swarm_desat : float, default 1\n", + " Decreases the saturation of the colors in the swarmplot by the\n", + " desired proportion. Uses `seaborn.desaturate()` to acheive this.\n", + " halfviolin_desat : float, default 0.5\n", + " Decreases the saturation of the colors of the half-violin bootstrap\n", + " curves by the desired proportion. Uses `seaborn.desaturate()` to\n", + " acheive this.\n", + " halfviolin_alpha : float, default 0.8\n", + " The alpha (transparency) level of the half-violin bootstrap curves. \n", + " float_contrast : boolean, default True\n", + " Whether or not to display the halfviolin bootstrapped difference\n", + " distribution alongside the raw data.\n", + " show_pairs : boolean, default True\n", + " If the data is paired, whether or not to show the raw data as a\n", + " swarmplot, or as slopegraph, with a line joining each pair of\n", + " observations.\n", + " show_delta2, show_mini_meta : boolean, default True\n", + " If delta-delta or mini-meta delta is calculated, whether or not to \n", + " show the delta-delta plot or mini-meta plot.\n", + " group_summaries : ['mean_sd', 'median_quartiles', 'None'], default None.\n", + " Plots the summary statistics for each group. If 'mean_sd', then\n", + " the mean and standard deviation of each group is plotted as a\n", + " notched line beside each group. If 'median_quantiles', then the\n", + " median and 25th and 75th percentiles of each group is plotted\n", + " instead. 
If 'None', the summaries are not shown.\n", + " group_summaries_offset : float, default 0.1\n", + " If group summaries are displayed, they will be offset from the raw\n", + " data swarmplot groups by this value. \n", + " fig_size : tuple, default None\n", + " The desired dimensions of the figure as a (length, width) tuple.\n", + " dpi : int, default 100\n", + " The dots per inch of the resulting figure.\n", + " ax : matplotlib.Axes, default None\n", + " Provide an existing Axes for the plots to be created. If no Axes is\n", + " specified, a new matplotlib Figure will be created.\n", + " gridkey_rows : list, default None\n", + " Provide a list of row labels for the gridkey. The supplied idx is\n", + " checked against the row labels to determine whether the corresponding\n", + " cell should be populated or not.\n", + " swarmplot_kwargs : dict, default None\n", + " Pass any keyword arguments accepted by the seaborn `swarmplot`\n", + " command here, as a dict. If None, the following keywords are\n", + " passed to sns.swarmplot : {'size':`raw_marker_size`}.\n", + " violinplot_kwargs : dict, default None\n", + " Pass any keyword arguments accepted by the matplotlib `\n", + " pyplot.violinplot` command here, as a dict. If None, the following\n", + " keywords are passed to violinplot : {'widths':0.5, 'vert':True,\n", + " 'showextrema':False, 'showmedians':False}.\n", + " slopegraph_kwargs : dict, default None\n", + " This will change the appearance of the lines used to join each pair\n", + " of observations when `show_pairs=True`. Pass any keyword arguments\n", + " accepted by matplotlib `plot()` function here, as a dict.\n", + " If None, the following keywords are\n", + " passed to plot() : {'linewidth':1, 'alpha':0.5}.\n", + " sankey_kwargs: dict, default None\n", + " Whis will change the appearance of the sankey diagram used to depict\n", + " paired proportional data when `show_pairs=True` and `proportional=True`. 
\n", + " Pass any keyword arguments accepted by plot_tools.sankeydiag() function\n", + " here, as a dict. If None, the following keywords are passed to sankey diagram:\n", + " {\"width\": 0.5, \"align\": \"center\", \"alpha\": 0.4, \"bar_width\": 0.1, \"rightColor\": False}\n", + " reflines_kwargs : dict, default None\n", + " This will change the appearance of the zero reference lines. Pass\n", + " any keyword arguments accepted by the matplotlib Axes `hlines`\n", + " command here, as a dict. If None, the following keywords are\n", + " passed to Axes.hlines : {'linestyle':'solid', 'linewidth':0.75,\n", + " 'zorder':2, 'color' : default y-tick color}.\n", + " group_summary_kwargs : dict, default None\n", + " Pass any keyword arguments accepted by the matplotlib.lines.Line2D\n", + " command here, as a dict. This will change the appearance of the\n", + " vertical summary lines for each group, if `group_summaries` is not\n", + " 'None'. If None, the following keywords are passed to\n", + " matplotlib.lines.Line2D : {'lw':2, 'alpha':1, 'zorder':3}.\n", + " legend_kwargs : dict, default None\n", + " Pass any keyword arguments accepted by the matplotlib Axes\n", + " `legend` command here, as a dict. If None, the following keywords\n", + " are passed to matplotlib.Axes.legend : {'loc':'upper left',\n", + " 'frameon':False}.\n", + " title : string, default None\n", + " Title for the plot. If None, no title will be displayed. Pass any\n", + " keyword arguments accepted by the matplotlib.pyplot.suptitle `t` command here,\n", + " as a string.\n", + " fontsize_title : float or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}, default 'large'\n", + " Font size for the plot title. If a float, the fontsize in points. The\n", + " string values denote sizes relative to the default font size. 
Pass any keyword arguments accepted\n", + " by the matplotlib.pyplot.suptitle `fontsize` command here, as a string.\n", + " fontsize_rawxlabel : float, default 12\n", + " Font size for the raw axes xlabel.\n", + " fontsize_rawylabel : float, default 12\n", + " Font size for the raw axes ylabel.\n", + " fontsize_contrastxlabel : float, default 12\n", + " Font size for the contrast axes xlabel.\n", + " fontsize_contrastylabel : float, default 12\n", + " Font size for the contrast axes ylabel.\n", + " fontsize_delta2label : float, default 12\n", + " Font size for the delta-delta axes ylabel.\n", + "\n", + "\n", + " Returns\n", + " -------\n", + " A :class:`matplotlib.figure.Figure` with 2 Axes, if ``ax = None``.\n", + " \n", + " The first axes (accessible with ``FigName.axes[0]``) contains the rawdata swarmplot; the second axes (accessible with ``FigName.axes[1]``) has the bootstrap distributions and effect sizes (with confidence intervals) plotted on it.\n", + " \n", + " If ``ax`` is specified, the rawdata swarmplot is accessed at ``ax`` \n", + " itself, while the effect size axes is accessed at ``ax.contrast_axes``.\n", + " See the last example below.\n", + " \n", + "\n", + "\n", + " \"\"\"\n", + "\n", + " from .plotter import EffectSizeDataFramePlotter\n", + "\n", + " if hasattr(self, \"results\") is False:\n", + " self.__pre_calc()\n", + "\n", + " if self.__delta2:\n", + " color_col = self.__x2\n", + "\n", + " # if self.__proportional:\n", + " # raw_marker_size = 0.01\n", + "\n", + " # Modification incurred due to update of Seaborn\n", + " ci = ('ci', ci) if ci is not None else None\n", + " \n", + " all_kwargs = locals()\n", + " del all_kwargs[\"self\"]\n", + "\n", + " out = EffectSizeDataFramePlotter(self, **all_kwargs)\n", + "\n", + " return out\n", + "\n", + "\n", + " @property\n", + " def proportional(self):\n", + " \"\"\"\n", + " Returns the proportional parameter\n", + " class.\n", + " \"\"\"\n", + " return self.__proportional\n", + "\n", + " @property\n", + 
" def results(self):\n", + " \"\"\"Prints all pairwise comparisons nicely.\"\"\"\n", + " try:\n", + " return self.__results\n", + " except AttributeError:\n", + " self.__pre_calc()\n", + " return self.__results\n", + "\n", + "\n", + "\n", + " @property\n", + " def statistical_tests(self):\n", + " results_df = self.results\n", + "\n", + " # Select only the statistics and p-values.\n", + " stats_columns = [c for c in results_df.columns\n", + " if c.startswith(\"statistic\") or c.startswith(\"pvalue\")]\n", + "\n", + " default_cols = ['control', 'test', 'control_N', 'test_N',\n", + " 'effect_size', 'is_paired',\n", + " 'difference', 'ci', 'bca_low', 'bca_high']\n", + "\n", + " cols_of_interest = default_cols + stats_columns\n", + "\n", + " return results_df[cols_of_interest]\n", + "\n", + "\n", + " @property\n", + " def _for_print(self):\n", + " return self.__for_print\n", + "\n", + " @property\n", + " def _plot_data(self):\n", + " return self.__dabest_obj._plot_data\n", + "\n", + " @property\n", + " def idx(self):\n", + " return self.__dabest_obj.idx\n", + "\n", + " @property\n", + " def xvar(self):\n", + " return self.__dabest_obj._xvar\n", + "\n", + " @property\n", + " def yvar(self):\n", + " return self.__dabest_obj._yvar\n", + "\n", + " @property\n", + " def is_paired(self):\n", + " return self.__is_paired\n", + "\n", + " @property\n", + " def ci(self):\n", + " \"\"\"\n", + " The width of the confidence interval being produced, in percent.\n", + " \"\"\"\n", + " return self.__ci\n", + "\n", + " @property\n", + " def x1_level(self):\n", + " return self.__x1_level\n", + "\n", + "\n", + " @property\n", + " def x2(self):\n", + " return self.__x2\n", + "\n", + "\n", + " @property\n", + " def experiment_label(self):\n", + " return self.__experiment_label\n", + " \n", + "\n", + " @property\n", + " def delta2(self):\n", + " return self.__delta2\n", + " \n", + "\n", + " @property\n", + " def resamples(self):\n", + " \"\"\"\n", + " The number of resamples (with 
replacement) during bootstrap resampling.\"\n", + " \"\"\"\n", + " return self.__resamples\n", + "\n", + " @property\n", + " def random_seed(self):\n", + " \"\"\"\n", + " The seed used by `numpy.seed()` for bootstrap resampling.\n", + " \"\"\"\n", + " return self.__random_seed\n", + "\n", + " @property\n", + " def effect_size(self):\n", + " \"\"\"The type of effect size being computed.\"\"\"\n", + " return self.__effect_size\n", + "\n", + " @property\n", + " def dabest_obj(self):\n", + " \"\"\"\n", + " Returns the `dabest` object that invoked the current EffectSizeDataFrame\n", + " class.\n", + " \"\"\"\n", + " return self.__dabest_obj\n", + "\n", + " @property\n", + " def proportional(self):\n", + " \"\"\"\n", + " Returns the proportional parameter\n", + " class.\n", + " \"\"\"\n", + " return self.__proportional\n", + " \n", + " @property\n", + " def lqrt(self):\n", + " \"\"\"Returns all pairwise Lq-Likelihood Ratio Type test results \n", + " as a pandas DataFrame.\n", + " \n", + " For more information on LqRT tests, see https://arxiv.org/abs/1911.11922\n", + " \"\"\"\n", + " try:\n", + " return self.__lqrt_results\n", + " except AttributeError:\n", + " self.__calc_lqrt()\n", + " return self.__lqrt_results\n", + " \n", + " \n", + " @property\n", + " def mini_meta(self):\n", + " \"\"\"\n", + " Returns the mini_meta boolean parameter.\n", + " \"\"\"\n", + " return self.__mini_meta\n", + "\n", + " \n", + " @property\n", + " def mini_meta_delta(self):\n", + " \"\"\"\n", + " Returns the mini_meta results.\n", + " \"\"\"\n", + " try:\n", + " return self.__mini_meta_delta\n", + " except AttributeError:\n", + " self.__pre_calc()\n", + " return self.__mini_meta_delta\n", + "\n", + " \n", + " @property\n", + " def delta_delta(self):\n", + " \"\"\"\n", + " Returns the mini_meta results.\n", + " \"\"\"\n", + " try:\n", + " return self.__delta_delta\n", + " except AttributeError:\n", + " self.__pre_calc()\n", + " return self.__delta_delta\n", + "\n" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "#### Example: plot\n", + "\n", + "Create a Gardner-Altman estimation plot for the mean difference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "random.seed(9999) # Fix the seed so the results are replicable.\n", + "# pop_size = 10000 # Size of each population.\n", + "Ns = 20 # The number of samples taken from each population\n", + "\n", + "# Create samples\n", + "c1 = norm.rvs(loc=3, scale=0.4, size=Ns)\n", + "c2 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", + "c3 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", + "\n", + "t1 = norm.rvs(loc=3.5, scale=0.5, size=Ns)\n", + "t2 = norm.rvs(loc=2.5, scale=0.6, size=Ns)\n", + "t3 = norm.rvs(loc=3, scale=0.75, size=Ns)\n", + "t4 = norm.rvs(loc=3.5, scale=0.75, size=Ns)\n", + "t5 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", + "t6 = norm.rvs(loc=3.25, scale=0.4, size=Ns)\n", + "\n", + "\n", + "# Add a `gender` column for coloring the data.\n", + "females = repeat('Female', Ns/2).tolist()\n", + "males = repeat('Male', Ns/2).tolist()\n", + "gender = females + males\n", + "\n", + "# Add an `id` column for paired data plotting.\n", + "id_col = pd.Series(range(1, Ns+1))\n", + "\n", + "# Combine samples and gender into a DataFrame.\n", + "df = pd.DataFrame({'Control 1' : c1, 'Test 1' : t1,\n", + " 'Control 2' : c2, 'Test 2' : t2,\n", + " 'Control 3' : c3, 'Test 3' : t3,\n", + " 'Test 4' : t4, 'Test 5' : t5, 'Test 6' : t6,\n", + " 'Gender' : gender, 'ID' : id_col\n", + " })\n", + "my_data = dabest.load(df, idx=(\"Control 1\", \"Test 1\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeoAAAGGCAYAAAC0W8IbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABYW0lEQVR4nO3deVhUZfsH8O/MAMO+ryKLuICIbJoKmrvikkv6M1vVMnrfUrNMS99yz6U9ezO3NLLULDUzc8lIMHcUSVQkRRQVEJUdZJs5vz94nZxgEIZhzjB8P9c1V87znPOcm0a555zznOeWCIIggIiIiAySVOwAiIiISDMmaiIiIgPGRE1ERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERERAaMiZqIiMiAMVETEREZsBaXqLOysrBgwQJkZWWJHQoRUYvG38f10yIT9cKFC/kXg4hIZPx9XD8tLlETERE1J0zUREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQEzETsAImpapXeu4+bxH5F/NQlSmSmc/CPQqtsomFk7iB0aEdUDEzWRESu8fgHnNs+FsrJM1Xbj6A+4fS4OwZM+hNzWWcToiKg+eOmbyIhd3vuFWpK+r7zwNq7FfSNCRETUUEzUREaqJOcqSnPSNfbfuXAIglKhx4iISBtM1ERGqupecZ39yqoKKKsq9BQNEWmLiZrISFm6+kBqYqax38KpNWRmFnqMiIi0wURNZKRMLWzgGjJQY79njzF6jIaItMVETWTE/Aa9BJdOfQFIVG0SmQm8Hn0K7mFRosVFRPVnMIl6+fLlkEgkeO211zRuExMTA4lEovYyNzfXX5BEzYzUxBT+j89Cl1fWou3QqWj/2Gvo9upG+PR5VuzQiKieDOI56oSEBKxZswbBwcEP3dbW1hapqamq9xKJpI6tiQgALBxbwcKxldhhEJEWRD+jLi4uxjPPPIN169bBweHhKyVJJBK4u7urXm5ubnqIkoiISByiJ+opU6Zg+PDhGDhQ86SXBxUXF8PHxwdeXl4YNWoUzp8/X+f25eXlKCwsVL2Ki+t+ZIWIiMiQiHrp+7vvvkNiYiISEhLqtb2/vz82bNiA4OBgFBQU4MMPP0RkZCTOnz+P1q1b17rPsmXLsHDhQl2GTUREpDeinVFfv34d06dPx6ZNm+o9ISwiIgITJkxAaGgo+vTpgx07dsDFxQVr1qzRuM+cOXNQUFCgesXHx+vqRyAiImpyop1Rnz59Gjk5OQgPD1e1KRQKHDp0CJ9//jnKy8shk8nqHMPU1BRhYWG4fPmyxm3kcjnkcrnqvbW1deODJyIi0hPREvWAAQOQnJys1vb8888jICAAb7311kOTNFCd2JOTkzFs2LCmCpOIiEhUoiVqGxsbBAUFqbVZWVnByclJ1T5hwgR4enpi2bJlAIBFixahR48eaNeuHfLz8/HBBx/g2rVrePHFF/UePxERkT4YxHPUmmRkZEAq/fs2el5eHqKjo5GdnQ0HBwd06dIFR48eRWBgoIhREhERNR2DStRxcXF1vv/kk0/wySef6C8gIiIikYn+HDURERFpxkRNRERkwJioiYio2Th06BBGjBiBVq1aQSKRYOfOnXVuHxcXV6OYk0QiQXZ2tn4C1gEmaiIiajZKSkoQEhKClStXNmi/1NRUZGVlqV6urq5NFKHuGdRkMiJqGkpFFUpvX4NUZgpLF2+xwyHS2tChQzF06NAG7+fq6gp7e3vdB6QHTNRERi7z5C5cP/o9KovzAAAWTq3hO+AFOHXoLnJkRNWKi4tRWFioev/PFSV1ITQ0FOXl5QgKCsKCBQvQs2dPnY7flHjpm8iI3Tz+I678ukaVpAHg3t0bSPnhXeRfOSNiZER/69OnD+zs7FSv+4tc6YKHhwdWr16N7du3Y/v27fDy8kLfvn2RmJios2M0NZ5RExkpZVUlrh/9vvZOQYmMw9/B3i9Mv0ER1SI+Ph6
hoaGq97o8m/b394e/v7/qfWRkJNLS0vDJJ5/gm2++0dlxmhITNZGRKs66hKrSQo39hRnnoKgsg8y0ftXriJqKtbU1bG1t9Xa8bt264fDhw3o7XmPx0jeRkZJIH/LPWyKFRMJfAdTyJCUlwcPDQ+ww6o1n1ERGytqjPcxsnFBRdLfWfge/cEhNzPQcFVHjFBcXq5U2Tk9PR1JSEhwdHeHt7Y05c+bg5s2b2LhxIwDg008/RZs2bdCpUyeUlZXhyy+/xO+//45ff/1VrB+hwZioiYyURCqDT98JuPRzzfXxpSZm8O79tAhRETXOqVOn0K9fP9X7GTNmAAAmTpyImJgYZGVlISMjQ9VfUVGBN954Azdv3oSlpSWCg4Px22+/qY1h6CSCIAhiB6FPiYmJ6NKlC06fPo3w8HCxwyFqcndSDiPjj+9QmpMOALDzDYFP3wmwbR0gcmTU0vH3cf3wjJrIyDl37AXnjr1QWVIAiUwGE3NrsUMiogZgoiZqIUyt7MQOgYi0wCmfREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQFjoiYiIjJgTNREREQGjImaiIjIgDFRExERGTAmaiIiIgPGRE1ERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERERAaMiZqIiMiAMVETEREZMCZqIiIiA8ZETUREZMCYqImIiAwYEzUREZEBY6ImIiIyYEzUREREBoyJmoiIyIAxURMRERkwE7EDIKKmVZaXhZsnd6Hg6p+QyEzg5B8Bj66PwdTCRuzQiKgemKiJjFhR5l84t+kdKMpLVG0l2WnIORuL4Invw8zaUcToiKg+eOmbyIhd3vO5WpK+rywvC9fivhUhIiJqKCZqIiNVejsDJdlpGvtvn4+DoFToMSIi0gYTNZGRqiwtrLNfWVkOZVWFnqIhIm0xURMZKUsXL0hkphr7zR1bQWZmoceIiEgbTNRERsrU0g6unftp7PfsNkqP0RCRtpioiYyYX9S/4Ogfod4okcKzxxh4dH1MnKCIqEH4eBaREZOZmiNw3DsoybmK/PQzkMhM4dShB+S2zmKHRkT1xERN1AJYufrCytVX7DCISAsGc+l7+fLlkEgkeO211+rc7ocffkBAQADMzc3RuXNn7NmzRz8BEhERicAgEnVCQgLWrFmD4ODgOrc7evQonnrqKUyePBlnzpzB6NGjMXr0aJw7d05PkRIREemX6Im6uLgYzzzzDNatWwcHB4c6t12xYgWGDBmCWbNmoWPHjli8eDHCw8Px+eef6ylaIiIi/RI9UU+ZMgXDhw/HwIEDH7rtsWPHamwXFRWFY8eONVV4REREohJ1Mtl3332HxMREJCQk1Gv77OxsuLm5qbW5ubkhOztb4z7l5eUoLy9XvS8uLtYuWCIiIhGIlqivX7+O6dOn48CBAzA3N2+y4yxbtgwLFy5ssvGJiIiakmiXvk+fPo2cnByEh4fDxMQEJiYmiI+Px2effQYTExMoFDWLBbi7u+PWrVtqbbdu3YK7u7vG48yZMwcFBQWqV3x8vM5/FiIioqYi2hn1gAEDkJycrNb2/PPPIyAgAG+99RZkMlmNfSIiIhAbG6v2CNeBAwcQERFRY9v75HI55HK56r21tXXjgyciItIT0RK1jY0NgoKC1NqsrKzg5OSkap8wYQI8PT2xbNkyAMD06dPRp08ffPTRRxg+fDi+++47nDp1CmvXrtV7/ERERPog+qzvumRkZCArK0v1PjIyEps3b8batWsREhKCbdu2YefOnTUSPhERkbEwqCVE4+Li6nwPAOPGjcO4ceP0ExAREZHIDPqMmoiIqKVjoiYiIjJgTNRERNRsHDp0CCNGjECrVq0gkUiwc+fOh+4TFxeH8PBwyOVytGvXDjExMU0epy4Z1D1qajo3b+dj1+E/kXr9Fmws5OjfJQC9Q9tDJuV3NSJqPkpKShASEoIXXngBY8aMeej26enpGD58OP79739
j06ZNiI2NxYsvvggPDw9ERUXpIeLGY6JuARIuXsWCDT+jovLvRWSOX0jH74mpWPD8Y5DJmKyJqHkYOnQohg4dWu/tV69ejTZt2uCjjz4CAHTs2BGHDx/GJ5980mwSNX9DG7nKKgXe3/SrWpK+7/j5K9hznCVCich4GUMxJyZqI3fiQjryi0s19v968rweoyEiqqm4uBiFhYWq14OFlBpLUzGnwsJC3Lt3T2fHaUpM1EYur6ikzv67hZqTOBGRPvTp0wd2dnaq1/3VKKka71EbOR93pzr7fd0d9RQJEVHt4uPjERoaqnr/YH2GxtJUzMnW1hYWFhY6O05TYqI2csFtW8OvlTOuZN6ptX/Uo6H6DYiMXtL66agozoOZtQNCJ68QOxzjVFEKmFmKHYXOWFtbw9bWtknGjoiIwJ49e9TaHlbMydDw0ncLsOD5EfB0sVdrk0oleGF4JLoHthEnKDJaFcV5qCi6i4riPLFDMWKC2AGIpri4GElJSUhKSgJQ/fhVUlISMjIyAFSXNp4wYYJq+3//+9+4cuUK3nzzTVy8eBFffPEFvv/+e7z++utNEl9aWhreeecdPPXUU8jJyQEA7N27F+fPaz8fiGfULYCHsx3WvzUBx85fQer1W7A2l6NfuD9cHWzEDo2IqEFOnTqFfv36qd7PmDEDADBx4kTExMQgKytLlbQBoE2bNvjll1/w+uuvY8WKFWjdujW+/PLLJnk0Kz4+HkOHDkXPnj1x6NAhLFmyBK6urvjzzz+xfv16bNu2TatxmahbCJlMil7B7dAruJ3YoRARaa1v374QBM1XFGpbdaxv3744c+ZME0ZVbfbs2Xj33XcxY8YM2Nj8fSLUv39/fP7551qPy0vfRETNjaAUOwKqRXJyMh5//PEa7a6urrhzp/Z5QvXBRE1E1NwwURske3t7ZGVl1Wg/c+YMPD09tR6XiZqIqLlR1lxpkMT35JNP4q233kJ2djYkEgmUSiWOHDmCmTNnqk1waygmaiKi5qaqHKjjPi2JY+nSpQgICICXlxeKi4sRGBiI3r17IzIyEu+8847W43IyGRFRc6OsAhSVgImZ2JHQA8zMzLBu3TrMmzcPycnJKC4uRlhYGNq3b9+ocZmoiYiao4piwIQrCxoiLy8veHl56Ww8XvomImqOygrEjoD+YezYsXjvvfdqtL///vsYN26c1uMyURMRNUdl+WJHQP9w6NAhDBs2rEb70KFDcejQIa3H5aXvFkIQBJxKvYa/Mm7B2tIcvUPaw8HGeNYKJmpxSnPFjoD+obi4GGZmNecNmJqaorCwUOtxmahbgJy8Iryz7iekZ/39wP2anYcQPbIXHu8dJmJkRKS10rtiR0D/0LlzZ2zduhXz5s1Ta//uu+8QGBio9bhM1C3Awq92qyVpAKhUKPDFj/HwdnNEF38fkSIjIq0V33r4NqRXc+fOxZgxY5CWlob+/fsDAGJjY7Flyxb88MMPWo/LRG3kzl3JxF/XNf+D3nkoiYmaqDkqrLkCFolrxIgR2LlzJ5YuXYpt27bBwsICwcHB+O2339CnTx+tx2WiNnLpWbfr7E/TUKeaiAxc4U2xI6BaDB8+HMOHD9fpmEzURs7Oqu4JY/bWFnqKhIh0qvAmoFQCUj68Y2gqKiqQk5MDpVJ9TXZvb2+txmOiNnI9OrWBrZU5CkvKau0f/Ij2ExyISESKyupkba+7hTWocS5duoQXXngBR48eVWsXBAESiQQKhXZrtDNRGzkzUxO88eQgvBuzB5X/+EsS3sEbwyM7ixQZETVa7hUmagMyadIkmJiYYPfu3fDw8IBEItHJuEzULUBkUFt88cbT2PnHGaRm3IK1hRwDunbEwK4BMJHJxA6PiLR15y/AT/tJSqRbSUlJOH36NAICAnQ6LhN1C+Hr4YTXnhgodhhEpEs5KWJHQA8IDAzEnTu6n6DLWQhERM1VzgVAUSV2FPQ/7733Ht58803ExcXh7t27KCwsVHtpi2fURETNVeW
96mTtESx2JARg4MDqq5YDBgxQa+dkMiKiluz6CSZqA3Hw4MEmGZeJmoioObt2FOgWLXYUBDRq9bG68B41EVEz0rVrV7Tu1A1dlyZWN+ReAfKvixsUqfzxxx949tlnERkZiZs3q1eP++abb3D48GGtx2SiJiJqRrKzs3EzMxvZhRV/N16JEy0e+tv27dsRFRUFCwsLJCYmory8HABQUFCApUuXaj0uEzURUXP31z5AEMSOosV79913sXr1aqxbtw6mpqaq9p49eyIxMVHrcZmoiYiau4IbwE3tEwHpRmpqKnr37l2j3c7ODvn5+VqPy0RNRGQMkjaJHUGL5+7ujsuXL9doP3z4MPz8/LQel4maiMgY3DwNXE8QO4oWLTo6GtOnT8eJEycgkUiQmZmJTZs2YebMmXj55Ze1HpePZxERGYujnwFj1wMmZmJH0iLNnj0bSqUSAwYMQGlpKXr37g25XI6ZM2di2rRpWo/LM2oiImORnwGc+UbsKFokhUKBP/74A1OmTEFubi7OnTuH48eP4/bt21i8eHGjxuYZNRGRMUnaBPg+Crh0EDuSFkUmk2Hw4MFISUmBvb09AgMDdTY2z6iJiIyJUgHELQUUlWJH0uIEBQXhypUrOh+XiZqIyNjkpgOJG8WOosV59913MXPmTOzevRtZWVmsnkVERHVI2gS06QM4txM7khZj2LBhAICRI0dCIpGo2lk9i4iIalIqgPjlwOjVgIy/6vXB4KpnXb58GWlpaejduzcsLCxU3xiIiMhA3LkEJMYAj7wodiQtgsFUz7p79y4GDhyIDh06YNiwYcjKygIATJ48GW+88YbOAyQiokY48y1w47TYUbQYBlE96/XXX4eJiQkyMjJgaWmpah8/fjz27dundSBERNQEBAGIXQgUZokdidEzmOpZv/76K9577z20bt1arb19+/a4du2a1oEQUbXS2xlI3fkBjr3/fzj63lhc3L4MxdlpYodFzVlZAbB/DlBeLHYkRs1gqmeVlJSonUnfl5ubC7lc3qCxVq1aheDgYNja2sLW1hYRERHYu3evxu1jYmIgkUjUXubm5g39EYgMVnF2Gv78agZun4uDouIelJVluJNyGGdjZqEg47zY4VFzlpsO/DYfUFSJHYnRMpjqWY8++ig2bvz7+TyJRAKlUon3338f/fr1a9BYrVu3xvLly3H69GmcOnUK/fv3x6hRo3D+vOZfSLa2tsjKylK9eBZPxuRq7AYoKu7VaFdWlSP9ty/1GouyqgK3zx/CzZM/If/KGQisd9z83TgFHP6YtaubSFNVz2rwrO/3338fAwYMwKlTp1BRUYE333wT58+fR25uLo4cOdKgsUaMGKH2fsmSJVi1ahWOHz+OTp061bqPRCKBu7t7Q8MmMniVpYXIT/9TY39x5l8oy78Fc3u3eo9ZUZKP7MR9KMw4B6mpGZwDe8O5Yy9IH/K4Tu6lk/hr18eoulekarNw9kLgE3Nh4ehZ7+OTAbr4C2DvDYQ8KXYkRud+9awNGzaoqmcdO3YMM2fOxNy5c7Uet8GJOigoCH/99Rc+//xz2NjYoLi4GGPGjMGUKVPg4eGhdSAKhQI//PADSkpKEBERoXG74uJi+Pj4QKlUIjw8HEuXLtWY1ImaE2VlOYC6z3QUlWX1Hq/41hWc2/Q2qkr/XhEp968TuHVmHwKfXAiZae23qu7dvYmUbUsh/GMJynt3ruP85nno8spaSKSyesdBBujEGsDRD/DqJnYkRqWpqmdp9Ry1nZ0d3n77ba0P+qDk5GRERESgrKwM1tbW+PHHHzUuZu7v748NGzYgODgYBQUF+PDDDxEZGYnz58/XmNx2X3l5uWrmHVCd6IkMkZmtE+T2bijPv1Vrv6mVPWRyK2Sf2Q+lohL2PsGwdPHWON5fP32slqTvK7iWjJvHtsO799O17pd5aneNJH1fWX427qYeh3PHnvX4ichgCUogdhEwZh1gq/0JFgFnz55FUFAQpFIpJBIJ3n7
7bcyaNQuXL19GcXExAgMDYW1t3ahjNDhRHzp0qM7+2m6k18Xf3x9JSUkoKCjAtm3bMHHiRMTHx9earCMiItTOtiMjI9GxY0esWbNGYxmxZcuWYeHChQ2KiUgMEokUrSP+D2l7V9bab+Xqi9MrJ0N4YDKQU0AkOox6AzJT9UmVRZmXUJqTrvFYt5J+hXfvp1F6OwO3z8ejqrwUtq0D4BTQEyVZNe+xPag46zITtTEoL6qeXDbyc9avboSwsDBkZWXB1dUVfn5+SEhIgJOTk06rZzU4Ufft27dG24MrkjV0LVMzMzO0a1e9Fm2XLl2QkJCAFStWYM2aNQ/d19TUFGFhYbXevL9vzpw5mDFjhup9UlJSk60eQ9RYHl2GoaqsGDeOfK+aVCY1lcPeNwS5l07W2P7uxaNIk1uhw4jX1Noriu/WeZzyoru4cuBLZJ74UdWWlbAL5g4ekNu51rmviUXjzg7IgNxOBY5+BvSeKXYkzZa9vT3S09Ph6uqKq1evQqlU6vwYDU7UeXl5au8rKytx5swZzJ07F0uWLGl0QEqlUu1SdV0UCgWSk5NVC6HXRi6Xqz021thLEERNzavnE/Do+hgKriUDghJ2Pp1x9us3NW5/O/kgfPtNhJm1g6rNwqn2W0H3mVk7qCXp+8rysuqcESyRyuDSiV90jUrKz4BbEOA/ROxImqWxY8eiT58+8PDwgEQiQdeuXSGT1T6HQ9sSmA1O1HZ2djXaBg0aBDMzM8yYMQOnT9d/qbo5c+Zg6NCh8Pb2RlFRETZv3oy4uDjs378fADBhwgR4enpi2bJlAIBFixahR48eaNeuHfLz8/HBBx/g2rVrePFFrmNLxsVEbgmnDt0BAEpFJUpva34MUVBWoTTnqlqitnRqDfs2ochPT6p1H0kds77L8rNh59O5+ovCP/j0mwi5rXM9fwpqNv74CHBuDzi1FTuSZmft2rUYM2YMLl++jFdffRXR0dGwsbHR6TF0VlLFzc0NqampDdonJycHEyZMQFZWFuzs7BAcHIz9+/dj0KBBAICMjAxIpX8/6p2Xl4fo6GhkZ2fDwcEBXbp0wdGjR3V6L4DI0EikJpCZWdT6fPV9tV2O7jDyDZzbMhelOVfV2t3DhuB2St3rDrsED4BL5/7I+fM3VBTnwtLFG626joC9X5hWPwMZOEUFcGAeMPZLwNRC7GialbNnz2Lw4MEYMmQITp8+jenTp4ufqM+ePav2XhAEZGVlYfny5QgNDW3QWOvXr6+zPy4uTu39J598gk8++aRBxyBq7iQSCVyC+iI7sfZV+yydvaFUVOH6ke8hNTGFk39PmNu7wszGEWEvfobcSyeRl5aIynuFsPfpDLfQwSi8eRGlZZqfgDC3dYF9m1C4hw5uqh+LDE3BDeD4KuDRGQ/fllQenEwWHx+PiooKnR+jwYk6NDQUEomkxipFPXr0wIYNG3QWGBH9zbv3M8hP/xNleZlq7VJTOSQmpjgb8/dkoPTfNsCzx+NoM+AFQCJB4fXzyDn7G5RVFbibchjX4r+FnW9IjTPt+8zt3WHnG9yUPw4Zqgs/Af7DANcAsSNpNgxyMll6uvojH1KpFC4uLlxzm6gJmVk7IOT5j5B1ajfupByGsqoC9r4hKCvIQf6Vfyz2Lyhx89h2WDh6oiwvGzePq08aq7pXhLsph2Ht3g7F2epPTMjMrdBh9CxIJA1eXZiMRcI6YPhHYkfRbBjkZDIfHx+tDkREjWNqaQvv3k+rFiopL7iNhM9f0Lj9zRM/oqLwjsZ+paBEwP+9jdvn46AovwcbzwB4hA+FmY2jzmMn3cjIyEBpaSkAoLRCiYzcMng76vgk6cYpoOAmYGe4S8WuXLkSH3zwAbKzsxESEoL//ve/6Nat9lXWYmJi8Pzzz6u1yeVylJXVf5W/uhjMZLLPPvus3gO++uqrWgdDRPVXeuda9QpTGty7c73u/W9dgYNfOJwDInUdGunYyZMnsXjxYvzyyy+q2455pVXwffs
kHuvsiLnDfPCIrw6Tw/UTgN0Y3Y2nQ1u3bsWMGTOwevVqdO/eHZ9++imioqKQmpoKV9fa1wCwtbVVm+z84NofujBkSPWjbaJOJqvvBC6JRMJETaQnppY1H5V8kExuCUV5qcZ+idQEEg2X6Mhw7NixA+PHj4cgCDXmBgkCsOdcLvaey8PW6I4YE6ajR+dyUnQzThP4+OOPER0drTpLXr16NX755Rds2LABs2fPrnUffRVz+uqrr5pk3Hol6n/elyYi8Vl7tIeli4/GZ6zdQgYh93ICynIza+138u8Bqcy01r7GuP8894PPdZN2Tp48ifHjx0OhUGgsM6pQAhIIGL8uBUffDNXNmXVB3VdjxFJRUYHTp09jzpw5qjapVIqBAwfi2LFjGvdrymJOY8aMQUxMDGxtbTFmTN1XIXbs2KHVMXT2HDUR6V/7x17Fuc1za5w5W7q2gfejT8HeLwwp378LQVml1m9iYQvvvs81SUyhk1c0ybgt0bvvvlvrmfQ/CQAECHh3zzX89EpQ4w9cWPuXu6ZSXFyMwsK/C8j8c0XJ++7cuQOFQgE3N/VSr25ubrh48WKtY2tTzKkh7OzsVJfSa1sQTBe0StQ3btzArl27kJGRUeOZsY8//lgngRHRw9l4BiAs+nNkJvyMgmtnITUxg3PHXnAPi4LMzAKO7R5B5+eW4frRH5B/5QykMhM4BfSEV6/xsHBsJXb4VIeMjAzs3r37oUn6PoUS+Dk5VzcTzMoKgLJCwNy2cePU0z/rL8yfPx8LFizQydjaFHNqiAcvd4t66ftBsbGxGDlyJPz8/HDx4kUEBQXh6tWrEAQB4eHhTREjEdXB3N4NfoM0L6Nr6xWITuPn6zGi5k+hUDTJ87ANsX///non6fsEAfj1Qh4mRrg9fOOHyU4BPJv2d3pVVfWVnvj4eLUFs2o7mwYAZ2dnyGQy3LqlXgr21q1b9b4HXZ9iToamwYl6zpw5mDlzJhYuXAgbGxts374drq6ueOaZZ1Qz34iImrPFixc32/K40d9eQvS3l3QwUg8djFE/1tbWsLV9+Nm7mZkZunTpgtjYWIwePRpAdSGn2NhYTJ06tV7Hqk8xp4YICwur9yzyxMTEh29UiwYn6pSUFGzZsqV6ZxMT3Lt3D9bW1li0aBFGjRqFl19+WatAiIgMxdy5c/H222+LGkNMTAxeeumlBu+37tn2ujmjdukIjK69NrqunDlzBt27d2/QPjNmzMDEiRPRtWtXdOvWDZ9++ilKSkpUs8D1Xczp/hcGACgrK8MXX3yBwMBA1eX248eP4/z583jllVe0PkaDE7WVlZXqvrSHhwfS0tJUs+fu3NG8uAIRia8s/xZKbl2BiYUtbL0Cdf48qbGQyWQaV5fSl6ioqFqXa66LRAIMDnSAqUwHK8vlXQKU5YC86UoDm5g0fJrU+PHjcfv2bcybNw/Z2dkIDQ3Fvn37VBPM9F3Maf78v28rvfjii3j11Vdr3PueP38+rl/Xfia9RGjgTZDRo0dj+PDhiI6OxsyZM/HTTz9h0qRJ2LFjBxwcHPDbb79pHYw+JCYmokuXLjh9+jTvqVOLUVVWjEu7V+Bu6nHVIinmDq3Qbvg02HNdb4M1cuRI7NmzBwqF4qHbyqTA8CBH3cz6vu/x1YBrR92N9w/G9vvYzs4Op06dQvv27dXaL126hK5du6KgoECrcRv8tevjjz9WXapYuHAhBgwYgK1bt8LX1/eh1bCISBwp25bi7sWjaiuZleVl4sLWBSi9e0Onx0paPx0nV0xA0vrpOh23JZo7dy4kEslDr3xIAEggwTvDdLzEs0L3laCMmYWFBY4cOVKj/ciRI42qh9Hg6w5Lly7Fs88+C6D6Mvjq1au1PjgRNb2im6kouPpnrX3KynJknvwJ7YZO0dnxKorzUFF0V2fjtWSPPPIItm7dqlqZrLYza5m0Okl/H91Rt8uIAoCt4a73bYhee+01vPzyy0hMTFStPX7ixAls2LABc+fO1Xr
cBifq27dvY8iQIXBxccGTTz6JZ599FiEhIVoHQE2jorIKPxw8jX0nzuNuQQm83BwwqlcIhkV0Fjs00rOC6+fr7C/MqLufxDVmzBgcPXoUixcvrvFctURSfbn7HV2v9Q0ArUIBKx0tSdpCzJ49G35+flixYgW+/fZbAEDHjh3x1Vdf4YknntB63AYn6p9++gl5eXn44YcfsHnzZnz88ccICAjAM888g6effhq+vr5aB0O6UaVQ4O21O5F0+e9Lmlcy7+CT72ORlnkH08b2a5LjvvLRZuQVlcLBxhJfvPF0kxyDGk5mWvszqfdJH9JP4nvkkUdUi0yFhoYiLy8PDpYmSHonXPfVs+4Lm9A04xq5J554olFJuTZaTQ10cHDASy+9hLi4OFy7dg2TJk3CN998g3bt2uk0ONLOoaRLakn6QbsO/4lr2U1zWTKvqBR3CoqRV6S5EATpR9HNi7hxdBsyE3bBppU/JFLN38mdAx/VY2TUGN7e3rC0tAQAWJpJmy5J+/QEWndpmrGpwRq11ndlZSVOnTqFEydO4OrVqzXWXyVxxCX9VXf/mb8wcWhEndtQ81RVVoKUbUvU7klLpDLYtA6o9RK3lVsbuIdxoSJ6gIk50JNVEA2JVmfUBw8eRHR0NNzc3DBp0iTY2tpi9+7duHFDt7NHSTtl5ZV191fU3U/N16VfVtSYOCYoFSjMOA/38CGwbtUBEqkMplb28IwYi87PLYeJ3FKkaMkgdZkI2DR9SUiqvwafUXt6eiI3NxdDhgzB2rVrMWLECI3rspI4Ovt54swlzQ/XB/mxGIMxKivIwd2Lmkv9Fd1MRVj053qMiJodBx+g8zixo6B/aHCiXrBgAcaNGwd7e/smCId0YXhkZ/z4RxKKSstq9Pl6OKFHJz8RoqKmVppzVe056X8quVVdPIerkZFGvWYATVCjnBqnwYk6Ojq6KeIgHXK0tcJ7/x6D9zbtw7Vbuar2IL9WGNM7DBnZuWjTio9dGBtTy7qLGphY2jBJk2adx1U/kkVaUygUiImJQWxsLHJycmpUYPv999+1GrdRk8nIcLX3csWXsyfgwtUs3MotwNFz6Th2Lg2LYn4BAPi1csa0sf15GdyI2HgGwMKpNe5pWGnMtfMAPUdEzYZLANCt4QVASN306dMRExOD4cOHIygoSGdfjJmojVygrwd2Hf4TcWdS1dqvZN7BnDU/YuWMp+Dt5ihSdKRr7R+bjvNb5kFRcU+t3dLVF66dB6CiKBdmNvy86QGWjsDgdwETM7Ejafa+++47fP/99zoroXkfE7WRu3k7H78nXqy1r6yiEtviEjFj/EA9R0VNxdYrEKEvfobMhJ9RcC0ZMlMzyO3dUZSZiqQvq+v12ngGwKffRBbjIMDMChj6AWDtInYkRsHMzKxJ1hPRQS00MmRJl66jrvpoZ/7K0F8wpBcWjq3QNupfCH/pc7iFDMKd8/Eoz8tW9RfdvIjzW+ai8PoFEaMk0cnMgKilgDMXqtKVN954AytWrGhQadL64Bm1kTMxqfu7mInINXep6SgVVbh2aFOtfYKiChmHNiPomXf1HBUZBIkUGLSQk8d07PDhwzh48CD27t2LTp06wdRUfQb9jh07tBqXidrIdQ9sA1OZDJUa6tn2Cm6r54hIX4qzLqOyOE9jf356EpRVFZDy3mTL03cO4BMpdhRGx97eHo8//rjOx2WiNnL21pZ4cmBXfLP/RI0+VwcbjOkTJkJUpB8Pu/wm6PwSHTUDEVOADoPFjsIoffXVV00yLhN1CzBhSASc7W2w7eBpXM/Jg5mpDH1CO2DS0Eg42FiJHR41EWuPdjC1skdlSX6t/Xa+IQ+trEVGpvM4IFi3lZ2o6TFRtxDDegRhWI8glJZVwMxUxnvTLYBUZgqvR5/ClX2ravRJpDJ4P8pSpC1K2/5Aj1fEjsLobdu2Dd9//z0yMjJQUVGh1peYmKjVmJz13cJYmpsxSbcgrbo+hnbDX4Xc7u/KdlbubRH45EL
Y+QSJGBnplWeX6vvSUv7Kb0qfffYZnn/+ebi5ueHMmTPo1q0bnJyccOXKFQwdOlTrcXlGTWTk3MOi4BY6CGW5WZDITGBuz3K0LYpLABc00ZMvvvgCa9euxVNPPYWYmBi8+eab8PPzw7x585Cbm/vwATTg1yuiFkAikcLCyZNJuqVx9AOGfQCYsZSpPmRkZCAysno2vYWFBYqKigAAzz33HLZs2aL1uEzURETGyLYVMOxDwLzuYi2kO+7u7qozZ29vbxw/fhwAkJ6e3qgnLJioiYiMjdym+kzayknsSFqU/v37Y9euXQCA559/Hq+//joGDRqE8ePHN+r5at6jJiIyJvdXHbNrLXYkLc7atWtVpS2nTJkCJycnHD16FCNHjsS//vUvrcdloiYiMiahT1fP8ia9k0qlkD4ws/7JJ5/Ek08+2fhxGz0CEREZBrvWQJdJYkfRov3xxx949tlnERERgZs3bwIAvvnmGxw+fFjrMZmoiQgAuJyoMejxCiAzffh21CS2b9+OqKgoWFhY4MyZMygvLwcAFBQUYOnSpVqPy0RN1IJVFOchbd8qHPvwCRxZMgJJ61/D7fOHxA6LtOESwEIbInv33XexevVqrFu3Tq1yVs+ePbVelQzgPWqiZk9QKpB76STuXDwKQVEJO58QuHbuB5mZeZ37VZYW4OzXb6IsL1PVVpx1Cak/vofywttoHTG2qUMnXQp9GpBIxI6iRUtNTUXv3r1rtNvZ2SE/P1/rcZmojUxpWQViT1/Etey7cLC1wqCuAXB14HOUxkpZVYELWxciPz1J1Xbnwh+4cewHdH52OcztXTXum3lyl1qSflBG/Ca4h0XBxNxa1yFTI7m7uwOCAu4mxX832rYCfB8VLygCUP3ZXL58Gb6+vmrthw8fhp+fn9bjMlEbkfPpmZj75S4UlZap2jbuO4Ypj/fFyF4h4gVGTeb64a1qSfq+8vxbuLT7U3R+dikEQYncSwkouPonJDITOPlHwLZ1R9xJ0Ty5RVlVjty/TsI1uH8TRk/aOHXqFJB/Hdj67N+NgaO5jrcBiI6OxvTp07FhwwZIJBJkZmbi2LFjmDlzJubOnav1uEzURqK8ogoLNvyslqQBQKkU8PmOg/D3doO/t7tI0VFTEAQB2Wf2aewvuPonijL/QtrelSjOuqxqv3lsO5w79oKysrzO8ZVVdfeTgZCaAP5DxI6CAMyePRtKpRIDBgxAaWkpevfuDblcjpkzZ2LatGlaj8uvYEYiLikV+cX3au0TBGDXkbMa9714LRs/HzmL+KS/UF5R1VQhko4pq8o11pq+L/3Al2pJ+r47KYdhYmFTx54S2PkENy5A0g+fCMDcTuwoCIBEIsHbb7+N3NxcnDt3DsePH8ft27exePHiRo3LM2ojceN2ft39OXk12nILS7Ao5hecT//7PqWNpRzT/28A+oR10HWIpGMyU3OYWTuiolhTVR4JCq9f0Lh/edFdSGSmEBSVNfqcAiJh4eSpo0ipSfn1EzsC+gczMzMEBgbqbDwmaiPhbFf3pJ/a+hds+Bkp17LV2opKy7Hs231wd7KDvzcrLRk69/ChyDi0qdY+61btUZz5l8Z9q0oLEDDuHVw98CXK8qv/HkikMrh06ou2w15pknhJxyRSwKub2FG0eC+88EK9ttuwYYNW4zNRG4n+4f748uc/UKbh0vXQHkFq789dyayRpO9TKJX48dAZzH6W970MXeueT6A4+zJy/zqh1m7h1BptBr2E5K9natzXxNwaTh26w6lDDxTdSEFVeQms3Pwgt2Ehh2bDxb+6AAeJKiYmBj4+PggLC2uShYOYqI2EjaU5Zj0VhWXf7kWVQqnW1yu4HfYeP4dv9h+Hp4s9RvQMRmpG7Un6vtSMW00ZLumIVGaCwCfmIT/9T9y5eBjKqkrYtwmFc8eekMpMYefTGQXXkmvd1zVkICSS6mkqtl66u0xHeuTWSewICMDLL7+MLVu2ID09Hc8//zyeffZZODo66mx
8Jmoj0ju0Pfw8nbH7aHL1c9Q2lqisVCAu6e/LnxeuZuG3UynoF+5f51jWFmZNHS7pkH2bENi3qfkIXrvhryL5mzmoKLqj1m7j6Q/v3s/oKzxqKs6cS2IIVq5ciY8//hg7duzAhg0bMGfOHAwfPhyTJ0/G4MGDIWnkQjSizvpetWoVgoODYWtrC1tbW0RERGDv3r117vPDDz8gICAA5ubm6Ny5M/bs2aOnaJuH1i4O+Peo3lj2r8cxrEeQWpK+TxCAg6dTYWaq+XvagC4dmzJM0hMLx1YI/9dKtBn0IhzadYWjfwQ6jHwDnZ97DyZyS7HDo8ay9xE7AvofuVyOp556CgcOHMCFCxfQqVMnvPLKK/D19UVxcfHDB6iDqIm6devWWL58OU6fPo1Tp06hf//+GDVqFM6fP1/r9kePHsVTTz2FyZMn48yZMxg9ejRGjx6Nc+fO6Tny5mH/Sc0zfgUA4R28al1xsFObVjXuaVPzZWJuDc/uj6PTkwsROO4duAb3h9SEhRuMAmtOGySpVAqJRAJBEKBQKBo/ng5i0tqIESMwbNgwtG/fHh06dMCSJUtgbW2N48eP17r9ihUrMGTIEMyaNQsdO3bE4sWLER4ejs8//1zPkTcPuYUldfa72Nvgg1fGIjLIDy72NvBr5YyXRj6K9/49BnIz3hUhMmjmtoCcS7waivLycmzZsgWDBg1Chw4dkJycjM8//xwZGRmwtm7c52Qwv40VCgV++OEHlJSUICIiotZtjh07hhkzZqi1RUVFYefOnXqIsPnxdXfCyZSrGvt93J0Q0s4LIe289BcUEemGTSuxI6D/eeWVV/Ddd9/By8sLL7zwArZs2QJnZ2edjS96ok5OTkZERATKyspgbW2NH3/8UeOD4tnZ2XBzU3+2183NDdnZmmcwl5eXq2qCAmj0vYLmZHhkZ/x4KAmVtVx6sbGUY2DXABGiIjFUlZWg6GYKJFJT2HoHQsqaxc2fDdc5MBSrV6+Gt7c3/Pz8EB8fj/j4+Fq327Fjh1bji56o/f39kZSUhIKCAmzbtg0TJ05EfHy8zlZ1WbZsGRYuXKiTsZqbVs72+M+EoXhv036UVfy9+pSdlQUWvDACVuZyEaMjfRAEJa4d3IjMhF2qtb1Nrezh03cC3MOiRI6OGsWaa/cbigkTJjR6ZnddRE/UZmZmaNeuHQCgS5cuSEhIwIoVK7BmzZoa27q7u+PWLfXne2/dulVd9k2DOXPmqF0uT0pKQp8+fXQUveHrFdwOoe1b42BiKv66ngOlIKBLB2+0b625/CEZj4z4Tbhx9Ae1tsqSfFz+5TOYWFjDOaCnSJFRo1nz37ChiImJadLxRU/U/6RUKtUuVT8oIiICsbGxeO2111RtBw4c0HhPG6ieMi+X/33m2Nib+s2RUing8Nk0JP6VAQD49eQF2FjKMXVsP/QP5+VvY6WoKENmwi6N/TeO/MBE3ZxZuYgdAemJqIl6zpw5GDp0KLy9vVFUVITNmzcjLi4O+/fvB1B9OcHT0xPLli0DAEyfPh19+vTBRx99hOHDh+O7777DqVOnsHbtWjF/DIO3+Os9SLp0Xa2tqLQc723aD3dHWwT6clKKMSrOToOivFRzf9YlKCrKIDMz12NUpDOWXOq1pRD18aycnBxMmDAB/v7+GDBgABISErB//34MGjQIAJCRkYGsrCzV9pGRkdi8eTPWrl2LkJAQbNu2DTt37kRQEJ/51eTS9ZwaSfo+pVLA9rgzeo6I9EVmWvfqchKpDBKZTE/RkM6xtGWLIeoZ9fr16+vsj4uLq9E2btw4jBs3rokiMj4XH7Km98P6qfmycm8HcwcPlOVl1drv2KEHZ383Z2ZWYkdAeiLqGTU1PauHrNnNmd/GSyKRoM2gF6tLIf6DzNwKPn2eFSEq0hlTC7EjID1hojZyEZ38YCnXnKz7d6m7OAc1b04deqDzs0th7xcOSKSQmpjBpVNfhEz6CJYu3mKHR40h45fsloKJ2shZyM3
w8uN9al3Tu31rV4zqVbPiEhkXO5/OCHp6MXr+ZxciZ/8I/8dnwdKZq9E1a1IZIG25v75XrlwJX19fmJubo3v37jh58mSd2zf3Yk4t95NuQYZ074QPXhmLiCA/ONpawdvNEc8Pi8RHU/8PFnWcbTeUg40lnO2s4WDDqkyGqCkXZCA9k7TcSYBbt27FjBkzMH/+fCQmJiIkJARRUVHIycmpdXtjKOYkEQRBEDsIfUpMTESXLl1w+vRphIeHix0OkdE5uWICKoruwszGCd2mbxQ7HONUcgew0t1a0mLR5vdx9+7d8cgjj6iKMSmVSnh5eWHatGmYPXt2je3Hjx+PkpIS7N69W9XWo0cPhIaGYvXq1br5QZoYz6iJiJqbWiYItgQVFRU4ffo0Bg4cqGqTSqUYOHAgjh07Vus+x44dU9seqC7mpGl7Q2RwK5OR+O6VVyD29EVcybwDB2tLDHykIzyc+MwmtRwKhQJKpVLsMDSrqgIqKx++nYGrqqoCUF0sqbCwUNX+zxUl77tz5w4UCkWtxZkuXrxY6zG0KeZkaJioW6DcwhIcO3cFlVUKhHXwgo/73yscpWZk4+21P6Gg5J6q7dtfTyB65KP4v768VUAtw+LFi1tsMR8x/LP+wvz587FgwQJxgjFATNQtTMzeY9gam4Aqxd9nC48Gt8NbzwyBVCrBvPU/qyVpAFAKAtb8dAj+Xm7o3NZT3yET6d3cuXPx9ttvix2GZuVFgNxG7Cga7cyZM+jevTvi4+MRGhqqaq/tbBoAnJ2dIZPJGlScSZtiToaGidoI3cjJQ+zpiyi+V44OXm7oE9YeZiYm2Hv8HDb9eqLG9n+cvQxryziEtfdCbmGJxnF3HfmTiZpaBJlMBpkhL68qyAHT5r+qnIlJdQqytraGra3tQ7c3MzNDly5dEBsbi9GjRwOonkwWGxuLqVOn1rqPNsWcDA0TtZH5as9RbPntJB6cyx+z9yiW/etxbItL1LjfbwkpsLGsewGFGzl5ugqTjJiZtYPaf6kJtNDJZAAwY8YMTJw4EV27dkW3bt3w6aefoqSkBM8//zwA4yzmxERtRA6fvYzNB2o++J+TV4T5G36uM9FWKhR42IN6zvZ1X2p75aPNyCsqhYONJb544+l6xUzGJ3TyCrFDaAFa7jPx48ePx+3btzFv3jxkZ2cjNDQU+/btU00Yy8jIgPSBxWDuF3N655138J///Aft27dvdsWcmKiNyM9Hzmrsu5GTB1MTGSqrFBq36dm5LX45lozSsopa+4f1qPsvdl5RKe4UFNcvWCLSXgs+owaAqVOnarzUbYzFnFr2p21krj/k0nQHL1eNfX6tnNGpTSvMeXYITE1q3psb1SsEEUF+jY6RiHSghSfqloZn1EbE2c4Kt/OLNPYP69EZeUWlyLxToNZuITfFhCE9ELP3GNJu3kaPTm1gIpOiuLQc9taWiOoeiJB2XBuayGBwOdgWhYnaiAzpEYSUa7U/xO9kZwV/HzeM698F59IykXbzNioVCoS194K/txuWf7sPZRVVavuM6hWCqWP76SN0aoR7uZm4cWwb8i6fAgA4tOuK1hFjYeHIGfrGi4m6JWGiNiJR3QKRmHoN8UmX1NrNzUzh7miL6Pe+UU0Yc7SxxKvj+qOrvy+eWvhljSQNAD8d/hNhHbzQs3M7fYRPWii+dQXJ38yGouzvx+pundmPOymH0fm55bB2q75dUVGUi4KMZEhkJrBvEwYTOQunNGs8o25RmKiNiEwqxdsThmFg13TVc9T+Xm44l56JPy/fUNs2t6gU7369B08P6oai0jKNY+45do6J2oClH/hSLUnfpygrQfqBLxH09GKk7V+NW2f2Q1BWTySUmVnAu88z8Oz+uL7DJV1hom5RmKiNjEQiQY9OfujRqfpM6krmbWyq5ZEtAKhSKHH47OU6x8up4543iauiOA8FVzXP9C+4ehZp+1YhO3GvWrui4h7SD3wJU0t7uHbmrQ0iQ8epg0bufHpWnf0
Pe5zK09leh9GQLikq7gGo6+F3AbfOxmrsvXFsu85jIj1pWdWJWzwmaiNnaW5WZ7+DtSWc7aw19o/sGaLrkEhH5HauMK1j9S8Tc2sIVbU/Ew8ApTnp/0v2RGTImKiNXI9ObWBupnlN4H5dArDghcdgZ2Wh1i6RAJOGRiDc37upQyQtSWUmaPXISI39Lg+5rC2RmUAia/7rRRMZO96jNjIVVVXIySuCjYU57KwtYGUuR/SIXvjv9oM1tvX1cMLoXiGwspBj4zvP47dTKbiSeRu2VhYY9EhHeLk6ivATUEO0jhyHqntFyEzYBUFRPXNf8r8E7jvgBeRfOYN7d2/Uuq9zQE9IZfwV0CwJAieUtSD8V2okFAolNu4/jp+PnEVRaRmkEgm6BbbBy6N7Y2SvELg52mJ7XCJSr9+CtYUcA7oE4In+XWBlUV2Iw9LcDCN78TJ3cyORSNBm4GR4RoxF/pXqoiv2fuEws7IHALSN+jfOb10IQVGptp+plT28+zyn73BJZ3iPuiVhojYSH209gAMJKar3SkHA8fNXcOnGLax642l0D2yD7oFtRIyQmpKZlT1cO/ev0W7vF4aQSR/gxrHtyL9yBhKZCZwDesIz8v9gbqd5SVkycIISgAGX4SSdYqI2AtdzcvHbqZRa++4WlOCnw2cxaWjzqb1KumXt0R4BY2aLHQbpEmd9tyicTGYETly4Wue/2xPnrwAABEFAYUkZKiprrkJGRESGiWfULYAA4Ocjf2Jb3Blk3smHqUyGXiHtMHl4T7g52oodHhE1lJS/ulsSnlEbge6BvnX2m5uZ4rNtB5F5Jx8AUKlQ4GBiKl777HvcLai5/CQRGTgpf3W3JPy0jYCXqyMGdu1Ya5+DjSVSNVTUulNQjB2HEpsyNCIiaiQmaiMx88lBeGrgI7CxrH7cSiqRoEdgGzzeOwxVSqXG/Y4kp+krRCIi0gJvdBgJmUyKF4b3xLODuyMnrwjWlnLYW1vil6PJde6nUGhO4kREJD4maiNjZmqC1q5/r/8c3sEbEonmpzm6cIlQIiKDxkvfRs7D2Q4Du9R+/9pCboqxfcP1HBERETUEz6hbgNfHD4S1pRx7j59HWUX1UpLtW7ti6th+XM+7BagqK8atPw8gPz0JEpkpnP0j4dzpUUhZkIOoWWCibgFMTWR45fG+mDgkAtdu5cLaQg5vNyboluBebhaSv5mNiqI7qrbc1GPIOr0bnZ5+FyZySxGjI6L64KXvFsTKQo5AXw8m6Rbk8i8r1JL0fUU3U5FxaLMIERFRQzFRExmpe7lZKLimedZ/zp8HIAic9U9k6JioiYxURdHdOvuryoqhqCjTUzREpC0maiIjZe7gDkg0/xM3s3GCzMxCjxERkTaYqImMlNzWGU7+PTT2e3QZBolEoseIiEgbTNRERqzdsKmw9mhXo925Yy+0jhwnQkRE1FB8PIvIiJla2iHkhU+QeykB+elJkMpM4BTQE7atA8QOjYjqiYmayMhJJFI4degOpw7dxQ6FiLTAS99EREQGjImaiIjIgDFRExERGTDeo24h7uQX45djyUi9fgs2FnL07xKAbh19+XgOEZGBY6JuAc6m3cDcdbtQWl6havs9MRX9wv0x+5khkEqZrImIDBUvfRs5hUKJpd/sVUvS9x1MTMWvCRdEiIqIiOqLidrInUy5irsFJRr79x4/p8doiIiooZiojdydgqI6+2/nF+spEiIi0oaoiXrZsmV45JFHYGNjA1dXV4wePRqpqal17hMTEwOJRKL2Mjc311PEzY+ni0Od/a1d7PUTCBERaUXURB0fH48pU6bg+PHjOHDgACorKzF48GCUlGi+VAsAtra2yMrKUr2uXbump4ibn7D2XvBy1ZysR/YK0WM0RETUUKLO+t63b5/a+5iYGLi6uuL06dPo3bu3xv0kEgnc3d2bOjyjIJFIMP+FEZizekeNy9xPDnwEvYJrFmwgIiLDYVCPZxUUFAAAHB0d69yuuLgYPj4+UCq
VCA8Px9KlS9GpUyd9hNgs+bg5IuY/kxB35i+156i93er+/0xEROIzmEStVCrx2muvoWfPnggKCtK4nb+/PzZs2IDg4GAUFBTgww8/RGRkJM6fP4/WrVvX2L68vBzl5eWq98XFLXPylJmpCQZ3C8TgboFih0JERA1gMIl6ypQpOHfuHA4fPlzndhEREYiIiFC9j4yMRMeOHbFmzRosXry4xvbLli3DwoULdR4vERGRPhjE41lTp07F7t27cfDgwVrPiutiamqKsLAwXL58udb+OXPmoKCgQPWKj4/XRchGqbDkHnIL657IR0RE+iXqGbUgCJg2bRp+/PFHxMXFoU2bNg0eQ6FQIDk5GcOGDau1Xy6XQy6Xq95bW1trHa+xunA1E+t3H8HZtJsAAF93JzwzuBv6hvmLHBkREYmaqKdMmYLNmzfjp59+go2NDbKzswEAdnZ2sLCwAABMmDABnp6eWLZsGQBg0aJF6NGjB9q1a4f8/Hx88MEHuHbtGl588UXRfo7m7OK1bMz6YjsqKhWqtqvZd7Fk416UVVRhSHdO0iMiEpOoiXrVqlUAgL59+6q1f/XVV5g0aRIAICMjA1Lp31fo8/LyEB0djezsbDg4OKBLly44evQoAgM5SUobG/cdU0vSD/p67zEM6toRMln97pA42Fiq/ZeIiBpP9EvfDxMXF6f2/pNPPsEnn3zSRBEZr9zCEuw9fg6p12/B2kKOAV0C0LmtJ06lal4s5k5BMVKv30Kgr0e9jvHFG0/rKlwiIvofg5n1TU3nfHom3l67EyVlf1fQOpCQgv7h/njYdyWl8uFfpoiIqOkYxKxvajoKhRJLNu5RS9L3/Z6YitZ1LC9qb22BDt6uTRkeERE9BBO1kUu4eLXOClkyqRRSqaTWvicHPAIzE150IaLmJzc3F8888wxsbW1hb2+PyZMnP3TBq759+9Yo+vTvf/9bTxFrxkRt5G7n113msrSsAosmj4Svu5OqzcnOClPG9MXYvuFNHR4RUZN45plncP78eRw4cAC7d+/GoUOH8NJLLz10v+joaLWiT++//74eoq0bT5eMXCtn+zr7PV3s0T2wDboHtkHGrVxUKRTwcXOq90xvIiJDk5KSgn379iEhIQFdu3YFAPz3v//FsGHD8OGHH6JVq1Ya97W0tDS4ok/8bWzkwjt4w7OOmtMjIoNVf/Z2c4RfKxcmaSJq1o4dOwZ7e3tVkgaAgQMHQiqV4sSJE3Xuu2nTJjg7OyMoKAhz5sxBaWlpU4f7UDyjNnISiQTzn38Ms1f/WGN50P/rG47eoe1FioyIqFpxcTEKCwtV7/+5omRDZWdnw9VVfSKsiYkJHB0dVQtr1ebpp5+Gj48PWrVqhbNnz+Ktt95CamoqduzYoXUsusBE3QK08XBGzH8m4eCZVPyVkQ0rCzkGdumINq2cxQ6NiAh9+vRRez9//nwsWLCgxnazZ8/Ge++9V+dYKSkpWsfx4D3szp07w8PDAwMGDEBaWhratm2r9biNxUTdQljITTGsRxCG9dBcQpSISAzx8fEIDQ1Vvdd0Nv3GG2+oVq3UxM/PD+7u7sjJyVFrr6qqQm5uboPuP3fv3h0AcPnyZSZq0h2FUonLN3KgFAS0b+0KE5lM7JCIiOpkbW0NW1vbh27n4uICFxeXh24XERGB/Px8nD59Gl26dAEA/P7771AqlarkWx9JSUkAAA+P+q3O2FSYqI3IrycvIGbvUdVz0w42lnh6UDeMfjRU3MCIiPSoY8eOGDJkCKKjo7F69WpUVlZi6tSpePLJJ1Uzvm/evIkBAwZg48aN6NatG9LS0rB582YMGzYMTk5OOHv2LF5//XX07t0bwcHBDzli02KiNhLxZ/7CB1t+VWvLKyrFyh1xkEmlGNFT3L9oRET6tGnTJkydOhUDBgyAVCrF2LFj8dlnn6n6KysrkZqaqprVbWZmht9++w2ffvopSkpK4OXlhbFjx+Kdd94R60dQYaI2Et/+qvmRgy2/ncS
wiCDIpHzsiohaBkdHR2zevFljv6+vr1phKC8vL8THx+sjtAbjb24jcLegBFez72rsv51fjIxbuXqMiIiIdIVn1EbApB4LlEglEhxKuoTU69mwsTBHv3B/uDk+fPIGERGJi4naCNhZWyCoTSucS8+std/TxR7z1u9C5p0CVdtXe45iwtAIPDOom77CJCIiLfDSt5F4YXhPmJrUfBRLKpWgqkqplqQBQCkIiNlzFCfOp+srRCIi0gITtZHo3NYTH7wyFuEdvCH5X9XKzn6eePGxXriVV6hxv52Hk/QTIBERaYWXvo1Ipzat8N7LY1BWUQlBEGAhN8PPR/6sc59r2ZxkRkRkyJiojZC5manqzw42VnVu62Rr2dThEBFRI/DSt5HrHtgGDjaak3FU9056jIaIiBqKidrImZrI8ObTUZCb1rx4Ehnkh6HdWaSDiMiQ8dJ3C9A1wAdr33wWPx85i9Trt2BtIceALgHoFdyOq5URERk4JuoWopWzPf41qrfYYRARUQPxdIqIiMiAMVETEREZMCZqIiIiA8ZETUREZMCYqImIiAwYEzUREZEBY6ImIiIyYEzUREREBqzFLniSkpIidghE9D8eHh7w8PAQO4wGy8rKQlZWlthhNFv8PVw/LS5Re3h4oE+fPnj22WfFDoWI/mf+/PlYsGCB2GE02Jo1a7Bw4UKxw2jW+vTp0yy/pOmTRBAEQewg9K0lfwsuLi5Gnz59EB8fD2tra7HDIT0y5M+eZ9T1Z8ifozaa62evTy0yUbdkhYWFsLOzQ0FBAWxtbcUOh/SIn71x4OfY8nAyGRERkQFjoiYiIjJgTNQtjFwux/z58yGXy8UOhfSMn71x4OfY8vAeNRERkQHjGTUREZEBY6ImIiIyYEzUREREBoyJmhokLi4OEokE+fn5YodCRNQiMFGLKDs7G9OmTYOfnx/kcjm8vLwwYsQIxMbG6vQ4ffv2xWuvvabTMeuydu1a9O3bF7a2tkzqjSSRSOp8NWbZTYlEgp07dz50uyVLliAyMhKWlpawt7fX+ngtGT9HaowWt9a3obh69Sp69uwJe3t7fPDBB+jcuTMqKyuxf/9+TJkyBRcvXtRrPIIgQKFQwMSk8X8lSktLMWTIEAwZMgRz5szRQXQt14PLU27duhXz5s1Damqqqk0fS0hWVFRg3LhxiIiIwPr165v8eMaInyM1ikCiGDp0qODp6SkUFxfX6MvLy1P9+dq1a8LIkSMFKysrwcbGRhg3bpyQnZ2t6p8/f74QEhIibNy4UfDx8RFsbW2F8ePHC4WFhYIgCMLEiRMFAGqv9PR04eDBgwIAYc+ePUJ4eLhgamoqHDx4UCgrKxOmTZsmuLi4CHK5XOjZs6dw8uRJ1fHu7/dgjJo0ZFt6uK+++kqws7NTa1u3bp0QEBAgyOVywd/fX1i5cqWqr7y8XJgyZYrg7u4uyOVywdvbW1i6dKkgCILg4+Oj9nfCx8dHq+NTw/FzpIbiGbUIcnNzsW/fPixZsgRWVlY1+u9fllIqlRg1ahSsra0RHx+PqqoqTJkyBePHj0dcXJxq+7S0NOzcuRO7d+9GXl4ennjiCSxfvhxLlizBihUr8NdffyEoKAiLFi0CALi4uODq1asAgNmzZ+PDDz+En58fHBwc8Oabb2L79u34+uuv4ePjg/fffx9RUVG4fPkyHB0dm/p/DTXApk2bMG/ePHz++ecICwvDmTNnEB0dDSsrK0ycOBGfffYZdu3ahe+//x7e3t64fv06rl+/DgBISEiAq6srvvrqKwwZMgQymUzkn6bl4udID8NELYLLly9DEAQEBATUuV1sbCySk5ORnp4OLy8vAMDGjRvRqVMnJCQk4JFHHgFQndBjYmJgY2MDAHjuuecQGxuLJUuWwM7ODmZmZrC0tIS7u3uNYyxatAiDBg0CAJSUlGDVqlWIiYnB0KFDAQDr1q3DgQMHsH79esyaNUtn/w+o8ebPn4+PPvoIY8aMAQC0adMGFy5cwJo
1azBx4kRkZGSgffv26NWrFyQSCXx8fFT7uri4AKj+Uljb3wvSH36O9DCcTCYCoZ6LwaWkpMDLy0uVpAEgMDAQ9vb2agXXfX19VUkaqC4bl5OTU69jdO3aVfXntLQ0VFZWomfPnqo2U1NTdOvWjQXeDUxJSQnS0tIwefJkWFtbq17vvvsu0tLSAACTJk1CUlIS/P398eqrr+LXX38VOWr6J36OVB88oxZB+/btIZFIdDZhzNTUVO29RCKBUqms1761XXonw1dcXAyg+opH9+7d1fruX/4MDw9Heno69u7di99++w1PPPEEBg4ciG3btuk9XqodP0eqD55Ri8DR0RFRUVFYuXIlSkpKavTff5ypY8eOavejAODChQvIz89HYGBgvY9nZmYGhULx0O3atm0LMzMzHDlyRNVWWVmJhISEBh2Pmp6bmxtatWqFK1euoF27dmqvNm3aqLaztbXF+PHjsW7dOmzduhXbt29Hbm4ugOovePX5e0FNh58j1QfPqEWycuVK9OzZE926dcOiRYsQHByMqqoqHDhwAKtWrUJKSgoGDhyIzp0745lnnsGnn36KqqoqvPLKK+jTp4/aJeuH8fX1xYkTJ3D16lVYW1trnBRmZWWFl19+GbNmzYKjoyO8vb3x/vvvo7S0FJMnT6738bKzs5GdnY3Lly8DAJKTk2FjYwNvb29OSNOhhQsX4tVXX4WdnR2GDBmC8vJynDp1Cnl5eZgxYwY+/vhjeHh4ICwsDFKpFD/88APc3d1VkxV9fX0RGxuLnj17Qi6Xw8HBodbjZGRkIDc3FxkZGVAoFEhKSgIAtGvXTi+PFRk7fo70UGJPO2/JMjMzhSlTpgg+Pj6CmZmZ4OnpKYwcOVI4ePCgapv6Pp71oE8++UTtMY3U1FShR48egoWFRY3Hs/756NS9e/eEadOmCc7Ozlo/njV//vwaj4QBEL766ist/i/RfbU9VrNp0yYhNDRUMDMzExwcHITevXsLO3bsEARBENauXSuEhoYKVlZWgq2trTBgwAAhMTFRte+uXbuEdu3aCSYmJnU+1lPbI34A1P6eUv3xc6SGYplLIiIiA8Z71ERERAaMiZqIiMiAMVETEREZMCZqIiIiA8ZETURkgFj7ne5jojZQkyZNgkQiwfLly9Xad+7cCYlE0mTHzc3NxbRp0+Dv7w8LCwt4e3vj1VdfRUFBgdp2GRkZGD58OCwtLeHq6opZs2ahqqqqyeJqSfjZEwBERkYiKysLdnZ2YodCImOiNmDm5uZ47733kJeXp7djZmZmIjMzEx9++CHOnTuHmJgY7Nu3T23BE4VCgeHDh6OiogJHjx7F119/jZiYGMybN09vcRo7fvZkZmYGd3f3Jv1yRs2E2A9yU+0mTpwoPPbYY0JAQIAwa9YsVfuPP/4o6Ptj+/777wUzMzOhsrJSEARB2LNnjyCVStUWXlm1apVga2srlJeX6zU2Y8TP3jj16dNHmDp1qjB9+nTB3t5ecHV1FdauXSsUFxcLkyZNEqytrYW2bdsKe/bsEQSh5uJC9xdK2bdvnxAQECBYWVkJUVFRQmZmptoxpk+frnbcUaNGCRMnTlS9X7lypdCuXTtBLpcLrq6uwtixY5v6R6dG4hm1AZPJZFi6dCn++9//4saNG/Xeb+jQoWqVeP756tSpU4PiKCgogK2tLUxMqlecPXbsGDp37gw3NzfVNlFRUSgsLMT58+cbNDbVjp+9cfr666/h7OyMkydPYtq0aXj55Zcxbtw4REZGIjExEYMHD8Zzzz2H0tLSWvcvLS3Fhx9+iG+++QaHDh1CRkYGZs6cWe/jnzp1Cq+++ioWLVqE1NRU7Nu3D71799bVj0dNhGt9G7jHH38coaGhmD9/PtavX1+vfb788kvcu3dPY/8/q23V5c6dO1i8eDFeeuklVVt2drbaL2oAqvfZ2dn1Hpvqxs/e+ISEhOCdd94BAMyZMwfLly+Hs7MzoqOjAQDz5s3DqlWrcPbs2Vr3r6y
sxOrVq9G2bVsAwNSpU7Fo0aJ6Hz8jIwNWVlZ47LHHYGNjAx8fH4SFhTXyp6KmxkTdDLz33nvo379/vb85e3p66uS4hYWFGD58OAIDA7FgwQKdjEkNw8/euAQHB6v+LJPJ4OTkhM6dO6va7n/pycnJga2tbY39LS0tVUkaaFjteQAYNGgQfHx84OfnhyFDhmDIkCF4/PHHYWlpqc2PQ3rCS9/NQO/evREVFYU5c+bUa3tdXP4sKirCkCFDYGNjgx9//FHtTMzd3R23bt1S2/7+e3d39wb8ZPQw/OyNS2214x9suz9xTFM9+dr2Fx4o1yCVStXeA9Vn4ffZ2NggMTERW7ZsgYeHB+bNm4eQkBA+AmbgeEbdTCxfvhyhoaHw9/d/6LaNvfxZWFiIqKgoyOVy7Nq1C+bm5mr9ERERWLJkCXJycuDq6goAOHDgAGxtbVm3ugnws6f6cnFxQVZWluq9QqHAuXPn0K9fP1WbiYkJBg4ciIEDB2L+/Pmwt7fH77//jjFjxogRMtUDE3Uzcb8u9WefffbQbRtz+bOwsBCDBw9GaWkpvv32WxQWFqKwsBBA9S8BmUyGwYMHIzAwEM899xzef/99ZGdn45133sGUKVMgl8u1PjbVjp891Vf//v0xY8YM/PLLL2jbti0+/vhjtbPl3bt348qVK+jduzccHBywZ88eKJXKen0JJPEwUTcjixYtwtatW5v0GImJiThx4gSA6oLyD0pPT4evry9kMhl2796Nl19+GREREbCyssLEiRMbNKmFGoafPdXHCy+8gD///BMTJkyAiYkJXn/9dbWzaXt7e+zYsQMLFixAWVkZ2rdvjy1btjT4aQDSL9ajJiIiMmCcTEZERGTAmKiJiIgMGBM1ERGRAWOiJiIiMmBM1ERELQxrXTcvTNRERI2QnZ2NadOmwc/PD3K5HF5eXhgxYgRiY2N1epy+ffvitdde0+mYdVm7di369u0LW1tbJnWRMVETEWnp6tWr6NKlC37//Xd88MEHSE5Oxr59+9CvXz9MmTJF7/EIgoCqqiqdjFVaWoohQ4bgP//5j07Go0YQtcgmEVEzNnToUMHT01MoLi6u0Xe/jrQgCMK1a9eEkSNHClZWVoKNjY0wbtw4tZre8+fPF0JCQoSNGzcKPj4+gq2trTB+/HihsLBQEITqGuUA1F7p6emqmtV79uwRwsPDBVNTU+HgwYNCWVmZMG3aNMHFxUWQy+VCz549hZMnT6qO989a13VpyLbUNHhGTUSkhdzcXOzbtw9TpkyBlZVVjX57e3sA1QU2Ro0ahdzcXMTHx+PAgQO4cuUKxo8fr7Z9Wloadu7cid27d2P37t2Ij4/H8uXLAQArVqxAREQEoqOjkZWVhaysLHh5ean2nT17NpYvX46UlBQEBwfjzTffxPbt2/H1118jMTER7dq1Q1RUFHJzc5vufwg1GS4hSkSkhcuXL0MQBAQEBNS5XWxsLJKTk5Genq5Krhs3bkSnTp2QkJCARx55BEB1Qo+JiYGNjQ0A4LnnnkNsbCyWLFkCOzs7mJmZwdLSstYqZYsWLcKgQYMAACUlJVi1ahViYmIwdOhQAMC6detw4MABrF+/HrNmzdLZ/wPSD55RExFpQajn6sspKSnw8vJSOwMODAyEvb09UlJSVG2+vr6qJA00rNZ0165dVX9OS0tDZWUlevbsqWozNTVFt27d1I5HzQcTNRGRFtq3bw+JRIKLFy/qZLzaak1rqkv9T7VdeifjwURNRKQFR0dHREVFYeXKlSgpKanRf/9xpo4dO+L69eu4fv26qu/ChQvIz89vUA1vMzMzKBSKh27Xtm1bmJmZ4ciRI6q2yspKJCQksGZ4M8VETUSkpZUrV0KhUKBbt27Yvn07Ll26hJSUFHz22WeIiIgAAAwcOFBVUzwxMREnT57EhAkT0KdPH7VL1g/j6+uLEydO4OrVq7hz547Gs20rKyu8/PLLmDVrFvbt24cLFy4gOjoapaWlmDx5cr2
Pl52djaSkJFy+fBkAkJycjKSkJE5IEwETNRGRlvz8/JCYmIh+/frhjTfeQFBQEAYNGoTY2FisWrUKQPUl7J9++gkODg7o3bs3Bg4cCD8/vwbXF585cyZkMhkCAwPh4uKCjIwMjdsuX74cY8eOxXPPPYfw8HBcvnwZ+/fvh4ODQ72Pt3r1aoSFhSE6OhoA0Lt3b4SFhWHXrl0Nipsaj/WoiYiIDBjPqImIiAwYEzUREZEBY6ImIiIyYEzUREREBoyJmoiIyIAxURMRERkwJmoiIiIDxkRNRERkwJioiYiIDBgTNRERkQFjoiYiIjJgTNREREQG7P8BAzIgTT7Tai8AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig1 = my_data.mean_diff.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Create a Gardner-Altman plot for the Hedges' g effect size." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig2 = my_data.hedges_g.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a Cumming estimation plot for the mean difference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig3 = my_data.mean_diff.plot(float_contrast=True);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Create a paired Gardner-Altman plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_data_paired = dabest.load(df, idx=(\"Control 1\", \"Test 1\"),\n", + " id_col = \"ID\", paired='baseline')\n", + "fig4 = my_data_paired.mean_diff.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a multi-group Cumming plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_multi_groups = dabest.load(df, id_col = \"ID\", \n", + " idx=((\"Control 1\", \"Test 1\"),\n", + " (\"Control 2\", \"Test 2\")))\n", + "fig5 = my_multi_groups.mean_diff.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a shared control Cumming plot." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_shared_control = dabest.load(df, id_col = \"ID\",\n", + " idx=(\"Control 1\", \"Test 1\",\n", + " \"Test 2\", \"Test 3\"))\n", + "fig6 = my_shared_control.mean_diff.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a repeated measures (against baseline) Slopeplot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_rm_baseline = dabest.load(df, id_col = \"ID\", paired = \"baseline\",\n", + " idx=(\"Control 1\", \"Test 1\",\n", + " \"Test 2\", \"Test 3\"))\n", + "fig7 = my_rm_baseline.mean_diff.plot();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a repeated measures (sequential) Slopeplot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "my_rm_sequential = dabest.load(df, id_col = \"ID\", paired = \"sequential\",\n", + " idx=(\"Control 1\", \"Test 1\",\n", + " \"Test 2\", \"Test 3\"))\n", + "fig8 = my_rm_sequential.mean_diff.plot();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class PermutationTest:\n", + " \"\"\"\n", + " A class to compute and report permutation tests.\n", + " \n", + " Parameters\n", + " ----------\n", + " control : array-like\n", + " test : array-like\n", + " These should be numerical iterables.\n", + " effect_size : string.\n", + " Any one of the following are accepted inputs:\n", + " 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', 'delta_g' or 'cliffs_delta'\n", + " is_paired : string, default None\n", + " permutation_count : int, default 5000\n", + " The number of permutations (reshuffles) to perform.\n", + " 
random_seed : int, default 12345\n", + " `random_seed` is used to seed the random number generator during\n", + " bootstrap resampling. This ensures that the generated permutations\n", + " are replicable.\n", + " \n", + " Returns\n", + " -------\n", + " A :py:class:`PermutationTest` object:\n", + " `difference`:float\n", + " The effect size of the difference between the control and the test.\n", + " `effect_size`:string\n", + " The type of effect size reported.\n", + " \n", + " \n", + " \"\"\"\n", + " \n", + " def __init__(self, control: array,\n", + " test: array, # These should be numerical iterables.\n", + " effect_size:str, # Any one of the following are accepted inputs: 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'\n", + " is_paired:str=None,\n", + " permutation_count:int=5000, # The number of permutations (reshuffles) to perform.\n", + " random_seed:int=12345,#`random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the generated permutations are replicable.\n", + " **kwargs):\n", + " from ._stats_tools.effsize import two_group_difference\n", + " from ._stats_tools.confint_2group_diff import calculate_group_var\n", + " \n", + "\n", + " self.__permutation_count = permutation_count\n", + "\n", + " # Run Sanity Check.\n", + " if is_paired and len(control) != len(test):\n", + " raise ValueError(\"The two arrays do not have the same length.\")\n", + "\n", + " # Initialise random number generator.\n", + " # rng = random.default_rng(seed=random_seed)\n", + " rng = RandomState(PCG64(random_seed))\n", + "\n", + " # Set required constants and variables\n", + " control = array(control)\n", + " test = array(test)\n", + "\n", + " control_sample = control.copy()\n", + " test_sample = test.copy()\n", + "\n", + " BAG = array([*control, *test])\n", + " CONTROL_LEN = int(len(control))\n", + " EXTREME_COUNT = 0.\n", + " THRESHOLD = abs(two_group_difference(control, test, \n", + " is_paired, 
effect_size))\n", + " self.__permutations = []\n", + " self.__permutations_var = []\n", + "\n", + " for i in range(int(self.__permutation_count)):\n", + " if is_paired:\n", + " # Select which control-test pairs to swap.\n", + " random_idx = rng.choice(CONTROL_LEN,\n", + " rng.randint(0, CONTROL_LEN+1),\n", + " replace=False)\n", + "\n", + " # Perform swap.\n", + " for i in random_idx:\n", + " _placeholder = control_sample[i]\n", + " control_sample[i] = test_sample[i]\n", + " test_sample[i] = _placeholder\n", + " \n", + " else:\n", + " # Shuffle the bag and assign to control and test groups.\n", + " # NB. rng.shuffle didn't produce replicable results...\n", + " shuffled = rng.permutation(BAG) \n", + " control_sample = shuffled[:CONTROL_LEN]\n", + " test_sample = shuffled[CONTROL_LEN:]\n", + "\n", + "\n", + " es = two_group_difference(control_sample, test_sample, \n", + " False, effect_size)\n", + " \n", + " group_var = calculate_group_var(var(control_sample, ddof=1), \n", + " CONTROL_LEN, \n", + " var(test_sample, ddof=1), \n", + " len(test_sample))\n", + " self.__permutations.append(es)\n", + " self.__permutations_var.append(group_var)\n", + "\n", + " if abs(es) > THRESHOLD:\n", + " EXTREME_COUNT += 1.\n", + "\n", + " self.__permutations = array(self.__permutations)\n", + " self.__permutations_var = array(self.__permutations_var)\n", + "\n", + " self.pvalue = EXTREME_COUNT / self.__permutation_count\n", + "\n", + "\n", + " def __repr__(self):\n", + " return(\"{} permutations were taken. 
The p-value is {}.\".format(self.__permutation_count, \n", + " self.pvalue))\n", + "\n", + "\n", + " @property\n", + " def permutation_count(self):\n", + " \"\"\"\n", + " The number of permutations taken.\n", + " \"\"\"\n", + " return self.__permutation_count\n", + "\n", + "\n", + " @property\n", + " def permutations(self):\n", + " \"\"\"\n", + " The effect sizes of all the permutations in a list.\n", + " \"\"\"\n", + " return self.__permutations\n", + "\n", + " \n", + " @property\n", + " def permutations_var(self):\n", + " \"\"\"\n", + " The experiment group variance of all the permutations in a list.\n", + " \"\"\"\n", + " return self.__permutations_var\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Notes**:\n", + " \n", + "The basic concept of permutation tests is the same as that behind bootstrapping.\n", + "In an \"exact\" permutation test, all possible reshuffles of the control and test \n", + "labels are performed, and the proportion of effect sizes that equal or exceed \n", + "the observed effect size is computed. This is the probability, under the null \n", + "hypothesis of zero difference between test and control groups, of observing the\n", + "effect size: the p-value of the Student's t-test.\n", + "\n", + "Exact permutation tests are impractical: computing the effect sizes for all reshuffles quickly exceeds trivial computational loads. 
A control group and a test group both with 10 observations each would have a total of $20!$ or $2.43 \\times {10}^{18}$ reshuffles.\n", + "Therefore, in practice, \"approximate\" permutation tests are performed, where a sufficient number of reshuffles are performed (5,000 or 10,000), from which the p-value is computed.\n", + "\n", + "More information can be found [here](https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example: permutation test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "control = norm.rvs(loc=0, size=30, random_state=12345)\n", + "test = norm.rvs(loc=0.5, size=30, random_state=12345)\n", + "perm_test = dabest.PermutationTest(control, test, \n", + " effect_size=\"mean_diff\", \n", + " is_paired=None)\n", + "perm_test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbs/API/load.ipynb b/nbs/API/load.ipynb index ea118cb3..5ae39e13 100644 --- a/nbs/API/load.ipynb +++ b/nbs/API/load.ipynb @@ -39,6 +39,7 @@ "#| hide\n", "from nbdev.showdoc import *\n", "import nbdev\n", + "\n", "nbdev.nbdev_export()" ] }, @@ -49,11 +50,24 @@ "outputs": [], "source": [ "#| export\n", - "def load(data, idx=None, x=None, y=None, paired=None, id_col=None,\n", - " ci=95, resamples=5000, random_seed=12345, proportional=False, \n", - " delta2 = False, experiment = None, experiment_label = None,\n", - " x1_level = None, mini_meta=False):\n", - " '''\n", + "def load(\n", + " data,\n", + " idx=None,\n", + " x=None,\n", + " y=None,\n", + " paired=None,\n", + " id_col=None,\n", + " ci=95,\n", + " resamples=5000,\n", + " random_seed=12345,\n", + " proportional=False,\n", + " delta2=False,\n", + " experiment=None,\n", + " experiment_label=None,\n", + " 
x1_level=None,\n", + " mini_meta=False,\n", + "):\n", + " \"\"\"\n", " Loads data in preparation for estimation statistics.\n", "\n", " This is designed to work with pandas DataFrames.\n", @@ -67,15 +81,15 @@ " with each individual tuple producing its own contrast plot\n", " x : string or list, default None\n", " Column name(s) of the independent variable. This can be expressed as\n", - " a list of 2 elements if and only if 'delta2' is True; otherwise it \n", + " a list of 2 elements if and only if 'delta2' is True; otherwise it\n", " can only be a string.\n", " y : string, default None\n", " Column names for data to be plotted on the x-axis and y-axis.\n", " paired : string, default None\n", - " The type of the experiment under which the data are obtained. If 'paired' \n", + " The type of the experiment under which the data are obtained. If 'paired'\n", " is None then the data will not be treated as paired data in the subsequent\n", - " calculations. If 'paired' is 'baseline', then in each tuple of x, other \n", - " groups will be paired up with the first group (as control). If 'paired' is \n", + " calculations. If 'paired' is 'baseline', then in each tuple of x, other\n", + " groups will be paired up with the first group (as control). If 'paired' is\n", " 'sequential', then in each tuple of x, each group will be paired up with\n", " its previous group (as control).\n", " id_col : default None.\n", @@ -90,7 +104,7 @@ " This integer is used to seed the random number generator during\n", " bootstrap resampling, ensuring that the confidence intervals\n", " reported are replicable.\n", - " proportional : boolean, default False. \n", + " proportional : boolean, default False.\n", " An indicator of whether the data is binary or not. When set to True, it\n", " specifies that the data consists of binary data, where the values are\n", " limited to 0 and 1. 
The code is not suitable for analyzing proportion\n", @@ -100,27 +114,42 @@ " delta2 : boolean, default False\n", " Indicator of delta-delta experiment\n", " experiment : String, default None\n", - " The name of the column of the dataframe which contains the label of \n", + " The name of the column of the dataframe which contains the label of\n", " experiments\n", " experiment_lab : list, default None\n", " A list of String to specify the order of subplots for delta-delta plots.\n", - " This can be expressed as a list of 2 elements if and only if 'delta2' \n", - " is True; otherwise it can only be a string. \n", + " This can be expressed as a list of 2 elements if and only if 'delta2'\n", + " is True; otherwise it can only be a string.\n", " x1_level : list, default None\n", " A list of String to specify the order of subplots for delta-delta plots.\n", - " This can be expressed as a list of 2 elements if and only if 'delta2' \n", - " is True; otherwise it can only be a string. \n", + " This can be expressed as a list of 2 elements if and only if 'delta2'\n", + " is True; otherwise it can only be a string.\n", " mini_meta : boolean, default False\n", " Indicator of weighted delta calculation.\n", "\n", " Returns\n", " -------\n", " A `Dabest` object.\n", - " '''\n", - " from ._classes import Dabest\n", + " \"\"\"\n", + " from dabest import Dabest\n", "\n", - " return Dabest(data, idx, x, y, paired, id_col, ci, resamples, random_seed, proportional, delta2, experiment, experiment_label, x1_level, mini_meta)\n", - "\n" + " return Dabest(\n", + " data,\n", + " idx,\n", + " x,\n", + " y,\n", + " paired,\n", + " id_col,\n", + " ci,\n", + " resamples,\n", + " random_seed,\n", + " proportional,\n", + " delta2,\n", + " experiment,\n", + " experiment_label,\n", + " x1_level,\n", + " mini_meta,\n", + " )" ] }, { @@ -129,16 +158,21 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "import numpy as np\n", "from typing import Union, Optional\n", + 
"import pandas as pd\n", "\n", - "def prop_dataset(group:Union[list, tuple, np.ndarray, dict], #Accepts lists, tuples, or numpy ndarrays of numeric types.\n", - " group_names: Optional[list] = None):\n", - " '''\n", + "\n", + "def prop_dataset(\n", + " group: Union[\n", + " list, tuple, np.ndarray, dict\n", + " ], # Accepts lists, tuples, or numpy ndarrays of numeric types.\n", + " group_names: Optional[list] = None,\n", + "):\n", + " \"\"\"\n", " Convenient function to generate a dataframe of binary data.\n", - " '''\n", - " import pandas as pd\n", + " \"\"\"\n", "\n", " if isinstance(group, dict):\n", " # If group_names is not provided, use the keys of the dict as group_names\n", @@ -146,37 +180,54 @@ " group_names = list(group.keys())\n", " elif not set(group_names) == set(group.keys()):\n", " # Check if the group_names provided is the same as the keys of the dict\n", - " raise ValueError('group_names must be the same as the keys of the dict.')\n", + " raise ValueError(\"group_names must be the same as the keys of the dict.\")\n", " # Check if the values in the dict are numeric\n", - " if not all([isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names]):\n", - " raise ValueError('group must be a dict of lists, tuples, or numpy ndarrays of numeric types.')\n", + " if not all(\n", + " [isinstance(group[name], (list, tuple, np.ndarray)) for name in group_names]\n", + " ):\n", + " raise ValueError(\n", + " \"group must be a dict of lists, tuples, or numpy ndarrays of numeric types.\"\n", + " )\n", " # Check if the values in the dict only have two elements under each parent key\n", " if not all([len(group[name]) == 2 for name in group_names]):\n", - " raise ValueError('Each parent key should have only two elements.')\n", + " raise ValueError(\"Each parent key should have only two elements.\")\n", " group_val = group\n", "\n", " else:\n", " if group_names is None:\n", - " raise ValueError('group_names must be provided if group is not a dict.')\n", 
+ " raise ValueError(\"group_names must be provided if group is not a dict.\")\n", " # Check if the length of group is two times of the length of group_names\n", " if not len(group) == 2 * len(group_names):\n", - " raise ValueError('The length of group must be two times of the length of group_names.')\n", - " group_val = {group_names[i]: [group[i*2], group[i*2+1]] for i in range(len(group_names))}\n", + " raise ValueError(\n", + " \"The length of group must be two times of the length of group_names.\"\n", + " )\n", + " group_val = {\n", + " group_names[i]: [group[i * 2], group[i * 2 + 1]]\n", + " for i in range(len(group_names))\n", + " }\n", "\n", " # Check if the sum of values in group_val under each key are the same\n", - " if not all([sum(group_val[name]) == sum(group_val[group_names[0]]) for name in group_val.keys()]):\n", - " raise ValueError('The sum of values under each key must be the same.')\n", - " \n", - " id_col = pd.Series(range(1, sum(group_val[group_names[0]])+1))\n", - " \n", + " if not all(\n", + " [\n", + " sum(group_val[name]) == sum(group_val[group_names[0]])\n", + " for name in group_val.keys()\n", + " ]\n", + " ):\n", + " raise ValueError(\"The sum of values under each key must be the same.\")\n", + "\n", + " id_col = pd.Series(range(1, sum(group_val[group_names[0]]) + 1))\n", + "\n", " final_df = pd.DataFrame()\n", "\n", " for name in group_val.keys():\n", - " col = np.repeat(0, group_val[name][0]).tolist() + np.repeat(1, group_val[name][1]).tolist()\n", - " df = pd.DataFrame({name:col})\n", + " col = (\n", + " np.repeat(0, group_val[name][0]).tolist()\n", + " + np.repeat(1, group_val[name][1]).tolist()\n", + " )\n", + " df = pd.DataFrame({name: col})\n", " final_df = pd.concat([final_df, df], axis=1)\n", "\n", - " final_df['ID'] = id_col\n", + " final_df[\"ID\"] = id_col\n", "\n", " return final_df" ] @@ -217,7 +268,7 @@ "N = 10\n", "c1 = sp.stats.norm.rvs(loc=100, scale=5, size=N)\n", "t1 = sp.stats.norm.rvs(loc=115, scale=5, size=N)\n", - 
"df = pd.DataFrame({'Control 1' : c1, 'Test 1': t1})" + "df = pd.DataFrame({\"Control 1\": c1, \"Test 1\": t1})" ] }, { @@ -282,8 +333,8 @@ "N = 10\n", "c1 = np.random.binomial(1, 0.2, size=N)\n", "t1 = np.random.binomial(1, 0.5, size=N)\n", - "df = pd.DataFrame({'Control 1' : c1, 'Test 1': t1})\n", - "my_data = dabest.load(df, idx=(\"Control 1\", \"Test 1\"),proportional=True)" + "df = pd.DataFrame({\"Control 1\": c1, \"Test 1\": t1})\n", + "my_data = dabest.load(df, idx=(\"Control 1\", \"Test 1\"), proportional=True)" ] }, { diff --git a/nbs/API/misc_tools.ipynb b/nbs/API/misc_tools.ipynb index da49407b..e63e8f07 100644 --- a/nbs/API/misc_tools.ipynb +++ b/nbs/API/misc_tools.ipynb @@ -49,14 +49,26 @@ { "cell_type": "code", "execution_count": null, - "id": "6b50da46", + "id": "5f54be1c", "metadata": {}, "outputs": [], "source": [ "#| export\n", - "def merge_two_dicts(x:dict,\n", - " y:dict\n", - " )->dict:#A dictionary containing a union of all keys in both original dicts.\n", + "import datetime as dt\n", + "from numpy import repeat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b50da46", + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "def merge_two_dicts(\n", + " x: dict, y: dict\n", + ") -> dict: # A dictionary containing a union of all keys in both original dicts.\n", " \"\"\"\n", " Given two dicts, merge them into a new dict as a shallow copy.\n", " Any overlapping keys in `y` will override the values in `x`.\n", @@ -70,24 +82,20 @@ " return z\n", "\n", "\n", - "\n", "def unpack_and_add(l, c):\n", " \"\"\"Convenience function to allow me to add to an existing list\n", " without altering that list.\"\"\"\n", " t = [a for a in l]\n", " t.append(c)\n", - " return(t)\n", - "\n", + " return t\n", "\n", "\n", "def print_greeting():\n", " from .__init__ import __version__\n", - " import datetime as dt\n", - " import numpy as np\n", "\n", " line1 = \"DABEST v{}\".format(__version__)\n", - " header = 
\"\".join(np.repeat(\"=\", len(line1)))\n", - " spacer = \"\".join(np.repeat(\" \", len(line1)))\n", + " header = \"\".join(repeat(\"=\", len(line1)))\n", + " spacer = \"\".join(repeat(\" \", len(line1)))\n", "\n", " now = dt.datetime.now()\n", " if 0 < now.hour < 12:\n", @@ -103,11 +111,10 @@ "\n", "\n", "def get_varname(obj):\n", - " matching_vars = [k for k,v in globals().items() if v is obj]\n", + " matching_vars = [k for k, v in globals().items() if v is obj]\n", " if len(matching_vars) > 0:\n", " return matching_vars[0]\n", - " else:\n", - " return \"\"\n" + " return \"\"" ] }, { diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb index 4f000c36..2582367a 100644 --- a/nbs/API/plot_tools.ipynb +++ b/nbs/API/plot_tools.ipynb @@ -60,7 +60,8 @@ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import numpy as np\n", - "import itertools" + "import itertools\n", + "import matplotlib.lines as mlines" ] }, { @@ -70,25 +71,21 @@ "metadata": {}, "outputs": [], "source": [ - "#| export \n", - "\n", - "def halfviolin(v, half='right', fill_color='k', alpha=1,\n", - " line_color='k', line_width=0):\n", - " import numpy as np\n", - "\n", - " for b in v['bodies']:\n", + "#| export\n", + "def halfviolin(v, half=\"right\", fill_color=\"k\", alpha=1, line_color=\"k\", line_width=0):\n", + " for b in v[\"bodies\"]:\n", " V = b.get_paths()[0].vertices\n", "\n", " mean_vertical = np.mean(V[:, 0])\n", " mean_horizontal = np.mean(V[:, 1])\n", "\n", - " if half == 'right':\n", + " if half == \"right\":\n", " V[:, 0] = np.clip(V[:, 0], mean_vertical, np.inf)\n", - " elif half == 'left':\n", + " elif half == \"left\":\n", " V[:, 0] = np.clip(V[:, 0], -np.inf, mean_vertical)\n", - " elif half == 'bottom':\n", + " elif half == \"bottom\":\n", " V[:, 1] = np.clip(V[:, 1], -np.inf, mean_horizontal)\n", - " elif half == 'top':\n", + " elif half == \"top\":\n", " V[:, 1] = np.clip(V[:, 1], mean_horizontal, np.inf)\n", "\n", " b.set_color(fill_color)\n", @@ -97,70 
+94,50 @@ " b.set_linewidth(line_width)\n", "\n", "\n", - "\n", - "# def align_yaxis(ax1, v1, ax2, v2):\n", - "# \"\"\"adjust ax2 ylimit so that v2 in ax2 is aligned to v1 in ax1\"\"\"\n", - "# # Taken from\n", - "# # http://stackoverflow.com/questions/7630778/\n", - "# # matplotlib-align-origin-of-right-axis-with-specific-left-axis-value\n", - "# _, y1 = ax1.transData.transform((0, v1))\n", - "# _, y2 = ax2.transData.transform((0, v2))\n", - "# inv = ax2.transData.inverted()\n", - "# _, dy = inv.transform((0, 0)) - inv.transform((0, y1-y2))\n", - "# miny, maxy = ax2.get_ylim()\n", - "# ax2.set_ylim(miny+dy, maxy+dy)\n", - "#\n", - "#\n", - "#\n", - "# def rotate_ticks(axes, angle=45, alignment='right'):\n", - "# for tick in axes.get_xticklabels():\n", - "# tick.set_rotation(angle)\n", - "# tick.set_horizontalalignment(alignment)\n", - "\n", - "\n", - "\n", "def get_swarm_spans(coll):\n", " \"\"\"\n", " Given a matplotlib Collection, will obtain the x and y spans\n", " for the collection. Will return None if this fails.\n", " \"\"\"\n", - " import numpy as np\n", " x, y = np.array(coll.get_offsets()).T\n", " try:\n", " return x.min(), x.max(), y.min(), y.max()\n", " except ValueError:\n", " return None\n", "\n", - "def error_bar(data:pd.DataFrame, # This DataFrame should be in 'long' format.\n", - " x:str, #x column to be plotted.\n", - " y:str, # y column to be plotted.\n", - " type:str='mean_sd', # Choose from ['mean_sd', 'median_quartiles']. Plots the summary statistics for each group. If 'mean_sd', then the mean and standard deviation of each group is plotted as a gapped line. If 'median_quantiles', then the median and 25th and 75th percentiles of each group is plotted instead.\n", - " offset:float=0.2, #Give a single float (that will be used as the x-offset of all gapped lines), or an iterable containing the list of x-offsets.\n", - " ax=None, #If a matplotlib Axes object is specified, the gapped lines will be plotted in order on this axes. 
If None, the current axes (plt.gca()) is used.\n", - " line_color=\"black\", # The color of the gapped lines.\n", - " gap_width_percent=1, # The width of the gap in the gapped lines, as a percentage of the y-axis span.\n", - " pos:list=[0, 1],#The positions of the error bars for the sankey_error_bar method.\n", - " method:str='gapped_lines', #The method to use for drawing the error bars. Options are: 'gapped_lines', 'proportional_error_bar', and 'sankey_error_bar'.\n", - " **kwargs:dict\n", - " ):\n", - " '''\n", + "\n", + "def error_bar(\n", + " data: pd.DataFrame, # This DataFrame should be in 'long' format.\n", + " x: str, # x column to be plotted.\n", + " y: str, # y column to be plotted.\n", + " type: str = \"mean_sd\", # Choose from ['mean_sd', 'median_quartiles']. Plots the summary statistics for each group. If 'mean_sd', then the mean and standard deviation of each group is plotted as a gapped line. If 'median_quantiles', then the median and 25th and 75th percentiles of each group is plotted instead.\n", + " offset: float = 0.2, # Give a single float (that will be used as the x-offset of all gapped lines), or an iterable containing the list of x-offsets.\n", + " ax=None, # If a matplotlib Axes object is specified, the gapped lines will be plotted in order on this axes. If None, the current axes (plt.gca()) is used.\n", + " line_color=\"black\", # The color of the gapped lines.\n", + " gap_width_percent=1, # The width of the gap in the gapped lines, as a percentage of the y-axis span.\n", + " pos: list = [\n", + " 0,\n", + " 1,\n", + " ], # The positions of the error bars for the sankey_error_bar method.\n", + " method: str = \"gapped_lines\", # The method to use for drawing the error bars. 
Options are: 'gapped_lines', 'proportional_error_bar', and 'sankey_error_bar'.\n", + " **kwargs: dict,\n", + "):\n", + " \"\"\"\n", " Function to plot the standard deviations as vertical errorbars.\n", " The mean is a gap defined by negative space.\n", "\n", " This function combines the functionality of gapped_lines(),\n", " proportional_error_bar(), and sankey_error_bar().\n", "\n", - " '''\n", - " import numpy as np\n", - " import pandas as pd\n", - " import matplotlib.pyplot as plt\n", - " import matplotlib.lines as mlines\n", + " \"\"\"\n", "\n", " if gap_width_percent < 0 or gap_width_percent > 100:\n", " raise ValueError(\"`gap_width_percent` must be between 0 and 100.\")\n", - " if method not in ['gapped_lines', 'proportional_error_bar', 'sankey_error_bar']:\n", - " raise ValueError(\"Invalid `method`. Must be one of 'gapped_lines', \\\n", - " 'proportional_error_bar', or 'sankey_error_bar'.\")\n", + " if method not in [\"gapped_lines\", \"proportional_error_bar\", \"sankey_error_bar\"]:\n", + " raise ValueError(\n", + " \"Invalid `method`. 
Must be one of 'gapped_lines', \\\n", + " 'proportional_error_bar', or 'sankey_error_bar'.\"\n", + " )\n", "\n", " if ax is None:\n", " ax = plt.gca()\n", @@ -169,14 +146,14 @@ " gap_width = ax_yspan * gap_width_percent / 100\n", "\n", " keys = kwargs.keys()\n", - " if 'clip_on' not in keys:\n", - " kwargs['clip_on'] = False\n", + " if \"clip_on\" not in keys:\n", + " kwargs[\"clip_on\"] = False\n", "\n", - " if 'zorder' not in keys:\n", - " kwargs['zorder'] = 5\n", + " if \"zorder\" not in keys:\n", + " kwargs[\"zorder\"] = 5\n", "\n", - " if 'lw' not in keys:\n", - " kwargs['lw'] = 2.\n", + " if \"lw\" not in keys:\n", + " kwargs[\"lw\"] = 2.0\n", "\n", " if isinstance(data[x].dtype, pd.CategoricalDtype):\n", " group_order = pd.unique(data[x]).categories\n", @@ -185,8 +162,10 @@ "\n", " means = data.groupby(x)[y].mean().reindex(index=group_order)\n", "\n", - " if method in ['proportional_error_bar', 'sankey_error_bar']:\n", - " g = lambda x: np.sqrt((np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x)))\n", + " if method in [\"proportional_error_bar\", \"sankey_error_bar\"]:\n", + " g = lambda x: np.sqrt(\n", + " (np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x))\n", + " )\n", " sd = data.groupby(x)[y].apply(g)\n", " else:\n", " sd = data.groupby(x)[y].std().reindex(index=group_order)\n", @@ -195,20 +174,20 @@ " upper_sd = means + sd\n", "\n", " if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any():\n", - " kwargs['clip_on'] = True\n", + " kwargs[\"clip_on\"] = True\n", "\n", " medians = data.groupby(x)[y].median().reindex(index=group_order)\n", - " quantiles = data.groupby(x)[y].quantile([0.25, 0.75]) \\\n", - " .unstack() \\\n", - " .reindex(index=group_order)\n", + " quantiles = (\n", + " data.groupby(x)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order)\n", + " )\n", " lower_quartiles = quantiles[0.25]\n", " upper_quartiles = quantiles[0.75]\n", "\n", - " if type == 'mean_sd':\n", + " if type == 
\"mean_sd\":\n", " central_measures = means\n", " lows = lower_sd\n", " highs = upper_sd\n", - " elif type == 'median_quartiles':\n", + " elif type == \"median_quartiles\":\n", " central_measures = medians\n", " lows = lower_quartiles\n", " highs = upper_quartiles\n", @@ -235,13 +214,12 @@ " err2 = \"{} offset(s) were supplied in `offset`.\".format(len_offset)\n", " raise ValueError(err1 + err2)\n", "\n", - " kwargs['zorder'] = kwargs['zorder']\n", + " kwargs[\"zorder\"] = kwargs[\"zorder\"]\n", "\n", " for xpos, central_measure in enumerate(central_measures):\n", - " \n", - " kwargs['color'] = custom_palette[xpos]\n", + " kwargs[\"color\"] = custom_palette[xpos]\n", "\n", - " if method == 'sankey_error_bar':\n", + " if method == \"sankey_error_bar\":\n", " _xpos = pos[xpos] + offset[xpos]\n", " else:\n", " _xpos = xpos + offset[xpos]\n", @@ -249,36 +227,37 @@ " low = lows[xpos]\n", " high = highs[xpos]\n", " if low == high == central_measure:\n", - " low_to_mean = mlines.Line2D([_xpos, _xpos],\n", - " [low, central_measure],\n", - " **kwargs)\n", + " low_to_mean = mlines.Line2D(\n", + " [_xpos, _xpos], [low, central_measure], **kwargs\n", + " )\n", " ax.add_line(low_to_mean)\n", - " \n", - " mean_to_high = mlines.Line2D([_xpos, _xpos],\n", - " [central_measure, high],\n", - " **kwargs)\n", + "\n", + " mean_to_high = mlines.Line2D(\n", + " [_xpos, _xpos], [central_measure, high], **kwargs\n", + " )\n", " ax.add_line(mean_to_high)\n", " else:\n", - " low_to_mean = mlines.Line2D([_xpos, _xpos],\n", - " [low, central_measure - gap_width],\n", - " **kwargs)\n", + " low_to_mean = mlines.Line2D(\n", + " [_xpos, _xpos], [low, central_measure - gap_width], **kwargs\n", + " )\n", " ax.add_line(low_to_mean)\n", - " \n", - " mean_to_high = mlines.Line2D([_xpos, _xpos],\n", - " [central_measure + gap_width, high],\n", - " **kwargs)\n", + "\n", + " mean_to_high = mlines.Line2D(\n", + " [_xpos, _xpos], [central_measure + gap_width, high], **kwargs\n", + " )\n", " 
ax.add_line(mean_to_high)\n", - " \n", - "\n", - "def check_data_matches_labels(labels,#list of input labels \n", - " data, #Pandas Series of input data\n", - " side:str # 'left' or 'right' on the sankey diagram\n", - " ):\n", - " '''\n", - " Function to check that the labels and data match in the sankey diagram. \n", + "\n", + "\n", + "def check_data_matches_labels(\n", + " labels, # list of input labels\n", + " data, # Pandas Series of input data\n", + " side: str, # 'left' or 'right' on the sankey diagram\n", + "):\n", + " \"\"\"\n", + " Function to check that the labels and data match in the sankey diagram.\n", " And enforce labels and data to be lists.\n", " Raises an exception if the labels and data do not match.\n", - " '''\n", + " \"\"\"\n", " if len(labels) > 0:\n", " if isinstance(data, list):\n", " data = set(data)\n", @@ -294,12 +273,18 @@ " msg += \"Data: \" + \",\".join(data)\n", " raise Exception(f\"{side} labels and data do not match.{msg}\")\n", "\n", - " \n", + "\n", "def normalize_dict(nested_dict, target):\n", " val = {}\n", " for key in nested_dict.keys():\n", - " val[key] = np.sum([nested_dict[sub_key][key] for sub_key in nested_dict.keys() if key in nested_dict[sub_key]])\n", - " \n", + " val[key] = np.sum(\n", + " [\n", + " nested_dict[sub_key][key]\n", + " for sub_key in nested_dict.keys()\n", + " if key in nested_dict[sub_key]\n", + " ]\n", + " )\n", + "\n", " for key, value in nested_dict.items():\n", " if isinstance(value, dict):\n", " for subkey in value.keys():\n", @@ -307,66 +292,68 @@ " if subkey in val.keys():\n", " if val[subkey] != 0:\n", " # Address the problem when one of the label have zero value\n", - " value[subkey] = value[subkey] * target[subkey]['right']/val[subkey]\n", + " value[subkey] = (\n", + " value[subkey] * target[subkey][\"right\"] / val[subkey]\n", + " )\n", " else:\n", " value[subkey] = 0\n", " else:\n", - " value[subkey] = target[subkey]['right']\n", + " value[subkey] = target[subkey][\"right\"]\n", " return 
nested_dict\n", "\n", "\n", - "def width_determine(labels, data, pos='left'):\n", + "def width_determine(labels, data, pos=\"left\"):\n", " widths_norm = defaultdict()\n", " for i, label in enumerate(labels):\n", " myD = {}\n", " myD[pos] = data[data[pos] == label][pos + \"Weight\"].sum()\n", " if len(labels) != 1:\n", " if i == 0:\n", - " myD['bottom'] = 0\n", + " myD[\"bottom\"] = 0\n", " myD[pos] -= 0.01\n", - " myD['top'] = myD[pos]\n", + " myD[\"top\"] = myD[pos]\n", " elif i == len(labels) - 1:\n", " myD[pos] -= 0.01\n", - " myD['bottom'] = 1 - myD[pos]\n", - " myD['top'] = 1\n", + " myD[\"bottom\"] = 1 - myD[pos]\n", + " myD[\"top\"] = 1\n", " else:\n", " myD[pos] -= 0.02\n", - " myD['bottom'] = widths_norm[labels[i - 1]]['top'] + 0.02\n", - " myD['top'] = myD['bottom'] + myD[pos]\n", - " topEdge = myD['top']\n", + " myD[\"bottom\"] = widths_norm[labels[i - 1]][\"top\"] + 0.02\n", + " myD[\"top\"] = myD[\"bottom\"] + myD[pos]\n", " else:\n", - " myD['bottom'] = 0\n", - " myD['top'] = 1\n", + " myD[\"bottom\"] = 0\n", + " myD[\"top\"] = 1\n", " widths_norm[label] = myD\n", " return widths_norm\n", "\n", - "def single_sankey(left:np.array,# data on the left of the diagram\n", - " right:np.array, # data on the right of the diagram, len(left) == len(right)\n", - " xpos:float=0, # the starting point on the x-axis\n", - " leftWeight:np.array=None, #weights for the left labels, if None, all weights are 1\n", - " rightWeight:np.array=None, #weights for the right labels, if None, all weights are corresponding leftWeight\n", - " colorDict:dict=None, #input format: {'label': 'color'}\n", - " leftLabels:list=None, #labels for the left side of the diagram. The diagram will be sorted by these labels.\n", - " rightLabels:list=None, #labels for the right side of the diagram. 
The diagram will be sorted by these labels.\n", - " ax=None, #matplotlib axes to be drawn on\n", - " flow:bool=True, #if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison\n", - " sankey:bool=True, #if True, draw the sankey diagram, else draw barplot\n", - " width=0.5, \n", - " alpha=0.65, \n", - " bar_width=0.2, \n", - " error_bar_on:bool=True, #if True, draw error bar for each group comparison\n", - " strip_on:bool=True, #if True, draw strip for each group comparison\n", - " one_sankey:bool=False, #if True, only draw one sankey diagram\n", - " rightColor:bool=False, #if True, each strip of the diagram will be colored according to the corresponding left labels\n", - " align:bool='center'# if 'center', the diagram will be centered on each xtick, if 'edge', the diagram will be aligned with the left edge of each xtick\n", - " ):\n", - "\n", - " '''\n", + "\n", + "def single_sankey(\n", + " left: np.array, # data on the left of the diagram\n", + " right: np.array, # data on the right of the diagram, len(left) == len(right)\n", + " xpos: float = 0, # the starting point on the x-axis\n", + " leftWeight: np.array = None, # weights for the left labels, if None, all weights are 1\n", + " rightWeight: np.array = None, # weights for the right labels, if None, all weights are corresponding leftWeight\n", + " colorDict: dict = None, # input format: {'label': 'color'}\n", + " leftLabels: list = None, # labels for the left side of the diagram. The diagram will be sorted by these labels.\n", + " rightLabels: list = None, # labels for the right side of the diagram. 
The diagram will be sorted by these labels.\n", + " ax=None, # matplotlib axes to be drawn on\n", + " flow: bool = True, # if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison\n", + " sankey: bool = True, # if True, draw the sankey diagram, else draw barplot\n", + " width=0.5,\n", + " alpha=0.65,\n", + " bar_width=0.2,\n", + " error_bar_on: bool = True, # if True, draw error bar for each group comparison\n", + " strip_on: bool = True, # if True, draw strip for each group comparison\n", + " one_sankey: bool = False, # if True, only draw one sankey diagram\n", + " rightColor: bool = False, # if True, each strip of the diagram will be colored according to the corresponding left labels\n", + " align: bool = \"center\", # if 'center', the diagram will be centered on each xtick, if 'edge', the diagram will be aligned with the left edge of each xtick\n", + "):\n", + " \"\"\"\n", " Make a single Sankey diagram showing proportion flow from left to right\n", " Original code from: https://github.com/anazalea/pySankey\n", " Changes are added to normalize each diagram's height to be 1\n", "\n", - " '''\n", + " \"\"\"\n", "\n", " # Initiating values\n", " if ax is None:\n", @@ -391,26 +378,35 @@ " left.reset_index(drop=True, inplace=True)\n", " if isinstance(right, pd.Series):\n", " right.reset_index(drop=True, inplace=True)\n", - " dataFrame = pd.DataFrame({'left': left, 'right': right, 'leftWeight': leftWeight,\n", - " 'rightWeight': rightWeight}, index=range(len(left)))\n", - " \n", - " if dataFrame[['left', 'right']].isnull().any(axis=None):\n", - " raise Exception('Sankey graph does not support null values.')\n", + " dataFrame = pd.DataFrame(\n", + " {\n", + " \"left\": left,\n", + " \"right\": right,\n", + " \"leftWeight\": leftWeight,\n", + " \"rightWeight\": rightWeight,\n", + " },\n", + " index=range(len(left)),\n", + " )\n", + "\n", + " if dataFrame[[\"left\", \"right\"]].isnull().any(axis=None):\n", + " raise Exception(\"Sankey 
graph does not support null values.\")\n", "\n", " # Identify all labels that appear 'left' or 'right'\n", - " allLabels = pd.Series(np.sort(np.r_[dataFrame.left.unique(), dataFrame.right.unique()])[::-1]).unique()\n", + " allLabels = pd.Series(\n", + " np.sort(np.r_[dataFrame.left.unique(), dataFrame.right.unique()])[::-1]\n", + " ).unique()\n", "\n", " # Identify left labels\n", " if len(leftLabels) == 0:\n", " leftLabels = pd.Series(np.sort(dataFrame.left.unique())[::-1]).unique()\n", " else:\n", - " check_data_matches_labels(leftLabels, dataFrame['left'], 'left')\n", + " check_data_matches_labels(leftLabels, dataFrame[\"left\"], \"left\")\n", "\n", " # Identify right labels\n", " if len(rightLabels) == 0:\n", " rightLabels = pd.Series(np.sort(dataFrame.right.unique())[::-1]).unique()\n", " else:\n", - " check_data_matches_labels(leftLabels, dataFrame['right'], 'right')\n", + " check_data_matches_labels(leftLabels, dataFrame[\"right\"], \"right\")\n", "\n", " # If no colorDict given, make one\n", " if colorDict is None:\n", @@ -419,31 +415,33 @@ " colorPalette = sns.color_palette(palette, len(allLabels))\n", " for i, label in enumerate(allLabels):\n", " colorDict[label] = colorPalette[i]\n", - " fail_color = {0:\"grey\"}\n", + " fail_color = {0: \"grey\"}\n", " colorDict.update(fail_color)\n", " else:\n", " missing = [label for label in allLabels if label not in colorDict.keys()]\n", " if missing:\n", " msg = \"The palette parameter is missing values for the following labels : \"\n", - " msg += '{}'.format(', '.join(missing))\n", + " msg += \"{}\".format(\", \".join(missing))\n", " raise ValueError(msg)\n", "\n", " if align not in (\"center\", \"edge\"):\n", - " err = '{} assigned for `align` is not valid.'.format(align)\n", + " err = \"{} assigned for `align` is not valid.\".format(align)\n", " raise ValueError(err)\n", " if align == \"center\":\n", " try:\n", " leftpos = xpos - width / 2\n", " except TypeError as e:\n", - " raise TypeError(f'the dtypes of 
parameters x ({xpos.dtype}) '\n", - " f'and width ({width.dtype}) '\n", - " f'are incompatible') from e\n", - " else: \n", + " raise TypeError(\n", + " f\"the dtypes of parameters x ({xpos.dtype}) \"\n", + " f\"and width ({width.dtype}) \"\n", + " f\"are incompatible\"\n", + " ) from e\n", + " else:\n", " leftpos = xpos\n", "\n", " # Combine left and right arrays to have a pandas.DataFrame in the 'long' format\n", - " left_series = pd.Series(left, name='values').to_frame().assign(groups='left')\n", - " right_series = pd.Series(right, name='values').to_frame().assign(groups='right')\n", + " left_series = pd.Series(left, name=\"values\").to_frame().assign(groups=\"left\")\n", + " right_series = pd.Series(right, name=\"values\").to_frame().assign(groups=\"right\")\n", " concatenated_df = pd.concat([left_series, right_series], ignore_index=True)\n", "\n", " # Determine positions of left label patches and total widths\n", @@ -451,53 +449,57 @@ " leftWidths_norm = defaultdict()\n", " for i, leftLabel in enumerate(leftLabels):\n", " myD = {}\n", - " myD['left'] = (dataFrame[dataFrame.left == leftLabel].leftWeight.sum()/ \\\n", - " dataFrame.leftWeight.sum())\n", + " myD[\"left\"] = (\n", + " dataFrame[dataFrame.left == leftLabel].leftWeight.sum()\n", + " / dataFrame.leftWeight.sum()\n", + " )\n", " if len(leftLabels) != 1:\n", " if i == 0:\n", - " myD['bottom'] = 0\n", - " myD['left'] -= 0.01\n", - " myD['top'] = myD['left']\n", + " myD[\"bottom\"] = 0\n", + " myD[\"left\"] -= 0.01\n", + " myD[\"top\"] = myD[\"left\"]\n", " elif i == len(leftLabels) - 1:\n", - " myD['left'] -= 0.01\n", - " myD['bottom'] = 1 - myD['left']\n", - " myD['top'] = 1\n", + " myD[\"left\"] -= 0.01\n", + " myD[\"bottom\"] = 1 - myD[\"left\"]\n", + " myD[\"top\"] = 1\n", " else:\n", - " myD['left'] -= 0.02\n", - " myD['bottom'] = leftWidths_norm[leftLabels[i - 1]]['top'] + 0.02\n", - " myD['top'] = myD['bottom'] + myD['left']\n", - " topEdge = myD['top']\n", + " myD[\"left\"] -= 0.02\n", + " 
myD[\"bottom\"] = leftWidths_norm[leftLabels[i - 1]][\"top\"] + 0.02\n", + " myD[\"top\"] = myD[\"bottom\"] + myD[\"left\"]\n", + " topEdge = myD[\"top\"]\n", " else:\n", - " myD['bottom'] = 0\n", - " myD['top'] = 1\n", - " myD['left'] = 1\n", + " myD[\"bottom\"] = 0\n", + " myD[\"top\"] = 1\n", + " myD[\"left\"] = 1\n", " leftWidths_norm[leftLabel] = myD\n", "\n", " # Determine positions of right label patches and total widths\n", " rightWidths_norm = defaultdict()\n", " for i, rightLabel in enumerate(rightLabels):\n", " myD = {}\n", - " myD['right'] = (dataFrame[dataFrame.right == rightLabel].rightWeight.sum()/ \\\n", - " dataFrame.rightWeight.sum())\n", + " myD[\"right\"] = (\n", + " dataFrame[dataFrame.right == rightLabel].rightWeight.sum()\n", + " / dataFrame.rightWeight.sum()\n", + " )\n", " if len(rightLabels) != 1:\n", " if i == 0:\n", - " myD['bottom'] = 0\n", - " myD['right'] -= 0.01\n", - " myD['top'] = myD['right']\n", + " myD[\"bottom\"] = 0\n", + " myD[\"right\"] -= 0.01\n", + " myD[\"top\"] = myD[\"right\"]\n", " elif i == len(rightLabels) - 1:\n", - " myD['right'] -= 0.01\n", - " myD['bottom'] = 1 - myD['right']\n", - " myD['top'] = 1\n", + " myD[\"right\"] -= 0.01\n", + " myD[\"bottom\"] = 1 - myD[\"right\"]\n", + " myD[\"top\"] = 1\n", " else:\n", - " myD['right'] -= 0.02\n", - " myD['bottom'] = rightWidths_norm[rightLabels[i - 1]]['top'] + 0.02\n", - " myD['top'] = myD['bottom'] + myD['right']\n", - " topEdge = myD['top']\n", + " myD[\"right\"] -= 0.02\n", + " myD[\"bottom\"] = rightWidths_norm[rightLabels[i - 1]][\"top\"] + 0.02\n", + " myD[\"top\"] = myD[\"bottom\"] + myD[\"right\"]\n", + " topEdge = myD[\"top\"]\n", " else:\n", - " myD['bottom'] = 0\n", - " myD['top'] = 1\n", - " myD['right'] = 1\n", - " rightWidths_norm[rightLabel] = myD \n", + " myD[\"bottom\"] = 0\n", + " myD[\"top\"] = 1\n", + " myD[\"right\"] = 1\n", + " rightWidths_norm[rightLabel] = myD\n", "\n", " # Total width of the graph\n", " xMax = width\n", @@ -514,19 +516,29 @@ 
" if (flow == False and sankey == True) or one_sankey:\n", " for rightLabel in rightLabels:\n", " ax.fill_between(\n", - " [xMax + leftpos + (-bar_width * xMax * 0.5), leftpos + xMax + (bar_width * xMax * 0.5)], \n", + " [\n", + " xMax + leftpos + (-bar_width * xMax * 0.5),\n", + " leftpos + xMax + (bar_width * xMax * 0.5),\n", + " ],\n", " 2 * [rightWidths_norm[rightLabel][\"bottom\"]],\n", " 2 * [rightWidths_norm[rightLabel][\"top\"]],\n", " color=colorDict[rightLabel],\n", - " alpha=0.99\n", + " alpha=0.99,\n", " )\n", "\n", " # Plot error bars\n", " if error_bar_on and strip_on:\n", - " error_bar(concatenated_df, x='groups', y='values', ax=ax, offset=0, gap_width_percent=2,\n", - " method=\"sankey_error_bar\",\n", - " pos=[leftpos, leftpos + xMax])\n", - " \n", + " error_bar(\n", + " concatenated_df,\n", + " x=\"groups\",\n", + " y=\"values\",\n", + " ax=ax,\n", + " offset=0,\n", + " gap_width_percent=2,\n", + " method=\"sankey_error_bar\",\n", + " pos=[leftpos, leftpos + xMax],\n", + " )\n", + "\n", " # Determine widths of individual strips, all widths are normalized to 1\n", " ns_l = defaultdict()\n", " ns_r = defaultdict()\n", @@ -538,96 +550,121 @@ " for rightLabel in rightLabels:\n", " leftDict[rightLabel] = dataFrame[\n", " (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel)\n", - " ].leftWeight.sum()\n", - " \n", + " ].leftWeight.sum()\n", + "\n", " rightDict[rightLabel] = dataFrame[\n", " (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel)\n", - " ].rightWeight.sum()\n", - " factorleft = leftWidths_norm[leftLabel]['left']/sum(leftDict.values())\n", - " leftDict_norm = {k: v*factorleft for k, v in leftDict.items()}\n", + " ].rightWeight.sum()\n", + " factorleft = leftWidths_norm[leftLabel][\"left\"] / sum(leftDict.values())\n", + " leftDict_norm = {k: v * factorleft for k, v in leftDict.items()}\n", " ns_l_norm[leftLabel] = leftDict_norm\n", " ns_r[leftLabel] = rightDict\n", - " \n", + "\n", " # ns_r should be using a different 
way of normalization to fit the right side\n", " # It is normalized using the value with the same key in each sub-dictionary\n", " ns_r_norm = normalize_dict(ns_r, rightWidths_norm)\n", - " \n", + "\n", " # Plot strips\n", - " if sankey == True and strip_on == True:\n", + " if sankey and strip_on:\n", " for leftLabel, rightLabel in itertools.product(leftLabels, rightLabels):\n", " labelColor = leftLabel\n", " if rightColor:\n", " labelColor = rightLabel\n", - " if len(dataFrame[(dataFrame.left == leftLabel) & (dataFrame.right == rightLabel)]) > 0:\n", + " if (\n", + " len(\n", + " dataFrame[\n", + " (dataFrame.left == leftLabel) & (dataFrame.right == rightLabel)\n", + " ]\n", + " )\n", + " > 0\n", + " ):\n", " # Create array of y values for each strip, half at left value,\n", " # half at right, convolve\n", - " ys_d = np.array(50 * [leftWidths_norm[leftLabel]['bottom']] + \\\n", - " 50 * [rightWidths_norm[rightLabel]['bottom']])\n", - " ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode='valid')\n", - " ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode='valid')\n", - " ys_u = np.array(50 * [leftWidths_norm[leftLabel]['bottom'] + ns_l_norm[leftLabel][rightLabel]] + \\\n", - " 50 * [rightWidths_norm[rightLabel]['bottom'] + ns_r_norm[leftLabel][rightLabel]])\n", - " ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode='valid')\n", - " ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode='valid')\n", + " ys_d = np.array(\n", + " 50 * [leftWidths_norm[leftLabel][\"bottom\"]]\n", + " + 50 * [rightWidths_norm[rightLabel][\"bottom\"]]\n", + " )\n", + " ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode=\"valid\")\n", + " ys_d = np.convolve(ys_d, 0.05 * np.ones(20), mode=\"valid\")\n", + " ys_u = np.array(\n", + " 50\n", + " * [\n", + " leftWidths_norm[leftLabel][\"bottom\"]\n", + " + ns_l_norm[leftLabel][rightLabel]\n", + " ]\n", + " + 50\n", + " * [\n", + " rightWidths_norm[rightLabel][\"bottom\"]\n", + " + ns_r_norm[leftLabel][rightLabel]\n", + " ]\n", + " )\n", + " ys_u = 
np.convolve(ys_u, 0.05 * np.ones(20), mode=\"valid\")\n", + " ys_u = np.convolve(ys_u, 0.05 * np.ones(20), mode=\"valid\")\n", "\n", " # Update bottom edges at each label so next strip starts at the right place\n", - " leftWidths_norm[leftLabel]['bottom'] += ns_l_norm[leftLabel][rightLabel]\n", - " rightWidths_norm[rightLabel]['bottom'] += ns_r_norm[leftLabel][rightLabel]\n", + " leftWidths_norm[leftLabel][\"bottom\"] += ns_l_norm[leftLabel][rightLabel]\n", + " rightWidths_norm[rightLabel][\"bottom\"] += ns_r_norm[leftLabel][\n", + " rightLabel\n", + " ]\n", " ax.fill_between(\n", - " np.linspace(leftpos + (bar_width * xMax * 0.5), \\\n", - " leftpos + xMax - (bar_width * xMax * 0.5), len(ys_d)), \\\n", - " ys_d, ys_u, alpha=alpha,\n", - " color=colorDict[labelColor], edgecolor='none'\n", + " np.linspace(\n", + " leftpos + (bar_width * xMax * 0.5),\n", + " leftpos + xMax - (bar_width * xMax * 0.5),\n", + " len(ys_d),\n", + " ),\n", + " ys_d,\n", + " ys_u,\n", + " alpha=alpha,\n", + " color=colorDict[labelColor],\n", + " edgecolor=\"none\",\n", " )\n", - " \n", - "def sankeydiag(data:pd.DataFrame,\n", - " xvar:str, # x column to be plotted.\n", - " yvar:str, # y column to be plotted.\n", - " left_idx:str, #the value in column xvar that is on the left side of each sankey diagram\n", - " right_idx:str, #the value in column xvar that is on the right side of each sankey diagram, if len(left_idx) == 1, it will be broadcasted to the same length as right_idx, otherwise it should have the same length as right_idx\n", - " leftLabels:list=None, #labels for the left side of the diagram. The diagram will be sorted by these labels.\n", - " rightLabels:list=None, #labels for the right side of the diagram. 
The diagram will be sorted by these labels.\n", - " palette:str|dict=None, \n", - " ax=None, #matplotlib axes to be drawn on\n", - " flow:bool=True, #if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison\n", - " sankey:bool=True, #if True, draw the sankey diagram, else draw barplot\n", - " one_sankey:bool=False,# determined by the driver function on plotter.py, if True, draw the sankey diagram across the whole raw data axes\n", - " width:float=0.4, # the width of each sankey diagram\n", - " rightColor:bool=False,#if True, each strip of the diagram will be colored according to the corresponding left labels\n", - " align:str='center', #the alignment of each sankey diagram, can be 'center' or 'left'\n", - " alpha:float=0.65, #the transparency of each strip\n", - " **kwargs):\n", - " '''\n", + "\n", + "\n", + "def sankeydiag(\n", + " data: pd.DataFrame,\n", + " xvar: str, # x column to be plotted.\n", + " yvar: str, # y column to be plotted.\n", + " left_idx: str, # the value in column xvar that is on the left side of each sankey diagram\n", + " right_idx: str, # the value in column xvar that is on the right side of each sankey diagram, if len(left_idx) == 1, it will be broadcasted to the same length as right_idx, otherwise it should have the same length as right_idx\n", + " leftLabels: list = None, # labels for the left side of the diagram. The diagram will be sorted by these labels.\n", + " rightLabels: list = None, # labels for the right side of the diagram. 
The diagram will be sorted by these labels.\n", + " palette: str | dict = None,\n", + " ax=None, # matplotlib axes to be drawn on\n", + " flow: bool = True, # if True, draw the sankey in a flow, else draw 1 vs 1 Sankey diagram for each group comparison\n", + " sankey: bool = True, # if True, draw the sankey diagram, else draw barplot\n", + " one_sankey: bool = False, # determined by the driver function on plotter.py, if True, draw the sankey diagram across the whole raw data axes\n", + " width: float = 0.4, # the width of each sankey diagram\n", + " rightColor: bool = False, # if True, each strip of the diagram will be colored according to the corresponding left labels\n", + " align: str = \"center\", # the alignment of each sankey diagram, can be 'center' or 'left'\n", + " alpha: float = 0.65, # the transparency of each strip\n", + " **kwargs,\n", + "):\n", + " \"\"\"\n", " Read in melted pd.DataFrame, and draw multiple sankey diagram on a single axes\n", " using the value in column yvar according to the value in column xvar\n", " left_idx in the column xvar is on the left side of each sankey diagram\n", " right_idx in the column xvar is on the right side of each sankey diagram\n", "\n", - " '''\n", - "\n", - " import numpy as np\n", - " import pandas as pd\n", - " import seaborn as sns\n", - " import matplotlib.pyplot as plt\n", + " \"\"\"\n", "\n", " if \"width\" in kwargs:\n", " width = kwargs[\"width\"]\n", "\n", " if \"align\" in kwargs:\n", " align = kwargs[\"align\"]\n", - " \n", + "\n", " if \"alpha\" in kwargs:\n", " alpha = kwargs[\"alpha\"]\n", - " \n", + "\n", " if \"rightColor\" in kwargs:\n", " rightColor = kwargs[\"rightColor\"]\n", - " \n", + "\n", " if \"bar_width\" in kwargs:\n", " bar_width = kwargs[\"bar_width\"]\n", - " \n", + "\n", " if \"sankey\" in kwargs:\n", " sankey = kwargs[\"sankey\"]\n", - " \n", + "\n", " if \"flow\" in kwargs:\n", " flow = kwargs[\"flow\"]\n", "\n", @@ -635,7 +672,7 @@ " ax = plt.gca()\n", "\n", " allLabels = 
pd.Series(np.sort(data[yvar].unique())[::-1]).unique()\n", - " \n", + "\n", " # Check if all the elements in left_idx and right_idx are in xvar column\n", " unique_xvar = data[xvar].unique()\n", " if not all(elem in unique_xvar for elem in left_idx):\n", @@ -647,7 +684,7 @@ "\n", " # For baseline comparison, broadcast left_idx to the same length as right_idx\n", " # so that the left of sankey diagram will be the same\n", - " # For sequential comparison, left_idx and right_idx can have anything different \n", + " # For sequential comparison, left_idx and right_idx can have anything different\n", " # but should have the same length\n", " if len(left_idx) == 1:\n", " broadcasted_left = np.broadcast_to(left_idx, len(right_idx))\n", @@ -659,8 +696,7 @@ " if isinstance(palette, dict):\n", " if not all(key in allLabels for key in palette.keys()):\n", " raise ValueError(f\"keys in palette should be in {yvar} column\")\n", - " else: \n", - " plot_palette = palette\n", + " plot_palette = palette\n", " elif isinstance(palette, str):\n", " plot_palette = {}\n", " colorPalette = sns.color_palette(palette, len(allLabels))\n", @@ -670,38 +706,75 @@ " plot_palette = None\n", "\n", " # Create a strip_on list to determine whether to draw the strip during repeated measures\n", - " strip_on = [int(right not in broadcasted_left[:i]) for i, right in enumerate(right_idx)]\n", + " strip_on = [\n", + " int(right not in broadcasted_left[:i]) for i, right in enumerate(right_idx)\n", + " ]\n", "\n", " draw_idx = list(zip(broadcasted_left, right_idx))\n", " for i, (left, right) in enumerate(draw_idx):\n", " if one_sankey == False:\n", " if flow == True:\n", " width = 1\n", - " align = 'edge'\n", - " sankey = False if i == len(draw_idx)-1 else sankey # Remove last strip in flow\n", - " error_bar_on = False if i == len(draw_idx)-1 and flow else True # Remove last error_bar in flow\n", + " align = \"edge\"\n", + " sankey = (\n", + " False if i == len(draw_idx) - 1 else sankey\n", + " ) # Remove 
last strip in flow\n", + " error_bar_on = (\n", + " False if i == len(draw_idx) - 1 and flow else True\n", + " ) # Remove last error_bar in flow\n", " bar_width = 0.4 if sankey == False and flow == False else bar_width\n", - " single_sankey(data[data[xvar]==left][yvar], data[data[xvar]==right][yvar], \n", - " xpos=xpos, ax=ax, colorDict=plot_palette, width=width, \n", - " leftLabels=leftLabels, rightLabels=rightLabels, strip_on=strip_on[i],\n", - " rightColor=rightColor, bar_width=bar_width, sankey=sankey,\n", - " error_bar_on=error_bar_on, flow=flow, align=align, alpha=alpha)\n", + " single_sankey(\n", + " data[data[xvar] == left][yvar],\n", + " data[data[xvar] == right][yvar],\n", + " xpos=xpos,\n", + " ax=ax,\n", + " colorDict=plot_palette,\n", + " width=width,\n", + " leftLabels=leftLabels,\n", + " rightLabels=rightLabels,\n", + " strip_on=strip_on[i],\n", + " rightColor=rightColor,\n", + " bar_width=bar_width,\n", + " sankey=sankey,\n", + " error_bar_on=error_bar_on,\n", + " flow=flow,\n", + " align=align,\n", + " alpha=alpha,\n", + " )\n", " xpos += 1\n", " else:\n", " xpos = 0\n", " width = 1\n", " if sankey == False:\n", " bar_width = 0.5\n", - " single_sankey(data[data[xvar]==left][yvar], data[data[xvar]==right][yvar], \n", - " xpos=xpos, ax=ax, colorDict=plot_palette, width=width, \n", - " leftLabels=leftLabels, rightLabels=rightLabels, \n", - " rightColor=rightColor, bar_width=bar_width, sankey=sankey, \n", - " one_sankey=one_sankey, flow=False, align='edge', alpha=alpha)\n", - "\n", - "# Now only draw vs xticks for two-column sankey diagram\n", - " if one_sankey == False or (sankey and not flow): \n", - " sankey_ticks = [f\"{left}\" for left in broadcasted_left] if flow \\\n", - " else [f\"{left}\\n v.s.\\n{right}\" for left, right in zip(broadcasted_left, right_idx)]\n", + " single_sankey(\n", + " data[data[xvar] == left][yvar],\n", + " data[data[xvar] == right][yvar],\n", + " xpos=xpos,\n", + " ax=ax,\n", + " colorDict=plot_palette,\n", + " 
width=width,\n", + " leftLabels=leftLabels,\n", + " rightLabels=rightLabels,\n", + " rightColor=rightColor,\n", + " bar_width=bar_width,\n", + " sankey=sankey,\n", + " one_sankey=one_sankey,\n", + " flow=False,\n", + " align=\"edge\",\n", + " alpha=alpha,\n", + " )\n", + "\n", + " # Now only draw vs xticks for two-column sankey diagram\n", + " if ~one_sankey or (sankey and not flow):\n", + " sankey_ticks = (\n", + " [f\"{left}\" for left in broadcasted_left]\n", + " if flow\n", + " else [\n", + " f\"{left}\\n v.s.\\n{right}\"\n", + " for left, right in zip(broadcasted_left, right_idx)\n", + " ]\n", + " )\n", " ax.get_xaxis().set_ticks(np.arange(len(right_idx)))\n", " ax.get_xaxis().set_ticklabels(sankey_ticks)\n", " else:\n", diff --git a/nbs/API/plotter.ipynb b/nbs/API/plotter.ipynb index 227c27fa..ef4d602a 100644 --- a/nbs/API/plotter.ipynb +++ b/nbs/API/plotter.ipynb @@ -47,6 +47,23 @@ "nbdev.nbdev_export()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "7562c1a1", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import warnings\n", + "import logging" + ] + }, { "cell_type": "code", "execution_count": null, @@ -55,15 +72,15 @@ "outputs": [], "source": [ "#| export\n", - "\n", - "def EffectSizeDataFramePlotter(EffectSizeDataFrame, **plot_kwargs):\n", + "# TODO refactor function name\n", + "def EffectSizeDataFramePlotter(effectsize_df, **plot_kwargs):\n", " \"\"\"\n", " Custom function that creates an estimation plot from an EffectSizeDataFrame.\n", " Keywords\n", " --------\n", " Parameters\n", " ----------\n", - " EffectSizeDataFrame\n", + " effectsize_df\n", " A `dabest` EffectSizeDataFrame object.\n", " plot_kwargs\n", " color_col=None\n", @@ -96,31 +113,28 @@ " fontsize_contrastxlabel=12, fontsize_contrastylabel=12,\n", " fontsize_delta2label=12\n", " \"\"\"\n", - "\n", - " 
import numpy as np\n", - " import seaborn as sns\n", - " import matplotlib\n", - " import matplotlib.pyplot as plt\n", - " import pandas as pd\n", - " import warnings\n", - " warnings.filterwarnings('ignore', 'This figure includes Axes that are not compatible with tight_layout')\n", - "\n", " from .misc_tools import merge_two_dicts\n", " from .plot_tools import halfviolin, get_swarm_spans, error_bar, sankeydiag\n", - " from ._stats_tools.effsize import _compute_standardizers, _compute_hedges_correction_factor\n", + " from ._stats_tools.effsize import (\n", + " _compute_standardizers,\n", + " _compute_hedges_correction_factor,\n", + " )\n", + "\n", + " warnings.filterwarnings(\n", + " \"ignore\", \"This figure includes Axes that are not compatible with tight_layout\"\n", + " )\n", "\n", - " import logging\n", " # Have to disable logging of warning when get_legend_handles_labels()\n", " # tries to get from slopegraph.\n", " logging.disable(logging.WARNING)\n", "\n", " # Save rcParams that I will alter, so I can reset back.\n", " original_rcParams = {}\n", - " _changed_rcParams = ['axes.grid']\n", + " _changed_rcParams = [\"axes.grid\"]\n", " for parameter in _changed_rcParams:\n", " original_rcParams[parameter] = plt.rcParams[parameter]\n", "\n", - " plt.rcParams['axes.grid'] = False\n", + " plt.rcParams[\"axes.grid\"] = False\n", "\n", " ytick_color = plt.rcParams[\"ytick.color\"]\n", " face_color = plot_kwargs[\"face_color\"]\n", @@ -128,18 +142,18 @@ " if plot_kwargs[\"face_color\"] is None:\n", " face_color = \"white\"\n", "\n", - " dabest_obj = EffectSizeDataFrame.dabest_obj\n", - " plot_data = EffectSizeDataFrame._plot_data\n", - " xvar = EffectSizeDataFrame.xvar\n", - " yvar = EffectSizeDataFrame.yvar\n", - " is_paired = EffectSizeDataFrame.is_paired\n", - " delta2 = EffectSizeDataFrame.delta2\n", - " mini_meta = EffectSizeDataFrame.mini_meta\n", - " effect_size = EffectSizeDataFrame.effect_size\n", - " proportional = EffectSizeDataFrame.proportional\n", + " 
dabest_obj = effectsize_df.dabest_obj\n", + " plot_data = effectsize_df._plot_data\n", + " xvar = effectsize_df.xvar\n", + " yvar = effectsize_df.yvar\n", + " is_paired = effectsize_df.is_paired\n", + " delta2 = effectsize_df.delta2\n", + " mini_meta = effectsize_df.mini_meta\n", + " effect_size = effectsize_df.effect_size\n", + " proportional = effectsize_df.proportional\n", "\n", " all_plot_groups = dabest_obj._all_plot_groups\n", - " idx = dabest_obj.idx\n", + " idx = dabest_obj.idx\n", "\n", " if effect_size not in [\"mean_diff\", \"delta_g\"] or not delta2:\n", " show_delta2 = False\n", @@ -157,16 +171,16 @@ "\n", " # Disable Gardner-Altman plotting if any of the idxs comprise of more than\n", " # two groups or if it is a delta-delta plot.\n", - " float_contrast = plot_kwargs[\"float_contrast\"]\n", - " effect_size_type = EffectSizeDataFrame.effect_size\n", + " float_contrast = plot_kwargs[\"float_contrast\"]\n", + " effect_size_type = effectsize_df.effect_size\n", " if len(idx) > 1 or len(idx[0]) > 2:\n", " float_contrast = False\n", "\n", - " if effect_size_type in ['cliffs_delta']:\n", + " if effect_size_type in [\"cliffs_delta\"]:\n", " float_contrast = False\n", "\n", " if show_delta2 or show_mini_meta:\n", - " float_contrast = False \n", + " float_contrast = False\n", "\n", " if not is_paired:\n", " show_pairs = False\n", @@ -174,12 +188,13 @@ " show_pairs = plot_kwargs[\"show_pairs\"]\n", "\n", " # Set default kwargs first, then merge with user-dictated ones.\n", - " default_swarmplot_kwargs = {'size': plot_kwargs[\"raw_marker_size\"]}\n", + " default_swarmplot_kwargs = {\"size\": plot_kwargs[\"raw_marker_size\"]}\n", " if plot_kwargs[\"swarmplot_kwargs\"] is None:\n", " swarmplot_kwargs = default_swarmplot_kwargs\n", " else:\n", - " swarmplot_kwargs = merge_two_dicts(default_swarmplot_kwargs,\n", - " plot_kwargs[\"swarmplot_kwargs\"])\n", + " swarmplot_kwargs = merge_two_dicts(\n", + " default_swarmplot_kwargs, plot_kwargs[\"swarmplot_kwargs\"]\n", + " 
)\n", "\n", " # Barplot kwargs\n", " default_barplot_kwargs = {\"estimator\": np.mean, \"errorbar\": plot_kwargs[\"ci\"]}\n", @@ -187,87 +202,105 @@ " if plot_kwargs[\"barplot_kwargs\"] is None:\n", " barplot_kwargs = default_barplot_kwargs\n", " else:\n", - " barplot_kwargs = merge_two_dicts(default_barplot_kwargs,\n", - " plot_kwargs[\"barplot_kwargs\"])\n", + " barplot_kwargs = merge_two_dicts(\n", + " default_barplot_kwargs, plot_kwargs[\"barplot_kwargs\"]\n", + " )\n", "\n", " # Sankey Diagram kwargs\n", - " default_sankey_kwargs = {\"width\": 0.4, \"align\": \"center\",\n", - " \"sankey\":True, \"flow\":True,\n", - " \"alpha\": 0.4, \"rightColor\": False,\n", - " \"bar_width\":0.2}\n", + " default_sankey_kwargs = {\n", + " \"width\": 0.4,\n", + " \"align\": \"center\",\n", + " \"sankey\": True,\n", + " \"flow\": True,\n", + " \"alpha\": 0.4,\n", + " \"rightColor\": False,\n", + " \"bar_width\": 0.2,\n", + " }\n", " if plot_kwargs[\"sankey_kwargs\"] is None:\n", " sankey_kwargs = default_sankey_kwargs\n", " else:\n", - " sankey_kwargs = merge_two_dicts(default_sankey_kwargs,\n", - " plot_kwargs[\"sankey_kwargs\"])\n", + " sankey_kwargs = merge_two_dicts(\n", + " default_sankey_kwargs, plot_kwargs[\"sankey_kwargs\"]\n", + " )\n", " # We also need to extract the `sankey` and `flow` from the kwargs for plotter.py\n", " # to use for varying different kinds of paired proportional plots\n", " # We also don't want to pop the parameter from the kwargs\n", - " sankey = sankey_kwargs['sankey']\n", - " flow = sankey_kwargs['flow']\n", + " sankey = sankey_kwargs[\"sankey\"]\n", + " flow = sankey_kwargs[\"flow\"]\n", "\n", " # Violinplot kwargs.\n", - " default_violinplot_kwargs = {'widths':0.5, 'vert':True,\n", - " 'showextrema':False, 'showmedians':False}\n", + " default_violinplot_kwargs = {\n", + " \"widths\": 0.5,\n", + " \"vert\": True,\n", + " \"showextrema\": False,\n", + " \"showmedians\": False,\n", + " }\n", " if plot_kwargs[\"violinplot_kwargs\"] is None:\n", " 
violinplot_kwargs = default_violinplot_kwargs\n", " else:\n", - " violinplot_kwargs = merge_two_dicts(default_violinplot_kwargs,\n", - " plot_kwargs[\"violinplot_kwargs\"])\n", + " violinplot_kwargs = merge_two_dicts(\n", + " default_violinplot_kwargs, plot_kwargs[\"violinplot_kwargs\"]\n", + " )\n", "\n", " # slopegraph kwargs.\n", - " default_slopegraph_kwargs = {'linewidth':1, 'alpha':0.5}\n", + " default_slopegraph_kwargs = {\"linewidth\": 1, \"alpha\": 0.5}\n", " if plot_kwargs[\"slopegraph_kwargs\"] is None:\n", " slopegraph_kwargs = default_slopegraph_kwargs\n", " else:\n", - " slopegraph_kwargs = merge_two_dicts(default_slopegraph_kwargs,\n", - " plot_kwargs[\"slopegraph_kwargs\"])\n", + " slopegraph_kwargs = merge_two_dicts(\n", + " default_slopegraph_kwargs, plot_kwargs[\"slopegraph_kwargs\"]\n", + " )\n", "\n", " # Zero reference-line kwargs.\n", - " default_reflines_kwargs = {'linestyle':'solid', 'linewidth':0.75,\n", - " 'zorder': 2,\n", - " 'color': ytick_color}\n", + " default_reflines_kwargs = {\n", + " \"linestyle\": \"solid\",\n", + " \"linewidth\": 0.75,\n", + " \"zorder\": 2,\n", + " \"color\": ytick_color,\n", + " }\n", " if plot_kwargs[\"reflines_kwargs\"] is None:\n", " reflines_kwargs = default_reflines_kwargs\n", " else:\n", - " reflines_kwargs = merge_two_dicts(default_reflines_kwargs,\n", - " plot_kwargs[\"reflines_kwargs\"])\n", + " reflines_kwargs = merge_two_dicts(\n", + " default_reflines_kwargs, plot_kwargs[\"reflines_kwargs\"]\n", + " )\n", "\n", " # Legend kwargs.\n", - " default_legend_kwargs = {'loc': 'upper left', 'frameon': False}\n", + " default_legend_kwargs = {\"loc\": \"upper left\", \"frameon\": False}\n", " if plot_kwargs[\"legend_kwargs\"] is None:\n", " legend_kwargs = default_legend_kwargs\n", " else:\n", - " legend_kwargs = merge_two_dicts(default_legend_kwargs,\n", - " plot_kwargs[\"legend_kwargs\"])\n", - " \n", - " \n", - "################################################### GRIDKEY WIP - extracting arguments \n", - 
" \n", + " legend_kwargs = merge_two_dicts(\n", + " default_legend_kwargs, plot_kwargs[\"legend_kwargs\"]\n", + " )\n", + "\n", + " ################################################### GRIDKEY WIP - extracting arguments\n", + "\n", " gridkey_rows = plot_kwargs[\"gridkey_rows\"]\n", " gridkey_merge_pairs = plot_kwargs[\"gridkey_merge_pairs\"]\n", " gridkey_show_Ns = plot_kwargs[\"gridkey_show_Ns\"]\n", " gridkey_show_es = plot_kwargs[\"gridkey_show_es\"]\n", - " \n", - " if gridkey_rows == None:\n", + "\n", + " if gridkey_rows is None:\n", " gridkey_show_Ns = False\n", " gridkey_show_es = False\n", - " \n", - "################################################### END GRIDKEY WIP - extracting arguments\n", + "\n", + " ################################################### END GRIDKEY WIP - extracting arguments\n", "\n", " # Group summaries kwargs.\n", - " gs_default = {'mean_sd', 'median_quartiles', None}\n", + " gs_default = {\"mean_sd\", \"median_quartiles\", None}\n", " if plot_kwargs[\"group_summaries\"] not in gs_default:\n", - " raise ValueError('group_summaries must be one of'\n", - " ' these: {}.'.format(gs_default) )\n", + " raise ValueError(\n", + " \"group_summaries must be one of\" \" these: {}.\".format(gs_default)\n", + " )\n", "\n", - " default_group_summary_kwargs = {'zorder': 3, 'lw': 2,\n", - " 'alpha': 1}\n", + " default_group_summary_kwargs = {\"zorder\": 3, \"lw\": 2, \"alpha\": 1}\n", " if plot_kwargs[\"group_summary_kwargs\"] is None:\n", " group_summary_kwargs = default_group_summary_kwargs\n", " else:\n", - " group_summary_kwargs = merge_two_dicts(default_group_summary_kwargs,\n", - " plot_kwargs[\"group_summary_kwargs\"])\n", + " group_summary_kwargs = merge_two_dicts(\n", + " default_group_summary_kwargs, plot_kwargs[\"group_summary_kwargs\"]\n", + " )\n", "\n", " # Create color palette that will be shared across subplots.\n", " color_col = plot_kwargs[\"color_col\"]\n", @@ -293,35 +326,24 @@ " if custom_pal is None:\n", " unsat_colors = 
sns.color_palette(n_colors=n_groups)\n", " else:\n", - "\n", " if isinstance(custom_pal, dict):\n", - " groups_in_palette = {k: v for k,v in custom_pal.items()\n", - " if k in color_groups}\n", - "\n", - " # # check that all the keys in custom_pal are found in the\n", - " # # color column.\n", - " # col_grps = {k for k in color_groups}\n", - " # pal_grps = {k for k in custom_pal.keys()}\n", - " # not_in_pal = pal_grps.difference(col_grps)\n", - " # if len(not_in_pal) > 0:\n", - " # err1 = 'The custom palette keys {} '.format(not_in_pal)\n", - " # err2 = 'are not found in `{}`. Please check.'.format(color_col)\n", - " # errstring = (err1 + err2)\n", - " # raise IndexError(errstring)\n", + " groups_in_palette = {\n", + " k: v for k, v in custom_pal.items() if k in color_groups\n", + " }\n", "\n", " names = groups_in_palette.keys()\n", " unsat_colors = groups_in_palette.values()\n", "\n", " elif isinstance(custom_pal, list):\n", - " unsat_colors = custom_pal[0: n_groups]\n", + " unsat_colors = custom_pal[0:n_groups]\n", "\n", " elif isinstance(custom_pal, str):\n", " # check it is in the list of matplotlib palettes.\n", " if custom_pal in plt.colormaps():\n", " unsat_colors = sns.color_palette(custom_pal, n_groups)\n", " else:\n", - " err1 = 'The specified `custom_palette` {}'.format(custom_pal)\n", - " err2 = ' is not a matplotlib palette. Please check.'\n", + " err1 = \"The specified `custom_palette` {}\".format(custom_pal)\n", + " err2 = \" is not a matplotlib palette. 
Please check.\"\n", " raise ValueError(err1 + err2)\n", "\n", " if custom_pal is None and color_col is None:\n", @@ -351,159 +373,165 @@ " plot_palette_sankey = custom_pal\n", "\n", " # Infer the figsize.\n", - " fig_size = plot_kwargs[\"fig_size\"]\n", + " fig_size = plot_kwargs[\"fig_size\"]\n", " if fig_size is None:\n", " all_groups_count = np.sum([len(i) for i in dabest_obj.idx])\n", " # Increase the width for delta-delta graph\n", " if show_delta2 or show_mini_meta:\n", " all_groups_count += 2\n", - " if is_paired and show_pairs is True and proportional is False:\n", + " if is_paired and show_pairs and proportional is False:\n", " frac = 0.75\n", " else:\n", " frac = 1\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " height_inches = 4\n", " each_group_width_inches = 2.5 * frac\n", " else:\n", " height_inches = 6\n", " each_group_width_inches = 1.5 * frac\n", "\n", - " width_inches = (each_group_width_inches * all_groups_count)\n", + " width_inches = each_group_width_inches * all_groups_count\n", " fig_size = (width_inches, height_inches)\n", "\n", " # Initialise the figure.\n", - " # sns.set(context=\"talk\", style='ticks')\n", - " init_fig_kwargs = dict(figsize=fig_size, dpi=plot_kwargs[\"dpi\"]\n", - " ,tight_layout=True)\n", + " init_fig_kwargs = dict(figsize=fig_size, dpi=plot_kwargs[\"dpi\"], tight_layout=True)\n", "\n", " width_ratios_ga = [2.5, 1]\n", - " \n", - "###################### GRIDKEY HSPACE ALTERATION\n", + "\n", + " ###################### GRIDKEY HSPACE ALTERATION\n", "\n", " # Sets hspace for cummings plots if gridkey is shown.\n", - " if gridkey_rows != None:\n", + " if gridkey_rows is not None:\n", " h_space_cummings = 0.1\n", " else:\n", " h_space_cummings = 0.3\n", - " \n", - " \n", - "###################### END GRIDKEY HSPACE ALTERATION \n", - " \n", + "\n", + " ###################### END GRIDKEY HSPACE ALTERATION\n", + "\n", " if plot_kwargs[\"ax\"] is not None:\n", " # New in v0.2.6.\n", " # Use inset axes to 
create the estimation plot inside a single axes.\n", " # Author: Adam L Nekimken. (PR #73)\n", - " inset_contrast = True\n", " rawdata_axes = plot_kwargs[\"ax\"]\n", " ax_position = rawdata_axes.get_position() # [[x0, y0], [x1, y1]]\n", - " \n", + "\n", " fig = rawdata_axes.get_figure()\n", " fig.patch.set_facecolor(face_color)\n", - " \n", - " if float_contrast is True:\n", + "\n", + " if float_contrast:\n", " axins = rawdata_axes.inset_axes(\n", - " [1, 0,\n", - " width_ratios_ga[1]/width_ratios_ga[0], 1])\n", + " [1, 0, width_ratios_ga[1] / width_ratios_ga[0], 1]\n", + " )\n", " rawdata_axes.set_position( # [l, b, w, h]\n", - " [ax_position.x0,\n", - " ax_position.y0,\n", - " (ax_position.x1 - ax_position.x0) * (width_ratios_ga[0] /\n", - " sum(width_ratios_ga)),\n", - " (ax_position.y1 - ax_position.y0)])\n", + " [\n", + " ax_position.x0,\n", + " ax_position.y0,\n", + " (ax_position.x1 - ax_position.x0)\n", + " * (width_ratios_ga[0] / sum(width_ratios_ga)),\n", + " (ax_position.y1 - ax_position.y0),\n", + " ]\n", + " )\n", "\n", " contrast_axes = axins\n", "\n", " else:\n", " axins = rawdata_axes.inset_axes([0, -1 - h_space_cummings, 1, 1])\n", - " plot_height = ((ax_position.y1 - ax_position.y0) /\n", - " (2 + h_space_cummings))\n", + " plot_height = (ax_position.y1 - ax_position.y0) / (2 + h_space_cummings)\n", " rawdata_axes.set_position(\n", - " [ax_position.x0,\n", - " ax_position.y0 + (1 + h_space_cummings) * plot_height,\n", - " (ax_position.x1 - ax_position.x0),\n", - " plot_height])\n", - "\n", - " # If the contrast axes are NOT floating, create lists to store\n", - " # raw ylims and raw tick intervals, so that I can normalize\n", - " # their ylims later.\n", - " contrast_ax_ylim_low = list()\n", - " contrast_ax_ylim_high = list()\n", - " contrast_ax_ylim_tickintervals = list()\n", + " [\n", + " ax_position.x0,\n", + " ax_position.y0 + (1 + h_space_cummings) * plot_height,\n", + " (ax_position.x1 - ax_position.x0),\n", + " plot_height,\n", + " ]\n", + 
" )\n", + "\n", " contrast_axes = axins\n", " rawdata_axes.contrast_axes = axins\n", "\n", " else:\n", - " inset_contrast = False\n", " # Here, we hardcode some figure parameters.\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " fig, axx = plt.subplots(\n", - " ncols=2,\n", - " gridspec_kw={\"width_ratios\": width_ratios_ga,\n", - " \"wspace\": 0},\n", - " **init_fig_kwargs)\n", + " ncols=2,\n", + " gridspec_kw={\"width_ratios\": width_ratios_ga, \"wspace\": 0},\n", + " **init_fig_kwargs\n", + " )\n", " fig.patch.set_facecolor(face_color)\n", "\n", " else:\n", - " fig, axx = plt.subplots(nrows=2,\n", - " gridspec_kw={\"hspace\": h_space_cummings},\n", - " **init_fig_kwargs)\n", + " fig, axx = plt.subplots(\n", + " nrows=2, gridspec_kw={\"hspace\": h_space_cummings}, **init_fig_kwargs\n", + " )\n", " fig.patch.set_facecolor(face_color)\n", - " # If the contrast axes are NOT floating, create lists to store\n", - " # raw ylims and raw tick intervals, so that I can normalize\n", - " # their ylims later.\n", - " contrast_ax_ylim_low = list()\n", - " contrast_ax_ylim_high = list()\n", - " contrast_ax_ylim_tickintervals = list()\n", - " \n", + "\n", " # Title\n", " title = plot_kwargs[\"title\"]\n", " fontsize_title = plot_kwargs[\"fontsize_title\"]\n", " if title is not None:\n", " fig.suptitle(title, fontsize=fontsize_title)\n", - " rawdata_axes = axx[0]\n", + " rawdata_axes = axx[0]\n", " contrast_axes = axx[1]\n", " rawdata_axes.set_frame_on(False)\n", " contrast_axes.set_frame_on(False)\n", "\n", - " redraw_axes_kwargs = {'colors' : ytick_color,\n", - " 'facecolors' : ytick_color,\n", - " 'lw' : 1,\n", - " 'zorder' : 10,\n", - " 'clip_on' : False}\n", + " redraw_axes_kwargs = {\n", + " \"colors\": ytick_color,\n", + " \"facecolors\": ytick_color,\n", + " \"lw\": 1,\n", + " \"zorder\": 10,\n", + " \"clip_on\": False,\n", + " }\n", "\n", " swarm_ylim = plot_kwargs[\"swarm_ylim\"]\n", "\n", " if swarm_ylim is not None:\n", " 
rawdata_axes.set_ylim(swarm_ylim)\n", "\n", - " one_sankey = False if is_paired is not None else False # Flag to indicate if only one sankey is plotted.\n", - " two_col_sankey = True if proportional == True and one_sankey == False and sankey == True and flow == False else False\n", + " one_sankey = (\n", + " False if is_paired is not None else False\n", + " ) # Flag to indicate if only one sankey is plotted.\n", + " two_col_sankey = (\n", + " True if proportional and not one_sankey and sankey and not flow else False\n", + " )\n", "\n", - " if show_pairs is True:\n", + " if show_pairs:\n", " # Determine temp_idx based on is_paired and proportional conditions\n", " if is_paired == \"baseline\":\n", - " idx_pairs = [(control, test) for i in idx for control, test in zip([i[0]] * (len(i) - 1), i[1:])]\n", + " idx_pairs = [\n", + " (control, test)\n", + " for i in idx\n", + " for control, test in zip([i[0]] * (len(i) - 1), i[1:])\n", + " ]\n", " temp_idx = idx if not proportional else idx_pairs\n", " else:\n", - " idx_pairs = [(control, test) for i in idx for control, test in zip(i[:-1], i[1:])]\n", + " idx_pairs = [\n", + " (control, test) for i in idx for control, test in zip(i[:-1], i[1:])\n", + " ]\n", " temp_idx = idx if not proportional else idx_pairs\n", "\n", " # Determine temp_all_plot_groups based on proportional condition\n", " plot_groups = [item for i in temp_idx for item in i]\n", " temp_all_plot_groups = all_plot_groups if not proportional else plot_groups\n", - " \n", - " if proportional==False:\n", - " # Plot the raw data as a slopegraph.\n", - " # Pivot the long (melted) data.\n", + "\n", + " if not proportional:\n", + " # Plot the raw data as a slopegraph.\n", + " # Pivot the long (melted) data.\n", " if color_col is None:\n", " pivot_values = [yvar]\n", " else:\n", " pivot_values = [yvar, color_col]\n", - " pivoted_plot_data = pd.pivot(data=plot_data, index=dabest_obj.id_col,\n", - " columns=xvar, values=pivot_values)\n", + " pivoted_plot_data = 
pd.pivot(\n", + " data=plot_data,\n", + " index=dabest_obj.id_col,\n", + " columns=xvar,\n", + " values=pivot_values,\n", + " )\n", " x_start = 0\n", " for ii, current_tuple in enumerate(temp_idx):\n", - " current_pair = pivoted_plot_data.loc[:, pd.MultiIndex.from_product([pivot_values, current_tuple])].dropna()\n", + " current_pair = pivoted_plot_data.loc[\n", + " :, pd.MultiIndex.from_product([pivot_values, current_tuple])\n", + " ].dropna()\n", " grp_count = len(current_tuple)\n", " # Iterate through the data for the current tuple.\n", " for ID, observation in current_pair.iterrows():\n", @@ -511,136 +539,174 @@ " y_points = observation[yvar].tolist()\n", "\n", " if color_col is None:\n", - " slopegraph_kwargs['color'] = ytick_color\n", + " slopegraph_kwargs[\"color\"] = ytick_color\n", " else:\n", " color_key = observation[color_col][0]\n", - " if isinstance(color_key, (str, np.int64, np.float64)) == True:\n", - " slopegraph_kwargs['color'] = plot_palette_raw[color_key]\n", - " slopegraph_kwargs['label'] = color_key\n", + " if isinstance(color_key, (str, np.int64, np.float64)):\n", + " slopegraph_kwargs[\"color\"] = plot_palette_raw[color_key]\n", + " slopegraph_kwargs[\"label\"] = color_key\n", "\n", - " rawdata_axes.plot(x_points, y_points, **slopegraph_kwargs) \n", + " rawdata_axes.plot(x_points, y_points, **slopegraph_kwargs)\n", "\n", - " \n", " x_start = x_start + grp_count\n", - " \n", - " ##################### DELTA PTS ON CONTRAST PLOT WIP \n", + "\n", + " ##################### DELTA PTS ON CONTRAST PLOT WIP\n", "\n", " contrast_show_deltas = plot_kwargs[\"contrast_show_deltas\"]\n", - " \n", - " if is_paired == None:\n", + "\n", + " if is_paired is None:\n", " contrast_show_deltas = False\n", - " \n", - " if contrast_show_deltas == True:\n", - " \n", - " trans = plt.gca().transData\n", - " \n", + "\n", + " if contrast_show_deltas:\n", " delta_plot_data_temp = plot_data.copy()\n", " delta_id_col = dabest_obj.id_col\n", - " if color_col != None:\n", - " 
delta_plot_data = delta_plot_data_temp[[xvar, yvar, delta_id_col, color_col]]\n", - " deltapts_args = {\"hue\" : color_col, \n", - " \"palette\" : plot_palette_raw,\n", - " \"marker\" : \"^\",\n", - " \"alpha\" : 0.5}\n", - " \n", + " if color_col is not None:\n", + " delta_plot_data = delta_plot_data_temp[\n", + " [xvar, yvar, delta_id_col, color_col]\n", + " ]\n", + " deltapts_args = {\n", + " \"hue\": color_col,\n", + " \"palette\": plot_palette_raw,\n", + " \"marker\": \"^\",\n", + " \"alpha\": 0.5,\n", + " }\n", + "\n", " else:\n", " delta_plot_data = delta_plot_data_temp[[xvar, yvar, delta_id_col]]\n", - " deltapts_args = {\"color\" : \"k\",\n", - " \"marker\" : \"^\",\n", - " \"alpha\" : 0.5}\n", - " \n", + " deltapts_args = {\"color\": \"k\", \"marker\": \"^\", \"alpha\": 0.5}\n", + "\n", " final_deltas = pd.DataFrame()\n", " for i in idx:\n", " for j in i:\n", " if i.index(j) != 0:\n", - " temp_df_exp = delta_plot_data[delta_plot_data[xvar].str.contains(j)].reset_index(drop=True)\n", + " temp_df_exp = delta_plot_data[\n", + " delta_plot_data[xvar].str.contains(j)\n", + " ].reset_index(drop=True)\n", " if is_paired == \"baseline\":\n", - " temp_df_cont = delta_plot_data[delta_plot_data[xvar].str.contains(i[0])].reset_index(drop=True)\n", + " temp_df_cont = delta_plot_data[\n", + " delta_plot_data[xvar].str.contains(i[0])\n", + " ].reset_index(drop=True)\n", " elif is_paired == \"sequential\":\n", - " temp_df_cont = delta_plot_data[delta_plot_data[xvar].str.contains(i[i.index(j) - 1])].reset_index(drop=True)\n", + " temp_df_cont = delta_plot_data[\n", + " delta_plot_data[xvar].str.contains(\n", + " i[i.index(j) - 1]\n", + " )\n", + " ].reset_index(drop=True)\n", " delta_df = temp_df_exp.copy()\n", " delta_df[yvar] = temp_df_exp[yvar] - temp_df_cont[yvar]\n", - " final_deltas = pd.concat([final_deltas, delta_df]) \n", - " \n", - " \n", + " final_deltas = pd.concat([final_deltas, delta_df])\n", + "\n", " # Plot the raw data as a swarmplot.\n", - " 
deltapts_plot = sns.swarmplot(data=final_deltas, x=xvar, y=yvar,\n", - " ax=contrast_axes,\n", - " order=all_plot_groups, \n", - " zorder=2,\n", - " **deltapts_args)\n", + " deltapts_plot = sns.swarmplot(\n", + " data=final_deltas,\n", + " x=xvar,\n", + " y=yvar,\n", + " ax=contrast_axes,\n", + " order=all_plot_groups,\n", + " zorder=2,\n", + " **deltapts_args\n", + " )\n", " contrast_axes.legend().set_visible(False)\n", - " \n", - " ##################### DELTA PTS ON CONTRAST PLOT END\n", "\n", - " \n", + " ##################### DELTA PTS ON CONTRAST PLOT END\n", + "\n", " # Set the tick labels, because the slopegraph plotting doesn't.\n", " rawdata_axes.set_xticks(np.arange(0, len(temp_all_plot_groups)))\n", " rawdata_axes.set_xticklabels(temp_all_plot_groups)\n", - " \n", + "\n", " else:\n", " # Plot the raw data as a set of Sankey Diagrams aligned like barplot.\n", " group_summaries = plot_kwargs[\"group_summaries\"]\n", " if group_summaries is None:\n", " group_summaries = \"mean_sd\"\n", " err_color = plot_kwargs[\"err_color\"]\n", - " if err_color == None:\n", + " if err_color is None:\n", " err_color = \"black\"\n", "\n", - " if show_pairs is True:\n", + " if show_pairs:\n", " sankey_control_group = []\n", " sankey_test_group = []\n", " # Design for Sankey Flow Diagram\n", - " sankey_idx = [(control, test) for i in idx for control, test in zip(i[:], (i[1:]+(i[0],)))]\\\n", - " if flow is True else temp_idx\n", + " sankey_idx = (\n", + " [\n", + " (control, test)\n", + " for i in idx\n", + " for control, test in zip(i[:], (i[1:] + (i[0],)))\n", + " ]\n", + " if flow\n", + " else temp_idx\n", + " )\n", " for i in sankey_idx:\n", " sankey_control_group.append(i[0])\n", - " sankey_test_group.append(i[1]) \n", + " sankey_test_group.append(i[1])\n", "\n", " if len(temp_all_plot_groups) == 2:\n", - " one_sankey = True \n", - " sankey_control_group.pop(); sankey_test_group.pop() # Remove the last element from two lists\n", + " one_sankey = True\n", + " 
sankey_control_group.pop()\n", + " sankey_test_group.pop() # Remove the last element from two lists\n", "\n", " # two_col_sankey = True if proportional == True and one_sankey == False and sankey == True and flow == False else False\n", "\n", " # Replace the paired proportional plot with sankey diagram\n", - " sankeyplot = sankeydiag(plot_data, xvar=xvar, yvar=yvar, \n", - " left_idx=sankey_control_group, \n", - " right_idx=sankey_test_group,\n", - " palette=plot_palette_sankey,\n", - " ax=rawdata_axes, \n", - " one_sankey=one_sankey,\n", - " **sankey_kwargs)\n", - " \n", + " sankeyplot = sankeydiag(\n", + " plot_data,\n", + " xvar=xvar,\n", + " yvar=yvar,\n", + " left_idx=sankey_control_group,\n", + " right_idx=sankey_test_group,\n", + " palette=plot_palette_sankey,\n", + " ax=rawdata_axes,\n", + " one_sankey=one_sankey,\n", + " **sankey_kwargs\n", + " )\n", + "\n", " else:\n", - " if proportional==False:\n", + " if not proportional:\n", " # Plot the raw data as a swarmplot.\n", - " rawdata_plot = sns.swarmplot(data=plot_data, x=xvar, y=yvar,\n", - " ax=rawdata_axes,\n", - " order=all_plot_groups, hue=color_col,\n", - " palette=plot_palette_raw, zorder=1,\n", - " **swarmplot_kwargs)\n", + " rawdata_plot = sns.swarmplot(\n", + " data=plot_data,\n", + " x=xvar,\n", + " y=yvar,\n", + " ax=rawdata_axes,\n", + " order=all_plot_groups,\n", + " hue=color_col,\n", + " palette=plot_palette_raw,\n", + " zorder=1,\n", + " **swarmplot_kwargs\n", + " )\n", " else:\n", " # Plot the raw data as a barplot.\n", - " bar1_df = pd.DataFrame({xvar: all_plot_groups, 'proportion': np.ones(len(all_plot_groups))})\n", - " bar1 = sns.barplot(data=bar1_df, x=xvar, y=\"proportion\",\n", - " ax=rawdata_axes,\n", - " order=all_plot_groups,\n", - " linewidth=2, facecolor=(1, 1, 1, 0), edgecolor=bar_color,\n", - " zorder=1)\n", - " bar2 = sns.barplot(data=plot_data, x=xvar, y=yvar,\n", - " ax=rawdata_axes,\n", - " order=all_plot_groups,\n", - " palette=plot_palette_bar,\n", - " zorder=1,\n", - " 
**barplot_kwargs)\n", + " bar1_df = pd.DataFrame(\n", + " {xvar: all_plot_groups, \"proportion\": np.ones(len(all_plot_groups))}\n", + " )\n", + " bar1 = sns.barplot(\n", + " data=bar1_df,\n", + " x=xvar,\n", + " y=\"proportion\",\n", + " ax=rawdata_axes,\n", + " order=all_plot_groups,\n", + " linewidth=2,\n", + " facecolor=(1, 1, 1, 0),\n", + " edgecolor=bar_color,\n", + " zorder=1,\n", + " )\n", + " bar2 = sns.barplot(\n", + " data=plot_data,\n", + " x=xvar,\n", + " y=yvar,\n", + " ax=rawdata_axes,\n", + " order=all_plot_groups,\n", + " palette=plot_palette_bar,\n", + " zorder=1,\n", + " **barplot_kwargs\n", + " )\n", " # adjust the width of bars\n", " bar_width = plot_kwargs[\"bar_width\"]\n", " for bar in bar1.patches:\n", " x = bar.get_x()\n", " width = bar.get_width()\n", - " centre = x + width / 2.\n", - " bar.set_x(centre - bar_width / 2.)\n", + " centre = x + width / 2.0\n", + " bar.set_x(centre - bar_width / 2.0)\n", " bar.set_width(bar_width)\n", "\n", " # Plot the gapped line summaries, if this is not a Cumming plot.\n", @@ -649,7 +715,7 @@ " if group_summaries is None:\n", " group_summaries = \"mean_sd\"\n", "\n", - " if group_summaries is not None and proportional==False:\n", + " if group_summaries is not None and not proportional:\n", " # Create list to gather xspans.\n", " xspans = []\n", " line_colors = []\n", @@ -662,33 +728,42 @@ " # we have got a None, so skip and move on.\n", " pass\n", "\n", - " if bootstraps_color_by_group is True:\n", + " if bootstraps_color_by_group:\n", " line_colors.append(plot_palette_raw[all_plot_groups[jj]])\n", "\n", " if len(line_colors) != len(all_plot_groups):\n", " line_colors = ytick_color\n", "\n", - " error_bar(plot_data, x=xvar, y=yvar,\n", - " # Hardcoded offset...\n", - " offset=xspans + np.array(plot_kwargs[\"group_summaries_offset\"]),\n", - " line_color=line_colors,\n", - " gap_width_percent=1.5,\n", - " type=group_summaries, ax=rawdata_axes,\n", - " method=\"gapped_lines\",\n", - " 
**group_summary_kwargs)\n", - "\n", - " if group_summaries is not None and proportional == True:\n", - "\n", + " error_bar(\n", + " plot_data,\n", + " x=xvar,\n", + " y=yvar,\n", + " # Hardcoded offset...\n", + " offset=xspans + np.array(plot_kwargs[\"group_summaries_offset\"]),\n", + " line_color=line_colors,\n", + " gap_width_percent=1.5,\n", + " type=group_summaries,\n", + " ax=rawdata_axes,\n", + " method=\"gapped_lines\",\n", + " **group_summary_kwargs\n", + " )\n", + "\n", + " if group_summaries is not None and proportional:\n", " err_color = plot_kwargs[\"err_color\"]\n", - " if err_color == None:\n", + " if err_color is None:\n", " err_color = \"black\"\n", - " error_bar(plot_data, x=xvar, y=yvar,\n", - " offset=0,\n", - " line_color=err_color,\n", - " gap_width_percent=1.5,\n", - " type=group_summaries, ax=rawdata_axes,\n", - " method=\"proportional_error_bar\",\n", - " **group_summary_kwargs)\n", + " error_bar(\n", + " plot_data,\n", + " x=xvar,\n", + " y=yvar,\n", + " offset=0,\n", + " line_color=err_color,\n", + " gap_width_percent=1.5,\n", + " type=group_summaries,\n", + " ax=rawdata_axes,\n", + " method=\"proportional_error_bar\",\n", + " **group_summary_kwargs\n", + " )\n", "\n", " # Add the counts to the rawdata axes xticks.\n", " counts = plot_data.groupby(xvar).count()[yvar]\n", @@ -698,7 +773,7 @@ " for xticklab in rawdata_axes.xaxis.get_ticklabels():\n", " t = xticklab.get_text()\n", " if t.rfind(\"\\n\") != -1:\n", - " te = t[t.rfind(\"\\n\") + len(\"\\n\"):]\n", + " te = t[t.rfind(\"\\n\") + len(\"\\n\") :]\n", " N = str(counts.loc[te])\n", " te = t\n", " else:\n", @@ -707,13 +782,13 @@ "\n", " ticks_with_counts.append(\"{}\\nN = {}\".format(te, N))\n", "\n", - " if plot_kwargs['fontsize_rawxlabel'] is not None:\n", - " fontsize_rawxlabel = plot_kwargs['fontsize_rawxlabel']\n", - " rawdata_axes.set_xticklabels(ticks_with_counts,fontsize=fontsize_rawxlabel)\n", + " if plot_kwargs[\"fontsize_rawxlabel\"] is not None:\n", + " fontsize_rawxlabel = 
plot_kwargs[\"fontsize_rawxlabel\"]\n", + " rawdata_axes.set_xticklabels(ticks_with_counts, fontsize=fontsize_rawxlabel)\n", "\n", " # Save the handles and labels for the legend.\n", " handles, labels = rawdata_axes.get_legend_handles_labels()\n", - " legend_labels = [l for l in labels]\n", + " legend_labels = [l for l in labels]\n", " legend_handles = [h for h in handles]\n", " if bootstraps_color_by_group is False:\n", " rawdata_axes.legend().set_visible(False)\n", @@ -724,11 +799,11 @@ "\n", " # Plot effect sizes and bootstraps.\n", " # Take note of where the `control` groups are.\n", - " if is_paired == \"baseline\" and show_pairs == True:\n", + " if is_paired == \"baseline\" and show_pairs:\n", " if two_col_sankey:\n", " ticks_to_skip = []\n", - " ticks_to_plot = np.arange(0, len(temp_all_plot_groups)/2).tolist()\n", - " ticks_to_start_twocol_sankey = np.cumsum([len(i)-1 for i in idx]).tolist()\n", + " ticks_to_plot = np.arange(0, len(temp_all_plot_groups) / 2).tolist()\n", + " ticks_to_start_twocol_sankey = np.cumsum([len(i) - 1 for i in idx]).tolist()\n", " ticks_to_start_twocol_sankey.pop()\n", " ticks_to_start_twocol_sankey.insert(0, 0)\n", " else:\n", @@ -737,60 +812,63 @@ " ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist()\n", " ticks_to_skip.insert(0, 0)\n", " # Then obtain the ticks where we have to plot the effect sizes.\n", - " ticks_to_plot = [t for t in range(0, len(all_plot_groups))\n", - " if t not in ticks_to_skip]\n", + " ticks_to_plot = [\n", + " t for t in range(0, len(all_plot_groups)) if t not in ticks_to_skip\n", + " ]\n", " ticks_to_skip_contrast = np.cumsum([(len(t)) for t in idx])[:-1].tolist()\n", " ticks_to_skip_contrast.insert(0, 0)\n", " else:\n", " if two_col_sankey:\n", " ticks_to_skip = [len(sankey_control_group)]\n", " # Then obtain the ticks where we have to plot the effect sizes.\n", - " ticks_to_plot = [t for t in range(0, len(temp_idx))\n", - " if t not in ticks_to_skip]\n", + " ticks_to_plot = [\n", + " t for t 
in range(0, len(temp_idx)) if t not in ticks_to_skip\n", + " ]\n", " ticks_to_skip = []\n", - " ticks_to_start_twocol_sankey = np.cumsum([len(i)-1 for i in idx]).tolist()\n", + " ticks_to_start_twocol_sankey = np.cumsum([len(i) - 1 for i in idx]).tolist()\n", " ticks_to_start_twocol_sankey.pop()\n", " ticks_to_start_twocol_sankey.insert(0, 0)\n", " else:\n", " ticks_to_skip = np.cumsum([len(t) for t in idx])[:-1].tolist()\n", " ticks_to_skip.insert(0, 0)\n", " # Then obtain the ticks where we have to plot the effect sizes.\n", - " ticks_to_plot = [t for t in range(0, len(all_plot_groups))\n", - " if t not in ticks_to_skip]\n", + " ticks_to_plot = [\n", + " t for t in range(0, len(all_plot_groups)) if t not in ticks_to_skip\n", + " ]\n", "\n", " # Plot the bootstraps, then the effect sizes and CIs.\n", - " es_marker_size = plot_kwargs[\"es_marker_size\"]\n", + " es_marker_size = plot_kwargs[\"es_marker_size\"]\n", " halfviolin_alpha = plot_kwargs[\"halfviolin_alpha\"]\n", "\n", " ci_type = plot_kwargs[\"ci_type\"]\n", "\n", - " results = EffectSizeDataFrame.results\n", + " results = effectsize_df.results\n", " contrast_xtick_labels = []\n", "\n", - "\n", " for j, tick in enumerate(ticks_to_plot):\n", - " current_group = results.test[j]\n", - " current_control = results.control[j]\n", + " current_group = results.test[j]\n", + " current_control = results.control[j]\n", " current_bootstrap = results.bootstraps[j]\n", - " current_effsize = results.difference[j]\n", + " current_effsize = results.difference[j]\n", " if ci_type == \"bca\":\n", - " current_ci_low = results.bca_low[j]\n", - " current_ci_high = results.bca_high[j]\n", + " current_ci_low = results.bca_low[j]\n", + " current_ci_high = results.bca_high[j]\n", " else:\n", - " current_ci_low = results.pct_low[j]\n", - " current_ci_high = results.pct_high[j]\n", - "\n", + " current_ci_low = results.pct_low[j]\n", + " current_ci_high = results.pct_high[j]\n", "\n", " # Create the violinplot.\n", " # New in v0.2.6: 
drop negative infinities before plotting.\n", - " v = contrast_axes.violinplot(current_bootstrap[~np.isinf(current_bootstrap)],\n", - " positions=[tick],\n", - " **violinplot_kwargs)\n", + " v = contrast_axes.violinplot(\n", + " current_bootstrap[~np.isinf(current_bootstrap)],\n", + " positions=[tick],\n", + " **violinplot_kwargs\n", + " )\n", " # Turn the violinplot into half, and color it the same as the swarmplot.\n", " # Do this only if the color column is not specified.\n", " # Ideally, the alpha (transparency) fo the violin plot should be\n", " # less than one so the effect size and CIs are visible.\n", - " if bootstraps_color_by_group is True:\n", + " if bootstraps_color_by_group:\n", " fc = plot_palette_contrast[current_group]\n", " else:\n", " fc = \"grey\"\n", @@ -798,96 +876,110 @@ " halfviolin(v, fill_color=fc, alpha=halfviolin_alpha)\n", "\n", " # Plot the effect size.\n", - " contrast_axes.plot([tick], current_effsize, marker='o',\n", - " color=ytick_color,\n", - " markersize=es_marker_size)\n", - " \n", - "################## SHOW ES ON CONTRAST PLOT WIP \n", + " contrast_axes.plot(\n", + " [tick],\n", + " current_effsize,\n", + " marker=\"o\",\n", + " color=ytick_color,\n", + " markersize=es_marker_size,\n", + " )\n", + "\n", + " ################## SHOW ES ON CONTRAST PLOT WIP\n", "\n", " contrast_show_es = plot_kwargs[\"contrast_show_es\"]\n", - " es_sf = plot_kwargs['es_sf']\n", - " es_fontsize = plot_kwargs['es_fontsize']\n", - " \n", - " if gridkey_show_es == True:\n", + " es_sf = plot_kwargs[\"es_sf\"]\n", + " es_fontsize = plot_kwargs[\"es_fontsize\"]\n", + "\n", + " if gridkey_show_es:\n", " contrast_show_es = False\n", - " \n", "\n", - " \n", " effsize_for_print = current_effsize\n", - " \n", - " printed_es = np.format_float_positional(effsize_for_print,\n", - " precision=es_sf,\n", - " sign=True,\n", - " trim= 'k',\n", - " min_digits = es_sf)\n", - " if contrast_show_es == True:\n", + "\n", + " printed_es = np.format_float_positional(\n", + 
" effsize_for_print, precision=es_sf, sign=True, trim=\"k\", min_digits=es_sf\n", + " )\n", + " if contrast_show_es:\n", " if effsize_for_print < 0:\n", " textoffset = 10\n", " else:\n", " textoffset = 15\n", - " contrast_axes.annotate(text=printed_es, \n", - " xy = (tick, effsize_for_print),\n", - " xytext = (-textoffset-len(printed_es)*es_fontsize/2,-es_fontsize/2),\n", - " textcoords = \"offset points\",\n", - " **{ \"fontsize\" : es_fontsize })\n", - " \n", - "################## SHOW ES ON CONTRAST PLOT END \n", - " \n", - " # Plot the confidence interval.\n", - " contrast_axes.plot([tick, tick],\n", - " [current_ci_low, current_ci_high],\n", - " linestyle=\"-\",\n", - " color=ytick_color,\n", - " linewidth=group_summary_kwargs['lw'])\n", + " contrast_axes.annotate(\n", + " text=printed_es,\n", + " xy=(tick, effsize_for_print),\n", + " xytext=(\n", + " -textoffset - len(printed_es) * es_fontsize / 2,\n", + " -es_fontsize / 2,\n", + " ),\n", + " textcoords=\"offset points\",\n", + " **{\"fontsize\": es_fontsize}\n", + " )\n", + "\n", + " ################## SHOW ES ON CONTRAST PLOT END\n", "\n", - " contrast_xtick_labels.append(\"{}\\nminus\\n{}\".format(current_group,\n", - " current_control))\n", + " # Plot the confidence interval.\n", + " contrast_axes.plot(\n", + " [tick, tick],\n", + " [current_ci_low, current_ci_high],\n", + " linestyle=\"-\",\n", + " color=ytick_color,\n", + " linewidth=group_summary_kwargs[\"lw\"],\n", + " )\n", + "\n", + " contrast_xtick_labels.append(\n", + " \"{}\\nminus\\n{}\".format(current_group, current_control)\n", + " )\n", "\n", " # Plot mini-meta violin\n", " if show_mini_meta or show_delta2:\n", " if show_mini_meta:\n", - " mini_meta_delta = EffectSizeDataFrame.mini_meta_delta\n", - " data = mini_meta_delta.bootstraps_weighted_delta\n", - " difference = mini_meta_delta.difference\n", + " mini_meta_delta = effectsize_df.mini_meta_delta\n", + " data = mini_meta_delta.bootstraps_weighted_delta\n", + " difference = 
mini_meta_delta.difference\n", " if ci_type == \"bca\":\n", - " ci_low = mini_meta_delta.bca_low\n", - " ci_high = mini_meta_delta.bca_high\n", + " ci_low = mini_meta_delta.bca_low\n", + " ci_high = mini_meta_delta.bca_high\n", " else:\n", - " ci_low = mini_meta_delta.pct_low\n", - " ci_high = mini_meta_delta.pct_high\n", - " else: \n", - " delta_delta = EffectSizeDataFrame.delta_delta\n", - " data = delta_delta.bootstraps_delta_delta\n", - " difference = delta_delta.difference\n", + " ci_low = mini_meta_delta.pct_low\n", + " ci_high = mini_meta_delta.pct_high\n", + " else:\n", + " delta_delta = effectsize_df.delta_delta\n", + " data = delta_delta.bootstraps_delta_delta\n", + " difference = delta_delta.difference\n", " if ci_type == \"bca\":\n", - " ci_low = delta_delta.bca_low\n", - " ci_high = delta_delta.bca_high\n", + " ci_low = delta_delta.bca_low\n", + " ci_high = delta_delta.bca_high\n", " else:\n", - " ci_low = delta_delta.pct_low\n", - " ci_high = delta_delta.pct_high\n", - " #Create the violinplot.\n", - " #New in v0.2.6: drop negative infinities before plotting.\n", - " position = max(rawdata_axes.get_xticks())+2\n", - " v = contrast_axes.violinplot(data[~np.isinf(data)],\n", - " positions=[position],\n", - " **violinplot_kwargs)\n", + " ci_low = delta_delta.pct_low\n", + " ci_high = delta_delta.pct_high\n", + " # Create the violinplot.\n", + " # New in v0.2.6: drop negative infinities before plotting.\n", + " position = max(rawdata_axes.get_xticks()) + 2\n", + " v = contrast_axes.violinplot(\n", + " data[~np.isinf(data)], positions=[position], **violinplot_kwargs\n", + " )\n", "\n", " fc = \"grey\"\n", "\n", " halfviolin(v, fill_color=fc, alpha=halfviolin_alpha)\n", "\n", " # Plot the effect size.\n", - " contrast_axes.plot([position], difference, marker='o',\n", - " color=ytick_color,\n", - " markersize=es_marker_size)\n", + " contrast_axes.plot(\n", + " [position],\n", + " difference,\n", + " marker=\"o\",\n", + " color=ytick_color,\n", + " 
markersize=es_marker_size,\n", + " )\n", " # Plot the confidence interval.\n", - " contrast_axes.plot([position, position],\n", - " [ci_low, ci_high],\n", - " linestyle=\"-\",\n", - " color=ytick_color,\n", - " linewidth=group_summary_kwargs['lw'])\n", + " contrast_axes.plot(\n", + " [position, position],\n", + " [ci_low, ci_high],\n", + " linestyle=\"-\",\n", + " color=ytick_color,\n", + " linewidth=group_summary_kwargs[\"lw\"],\n", + " )\n", " if show_mini_meta:\n", - " contrast_xtick_labels.extend([\"\",\"Weighted delta\"])\n", + " contrast_xtick_labels.extend([\"\", \"Weighted delta\"])\n", " elif effect_size == \"delta_g\":\n", " contrast_xtick_labels.extend([\"\", \"deltas' g\"])\n", " else:\n", @@ -899,22 +991,22 @@ " contrast_axes.set_xticks(rawdata_axes.get_xticks())\n", " else:\n", " temp = rawdata_axes.get_xticks()\n", - " temp = np.append(temp, [max(temp)+1, max(temp)+2])\n", + " temp = np.append(temp, [max(temp) + 1, max(temp) + 2])\n", " contrast_axes.set_xticks(temp)\n", "\n", - " if show_pairs is True:\n", + " if show_pairs:\n", " max_x = contrast_axes.get_xlim()[1]\n", " rawdata_axes.set_xlim(-0.375, max_x)\n", "\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " contrast_axes.set_xlim(0.5, 1.5)\n", " elif show_delta2 or show_mini_meta:\n", " # Increase the xlim of raw data by 2\n", " temp = rawdata_axes.get_xlim()\n", " if show_pairs:\n", - " rawdata_axes.set_xlim(temp[0], temp[1]+0.25)\n", + " rawdata_axes.set_xlim(temp[0], temp[1] + 0.25)\n", " else:\n", - " rawdata_axes.set_xlim(temp[0], temp[1]+2)\n", + " rawdata_axes.set_xlim(temp[0], temp[1] + 2)\n", " contrast_axes.set_xlim(rawdata_axes.get_xlim())\n", " else:\n", " contrast_axes.set_xlim(rawdata_axes.get_xlim())\n", @@ -923,55 +1015,67 @@ " for t in ticks_to_skip:\n", " contrast_xtick_labels.insert(t, \"\")\n", "\n", - " if plot_kwargs['fontsize_contrastxlabel'] is not None:\n", - " fontsize_contrastxlabel = plot_kwargs['fontsize_contrastxlabel']\n", + " if 
plot_kwargs[\"fontsize_contrastxlabel\"] is not None:\n", + " fontsize_contrastxlabel = plot_kwargs[\"fontsize_contrastxlabel\"]\n", "\n", - " contrast_axes.set_xticklabels(contrast_xtick_labels,fontsize=fontsize_contrastxlabel)\n", + " contrast_axes.set_xticklabels(\n", + " contrast_xtick_labels, fontsize=fontsize_contrastxlabel\n", + " )\n", "\n", " if bootstraps_color_by_group is False:\n", " legend_labels_unique = np.unique(legend_labels)\n", " unique_idx = np.unique(legend_labels, return_index=True)[1]\n", - " legend_handles_unique = (pd.Series(legend_handles, dtype=\"object\").loc[unique_idx]).tolist()\n", + " legend_handles_unique = (\n", + " pd.Series(legend_handles, dtype=\"object\").loc[unique_idx]\n", + " ).tolist()\n", "\n", " if len(legend_handles_unique) > 0:\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " axes_with_legend = contrast_axes\n", - " if show_pairs is True:\n", + " if show_pairs:\n", " bta = (1.75, 1.02)\n", " else:\n", " bta = (1.5, 1.02)\n", " else:\n", " axes_with_legend = rawdata_axes\n", - " if show_pairs is True:\n", - " bta = (1.02, 1.)\n", + " if show_pairs:\n", + " bta = (1.02, 1.0)\n", " else:\n", - " bta = (1.,1.)\n", - " leg = axes_with_legend.legend(legend_handles_unique,\n", - " legend_labels_unique,\n", - " bbox_to_anchor=bta,\n", - " **legend_kwargs)\n", - " if show_pairs is True:\n", + " bta = (1.0, 1.0)\n", + " leg = axes_with_legend.legend(\n", + " legend_handles_unique,\n", + " legend_labels_unique,\n", + " bbox_to_anchor=bta,\n", + " **legend_kwargs\n", + " )\n", + " if show_pairs:\n", " for line in leg.get_lines():\n", " line.set_linewidth(3.0)\n", "\n", " og_ylim_raw = rawdata_axes.get_ylim()\n", " og_xlim_raw = rawdata_axes.get_xlim()\n", "\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " # For Gardner-Altman plots only.\n", "\n", " # Normalize ylims and despine the floating contrast axes.\n", " # Check that the effect size is within the swarm ylims.\n", - " if effect_size_type 
in [\"mean_diff\", \"cohens_d\", \"hedges_g\",\"cohens_h\"]:\n", - " control_group_summary = plot_data.groupby(xvar)\\\n", - " .mean(numeric_only=True).loc[current_control, yvar]\n", - " test_group_summary = plot_data.groupby(xvar)\\\n", - " .mean(numeric_only=True).loc[current_group, yvar]\n", + " if effect_size_type in [\"mean_diff\", \"cohens_d\", \"hedges_g\", \"cohens_h\"]:\n", + " control_group_summary = (\n", + " plot_data.groupby(xvar)\n", + " .mean(numeric_only=True)\n", + " .loc[current_control, yvar]\n", + " )\n", + " test_group_summary = (\n", + " plot_data.groupby(xvar).mean(numeric_only=True).loc[current_group, yvar]\n", + " )\n", " elif effect_size_type == \"median_diff\":\n", - " control_group_summary = plot_data.groupby(xvar)\\\n", - " .median().loc[current_control, yvar]\n", - " test_group_summary = plot_data.groupby(xvar)\\\n", - " .median().loc[current_group, yvar]\n", + " control_group_summary = (\n", + " plot_data.groupby(xvar).median().loc[current_control, yvar]\n", + " )\n", + " test_group_summary = (\n", + " plot_data.groupby(xvar).median().loc[current_group, yvar]\n", + " )\n", "\n", " if swarm_ylim is None:\n", " swarm_ylim = rawdata_axes.get_ylim()\n", @@ -979,7 +1083,7 @@ " _, contrast_xlim_max = contrast_axes.get_xlim()\n", "\n", " difference = float(results.difference[0])\n", - " \n", + "\n", " if effect_size_type in [\"mean_diff\", \"median_diff\"]:\n", " # Align 0 of contrast_axes to reference group mean of rawdata_axes.\n", " # If the effect size is positive, shift the contrast axis up.\n", @@ -997,48 +1101,53 @@ " og_ylim_contrast = rawdata_axes.get_ylim() - np.array(control_group_summary)\n", "\n", " contrast_axes.set_ylim(og_ylim_contrast)\n", - " contrast_axes.set_xlim(contrast_xlim_max-1, contrast_xlim_max)\n", + " contrast_axes.set_xlim(contrast_xlim_max - 1, contrast_xlim_max)\n", "\n", - " elif effect_size_type in [\"cohens_d\", \"hedges_g\",\"cohens_h\"]:\n", + " elif effect_size_type in [\"cohens_d\", \"hedges_g\", 
\"cohens_h\"]:\n", " if is_paired:\n", " which_std = 1\n", " else:\n", " which_std = 0\n", " temp_control = plot_data[plot_data[xvar] == current_control][yvar]\n", - " temp_test = plot_data[plot_data[xvar] == current_group][yvar]\n", - " \n", + " temp_test = plot_data[plot_data[xvar] == current_group][yvar]\n", + "\n", " stds = _compute_standardizers(temp_control, temp_test)\n", " if is_paired:\n", " pooled_sd = stds[1]\n", " else:\n", " pooled_sd = stds[0]\n", - " \n", - " if effect_size_type == 'hedges_g':\n", - " gby_count = plot_data.groupby(xvar).count()\n", + "\n", + " if effect_size_type == \"hedges_g\":\n", + " gby_count = plot_data.groupby(xvar).count()\n", " len_control = gby_count.loc[current_control, yvar]\n", - " len_test = gby_count.loc[current_group, yvar]\n", - " \n", - " hg_correction_factor = _compute_hedges_correction_factor(len_control, len_test)\n", - " \n", + " len_test = gby_count.loc[current_group, yvar]\n", + "\n", + " hg_correction_factor = _compute_hedges_correction_factor(\n", + " len_control, len_test\n", + " )\n", + "\n", " ylim_scale_factor = pooled_sd / hg_correction_factor\n", "\n", " elif effect_size_type == \"cohens_h\":\n", - " ylim_scale_factor = (np.mean(temp_test)-np.mean(temp_control)) / difference\n", + " ylim_scale_factor = (\n", + " np.mean(temp_test) - np.mean(temp_control)\n", + " ) / difference\n", "\n", " else:\n", " ylim_scale_factor = pooled_sd\n", - " \n", - " scaled_ylim = ((rawdata_axes.get_ylim() - control_group_summary) / ylim_scale_factor).tolist()\n", + "\n", + " scaled_ylim = (\n", + " (rawdata_axes.get_ylim() - control_group_summary) / ylim_scale_factor\n", + " ).tolist()\n", "\n", " contrast_axes.set_ylim(scaled_ylim)\n", " og_ylim_contrast = scaled_ylim\n", "\n", - " contrast_axes.set_xlim(contrast_xlim_max-1, contrast_xlim_max)\n", + " contrast_axes.set_xlim(contrast_xlim_max - 1, contrast_xlim_max)\n", "\n", " if one_sankey is None:\n", " # Draw summary lines for control and test groups..\n", " for jj, 
axx in enumerate([rawdata_axes, contrast_axes]):\n", - "\n", " # Draw effect size line.\n", " if jj == 0:\n", " ref = control_group_summary\n", @@ -1048,66 +1157,74 @@ " elif jj == 1:\n", " ref = 0\n", " diff = ref + difference\n", - " effsize_line_start = contrast_xlim_max-1.1\n", + " effsize_line_start = contrast_xlim_max - 1.1\n", "\n", " xlimlow, xlimhigh = axx.get_xlim()\n", "\n", " # Draw reference line.\n", - " axx.hlines(ref, # y-coordinates\n", - " 0, xlimhigh, # x-coordinates, start and end.\n", - " **reflines_kwargs)\n", - " \n", + " axx.hlines(\n", + " ref, # y-coordinates\n", + " 0,\n", + " xlimhigh, # x-coordinates, start and end.\n", + " **reflines_kwargs\n", + " )\n", + "\n", " # Draw effect size line.\n", - " axx.hlines(diff,\n", - " effsize_line_start, xlimhigh,\n", - " **reflines_kwargs)\n", - " else: \n", + " axx.hlines(diff, effsize_line_start, xlimhigh, **reflines_kwargs)\n", + " else:\n", " ref = 0\n", " diff = ref + difference\n", " effsize_line_start = contrast_xlim_max - 0.9\n", " xlimlow, xlimhigh = contrast_axes.get_xlim()\n", " # Draw reference line.\n", - " contrast_axes.hlines(ref, # y-coordinates\n", - " effsize_line_start, xlimhigh, # x-coordinates, start and end.\n", - " **reflines_kwargs)\n", - " \n", + " contrast_axes.hlines(\n", + " ref, # y-coordinates\n", + " effsize_line_start,\n", + " xlimhigh, # x-coordinates, start and end.\n", + " **reflines_kwargs\n", + " )\n", + "\n", " # Draw effect size line.\n", - " contrast_axes.hlines(diff,\n", - " effsize_line_start, xlimhigh,\n", - " **reflines_kwargs) \n", - " rawdata_axes.set_xlim(og_xlim_raw) # to align the axis\n", + " contrast_axes.hlines(diff, effsize_line_start, xlimhigh, **reflines_kwargs)\n", + " rawdata_axes.set_xlim(og_xlim_raw) # to align the axis\n", " # Despine appropriately.\n", - " sns.despine(ax=rawdata_axes, bottom=True)\n", + " sns.despine(ax=rawdata_axes, bottom=True)\n", " sns.despine(ax=contrast_axes, left=True, right=False)\n", "\n", " # Insert break 
between the rawdata axes and the contrast axes\n", " # by re-drawing the x-spine.\n", - " rawdata_axes.hlines(og_ylim_raw[0], # yindex\n", - " rawdata_axes.get_xlim()[0], 1.3, # xmin, xmax\n", - " **redraw_axes_kwargs)\n", + " rawdata_axes.hlines(\n", + " og_ylim_raw[0], # yindex\n", + " rawdata_axes.get_xlim()[0],\n", + " 1.3, # xmin, xmax\n", + " **redraw_axes_kwargs\n", + " )\n", " rawdata_axes.set_ylim(og_ylim_raw)\n", "\n", - " contrast_axes.hlines(contrast_axes.get_ylim()[0],\n", - " contrast_xlim_max-0.8, contrast_xlim_max,\n", - " **redraw_axes_kwargs)\n", - "\n", + " contrast_axes.hlines(\n", + " contrast_axes.get_ylim()[0],\n", + " contrast_xlim_max - 0.8,\n", + " contrast_xlim_max,\n", + " **redraw_axes_kwargs\n", + " )\n", "\n", " else:\n", " # For Cumming Plots only.\n", "\n", " # Set custom contrast_ylim, if it was specified.\n", - " if plot_kwargs['contrast_ylim'] is not None or (plot_kwargs['delta2_ylim'] is not None and show_delta2):\n", - "\n", - " if plot_kwargs['contrast_ylim'] is not None:\n", - " custom_contrast_ylim = plot_kwargs['contrast_ylim']\n", - " if plot_kwargs['delta2_ylim'] is not None and show_delta2:\n", - " custom_delta2_ylim = plot_kwargs['delta2_ylim']\n", - " if custom_contrast_ylim!=custom_delta2_ylim:\n", + " if plot_kwargs[\"contrast_ylim\"] is not None or (\n", + " plot_kwargs[\"delta2_ylim\"] is not None and show_delta2\n", + " ):\n", + " if plot_kwargs[\"contrast_ylim\"] is not None:\n", + " custom_contrast_ylim = plot_kwargs[\"contrast_ylim\"]\n", + " if plot_kwargs[\"delta2_ylim\"] is not None and show_delta2:\n", + " custom_delta2_ylim = plot_kwargs[\"delta2_ylim\"]\n", + " if custom_contrast_ylim != custom_delta2_ylim:\n", " err1 = \"Please check if `contrast_ylim` and `delta2_ylim` are assigned\"\n", " err2 = \"with same values.\"\n", " raise ValueError(err1 + err2)\n", " else:\n", - " custom_delta2_ylim = plot_kwargs['delta2_ylim']\n", + " custom_delta2_ylim = plot_kwargs[\"delta2_ylim\"]\n", " custom_contrast_ylim 
= custom_delta2_ylim\n", "\n", " if len(custom_contrast_ylim) != 2:\n", @@ -1117,8 +1234,8 @@ "\n", " if effect_size_type == \"cliffs_delta\":\n", " # Ensure the ylims for a cliffs_delta plot never exceed [-1, 1].\n", - " l = plot_kwargs['contrast_ylim'][0]\n", - " h = plot_kwargs['contrast_ylim'][1]\n", + " l = plot_kwargs[\"contrast_ylim\"][0]\n", + " h = plot_kwargs[\"contrast_ylim\"][1]\n", " low = -1 if l < -1 else l\n", " high = 1 if h > 1 else h\n", " contrast_axes.set_ylim(low, high)\n", @@ -1135,228 +1252,237 @@ " if contrast_ylim_low < 0 < contrast_ylim_high:\n", " contrast_axes.axhline(y=0, **reflines_kwargs)\n", "\n", - " if is_paired == \"baseline\" and show_pairs == True:\n", + " if is_paired == \"baseline\" and show_pairs:\n", " if two_col_sankey:\n", - " rightend_ticks_raw = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey)\n", + " rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(\n", + " ticks_to_start_twocol_sankey\n", + " )\n", " elif proportional and is_paired is not None:\n", - " rightend_ticks_raw = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip)\n", - " else: \n", - " rightend_ticks_raw = np.array([len(i)-1 for i in temp_idx]) + np.array(ticks_to_skip)\n", + " rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(\n", + " ticks_to_skip\n", + " )\n", + " else:\n", + " rightend_ticks_raw = np.array(\n", + " [len(i) - 1 for i in temp_idx]\n", + " ) + np.array(ticks_to_skip)\n", " for ax in [rawdata_axes]:\n", " sns.despine(ax=ax, bottom=True)\n", - " \n", + "\n", " ylim = ax.get_ylim()\n", " xlim = ax.get_xlim()\n", - " redraw_axes_kwargs['y'] = ylim[0]\n", - " \n", + " redraw_axes_kwargs[\"y\"] = ylim[0]\n", + "\n", " if two_col_sankey:\n", " for k, start_tick in enumerate(ticks_to_start_twocol_sankey):\n", " end_tick = rightend_ticks_raw[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs)\n", - " else: \n", + " ax.hlines(xmin=start_tick, 
xmax=end_tick, **redraw_axes_kwargs)\n", + " else:\n", " for k, start_tick in enumerate(ticks_to_skip):\n", " end_tick = rightend_ticks_raw[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs)\n", + " ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)\n", " ax.set_ylim(ylim)\n", - " del redraw_axes_kwargs['y']\n", - " \n", - " if proportional == False:\n", - " temp_length = [(len(i)-1) for i in idx]\n", + " del redraw_axes_kwargs[\"y\"]\n", + "\n", + " if not proportional:\n", + " temp_length = [(len(i) - 1) for i in idx]\n", " else:\n", - " temp_length = [(len(i)-1)*2-1 for i in idx]\n", + " temp_length = [(len(i) - 1) * 2 - 1 for i in idx]\n", " if two_col_sankey:\n", - " rightend_ticks_contrast = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey)\n", + " rightend_ticks_contrast = np.array(\n", + " [len(i) - 2 for i in idx]\n", + " ) + np.array(ticks_to_start_twocol_sankey)\n", " elif proportional and is_paired is not None:\n", - " rightend_ticks_contrast = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip)\n", - " else: \n", - " rightend_ticks_contrast = np.array(temp_length) + np.array(ticks_to_skip_contrast)\n", + " rightend_ticks_contrast = np.array(\n", + " [len(i) - 1 for i in idx]\n", + " ) + np.array(ticks_to_skip)\n", + " else:\n", + " rightend_ticks_contrast = np.array(temp_length) + np.array(\n", + " ticks_to_skip_contrast\n", + " )\n", " for ax in [contrast_axes]:\n", " sns.despine(ax=ax, bottom=True)\n", - " \n", + "\n", " ylim = ax.get_ylim()\n", " xlim = ax.get_xlim()\n", - " redraw_axes_kwargs['y'] = ylim[0]\n", - " \n", + " redraw_axes_kwargs[\"y\"] = ylim[0]\n", + "\n", " if two_col_sankey:\n", " for k, start_tick in enumerate(ticks_to_start_twocol_sankey):\n", " end_tick = rightend_ticks_contrast[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs)\n", + " ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)\n", " else:\n", " for 
k, start_tick in enumerate(ticks_to_skip_contrast):\n", " end_tick = rightend_ticks_contrast[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs) \n", - " \n", + " ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)\n", + "\n", " ax.set_ylim(ylim)\n", - " del redraw_axes_kwargs['y']\n", + " del redraw_axes_kwargs[\"y\"]\n", " else:\n", " # Compute the end of each x-axes line.\n", " if two_col_sankey:\n", - " rightend_ticks = np.array([len(i)-2 for i in idx]) + np.array(ticks_to_start_twocol_sankey)\n", + " rightend_ticks = np.array([len(i) - 2 for i in idx]) + np.array(\n", + " ticks_to_start_twocol_sankey\n", + " )\n", " else:\n", - " rightend_ticks = np.array([len(i)-1 for i in idx]) + np.array(ticks_to_skip)\n", - " \n", + " rightend_ticks = np.array([len(i) - 1 for i in idx]) + np.array(\n", + " ticks_to_skip\n", + " )\n", + "\n", " for ax in [rawdata_axes, contrast_axes]:\n", " sns.despine(ax=ax, bottom=True)\n", - " \n", + "\n", " ylim = ax.get_ylim()\n", " xlim = ax.get_xlim()\n", - " redraw_axes_kwargs['y'] = ylim[0]\n", - " \n", + " redraw_axes_kwargs[\"y\"] = ylim[0]\n", + "\n", " if two_col_sankey:\n", " for k, start_tick in enumerate(ticks_to_start_twocol_sankey):\n", " end_tick = rightend_ticks[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs)\n", + " ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)\n", " else:\n", " for k, start_tick in enumerate(ticks_to_skip):\n", " end_tick = rightend_ticks[k]\n", - " ax.hlines(xmin=start_tick, xmax=end_tick,\n", - " **redraw_axes_kwargs)\n", - " \n", + " ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)\n", + "\n", " ax.set_ylim(ylim)\n", - " del redraw_axes_kwargs['y']\n", + " del redraw_axes_kwargs[\"y\"]\n", "\n", - " if show_delta2 is True or show_mini_meta is True:\n", + " if show_delta2 or show_mini_meta:\n", " ylim = contrast_axes.get_ylim()\n", - " redraw_axes_kwargs['y'] = ylim[0]\n", + " 
redraw_axes_kwargs[\"y\"] = ylim[0]\n", " x_ticks = contrast_axes.get_xticks()\n", - " contrast_axes.hlines(xmin=x_ticks[-2], xmax=x_ticks[-1],\n", - " **redraw_axes_kwargs)\n", - " del redraw_axes_kwargs['y']\n", + " contrast_axes.hlines(xmin=x_ticks[-2], xmax=x_ticks[-1], **redraw_axes_kwargs)\n", + " del redraw_axes_kwargs[\"y\"]\n", "\n", " # Set raw axes y-label.\n", - " swarm_label = plot_kwargs['swarm_label']\n", + " swarm_label = plot_kwargs[\"swarm_label\"]\n", " if swarm_label is None and yvar is None:\n", " swarm_label = \"value\"\n", " elif swarm_label is None and yvar is not None:\n", " swarm_label = yvar\n", "\n", - " bar_label = plot_kwargs['bar_label']\n", + " bar_label = plot_kwargs[\"bar_label\"]\n", " if bar_label is None and effect_size_type != \"cohens_h\":\n", " bar_label = \"proportion of success\"\n", " elif bar_label is None and effect_size_type == \"cohens_h\":\n", " bar_label = \"value\"\n", "\n", " # Place contrast axes y-label.\n", - " contrast_label_dict = {'mean_diff': \"mean difference\",\n", - " 'median_diff': \"median difference\",\n", - " 'cohens_d': \"Cohen's d\",\n", - " 'hedges_g': \"Hedges' g\",\n", - " 'cliffs_delta': \"Cliff's delta\",\n", - " 'cohens_h': \"Cohen's h\",\n", - " 'delta_g': \"mean difference\"}\n", - "\n", - " if proportional == True and effect_size_type != \"cohens_h\":\n", + " contrast_label_dict = {\n", + " \"mean_diff\": \"mean difference\",\n", + " \"median_diff\": \"median difference\",\n", + " \"cohens_d\": \"Cohen's d\",\n", + " \"hedges_g\": \"Hedges' g\",\n", + " \"cliffs_delta\": \"Cliff's delta\",\n", + " \"cohens_h\": \"Cohen's h\",\n", + " \"delta_g\": \"mean difference\",\n", + " }\n", + "\n", + " if proportional and effect_size_type != \"cohens_h\":\n", " default_contrast_label = \"proportion difference\"\n", " elif effect_size_type == \"delta_g\":\n", " default_contrast_label = \"Hedges' g\"\n", " else:\n", - " default_contrast_label = contrast_label_dict[EffectSizeDataFrame.effect_size]\n", + 
" default_contrast_label = contrast_label_dict[effectsize_df.effect_size]\n", "\n", - "\n", - " if plot_kwargs['contrast_label'] is None:\n", + " if plot_kwargs[\"contrast_label\"] is None:\n", " if is_paired:\n", " contrast_label = \"paired\\n{}\".format(default_contrast_label)\n", " else:\n", " contrast_label = default_contrast_label\n", " contrast_label = contrast_label.capitalize()\n", " else:\n", - " contrast_label = plot_kwargs['contrast_label']\n", + " contrast_label = plot_kwargs[\"contrast_label\"]\n", "\n", - " if plot_kwargs['fontsize_rawylabel'] is not None:\n", - " fontsize_rawylabel = plot_kwargs['fontsize_rawylabel']\n", - " if plot_kwargs['fontsize_contrastylabel'] is not None:\n", - " fontsize_contrastylabel = plot_kwargs['fontsize_contrastylabel']\n", - " if plot_kwargs['fontsize_delta2label'] is not None:\n", - " fontsize_delta2label = plot_kwargs['fontsize_delta2label']\n", + " if plot_kwargs[\"fontsize_rawylabel\"] is not None:\n", + " fontsize_rawylabel = plot_kwargs[\"fontsize_rawylabel\"]\n", + " if plot_kwargs[\"fontsize_contrastylabel\"] is not None:\n", + " fontsize_contrastylabel = plot_kwargs[\"fontsize_contrastylabel\"]\n", + " if plot_kwargs[\"fontsize_delta2label\"] is not None:\n", + " fontsize_delta2label = plot_kwargs[\"fontsize_delta2label\"]\n", "\n", - " contrast_axes.set_ylabel(contrast_label,fontsize = fontsize_contrastylabel)\n", - " if float_contrast is True:\n", + " contrast_axes.set_ylabel(contrast_label, fontsize=fontsize_contrastylabel)\n", + " if float_contrast:\n", " contrast_axes.yaxis.set_label_position(\"right\")\n", "\n", " # Set the rawdata axes labels appropriately\n", - " if proportional == False:\n", - " rawdata_axes.set_ylabel(swarm_label,fontsize = fontsize_rawylabel)\n", + " if not proportional:\n", + " rawdata_axes.set_ylabel(swarm_label, fontsize=fontsize_rawylabel)\n", " else:\n", - " rawdata_axes.set_ylabel(bar_label,fontsize = fontsize_rawylabel)\n", + " rawdata_axes.set_ylabel(bar_label, 
fontsize=fontsize_rawylabel)\n", " rawdata_axes.set_xlabel(\"\")\n", "\n", " # Because we turned the axes frame off, we also need to draw back\n", " # the y-spine for both axes.\n", - " if float_contrast==False:\n", + " if not float_contrast:\n", " rawdata_axes.set_xlim(contrast_axes.get_xlim())\n", " og_xlim_raw = rawdata_axes.get_xlim()\n", - " rawdata_axes.vlines(og_xlim_raw[0],\n", - " og_ylim_raw[0], og_ylim_raw[1],\n", - " **redraw_axes_kwargs)\n", + " rawdata_axes.vlines(\n", + " og_xlim_raw[0], og_ylim_raw[0], og_ylim_raw[1], **redraw_axes_kwargs\n", + " )\n", "\n", " og_xlim_contrast = contrast_axes.get_xlim()\n", "\n", - " if float_contrast is True:\n", + " if float_contrast:\n", " xpos = og_xlim_contrast[1]\n", " else:\n", " xpos = og_xlim_contrast[0]\n", "\n", " og_ylim_contrast = contrast_axes.get_ylim()\n", - " contrast_axes.vlines(xpos,\n", - " og_ylim_contrast[0], og_ylim_contrast[1],\n", - " **redraw_axes_kwargs)\n", - "\n", - "\n", - " if show_delta2 is True:\n", - " if plot_kwargs['delta2_label'] is not None:\n", - " delta2_label = plot_kwargs['delta2_label']\n", - " elif effect_size == \"mean_diff\" :\n", + " contrast_axes.vlines(\n", + " xpos, og_ylim_contrast[0], og_ylim_contrast[1], **redraw_axes_kwargs\n", + " )\n", + "\n", + " if show_delta2:\n", + " if plot_kwargs[\"delta2_label\"] is not None:\n", + " delta2_label = plot_kwargs[\"delta2_label\"]\n", + " elif effect_size == \"mean_diff\":\n", " delta2_label = \"delta - delta\"\n", " else:\n", " delta2_label = \"deltas' g\"\n", " delta2_axes = contrast_axes.twinx()\n", " delta2_axes.set_frame_on(False)\n", - " delta2_axes.set_ylabel(delta2_label, fontsize = fontsize_delta2label)\n", + " delta2_axes.set_ylabel(delta2_label, fontsize=fontsize_delta2label)\n", " og_xlim_delta = contrast_axes.get_xlim()\n", " og_ylim_delta = contrast_axes.get_ylim()\n", " delta2_axes.set_ylim(og_ylim_delta)\n", - " delta2_axes.vlines(og_xlim_delta[1],\n", - " og_ylim_delta[0], og_ylim_delta[1],\n", - " 
**redraw_axes_kwargs)\n", + " delta2_axes.vlines(\n", + " og_xlim_delta[1], og_ylim_delta[0], og_ylim_delta[1], **redraw_axes_kwargs\n", + " )\n", "\n", + " ################################################### GRIDKEY MAIN CODE WIP\n", "\n", - "################################################### GRIDKEY MAIN CODE WIP\n", - " \n", - " #if gridkey_rows is None, skip everything here\n", + " # if gridkey_rows is None, skip everything here\n", " if gridkey_rows is not None:\n", - " \n", " # Raise error if there are more than 2 items in any idx and gridkey_merge_pairs is True and is_paired is not None\n", - " if gridkey_merge_pairs is True and is_paired is not None:\n", + " if gridkey_merge_pairs and is_paired is not None:\n", " for i in idx:\n", " if len(i) > 2:\n", - " warnings.warn(\"gridkey_merge_pairs=True only works if all idx in tuples have only two items. gridkey_merge_pairs has automatically been set to False\")\n", + " warnings.warn(\n", + " \"gridkey_merge_pairs=True only works if all idx in tuples have only two items. 
gridkey_merge_pairs has automatically been set to False\"\n", + " )\n", " gridkey_merge_pairs = False\n", " break\n", - " elif gridkey_merge_pairs is True and is_paired is None:\n", - " warnings.warn(\"gridkey_merge_pairs=True is only applicable for paired data.\")\n", + " elif gridkey_merge_pairs and is_paired is None:\n", + " warnings.warn(\n", + " \"gridkey_merge_pairs=True is only applicable for paired data.\"\n", + " )\n", " gridkey_merge_pairs = False\n", - " \n", + "\n", " # Checks for gridkey_merge_pairs and is_paired; if both are true, \"merges\" the gridkey per pair\n", - " if gridkey_merge_pairs is True and is_paired is not None: \n", + " if gridkey_merge_pairs and is_paired is not None:\n", " groups_for_gridkey = []\n", " for i in idx:\n", " groups_for_gridkey.append(i[1])\n", " else:\n", " groups_for_gridkey = all_plot_groups\n", - " \n", - " \n", + "\n", " # raise errors if gridkey_rows is not a list, or if the list is empty\n", " if isinstance(gridkey_rows, list) is False:\n", " raise TypeError(\"gridkey_rows must be a list.\")\n", " elif len(gridkey_rows) == 0:\n", " warnings.warn(\"gridkey_rows is an empty list.\")\n", - " \n", - " \n", + "\n", " # raise Warning if an item in gridkey_rows is not contained in any idx\n", " for i in gridkey_rows:\n", " in_idx = 0\n", @@ -1365,93 +1491,101 @@ " in_idx += 1\n", " if in_idx == 0:\n", " if is_paired is not None:\n", - " warnings.warn(i + \" is not in any idx. Please check. Alternatively, merging gridkey pairs may not be suitable for your data; try passing gridkey_merge_pairs=False.\")\n", + " warnings.warn(\n", + " i\n", + " + \" is not in any idx. Please check. Alternatively, merging gridkey pairs may not be suitable for your data; try passing gridkey_merge_pairs=False.\"\n", + " )\n", " else:\n", - " warnings.warn(i + \" is not in any idx. Please check.\") \n", - " \n", - " \n", + " warnings.warn(i + \" is not in any idx. 
Please check.\")\n", + "\n", " # Populate table: checks if idx for each column contains rowlabel name\n", " # IF so, marks that element as present w black dot, or space if not present\n", - " table_cellcols = [] \n", + " table_cellcols = []\n", " for i in gridkey_rows:\n", " thisrow = []\n", " for q in groups_for_gridkey:\n", " if str(i) in q:\n", - " thisrow.append(u\"\\u25CF\")\n", + " thisrow.append(\"\\u25CF\")\n", " else:\n", " thisrow.append(\"\")\n", " table_cellcols.append(thisrow)\n", - " \n", - " \n", + "\n", " # Adds a row for Ns with the Ns values\n", - " if gridkey_show_Ns == True:\n", + " if gridkey_show_Ns:\n", " gridkey_rows.append(\"Ns\")\n", " list_of_Ns = []\n", " for i in groups_for_gridkey:\n", " list_of_Ns.append(str(counts.loc[i]))\n", " table_cellcols.append(list_of_Ns)\n", "\n", - " \n", " # Adds a row for effectsizes with effectsize values\n", - " if gridkey_show_es == True:\n", - " gridkey_rows.append(u\"\\u0394\")\n", + " if gridkey_show_es:\n", + " gridkey_rows.append(\"\\u0394\")\n", " effsize_list = []\n", " results_list = results.test.to_list()\n", - " \n", + "\n", " # get the effect size, append + or -, 2 dec places\n", " for i in enumerate(groups_for_gridkey):\n", " if i[1] in results_list:\n", - " curr_esval = results.loc[results[\"test\"] == i[1]][\"difference\"].iloc[0]\n", - " curr_esval_str = np.format_float_positional(curr_esval,\n", - " precision=es_sf,\n", - " sign=True,\n", - " trim= 'k',\n", - " min_digits = es_sf)\n", + " curr_esval = results.loc[results[\"test\"] == i[1]][\n", + " \"difference\"\n", + " ].iloc[0]\n", + " curr_esval_str = np.format_float_positional(\n", + " curr_esval,\n", + " precision=es_sf,\n", + " sign=True,\n", + " trim=\"k\",\n", + " min_digits=es_sf,\n", + " )\n", " effsize_list.append(curr_esval_str)\n", " else:\n", " effsize_list.append(\"-\")\n", - " \n", + "\n", " table_cellcols.append(effsize_list)\n", - " \n", + "\n", " # If Gardner-Altman plot, plot on raw data and not contrast axes\n", - " 
if float_contrast == True:\n", + " if float_contrast:\n", " axes_ploton = rawdata_axes\n", " else:\n", " axes_ploton = contrast_axes\n", - " \n", + "\n", " # Account for extended x axis in case of show_delta2 or show_mini_meta\n", " x_groups_for_width = len(groups_for_gridkey)\n", - " if show_delta2 is True or show_mini_meta is True:\n", - " x_groups_for_width += 2 \n", + " if show_delta2 or show_mini_meta:\n", + " x_groups_for_width += 2\n", " gridkey_width = len(groups_for_gridkey) / x_groups_for_width\n", - " \n", - " gridkey = axes_ploton.table(cellText = table_cellcols, \n", - " rowLabels = gridkey_rows, \n", - " cellLoc = \"center\",\n", - " bbox = [0, -len(gridkey_rows)*0.1-0.05, gridkey_width, len(gridkey_rows)*0.1],\n", - " **{\"alpha\" : 0.5}) \n", - " \n", + "\n", + " gridkey = axes_ploton.table(\n", + " cellText=table_cellcols,\n", + " rowLabels=gridkey_rows,\n", + " cellLoc=\"center\",\n", + " bbox=[\n", + " 0,\n", + " -len(gridkey_rows) * 0.1 - 0.05,\n", + " gridkey_width,\n", + " len(gridkey_rows) * 0.1,\n", + " ],\n", + " **{\"alpha\": 0.5}\n", + " )\n", + "\n", " # modifies row label cells\n", " for cell in gridkey._cells:\n", " if cell[1] == -1:\n", " gridkey._cells[cell].visible_edges = \"open\"\n", - " gridkey._cells[cell].set_text_props(**{ \"ha\" : \"right\" }) \n", - " \n", + " gridkey._cells[cell].set_text_props(**{\"ha\": \"right\"})\n", + "\n", " # turns off both x axes\n", " rawdata_axes.get_xaxis().set_visible(False)\n", " contrast_axes.get_xaxis().set_visible(False)\n", - " \n", - " ####################################################### END GRIDKEY MAIN CODE WIP \n", - " \n", - " \n", - " \n", - " \n", + "\n", + " ####################################################### END GRIDKEY MAIN CODE WIP\n", + "\n", " # Make sure no stray ticks appear!\n", - " rawdata_axes.xaxis.set_ticks_position('bottom')\n", - " rawdata_axes.yaxis.set_ticks_position('left')\n", - " contrast_axes.xaxis.set_ticks_position('bottom')\n", + " 
rawdata_axes.xaxis.set_ticks_position(\"bottom\")\n", + " rawdata_axes.yaxis.set_ticks_position(\"left\")\n", + " contrast_axes.xaxis.set_ticks_position(\"bottom\")\n", " if float_contrast is False:\n", - " contrast_axes.yaxis.set_ticks_position('left')\n", + " contrast_axes.yaxis.set_ticks_position(\"left\")\n", "\n", " # Reset rcParams.\n", " for parameter in _changed_rcParams:\n", diff --git a/nbs/tests/test_01_effsizes_pvals.ipynb b/nbs/tests/test_01_effsizes_pvals.ipynb index fa848f90..717d7ff3 100644 --- a/nbs/tests/test_01_effsizes_pvals.ipynb +++ b/nbs/tests/test_01_effsizes_pvals.ipynb @@ -24,7 +24,7 @@ "outputs": [], "source": [ "from dabest._stats_tools import effsize\n", - "from dabest._classes import TwoGroupsEffectSize, PermutationTest, Dabest" + "from dabest import Dabest, TwoGroupsEffectSize, PermutationTest" ] }, { diff --git a/nbs/tests/test_03_plotting.py b/nbs/tests/test_03_plotting.py index 40a753a9..7311aec6 100644 --- a/nbs/tests/test_03_plotting.py +++ b/nbs/tests/test_03_plotting.py @@ -160,7 +160,7 @@ def test_10_cummings_multi_groups(): @pytest.mark.mpl_image_compare(tolerance=10) def test_11_inset_plots(): - + # TODO Better remove external dependencies in tests. Tests need to run locally without internet. # Load the iris dataset. Requires internet access. 
iris = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/iris.csv") iris_melt = pd.melt(iris.reset_index(), diff --git a/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb b/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb index b3f77f83..775113fd 100644 --- a/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb +++ b/nbs/tests/test_04_repeated_measures_effsizes_pvals.ipynb @@ -21,8 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "from dabest._stats_tools import effsize\n", - "from dabest._classes import TwoGroupsEffectSize, PermutationTest, Dabest, EffectSizeDataFrame" + "from dabest import Dabest" ] }, { diff --git a/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb b/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb index 521117dc..0bb2a7f0 100644 --- a/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb +++ b/nbs/tests/test_06_delta-delta_effsize_pvals.ipynb @@ -22,7 +22,7 @@ "outputs": [], "source": [ "from dabest._stats_tools import effsize\n", - "from dabest._classes import TwoGroupsEffectSize, PermutationTest, Dabest" + "from dabest import Dabest, PermutationTest" ] }, { diff --git a/nbs/tests/test_08_mini_meta_pvals.ipynb b/nbs/tests/test_08_mini_meta_pvals.ipynb index c5d58184..d989258a 100644 --- a/nbs/tests/test_08_mini_meta_pvals.ipynb +++ b/nbs/tests/test_08_mini_meta_pvals.ipynb @@ -21,7 +21,7 @@ "source": [ "from dabest._stats_tools import effsize\n", "from dabest._stats_tools import confint_2group_diff as ci2g\n", - "from dabest._classes import PermutationTest, Dabest" + "from dabest import Dabest, PermutationTest" ] }, { diff --git a/nbs/tests/test_10_proportion_plot.py b/nbs/tests/test_10_proportion_plot.py index 443f1007..02927471 100644 --- a/nbs/tests/test_10_proportion_plot.py +++ b/nbs/tests/test_10_proportion_plot.py @@ -2,16 +2,14 @@ import numpy as np import pandas as pd import matplotlib as mpl -mpl.use('Agg') + +mpl.use("Agg") import matplotlib.ticker as Ticker import matplotlib.pyplot as plt from 
dabest._api import load def create_demo_prop_dataset(seed=9999, N=40): - import numpy as np - import pandas as pd - np.random.seed(9999) # Fix the seed so the results are replicable. # Create samples n = 1 @@ -30,21 +28,32 @@ def create_demo_prop_dataset(seed=9999, N=40): t9 = np.zeros(N) # Add a `gender` column for coloring the data. - females = np.repeat('Female', N / 2).tolist() - males = np.repeat('Male', N / 2).tolist() + females = np.repeat("Female", N / 2).tolist() + males = np.repeat("Male", N / 2).tolist() gender = females + males # Add an `id` column for paired data plotting. id_col = pd.Series(range(1, N + 1)) # Combine samples and gender into a DataFrame. - df = pd.DataFrame({'Control 1': c1, 'Test 1': t1, - 'Control 2': c2, 'Test 2': t2, - 'Control 3': c3, 'Test 3': t3, - 'Test 4': t4, 'Test 5': t5, 'Test 6': t6, - 'Test 7': t7, 'Test 8': t8, 'Test 9': t9, - 'Gender': gender, 'ID': id_col - }) + df = pd.DataFrame( + { + "Control 1": c1, + "Test 1": t1, + "Control 2": c2, + "Test 2": t2, + "Control 3": c3, + "Test 3": t3, + "Test 4": t4, + "Test 5": t5, + "Test 6": t6, + "Test 7": t7, + "Test 8": t8, + "Test 9": t9, + "Gender": gender, + "ID": id_col, + } + ) return df @@ -53,127 +62,195 @@ def create_demo_prop_dataset(seed=9999, N=40): two_groups_unpaired = load(df, idx=("Control 1", "Test 1"), proportional=True) -multi_2group = load(df, idx=(("Control 1", "Test 1",), - ("Control 2", "Test 2")), - proportional=True) - -shared_control = load(df, idx=("Control 1", "Test 1", - "Test 2", "Test 3", - "Test 4", "Test 5", "Test 6"), - proportional=True) - -multi_groups = load(df, idx=(("Control 1", "Test 1",), - ("Control 2", "Test 2","Test 3"), - ("Control 3", "Test 4","Test 5", "Test 6") - ),proportional=True) - -two_groups_paired = load(df, idx=("Control 1", "Test 1"), - paired="baseline", id_col="ID",proportional=True) - -multi_2group_paired = load(df, idx=(("Control 1", "Test 1"), - ("Control 2", "Test 2")), - paired="baseline", id_col="ID", 
proportional=True) - -multi_groups_paired = load(df, idx=(("Control 1", "Test 1",), - ("Control 2", "Test 2","Test 3"), - ("Control 3", "Test 4","Test 5", "Test 6") - ),paired="baseline", id_col="ID", proportional=True) - -two_groups_sequential = load(df, idx=("Control 1", "Test 1"), - paired="sequential", id_col="ID",proportional=True) - -multi_2group_sequential = load(df, idx=(("Control 1", "Test 1"), - ("Control 2", "Test 2")), - paired="sequential", id_col="ID", proportional=True) - -multi_groups_sequential = load(df, idx=(("Control 1", "Test 1",), - ("Control 2", "Test 2","Test 3"), - ("Control 3", "Test 4","Test 5", "Test 6") - ),paired="sequential", id_col="ID", proportional=True) -shared_control_paired = load(df, idx=("Control 1", "Test 1", - "Test 2", "Test 3", - "Test 4", "Test 5", "Test 6"), - paired="sequential", id_col="ID", proportional=True) - -zero_to_zero = load(df, idx=('Test 7', 'Test 9'), - proportional=True, paired='sequential', id_col="ID") -zero_to_one = load(df, idx=('Test 7', 'Test 8'), - proportional=True, paired='sequential', id_col="ID") -one_to_zero = load(df, idx=('Test 8', 'Test 7'), - proportional=True, paired='sequential', id_col="ID") - -one_in_separate_control = load(df, idx=((("Control 1", "Test 1"), - ("Test 2", "Test 3"), - ("Test 4", "Test 8", "Test 6"))), - proportional=True, paired="sequential", id_col="ID") - - - - -@pytest.mark.mpl_image_compare +multi_2group = load( + df, + idx=( + ( + "Control 1", + "Test 1", + ), + ("Control 2", "Test 2"), + ), + proportional=True, +) + +shared_control = load( + df, + idx=("Control 1", "Test 1", "Test 2", "Test 3", "Test 4", "Test 5", "Test 6"), + proportional=True, +) + +multi_groups = load( + df, + idx=( + ( + "Control 1", + "Test 1", + ), + ("Control 2", "Test 2", "Test 3"), + ("Control 3", "Test 4", "Test 5", "Test 6"), + ), + proportional=True, +) + +two_groups_paired = load( + df, idx=("Control 1", "Test 1"), paired="baseline", id_col="ID", proportional=True +) + 
+multi_2group_paired = load( + df, + idx=(("Control 1", "Test 1"), ("Control 2", "Test 2")), + paired="baseline", + id_col="ID", + proportional=True, +) + +multi_groups_paired = load( + df, + idx=( + ( + "Control 1", + "Test 1", + ), + ("Control 2", "Test 2", "Test 3"), + ("Control 3", "Test 4", "Test 5", "Test 6"), + ), + paired="baseline", + id_col="ID", + proportional=True, +) + +two_groups_sequential = load( + df, idx=("Control 1", "Test 1"), paired="sequential", id_col="ID", proportional=True +) + +multi_2group_sequential = load( + df, + idx=(("Control 1", "Test 1"), ("Control 2", "Test 2")), + paired="sequential", + id_col="ID", + proportional=True, +) + +multi_groups_sequential = load( + df, + idx=( + ( + "Control 1", + "Test 1", + ), + ("Control 2", "Test 2", "Test 3"), + ("Control 3", "Test 4", "Test 5", "Test 6"), + ), + paired="sequential", + id_col="ID", + proportional=True, +) +shared_control_paired = load( + df, + idx=("Control 1", "Test 1", "Test 2", "Test 3", "Test 4", "Test 5", "Test 6"), + paired="sequential", + id_col="ID", + proportional=True, +) + +zero_to_zero = load( + df, idx=("Test 7", "Test 9"), proportional=True, paired="sequential", id_col="ID" +) +zero_to_one = load( + df, idx=("Test 7", "Test 8"), proportional=True, paired="sequential", id_col="ID" +) +one_to_zero = load( + df, idx=("Test 8", "Test 7"), proportional=True, paired="sequential", id_col="ID" +) + +one_in_separate_control = load( + df, + idx=( + (("Control 1", "Test 1"), ("Test 2", "Test 3"), ("Test 4", "Test 8", "Test 6")) + ), + proportional=True, + paired="sequential", + id_col="ID", +) + + +@pytest.mark.mpl_image_compare(tolerance=10) def test_101_gardner_altman_unpaired_propdiff(): - return two_groups_unpaired.mean_diff.plot(); + return two_groups_unpaired.mean_diff.plot() -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_103_cummings_two_group_unpaired_propdiff(): - return two_groups_unpaired.mean_diff.plot(fig_size=(4, 6), - 
float_contrast=False); + return two_groups_unpaired.mean_diff.plot(fig_size=(4, 6), float_contrast=False) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_105_cummings_multi_group_unpaired_propdiff(): - return multi_2group.mean_diff.plot(); + return multi_2group.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_106_cummings_shared_control_propdiff(): - return shared_control.mean_diff.plot(); + return shared_control.mean_diff.plot() -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_107_cummings_multi_groups_propdiff(): - return multi_groups.mean_diff.plot(); + return multi_groups.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_109_gardner_altman_ylabel(): - return two_groups_unpaired.mean_diff.plot(bar_label="This is my\nrawdata", - contrast_label="The bootstrap\ndistribtions!"); + return two_groups_unpaired.mean_diff.plot( + bar_label="This is my\nrawdata", contrast_label="The bootstrap\ndistribtions!" + ) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_110_change_fig_size(): - return two_groups_unpaired.mean_diff.plot(fig_size=(6, 6), - custom_palette="Dark2"); + return two_groups_unpaired.mean_diff.plot(fig_size=(6, 6), custom_palette="Dark2") -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_111_change_palette_b(): - return multi_2group.mean_diff.plot(custom_palette="Paired"); + return multi_2group.mean_diff.plot(custom_palette="Paired") + +my_color_palette = { + "Control 1": "blue", + "Test 1": "purple", + "Control 2": "#cb4b16", # This is a hex string. + "Test 2": (0.0, 0.7, 0.2), # This is a RGB tuple. +} -my_color_palette = {"Control 1" : "blue", - "Test 1" : "purple", - "Control 2" : "#cb4b16", # This is a hex string. - "Test 2" : (0., 0.7, 0.2) # This is a RGB tuple. 
- } -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_112_change_palette_c(): - return multi_2group.mean_diff.plot(custom_palette=my_color_palette); + return multi_2group.mean_diff.plot(custom_palette=my_color_palette) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_113_desat(): - return multi_2group.mean_diff.plot(custom_palette=my_color_palette, - bar_desat=0.1, - halfviolin_desat=0.25); + return multi_2group.mean_diff.plot( + custom_palette=my_color_palette, bar_desat=0.1, halfviolin_desat=0.25 + ) -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_114_change_ylims(): - return multi_2group.mean_diff.plot(contrast_ylim=(-2, 2)); + return multi_2group.mean_diff.plot(contrast_ylim=(-2, 2)) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_115_invert_ylim(): - return multi_2group.mean_diff.plot(contrast_ylim=(2, -2), - contrast_label="More negative is better!"); + return multi_2group.mean_diff.plot( + contrast_ylim=(2, -2), contrast_label="More negative is better!" 
+ ) -@pytest.mark.mpl_image_compare -def test_116_ticker_gardner_altman(): +@pytest.mark.mpl_image_compare(tolerance=10) +def test_116_ticker_gardner_altman(): fig = two_groups_unpaired.mean_diff.plot() rawswarm_axes = fig.axes[0] @@ -186,112 +263,135 @@ def test_116_ticker_gardner_altman(): contrast_axes.yaxis.set_minor_locator(Ticker.MultipleLocator(0.25)) return fig -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_117_err_color(): - return two_groups_unpaired.mean_diff.plot(err_color="purple"); + return two_groups_unpaired.mean_diff.plot(err_color="purple") + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_118_cummings_two_group_unpaired_meandiff_bar_width(): - return two_groups_unpaired.mean_diff.plot(bar_width=0.4,float_contrast=False); + return two_groups_unpaired.mean_diff.plot(bar_width=0.4, float_contrast=False) + np.random.seed(9999) Ns = [20, 10, 21, 20] -n=1 -c1 = pd.DataFrame({'Control':np.random.binomial(n, 0.2, size=Ns[0])}) -t1 = pd.DataFrame({'Test 1': np.random.binomial(n, 0.5, size=Ns[1])}) -t2 = pd.DataFrame({'Test 2': np.random.binomial(n, 0.4, size=Ns[2])}) -t3 = pd.DataFrame({'Test 3': np.random.binomial(n, 0.7, size=Ns[3])}) -wide_df = pd.concat([c1, t1, t2, t3],axis=1) +n = 1 +c1 = pd.DataFrame({"Control": np.random.binomial(n, 0.2, size=Ns[0])}) +t1 = pd.DataFrame({"Test 1": np.random.binomial(n, 0.5, size=Ns[1])}) +t2 = pd.DataFrame({"Test 2": np.random.binomial(n, 0.4, size=Ns[2])}) +t3 = pd.DataFrame({"Test 3": np.random.binomial(n, 0.7, size=Ns[3])}) +wide_df = pd.concat([c1, t1, t2, t3], axis=1) + +long_df = pd.melt( + wide_df, + value_vars=["Control", "Test 1", "Test 2", "Test 3"], + value_name="value", + var_name="group", +) +long_df["dummy"] = np.repeat(np.nan, len(long_df)) -long_df = pd.melt(wide_df, - value_vars=["Control", "Test 1", "Test 2", "Test 3"], - value_name="value", - var_name="group") -long_df['dummy'] = np.repeat(np.nan, len(long_df)) 
-@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_119_wide_df_nan(): + wide_df_dabest = load( + wide_df, idx=("Control", "Test 1", "Test 2", "Test 3"), proportional=True + ) - wide_df_dabest = load(wide_df, - idx=("Control", "Test 1", "Test 2", "Test 3"), - proportional=True - ) + return wide_df_dabest.mean_diff.plot() - return wide_df_dabest.mean_diff.plot(); -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_120_long_df_nan(): + long_df_dabest = load( + long_df, + x="group", + y="value", + idx=("Control", "Test 1", "Test 2", "Test 3"), + proportional=True, + ) - long_df_dabest = load(long_df, x="group", y="value", - idx=("Control", "Test 1", "Test 2", "Test 3"), - proportional=True - ) + return long_df_dabest.mean_diff.plot() - return long_df_dabest.mean_diff.plot(); -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_121_cohens_h_gardner_altman(): - return two_groups_unpaired.cohens_h.plot(); + return two_groups_unpaired.cohens_h.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_122_cohens_h_cummings(): - return two_groups_unpaired.cohens_h.plot(float_contrast=False); + return two_groups_unpaired.cohens_h.plot(float_contrast=False) -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_123_sankey_gardner_altman(): - return two_groups_paired.mean_diff.plot(); + return two_groups_paired.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_124_sankey_cummings(): - return two_groups_paired.mean_diff.plot(float_contrast=False); + return two_groups_paired.mean_diff.plot(float_contrast=False) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_125_sankey_2paired_groups(): - return multi_2group_paired.mean_diff.plot(); + return multi_2group_paired.mean_diff.plot() -@pytest.mark.mpl_image_compare + 
+@pytest.mark.mpl_image_compare(tolerance=10) def test_126_sankey_2sequential_groups(): - return multi_2group_sequential.mean_diff.plot(); + return multi_2group_sequential.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_127_sankey_multi_group_paired(): - return multi_groups_paired.mean_diff.plot(); + return multi_groups_paired.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_128_sankey_transparency(): - return two_groups_paired.mean_diff.plot(sankey_kwargs = {"alpha": 0.2}); + return two_groups_paired.mean_diff.plot(sankey_kwargs={"alpha": 0.2}) -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_129_zero_to_zero(): - return zero_to_zero.mean_diff.plot(); + return zero_to_zero.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_130_zero_to_one(): - return zero_to_one.mean_diff.plot(); + return zero_to_one.mean_diff.plot() + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_131_one_to_zero(): - return one_to_zero.mean_diff.plot(); + return one_to_zero.mean_diff.plot() -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_132_shared_control_sankey_off(): - return shared_control_paired.mean_diff.plot(sankey_kwargs={'sankey':False}); + return shared_control_paired.mean_diff.plot(sankey_kwargs={"sankey": False}) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_133_shared_control_flow_off(): - return shared_control_paired.mean_diff.plot(sankey_kwargs={'flow':False}); + return shared_control_paired.mean_diff.plot(sankey_kwargs={"flow": False}) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_134_separate_control_sankey_off(): - return multi_groups_sequential.mean_diff.plot(sankey_kwargs={'sankey':False}); + return 
multi_groups_sequential.mean_diff.plot(sankey_kwargs={"sankey": False}) -@pytest.mark.mpl_image_compare + +@pytest.mark.mpl_image_compare(tolerance=10) def test_135_separate_control_flow_off(): - return multi_groups_sequential.mean_diff.plot(sankey_kwargs={'flow':False}); + return multi_groups_sequential.mean_diff.plot(sankey_kwargs={"flow": False}) + -@pytest.mark.mpl_image_compare +@pytest.mark.mpl_image_compare(tolerance=10) def test_136_style_sheets(): # Perform this test last so we don't have to reset the plot style. plt.style.use("dark_background") - return multi_2group.mean_diff.plot(face_color="black"); \ No newline at end of file + return multi_2group.mean_diff.plot(face_color="black")