In [None]:
import pytest
import lqrt
import numpy as np
from numpy import median as npmedian
from numpy import mean as npmean
import scipy as sp
import pandas as pd

In [None]:
from dabest._stats_tools import effsize
from dabest._classes import TwoGroupsEffectSize, PermutationTest, Dabest

In [None]:
# Shared fixtures for the tests in this notebook.
#
# Unpaired two-group data, from Table 11.1 (p. 287) of:
# Cumming, G. Understanding the New Statistics:
# Effect Sizes, Confidence Intervals, and Meta-Analysis. Routledge, 2012.
wb = dict(
    control=[34, 54, 33, 44, 45, 53, 37, 26, 38, 58],
    expt=[66, 38, 35, 55, 48, 39, 65, 32, 57, 41],
)
# One column per group, ten rows each.
wellbeing = pd.DataFrame(data=wb)



# Paired (pre/post) data, from Table 11.2 (p. 291) of Cumming 2012.
# "ID" numbers the ten rows 0..9; the paired Dabest test in this notebook
# passes it as id_col.
paired_wb = dict(
    pre=[43, 28, 54, 36, 31, 48, 50, 69, 29, 40],
    post=[51, 33, 58, 42, 39, 45, 54, 68, 35, 44],
    ID=np.arange(10),
)
paired_wellbeing = pd.DataFrame(data=paired_wb)



# Binary (0/1) data used by the Cohen's h test in this notebook.
# Cohen's h only works with binary data.
# See Venables, W. N. and Ripley, B. D. (2002) Modern Applied Statistics with S. Fourth edition. Springer.
# Two groups of `smoke` were made by choosing `low` as a standard, and the
# data is trimmed from the back.
sk = dict(
    low=[
        0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
        1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
    ],
    high=[
        1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,
        0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1,
    ],
)
smoke = pd.DataFrame(data=sk)



# Ordinal (Likert-style) scores for the Cliff's delta / Brunner-Munzel tests.
# Data from Hogarty and Kromrey (1999)
# NOTE(review): the heading above says 1999 while the citation below is
# dated 1998 -- confirm which publication the values come from.
# Kromrey, Jeffrey D., and Kristine Y. Hogarty. 1998.
# "Analysis Options for Testing Group Differences on Ordered Categorical
# Variables: An Empirical Investigation of Type I Error Control
# Statistical Power."
# Multiple Linear Regression Viewpoints 25 (1): 70 - 82.
likert_control = [1, 1, 2, 2, 2, 3, 3, 3, 4, 5]
likert_treatment = [1, 2, 3, 4, 4, 5]



# Two small score samples for the second Cliff's delta check.
# Data from Cliff (1993):
# Cliff, Norman. 1993. "Dominance Statistics: Ordinal Analyses to Answer
# Ordinal Questions."
# Psychological Bulletin 114 (3): 494-509.
a_scores, b_scores = [6, 7, 9, 10], [1, 3, 4, 7, 8]



# Keyword arguments unpacked into every Dabest(...) construction in this
# notebook. The random_seed value (12345) is the same number the lqrt
# reference calls pass as random_state.
dabest_default_kwargs = {
    "x": None,
    "y": None,
    "ci": 95,
    "resamples": 5000,
    "random_seed": 12345,
    "proportional": False,
    "delta2": False,
    "experiment": None,
    "experiment_label": None,
    "x1_level": None,
    "mini_meta": False,
}

test_mean_diff_unpaired

In [None]:
# Unpaired mean difference on the Cumming wellbeing fixture.
# Expected value: mean(expt) - mean(control) = 47.6 - 42.2 = 5.4.
control_scores = wellbeing["control"]
expt_scores = wellbeing["expt"]
mean_diff = effsize.func_difference(
    control_scores, expt_scores, np.mean, is_paired=False
)
assert mean_diff == pytest.approx(5.4)

test_median_diff_unpaired

In [None]:
# Unpaired median difference on the Cumming wellbeing fixture.
# Expected value: median(expt) - median(control) = 44.5 - 41 = 3.5.
control_scores = wellbeing["control"]
expt_scores = wellbeing["expt"]
median_diff = effsize.func_difference(
    control_scores, expt_scores, npmedian, is_paired=False
)
assert median_diff == pytest.approx(3.5)

test_mean_diff_paired

In [None]:
# Paired mean difference (is_paired="baseline") on the pre/post fixture.
# Expected value: mean(post) - mean(pre) = 46.9 - 42.8 = 4.1.
pre_scores = paired_wellbeing["pre"]
post_scores = paired_wellbeing["post"]
mean_diff = effsize.func_difference(
    pre_scores, post_scores, npmean, is_paired="baseline"
)
assert mean_diff == pytest.approx(4.10)

test_median_diff_paired

In [None]:
# Paired median difference (is_paired="baseline") on the pre/post fixture.
# 4.5 is the median of the row-wise (post - pre) differences; the difference
# of the two column medians would be 3.0 for this data.
pre_scores = paired_wellbeing["pre"]
post_scores = paired_wellbeing["post"]
median_diff = effsize.func_difference(
    pre_scores, post_scores, npmedian, is_paired="baseline"
)
assert median_diff == pytest.approx(4.5)

test_cohens_d_unpaired

In [None]:
# Cohen's d for the unpaired wellbeing groups, compared after rounding
# to two decimal places.
cohens_d = effsize.cohens_d(
    wellbeing["control"], wellbeing["expt"], is_paired=False
)
cohens_d_2dp = np.round(cohens_d, 2)
assert cohens_d_2dp == pytest.approx(0.47)

test_hedges_g_unpaired

In [None]:
# Hedges' g for the unpaired wellbeing groups, compared after rounding
# to two decimal places.
hedges_g = effsize.hedges_g(
    wellbeing["control"], wellbeing["expt"], is_paired=False
)
hedges_g_2dp = np.round(hedges_g, 2)
assert hedges_g_2dp == pytest.approx(0.45)

test_cohens_d_paired

In [None]:
# Cohen's d for the paired pre/post fixture (is_paired="baseline"),
# compared after rounding to two decimal places.
cohens_d = effsize.cohens_d(
    paired_wellbeing["pre"], paired_wellbeing["post"], is_paired="baseline"
)
cohens_d_2dp = np.round(cohens_d, 2)
assert cohens_d_2dp == pytest.approx(0.34)


test_hedges_g_paired

In [None]:
# Hedges' g for the paired pre/post fixture (is_paired="baseline"),
# compared after rounding to two decimal places.
hedges_g = effsize.hedges_g(
    paired_wellbeing["pre"], paired_wellbeing["post"], is_paired="baseline"
)
hedges_g_2dp = np.round(hedges_g, 2)
assert hedges_g_2dp == pytest.approx(0.33)

test_cohens_h

In [None]:
# Cohen's h between the two binary `smoke` columns (25/59 ones in `low`,
# 30/59 ones in `high`), compared after rounding to two decimal places.
low_group = smoke["low"]
high_group = smoke["high"]
cohens_h = effsize.cohens_h(low_group, high_group)
cohens_h_2dp = np.round(cohens_h, 2)
assert cohens_h_2dp == pytest.approx(0.17)

test_cliffs_delta

In [None]:
# Cliff's delta on the two ordinal fixtures.
# Mind the argument order: the treatment scores / b scores go first.

# Likert data: treatment first, control second -> -0.25.
likert_delta = effsize.cliffs_delta(likert_treatment, likert_control)
likert_expected = pytest.approx(-0.25)
assert likert_delta == likert_expected

# Cliff (1993) data: b first, a second -> 0.65.
scores_delta = effsize.cliffs_delta(b_scores, a_scores)
scores_expected = pytest.approx(0.65)
assert scores_delta == scores_expected

test_unpaired_stats

In [None]:
# Cross-check the unpaired p-values exposed by TwoGroupsEffectSize against
# direct scipy.stats calls on the same two groups.
control_scores = wellbeing["control"]
expt_scores = wellbeing["expt"]

unpaired_es = TwoGroupsEffectSize(
    control_scores, expt_scores, "mean_diff", is_paired=False, proportional=False
)

# Mann-Whitney U, two-sided alternative.
mann_whitney_ref = sp.stats.mannwhitneyu(
    control_scores, expt_scores, alternative="two-sided"
)
assert unpaired_es.pvalue_mann_whitney == pytest.approx(mann_whitney_ref.pvalue)

# Independent-samples t-test with equal_var left at scipy's default.
students_t_ref = sp.stats.ttest_ind(
    control_scores, expt_scores, nan_policy="omit"
)
assert unpaired_es.pvalue_students_t == pytest.approx(students_t_ref.pvalue)

# Welch's variant: same call with equal_var=False.
welch_ref = sp.stats.ttest_ind(
    control_scores, expt_scores, equal_var=False, nan_policy="omit"
)
assert unpaired_es.pvalue_welch == pytest.approx(welch_ref.pvalue)

test_paired_stats

In [None]:
# Cross-check the paired p-values exposed by TwoGroupsEffectSize against
# direct scipy.stats calls on the same pre/post columns.
pre_scores = paired_wellbeing["pre"]
post_scores = paired_wellbeing["post"]

paired_es = TwoGroupsEffectSize(
    pre_scores, post_scores, "mean_diff", is_paired="baseline", proportional=False
)

# Related-samples t-test.
paired_t_ref = sp.stats.ttest_rel(pre_scores, post_scores, nan_policy="omit")
assert paired_es.pvalue_paired_students_t == pytest.approx(paired_t_ref.pvalue)

# Wilcoxon signed-rank test with scipy's default options.
wilcoxon_ref = sp.stats.wilcoxon(pre_scores, post_scores)
assert paired_es.pvalue_wilcoxon == pytest.approx(wilcoxon_ref.pvalue)

test_median_diff_stats

In [None]:
# For the "median_diff" effect size, check the Kruskal-Wallis p-value
# exposed by TwoGroupsEffectSize against scipy.stats.kruskal.
control_scores = wellbeing["control"]
expt_scores = wellbeing["expt"]

es = TwoGroupsEffectSize(
    control_scores, expt_scores, "median_diff", is_paired=False, proportional=False
)

kruskal_ref = sp.stats.kruskal(control_scores, expt_scores, nan_policy="omit")
assert es.pvalue_kruskal == pytest.approx(kruskal_ref.pvalue)

When plotting, please consider using percetile confidence intervals by specifying `ci_type='percentile'`. For detailed information, refer to https://github.com/ACCLAB/DABEST-python/issues/129 



test_ordinal_dominance

In [None]:
# For the "cliffs_delta" effect size, check the Brunner-Munzel p-value
# exposed by TwoGroupsEffectSize against scipy.stats.brunnermunzel.
# Both calls take the plain Python lists, control first.
es = TwoGroupsEffectSize(
    likert_control,
    likert_treatment,
    "cliffs_delta",
    is_paired=False,
    proportional=False,
)

brunner_munzel_ref = sp.stats.brunnermunzel(likert_control, likert_treatment)
assert es.pvalue_brunner_munzel == pytest.approx(brunner_munzel_ref.pvalue)

test_unpaired_permutation_test

In [None]:
# Permutation test on the unpaired wellbeing groups.
# No seed or permutation count is passed here, so the pinned p-value
# relies on PermutationTest's own defaults.
perm_test = PermutationTest(
    wellbeing["control"],
    wellbeing["expt"],
    effect_size="mean_diff",
    is_paired=False,
)
expected_pvalue = pytest.approx(0.2976)
assert perm_test.pvalue == expected_pvalue

test_paired_permutation_test

In [None]:
# Permutation test on the paired pre/post columns (is_paired="baseline").
# No seed or permutation count is passed here, so the pinned p-value
# relies on PermutationTest's own defaults.
perm_test = PermutationTest(
    paired_wellbeing["pre"],
    paired_wellbeing["post"],
    effect_size="mean_diff",
    is_paired="baseline",
)
expected_pvalue = pytest.approx(0.0124)
assert perm_test.pvalue == expected_pvalue

test_lqrt_unpaired

In [None]:
# Compare the Lq-likelihood-ratio p-values reported through Dabest with
# direct lqrt.lqrtest_ind calls on the same two columns.
unpaired_dabest = Dabest(
    wellbeing,
    idx=("control", "expt"),
    paired=None,
    id_col=None,
    **dabest_default_kwargs,
)
lqrt_result = unpaired_dabest.mean_diff.lqrt

# Reference results; random_state is the same number as the random_seed
# entry in dabest_default_kwargs.
control_scores = wellbeing["control"]
expt_scores = wellbeing["expt"]
equal_var_ref = lqrt.lqrtest_ind(
    control_scores, expt_scores, equal_var=True, random_state=12345
)
unequal_var_ref = lqrt.lqrtest_ind(
    control_scores, expt_scores, equal_var=False, random_state=12345
)

# [0] takes the first entry of each reported p-value collection.
dabest_equal_var = lqrt_result.pvalue_lqrt_equal_var[0]
dabest_unequal_var = lqrt_result.pvalue_lqrt_unequal_var[0]
assert dabest_equal_var == pytest.approx(equal_var_ref.pvalue)
assert dabest_unequal_var == pytest.approx(unequal_var_ref.pvalue)

test_lqrt_paired

In [None]:
# Compare the paired Lq-likelihood-ratio p-value reported through Dabest
# with a direct lqrt.lqrtest_rel call on the same pre/post columns.
paired_dabest = Dabest(
    paired_wellbeing,
    idx=("pre", "post"),
    paired="baseline",
    id_col="ID",
    **dabest_default_kwargs,
)
lqrt_result = paired_dabest.mean_diff.lqrt

# Reference result; random_state is the same number as the random_seed
# entry in dabest_default_kwargs.
paired_ref = lqrt.lqrtest_rel(
    paired_wellbeing["pre"], paired_wellbeing["post"], random_state=12345
)

# [0] takes the first entry of the reported p-value collection.
dabest_paired = lqrt_result.pvalue_paired_lqrt[0]
assert dabest_paired == pytest.approx(paired_ref.pvalue)