In [5]:
import numpy as np
# Seed NumPy's legacy global RNG so the np.random.* draws below are reproducible
np.random.seed(42)

In [6]:
def loop_bootstrap(a: np.ndarray, bs_iters: int = 10000, agg: str = "mean", **kwargs) -> np.ndarray:
    """Bootstrap the sampling distribution of an aggregate statistic of ``a``

    On every iteration a resample of len(a) elements is drawn from ``a`` with
    replacement (via the global np.random state) and reduced to one statistic.

    Arguments:
    a:          Dataset to sample from
    bs_iters:   Number of sampling iterations
    agg:        Aggregation method to use, must be in ["mean", "median", "quantile"]
    Optional Arguments:
    q:          Quantile or sequence of quantiles passed to np.quantile;
                required when agg == "quantile"
    Returns:
    np.ndarray holding one statistic per iteration: shape (bs_iters,), or
    (bs_iters, len(q)) when q is a sequence
    Raises:
    ValueError if agg is not supported, or if agg == "quantile" and q is missing
    """
    if agg not in ["mean", "median", "quantile"]:
        raise ValueError("agg should be in ['mean', 'median', 'quantile']")

    # Resolve the statistic once instead of re-checking `agg` on every iteration.
    if agg == "quantile":
        if "q" not in kwargs:
            raise ValueError("agg='quantile' requires the keyword argument 'q'")
        q = kwargs["q"]

        def statistic(sample):
            # q belongs to np.quantile (it was previously passed to list.append by mistake)
            return np.quantile(sample, q=q)
    elif agg == "median":
        statistic = np.median
    else:
        statistic = np.mean

    n = len(a)
    # One resample per iteration keeps memory at O(len(a)) and preserves the
    # order of np.random.choice calls, so seeded results are unchanged.
    return np.array([statistic(np.random.choice(a, n, replace=True)) for _ in range(bs_iters)])

def bootstrap_diff(
    a: np.ndarray, b: np.ndarray, bs_iters: int = 10000, metric: str = "mean", **kwargs
) -> np.ndarray:
    """Return bootstrapped differences (B - A) of a metric between two datasets

    Each dataset is bootstrapped independently with loop_bootstrap (A first,
    then B) and the per-iteration statistics are subtracted element-wise.

    Arguments:
    a:          Dataset A (control)
    b:          Dataset B (treatment)
    bs_iters:   Number of sampling iterations for each bootstrap
    metric:     Metric to compute difference from, forwarded to loop_bootstrap as ``agg``
    Optional Arguments:
    **kwargs:   Forwarded unchanged to loop_bootstrap (e.g. q for quantiles)
    Returns:
    Flattened 1-D np.ndarray of the differences b - a
    """
    control_stats = loop_bootstrap(a, bs_iters, agg=metric, **kwargs)
    treatment_stats = loop_bootstrap(b, bs_iters, agg=metric, **kwargs)

    delta = np.asarray(treatment_stats - control_stats)
    return delta.flatten()

def ci(diffs: np.ndarray, alpha: float = 0.05, **kwargs):
    # -> tuple[tuple[float, float], bool]:
    """Return the percentile confidence interval of ``diffs`` and whether it excludes zero

    Arguments:
        diffs:  Dataset to compute percentiles upon
        alpha:  Confidence interval alpha, in [0, 1]; the interval spans the
                alpha/2 and 1 - alpha/2 quantiles of diffs
        kwargs: Accepted for backward compatibility; ignored
    Returns:
        tuple[tuple[CI left, CI right], bool]: the bool is True if the CI
        excludes zero (both bounds strictly on the same side of it), else False
    Raises:
        ValueError: if alpha is outside [0, 1]
    """
    if not 0 <= alpha <= 1:
        raise ValueError("alpha should be in [0, 1]")

    ci_l, ci_r = np.percentile(diffs, [(alpha / 2) * 100, (1 - alpha / 2) * 100])

    # Compare the bounds with zero directly: the product ci_l * ci_r can
    # underflow to 0.0 for tiny same-sign bounds and hide a significant result.
    # Since ci_l <= ci_r, "same sign" means ci_l > 0 or ci_r < 0.
    significant = bool(ci_l > 0 or ci_r < 0)
    return (ci_l, ci_r), significant

In [7]:
# Simulated A/B data: B's true mean is a_mean scaled up by the relative `effect`
effect = 0.01
a_mean = 10
sample_size = 100_000

# Both groups are drawn from normal distributions with standard deviation 1
a = np.random.normal(a_mean, 1, sample_size)
b = np.random.normal(a_mean*(1+effect), 1, sample_size)

# Observed sample means of the two simulated groups
print("A MEAN: ", np.mean(a))
print("B MEAN: ", np.mean(b))

A MEAN:  10.00096686814095
B MEAN:  10.100981034036048


In [9]:
# Bootstrap the difference in means (B - A) between the two groups
diffs = bootstrap_diff(a, b, bs_iters=10_000, metric="mean")
# ci() is called with its default alpha (0.05); `sig` is True when both CI bounds share a sign
ci_diffs, sig = ci(diffs)
print("CI DIFF:     ", ci_diffs)
print("SIGNIFICANT: ", sig)

CI DIFF:      (0.09131586456412144, 0.10875123646705277)
SIGNIFICANT:  True
