Run the code cell in the Code section below before proceeding; it defines the functions and widgets that the calculator needs.

# Code

In [1]:
import ipywidgets as widgets
from IPython.display import display
import numpy as np
from scipy import stats

def calculate_sample_size(p1, p2, power, alpha, alternative, enrollment_ratio):
    '''
    A function to calculate the required sample size for a proportion test.

    The sample size comes from the normal approximation with separate
    (unpooled) variances for the two groups:

        n_a = (z_alpha + z_beta)^2 * (p1*(1-p1) + p2*(1-p2)/enrollment_ratio) / (p2 - p1)^2
        n_b = n_a * enrollment_ratio

    Inputs
    ------
    p1: Probability of success in group A (default conversion rate)
    p2: Probability of success in group B (variant conversion rate)
    power: The desired level of statistical power
    alpha: Type 1 error rate
    alternative: 'one-sided' for a one-tailed test; any other value is treated as two-tailed
    enrollment_ratio: The ratio of group B to group A, in case of an unbalanced test

    Outputs
    -------
    n_a: Required sample size for group A (rounded up)
    n_b: Required sample size for group B (rounded up)

    Raises
    ------
    ValueError: If p1 or p2 is outside [0, 1], if power or alpha is not strictly
                between 0 and 1, if enrollment_ratio is not positive, or if
                p1 equals p2 (a zero effect has no finite sample size)
    '''
    # Validate up front: otherwise bad inputs surface as NaN/inf conversion
    # errors or as silently meaningless (e.g. negative) sample sizes.
    for name, prob in (('p1', p1), ('p2', p2)):
        if not 0 <= prob <= 1:
            raise ValueError(f'{name} must be between 0 and 1, got {prob}')
    for name, level in (('power', power), ('alpha', alpha)):
        if not 0 < level < 1:
            raise ValueError(f'{name} must be strictly between 0 and 1, got {level}')
    if not enrollment_ratio > 0:
        raise ValueError(f'enrollment_ratio must be positive, got {enrollment_ratio}')
    if p1 == p2:
        raise ValueError('p1 and p2 must differ: a zero effect cannot be detected with a finite sample')

    # A one-tailed test puts all of alpha in one tail; a two-tailed test splits it
    z_alpha = stats.norm.ppf(1 - alpha) if alternative == 'one-sided' else stats.norm.ppf(1 - alpha/2)
    z_beta = stats.norm.ppf(power)

    numerator = (z_alpha + z_beta)**2 * ((p1*(1-p1) / 1) + (p2*(1-p2) / enrollment_ratio))
    denominator = (p2 - p1)**2

    n_a = numerator / denominator
    n_b = n_a * enrollment_ratio

    # Round up so the achieved power is never below the requested power
    return int(np.ceil(n_a)), int(np.ceil(n_b))

def calculate_effect_size(p1, n_a, n_b, power, alpha, alternative):
    '''
    A function to calculate the MDE (minimum detectable effect) given a fixed sample size.

    The baseline variance p1*(1-p1) is used for both groups, so the result
    is an absolute difference in conversion rate around p1.

    Inputs
    ------
    p1: Probability of success in group A (default conversion rate)
    n_a: Sample size for group A
    n_b: Sample size for group B
    power: The desired level of statistical power
    alpha: Type 1 error rate
    alternative: 'one-sided' for a one-tailed test; any other value is treated as two-tailed

    Outputs
    -------
    mde: The minimum detectable effect (absolute difference in proportions)
    '''
    # A one-tailed test puts all of alpha in one tail; a two-tailed test splits it
    tail_probability = alpha if alternative == 'one-sided' else alpha/2
    critical_z = stats.norm.ppf(1 - tail_probability)
    power_z = stats.norm.ppf(power)

    # Variance of the difference in proportions under the baseline rate
    baseline_variance = p1*(1-p1)
    variance_of_difference = baseline_variance/n_a + baseline_variance/n_b

    return np.sqrt((critical_z + power_z)**2 * variance_of_difference)

def update_output(b):
    '''
    Callback for the Calculate button: read the widget values, run the selected
    calculation and write the result to the output label.

    Invalid inputs are reported in the output label instead of raising inside
    the callback, where the error would not be visible in the calculator.

    Inputs
    ------
    b: The argument passed in by the button's on_click dispatcher (unused)

    Outputs
    -------
    None. The result, or a message describing the invalid input, is written
    to output.value.
    '''
    # Dropdown values look like '80%'; drop the trailing '%' before converting
    power = float(power_dropdown.value[:-1]) / 100
    alternative = 'one-sided' if tails_dropdown.value == 'One-tailed' else 'two-sided'
    alpha = float(alpha_input.value)
    p1 = float(base_rate.value) / 100
    enrollment_ratio = float(enrollment_ratio_input.value)

    # Checks shared by both calculations (the comparisons also reject NaN)
    if not 0 < alpha < 1:
        output.value = "Invalid input: Alpha must be strictly between 0 and 1."
        return
    if not 0 < p1 < 1:
        output.value = "Invalid input: Base CR % must be strictly between 0 and 100."
        return

    if calc_dropdown.value == 'Population size given MDE':
        if not enrollment_ratio > 0:
            output.value = "Invalid input: Enrollment must be greater than 0."
            return

        effect_type = effect_type_dropdown.value
        effect_size = float(effect_size_input.value) / 100

        if effect_type == 'Relative':
            p2 = p1 * (1 + effect_size)
        else:  # Absolute
            p2 = p1 + effect_size

        # A zero effect has no finite sample size; a rate outside 0-100% is not a probability
        if p2 == p1:
            output.value = "Invalid input: Effect % must be non-zero."
            return
        if not 0 <= p2 <= 1:
            output.value = "Invalid input: the effect puts the variant conversion rate outside 0-100%."
            return

        n_a, n_b = calculate_sample_size(p1, p2, power, alpha, alternative, enrollment_ratio)
        total = n_a + n_b

        output.value = f"Required sample size: • Group A: {n_a} • Group B: {n_b} • Total: {total}"
    else:  # Calculate effect size
        n_a = int(group_a_size.value)
        n_b = int(group_b_size.value)

        # Both groups appear in a denominator of the MDE formula
        if n_a <= 0 or n_b <= 0:
            output.value = "Invalid input: Group A size and Group B size must be greater than 0."
            return

        effect = calculate_effect_size(p1, n_a, n_b, power, alpha, alternative)

        # Report the MDE in percentage points and as a percentage of the base rate
        absolute_effect = effect * 100
        relative_effect = (effect / p1) * 100

        output.value = f"Minimum detectable effect at {power*100:.0f}% power: • Absolute: {absolute_effect:.2f}% • Relative: {relative_effect:.2f}%"

# Create widgets
# Keyword arguments that only restated the widget defaults (disabled=False,
# button_style='') are omitted.

# Test configuration
power_dropdown = widgets.Dropdown(
    description='Power',
    options=['50%', '60%', '70%', '80%', '90%'],
    value='80%',
)

tails_dropdown = widgets.Dropdown(
    description='Test type',
    options=['One-tailed', 'Two-tailed'],
    value='Two-tailed',
)

# Chooses which of the two calculations the Calculate button runs
calc_dropdown = widgets.Dropdown(
    description='Calculate',
    options=['Population size given MDE', 'MDE given population size'],
    value='Population size given MDE',
)

# Baseline conversion rate, entered in percent
base_rate = widgets.FloatText(description='Base CR %', value=5)

# Inputs for the sample size calculation
effect_type_dropdown = widgets.Dropdown(
    description='Effect type:',
    options=['Relative', 'Absolute'],
    value='Relative',
)

effect_size_input = widgets.FloatText(description='Effect %', value=10)

# Inputs for the MDE calculation
group_a_size = widgets.IntText(description='Group A size', value=10_000)

group_b_size = widgets.IntText(description='Group B size', value=10_000)

# Shared statistical settings
alpha_input = widgets.FloatText(description='Alpha', value=0.05)

enrollment_ratio_input = widgets.FloatText(description='Enrollment', value=1.0)

calculate_button = widgets.Button(
    description='Calculate',
    tooltip='Click to calculate',
    icon='check',
)

# Label that receives the formatted result
output = widgets.Label()

# Layout
# Inputs that are always visible
input_widgets = widgets.VBox(children=[
    power_dropdown,
    tails_dropdown,
    calc_dropdown,
    base_rate,
    alpha_input,
    enrollment_ratio_input,
])
# Inputs for the sample size calculation
effect_widgets = widgets.VBox(children=[effect_type_dropdown, effect_size_input])
# Inputs for the MDE calculation
group_size_widgets = widgets.VBox(children=[group_a_size, group_b_size])

def on_calc_change(change):
    '''
    Show only the inputs that belong to the selected calculation.

    Inputs
    ------
    change: A mapping whose 'new' entry holds the selected calculation name
    '''
    sizing_selected = change['new'] == 'Population size given MDE'

    # An empty display value shows the box; 'none' hides it
    effect_widgets.layout.display = '' if sizing_selected else 'none'
    group_size_widgets.layout.display = 'none' if sizing_selected else ''

# Re-run the visibility toggle whenever the selected calculation changes
calc_dropdown.observe(on_calc_change, 'value')

# Initial update: apply the toggle once for the default selection
on_calc_change(dict(new=calc_dropdown.value))

# Calculator

This is a calculator program that, given some pre-specifications, computes either
 1. The required sample size for an experiment given a pre-defined minimum detectable effect, or
 2. The minimum detectable effect given pre-defined sample sizes.


 The calculations assume a binary (Bernoulli) outcome, such as converted vs. did not convert.

 The inputs are as follows:
 * Power: The desired level of statistical power
 * Test type: One- or two-tailed test. The one-tailed test analyzes whether group B outperforms group A
 * Calculate: Either the required sample size given an MDE, or the MDE given a pre-defined sample size
 * Base CR %: The default conversion rate
 * Alpha: Type 1 error rate
 * Enrollment: The ratio of the population in group B over group A, in case of unbalanced tests
 * Effect type: Whether the specified expected effect is relative or absolute
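 * Effect %: The expected effect of the experiment, in percent (relative to the base conversion rate, or in absolute percentage points, depending on Effect type)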

 Run the code chunk below to start the calculations.

In [2]:
# Set up interactivity: the button runs the selected calculation
calculate_button.on_click(update_output)

# Display widgets: inputs first, then the button and the result label
display(widgets.VBox(children=[
    input_widgets,
    effect_widgets,
    group_size_widgets,
    calculate_button,
    output,
]))

VBox(children=(VBox(children=(Dropdown(description='Power', index=3, options=('50%', '60%', '70%', '80%', '90%…