In [4]:
import numpy as np
import statsmodels.stats.api as sms

In [6]:
# --- Test design parameters -------------------------------------------------
# alpha: significance level, i.e. the accepted Type I (false-positive) error rate.
# power: probability of detecting the effect when it really exists (1 - Type II error rate).
alpha = 0.05
power = 0.80

# --- Churn assumptions for the high-risk segment -----------------------------
# Hypothetical baseline: the segment flagged by the model is assumed to churn at 50%.
baseline_churn_rate = 0.50

# Minimum Detectable Effect (MDE), expressed as an ABSOLUTE reduction in churn:
# 5 percentage points, so the treatment group is expected to churn at 50% - 5% = 45%.
mde = 0.05
treatment_churn_rate = baseline_churn_rate - mde

In [7]:
# Cell 3: Sample Size Calculation
# Express the gap between the two churn proportions as a standardized effect size
# (Cohen's h, per the statsmodels documentation for proportion_effectsize).
effect_size = sms.proportion_effectsize(baseline_churn_rate, treatment_churn_rate)

# Solve the two-independent-sample normal power equation for the one quantity
# left unspecified (nobs1): the number of observations required in the first group.
power_analysis = sms.NormalIndPower()
sample_size_per_group = power_analysis.solve_power(
    effect_size=effect_size,
    nobs1=None,               # the unknown being solved for
    alpha=alpha,
    power=power,
    ratio=1.0,                # control and treatment groups of equal size
    alternative='two-sided',
)

In [8]:
# Cell 4: Print Results
# Round the solver's fractional answer UP: a partial customer cannot be enrolled,
# and rounding down would leave the test slightly under-powered.
n_per_group = int(np.ceil(sample_size_per_group))

report_lines = [
    "--- A/B Test Sample Size Calculation ---",
    f"Significance Level (alpha): {alpha}",
    f"Desired Power: {power}",
    f"Baseline Churn Rate: {baseline_churn_rate:.2%}",
    f"Minimum Detectable Effect (MDE): {mde:.2%}",
    "-" * 40,
    f"Required sample size per group (Control & Treatment): {n_per_group}",
    f"Total required sample size for the test: {n_per_group * 2}",
]
print("\n".join(report_lines))


--- A/B Test Sample Size Calculation ---
Significance Level (alpha): 0.05
Desired Power: 0.8
Baseline Churn Rate: 50.00%
Minimum Detectable Effect (MDE): 5.00%
----------------------------------------
Required sample size per group (Control & Treatment): 1565
Total required sample size for the test: 3130


In [9]:
# Cell 5: Business Story
# Translate the statistical result into a plain-language recruitment plan.
# The per-group size is rounded up once and reused for both groups (equal allocation).
group_size = int(np.ceil(sample_size_per_group))
print(
    "\n--- Business Plan ---\n"
    "To confidently detect if our retention offer reduces churn by at least 5 percentage points,\n"
    f"we need to target approximately {group_size} high-risk customers for the treatment group\n"
    f"and observe {group_size} high-risk customers in a control group (who receive no offer)."
)


--- Business Plan ---
To confidently detect if our retention offer reduces churn by at least 5 percentage points,
we need to target approximately 1565 high-risk customers for the treatment group
and observe 1565 high-risk customers in a control group (who receive no offer).
