# Power Analysis in A/B Testing

## Example 1: Determining Sample Size

In [None]:
# Import
from statsmodels.stats.power import TTestIndPower

# Study design inputs: standardized effect size (Cohen's d), significance
# level, and the statistical power we want to reach
effect_size = 0.5
alpha = 0.05
power = 0.8

# Power calculator for an independent two-sample t-test
power_analysis = TTestIndPower()

# solve_power() solves for whichever argument is None; here that is nobs1,
# the number of observations in the first group. ratio=1.0 (equal group
# sizes) and alternative="two-sided" are the library defaults, written out
# so the assumptions behind the number are visible.
sample_size = power_analysis.solve_power(
    effect_size=effect_size,
    nobs1=None,
    alpha=alpha,
    power=power,
    ratio=1.0,
    alternative="two-sided",
)

# Report the per-group sample size
print(f"Required sample size: {sample_size:.2f}")

Required sample size: 63.77


# Overcoming Common A/B Testing Challenges

## Exercise 1: Implementing Bootstrapping


In [None]:
# Simulate some data for our 2 versions

# import
import numpy as np

# Fix NumPy's global seed so the simulated clicks are identical on every run
np.random.seed(37)

# Version A: 40 visitors, each clicking (1) with probability 0.3
version_A = np.random.choice(a=[0, 1], p=[0.7, 0.3], size=40)

# Version B: 40 visitors, each clicking (1) with probability 0.6
# (drawn after A, so the order of these two calls determines the data)
version_B = np.random.choice(a=[0, 1], p=[0.4, 0.6], size=40)

# Show the raw click indicators for both versions
print(f"Data for version A (0: no click, 1: click): {version_A}")
print(f"Data for version B (0: no click, 1: click): {version_B}")

Data for version A (0: no click, 1: click): [1 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 1 0 0 0 0 1 0
 0 1 0]
Data for version B (0: no click, 1: click): [0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1
 0 1 1]


In [None]:
# Bootstrapping

# Define a bootstrapping function
def bootstrap_diff(version_A, version_B, num_resamples=1000, confidence=95, rng=None):
    """Bootstrap the difference in means (B minus A) between two samples.

    Each iteration resamples both inputs with replacement (keeping their
    original sizes), then records mean(resample_B) - mean(resample_A).

    Parameters
    ----------
    version_A, version_B : 1-D array-like of numbers
        Observations for each variant (e.g. 0/1 click indicators).
    num_resamples : int, default 1000
        Number of bootstrap iterations; must be at least 1.
    confidence : float, default 95
        Confidence level in percent, strictly between 0 and 100. The interval
        is the central percentile interval of the bootstrap differences.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When None, NumPy's global random state is used,
        so a prior np.random.seed(...) call controls the result.

    Returns
    -------
    tuple
        (mean of the bootstrap differences,
         array [lower, upper] with the confidence interval bounds).

    Raises
    ------
    ValueError
        If either sample is empty, num_resamples < 1, or confidence is not
        strictly between 0 and 100.
    """
    if num_resamples < 1:
        raise ValueError("num_resamples must be at least 1")
    if not 0 < confidence < 100:
        raise ValueError("confidence must be strictly between 0 and 100")

    sample_A = np.asarray(version_A)
    sample_B = np.asarray(version_B)
    # Without this guard an empty sample yields NaN means and a NaN interval
    # with only a RuntimeWarning, which is easy to miss in notebook output.
    if len(sample_A) == 0 or len(sample_B) == 0:
        raise ValueError("version_A and version_B must each contain at least one observation")

    # Default to the global random state so existing seeded notebooks keep
    # producing exactly the same numbers.
    sampler = np.random if rng is None else rng

    # One bootstrap difference per iteration
    mean_diffs = np.empty(num_resamples)
    for i in range(num_resamples):
        # Resample with replacement; A is drawn before B on every iteration
        resample_A = sampler.choice(sample_A, size=len(sample_A), replace=True)
        resample_B = sampler.choice(sample_B, size=len(sample_B), replace=True)

        # Difference in means for this pair of resamples
        mean_diffs[i] = resample_B.mean() - resample_A.mean()

    # Central interval: for confidence=95 this is the 2.5th and 97.5th percentiles
    tail = (100 - confidence) / 2
    conf_interval = np.percentile(mean_diffs, [tail, 100 - tail])

    return np.mean(mean_diffs), conf_interval

# Run the bootstrapping function on the two simulated samples
mean_diff, conf_interval = bootstrap_diff(
    version_A=version_A,
    version_B=version_B,
)

# Report the average bootstrap difference (B minus A) and its 95% interval
print(f"Mean difference in CTR: {mean_diff:.4f}")
print(f"95% confidence interval for the difference: {conf_interval}")


Mean difference in CTR: 0.4431
95% confidence interval for the difference: [0.249375 0.625   ]
