In [75]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from joblib import Parallel, delayed
import plotly.graph_objects as go
from tqdm import tqdm



In [74]:

def calculate_sample_size_classic(power, alpha, p, mde):
    """
    Calculates the sample size using the classical formula.
    """
    z_alpha = norm.ppf(1 - alpha / 2)
    z_beta = norm.ppf(power)
    pooled_variance = p * (1 - p)
    sample_size = ((z_alpha + z_beta)**2 * pooled_variance) / (mde**2)
    return int(np.ceil(sample_size / 100.0) * 100)  # Round up to the nearest 100

def simulate_single_run(p, mde, sample_size, alpha):
    """
    Simulates a single A/B test run.
    """
    group_a = np.random.binomial(1, p, sample_size)
    group_b = np.random.binomial(1, p + mde, sample_size)

    mean_a = np.mean(group_a)
    mean_b = np.mean(group_b)

    pooled_std = np.sqrt(((mean_a * (1 - mean_a)) + (mean_b * (1 - mean_b))) / sample_size)
    z_score = (mean_b - mean_a) / pooled_std

    critical_value = norm.ppf(1 - alpha / 2)
    return abs(z_score) > critical_value

def simulate_ab_test_monte_carlo_parallel(p, mde, sample_size, n_simulations, alpha=0.05):
    """
    Simulates A/B testing using Monte Carlo with parallelization.
    """
    results = Parallel(n_jobs=-1)(
        delayed(simulate_single_run)(p, mde, sample_size, alpha) for _ in range(n_simulations)
    )
    return np.mean(results)

def compare_sample_size_methods(p_values, mde_values, power, alpha, n_simulations):
    """
    Compares classical formula sample size calculation with Monte Carlo method across multiple baselines.
    """
    results = {}
    table_rows = []

    for p in tqdm(p_values, desc="Processing Baseline p Values"):
        results[p] = []

        for mde in tqdm(mde_values, desc=f"Processing MDE for p = {p}", leave=False):
            # Classic method
            classic_sample_size = calculate_sample_size_classic(power, alpha, p, mde)

            # Monte Carlo method
            monte_carlo_sample_size = None
            for sample_size in range(100, 50000, 100):
                power_estimated = simulate_ab_test_monte_carlo_parallel(p, mde, sample_size, n_simulations, alpha)
                if power_estimated >= power:
                    monte_carlo_sample_size = sample_size
                    break

            # Calculate absolute difference
            if monte_carlo_sample_size:
                difference_absolute = abs(monte_carlo_sample_size - classic_sample_size)
            else:
                difference_absolute = None

            results[p].append((mde, classic_sample_size, monte_carlo_sample_size))
            table_rows.append({
                "Baseline p": p,
                "MDE (%)": mde * 100,
                "Classic Sample Size": classic_sample_size,
                "Monte Carlo Sample Size": monte_carlo_sample_size,
                "Difference (Absolute)": difference_absolute
            })

        # Visualization for convergence of sample sizes
        mde_percent = [mde * 100 for mde, _, _ in results[p]]
        classic_sizes = [classic for _, classic, _ in results[p]]
        monte_carlo_sizes = [mc if mc is not None else 0 for _, _, mc in results[p]]

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=mde_percent, y=classic_sizes, mode='lines+markers', name='Classic Sample Size'))
        fig.add_trace(go.Scatter(x=mde_percent, y=monte_carlo_sizes, mode='lines+markers', name='Monte Carlo Sample Size'))
        fig.update_layout(
            title=f'Sample Size Convergence: p = {p}',
            xaxis_title='MDE (%)',
            yaxis_title='Sample Size per Group',
            legend_title='Method',
            template='plotly_white'
        )
        fig.show()

    # Tabular results using pandas
    df_results = pd.DataFrame(table_rows)
    print("\nSummary Table:\n")
    print(df_results)

# Input params
alpha = 0.05
p_values = [0.2, 0.5]  # Different baseline conversion rates
mde_values = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, .1 , 0.15]  # Minimum detectable effects
power = 0.8  # Statistical power
n_simulations = 5000  # Number of simulations

compare_sample_size_methods(p_values, mde_values, power, alpha, n_simulations)


Processing Baseline p Values:   0%|          | 0/2 [00:00<?, ?it/s]

Processing Baseline p Values:  50%|█████     | 1/2 [02:26<02:26, 146.07s/it]

Processing Baseline p Values: 100%|██████████| 2/2 [07:48<00:00, 234.39s/it]


Summary Table:

    Baseline p  MDE (%)  Classic Sample Size  Monte Carlo Sample Size  \
0          0.2      1.0                12600                    25300   
1          0.2      2.0                 3200                     6700   
2          0.2      3.0                 1400                     3100   
3          0.2      4.0                  800                     1700   
4          0.2      5.0                  600                     1200   
5          0.2      6.0                  400                      800   
6          0.2      7.0                  300                      600   
7          0.2     10.0                  200                      300   
8          0.2     15.0                  100                      200   
9          0.5      1.0                19700                    39500   
10         0.5      2.0                 5000                     9700   
11         0.5      3.0                 2200                     4300   
12         0.5      4.0           


