In [1]:
import pandas as pd
import numpy as np
import scipy
import scipy.stats as sp
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
A = np.array([12,20,39,17,23,32,21,23,10,9,17,24])
B = np.array([21,17,14,40,31,29,34,18,14,25,9,28])

In [3]:
AB = np.array([np.append(np.zeros(len(A)), np.ones(len(B))), np.append(A,B)])
AB


array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
         1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [12., 20., 39., 17., 23., 32., 21., 23., 10.,  9., 17., 24., 21.,
        17., 14., 40., 31., 29., 34., 18., 14., 25.,  9., 28.]])

In [4]:
AB_t = AB.T
AB_t

array([[ 0., 12.],
       [ 0., 20.],
       [ 0., 39.],
       [ 0., 17.],
       [ 0., 23.],
       [ 0., 32.],
       [ 0., 21.],
       [ 0., 23.],
       [ 0., 10.],
       [ 0.,  9.],
       [ 0., 17.],
       [ 0., 24.],
       [ 1., 21.],
       [ 1., 17.],
       [ 1., 14.],
       [ 1., 40.],
       [ 1., 31.],
       [ 1., 29.],
       [ 1., 34.],
       [ 1., 18.],
       [ 1., 14.],
       [ 1., 25.],
       [ 1.,  9.],
       [ 1., 28.]])

A shuffled data with labels 
0 -> A
1 -> B

In [5]:
np.random.shuffle(AB_t)
AB_t

array([[ 1., 14.],
       [ 1., 18.],
       [ 1., 14.],
       [ 1., 40.],
       [ 0., 10.],
       [ 0., 23.],
       [ 1.,  9.],
       [ 0., 39.],
       [ 1., 28.],
       [ 0., 21.],
       [ 1., 34.],
       [ 0., 17.],
       [ 0., 20.],
       [ 1., 25.],
       [ 1., 29.],
       [ 0.,  9.],
       [ 0., 24.],
       [ 0., 32.],
       [ 1., 21.],
       [ 1., 31.],
       [ 0., 23.],
       [ 0., 17.],
       [ 1., 17.],
       [ 0., 12.]])

Create random lebels for the datas -> Chance

In [6]:
experimental_data = np.array([np.random.randint(0,2,len(AB_t)), AB_t[:, 1]]).T
experimental_data 

array([[ 1., 14.],
       [ 1., 18.],
       [ 1., 14.],
       [ 0., 40.],
       [ 0., 10.],
       [ 1., 23.],
       [ 0.,  9.],
       [ 0., 39.],
       [ 1., 28.],
       [ 0., 21.],
       [ 1., 34.],
       [ 0., 17.],
       [ 1., 20.],
       [ 0., 25.],
       [ 0., 29.],
       [ 1.,  9.],
       [ 0., 24.],
       [ 1., 32.],
       [ 0., 21.],
       [ 1., 31.],
       [ 0., 23.],
       [ 1., 17.],
       [ 0., 17.],
       [ 0., 12.]])

In [80]:

def ab_test(group_a: list, group_b: list, n_bootstraps: int, confidence_level: float = 0.95) -> tuple:
    """
    Performs A/B testing using bootstrap to compare the means of two groups.
    
    Args:
        group_a (list): Data for group A (e.g., conversion rates, sales).
        group_b (list): Data for group B.
        n_bootstraps (int): Number of bootstrap iterations.
        confidence_level (float): Confidence level for the confidence interval (default: 0.95).
    
    Returns:
        tuple: (mean_diff, p_value, conf_interval)
            - mean_diff: Observed difference in means (group_a - group_b).
            - p_value: P-value for the null hypothesis (no difference between groups).
            - conf_interval: Confidence interval for the difference in means.
    """
    # Convert inputs to numpy arrays for efficiency
    group_a = np.array(group_a)
    group_b = np.array(group_b)
    
    # Check for valid inputs
    if len(group_a) == 0 or len(group_b) == 0:
        raise ValueError("Input groups cannot be empty.")
    
    # Observed difference in means
    observed_diff = np.mean(group_a) - np.mean(group_b)
    
    # Combine data for bootstrap
    combined_data = np.concatenate([group_a, group_b])
    len_a = len(group_a)
    
    # Array to store bootstrap differences
    bootstrap_diffs = np.empty(n_bootstraps)
    
    # Bootstrap loop
    for i in range(n_bootstraps):
        # Randomly assign data to two groups
        indices = np.random.choice(len(combined_data), size=len(combined_data), replace=True)
        bootstrap_sample = combined_data[indices]
        bootstrap_a = bootstrap_sample[:len_a]
        bootstrap_b = bootstrap_sample[len_a:]
        
        # Calculate difference in means
        bootstrap_diffs[i] = np.mean(bootstrap_a) - np.mean(bootstrap_b)
    
    # Calculate p-value (two-tailed test)
    p_value = np.mean(np.abs(bootstrap_diffs) >= np.abs(observed_diff))
    
    # Calculate confidence interval
    conf_interval = np.percentile(bootstrap_diffs, [(1 - confidence_level) / 2 * 100, (1 + confidence_level) / 2 * 100])
    
    return observed_diff, p_value, conf_interval


In [83]:
ab_test(B,A,1000)

(2.75, 0.447, array([-6.66666667,  7.25208333]))