In [12]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats

def compare_ab(control: np.ndarray, treatment: np.ndarray):
    """
    Compare the two arms of an A/B test with two estimators and print both.

    1) Two‐sample t‐test with pooled variance (``equal_var=True``).
    2) OLS regression of the outcome on an intercept plus a 0/1 treatment dummy.

    Parameters
    ----------
    control : array-like
        Outcomes observed in the control arm. Coerced to a float ndarray.
    treatment : array-like
        Outcomes observed in the treatment arm. Coerced to a float ndarray.

    Returns
    -------
    None
        The difference in means, t statistic and p-value of the t-test, and
        the full statsmodels regression summary are written to stdout.

    Raises
    ------
    ValueError
        If either arm contains no observations.
    """
    # Accept lists / Series as well as ndarrays; `.mean()` below needs an array.
    control = np.asarray(control, dtype=float)
    treatment = np.asarray(treatment, dtype=float)

    # Fail early: with an empty arm the means are NaN and the treatment dummy
    # is constant, so neither estimator is meaningful.
    if control.size == 0 or treatment.size == 0:
        raise ValueError(
            "control and treatment must each contain at least one observation"
        )

    # --- Two‐sample t‐test ---
    # Pooled variance is used so the test lines up with the default
    # (non-robust) OLS standard errors reported in the second section.
    t_stat, p_val = stats.ttest_ind(treatment, control, equal_var=True)
    diff = treatment.mean() - control.mean()

    # --- OLS regression ---
    # Long format: control rows first (dummy = 0), then treatment rows (dummy = 1).
    df = pd.DataFrame({
        'y': np.concatenate([control, treatment]),
        'treatment': np.concatenate([np.zeros_like(control), np.ones_like(treatment)])
    })
    X = sm.add_constant(df['treatment'])
    model = sm.OLS(df['y'], X).fit()

    # --- Print results ---
    print("=== Two‐sample t‐test ===")
    print(f"Estimate (μ₁–μ₀): {diff:.4f}")
    print(f"t‐statistic:         {t_stat:.4f}")
    print(f"p‐value:             {p_val:.4e}\n")

    print("=== OLS Regression ===")
    # The summary table already contains the treatment coefficient and p-value.
    print(model.summary())

if __name__ == "__main__":
    # Fix the global NumPy seed so the simulated samples (and the printed
    # results) are reproducible from run to run.
    np.random.seed(0)
    n = 1000

    # Control is drawn before treatment; the order matters because both
    # draws consume the same seeded global random stream.
    control = np.random.normal(0.0, 1.0, n)
    treatment = np.random.normal(0.01, 1.0, n)

    # Run the t-test and the regression on the same simulated data.
    compare_ab(control, treatment)


=== Two‐sample t‐test ===
Estimate (μ₁–μ₀): 0.0689
t‐statistic:         1.5745
p‐value:             1.1553e-01

=== OLS Regression ===
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     2.479
Date:                Fri, 20 Jun 2025   Prob (F-statistic):              0.116
Time:                        21:26:16   Log-Likelihood:                -2792.7
No. Observations:                2000   AIC:                             5589.
Df Residuals:                    1998   BIC:                             5601.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.9

In [2]:
    # Re-simulate both arms with a fixed seed: equal sample sizes and unit
    # standard deviation, with the treatment mean shifted to 0.2.
    np.random.seed(0)
    n = 1000
    # Control is drawn first, then treatment, from the same seeded stream.
    control = np.random.normal(0.0, 1.0, n)
    treatment = np.random.normal(0.2, 1.0, n)

In [6]:
    # --- OLS regression ---
    # Stack both arms into one long frame: the outcome plus a 0/1 arm
    # indicator (control rows first, then treatment rows).
    df = pd.DataFrame(
        {
            'y': np.concatenate((control, treatment)),
            'treatment': np.concatenate(
                (np.zeros_like(control), np.ones_like(treatment))
            ),
        }
    )
    # Add the intercept column next to the treatment dummy, then fit.
    X = sm.add_constant(df.treatment)
    model = sm.OLS(df.y, X).fit()
    # Pull out the estimate and p-value attached to the treatment dummy.
    reg_coef, reg_pval = model.params['treatment'], model.pvalues['treatment']

In [11]:
# Display the regressor frame X; the rendered output elides the middle rows.
X

Unnamed: 0,const,treatment
0,1.0,0.0
1,1.0,0.0
2,1.0,0.0
3,1.0,0.0
4,1.0,0.0
...,...,...
1995,1.0,1.0
1996,1.0,1.0
1997,1.0,1.0
1998,1.0,1.0


In [9]:
# List the column labels of the regressor frame X.
X.columns

Index(['const', 'treatment'], dtype='object')

In [10]:
# Preview only the first rows of X instead of rendering the whole frame.
X.head()

Unnamed: 0,const,treatment
0,1.0,0.0
1,1.0,0.0
2,1.0,0.0
3,1.0,0.0
4,1.0,0.0
