### Client Project: Compare Two Business Strategies (A/B)

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from scipy import stats

In [2]:
data = load_breast_cancer(as_frame=True)
df = data.frame

In [3]:
kpi = df["mean radius"].to_numpy()

Simulate two business strategies (A vs B)

In [4]:
np.random.seed(42)  # reproducibility
n = len(kpi)
mask = np.random.choice([0, 1], size=n)  # random split into 2 groups

A = kpi[mask == 0]
B = kpi[mask == 1]

print(f"Strategy A sample size: {len(A)}")
print(f"Strategy B sample size: {len(B)}")

Strategy A sample size: 275
Strategy B sample size: 294


Perform Welch's t-test

In [5]:
t_stat, p_val = stats.ttest_ind(A, B, equal_var=False)

# 95% CI for difference in means
def diff_means_ci(a, b, confidence=0.95):
    a, b = np.asarray(a), np.asarray(b)
    na, nb = len(a), len(b)
    ma, mb = a.mean(), b.mean()
    sa2, sb2 = a.var(ddof=1), b.var(ddof=1)
    se = np.sqrt(sa2/na + sb2/nb)
    df = (sa2/na + sb2/nb)**2 / ((sa2**2)/((na**2)*(na-1)) + (sb2**2)/((nb**2)*(nb-1)))
    alpha = 1 - confidence
    tcrit = stats.t.ppf(1 - alpha/2, df=df)
    diff = mb - ma
    return diff - tcrit*se, diff + tcrit*se, diff

ci_lo, ci_hi, diff = diff_means_ci(A, B)

In [6]:
# Effect size (Cohen's d)
def cohens_d_welch(a, b):
    sa2, sb2 = a.var(ddof=1), b.var(ddof=1)
    sp = np.sqrt((sa2 + sb2)/2)
    return (b.mean() - a.mean()) / sp

d = cohens_d_welch(A, B)

Print Report

In [7]:
print("\n===== Statistical Analysis Report =====")
print(f"Strategy A mean KPI: {A.mean():.3f} (n={len(A)})")
print(f"Strategy B mean KPI: {B.mean():.3f} (n={len(B)})")
print(f"Mean difference (B - A): {diff:.3f}")
print(f"95% CI for difference: [{ci_lo:.3f}, {ci_hi:.3f}]")
print(f"Welch t-test: t={t_stat:.3f}, p={p_val:.4f}")
print(f"Cohen's d (effect size): {d:.3f}")
print("======================================")


===== Statistical Analysis Report =====
Strategy A mean KPI: 14.059 (n=275)
Strategy B mean KPI: 14.191 (n=294)
Mean difference (B - A): 0.132
95% CI for difference: [-0.450, 0.714]
Welch t-test: t=-0.446, p=0.6556
Cohen's d (effect size): 0.037
