# 02 — Hypothesis Testing

Including: one-sample and two-sample t-tests, the two-proportion z-test, the chi-square test of independence, and a normality test.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
from scipy import stats

# Load the A/B-test data (path is relative to the notebook's directory)
# and preview its first rows.
ab = pd.read_csv(filepath_or_buffer="../data/ab_test.csv")
ab.head(n=5)

## Proportion test (two independent groups)

In [None]:
# Conversion rate and sample size in each group
is_control = ab.variant == "control"
is_treatment = ab.variant == "treatment"
p_c = ab.loc[is_control, "converted"].mean()
p_t = ab.loc[is_treatment, "converted"].mean()
n_c = is_control.sum()
n_t = is_treatment.sum()

# Pooled conversion rate under H0 (p_t == p_c). It is computed from the two
# compared groups only, so rows carrying any other variant label cannot inflate
# the numerator while being absent from the denominator.
p_pool = ab.loc[is_control | is_treatment, "converted"].sum()/(n_c+n_t)

# Z-statistic for the difference in proportions (pooled standard error)
se = np.sqrt(p_pool*(1-p_pool)*(1/n_c + 1/n_t))
z = (p_t - p_c)/se

# Two-sided p-value. The survival function sf(x) = 1 - cdf(x) is evaluated
# directly, which avoids the catastrophic cancellation of `1 - cdf(x)` that
# rounds the p-value to exactly 0 for large |z|.
p_value = 2*stats.norm.sf(abs(z))
p_c, p_t, z, p_value

## t-test (two independent samples)

In [None]:
# Synthetic scores for two independent groups; the global NumPy seed is fixed
# so the draws are reproducible on every run.
np.random.seed(0)
g1 = np.random.normal(loc=70, scale=10, size=80)
g2 = np.random.normal(loc=74, scale=10, size=75)

# equal_var=False selects Welch's t-test, which does not assume the two
# groups share a common variance.
welch_result = stats.ttest_ind(g1, g2, equal_var=False)
t_stat, p_val = welch_result
t_stat, p_val

## Chi-square test of independence

In [None]:
# Read the categorical data and build the gender x preference contingency table.
cats = pd.read_csv(filepath_or_buffer="../data/categorical.csv")
cont = pd.crosstab(index=cats["gender"], columns=cats["preference"])

# chi2_contingency returns, in order: the test statistic, the p-value,
# the degrees of freedom, and the table of expected frequencies.
chi2_result = stats.chi2_contingency(observed=cont)
chi2, p, dof, exp = chi2_result
cont, chi2, p

## Normality test (Shapiro–Wilk)

In [None]:
# Shapiro–Wilk test on the g1 sample; the null hypothesis is that the data
# were drawn from a normal distribution.
# Note: this rebinds `p` to the Shapiro–Wilk p-value.
shapiro_result = stats.shapiro(g1)
w, p = shapiro_result
w, p

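Interpretation: If `p-value < α` (e.g. 0.05), we reject the null hypothesis; otherwise we fail to reject it, which is not evidence that the null hypothesis is true.  
Tip: Alongside the p-value, report the effect size and the power of the test.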