In [1]:
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.stats.weightstats import ztest

# Build a 10-row toy dataset: customers 1-5 form the Control group and
# customers 6-10 form the Test group, each with one purchase amount.
control_amounts = [100, 110, 105, 120, 115]
test_amounts = [95, 102, 108, 112, 118]

data = {
    'Customer': range(1, 11),
    'PurchaseAmount': control_amounts + test_amounts,
    'Group': ['Control'] * len(control_amounts) + ['Test'] * len(test_amounts),
}
df = pd.DataFrame(data)
print(df)

   Customer  PurchaseAmount    Group
0         1             100  Control
1         2             110  Control
2         3             105  Control
3         4             120  Control
4         5             115  Control
5         6              95     Test
6         7             102     Test
7         8             108     Test
8         9             112     Test
9        10             118     Test


In [2]:
# One-sample t-test on the whole PurchaseAmount column.
#   H0: mean purchase amount == 100
#   H1: mean purchase amount != 100
hypothesized_mean = 100
alpha = 0.05  # significance level used for the reject / fail-to-reject call

t_stat, p_val = stats.ttest_1samp(df['PurchaseAmount'], hypothesized_mean)
print(f"One-Sample T-Test: T-stat={t_stat:.4f}, P-value={p_val:.4f}")
# Report the decision at the chosen significance level.
print("Reject H0" if p_val < alpha else "Fail to reject H0")

One-Sample T-Test: T-stat=3.3240, P-value=0.0089
Reject H0


In [3]:
# Independent two-sample t-test: do Control and Test purchase amounts differ?
# Each group's amounts are pulled out with a boolean mask on the Group column.
is_control = df['Group'] == 'Control'
is_test = df['Group'] == 'Test'
group_control = df.loc[is_control, 'PurchaseAmount']
group_test = df.loc[is_test, 'PurchaseAmount']

# Called with SciPy's default settings (no keyword overrides).
t_stat_ind, p_val_ind = stats.ttest_ind(group_control, group_test)
print(f"Two-Sample T-Test: T-stat={t_stat_ind:.4f}, P-value={p_val_ind:.4f}")

Two-Sample T-Test: T-stat=0.5639, P-value=0.5883


In [4]:
# One-Sample Z-Test
# H0: Mean Purchase Amount = 100
# H1: Mean Purchase Amount != 100
#
# NOTE: statsmodels' ztest does not accept a known population standard
# deviation; it estimates the standard error from the sample itself
# (default ddof=1). The resulting statistic therefore equals the one-sample
# t-statistic for the same data and hypothesized mean. Only the p-value
# differs, because it is taken from the standard normal distribution rather
# than Student's t. With just 10 observations the normal reference
# distribution understates the p-value, so prefer the t-test for inference.
z_stat, p_val_z = ztest(df['PurchaseAmount'], value=100, alternative='two-sided')
print(f"One-Sample Z-Test: Z-stat={z_stat:.4f}, P-value={p_val_z:.4f}")

One-Sample Z-Test: Z-stat=3.3240, P-value=0.0009
