### Hypothesis Tests (H-Tests) in Python

In [2]:
# In Python: test whether two different samples have the same mean
import scipy.stats as stats
import numpy as np

In [3]:
# Example: independent two-sample t-test.
# Seed NumPy's global RNG so a fresh "Restart & Run All" reproduces the same
# random draws (and therefore the same statistics) on every run.
np.random.seed(42)
group1 = np.random.normal(loc=50, scale=10, size=30)  # loc = mean, scale = std dev, size = number of samples
group2 = np.random.normal(loc=55, scale=10, size=30)

# H0: mean(group1) == mean(group2)  -> no difference
# Ha: mean(group1) != mean(group2)  -> significant difference
# Decision rule: if p_value < 0.05, reject H0 in favor of Ha.
t_statistic, p_value = stats.ttest_ind(group1, group2)
print(f"T-statistic: {t_statistic}")
print(f"P-value: {p_value}")

T-statistic: -2.949539548227186
P-value: 0.004582524652453492


In [4]:
# Compare the p-value against the 5% significance level and report the decision.
alpha = 0.05
decision = (
    "Reject the null hypothesis."
    if p_value < alpha
    else "Fail to reject the null hypothesis."
)
print(decision)

Reject the null hypothesis.


### 1-sample t-test

- Compare sample mean to a known population mean.

In [5]:
from scipy.stats import ttest_1samp
import numpy as np

In [8]:
# Sample data: 30 draws from a normal distribution with mean 25 and std dev 5.
data = np.random.normal(25, 5, 30)
data

array([26.0711462 , 17.69172495, 23.64331885, 27.53575618, 21.17253964,
       22.86286089, 21.04412521, 29.65789135, 29.49158876, 23.24446248,
       18.42255484, 30.43918527, 21.55355103, 29.82454888, 26.09825048,
       24.34897704, 25.18574763, 25.8638339 , 28.35740802, 14.7391708 ,
       21.14462855, 23.2936933 , 29.36694831, 24.33381755, 34.75766687,
       24.16476467, 24.65939481, 28.22970577, 28.12631211, 21.13950829])

In [9]:
# One-sample t-test: H0 is that the mean of the population behind `data` is 50.
t_stat, p_val = ttest_1samp(data, popmean=50)

print("t-statistic:", t_stat)
print("p-value:", p_val)

t-statistic: -31.952352573378782
p-value; 3.719562429344521e-24


##### Interpretation:

- If p_val < 0.05 → Reject H₀ (mean ≠ 50)
- If p_val ≥ 0.05 → Fail to reject H₀

#### Independent two-sample t-test

- Compare means of two independent groups
- If p < 0.05 → significant difference between groups.

In [10]:
from scipy.stats import ttest_ind

In [11]:
# Two independent samples of 30 draws each: same std dev (5), means 50 and 55.
group1 = np.random.normal(50, 5, 30)
group2 = np.random.normal(55, 5, 30)
# One print per group keeps the labels aligned and avoids stray whitespace.
print("Group-1:", group1)
print("Group-2:", group2)

Group-1; [46.42210661 39.05698496 43.24283755 48.69590307 47.4666155  56.95761769
 49.30834282 42.63934917 47.15559677 49.15504625 45.14297215 50.31575995
 46.29241367 50.93753304 56.2012614  43.81208261 57.59242795 53.614152
 41.77256185 64.40362189 47.42869546 44.2249333  45.00714734 47.15352789
 54.21829239 46.48883819 41.48350008 47.23368574 44.14357884 50.90741042] 
Group-2: [54.58487221 58.60629672 53.242493   55.81792998 60.93887005 49.866082
 49.49513633 49.03555046 59.18578071 59.72174006 58.90855291 53.34252508
 57.93100008 59.33928534 53.97413144 55.03256378 55.94100074 53.55369027
 62.23545797 53.69185186 52.61407876 55.96963573 65.26687406 55.56936958
 59.23883595 52.92182088 54.9168352  54.11669184 53.08535262 62.33477954]


In [12]:
# Independent two-sample t-test: H0 is that group1 and group2 share the same mean.
t_stat, p_val = ttest_ind(group1, group2)
print("t-statistic:", t_stat, "p-value:", p_val)

t-statu=istic: -6.234181927281126 p-value: 5.584794013141387e-08


#### Paired t-test

- For before/after or matched pairs.
- If p < 0.05 → the treatment had a significant effect.

In [13]:
from scipy.stats import ttest_rel

In [14]:
# Paired measurements: `after` is each `before` value minus a random
# reduction drawn from N(5, 3), i.e. the simulated treatment effect.
before = np.random.normal(100, 10, 30)
after = before - np.random.normal(5, 3, 30)  # after treatment

# One print per series keeps the labels aligned and avoids stray whitespace.
print("Before:", before)
print("After:", after)

Before; [110.93579802  98.65402053 101.0730244  101.96686522  99.55086741
  91.30407643 101.24323555 123.22603752  89.481409    95.06586499
  79.42490106 109.31419608  95.59990868 107.19050336  84.01123345
  97.98289078  94.32978868 102.6339287   90.86391359  98.93641836
  95.66476946  97.77857026  93.92204724  90.37448278  81.49347176
 105.42454132  82.35942075  92.63680395  99.83375425  85.47512218] 
 After: [105.97073694  92.59176517  97.19933622  99.33333674  96.17004883
  80.94379686  99.36136172 115.85936673  81.4793226   84.94098627
  74.11789444 109.07082112  86.44758763 101.99099673  80.07491473
  95.91667839  91.02699815  96.9783565   85.123534    95.0120825
  88.0359656   94.3348387   83.85667437  84.28355065  76.40981152
 100.24318929  72.98045677  88.42767657  91.78991859  77.79371922]


In [15]:
# Paired t-test: each `before` value is compared with its matching `after` value.
paired_result = ttest_rel(before, after)
t_stat, p_val = paired_result
print(f"t-statistic: {t_stat} p-value: {p_val}")

t-statistic: 11.705096538604927 p-value: 1.6543250377224349e-12


#### Chi-square test

- Check if two categorical variables are related.
- If p < 0.05 → significant relationship between gender & product liking.

In [17]:
import pandas as pd
from scipy.stats import chi2_contingency

In [18]:
# Example contingency table: rows are the response, columns are the gender.
data = pd.DataFrame(
    [[30, 20], [10, 40]],
    index=["Like Product", "Don't Like"],
    columns=["Male", "Female"],
)
data

Unnamed: 0,Male,Female
Like Product,30,20
Don't Like,10,40


In [22]:
# Chi-square test of independence on the contingency table.
# The result unpacks into the statistic, the p-value, the degrees of freedom
# and the table of expected counts.
# NOTE(review): SciPy applies Yates' continuity correction by default when
# dof == 1 (a 2x2 table) - confirm this is the statistic you want to report.
chi2, p, dof, expected = chi2_contingency(data)

print(f"Chi2: {chi2}")
print(f"p-value: {p}")

Chi2: 15.041666666666668
p-value: 0.00010516355403363114


#### ANOVA (3 or more groups)

- Compare multiple group means.
- If p < 0.05 → at least one group mean differs.

In [24]:
from scipy.stats import f_oneway

In [26]:
# Three independent groups of 30 draws each: same std dev (5), means 50, 55 and 60.
groupA = np.random.normal(50, 5, 30)
groupB = np.random.normal(55, 5, 30)
groupC = np.random.normal(60, 5, 30)

# One print per group keeps the labels consistent and avoids stray whitespace.
print("A:", groupA)
print("B:", groupB)
print("C:", groupC)
            

A: [49.54488685 47.14097181 63.63102118 55.0737603  45.1017202  62.55048175
 45.6449275  54.54340441 46.92409844 43.07785586 41.28940059 55.52803792
 51.48957434 53.90755701 50.23673219 46.93910073 47.65650086 58.41101528
 42.5261155  52.52183114 57.37885611 46.70239274 55.05937674 56.8293682
 50.71807417 46.86408189 47.21555377 52.61777185 43.50057634 54.43273778] 
 B; [50.07576227 46.73476048 48.30700297 54.55234439 52.98203628 53.35758623
 52.61105088 55.01799077 58.31940706 42.44141831 59.14628214 55.39898579
 47.14272151 58.36448543 56.49896827 57.4833482  50.82118079 54.32955686
 55.9610714  53.01682128 48.80973722 59.8420061  57.50879159 60.13138828
 54.01579779 59.33940593 57.47650912 49.88979407 50.81854604 56.39661958] 
 C: [68.12205474 54.78935401 56.46903221 60.76697362 58.71071861 67.39920824
 64.18733226 58.90991099 62.92072801 72.62987748 64.56340839 62.42547791
 58.79477358 50.78577241 63.28541956 58.32980919 63.51391051 66.38426566
 64.11927322 55.36895851 51.82131815 

In [27]:
# One-way ANOVA: H0 is that groupA, groupB and groupC all share the same mean.
anova_result = f_oneway(groupA, groupB, groupC)
f_stat, p_val = anova_result
print(f"F-statistic: {f_stat} p-value: {p_val}")

F-statistic: 27.041583449844797 p-value: 7.360652256731915e-10
