In [3]:
from scipy.stats import t as t_test
import statistics as stat

In [20]:
def t_test_1sample(data_list,u0,print_out=True):
    # Test if the data has the same mean of u0
    mean=stat.mean(data_list)
    stdev=stat.stdev(data_list)
    t=(mean-u0)/stdev*(len(data_list)**.5)
#     print(t)
    abs_t=t if t>0 else -t
    df=len(data_list)-1
    p=(1-t_test.cdf(abs_t,df))*2
    # Here P is two tailed test, for one tailed test 1/2
    if print_out:
        print("mean = %.2f"%mean)
        print("stdev = %.2f"%stdev)
        print("t = %.2f"%t)
        print("df = %.2f"%df)
        print("Two-tailed test p=%.2f"%p)
    return {"t":t,"p":p,"df":df,"mean":mean,"stdev":stdev}
    

In [21]:
t_test_1sample([1,2,3,4,5,6,7,8,9,10],3)

mean = 5.50
stdev = 3.03
t = 2.61
df = 9.00
Two-tailed test p=0.03


{'t': 2.611164839335468,
 'p': 0.028216803759726572,
 'df': 9,
 'mean': 5.5,
 'stdev': 3.0276503540974917}

In [9]:
numbers = [37, 42, 39, 35, 35, 35, 39, 41, 43, 42, 41, 39, 44, 43, 42, 42, 38, 41, 41, 42, 38, 42, 42, 42, 43, 39, 41, 42, 42, 32, 38, 38, 42, 47, 40, 38, 39, 36, 39, 42, 37, 33, 45, 40, 44, 38, 38, 42, 41, 39, 38, 34, 35, 37, 41, 34, 32, 34, 35, 41, 42, 40, 42, 43, 45, 39, 39, 39, 33, 36, 36, 34, 37, 36, 39, 37, 41, 36, 38, 38, 38, 42, 38, 37, 39, 38, 41, 39, 42, 37, 39, 39, 35, 36, 37, 37, 40, 42, 32, 37, 35, 36, 39, 39, 36, 36, 40, 35, 34, 34, 35, 35, 35, 37, 37, 37]
t_test_1sample(numbers,40)

t = -4.86
Two tails p=0.00


{'t': -4.8616386142837085, 'p': 3.7207722420884437e-06}

In [30]:
# Assumptions:
#   Pooled t-test assumes equal population variances between the two groups.
#   Unpooled t-test (Welch's t-test) does not assume equal population variances.
# Degrees of freedom (df) calculation:
#   Pooled t-test: df = n1 + n2 - 2
#   Unpooled t-test: Uses a more complex formula (Welch–Satterthwaite equation) 
#   that typically results in non-integer df.
#        df = (s1^2/n1 + s2^2/n2)^2 / [(s1^2/n1)^2/(n1-1) + (s2^2/n2)^2/(n2-1)]
# Variance estimation:
#   Pooled t-test uses a weighted average of the two sample variances (pooled variance).
#   Unpooled t-test uses separate variance estimates for each group.
# Applicability:
#   Pooled t-test is more appropriate when variances are truly equal or sample sizes are equal.
#   Unpooled t-test is more robust and generally recommended when unsure about variance equality.


def t_test_2samples(l1,l2,print_out=True):
    u1=stat.mean(l1)
    u2=stat.mean(l2)
    s1=stat.stdev(l1)
    s2=stat.stdev(l2)
    n1=len(l1)
    n2=len(l2)
    # t = (x̄₁ - x̄₂) / √((s₁²/n₁) + (s₂²/n₂))
    # df = (s1^2/n1 + s2^2/n2)^2 / [(s1^2/n1)^2/(n1-1) + (s2^2/n2)^2/(n2-1)]
    t=(u1-u2)/(s1*s1/n1+s2*s2/n2)**.5
    df=(s1*s1/n1+s2*s2/n2)**2/((s1*s1/n1)**2/(n1-1)+(s2*s2/n2)**2/(n2-1))
    t=t if t>0 else -t
    p=(1-t_test.cdf(t,df))*2
    # Here P is two tailed test, for one tailed test 1/2
    if print_out:
        print("u1 = %.2f\ts1 = %.2f\tn1 = %d"%(u1,s1,n1))
        print("u2 = %.2f\ts2 = %.2f\tn2 = %d"%(u2,s2,n2))
        print("t = %.2f"%t)
        print("df = %.2f"%df)
        print("Two-tailed unpooled test p=%.2f"%p)
    return {"t":t,"p":p,"df":df,"u1":u1,"u2":u2,"s1":s1,"s2":s2,"n1":n1,"n2":n2}

In [31]:
females = [95, 123, 74, 145, 64, 112, 107, 67, 81, 91, 142, 84, 85, 92, 112, 112, 115, 116]

males = [84, 128, 79, 98, 105, 95, 79, 93, 99, 119, 92, 112, 99, 113, 128, 111, 105, 104, 106, 128, 134, 172]
t_test_2samples(females,males)

u1 = 100.94	s1 = 23.44	n1 = 18
u2 = 108.32	s2 = 21.10	n2 = 22
t = 1.04
df = 34.68
Two-tailed unpooled test p=0.31


{'t': 1.0350868507284576,
 'p': 0.30779289334506643,
 'df': 34.678156799282156,
 'u1': 100.94444444444444,
 'u2': 108.31818181818181,
 's1': 23.435699821933863,
 's2': 21.09928088062846,
 'n1': 18,
 'n2': 22}