In [2]:
# Leeds Doctoral College - University of Leeds
# Quantitative Data Analysis with Python 
# By Heider Jeffer
# June 20, 2024

# One-sample t-test example in Python

import numpy as np  
from scipy import stats  

# Seed NumPy's global random generator so that "Restart Kernel -> Run All"
# reproduces the same samples (and therefore the same statistics) every time.
SEED = 42
np.random.seed(SEED)

# Population mean under the null hypothesis (H0: the true mean equals mu)
mu = 10

# Sample size
N1 = 21

# Degrees of freedom of the one-sample t-test (not needed by SciPy here, but
# used by the manual calculation in the following cell)
dof = N1 - 1

# Draw N1 values from a normal distribution with mean = 11 and standard deviation = 1
x = np.random.randn(N1) + 11

# One-sample t-test of H0: mean(x) == mu, using SciPy (two-sided by default)
t_stat, p_val = stats.ttest_1samp(a=x, popmean=mu)
print("t-statistic = " + str(t_stat))
print("p-value = " + str(p_val))


t-statistic = 3.806834533835485
p-value = 0.0011047641393841622


In [3]:
# A different approach: the same one-sample t-test computed by hand.
# 1. Compute the sample mean (x_bar).
# 2. Compute the sample standard deviation with ddof=1, i.e. dividing by
#    N1 - 1 instead of N1 (the unbiased estimate from a sample).
# 3. Compute the standard error of the mean: std / sqrt(N1).
# 4. Compute the t-statistic: t = (x_bar - mu) / standard error.
# 5. Compute the two-sided p-value to establish the significance of the t-statistic.
#
# This cell relies on x, mu, N1 and dof defined by the one-sample cell above.


# Sample mean
x_bar = x.mean()

# Sample standard deviation (ddof=1 -> divisor N1 - 1)
std = np.std(x, ddof=1)

# Standard error of the mean
ste = std / np.sqrt(N1)

# t-statistic
t_stat = (x_bar - mu) / ste

# Two-sided p-value of the t-statistic.
# The survival function sf(t) = 1 - cdf(t) is evaluated directly, which avoids
# the loss of precision of computing "1 - cdf" when |t| is large and the tail
# probability is tiny (the subtraction would otherwise round to 0).
p_val = 2 * stats.t.sf(abs(t_stat), df=dof)
print("t-statistic = " + str(t_stat))
print("p-value = " + str(p_val))

t-statistic = 3.8068345338354854
p-value = 0.0011047641393842067


In [4]:
# Two-sample (independent samples) t-test example in Python
# Definitions:
# X1 = first sample mean
# X2 = second sample mean
# S1 = first sample standard deviation
# S2 = second sample standard deviation
# N1 = first sample size
# N2 = second sample size

# Sample sizes
N1, N2 = 21, 25

# Degrees of freedom of the pooled-variance (Student) two-sample t-test,
# which is the test stats.ttest_ind performs by default (equal_var=True)
dof = N1 + N2 - 2

# Gaussian distributed data with mean = 10.5 and var = 1
x = np.random.randn(N1) + 10.5

# Gaussian distributed data with mean = 9.5 and var = 1
y = np.random.randn(N2) + 9.5

## Using the built-in function from the SciPy package
t_stat, p_val = stats.ttest_ind(x, y)
print("t-statistic = " + str(t_stat))
print("p-value = " + str(p_val))

t-statistic = 4.864444171753306
p-value = 1.5067062040159094e-05


In [5]:
# Paired (related samples) t-test in Python
#
# Both samples have the same length: observation i of x is paired with
# observation i of y.

# Number of pairs
N = 25

# Degrees of freedom of the paired test: number of pairs minus one
dof = N - 1

# First measurement: standard normal noise shifted to mean = 10.5 (var = 1)
x = 10.5 + np.random.standard_normal(N)

# Second measurement: standard normal noise shifted to mean = 9.9 (var = 1)
y = 9.9 + np.random.standard_normal(N)

# Related-samples t-test from SciPy; report statistic and p-value on two lines
t_stat, p_val = stats.ttest_rel(a=x, b=y)
print("t-statistic = " + str(t_stat), "p-value = " + str(p_val), sep="\n")

t-statistic = 3.6414609079714952
p-value = 0.001296582478322071


In [6]:
# Welch’s t-test example in Python (two independent samples, unequal variances)

# Sample sizes
N1, N2 = 21, 25

# Gaussian distributed data with mean = 9.9 and var = 1
x = np.random.randn(N1) + 9.9

# Gaussian distributed data with mean = 10 and standard deviation = 3 (var = 9):
# multiplying standard normal noise by 3 scales the standard deviation, not the variance
y = 3*np.random.randn(N2) + 10

# Squared standard error of each sample mean: s^2 / n (unbiased variance, ddof=1)
se2_x = np.var(x, ddof=1) / N1
se2_y = np.var(y, ddof=1) / N2

# Welch–Satterthwaite degrees of freedom. Unlike the pooled test, they depend on
# the data; the value always lies between min(N1, N2) - 1 and N1 + N2 - 2.
dof = (se2_x + se2_y) ** 2 / (se2_x ** 2 / (N1 - 1) + se2_y ** 2 / (N2 - 1))

## Using the SciPy package: equal_var=False selects Welch's t-test
t_stat, p_val = stats.ttest_ind(x, y, equal_var=False)
print("t-statistic = " + str(t_stat))
print("p-value = " + str(p_val))

t-statistic = -0.03672047725192544
p-value = 0.9709688878873393
