In [8]:
import numpy as np
from scipy import stats 

**One sample t-test**

Determine the hypothesis

H0 : population mean is greater or equal to sample mean (μ> = x)
H1 : population mean is less than sample mean (μ< x)

Next, we will create a random sample or we can read it from a data frame.

In [9]:
sample=[183, 152, 178, 157, 194, 163, 144, 114, 178, 152, 118, 158, 172, 138]


pop_mean=165

# I have created a random sample stored in a variable sample and defined the population mean in the variable pop_mean. 
#Let’s calculate mean for sample and standard error for the sample.

mean = np.mean(sample)
std_error = np.std(sample) / np.sqrt(len(sample))
print("sample mean :",mean)
print("standard error:",std_error)

# According to the formula we need sample mean and standard error. The formula for the standard error is:

#                                    standard error= (s / √n)

#Where,

# s  = standard deviation of the sample
# n  = sample size

# np.std() is used for calculating the standard deviation
# np.sqrt(len()) is used to calculate the square root of sample size

# Let’s calculate the t-static, t-critical and p-value for the comparison.

# calculate t statistics
t = abs(mean - pop_mean) / std_error
print('t static:',t)
# two-tailed critical value at alpha = 0.05
# q is lower tail probability and df is the degrees of freedom
t_c = stats.t.ppf(q=0.975, df=13)
print("Critical value for t two tailed:",t_c)


# one-tailed critical value at alpha = 0.05
t_c = stats.t.ppf(q=0.95, df=13)
print("Critical value for t one tailed:",t_c)


# get two-tailed p value
p_value = 2*(1-stats.t.cdf(x=t, df=13))
print("p-value:",p_value)

sample mean : 157.21428571428572
standard error: 6.034914208534632
t static: 1.2901118419717794
Critical value for t two tailed: 2.1603686564610127
Critical value for t one tailed: 1.7709333959867988
p-value: 0.21948866305060366


**Two sample t-test**

First, we will specify the hypothesis:

H0 : There is no significant mean difference (μ1= μ2)
H1 : There is significant mean difference (μ1!= μ2)

In [10]:
sample_1=[13.4,10.9,11.2,11.8,14,15.3,14.2,12.6,17,16.2,16.5,15.7]
sample_2=[12,11.7,10.7,11.2,14.8,14.4,13.9,13.7,16.9,16,15.6,16]

I have created two random samples containing a list of random float numbers. Calculate a mean for sample and variance for both the variance

In [11]:
sample1_bar, sample2_bar = np.mean(sample_1), np.mean(sample_2)
n1, n2 = len(sample_1), len(sample_2)
var_sample1, var_sample2= np.var(sample_1, ddof=1), np.var(sample_2, ddof=1)

# pooled sample variance
var = ( ((n1-1)*var_sample1) + ((n2-1)*var_sample2) ) / (n1+n2-2)

# standard error
std_error = np.sqrt(var * (1.0 / n1 + 1.0 / n2))



In [12]:
print("sample_1 mean:",np.round(sample1_bar,4))
print("sample_2 mean:",np.round(sample2_bar,4))
print("varience of sample_1:",np.round(var_sample1,4))
print("varience of sample_2:",np.round(var_sample2,4))
print("pooled sample varience:",var)
print("standard error:",std_error)

sample_1 mean: 14.0667
sample_2 mean: 13.9083
varience of sample_1: 4.4788
varience of sample_2: 4.3445
pooled sample varience: 4.411628787878788
standard error: 0.8574797167551339


Let’s calculate the t-static, t-critical and p-value for the comparison.

In [13]:
# calculate t statistics
t = abs(sample1_bar - sample2_bar) / std_error
print('t static:',t)
# two-tailed critical value at alpha = 0.05
t_c = stats.t.ppf(q=0.975, df=12)
print("Critical value for t two tailed:",t_c)


# one-tailed critical value at alpha = 0.05
t_c = stats.t.ppf(q=0.95, df=12)
print("Critical value for t one tailed:",t_c)


# get two-tailed p value
p_two = 2*(1-stats.t.cdf(x=t, df=12))
print("p-value for two tailed:",p_two)

# get one-tailed p value
p_one = 1-stats.t.cdf(x=t, df=12)
print("p-value for one tailed:",p_one)


t static: 0.1846496543760765
Critical value for t two tailed: 2.1788128296634177
Critical value for t one tailed: 1.782287555649159
p-value for two tailed: 0.8565876978891662
p-value for one tailed: 0.4282938489445831


**Paired t-test**

First, we will specify the hypothesis:

H0 : There is no change after the tuition (x̄1= x̄2)

H1 : There is a change after the tuition (x̄1!= x̄2)

In [14]:
alpha=0.05
result_1 =[23, 20, 19, 21, 18, 20, 18, 17, 23, 16, 19]
result_2=[24, 19, 22, 18, 20, 22, 20, 20, 23, 20, 18]

The above sample is the record of students’ marks before and after the tuition. Calculate a mean, standard error, statics, t-critical and p-value

In [15]:
mean1, mean2 = np.mean(result_1), np.mean(result_2)
n = len(result_1)
# sum squared difference between observations
d1 = sum([(result_1[i]-result_2[i])**2 for i in range(n)])
# sum difference between observations
d2 = sum([result_1[i]-result_2[i] for i in range(n)])
std_dev = np.sqrt((d1 - (d2**2 / n)) / (n - 1))
# standard error of the difference between the means
se = std_dev / np.sqrt(n)
t_stat = (mean1 - mean2) / se
df = n - 1
# calculate the critical value
critical =stats.t.ppf(1.0 - alpha, df)
p = (1.0 - stats.t.cdf(abs(t_stat), df)) * 2.0
print(t_stat,critical,p)

-1.7073311796734205 1.8124611228107335 0.11856467647601066


In [16]:
print("sample_1 mean:",np.round(sample1_bar,4))
print("sample_2 mean:",np.round(sample2_bar,4))
print("varience of sample_1:",np.round(var_sample1,4))
print("varience of sample_2:",np.round(var_sample2,4))
print("pooled sample varience:",var)
print("standard error:",std_error)

sample_1 mean: 14.0667
sample_2 mean: 13.9083
varience of sample_1: 4.4788
varience of sample_2: 4.3445
pooled sample varience: 4.411628787878788
standard error: 0.8574797167551339
