# Student T-test

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Q. Is the mean of the sample (1, 2, 3, 4, 5) equal to the population mean of 3.5 at the 95% confidence level?

In [2]:
# Import t distribution
from scipy.stats import t

In [3]:
# Observed sample under test: the integers 1 through 5 as a NumPy array.
sample = np.arange(1, 6)
print("Sample values", sample)

Sample values [1 2 3 4 5]


Null Hypothesis

$ H_0 : \mu = 3.5 $

Alternative Hypothesis

$ H_1 :\mu \neq 3.5$

In [4]:
# Inputs of the one-sample t-test.
pop_mean = 3.5       # population mean hypothesised under H0
cl = 0.95            # confidence level
significant = 0.05   # significance level (alpha)

# Summary statistics of the sample; ddof=1 gives the unbiased sample SD.
n = sample.shape[0]
dof = n - 1
sample_mean = sample.mean()
sample_std = sample.std(ddof=1)

# Report every quantity on its own line as "<label> <value>".
for label, value in (
    ("Population Mean", pop_mean),
    ("No of Samples", n),
    ("Sample Mean", sample_mean),
    ("Sample Standard Deviation", sample_std),
    ("Degree of Freedome", dof),
    ("Confidence level", cl),
    ("Significant level", significant),
):
    print(label, value)

Population Mean 3.5
No of Samples 5
Sample Mean 3.0
Sample Standard Deviation 1.5811388300841898
Degree of Freedome 4
Confidence level 0.95
Significant level 0.05


### Computing the t statistic

$\Large t=\frac{\bar{x} - \mu}{\frac{s}{\sqrt{n}}}$

$\bar{x}$ - Sample Mean

$\mu$ - Population Mean

$s$ - Sample standard deviation (computed with $n-1$ in the denominator)

$n$ - Sample size

In [5]:
# t = (sample mean - hypothesised mean) / (sample SD / sqrt(n))
mean_difference = sample_mean - pop_mean
std_error_of_mean = sample_std / np.sqrt(n)
tstatistics = mean_difference / std_error_of_mean
print("T Statistics is", tstatistics)

T Statistics is -0.7071067811865475


### Calculating T Critical Values

In [6]:
# Two-sided test: the significance level is split equally between both tails.
tail_area = significant / 2
tcritical_l = t.ppf(tail_area, dof)
# The t distribution is symmetric about zero, so the upper bound mirrors the lower.
tcritical_u = -tcritical_l
print("Critical Values are ", tcritical_l, tcritical_u)

Critical Values are  -2.7764451051977996 2.7764451051977996


In [7]:
# The null hypothesis is rejected only when the statistic lands in either tail
# beyond the critical values.
in_rejection_region = tstatistics < tcritical_l or tstatistics > tcritical_u
verdict = (
    "Reject the Null hypothesis"
    if in_rejection_region
    else "Fail to reject the Null hypothesis"
)
print(verdict)

Fail to reject the Null hypothesis


In [8]:
# Two-sided p-value: probability, under H0, of a t statistic at least as
# extreme as the observed one in either tail. Evaluating the CDF at -|t|
# keeps the result valid for positive statistics as well; 2*cdf(t) alone
# would exceed 1 whenever t > 0.
pvalue = 2 * t.cdf(-abs(tstatistics), df=dof)
print("pvalue", pvalue)
# Compare against the significance level declared with the other test
# parameters rather than repeating the literal.
if pvalue < significant:
    print("Reject the Null hypothesis")
else:
    print("Fail to reject the Null hypothesis")

pvalue 0.5185185185185183
Fail to reject the Null hypothesis


### Standard Error

$ S.E.=\frac{s}{\sqrt{n}} $

s - Sample SD

n - Number of samples

In [9]:
# Standard error of the mean: sample SD scaled down by sqrt(sample size).
root_n = np.sqrt(n)
SE = sample_std / root_n
print("Standard Error", SE)

Standard Error 0.7071067811865476


### Confidence Interval 

$C.I. = \bar{x} \pm t_{\frac{\alpha}{2},\,n-1} \cdot SE$

In [10]:
# Interval bounds: sample mean shifted by SE times the lower/upper critical values.
critical_pair = np.array([tcritical_l, tcritical_u])
ci_bounds = sample_mean + SE * critical_pair
print("Confidence Interval", ci_bounds)

Confidence Interval [1.03675684 4.96324316]


## Single sample T Test using ttest_1samp method

In [13]:
from scipy.stats import ttest_1samp

In [14]:
# Cross-check the manual calculation with SciPy's one-sample t-test.
tstats, pvalue = ttest_1samp(sample, popmean=pop_mean)
print("Test Satistics", tstats)
print("pvalue", pvalue)
decision = (
    "Reject the Null hypothesis"
    if pvalue < 0.05
    else "Fail to reject the Null hypothesis"
)
print(decision)

Test Satistics -0.7071067811865475
pvalue 0.5185185185185183
Fail to reject the Null hypothesis


In [11]:
# Confidence interval for the mean straight from the t distribution, using the
# confidence level declared with the other test parameters (cl) instead of
# repeating the literal 0.95.
t.interval(cl, dof, loc=sample_mean, scale=SE)

(1.036756838522439, 4.9632431614775605)

# Using SciPy Package

### One Sample T test

In [None]:
from scipy.stats import ttest_1samp

In [None]:
# Seed the global NumPy RNG so the draw below is reproducible.
np.random.seed(123)
# 20 observations from a normal distribution with mean 5 and standard deviation 1.
sample1 = np.random.normal(loc=5, scale=1, size=20)
print(sample1)

In [None]:
# Arithmetic mean of the generated sample.
sample1_mean = sample1.mean()
print(sample1_mean)

In [None]:
# One-sample t-test: H0 states that the mean behind sample1 equals 5.
hypothesised_mean = 5
tstatistics, pvalue = ttest_1samp(sample1, popmean=hypothesised_mean)
print("T Statistics", tstatistics)
print("P value", pvalue)

In [None]:
# Repeat the one-sample t-test on the same seeded draw, this time with
# H0: mean == 7.
np.random.seed(123)
sample1 = np.random.normal(loc=5, scale=1, size=20)
tstatistics, pvalue = ttest_1samp(sample1, popmean=7)
print("T Statistics", tstatistics)
print("P value", pvalue)
outcome = (
    "\nReject the Null Hypothesis"
    if pvalue < 0.05
    else "\nFails to reject the Null Hypothesis"
)
print(outcome)

### Two-Sample T test

In [None]:
from scipy.stats import ttest_ind

In [None]:
# Two reproducible draws of 20 values each, both with SD 1:
# sample1 is centred on 4.8 and sample2 on 5.0.
np.random.seed(12345)
sample1 = np.random.normal(loc=4.8, scale=1, size=20)
sample2 = np.random.normal(loc=5.0, scale=1, size=20)
print("Sample1", sample1)
print("Sample2", sample2)

In [None]:
# Show the mean of each sample before comparing them.
for label, values in (("Mean of Sample1", sample1), ("Mean of Sample2", sample2)):
    print(label, np.mean(values))

In [None]:
# Independent two-sample t-test: H0 states that sample1 and sample2 come from
# populations with equal means. The test must be run here; otherwise the
# values printed below are leftovers from the earlier one-sample test.
tstatistics, pvalue = ttest_ind(sample1, sample2)
print("T Statistics", tstatistics)
print("P value", pvalue)

In [None]:
# Repeat the two-sample t-test with freshly seeded samples whose population
# means are further apart (4.7 vs 5.3).
np.random.seed(123)
sample1 = np.random.normal(loc=4.7, scale=1, size=20)
sample2 = np.random.normal(loc=5.3, scale=1, size=20)
print("Mean of Sample1", np.mean(sample1))
print("Mean of Sample2", np.mean(sample2))
tstatistics, pvalue = ttest_ind(sample1, sample2)
print("\nT Statistics", tstatistics)
print("P value", pvalue)
outcome = (
    "\nReject the Null Hypothesis"
    if pvalue < 0.05
    else "\nFails to reject the Null Hypothesis"
)
print(outcome)