In [5]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import math
import seaborn as sns

## T Test
- A T-Test is a type of inferential statistic which is used to determine if there is a significant difference between the means of two groups which may be related in certain features

**T-Test has 2 types**
 - One-sample T-Test
 - Two-sample T-Test
 

##  One-sample T-Test

The test will tell us whether means of the sample and the population are different

![image.png](attachment:image.png)

In [15]:
ages = [10,20,35,50,28,40,55,18,16,55,30,25,43,18,30,28,14,24,16,17,32,35,26,27,65,18,43,23,21,20,19,70]
def test_p_value(p_value):
    if p_value < significance_level:
        print("Reject the null hypothesis, there is a difference between population mean and sample mean")
    else:
        print("Accept the null hypothesis, there is no difference between population mean and sample mean")

In [7]:
ages_mean = np.mean(ages)
ages_mean

30.34375

In [8]:
# Take a sample
sample_size = 10
age_sample = np.random.choice(ages, sample_size)
age_sample

array([32, 18, 55, 20, 43, 26, 28, 25, 35, 16])

In [9]:
from scipy.stats import ttest_1samp

In [16]:
t_test, p_value = ttest_1samp(age_sample, ages_mean)
significance_level = 0.05
print(t_test, p_value)
print(test_p_value(p_value))

-0.14265370930249494 0.8897062816108605
Accept the null hypothesis, there is no difference between population mean and sample mean
None


## Consider the age of students in college and in Class A

In [24]:
np.random.seed(6)
school_ages = stats.poisson.rvs(loc=18, mu=35, size=1500) # mu is mean
classA_ages = stats.poisson.rvs(loc=18, mu=25, size=60)
print(classA_ages.mean())
print(school_ages.mean())

42.083333333333336
53.303333333333335


In [26]:
_, p_value = stats.ttest_1samp(a=classA_ages, popmean=school_ages.mean())
print(p_value)
print(test_p_value(p_value))

3.1561418385474138e-24
Reject the null hypothesis, there is a difference between population mean and sample mean
None


In [27]:
## Cast the mu (mean) to be both the same '35', the null hypothesis will NOT be rejected, aka accepted as truth
school_ages2 = stats.poisson.rvs(loc=18, mu=35, size=1500) # mu is mean
classA_ages2 = stats.poisson.rvs(loc=18, mu=35, size=60)
print(classA_ages2.mean())
print(school_ages2.mean())
_, p_value = stats.ttest_1samp(a=classA_ages2, popmean=school_ages2.mean())
print(p_value)
print(test_p_value(p_value))

52.166666666666664
53.24333333333333
0.1573371802864444
Accept the null hypothesis, there is no difference between population mean and sample mean
None


## Two sample T-Test

- The independent Samples T-Test or Two Sample T-Test compares the means of 2 independent groups in order to determine whether there is statistical evidence that the associated population means are significantly different. The independent samples T-Test is a parametric test. This test is also known as **independent T-Test**

![image.png](attachment:image.png)

