# T Test implementation using Python

### One sample T test

In [7]:
ages = [i for i in range(0,100,3)]

In [8]:
ages

[0,
 3,
 6,
 9,
 12,
 15,
 18,
 21,
 24,
 27,
 30,
 33,
 36,
 39,
 42,
 45,
 48,
 51,
 54,
 57,
 60,
 63,
 66,
 69,
 72,
 75,
 78,
 81,
 84,
 87,
 90,
 93,
 96,
 99]

In [10]:
import numpy as np
ages_num  = np.mean(ages)

In [11]:
ages_num

49.5

In [12]:
sample_size = 10
age_sample = np.random.choice(ages,sample_size)

In [13]:
age_sample

array([66, 33,  3, 75, 27, 48, 87, 39, 45,  6])

In [14]:
from scipy.stats import ttest_1samp

In [15]:
### Calucating ttest p value for a feature 
ttest, p_value = ttest_1samp(age_sample,49)

In [16]:
print(p_value)

0.5023616372283934


In [17]:
if p_value < 0.05:
    print("We are rejecting the Null Hypothesis")
else:
    print("We are accepting the Null Hypothesis")

We are accepting the Null Hypothesis


### Two sample T test

In [19]:
import numpy as np
import pandas as pd
import scipy.stats as stats


In [21]:
ClassA_ages = stats.poisson.rvs(loc=18,mu=30,size=60)

In [20]:
np.random.seed(12)
Class_ages = stats.poisson.rvs(loc=18,mu=33,size=60)
Class_ages.mean()


50.63333333333333

In [22]:
### Taking 2 diff groups
_,p_values = stats.ttest_ind(a=ClassA_ages,b=Class_ages,equal_var=False)

In [23]:
p_values

0.00148827761873106

## Chi Square test

In [1]:
import scipy.stats as stats
import seaborn as sns
import pandas as pd
import numpy as np
df = sns.load_dataset('tips')

In [2]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [10]:
df_table = pd.crosstab(df['sex'],df['smoker'])
print(df_table)

smoker  Yes  No
sex            
Male     60  97
Female   33  54


In [4]:
val = stats.chi2_contingency(df_table)
val

(0.008763290531773594,
 0.925417020494423,
 1,
 array([[59.84016393, 97.15983607],
        [33.15983607, 53.84016393]]))

In [5]:
observed_values = df_table.values

In [7]:
Expected_values = val[3]

In [11]:
no_of_col = len(df_table.iloc[0,0:2])
no_of_row = len(df_table.iloc[0:2,0])
ddof = (no_of_row-1)*(no_of_col-1)
print("Degree of Freedom",ddof)
alpha = 0.05

Degree of Freedom 1


In [14]:
from scipy.stats import chi2
chi_square = sum([(o-e)**2./e for o,e in zip(observed_values,Expected_values)])
chi_sq_stat = chi_square[0] + chi_square[1]

In [15]:
print(chi_sq_stat)

0.001934818536627623


In [16]:
critical_values = chi2.ppf(q=1-alpha,df=ddof)
critical_values

3.841458820694124

In [17]:
p_value = 1-chi2.cdf(x=chi_sq_stat,df=ddof)
p_value

0.964915107315732

In [19]:
### conditions for hypothesis

if chi_sq_stat >= critical_values:
    print("Reject H0, There is a relationship betweeen the 2 features")
else:
    print("Accept H0, There is no relationship between the 2 features")
if p_value <= alpha:
    print("Reject H0, There is a relationship between the 2 features")
else:
    print("Accept H0, There is no relationship between the 2 features")

Accept H0, There is no relationship between the 2 features
Accept H0, There is no relationship between the 2 features
