## Hypothesis Testing

### T-test & P-test

In [3]:
import pandas as pd
import numpy as np
from scipy import stats

In [4]:
# Step 1 : Creating two random sample datasets
# Suppose these are test scores from two groups (e.g., group A and group B)

group_A = np.random.normal(75,10,30) # Group A with mean 75, std 10,and 30 samples
group_B = np.random.normal(80,12,30) # Group B with mean 80, std 12,and 30 samples

In [5]:
print(group_A)

[63.65663925 87.3290189  81.83317527 88.08969211 68.21937323 66.47287855
 58.65904137 89.37407787 64.33132425 82.3457226  75.62771239 87.49653093
 64.35844367 52.3173348  48.46639967 79.01395827 83.23347672 86.16600885
 64.18843446 63.12966127 78.24133321 71.67392688 52.18618267 59.58985788
 85.77650541 70.80208667 72.58148161 89.33032319 78.99239242 57.11055658]


In [6]:
print(group_B)

[ 86.48796647  71.9741663   84.67324831  84.62277374  76.62966619
  71.88558092  85.1114184   69.65909156  76.26196874  93.7390728
  87.97242406  76.99356155  96.65476069  78.99092727  88.22120181
  77.69821293  88.82951954  88.73763725  75.48523953  83.79418795
 102.74112968  72.85059191  72.51379284  71.97500256  75.73608229
  72.27839232  63.96510263  84.4908083   86.82443337  90.83946142]


In [7]:
# Step 2 : Perform an independent t-test

t_statistic, p_value = stats.ttest_ind(group_A,group_B)

In [8]:
# Step 3 : Display the p-value and t-statistic

print(f"T-statistic : {t_statistic}")
print(f"p-value : {p_value}")

T-statistic : -3.196440546694304
p-value : 0.002253013542437539


In [9]:
# Step 4: Iterpretation based on p-value

alpha = 0.05 #Significance Level
if p_value <= alpha:
    print('Reject the Null hypothesis (Significant difference between groups)')
else:
    print('Fail to reject the null hypothesis(No significant difference between groups)')
    



Reject the Null hypothesis (Significant difference between groups)


### Z-test

Imagine a company claims that the average weight of their product is 500 grams. we want to test if this claim is true by collecting a sample of 30 products the sample mean is 505 grams and the population standard deviation is 10 grams. We will conduct a Z-test to see if the difference in the mean is significant or not

In [20]:
# Step 1 : Define the known parameter
population_mean = 500 #claimed population mean (mu)
sample_mean = 505 # Sample mean (X bar)
population_std = 10 # Population standard deviation(sigma)
sample_size = 30 # Sample size (n)


In [32]:
# Step 2 : Calculate the Z-value

z_value = (sample_mean - population_mean) / (population_std / np.sqrt(sample_size))
print(f'Z-value : {z_value}')

Z-value : 2.7386127875258306


In [34]:
# Step 3 : calculate p-value (2-tailed test)

p_value = 2 * (1 - stats.norm.cdf(abs(z_value)))
print(f'P-value : {p_value}')

P-value : 0.0061698993205441255


In [36]:
# Step 4: Set significance level

alpha = 0.05 #Significance Level

# Step 5 : compare p_value with significance level

if p_value <= alpha:
    print('Reject the Null hypothesis (Significant difference between groups)')
else:
    print('Fail to reject the null hypothesis(No significant difference between groups)')
    


Reject the Null hypothesis (Significant difference between groups)
