# Two Sample Z-Test

In [5]:
import numpy as np
import random
from scipy.stats import norm
from statsmodels.stats import weightstats as stests
from scipy import stats

A two-sample z-test compares the means of two independent groups to determine whether there is a statistically significant difference between them. It is typically used when the sample sizes are large and the population standard deviations are known.

H0: The means of the two groups are equal <br>
Ha: The means of the two groups are different

**When we only have summary measurements (mean, standard deviation, sample size), not the whole sample data** <br>
`z = (mean1 - mean2) / np.sqrt((std1**2 / n1) + (std2**2 / n2))` <br> <br>

Two-Tailed: <br>
pval=2*(1-norm.cdf(abs(z))) <br>
Using abs(z) ignores the sign of the deviation and focuses solely on its magnitude; doubling the upper-tail probability then accounts for both directions. <br>
if p_value < significance_level:<br>
    conclusion = "Reject the null hypothesis. The two group means are significantly different."<br>
else:<br>
    conclusion ="Fail to reject the null hypothesis. No significant difference."<br>

**When we have the data**

In [11]:
# Simulate recovery times for two independent groups.

# Seed the stdlib RNG so a fresh Restart & Run All reproduces the same samples
# (any integer works as the seed).
random.seed(123)

# Group 1: 100 draws from Uniform(5.0, 20.0), each rounded to 0 decimal places.
M1_data = [np.round(random.uniform(5.0, 20.0), 0) for _ in range(100)]

# Group 2: 90 draws from the same range, taken after group 1 so the random
# stream is consumed in the same order.
M2_data = [np.round(random.uniform(5.0, 20.0), 0) for _ in range(90)]

# Print group 1 first, then group 2.
for group in (M1_data, M2_data):
    print(group)

[6.0, 6.0, 11.0, 7.0, 19.0, 6.0, 13.0, 10.0, 18.0, 7.0, 10.0, 10.0, 9.0, 5.0, 12.0, 6.0, 14.0, 6.0, 10.0, 12.0, 19.0, 6.0, 7.0, 17.0, 5.0, 19.0, 14.0, 9.0, 18.0, 17.0, 10.0, 17.0, 8.0, 14.0, 13.0, 17.0, 10.0, 11.0, 17.0, 13.0, 15.0, 15.0, 15.0, 18.0, 12.0, 15.0, 11.0, 5.0, 16.0, 8.0, 17.0, 19.0, 15.0, 8.0, 10.0, 11.0, 6.0, 12.0, 14.0, 10.0, 8.0, 6.0, 14.0, 5.0, 10.0, 8.0, 13.0, 16.0, 19.0, 15.0, 20.0, 19.0, 14.0, 9.0, 12.0, 8.0, 20.0, 12.0, 16.0, 6.0, 7.0, 8.0, 9.0, 10.0, 9.0, 12.0, 11.0, 7.0, 6.0, 10.0, 6.0, 11.0, 5.0, 13.0, 6.0, 15.0, 11.0, 20.0, 9.0, 12.0]
[16.0, 10.0, 10.0, 15.0, 20.0, 18.0, 11.0, 16.0, 15.0, 11.0, 9.0, 6.0, 18.0, 6.0, 13.0, 13.0, 7.0, 15.0, 19.0, 17.0, 11.0, 7.0, 20.0, 13.0, 14.0, 17.0, 11.0, 6.0, 7.0, 15.0, 10.0, 18.0, 16.0, 20.0, 15.0, 10.0, 17.0, 5.0, 13.0, 18.0, 13.0, 7.0, 13.0, 13.0, 11.0, 18.0, 10.0, 9.0, 13.0, 8.0, 10.0, 11.0, 19.0, 6.0, 8.0, 8.0, 20.0, 18.0, 8.0, 13.0, 6.0, 10.0, 10.0, 13.0, 19.0, 17.0, 13.0, 17.0, 12.0, 16.0, 17.0, 9.0, 17.0, 14.0, 12.0, 

In [6]:
# Two-sample z-test on the raw samples. value=0 is the difference in means
# assumed under H0; 'two-sided' tests for a difference in either direction.
z_score, pval = stests.ztest(
    M1_data,
    x2=M2_data,
    value=0,
    alternative='two-sided',
)
# Show (test statistic, p-value) as the cell output.
z_score, pval

(-1.9781793102198233, 0.047908481917186434)

In [7]:
# Significance level used for the decision.
alpha = 0.01

# H0 is rejected only when the p-value is strictly below alpha.
verdict = (
    "Reject the null hypothesis, (i.e, Means are different)"
    if pval < alpha
    else "Fail to reject the null hypothesis (i.e, Means are same)"
)
print(verdict)

Fail to reject the null hypothesis (i.e, Means are same)


In [10]:
# Sanity check: sample means of both groups, shown as (mean of M1, mean of M2).
tuple(np.mean(sample) for sample in (M1_data, M2_data))

(11.57, 12.8)