In [1]:
"""
Z-Test (Large Sample, n = 30)

Dataset: z_test_data.csv
Industry Standard (μ0) = 50,000
Population Std Dev (σ) = 2,500 → Use z-test
Significance Level (α) = 0.05

Questions:
1. Two-tailed test: Test whether the average salary of employees differs from the industry standard of $50,000.
   H0: μ = 50,000
   H1: μ ≠ 50,000

2. One-tailed test (Right-tailed): Test whether the average salary of employees is greater than $50,000.
   H0: μ ≤ 50,000
   H1: μ > 50,000
"""

import pandas as pd
import numpy as np
from scipy import stats

# Load dataset
z_data = pd.read_csv("z_test_data.csv")

# Parameters
mu_0 = 50000   # hypothesised population mean (the industry standard)
sigma = 2500   # population standard deviation, treated as known -> z-test
alpha = 0.05   # significance level

# Sample stats
# Drop missing salaries first so that the mean and n describe the same set of
# observations: Series.mean() skips NaN, whereas len(z_data) would still count
# those rows, overstating n and inflating the z-statistic.
salaries = z_data["Salary"].dropna()
n = len(salaries)
if n == 0:
    # Without this guard the p-values become NaN, every "<" comparison is
    # False, and the cell would report "Fail to reject H0" based on no data.
    raise ValueError("No non-missing values in 'Salary'; cannot run the z-test.")
sample_mean = salaries.mean()

# One-sample z-statistic: distance of the sample mean from mu_0, measured in
# standard errors of the mean.
standard_error = sigma / np.sqrt(n)
z_stat = (sample_mean - mu_0) / standard_error

# Upper-tail probabilities use the survival function rather than 1 - cdf:
# 1 - cdf(z) cancels to exactly 0.0 once cdf(z) rounds to 1.0 (large |z|),
# while sf(z) keeps the small tail probability.
p_value_two_tailed = 2 * stats.norm.sf(abs(z_stat))

# Right-tailed test (H1: mu > mu_0): probability of a z at least this large.
p_value_one_tailed = stats.norm.sf(z_stat)

print("=== Z-Test Results ===")
print(f"Sample Mean: {sample_mean:.2f}, n = {n}")
print(f"Z-statistic: {z_stat:.4f}")

# NOTE: both tests are run on the same sample to answer the two questions in
# the cell header; in a real analysis the direction of H1 should be chosen
# before looking at the data, not after comparing the two p-values.
print(f"\nTwo-tailed p-value: {p_value_two_tailed:.4f}")
if p_value_two_tailed < alpha:
    print("➡ Reject H0: Mean salary differs from 50,000")
else:
    print("➡ Fail to reject H0: No significant difference")

print(f"\nOne-tailed p-value: {p_value_one_tailed:.4f}")
if p_value_one_tailed < alpha:
    print("➡ Reject H0: Mean salary is greater than 50,000")
else:
    print("➡ Fail to reject H0: Not enough evidence mean > 50,000")


=== Z-Test Results ===
Sample Mean: 50866.67, n = 30
Z-statistic: 1.8988

Two-tailed p-value: 0.0576
➡ Fail to reject H0: No significant difference

One-tailed p-value: 0.0288
➡ Reject H0: Mean salary is greater than 50,000
