In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from statsmodels.stats.weightstats import ztest

In [11]:
# Q.1 - Create a random sample of 50 exam scores (between 40 and 100).

exam_scores = np.random.randint(40, 101, size=50)
print("Sample Exam Scores:", exam_scores)
print("\n--------------------------------------------\n")

# - Calculate the sample mean and standard deviation.

sample_mean = np.mean(exam_scores)
sample_std = np.std(exam_scores, ddof=1)  # Using ddof=1 for sample standard deviation
print(f"Sample Mean: {sample_mean:.2f}")
print(f"Sample Standard Deviation: {sample_std:.2f}")
print("\n--------------------------------------------\n")

# - Use scipy.stats.t.interval() to compute the 95% confidence interval.

confidence_level = 0.95
degrees_freedom = len(exam_scores) - 1
confidence_interval = stats.t.interval(confidence_level, degrees_freedom, loc=sample_mean, scale=sample_std/np.sqrt(len(exam_scores)))
print(f"95% Confidence Interval: {confidence_interval[0]:.2f} to {confidence_interval[1]:.2f}")
print("\n--------------------------------------------\n")

# - Increase the sample size to 500 and recalculate the confidence interval.

large_sample_scores = np.random.randint(40, 101, size=500)
large_sample_mean = np.mean(large_sample_scores)
large_sample_std = np.std(large_sample_scores, ddof=1)
large_sample_degrees_freedom = len(large_sample_scores) - 1
large_sample_confidence_interval = stats.t.interval(confidence_level, large_sample_degrees_freedom, loc=large_sample_mean, scale=large_sample_std/np.sqrt(len(large_sample_scores)))
print(f"95% Confidence Interval for Large Sample: {large_sample_confidence_interval[0]:.2f} to {large_sample_confidence_interval[1]:.2f}")
print("\n--------------------------------------------\n")

# - Compare how the interval changes with a larger sample size.

print("Comparison of Confidence Intervals:")
print(f"Original Sample (n=50): {confidence_interval[0]:.2f} to {confidence_interval[1]:.2f}")
print(f"Large Sample (n=500): {large_sample_confidence_interval[0]:.2f} to {large_sample_confidence_interval[1]:.2f}")

Sample Exam Scores: [84 42 50 84 57 56 67 97 55 78 85 40 74 75 71 41 68 51 78 64 83 56 61 90
 55 78 58 79 63 47 62 99 83 88 51 70 47 77 54 57 47 77 63 71 67 47 55 72
 41 72]

--------------------------------------------

Sample Mean: 65.74
Sample Standard Deviation: 15.30

--------------------------------------------

95% Confidence Interval: 61.39 to 70.09

--------------------------------------------

95% Confidence Interval for Large Sample: 69.47 to 72.48

--------------------------------------------

Comparison of Confidence Intervals:
Original Sample (n=50): 61.39 to 70.09
Large Sample (n=500): 69.47 to 72.48


In [15]:
# Q.2
# - Assume the population mean is 70 and the standard deviation is 10.

population_mean = 70
population_std = 10

# - Generate a random sample of size 100.

sample_size = 100
sample_scores = np.random.normal(loc=population_mean, scale=population_std, size=sample_size)

# - Use the z-test formula manually or with statsmodels.stats.weightstats.ztest() to check if the 
# sample mean is significantly different from the population mean.

z_stat, p_value = ztest(sample_scores, value=population_mean)
print(f"Z-statistic: {z_stat:.2f}")
print(f"P-value: {p_value:.4f}")

# - Interpret the z-stat and p-value.

if p_value < 0.05:
    print("Reject the null hypothesis: The sample mean is significantly different from the population mean.")  
else:
    print("Fail to reject the null hypothesis: The sample mean is not significantly different from the population mean.")

Z-statistic: -0.41
P-value: 0.6797
Fail to reject the null hypothesis: The sample mean is not significantly different from the population mean.


In [18]:
# Q.3
# - Create a sample of 30 weights of people (assume random values between 50-90 kg).
weights = np.random.randint(50, 91, size=30)
print("Sample Weights:", weights)

# - Assume the population mean is 70.

population_mean_weight = 70

# Use scipy.stats.ttest_1samp() to perform a one-sample t-test.

t_stat, p_value_weight = stats.ttest_1samp(weights, population_mean_weight)
print(f"T-statistic: {t_stat:.2f}")
print(f"P-value: {p_value_weight:.4f}")

# - Interpret the result:
# - p-value < 0.05 → reject null hypothesis
# - p-value > 0.05 → fail to reject null hypothesis

if p_value_weight < 0.05:
    print("Reject the null hypothesis: The sample mean weight is significantly different from the population mean weight.")
else:
    print("Fail to reject the null hypothesis: The sample mean weight is not significantly different from the population mean weight.")

Sample Weights: [56 73 73 82 66 51 85 53 54 87 59 89 82 56 58 70 84 57 56 90 88 52 88 63
 60 90 51 78 58 73]
T-statistic: -0.23
P-value: 0.8180
Fail to reject the null hypothesis: The sample mean weight is not significantly different from the population mean weight.
