# In [None]:
import numpy as np
import scipy.stats as stats
import math

# Q1: Difference between a t-test and a z-test with example scenarios.
# Z-test: Used when the population variance is known, or when the sample size is large (n >= 30) so that the sample standard deviation is a reliable estimate of it.
# T-test: Used when the population variance is unknown and the sample size is small (n < 30).
# Example Scenario (Z-test): Testing whether the average height of a population is 170 cm using a large sample.
# Example Scenario (T-test): Testing whether a new drug reduces blood pressure using a small sample of patients.

# Q2: Differentiate between one-tailed and two-tailed tests.
# One-tailed Test: Tests for an effect in one direction (e.g., whether a mean is greater than a value).
# Two-tailed Test: Tests for an effect in either direction (e.g., whether a mean is different from a value).

# Q3: Explain Type 1 and Type 2 errors with examples.
# Type 1 Error: Rejecting the null hypothesis when it is true (false positive).
# Example: Concluding that a drug is effective when it is not.
# Type 2 Error: Failing to reject the null hypothesis when it is false (false negative).
# Example: Concluding that a drug is not effective when it actually is.

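# Q4: Explain Bayes' theorem with an example.
# Bayes' Theorem: P(A|B) = P(B|A) * P(A) / P(B)
# Example: Testing for a disease, where A = "has the disease" and B = "tests positive". P(A) is the prevalence of the disease, P(B|A) is the probability of testing positive given the disease (sensitivity), P(B) is the overall probability of testing positive, and P(A|B) is the probability of having the disease given a positive test.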

# Q5: What is a confidence interval? How to calculate it with an example.
# Confidence Interval: A range of values within which the population parameter is expected to lie with a certain level of confidence.
# Example: For a sample mean of 50, standard deviation of 5, and sample size of 30, the 95% confidence interval is
#   mean +/- z * (std / sqrt(n)) = 50 +/- 1.96 * (5 / sqrt(30)), i.e. approximately (48.21, 51.79).

# Q6. Use Bayes' Theorem for a sample problem.
# Scenario: a diagnostic test has 99% sensitivity and a 5% false positive rate,
# and the disease prevalence is 0.1%. Find P(disease | positive test).
# Notation: A = "has the disease", B = "tests positive".
P_B_given_A = 0.99  # Sensitivity: P(positive | disease)
P_B_given_not_A = 0.05  # False positive rate: P(positive | no disease)
P_A = 0.001  # Prevalence of disease
P_not_A = 1 - P_A

# Split P(B) into its two sources (law of total probability) so the
# posterior reads as "true positives over all positives".
true_positive_mass = P_B_given_A * P_A
false_positive_mass = P_B_given_not_A * P_not_A
P_A_given_B = true_positive_mass / (true_positive_mass + false_positive_mass)
print(f"Probability of having the disease given a positive test result: {P_A_given_B:.5f}")

# Q7. Calculate the 95% confidence interval for a sample with mean 50 and std 5.
confidence_level = 0.95
sample_mean, sample_std, sample_size = 50, 5, 30

# A two-sided interval leaves (1 - confidence_level) / 2 in each tail, so the
# critical value is the normal quantile at (1 + confidence_level) / 2.
upper_quantile = (1 + confidence_level) / 2
z_score = stats.norm.ppf(upper_quantile)

# Margin of error = critical value * standard error of the mean.
standard_error = sample_std / np.sqrt(sample_size)
margin_of_error = z_score * standard_error
confidence_interval = (
    sample_mean - margin_of_error,
    sample_mean + margin_of_error,
)
print(f"95% Confidence Interval: {confidence_interval}")

# Q8. Explain margin of error and its relation to sample size with an example.
# Margin of Error: The maximum expected difference between the true population
# parameter and a sample estimate. It shrinks as the sample size grows, because
# the standard error is divided by sqrt(n).
# Example: a sample of 1000 with the same mean, standard deviation and critical
# value as in Q7 (reuses sample_mean, sample_std and z_score defined there).
sample_size_large = 1000
standard_error_large = sample_std / np.sqrt(sample_size_large)
margin_of_error_large = z_score * standard_error_large
confidence_interval_large = (
    sample_mean - margin_of_error_large,
    sample_mean + margin_of_error_large,
)
print(f"95% Confidence Interval with larger sample size: {confidence_interval_large}")

# Q9. Calculate the z-score for a data point with value 75, mean 70, and std dev 5.
data_point, population_mean, population_std = 75, 70, 5

# z-score = how many standard deviations the point lies from the mean.
deviation_from_mean = data_point - population_mean
z_score_data_point = deviation_from_mean / population_std
print(f"Z-score for the data point: {z_score_data_point}")

# Q10. Hypothesis test for a new weight loss drug using a t-test.
# One-sample, two-sided t-test of the mean weight loss against a null of 0.
sample_mean_weight_loss = 6
population_mean_weight_loss = 0  # Null hypothesis: the drug has no effect
std_dev_weight_loss = 2.5
sample_size_weight_loss = 50
alpha_weight_loss = 0.05

# t = (observed mean - hypothesised mean) / standard error of the mean.
standard_error_weight_loss = std_dev_weight_loss / np.sqrt(sample_size_weight_loss)
t_statistic_weight_loss = (
    sample_mean_weight_loss - population_mean_weight_loss
) / standard_error_weight_loss

# Two-sided p-value: twice the upper-tail probability of |t| with n - 1 df.
df_weight_loss = sample_size_weight_loss - 1
p_value_weight_loss = 2 * stats.t.sf(abs(t_statistic_weight_loss), df=df_weight_loss)

print(f"T-statistic: {t_statistic_weight_loss}, P-value: {p_value_weight_loss}")
print(
    "Reject the null hypothesis: The drug is significantly effective."
    if p_value_weight_loss < alpha_weight_loss
    else "Fail to reject the null hypothesis: The drug is not significantly effective."
)

# Q11. Calculate the 95% confidence interval for the proportion of satisfied jobholders.
# Normal-approximation interval for a proportion: p +/- z * sqrt(p * (1 - p) / n).
sample_proportion = 0.65
sample_size_proportion = 500

# This cell carries its own confidence level instead of borrowing the
# `confidence_level` variable from the Q7 cell: it then runs on its own and
# always matches the "95%" stated in its title and printed label.
confidence_level_proportion = 0.95
z_score_proportion = stats.norm.ppf((1 + confidence_level_proportion) / 2)

# Standard error of a sample proportion.
standard_error_proportion = np.sqrt(
    (sample_proportion * (1 - sample_proportion)) / sample_size_proportion
)
margin_of_error_proportion = z_score_proportion * standard_error_proportion
confidence_interval_proportion = (
    sample_proportion - margin_of_error_proportion,
    sample_proportion + margin_of_error_proportion,
)
print(f"95% Confidence Interval for job satisfaction: {confidence_interval_proportion}")

# Q12. Hypothesis test for the difference in teaching methods using a t-test.
# Two-sample, two-sided t-test with a pooled standard deviation.
mean_A, std_A, n_A = 85, 6, 30
mean_B, std_B, n_B = 82, 5, 40
alpha_teaching = 0.01
df_teaching = n_A + n_B - 2

# Pooled variance: each group's variance weighted by its degrees of freedom.
pooled_variance_teaching = ((n_A - 1) * std_A**2 + (n_B - 1) * std_B**2) / df_teaching
pooled_std_teaching = np.sqrt(pooled_variance_teaching)

# Standard error of the difference between the two group means.
standard_error_teaching = pooled_std_teaching * np.sqrt(1/n_A + 1/n_B)
t_statistic_teaching = (mean_A - mean_B) / standard_error_teaching

# Two-sided p-value: twice the upper-tail probability of |t|.
p_value_teaching = 2 * stats.t.sf(abs(t_statistic_teaching), df=df_teaching)

print(f"T-statistic: {t_statistic_teaching}, P-value: {p_value_teaching}")
print(
    "Reject the null hypothesis: The teaching methods have a significant difference."
    if p_value_teaching < alpha_teaching
    else "Fail to reject the null hypothesis: No significant difference between teaching methods."
)

# Q13. Calculate the 90% confidence interval for a population mean with sample mean 65 and std 8.
population_mean_13 = 60  # Defined here but not used in the interval below
sample_mean_13, std_dev_13, sample_size_13 = 65, 8, 50
confidence_level_13 = 0.90

# Two-sided critical value: normal quantile at (1 + confidence level) / 2.
upper_quantile_13 = (1 + confidence_level_13) / 2
z_score_13 = stats.norm.ppf(upper_quantile_13)

# Margin of error = critical value * standard error of the mean.
standard_error_13 = std_dev_13 / np.sqrt(sample_size_13)
margin_of_error_13 = z_score_13 * standard_error_13
confidence_interval_13 = (
    sample_mean_13 - margin_of_error_13,
    sample_mean_13 + margin_of_error_13,
)
print(f"90% Confidence Interval for population mean: {confidence_interval_13}")

# Q14. Hypothesis test for the effect of caffeine on reaction time using a t-test.
# One-sample, two-sided t-test of the mean against a null of 0.
sample_mean_rt = 0.25
population_mean_rt = 0.0  # Null hypothesis: caffeine has no effect
std_dev_rt = 0.05
sample_size_rt = 30
alpha_rt = 0.10

# t = (observed mean - hypothesised mean) / standard error of the mean.
standard_error_rt = std_dev_rt / np.sqrt(sample_size_rt)
t_statistic_rt = (sample_mean_rt - population_mean_rt) / standard_error_rt

# Two-sided p-value: twice the upper-tail probability of |t| with n - 1 df.
df_rt = sample_size_rt - 1
p_value_rt = 2 * stats.t.sf(abs(t_statistic_rt), df=df_rt)

print(f"T-statistic: {t_statistic_rt}, P-value: {p_value_rt}")
print(
    "Reject the null hypothesis: Caffeine has a significant effect on reaction time."
    if p_value_rt < alpha_rt
    else "Fail to reject the null hypothesis: Caffeine does not have a significant effect on reaction time."
)
