# In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Q1: What is Estimation Statistics? Explain point estimate and interval estimate.
# Estimation Statistics involves estimating population parameters based on sample data.
# Point Estimate: A single value estimate of a population parameter (e.g., sample mean).
# Interval Estimate: A range of values within which the population parameter is expected to lie (e.g., confidence interval).

# Q2. Python function to estimate the population mean using sample mean and standard deviation.
def estimate_population_mean(sample_mean, sample_std, sample_size):
    """Return the point estimate of the population mean.

    The sample mean itself is used as the estimate. ``sample_std`` and
    ``sample_size`` are accepted but do not affect the returned value.
    """
    point_estimate = sample_mean
    return point_estimate

# Example
# Summary statistics of the example sample.
sample_mean, sample_std, sample_size = 50, 5, 30
estimated_mean = estimate_population_mean(
    sample_mean=sample_mean,
    sample_std=sample_std,
    sample_size=sample_size,
)
print(f"Estimated Population Mean: {estimated_mean}")

# Q3: What is Hypothesis testing? Why is it used? State the importance of Hypothesis testing.
# Hypothesis Testing: A method to test assumptions (hypotheses) about a population parameter.
# Importance: Helps in making decisions based on sample data.

# Q4. Create a hypothesis for comparing the average weight of male and female college students.
# Null Hypothesis (H0): The average weight of male college students is equal to the average weight of female college students.
# Alternative Hypothesis (H1): The average weight of male college students is greater than the average weight of female college students.

# Q5. Python script to conduct a hypothesis test on the difference between two population means.
def two_sample_t_test(sample1, sample2, alpha=0.05, *, equal_var=True):
    """Run a two-sided independent two-sample t-test and report the decision.

    Prints the t-statistic, the p-value and whether the null hypothesis of
    equal means is rejected at level ``alpha``.

    Args:
        sample1: Observations from the first group (array-like).
        sample2: Observations from the second group (array-like).
        alpha: Significance level the p-value is compared against.
        equal_var: Forwarded to ``scipy.stats.ttest_ind``. ``True`` (the
            default, same as the previous behaviour) runs the pooled-variance
            test; ``False`` runs Welch's test, which does not assume equal
            population variances.

    Returns:
        The ``(t_stat, p_value)`` pair, so callers can use the result instead
        of only reading the printed report.
    """
    t_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=equal_var)
    print(f"T-statistic: {t_stat}, P-value: {p_value}")
    if p_value < alpha:
        print("Reject the null hypothesis")
    else:
        print("Fail to reject the null hypothesis")
    return t_stat, p_value

# Example
# Two groups of 30 draws each from normal distributions.
sample1 = np.random.normal(loc=70, scale=10, size=30)  # e.g., male weights
sample2 = np.random.normal(loc=65, scale=8, size=30)   # e.g., female weights
two_sample_t_test(sample1, sample2)

# Q6: What is a null and alternative hypothesis? Give some examples.
# Null Hypothesis (H0): A statement that there is no effect or no difference.
# Alternative Hypothesis (H1): A statement that there is an effect or a difference.
# Example: Testing whether a coin is fair (H0: p = 0.5, H1: p ≠ 0.5).

# Q7: Steps involved in hypothesis testing.
# 1. State the hypotheses.
# 2. Choose a significance level (alpha).
# 3. Collect data and calculate a test statistic.
# 4. Determine the p-value.
# 5. Make a decision (reject or fail to reject the null hypothesis).

# Q8. Define p-value and explain its significance in hypothesis testing.
# P-value: The probability, assuming the null hypothesis is true, of observing a test statistic at least as extreme as the one actually observed.
# Significance: A low p-value indicates strong evidence against the null hypothesis.

# Q9. Generate a Student's t-distribution plot with degrees of freedom set to 10.
# Evaluate the t(df=10) density on 100 evenly spaced points in [-4, 4].
x = np.linspace(-4, 4, num=100)
y = stats.t(df=10).pdf(x)

plt.plot(x, y, label="t-distribution (df=10)")
# Axis decorations; these calls are independent of one another.
plt.ylabel("Probability Density")
plt.xlabel("t-value")
plt.title("Student's t-distribution")
plt.legend()
plt.show()

# Q10. Python program to calculate the two-sample t-test for independent samples.
def two_sample_t_test_independent(sample1, sample2, alpha=0.05, *, equal_var=True):
    """Two-sided t-test for the means of two independent samples.

    The t-statistic, p-value and the reject / fail-to-reject decision at
    significance level ``alpha`` are printed.

    Args:
        sample1: Array-like of observations for group one.
        sample2: Array-like of observations for group two.
        alpha: Threshold below which the p-value leads to rejection.
        equal_var: Passed through to ``scipy.stats.ttest_ind``. The default
            ``True`` keeps the original pooled-variance test; pass ``False``
            for Welch's unequal-variance test.

    Returns:
        Tuple ``(t_stat, p_value)``. Returning it makes the outcome usable
        programmatically; previously it was only printed.
    """
    t_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=equal_var)
    print(f"T-statistic: {t_stat}, P-value: {p_value}")
    if p_value < alpha:
        print("Reject the null hypothesis")
    else:
        print("Fail to reject the null hypothesis")
    return t_stat, p_value

# Example
# Two independent groups of 30 normally distributed draws each.
sample1 = np.random.normal(loc=70, scale=10, size=30)
sample2 = np.random.normal(loc=65, scale=8, size=30)
two_sample_t_test_independent(sample1, sample2)

# Q11: What is Student’s t distribution? When to use the t-Distribution.
# Student’s t-distribution: A probability distribution used to estimate population parameters when the population variance is unknown and must be estimated from the sample.
# Use: When the population standard deviation is unknown, especially for small samples (n < 30); for large samples the t-distribution approaches the normal distribution.

# Q12: What is t-statistic? State the formula for t-statistic.
# T-statistic: The difference between the sample mean and the hypothesized population mean, divided by the standard error of the mean.
# Formula: t = (sample_mean - population_mean) / (sample_std / sqrt(sample_size))

# Q13. Estimate the population mean revenue with a 95% confidence interval.
# Summary statistics of the revenue sample.
sample_mean_revenue = 500
std_dev_revenue = 50
sample_size_revenue = 50
confidence_level = 0.95

# Two-sided critical value: (1 - confidence_level) / 2 is left in each tail.
z_score = stats.norm.ppf((1 + confidence_level) / 2)
margin_of_error = z_score * (std_dev_revenue / np.sqrt(sample_size_revenue))

# Store the bounds as built-in floats: NumPy 2 prints NumPy scalars inside a
# tuple as ``np.float64(...)``, which clutters the printed interval.
confidence_interval = (
    float(sample_mean_revenue - margin_of_error),
    float(sample_mean_revenue + margin_of_error),
)
# Derive the label from confidence_level so the two cannot drift apart.
print(f"{confidence_level:.0%} Confidence Interval: {confidence_interval}")

# Q14. Test the hypothesis with a significance level of 0.05 (Drug decreases BP).
# Observed summary statistics and the hypothesised mean decrease.
sample_mean_decrease, hypothesized_mean = 8, 10
std_dev_decrease, sample_size_decrease = 3, 100
alpha = 0.05

# One-sample t statistic: (observed - hypothesised) / standard error.
t_statistic = (sample_mean_decrease - hypothesized_mean) / (
    std_dev_decrease / np.sqrt(sample_size_decrease)
)
# Two-sided p-value: twice the upper-tail probability of |t|.
p_value = 2 * stats.t(df=sample_size_decrease - 1).sf(abs(t_statistic))

print(f"T-statistic: {t_statistic}, P-value: {p_value}")
print(
    "Reject the null hypothesis"
    if p_value < alpha
    else "Fail to reject the null hypothesis"
)

# Q15. Test the hypothesis that the true mean weight of products is less than 5 pounds with alpha=0.01.
# Observed summary statistics and the hypothesised mean weight.
sample_mean_weight, hypothesized_mean_weight = 4.8, 5
std_dev_weight, sample_size_weight = 0.5, 25
alpha_weight = 0.01

# One-sample t statistic: (observed - hypothesised) / standard error.
t_statistic_weight = (sample_mean_weight - hypothesized_mean_weight) / (
    std_dev_weight / np.sqrt(sample_size_weight)
)
# Left-tailed test (H1: mean < 5), so the p-value is the lower-tail probability.
p_value_weight = stats.t(df=sample_size_weight - 1).cdf(t_statistic_weight)

print(f"T-statistic: {t_statistic_weight}, P-value: {p_value_weight}")
print(
    "Reject the null hypothesis"
    if p_value_weight < alpha_weight
    else "Fail to reject the null hypothesis"
)

# Q16. Test the hypothesis that the population means for two groups are equal with alpha=0.01.
# Summary statistics (mean, standard deviation, size) for each group.
(mean1, std1, n1), (mean2, std2, n2) = (80, 10, 30), (75, 8, 40)
alpha_groups = 0.01
df_groups = n1 + n2 - 2

# Pooled standard deviation: variances weighted by their degrees of freedom.
pooled_std = np.sqrt(((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / df_groups)
t_statistic_groups = (mean1 - mean2) / (pooled_std * np.sqrt(1/n1 + 1/n2))
# Two-sided p-value: twice the upper-tail probability of |t|.
p_value_groups = 2 * stats.t(df=df_groups).sf(abs(t_statistic_groups))

print(f"T-statistic: {t_statistic_groups}, P-value: {p_value_groups}")
print(
    "Reject the null hypothesis"
    if p_value_groups < alpha_groups
    else "Fail to reject the null hypothesis"
)

# Q17. Estimate the population mean with a 99% confidence interval.
# Summary statistics of the sample.
sample_mean_ads = 4
std_dev_ads = 1.5
sample_size_ads = 50
confidence_level_ads = 0.99

# Two-sided critical value: (1 - confidence_level_ads) / 2 is left in each tail.
z_score_ads = stats.norm.ppf((1 + confidence_level_ads) / 2)
margin_of_error_ads = z_score_ads * (std_dev_ads / np.sqrt(sample_size_ads))

# Store the bounds as built-in floats: NumPy 2 prints NumPy scalars inside a
# tuple as ``np.float64(...)``, which clutters the printed interval.
confidence_interval_ads = (
    float(sample_mean_ads - margin_of_error_ads),
    float(sample_mean_ads + margin_of_error_ads),
)
# Derive the label from confidence_level_ads so the two cannot drift apart.
print(f"{confidence_level_ads:.0%} Confidence Interval: {confidence_interval_ads}")
