# In [None]:
# Q1: Estimation Statistics is the process of inferring population parameters (such as the population mean)
# from sample data. There are two main types of estimation:
# - Point estimate: A single value used to approximate a population parameter. Example: The sample mean is a point estimate for the population mean.
# - Interval estimate: A range of values within which the population parameter is likely to fall. Example: A 95% confidence interval for the population mean.

# Q2: Python function to estimate the population mean using a sample mean and standard deviation:
import math

def estimate_population_mean(sample_mean, sample_std_dev, sample_size, confidence_level=0.95):
    """
    Estimate the population mean with a two-sided, z-based confidence interval.

    The interval is sample_mean ± z * sample_std_dev / sqrt(sample_size), where z is the
    standard normal critical value that matches the requested confidence level.

    :param sample_mean: The mean of the sample.
    :param sample_std_dev: The standard deviation of the sample.
    :param sample_size: The size of the sample (must be positive).
    :param confidence_level: The confidence level for the interval, strictly between 0 and 1.
    :return: A tuple with the lower and upper bounds of the confidence interval.
    :raises ValueError: If sample_size is not positive or confidence_level is outside (0, 1).
    """
    # Imported locally so the function does not depend on module-level import order.
    from statistics import NormalDist

    if sample_size <= 0:
        raise ValueError("sample_size must be positive")
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    # Two-sided interval: (1 - confidence_level) / 2 is left in each tail, so the
    # critical value is the (1 + confidence_level) / 2 quantile of the standard normal.
    z_value = NormalDist().inv_cdf((1 + confidence_level) / 2)
    standard_error = sample_std_dev / math.sqrt(sample_size)
    margin_of_error = z_value * standard_error
    lower_bound = sample_mean - margin_of_error
    upper_bound = sample_mean + margin_of_error
    return lower_bound, upper_bound

# Q3: Hypothesis testing is a statistical method used to test a claim or assumption about a population parameter.
# It helps in decision-making by assessing whether the observed data provide enough evidence to reject the null hypothesis in favor of the alternative hypothesis.
# Importance: It provides a formal procedure for decision-making and testing scientific claims.

# Q4: Hypothesis for average weight of male college students being greater than female students:
# Null hypothesis (H0): μ_male ≤ μ_female
# Alternative hypothesis (H1): μ_male > μ_female

# Q5: Python script for hypothesis test on the difference between two population means:
import scipy.stats as stats

def hypothesis_test_diff_means(sample1, sample2):
    """
    Compare the means of two independent samples with a t-test.

    The test is run with ``equal_var=False``, i.e. the two samples are not
    assumed to share a common variance.

    :param sample1: Data from the first sample.
    :param sample2: Data from the second sample.
    :return: The t-statistic and p-value from the test.
    """
    outcome = stats.ttest_ind(sample1, sample2, equal_var=False)
    return outcome.statistic, outcome.pvalue

# Q6: Null hypothesis (H0) represents no effect or no difference, while the alternative hypothesis (H1) represents the effect or difference.
# Example:
# H0: The mean salary of employees in two departments is the same.
# H1: The mean salary of employees in the two departments is different.

# Q7: Steps involved in hypothesis testing:
# 1. State the null and alternative hypotheses.
# 2. Set the significance level (α).
# 3. Collect sample data.
# 4. Calculate the test statistic (e.g., t-statistic).
# 5. Determine the p-value.
# 6. Compare the p-value with α to decide whether to reject the null hypothesis.

# Q8: The p-value is the probability of obtaining a test statistic at least as extreme as the one observed, assuming the null hypothesis is true.
# If the p-value is less than the significance level (α), it suggests that the null hypothesis can be rejected in favor of the alternative hypothesis.

# Q9: Generate a Student's t-distribution plot with 10 degrees of freedom:
import numpy as np
import matplotlib.pyplot as plt

# Evaluate the t density (10 degrees of freedom) on 1000 evenly spaced points over [-5, 5].
x = np.linspace(start=-5, stop=5, num=1000)
y = stats.t(df=10).pdf(x)

# Draw the curve, label the figure, then display it.
plt.plot(x, y)
plt.xlabel('x')
plt.ylabel('Density')
plt.title("Student's t-distribution with 10 degrees of freedom")
plt.show()

# Q10: Python program to calculate two-sample t-test for independent samples:
def two_sample_t_test(sample1, sample2):
    """
    Run an independent two-sample t-test on the given samples.

    The test is run with ``equal_var=True``, i.e. both samples are treated as
    having the same variance.

    :param sample1: Data from the first sample.
    :param sample2: Data from the second sample.
    :return: The t-statistic and p-value from the test.
    """
    result = stats.ttest_ind(a=sample1, b=sample2, equal_var=True)
    return result.statistic, result.pvalue

# Q11: Student's t-distribution is used for hypothesis testing and constructing confidence intervals when the population standard deviation
# is unknown and must be estimated from the sample. It matters most when the sample size is small (typically n < 30), where it differs noticeably from the normal distribution.

# Q12: The t-statistic measures how far the sample mean is from the hypothesized population mean, in units of the standard error.
# Formula: t = (X̄ - μ) / (s / √n)
# Where X̄ is the sample mean, μ is the population mean, s is the sample standard deviation, and n is the sample size.

# Q13: Estimate the population mean revenue with a 95% confidence interval for a sample mean of 500, sample standard deviation of 50, and sample size of 50:
# Sample summary for the revenue estimate.
sample_mean, sample_std_dev, sample_size = 500, 50, 50
confidence_level = 0.95

# Unpack the interval bounds returned by the estimator and report them.
lower, upper = estimate_population_mean(
    sample_mean=sample_mean,
    sample_std_dev=sample_std_dev,
    sample_size=sample_size,
    confidence_level=confidence_level,
)
print(f"95% Confidence Interval for Population Mean Revenue: {lower} to {upper}")

# Q14: Hypothesis testing for the new drug's effect on blood pressure:
# Null hypothesis (H0): μ = 10 (the mean change in blood pressure equals the hypothesized value of 10).
# Alternative hypothesis (H1): μ ≠ 10 (the mean change in blood pressure differs from 10).
# Use t-test to test this hypothesis.

# Q15: Test the hypothesis that the true mean weight of the products is less than 5 pounds with a significance level of 0.01.
# Null hypothesis (H0): μ ≥ 5. Alternative hypothesis (H1): μ < 5.
# Use a one-sample, left-tailed t-test to test this hypothesis.

# Q16: Two groups of students test:
# Null hypothesis (H0): μ1 = μ2 (The means are equal).
# Alternative hypothesis (H1): μ1 ≠ μ2 (The means are different).
# Use a two-sample t-test to compare the means of both groups with a significance level of 0.01.

# Q17: Estimate the population mean with a 99% confidence interval for ads watched by viewers:
# Sample summary for the ads-watched estimate.
sample_mean, sample_std_dev, sample_size = 4, 1.5, 50
confidence_level = 0.99

# Unpack the interval bounds returned by the estimator and report them.
lower, upper = estimate_population_mean(
    sample_mean=sample_mean,
    sample_std_dev=sample_std_dev,
    sample_size=sample_size,
    confidence_level=confidence_level,
)
print(f"99% Confidence Interval for Population Mean Ads Watched: {lower} to {upper}")
