# Part-2

1. Write a Python program to perform a Z-test for comparing a sample mean to a known population mean and interpret the results.

In [None]:
from scipy.stats import norm
import numpy as np

def z_test(sample_mean, pop_mean, std_dev, n, alpha=0.05):
    """Run a two-tailed one-sample Z-test and print the statistic and decision.

    Args:
        sample_mean: Observed mean of the sample.
        pop_mean: Population mean under the null hypothesis.
        std_dev: Known population standard deviation (must be > 0).
        n: Sample size (must be > 0).
        alpha: Significance level used for the decision; defaults to 0.05.

    Raises:
        ValueError: If ``n`` or ``std_dev`` is not strictly positive.
    """
    if n <= 0:
        raise ValueError("n must be a positive sample size")
    if std_dev <= 0:
        raise ValueError("std_dev must be positive")

    standard_error = std_dev / np.sqrt(n)
    z = (sample_mean - pop_mean) / standard_error
    # norm.sf is the upper-tail probability; unlike 1 - norm.cdf it does not
    # cancel to exactly 0 when |z| is large.
    p_value = 2 * norm.sf(abs(z))
    print(f"Z = {z:.3f}, P-value = {p_value:.4f}")
    if p_value < alpha:
        print("Reject null hypothesis")
    else:
        print("Fail to reject null hypothesis")

# Worked example: sample mean 52 vs. hypothesised mean 50 (sigma = 10, n = 30).
z_test(
    sample_mean=52,
    pop_mean=50,
    std_dev=10,
    n=30,
)

2. Simulate random data to perform hypothesis testing and calculate the corresponding P-value using Python.

In [None]:
# Reproducible sample of 50 draws from N(100, 10), tested two-sided against a
# hypothesised mean of 102 using the sample standard deviation.
np.random.seed(42)
data = np.random.normal(loc=100, scale=10, size=50)
pop_mean = 102
sample_mean = data.mean()
std_dev = data.std(ddof=1)  # ddof=1 -> unbiased sample estimate
z = (sample_mean - pop_mean) / (std_dev / np.sqrt(data.size))
p_value = 2 * (1 - norm.cdf(abs(z)))
print(f"Z = {z:.2f}, P = {p_value:.4f}")

3. Implement a one-sample Z-test using Python to compare the sample mean with the population mean.

In [None]:
def one_sample_z(data, pop_mean, std_dev):
    """Two-tailed one-sample Z-test of the mean of ``data``.

    Args:
        data: Sized collection of numeric observations (must be non-empty).
        pop_mean: Population mean under the null hypothesis.
        std_dev: Known population standard deviation (must be > 0).

    Returns:
        Tuple ``(z, p)``: the Z statistic and its two-tailed p-value.

    Raises:
        ValueError: If ``data`` is empty or ``std_dev`` is not positive.
    """
    n = len(data)
    if n == 0:
        raise ValueError("data must contain at least one observation")
    if std_dev <= 0:
        raise ValueError("std_dev must be positive")

    sample_mean = np.mean(data)
    z = (sample_mean - pop_mean) / (std_dev / np.sqrt(n))
    # Survival function keeps precision in the far tail, where 1 - cdf
    # would round to exactly 0.
    p = 2 * norm.sf(abs(z))
    return z, p

# Example: 40 draws from N(60, 10) tested against a mean of 62 with known sigma 10.
data = np.random.normal(loc=60, scale=10, size=40)
z, p = one_sample_z(data, pop_mean=62, std_dev=10)
print(f"Z = {z:.2f}, P = {p:.4f}")

4. Perform a two-tailed Z-test using Python and visualize the decision region on a plot.

In [None]:
import matplotlib.pyplot as plt

def visualize_z_test(z_stat, alpha=0.05):
    """Plot the standard normal pdf with two-tailed critical values and the observed Z.

    Args:
        z_stat: Observed Z statistic, drawn as a solid blue vertical line.
        alpha: Significance level; dashed red lines are drawn at
            +/- ``norm.ppf(1 - alpha / 2)``.
    """
    grid = np.linspace(-4, 4, 1000)
    plt.plot(grid, norm.pdf(grid))

    upper_crit = norm.ppf(1 - alpha / 2)
    dashed_red = {'color': 'red', 'linestyle': '--'}
    # Only the upper line is labelled so the legend shows a single entry.
    plt.axvline(upper_crit, label='Critical value', **dashed_red)
    plt.axvline(-upper_crit, **dashed_red)
    plt.axvline(z_stat, color='blue', label='Z-stat')

    plt.title("Two-Tailed Z-Test")
    plt.legend()
    plt.show()

# Example: an observed Z of 2.1 at the default 5% significance level.
visualize_z_test(2.1)

5. Create a Python function that calculates and visualizes Type 1 and Type 2 errors during hypothesis testing.

In [None]:
def plot_errors():
    """Plot N(0, 1) and N(1, 1) densities and shade both error regions.

    The area under the null curve right of the 1.64 cutoff is shaded red
    (Type I error); the area under the alternative curve at or below the
    cutoff is shaded blue (Type II error).
    """
    cutoff = 1.64
    grid = np.linspace(-4, 4, 1000)
    null_pdf = norm.pdf(grid, 0, 1)
    alt_pdf = norm.pdf(grid, 1, 1)

    plt.plot(grid, null_pdf, label='Null Hypothesis')
    plt.plot(grid, alt_pdf, label='Alternative Hypothesis')

    in_rejection = grid > cutoff
    plt.fill_between(grid, null_pdf, where=in_rejection,
                     color='red', alpha=0.3, label='Type I Error')
    plt.fill_between(grid, alt_pdf, where=~in_rejection,
                     color='blue', alpha=0.3, label='Type II Error')

    plt.legend()
    plt.title("Type I and Type II Errors")
    plt.show()

# Render the Type I / Type II error illustration.
plot_errors()

6. Write a Python program to perform an independent T-test and interpret the results.

In [None]:
from scipy.stats import ttest_ind

# Two independent samples of 30 with means 70 and 75 and a common sd of 5.
a = np.random.normal(loc=70, scale=5, size=30)
b = np.random.normal(loc=75, scale=5, size=30)

# Pooled-variance (Student) t-test, which is SciPy's default.
t_stat, p_val = ttest_ind(a, b, equal_var=True)
print(f"T-stat = {t_stat:.2f}, P-value = {p_val:.4f}")

7. Perform a paired sample T-test using Python and visualize the comparison results.

In [None]:
from scipy.stats import ttest_rel

# Paired design: each "after" value is its "before" value plus a noisy shift
# drawn from N(-2, 5).
before = np.random.normal(loc=100, scale=10, size=30)
after = before + np.random.normal(loc=-2, scale=5, size=30)

# Overlay both series, indexed by subject.
plt.plot(before, label='Before')
plt.plot(after, label='After')
plt.title('Paired Samples')
plt.legend()
plt.show()

t_stat, p_val = ttest_rel(before, after)
print(f"T-stat = {t_stat:.2f}, P = {p_val:.4f}")

8. Simulate data and perform both Z-test and T-test, then compare the results using Python.

In [None]:
from scipy.stats import ttest_1samp

# Feed one simulated sample to both tests so the results are directly comparable.
data = np.random.normal(loc=100, scale=10, size=30)
pop_mean = 98
t_stat, p_t = ttest_1samp(data, popmean=pop_mean)
z, p_z = one_sample_z(data, pop_mean, std_dev=10)  # sigma assumed known (10)
print(f"Z-test: Z={z:.2f}, P={p_z:.4f}")
print(f"T-test: T={t_stat:.2f}, P={p_t:.4f}")

9. Write a Python function to calculate the confidence interval for a sample mean and explain its significance.

In [None]:
def confidence_interval(data, confidence=0.95):
    """Return the t-based confidence interval for the mean of ``data``.

    The interval is ``mean +/- t * SEM``, where ``t`` is the two-sided
    critical value of Student's t with ``n - 1`` degrees of freedom.

    Args:
        data: Sized collection of numeric observations.
        confidence: Confidence level as a fraction; defaults to 0.95.

    Returns:
        Tuple ``(lower, upper)`` bounding the interval.
    """
    # Imported here because this file never imports ``scipy.stats`` as
    # ``stats`` at module level.
    from scipy import stats

    mean = np.mean(data)
    sem = stats.sem(data)
    n = len(data)
    h = stats.t.ppf((1 + confidence) / 2., n - 1) * sem
    return mean - h, mean + h

# 95% interval for the mean of the most recently simulated `data`.
ci = confidence_interval(data, confidence=0.95)
print("95% Confidence Interval:", ci)

10. Write a Python program to calculate the margin of error for a given confidence level using sample data.

In [None]:
def margin_of_error(data, confidence=0.95):
    """Return the t-based margin of error for the mean of ``data``.

    This is the half-width of the confidence interval: the two-sided
    Student's t critical value (``n - 1`` degrees of freedom) times the
    standard error of the mean.

    Args:
        data: Sized collection of numeric observations.
        confidence: Confidence level as a fraction; defaults to 0.95.

    Returns:
        The margin of error as a float.
    """
    # Imported here because this file never imports ``scipy.stats`` as
    # ``stats`` at module level.
    from scipy import stats

    sem = stats.sem(data)
    n = len(data)
    moe = stats.t.ppf((1 + confidence) / 2., n - 1) * sem
    return moe

# Margin of error at the 95% level for the current `data` sample.
print("Margin of Error:", margin_of_error(data, confidence=0.95))

11. Implement a Bayesian inference method using Bayes' Theorem in Python and explain the process.

In [None]:
def bayes_theorem(prior, likelihood, evidence):
    """Return the posterior P(H|E) = P(E|H) * P(H) / P(E).

    Args:
        prior: P(H), the probability of the hypothesis before the evidence.
        likelihood: P(E|H), the probability of the evidence given the hypothesis.
        evidence: P(E), the total probability of the evidence.
    """
    joint = likelihood * prior
    return joint / evidence

# Example: prior 1%, likelihood 0.9; the evidence term is expanded as
# P(E|H)P(H) + P(E|not H)P(not H) with P(E|not H) = 0.05.
posterior = bayes_theorem(
    prior=0.01,
    likelihood=0.9,
    evidence=0.01*0.9 + 0.99*0.05,
)
print("Posterior Probability:", round(posterior, 4))

12. Perform a Chi-square test for independence between two categorical variables in Python.

In [None]:
from scipy.stats import chi2_contingency

# 2x2 contingency table of observed counts (rows and columns are the two
# categorical variables).
table = [
    [20, 15],
    [30, 35],
]
# correction=True is SciPy's default; it is spelled out because it applies
# Yates' continuity correction when the table has one degree of freedom.
chi2, p, dof, ex = chi2_contingency(table, correction=True)
print(f"Chi2 = {chi2:.2f}, P = {p:.4f}")

13. Write a Python program to calculate the expected frequencies for a Chi-square test based on observed data.

In [None]:
# The expected-frequency table is the fourth item of chi2_contingency's result.
expected = chi2_contingency(table)[3]
print("Expected Frequencies:")
print(expected)

14. Perform a goodness-of-fit test using Python to compare the observed data to an expected distribution.

In [None]:
from scipy.stats import chisquare

# Observed vs. expected category counts; both sum to 100.
observed = [50, 30, 20]
expected = [40, 40, 20]
chi2, p = chisquare(observed, f_exp=expected)
print(f"Chi2 = {chi2:.2f}, P = {p:.4f}")

15. Create a Python script to simulate and visualize the Chi-square distribution and discuss its characteristics.

In [None]:
from scipy.stats import chi2

# Density of the chi-square distribution with 3 degrees of freedom on [0, 20].
x = np.linspace(0, 20, 1000)
y = chi2.pdf(x, 3)
plt.plot(x, y)
plt.xlabel("Value")
plt.ylabel("Density")
plt.title("Chi-square Distribution (df=3)")
plt.grid()
plt.show()

16. Implement an F-test using Python to compare the variances of two random samples.

In [None]:
# F-test for equality of two variances: the statistic is the ratio of the
# unbiased sample variances, referred to F(n1 - 1, n2 - 1).
from scipy.stats import f as f_dist

data1 = np.random.normal(10, 2, 50)
data2 = np.random.normal(10, 3, 50)

f_stat = np.var(data1, ddof=1) / np.var(data2, ddof=1)
dfn, dfd = len(data1) - 1, len(data2) - 1
# Two-sided p-value: double the smaller tail, capped at 1.
f_p_value = min(1.0, 2 * min(f_dist.cdf(f_stat, dfn, dfd),
                             f_dist.sf(f_stat, dfn, dfd)))
print(f"F-statistic = {f_stat:.2f}")
print(f"Two-sided P-value = {f_p_value:.4f}")

17. Write a Python program to perform an ANOVA test to compare means between multiple groups and interpret the results.

In [None]:
from scipy.stats import f_oneway

# Three groups of 30 sharing sd 5, with means 50, 55 and 52.
g1 = np.random.normal(loc=50, scale=5, size=30)
g2 = np.random.normal(loc=55, scale=5, size=30)
g3 = np.random.normal(loc=52, scale=5, size=30)

# One-way ANOVA across the three groups.
f, p = f_oneway(g1, g2, g3)
print(f"F = {f:.2f}, P = {p:.4f}")

18. Perform a one-way ANOVA test using Python to compare the means of different groups and plot the results.

In [None]:
# Side-by-side boxplots of the three ANOVA groups.
# NOTE(review): newer Matplotlib releases reportedly rename `labels` to
# `tick_labels`; confirm against the installed version before switching.
plt.boxplot(
    [g1, g2, g3],
    labels=['Group1', 'Group2', 'Group3'],
)
plt.title("ANOVA Group Means")
plt.grid()
plt.show()

19. Write a Python function to check the assumptions (normality, independence, and equal variance) for ANOVA.

In [None]:
from scipy.stats import shapiro, levene

# Normality via Shapiro-Wilk and equal variances via Levene's test.
# NOTE(review): only g1 is tested for normality here; g2 and g3 are not.
print("Shapiro Test (Normality):", shapiro(g1))
# center='median' is SciPy's default for levene, written out for clarity.
print("Levene Test (Equal Var):", levene(g1, g2, g3, center='median'))

20. Perform a two-way ANOVA test using Python to study the interaction between two factors and visualize the results.

In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Balanced 2x2 design: 15 simulated scores per gender/group combination.
df = pd.DataFrame({
    'score': np.random.normal(loc=60, scale=10, size=60),
    'gender': ['M'] * 30 + ['F'] * 30,
    'group': (['A'] * 15 + ['B'] * 15) * 2,
})

# Both main effects plus their interaction, summarised as a Type II ANOVA table.
model = ols('score ~ C(gender) + C(group) + C(gender):C(group)', data=df).fit()
anova_result = sm.stats.anova_lm(model, typ=2)
print(anova_result)

21. Write a Python program to visualize the F-distribution and discuss its use in hypothesis testing.

In [None]:
# Density of the F-distribution with (5, 10) degrees of freedom on [0, 5].
# `stats` is imported here because no earlier cell in this file binds it.
from scipy import stats

x = np.linspace(0, 5, 1000)
y = stats.f.pdf(x, dfn=5, dfd=10)
plt.plot(x, y)
plt.title("F-distribution (dfn=5, dfd=10)")
plt.grid()
plt.show()

22. Perform a one-way ANOVA test in Python and visualize the results with boxplots to compare group means.

In [None]:
# Boxplots of the three previously simulated groups, labelled A/B/C.
plt.boxplot(
    [g1, g2, g3],
    labels=['A', 'B', 'C'],
)
plt.title("Boxplot for ANOVA")
plt.grid()
plt.show()

23. Simulate random data from a normal distribution, then perform hypothesis testing to evaluate the means.

In [None]:
# 50 draws from N(70, 10), tested with a one-sample t-test against a mean of 72.
data = np.random.normal(loc=70, scale=10, size=50)
print(ttest_1samp(data, popmean=72))

24. Perform a hypothesis test for population variance using a Chi-square distribution and interpret the results.

In [None]:
# Chi-square test of H0: variance == 25 against H1: variance > 25.
# Statistic: (n - 1) * s^2 / sigma0^2, referred to chi-square with n - 1 df.
hypothesized_var = 25
sample = np.random.normal(50, 5, 30)
sample_var = np.var(sample, ddof=1)
# Degrees of freedom follow the sample size instead of a hard-coded 29.
var_dof = len(sample) - 1
chi2_stat = var_dof * sample_var / hypothesized_var
# Upper-tail probability; sf avoids the cancellation in 1 - cdf.
p = chi2.sf(chi2_stat, df=var_dof)
print(f"Chi2 stat = {chi2_stat:.2f}, P = {p:.4f}")

25. Write a Python script to perform a Z-test for comparing proportions between two datasets or groups.

In [None]:
from statsmodels.stats.proportion import proportions_ztest

# Successes and trials per group: 40/100 vs. 30/100.
count = np.array([40, 30])
nobs = np.array([100, 100])
stat, p = proportions_ztest(count=count, nobs=nobs)
print(f"Z = {stat:.2f}, P = {p:.4f}")

26. Implement an F-test for comparing the variances of two datasets, then interpret and visualize the results.

In [None]:
# Ratio of unbiased sample variances for the two earlier samples, followed by
# overlaid histograms of the raw values.
var1 = data1.var(ddof=1)
var2 = data2.var(ddof=1)
f = var1 / var2
print(f"F-statistic: {f:.2f}")

plt.hist(
    [data1, data2],
    label=['Group 1', 'Group 2'],
    alpha=0.7,
)
plt.legend()
plt.show()

27. Perform a Chi-square test for goodness of fit with simulated data and analyze the results.

In [None]:
# Second goodness-of-fit example; observed and expected counts both sum to 100.
observed = [25, 35, 40]
expected = [30, 30, 40]
chi2, p = chisquare(f_obs=observed, f_exp=expected)
print(f"Chi2 = {chi2:.2f}, P = {p:.4f}")