# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio test. The function should return the F-value and the corresponding p-value for the test.

In [3]:
def calculate_f_value(data1, data2):
    """Compute the variance-ratio F statistic of two samples and its p-value.

    The statistic is the unbiased sample variance of ``data1`` divided by
    the unbiased sample variance of ``data2``.

    Args:
        data1: Sequence of numeric observations (numerator sample).
        data2: Sequence of numeric observations (denominator sample).

    Returns:
        A tuple ``(f_value, p_value)`` where ``p_value`` is the upper-tail
        probability P(F >= f_value) under an F distribution with
        ``len(data1) - 1`` and ``len(data2) - 1`` degrees of freedom.

    Raises:
        ValueError: If either sample has fewer than two observations, in
            which case its sample variance is undefined.
    """
    # Imported locally so the function does not depend on which other cells
    # have already run; nothing above this definition binds ``np`` or ``f``.
    import numpy as np
    from scipy.stats import f as f_dist

    # Degrees of freedom of each sample variance.
    df1 = len(data1) - 1
    df2 = len(data2) - 1
    if df1 < 1 or df2 < 1:
        raise ValueError("Each sample needs at least two observations.")

    # ddof=1 selects the unbiased (n - 1) sample variance.
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)
    f_value = var1 / var2

    # sf() is the survival function 1 - cdf, evaluated directly so that very
    # small upper-tail probabilities are not lost to rounding.
    p_value = f_dist.sf(f_value, df1, df2)
    return f_value, p_value

# Example run: two five-element samples passed through calculate_f_value.
data1 = [4, 5, 6, 7, 8]
data2 = [3, 4, 2, 5, 6]

result = calculate_f_value(data1, data2)
f_value, p_value = result

print(f"F-value: {f_value}")
print(f"p-value: {p_value}")

F-value: 1.0
p-value: 0.5


# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [2]:
import scipy.stats as stats
def critical_f_value(alpha, dfn, dfd):
    """Return the upper critical F-value for a two-tailed test.

    Args:
        alpha: Significance level of the test; it is split evenly between
            the two tails.
        dfn: Numerator degrees of freedom.
        dfd: Denominator degrees of freedom.

    Returns:
        The ``1 - alpha / 2`` quantile of the F(dfn, dfd) distribution.
    """
    # Each tail carries alpha / 2, so the upper cut-off sits at 1 - alpha / 2.
    upper_quantile = 1 - alpha / 2
    return stats.f.ppf(upper_quantile, dfn, dfd)
# Example run: 5% significance level with 3 and 16 degrees of freedom.
alpha, dfn, dfd = 0.05, 3, 16

critical_f = critical_f_value(alpha, dfn, dfd)
print(f"Critical F-value at alpha={alpha} for dfn={dfn} and dfd={dfd} is {critical_f:.4f}")


Critical F-value at alpha=0.05 for dfn=3 and dfd=16 is 4.0768


# Q3. Write a Python program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal. The program should output the
# F-value, degrees of freedom, and p-value for the test.

In [4]:
import numpy as np
import scipy.stats as stats

# Fix the seed so every run draws the same two samples.
np.random.seed(0)

# Population settings (mean, variance, sample size) for each distribution.
mean1, variance1, sample_size1 = 0, 4, 30
mean2, variance2, sample_size2 = 0, 9, 30

# np.random.normal takes a standard deviation, hence the square roots.
sample1 = np.random.normal(mean1, np.sqrt(variance1), sample_size1)
sample2 = np.random.normal(mean2, np.sqrt(variance2), sample_size2)

# F statistic: ratio of the unbiased (ddof=1) sample variances.
sample_var1 = np.var(sample1, ddof=1)
sample_var2 = np.var(sample2, ddof=1)
f_statistic = sample_var1 / sample_var2
dfn, dfd = sample_size1 - 1, sample_size2 - 1

# Two-tailed p-value: twice the smaller of the two tail probabilities.
lower_tail = stats.f.cdf(f_statistic, dfn, dfd)
upper_tail = 1 - lower_tail
p_value = 2 * min(lower_tail, upper_tail)

print(f"F-statistic: {f_statistic:.4f}")
print(f"Degrees of freedom (numerator, denominator): ({dfn}, {dfd})")
print(f"P-value: {p_value:.4f}")

# Decide at the 5% significance level.
alpha = 0.05
verdict = (
    "Reject the null hypothesis: Variances are not equal."
    if p_value < alpha
    else "Fail to reject the null hypothesis: Variances are equal."
)
print(verdict)





F-statistic: 0.6438
Degrees of freedom (numerator, denominator): (29, 29)
P-value: 0.2416
Fail to reject the null hypothesis: Variances are equal.


# Q4. The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from each population. Conduct an F-test at the 5% significance level to determine if the variances are
# significantly different.

In [1]:
import scipy.stats as stats


In [2]:
# Variances given in the problem statement and the two sample sizes.
variance1 = 10
variance2 = 15

n1 = 12
n2 = 12

# Degrees of freedom for the numerator and denominator.
df1 = n1 - 1
df2 = n2 - 1

F_statistic = variance1 / variance2

# "Significantly different" is a two-sided alternative, so the p-value is
# twice the smaller tail probability. Using only the upper tail would make
# the result depend on which variance happens to be placed in the numerator.
lower_tail = stats.f.cdf(F_statistic, df1, df2)
upper_tail = stats.f.sf(F_statistic, df1, df2)
p_value = min(1.0, 2 * min(lower_tail, upper_tail))
alpha = 0.05

if p_value < alpha:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: The variances are not significantly different.")

Fail to reject the null hypothesis: The variances are not significantly different.


# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25
# products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified.

In [3]:
import scipy.stats as stats

# Claimed population variance (the null-hypothesis value).
sigma_squared_claimed = 0.005

# Observed sample variance and the number of products sampled.
sample_variance = 0.006
sample_size = 25

# Numerator degrees of freedom come from the sample variance.
df1 = sample_size - 1

# The claimed variance is a fixed constant rather than an estimate, so the
# denominator has infinite degrees of freedom. In that limit the F(df1, inf)
# distribution equals chi-square(df1) / df1, which is used for the cut-offs.
df2 = float("inf")

F_statistic = sample_variance / sigma_squared_claimed

alpha = 0.01  # 1% significance level

# Two-tailed critical values of F(df1, inf), obtained from chi-square quantiles.
F_critical = stats.chi2.ppf(1 - alpha / 2, df1) / df1
F_critical_lower = stats.chi2.ppf(alpha / 2, df1) / df1

# Compare the F-statistic to both critical values for a two-tailed test
if F_statistic < F_critical_lower or F_statistic > F_critical:
    print("Reject the null hypothesis: The claim is not justified.")
else:
    print("Fail to reject the null hypothesis: The claim is justified.")

Fail to reject the null hypothesis: The claim is justified.


# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return the
# mean and variance as a tuple.

In [4]:
def f_distribution_mean_and_variance(df1, df2):
    """Return the mean and variance of an F(df1, df2) distribution.

    Args:
        df1: Numerator degrees of freedom; must be positive.
        df2: Denominator degrees of freedom; must be positive.

    Returns:
        A ``(mean, variance)`` tuple. The mean is reported as infinity
        unless ``df2 > 2``, and the variance as infinity unless ``df2 > 4``.

    Raises:
        ValueError: If either degrees-of-freedom argument is not positive.
    """
    if df1 <= 0 or df2 <= 0:
        raise ValueError("Degrees of freedom must be greater than zero.")

    infinity = float("inf")

    # The mean depends only on the denominator degrees of freedom.
    mean = df2 / (df2 - 2) if df2 > 2 else infinity

    if df2 <= 4:
        return mean, infinity

    # Variance: 2 * df2^2 * (df1 + df2 - 2) / (df1 * (df2 - 2)^2 * (df2 - 4))
    numerator = 2 * (df2 ** 2 * (df1 + df2 - 2))
    denominator = df1 * (df2 - 2) ** 2 * (df2 - 4)
    return mean, numerator / denominator

In [5]:
# Example run: F distribution with 5 and 10 degrees of freedom.
df1, df2 = 5, 10

mean, variance = f_distribution_mean_and_variance(df1, df2)

print(f"Mean: {mean}")
print(f"Variance: {variance}")

Mean: 1.25
Variance: 1.3541666666666667


# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another
# normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.

In [7]:
import scipy.stats as stats


# Sample variances reported for the two samples.
variance1 = 25
variance2 = 20

# Sample sizes.
n1 = 10
n2 = 15

# Degrees of freedom for the numerator and denominator.
df1 = n1 - 1
df2 = n2 - 1


F_statistic = variance1 / variance2

alpha = 0.10  # 10% significance level

# Calculate both critical F-values. The lower one must come from the
# alpha / 2 quantile: 1 / F_critical is only valid when df1 == df2, because
# the reciprocal of an F(df1, df2) variable follows F(df2, df1).
F_critical = stats.f.ppf(1 - alpha / 2, df1, df2)
F_critical_lower = stats.f.ppf(alpha / 2, df1, df2)

# Compare the F-statistic to the critical F-values for a two-tailed test
if F_statistic < F_critical_lower or F_statistic > F_critical:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: The variances are not significantly different.")

Fail to reject the null hypothesis: The variances are not significantly different.


# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
# significance level to determine if the variances are significantly different.

In [8]:
import scipy.stats as stats


# Waiting times in minutes at the two restaurants.
waiting_times_A = [24, 25, 28, 23, 22, 20, 27]
waiting_times_B = [31, 33, 35, 30, 32, 36]

# Compute each mean once instead of re-summing the list for every element.
mean_A = sum(waiting_times_A) / len(waiting_times_A)
mean_B = sum(waiting_times_B) / len(waiting_times_B)

# Unbiased sample variances (divide by n - 1).
variance_A = sum((x - mean_A) ** 2 for x in waiting_times_A) / (len(waiting_times_A) - 1)
variance_B = sum((x - mean_B) ** 2 for x in waiting_times_B) / (len(waiting_times_B) - 1)

# Degrees of freedom for the numerator and denominator.
df1 = len(waiting_times_A) - 1
df2 = len(waiting_times_B) - 1

F_statistic = variance_A / variance_B

# Set the significance level
alpha = 0.05  # 5% significance level

# Both critical values. The lower one must come from the alpha / 2 quantile:
# 1 / F_critical is only valid when df1 == df2, which is not the case here.
F_critical = stats.f.ppf(1 - alpha / 2, df1, df2)
F_critical_lower = stats.f.ppf(alpha / 2, df1, df2)

# Compare the F-statistic to the critical F-values for a two-tailed test
if F_statistic < F_critical_lower or F_statistic > F_critical:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: The variances are not significantly different.")

Fail to reject the null hypothesis: The variances are not significantly different.


# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances are significantly different.

In [9]:
import scipy.stats as stats


# Test scores of the two groups.
group_A_scores = [80, 85, 90, 92, 87, 83]
group_B_scores = [75, 78, 82, 79, 81, 84]

# Compute each mean once instead of re-summing the list for every element.
mean_A = sum(group_A_scores) / len(group_A_scores)
mean_B = sum(group_B_scores) / len(group_B_scores)

# Unbiased sample variances (divide by n - 1).
variance_A = sum((x - mean_A) ** 2 for x in group_A_scores) / (len(group_A_scores) - 1)
variance_B = sum((x - mean_B) ** 2 for x in group_B_scores) / (len(group_B_scores) - 1)

# Degrees of freedom for the numerator and denominator.
df1 = len(group_A_scores) - 1
df2 = len(group_B_scores) - 1


F_statistic = variance_A / variance_B


alpha = 0.01  # 1% significance level

# Both critical values. Taking the lower one from the alpha / 2 quantile keeps
# the test correct even if the two groups ever differ in size; 1 / F_critical
# matches it only when df1 == df2.
F_critical = stats.f.ppf(1 - alpha / 2, df1, df2)
F_critical_lower = stats.f.ppf(alpha / 2, df1, df2)

# Compare the F-statistic to the critical F-values for a two-tailed test
if F_statistic < F_critical_lower or F_statistic > F_critical:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: The variances are not significantly different.")

Fail to reject the null hypothesis: The variances are not significantly different.
