In [5]:
# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio
# test. The function should return the F-value and the corresponding p-value for the test.

import scipy.stats as stats
import numpy as np

def f_test(data1, data2):
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)
    F_value = var1 / var2 if var1 > var2 else var2 / var1
    df1 = len(data1) - 1
    df2 = len(data2) - 1
    p_value = 1 - stats.f.cdf(F_value, df1, df2)
    return F_value, p_value

data1 = [23, 29, 21, 25, 27, 22, 24]
data2 = [31, 35, 29, 33, 32, 30, 34]

F_value, p_value = f_test(data1, data2)
print(f"F-value: {F_value}")
print(f"P-value: {p_value}")

F-value: 1.7040816326530615
P-value: 0.26670472889711405


In [6]:
# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an
# F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

import scipy.stats as stats

def critical_f_value(alpha, df1, df2):
    return stats.f.ppf(1 - alpha/2, df1, df2)

alpha = 0.05
df1 = 10
df2 = 15
F_critical = critical_f_value(alpha, df1, df2)
print(f"Critical F-value: {F_critical}")

Critical F-value: 3.060196851411249


In [3]:
# Q3. Write a Python program that generates random samples from two normal distributions with known
# variances and uses an F-test to determine if the variances are equal. The program should output the F-
# value, degrees of freedom, and p-value for the test.

import numpy as np
import scipy.stats as stats
np.random.seed(42)

sample1 = np.random.normal(loc=0, scale=np.sqrt(10), size=30)
sample2 = np.random.normal(loc=0, scale=np.sqrt(15), size=30)

var1 = np.var(sample1, ddof=1)
var2 = np.var(sample2, ddof=1)

F_value = var1 / var2 if var1 > var2 else var2 / var1

df1 = len(sample1) - 1
df2 = len(sample2) - 1

p_value = 1 - stats.f.cdf(F_value, df1, df2)

print(f"F-value: {F_value}, Degrees of Freedom: ({df1}, {df2}), p-value: {p_value}")


F-value: 1.6054424447517857, Degrees of Freedom: (29, 29), p-value: 0.10420460064830606


In [7]:
# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from
# each population. Conduct an F-test at the 5% significance level to determine if the variances are
# significantly different.

import scipy.stats as stats

var1 = 10
var2 = 15
n1 = 12
n2 = 12

F_value = var1 / var2 if var1 > var2 else var2 / var1

df1 = n1 - 1
df2 = n2 - 1

p_value = 1 - stats.f.cdf(F_value, df1, df2)

alpha = 0.05

if p_value < alpha:
    print(f"Reject null hypothesis: Variances are significantly different (p-value = {p_value})")
else:
    print(f"Fail to reject null hypothesis: Variances are not significantly different (p-value = {p_value})")


Fail to reject null hypothesis: Variances are not significantly different (p-value = 0.25619489936789974)


In [8]:
# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25
# products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance
# level to determine if the claim is justified.

import scipy.stats as stats

sample_var = 0.006
claimed_var = 0.005
n = 25
alpha = 0.01

F_value = sample_var / claimed_var

df1 = n - 1
df2 = n - 1

p_value = 1 - stats.f.cdf(F_value, df1, df2)

if p_value < alpha:
    print(f"Reject the manufacturer's claim (p-value = {p_value})")
else:
    print(f"Fail to reject the manufacturer's claim (p-value = {p_value})")


Fail to reject the manufacturer's claim (p-value = 0.3293654682817244)


In [12]:
# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an
# F-distribution and calculates the mean and variance of the distribution. The function should return the
# mean and variance as a tuple.

def f_distribution_mean_variance(df1, df2):
    mean = df2 / (df2 - 2) if df2 > 2 else None
    variance = (2 * df2**2 * (df1 + df2 - 2)) / (df1 * (df2 - 2)**2 * (df2 - 4)) if df2 > 4 else None
    return mean, variance

df1 = 5
df2 = 10

mean, variance = f_distribution_mean_variance(df1, df2)

if mean is not None:
    print(f"Mean of the F-distribution: {mean}")
else:
    print("Mean is undefined for the given degrees of freedom.")

if variance is not None:
    print(f"Variance of the F-distribution: {variance}")
else:
    print("Variance is undefined for the given degrees of freedom.")

Mean of the F-distribution: 1.25
Variance of the F-distribution: 1.3541666666666667


In [9]:
# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The
# sample variance is found to be 25. Another random sample of 15 measurements is taken from another
# normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test
# at the 10% significance level to determine if the variances are significantly different.

import numpy as np
import scipy.stats as stats

sample1 = np.random.normal(0, np.sqrt(25), 10)
sample2 = np.random.normal(0, np.sqrt(20), 15)
var1 = np.var(sample1, ddof=1)
var2 = np.var(sample2, ddof=1)
F_value = var1 / var2 if var1 > var2 else var2 / var1

df1 = len(sample1) - 1
df2 = len(sample2) - 1
p_value = 1 - stats.f.cdf(F_value, df1, df2)
alpha = 0.10

if p_value < alpha:
    print(f"Reject null hypothesis: Variances are significantly different (p-value = {p_value})")
else:
    print(f"Fail to reject null hypothesis: Variances are not significantly different (p-value = {p_value})")


Fail to reject null hypothesis: Variances are not significantly different (p-value = 0.25303241375763474)


In [10]:
# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday
# night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
# significance level to determine if the variances are significantly different.

import scipy.stats as stats
import numpy as np

A = [24, 25, 28, 23, 22, 20, 27]
B = [31, 33, 35, 30, 32, 36]

var_A = np.var(A, ddof=1)
var_B = np.var(B, ddof=1)

F_value = var_A / var_B if var_A > var_B else var_B / var_A

df_A = len(A) - 1
df_B = len(B) - 1

p_value = 1 - stats.f.cdf(F_value, df_A, df_B)

alpha = 0.05
if p_value < alpha:
    print(f"Reject null hypothesis: Variances are significantly different (p-value = {p_value})")
else:
    print(f"Fail to reject null hypothesis: Variances are not significantly different (p-value = {p_value})")


Fail to reject null hypothesis: Variances are not significantly different (p-value = 0.3487407873968742)


In [11]:
# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83;
# Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances
# are significantly different.

import scipy.stats as stats
import numpy as np

A = [80, 85, 90, 92, 87, 83]
B = [75, 78, 82, 79, 81, 84]

var_A = np.var(A, ddof=1)
var_B = np.var(B, ddof=1)

F_value = var_A / var_B if var_A > var_B else var_B / var_A

df_A = len(A) - 1
df_B = len(B) - 1

p_value = 1 - stats.f.cdf(F_value, df_A, df_B)

alpha = 0.01
if p_value < alpha:
    print(f"Reject null hypothesis: Variances are significantly different (p-value = {p_value})")
else:
    print(f"Fail to reject null hypothesis: Variances are not significantly different (p-value = {p_value})")


Fail to reject null hypothesis: Variances are not significantly different (p-value = 0.2415521774535344)
