In [1]:
# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance
# ratio test. The function should return the F-value and the corresponding p-value for the test.

import numpy as np
import scipy.stats as stats

def variance_ratio_test(data1, data2):
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)
    
    f_value = var1 / var2

    df1 = len(data1) - 1
    df2 = len(data2) - 1
    
    p_value = 1 - stats.f.cdf(f_value, df1, df2)
    
    return f_value, p_value

data1 = [10, 12, 15, 14, 11]
data2 = [8, 9, 10, 11, 12]
f_value, p_value = variance_ratio_test(data1, data2)
print("F-value:", f_value)
print("p-value:", p_value)


F-value: 1.72
p-value: 0.3061075208630164


In [2]:
# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of
# F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

import scipy.stats as stats

def critical_f_value(significance_level, df_num, df_den):
    alpha = significance_level / 2  
    critical_f = stats.f.ppf(1 - alpha, df_num, df_den)
    
    return critical_f

significance_level = 0.05
df_num = 3 
df_den = 20
critical_f = critical_f_value(significance_level, df_num, df_den)
print("Critical F-value:", critical_f)



Critical F-value: 3.858698666273216


In [3]:
# Q3. Write a Python program that generates random samples from two normal distributions with known
# variances and uses an F-test to determine if the variances are equal. The program should output the F-
# value, degrees of freedom, and p-value for the test.

import numpy as np
import scipy.stats as stats

np.random.seed(42)

sample_size1 = 30
sample_size2 = 30
variance1 = 9  
variance2 = 16  
significance_level = 0.05

sample1 = np.random.normal(loc=0, scale=np.sqrt(variance1), size=sample_size1)
sample2 = np.random.normal(loc=0, scale=np.sqrt(variance2), size=sample_size2)

f_statistic = np.var(sample1, ddof=1) / np.var(sample2, ddof=1)
df1 = sample_size1 - 1
df2 = sample_size2 - 1
p_value = 1 - stats.f.cdf(f_statistic, df1, df2)

print("F-value:", f_statistic)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)

if p_value < significance_level:
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("Fail to reject the null hypothesis: Variances are equal.")


F-value: 0.5255560563745094
Degrees of freedom (numerator): 29
Degrees of freedom (denominator): 29
p-value: 0.9557089511933672
Fail to reject the null hypothesis: Variances are equal.


In [4]:
# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from
# each population. Conduct an F-test at the 5% significance level to determine if the variances are
# significantly different.

import scipy.stats as stats

known_variance1 = 10
known_variance2 = 15
sample_size = 12
significance_level = 0.05

f_statistic = known_variance1 / known_variance2

df1 = sample_size - 1
df2 = sample_size - 1

p_value = 1 - stats.f.cdf(f_statistic, df1, df2)

print("F-value:", f_statistic)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)

if p_value < significance_level:
    print("Reject the null hypothesis: Variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: Variances are not significantly different.")


F-value: 0.6666666666666666
Degrees of freedom (numerator): 11
Degrees of freedom (denominator): 11
p-value: 0.7438051006321
Fail to reject the null hypothesis: Variances are not significantly different.


In [5]:
# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25
# products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance
# level to determine if the claim is justified.

import scipy.stats as stats

claimed_variance = 0.005
sample_variance = 0.006
sample_size = 25
significance_level = 0.01

f_statistic = sample_variance / claimed_variance

df1 = sample_size - 1
df2 = sample_size - 1

p_value = 1 - stats.f.cdf(f_statistic, df1, df2)

print("F-value:", f_statistic)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)

if p_value < significance_level:
    print("Reject the null hypothesis: The claim about variance is not justified.")
else:
    print("Fail to reject the null hypothesis: The claim about variance is justified.")


F-value: 1.2
Degrees of freedom (numerator): 24
Degrees of freedom (denominator): 24
p-value: 0.3293654682817244
Fail to reject the null hypothesis: The claim about variance is justified.


In [6]:
# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an
# F-distribution and calculates the mean and variance of the distribution. The function should return the 
# mean and variance as a tuple.

def f_distribution_mean_variance(df_num, df_den):
    if df_num <= 2 and df_den <= 2:
        raise ValueError("Degrees of freedom must be greater than 2.")
    
    if df_den <= 4:
        variance = float("inf")
    else:
        variance = (2 * df_den ** 2 * (df_num + df_den - 2)) / (df_num * (df_den - 2) ** 2 * (df_den - 4))
    
    mean = df_den / (df_den - 2)
    
    return mean, variance

df_num = 5
df_den = 10
mean, variance = f_distribution_mean_variance(df_num, df_den)
print("Mean:", mean)
print("Variance:", variance)


Mean: 1.25
Variance: 1.3541666666666667


In [7]:
# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The
# sample variance is found to be 25. Another random sample of 15 measurements is taken from another
# normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test
# at the 10% significance level to determine if the variances are significantly different.

import scipy.stats as stats

# Given data
sample_variance1 = 25
sample_variance2 = 20
sample_size1 = 10
sample_size2 = 15
significance_level = 0.10

f_statistic = sample_variance1 / sample_variance2

df1 = sample_size1 - 1
df2 = sample_size2 - 1

p_value = 1 - stats.f.cdf(f_statistic, df1, df2)

print("F-value:", f_statistic)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)

if p_value < significance_level:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: The variances are not significantly different.")



F-value: 1.25
Degrees of freedom (numerator): 9
Degrees of freedom (denominator): 14
p-value: 0.3416097191292976
Fail to reject the null hypothesis: The variances are not significantly different.


In [8]:
# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday
# night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
# significance level to determine if the variances are significantly different.

import scipy.stats as stats

data_a = [24, 25, 28, 23, 22, 20, 27]
data_b = [31, 33, 35, 30, 32, 36]
significance_level = 0.05

sample_variance_a = sum((x - sum(data_a) / len(data_a))**2 for x in data_a) / (len(data_a) - 1)
sample_variance_b = sum((x - sum(data_b) / len(data_b))**2 for x in data_b) / (len(data_b) - 1)

f_statistic = sample_variance_a / sample_variance_b

df1 = len(data_a) - 1
df2 = len(data_b) - 1

p_value = 1 - stats.f.cdf(f_statistic, df1, df2)

print("F-value:", f_statistic)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)

if p_value < significance_level:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: The variances are not significantly different.")


F-value: 1.4551907719609583
Degrees of freedom (numerator): 6
Degrees of freedom (denominator): 5
p-value: 0.3487407873968742
Fail to reject the null hypothesis: The variances are not significantly different.


In [9]:
# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83;
# Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances
# are significantly different.

import scipy.stats as stats

group_a_scores = [80, 85, 90, 92, 87, 83]
group_b_scores = [75, 78, 82, 79, 81, 84]
significance_level = 0.01

sample_variance_a = sum((x - sum(group_a_scores) / len(group_a_scores))**2 for x in group_a_scores) / (len(group_a_scores) - 1)
sample_variance_b = sum((x - sum(group_b_scores) / len(group_b_scores))**2 for x in group_b_scores) / (len(group_b_scores) - 1)

f_statistic = sample_variance_a / sample_variance_b

df1 = len(group_a_scores) - 1
df2 = len(group_b_scores) - 1

p_value = 1 - stats.f.cdf(f_statistic, df1, df2)

print("F-value:", f_statistic)
print("Degrees of freedom (numerator):", df1)
print("Degrees of freedom (denominator):", df2)
print("p-value:", p_value)

if p_value < significance_level:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: The variances are not significantly different.")


F-value: 1.9442622950819677
Degrees of freedom (numerator): 5
Degrees of freedom (denominator): 5
p-value: 0.2415521774535344
Fail to reject the null hypothesis: The variances are not significantly different.
