In [66]:
# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio
# test. The function should return the F-value and the corresponding p-value for the test.

In [67]:
import numpy as np
from scipy.stats import f, bartlett
import scipy.stats as stats

In [68]:
def variance_ratio_test(data1, data2):
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)
    F_value = var1 / var2
    df1 = len(data1) - 1
    df2 = len(data2) - 1
    p_value = f.sf(F_value, df1, df2)

    return F_value, p_value

In [69]:
data1 = [1, 2, 3, 4, 5]
data2 = [2, 4, 6, 8, 10]

In [70]:
F_value, p_value = variance_ratio_test(data1, data2)

In [71]:
F_value, p_value

(0.25, 0.896)

In [72]:
# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an
# F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [73]:
def critical_f_value(alpha, df_numerator, df_denominator):
    critical_value = stats.f.ppf(1 - alpha / 2, df_numerator, df_denominator)
    return critical_value

In [74]:
alpha = 0.05
df_numerator = 3 
df_denominator = 20  

In [75]:
critical_value = critical_f_value(alpha, df_numerator, df_denominator)

In [76]:
critical_value

3.8586986662732143

In [77]:
# Q3. Write a Python program that generates random samples from two normal distributions with known
# variances and uses an F-test to determine if the variances are equal. The program should output the F-
# value, degrees of freedom, and p-value for the test.

In [78]:
def f_test_variances(sample1, sample2):
    var1 = np.var(sample1, ddof=1)
    var2 = np.var(sample2, ddof=1)
    f_value = var1 / var2 if var1 > var2 else var2 / var1
    df1 = len(sample1) - 1
    df2 = len(sample2) - 1
    p_value = 1 - f.cdf(f_value, df1, df2)

    return f_value, df1, df2, p_value

In [79]:
np.random.seed(0)
n1 = 50
n2 = 50
mean1, mean2 = 0, 0  
var1, var2 = 1, 2

In [80]:
sample1 = np.random.normal(loc=mean1, scale=np.sqrt(var1), size=n1)
sample2 = np.random.normal(loc=mean2, scale=np.sqrt(var2), size=n2)

In [81]:
f_value, df1, df2, p_value = f_test_variances(sample1, sample2)

In [82]:
f_value, df1, df2, p_value

(1.1869415346080736, 49, 49, 0.275449186933693)

In [83]:
# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from
# each population. Conduct an F-test at the 5% significance level to determine if the variances are
# significantly different.

In [84]:
def perform_f_test(var1, var2, n1, n2, alpha=0.05):
    f_value = var1 / var2 if var1 > var2 else var2 / var1
    df1 = n1 - 1
    df2 = n2 - 1
    p_value = 1 - stats.f.cdf(f_value, df1, df2)
    critical_value = stats.f.ppf(1 - alpha, df1, df2)
    return f_value, df1, df2, p_value, critical_value

In [85]:
var1 = 10 
var2 = 15 
n1 = 12   
n2 = 12   
alpha = 0.05

In [86]:
f_value, df1, df2, p_value, critical_value = perform_f_test(var1, var2, n1, n2, alpha)

In [87]:
if f_value > critical_value:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: The variances are not significantly different.")

Fail to reject the null hypothesis: The variances are not significantly different.


In [88]:
# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25
# products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance
# level to determine if the claim is justified.

In [89]:
def perform_f_test(sample_var, claimed_var, n, alpha=0.01):
    f_value = sample_var / claimed_var
    df = n - 1
    p_value = 2 * (1 - stats.f.cdf(f_value, df, df))
    critical_value = stats.f.ppf(1 - alpha / 2, df, df)

    return f_value, df, df, p_value, critical_value

In [90]:
sample_var = 0.006 
claimed_var = 0.005  
n = 25  
alpha = 0.01

In [91]:
f_value, df, _, p_value, critical_value = perform_f_test(sample_var, claimed_var, n, alpha)

In [92]:
if f_value > critical_value:
    print("Reject the null hypothesis: The sample variance is significantly different from the claimed variance.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference between the sample variance and the claimed variance.")

Fail to reject the null hypothesis: There is no significant difference between the sample variance and the claimed variance.


In [93]:
# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an
# F-distribution and calculates the mean and variance of the distribution. The function should return the
# mean and variance as a tuple.

In [94]:
def f_distribution_mean_variance(df_numerator, df_denominator):
    if df_denominator > 2:
        mean = df_denominator / (df_denominator - 2)
    else:
        mean = None 

    if df_denominator > 4:
        variance = (2 * df_denominator**2 * (df_numerator + df_denominator - 2)) / \
                   (df_numerator * (df_denominator - 2)**2 * (df_denominator - 4))
    else:
        variance = None 

    return mean, variance

In [95]:
df_numerator = 5
df_denominator = 10

In [96]:
mean, variance = f_distribution_mean_variance(df_numerator, df_denominator)

In [97]:
mean, variance

(1.25, 1.3541666666666667)

In [98]:
# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The
# sample variance is found to be 25. Another random sample of 15 measurements is taken from another
# normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test
# at the 10% significance level to determine if the variances are significantly different.

In [99]:
def perform_f_test(sample_var1, sample_var2, n1, n2, alpha=0.10):
    f_value = sample_var1 / sample_var2 if sample_var1 > sample_var2 else sample_var2 / sample_var1
    df1 = n1 - 1
    df2 = n2 - 1
    p_value = 1 - stats.f.cdf(f_value, df1, df2)
    critical_value = stats.f.ppf(1 - alpha, df1, df2)

    return f_value, df1, df2, p_value, critical_value

In [100]:
sample_var1 = 25  
sample_var2 = 20  
n1 = 10          
n2 = 15           
alpha = 0.10

In [101]:
f_value, df1, df2, p_value, critical_value = perform_f_test(sample_var1, sample_var2, n1, n2, alpha)

In [102]:
if f_value > critical_value:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference between the variances.")

Fail to reject the null hypothesis: There is no significant difference between the variances.


In [103]:
# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday
# night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
# significance level to determine if the variances are significantly different.

In [104]:
def perform_f_test(data1, data2, alpha=0.05):
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)
    if var1 > var2:
        f_value = var1 / var2
        df1, df2 = len(data1) - 1, len(data2) - 1
    else:
        f_value = var2 / var1
        df1, df2 = len(data2) - 1, len(data1) - 1
    p_value = 1 - stats.f.cdf(f_value, df1, df2)

    critical_value = stats.f.ppf(1 - alpha, df1, df2)

    return f_value, df1, df2, p_value, critical_value

In [105]:
data_a = [24, 25, 28, 23, 22, 20, 27]  
data_b = [31, 33, 35, 30, 32, 36]     
alpha = 0.05

In [106]:
f_value, df1, df2, p_value, critical_value = perform_f_test(data_a, data_b, alpha)

In [107]:
f_value, df1, df2, p_value, critical_value

(1.4551907719609583, 6, 5, 0.3487407873968742, 4.950288068694318)

In [108]:
if f_value > critical_value:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference between the variances.")

Fail to reject the null hypothesis: There is no significant difference between the variances.


In [109]:
# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83;
# Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances
# are significantly different.

In [110]:
def perform_f_test(data1, data2, alpha=0.01):
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)
    if var1 > var2:
        f_value = var1 / var2
        df1, df2 = len(data1) - 1, len(data2) - 1
    else:
        f_value = var2 / var1
        df1, df2 = len(data2) - 1, len(data1) - 1
    p_value = 1 - stats.f.cdf(f_value, df1, df2)

    critical_value = stats.f.ppf(1 - alpha, df1, df2)

    return f_value, df1, df2, p_value, critical_value

In [111]:
data_a = [80, 85, 90, 92, 87, 83]  
data_b = [75, 78, 82, 79, 81, 84]  
alpha = 0.01

In [112]:
f_value, df1, df2, p_value, critical_value = perform_f_test(data_a, data_b, alpha)

In [113]:
if f_value > critical_value:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference between the variances.")

Fail to reject the null hypothesis: There is no significant difference between the variances.
