In [None]:
Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio
test. The function should return the F-value and the corresponding p-value for the test.


In [5]:
import numpy as np
from scipy import stats

def variance_ratio_test(data1, data2):
    # Calculate the variances of both datasets
    var1 = np.var(data1, ddof=1)  # Sample variance (ddof=1 for unbiased estimation)
    var2 = np.var(data2, ddof=1)
    
    # Calculate the F-statistic (variance ratio)
    f_value = var1 / var2 if var1 >= var2 else var2 / var1
    
    # Degrees of freedom for each sample
    df1 = len(data1) - 1
    df2 = len(data2) - 1
    
    # Calculate the p-value from the F-distribution
    p_value = 2 * min(stats.f.cdf(f_value, df1, df2), 1 - stats.f.cdf(f_value, df1, df2))
    
    return f_value, p_value

# Example usage:
data1 = np.array([10, 12, 9, 11, 13, 14, 8, 15])
data2 = np.array([22, 20, 21, 23, 19, 24, 25, 20])

f_value, p_value = variance_ratio_test(data1, data2)
print(f"F-value: {f_value}")
print(f"P-value: {p_value}")


F-value: 1.3333333333333333
P-value: 0.7138778999549844


In [None]:
Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an
F-distribution, write a Python function that returns the critical F-value for a two-tailed test.


In [2]:
from scipy.stats import f

def critical_f_value(alpha, dfn, dfd):
    # Calculate the critical F-value for the upper tail (1 - alpha/2)
    f_upper = f.ppf(1 - alpha / 2, dfn, dfd)
    
    # Calculate the critical F-value for the lower tail (alpha/2)
    f_lower = f.ppf(alpha / 2, dfn, dfd)
    
    return f_lower, f_upper

# Example usage:
alpha = 0.05  # Significance level
dfn = 3       # Degrees of freedom for the numerator (e.g., number of groups - 1)
dfd = 20      # Degrees of freedom for the denominator (e.g., total sample size - number of groups)

f_lower, f_upper = critical_f_value(alpha, dfn, dfd)
print(f"Lower critical F-value: {f_lower}")
print(f"Upper critical F-value: {f_upper}")


Lower critical F-value: 0.07058467426541322
Upper critical F-value: 3.858698666273216


In [12]:
# Q3. Write a Python program that generates random samples from two normal distributions with known

# variances and uses an F-test to determine if the variances are equal. The program should output the F-
# value, degrees of freedom, and p-value for the test.

import numpy as np
from scipy.stats import f

def f_test_sample_variances(sample1, sample2):
    # Calculate variances of the two samples
    var1 = np.var(sample1, ddof=1)  # Sample variance with ddof=1
    var2 = np.var(sample2, ddof=1)  # Sample variance with ddof=1
    
    # Determine degrees of freedom for each sample
    df1 = len(sample1) - 1  # Degrees of freedom for sample 1
    df2 = len(sample2) - 1  # Degrees of freedom for sample 2
    
    # Calculate the F-statistic: ratio of variances (larger variance / smaller variance)
    if var1 > var2:
        f_statistic = var1 / var2
    else:
        f_statistic = var2 / var1
        
    # Calculate the p-value using the F-distribution
    p_value = 1 - f.cdf(f_statistic, df1, df2)  # One-tailed test (right tail)
    
    return f_statistic, df1, df2, p_value

# Example usage:
# Generate two random samples from normal distributions
np.random.seed(42)  # For reproducibility
sample1 = np.random.normal(loc=0, scale=5, size=50)  # Normal distribution with mean 0, std 5
sample2 = np.random.normal(loc=0, scale=10, size=50)  # Normal distribution with mean 0, std 10

# Perform F-test to compare variances
f_stat, df1, df2, p_value= f_test_sample_variances(sample1, sample2)

print(f"F-value: {f_stat}")
print(f"Degrees of freedom for sample 1: {df1}")
print(f"Degrees of freedom for sample 2: {df2}")
print(f"P-value: {p_value}")
# Interpretation
if p_value < 0.05:
    print("The variances are significantly different.")
else:
    print("The variances are not significantly different.")


F-value: 3.507681045961634
Degrees of freedom for sample 1: 49
Degrees of freedom for sample 2: 49
P-value: 1.123844379480321e-05
The variances are significantly different.


In [None]:
Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from
each population. Conduct an F-test at the 5% significance level to determine if the variances are
significantly different.


In [6]:
import scipy.stats as stats

# Given data
sigma1_squared = 10  # Population variance 1
sigma2_squared = 15  # Population variance 2
n1 = 12  # Sample size for population 1
n2 = 12  # Sample size for population 2
alpha = 0.05  # Significance level

# Degrees of freedom
df1 = n1 - 1
df2 = n2 - 1

# F-statistic calculation (ratio of the two variances)
F_statistic = sigma1_squared / sigma2_squared if sigma1_squared > sigma2_squared else sigma2_squared / sigma1_squared

# Critical F-values from the F-distribution (two-tailed test)
F_lower = stats.f.ppf(alpha / 2, df1, df2)  # Critical value for the lower tail
F_upper = stats.f.ppf(1 - alpha / 2, df1, df2)  # Critical value for the upper tail

F_statistic, F_lower, F_upper


(1.5, 0.28787755798459863, 3.473699051085809)

In [None]:
Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25
products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance
level to determine if the claim is justified.


In [7]:
import scipy.stats as stats

# Given data
sigma1_squared = 0.005 # Population variance 1
sigma2_squared = 0.006  # Population variance 2
n1 = 25  # Sample size for population 1
n2 = 25  # Sample size for population 2
alpha = 0.01  # Significance level

# Degrees of freedom
df1 = n1 - 1
df2 = n2 - 1

# F-statistic calculation (ratio of the two variances)
F_statistic = sigma1_squared / sigma2_squared if sigma1_squared > sigma2_squared else sigma2_squared / sigma1_squared

# Critical F-values from the F-distribution (two-tailed test)
F_lower = stats.f.ppf(alpha / 2, df1, df2)  # Critical value for the lower tail
F_upper = stats.f.ppf(1 - alpha / 2, df1, df2)  # Critical value for the upper tail

F_statistic, F_lower, F_upper
# accepts the null hypothesis that is  variance are same 

(1.2, 0.3370701342685674, 2.966741631292762)

In [None]:
Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an
F-distribution and calculates the mean and variance of the distribution. The function should return the
mean and variance as a tuple.


In [9]:
def f_distribution_mean_variance(df1, df2):
    """
    Calculates the mean and variance of an F-distribution.
    
    Parameters:
    df1 (int): Degrees of freedom for the numerator
    df2 (int): Degrees of freedom for the denominator
    
    Returns:
    tuple: (mean, variance) of the F-distribution
    """
    
    # Check if the degrees of freedom are valid
    if df2 <= 2:
        raise ValueError("For calculating mean, df2 must be greater than 2.")
    if df2 <= 4:
        raise ValueError("For calculating variance, df2 must be greater than 4.")
    
    # Mean of the F-distribution
    mean = df2 / (df2 - 2)
    
    # Variance of the F-distribution
    variance = (2 * df2**2 * (df1 + df2 - 2)) / (df1 * (df2 - 2)**2 * (df2 - 4))
    
    return mean, variance
mean, variance = f_distribution_mean_variance(5, 10)
print("Mean:", mean)
print("Variance:", variance)

Mean: 1.25
Variance: 1.3541666666666667


In [None]:
Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The
sample variance is found to be 25. Another random sample of 15 measurements is taken from another
normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test
at the 10% significance level to determine if the variances are significantly different.


In [10]:
import scipy.stats as stats

# Given data
sigma1_squared = 25 # Population variance 1
sigma2_squared = 20  # Population variance 2
n1 = 10  # Sample size for population 1
n2 = 15  # Sample size for population 2
alpha = 0.1  # Significance level

# Degrees of freedom
df1 = n1 - 1
df2 = n2 - 1

# F-statistic calculation (ratio of the two variances)
F_statistic = sigma1_squared / sigma2_squared if sigma1_squared > sigma2_squared else sigma2_squared / sigma1_squared

# Critical F-values from the F-distribution (two-tailed test)
F_lower = stats.f.ppf(alpha / 2, df1, df2)  # Critical value for the lower tail
F_upper = stats.f.ppf(1 - alpha / 2, df1, df2)  # Critical value for the upper tail

F_statistic, F_lower, F_upper
# accepts the null hypothesis that is  variance are same 


(1.25, 0.3305268601412525, 2.6457907352338195)

In [None]:
Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday
night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
significance level to determine if the variances are significantly different.


In [13]:
import scipy.stats as stats
restaurant_A = [24, 25, 28, 23, 22, 20, 27]
restaurant_B = [31, 33, 35, 30, 32, 36]
# Sample sizes
n_A = len(restaurant_A)
n_B = len(restaurant_B)
sigma1_squared = np.var(restaurant_A, ddof=1)  # Sample variance for A (ddof=1 for sample variance)
sigma2_squared = np.var(restaurant_B, ddof=1) 
n1 = n_A # Sample size for population 1
n2 = n_B  # Sample size for population 2
alpha = 0.05  # Significance level

# Degrees of freedom
df1 = n1 - 1
df2 = n2 - 1

# F-statistic calculation (ratio of the two variances)
F_statistic = sigma1_squared / sigma2_squared if sigma1_squared > sigma2_squared else sigma2_squared / sigma1_squared

# Critical F-values from the F-distribution (two-tailed test)
F_lower = stats.f.ppf(alpha / 2, df1, df2)  # Critical value for the lower tail
F_upper = stats.f.ppf(1 - alpha / 2, df1, df2)  # Critical value for the upper tail

F_statistic, F_lower, F_upper
# accepts the null hypothesis that is  variance are same 


(1.4551907719609583, 0.16701279718024772, 6.977701858535566)

In [None]:
Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83;
Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances
are significantly different.

In [16]:
import numpy as np

# Data for the restaurants
A = [80, 85, 90, 92, 87, 83]
B=[75, 78, 82, 79, 81, 84]
# Sample sizes
n_A = len(A)
n_B = len(B)

# Sample variances
s_A_squared = np.var(A, ddof=1)  # Sample variance for A (ddof=1 for sample variance)
s_B_squared = np.var(B, ddof=1)  # Sample variance for B

# Degrees of freedom
df_A = n_A - 1
df_B = n_B - 1

# F-statistic calculation (ratio of the two sample variances)
F_statistic = s_A_squared / s_B_squared if s_A_squared > s_B_squared else s_B_squared / s_A_squared

# Critical F-values from the F-distribution (two-tailed test)
F_lower = stats.f.ppf(0.025, df_A, df_B)  # Critical value for the lower tail
F_upper = stats.f.ppf(0.975, df_A, df_B)  # Critical value for the upper tail

F_statistic, F_lower, F_upper


(1.9442622950819677, 0.13993095022986143, 7.146381828732832)