Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio
test. The function should return the F-value and the corresponding p-value for the test.

In [1]:
import numpy as np
from scipy.stats import f

def variance_ratio_test(data1, data2):
    """
    Perform a two-sided variance ratio test (F-test) on two datasets.

    The larger sample variance is always placed in the numerator, so the
    returned F-value is >= 1 and the degrees of freedom are ordered to match.

    Parameters:
    data1 (array-like): The first dataset. Flattened to 1-D before use.
    data2 (array-like): The second dataset. Flattened to 1-D before use.

    Returns:
    float: The F-value for the test.
    float: The two-sided p-value for the test.

    Raises:
    ValueError: If either dataset has fewer than two observations, or if
        either sample variance is zero (the ratio is then undefined).
    """
    # Flatten first so the degrees of freedom are derived from the same
    # element count that np.var uses; len() would only count the first axis.
    values1 = np.ravel(data1)
    values2 = np.ravel(data2)
    if values1.size < 2 or values2.size < 2:
        raise ValueError("Each dataset must contain at least two observations.")

    # ddof=1 gives the unbiased sample variance.
    var1 = np.var(values1, ddof=1)
    var2 = np.var(values2, ddof=1)
    if var1 == 0 or var2 == 0:
        raise ValueError("The F-ratio is undefined when a dataset has zero variance.")

    # Put the larger variance on top and keep the df in the same order.
    if var1 > var2:
        F = var1 / var2
        dfn = values1.size - 1
        dfd = values2.size - 1
    else:
        F = var2 / var1
        dfn = values2.size - 1
        dfd = values1.size - 1

    # Two-sided p-value: twice the smaller tail. The survival function is used
    # instead of 1 - cdf so that tiny upper-tail probabilities do not cancel
    # to exactly zero.
    p_value = 2 * min(f.cdf(F, dfn, dfd), f.sf(F, dfn, dfd))

    return F, p_value

# Example usage: compare the spread of two small five-point samples.
data1 = [20.1, 22.3, 19.8, 21.4, 22.0]
data2 = [19.1, 21.2, 18.7, 20.3, 21.5]

# The function returns the F statistic first, then the p-value.
F_value, p_value = variance_ratio_test(data1=data1, data2=data2)
print(f"F-value: {F_value}", f"P-value: {p_value}", sep="\n")


F-value: 1.2235481304693723
P-value: 0.8497030844248135


Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an
F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [2]:
import scipy.stats as stats

def critical_f_value(alpha, dfn, dfd):
    """
    Find the two critical values that bound a two-tailed F-test.

    The significance level is split evenly between the tails, and the
    F-distribution quantile is evaluated at each tail boundary.

    Parameters:
    alpha (float): The overall significance level.
    dfn (int): Degrees of freedom for the numerator.
    dfd (int): Degrees of freedom for the denominator.

    Returns:
    tuple: (lower critical F-value, upper critical F-value), i.e. the
        alpha/2 and 1 - alpha/2 quantiles of the F(dfn, dfd) distribution.
    """
    # Each tail receives half of the total rejection probability.
    tail = alpha / 2

    # Quantiles are evaluated in (lower, upper) order.
    return tuple(stats.f.ppf(q, dfn, dfd) for q in (tail, 1 - tail))

# Example usage: 5% significance with 5 numerator and 10 denominator df.
alpha, dfn, dfd = 0.05, 5, 10

# The lower bound comes first in the returned pair.
f_critical_low, f_critical_high = critical_f_value(alpha=alpha, dfn=dfn, dfd=dfd)
print(
    f"Lower critical F-value: {f_critical_low}",
    f"Upper critical F-value: {f_critical_high}",
    sep="\n",
)


Lower critical F-value: 0.15107670102998205
Upper critical F-value: 4.236085668188633


Q3. Write a Python program that generates random samples from two normal distributions with known
variances and uses an F-test to determine if the variances are equal. The program should output the F-
value, degrees of freedom, and p-value for the test.

In [3]:
import numpy as np
from scipy.stats import f

def generate_samples(mean1, var1, size1, mean2, var2, size2, random_state=None):
    """
    Draw one random sample from each of two normal distributions.

    Each distribution is specified by its mean and variance; the variance is
    converted to a standard deviation before sampling. The first sample is
    drawn before the second, both from NumPy's global random generator.

    Parameters:
    mean1 (float): Mean of the first distribution.
    var1 (float): Variance of the first distribution.
    size1 (int): Number of values to draw from the first distribution.
    mean2 (float): Mean of the second distribution.
    var2 (float): Variance of the second distribution.
    size2 (int): Number of values to draw from the second distribution.
    random_state (int, optional): Seed handed to np.random.seed.

    Returns:
    array-like: Samples from the first distribution.
    array-like: Samples from the second distribution.
    """
    # NOTE: this seeds NumPy's *global* generator, so it also affects any
    # later np.random calls made elsewhere.
    np.random.seed(random_state)

    # np.random.normal expects a standard deviation, not a variance.
    std1 = np.sqrt(var1)
    std2 = np.sqrt(var2)

    first = np.random.normal(loc=mean1, scale=std1, size=size1)
    second = np.random.normal(loc=mean2, scale=std2, size=size2)
    return first, second

def variance_ratio_test(sample1, sample2):
    """
    Perform a two-sided F-test to compare the variances of two samples.

    The larger sample variance is always placed in the numerator, so the
    returned F-value is >= 1 and the degrees of freedom are ordered to match.

    Parameters:
    sample1 (array-like): The first sample. Flattened to 1-D before use.
    sample2 (array-like): The second sample. Flattened to 1-D before use.

    Returns:
    float: The F-value for the test.
    int: Degrees of freedom for the numerator.
    int: Degrees of freedom for the denominator.
    float: The two-sided p-value for the test.

    Raises:
    ValueError: If either sample has fewer than two observations, or if
        either sample variance is zero (the ratio is then undefined).
    """
    # Flatten first so the degrees of freedom are derived from the same
    # element count that np.var uses; len() would only count the first axis.
    values1 = np.ravel(sample1)
    values2 = np.ravel(sample2)
    if values1.size < 2 or values2.size < 2:
        raise ValueError("Each sample must contain at least two observations.")

    # ddof=1 gives the unbiased sample variance.
    var1 = np.var(values1, ddof=1)
    var2 = np.var(values2, ddof=1)
    if var1 == 0 or var2 == 0:
        raise ValueError("The F-ratio is undefined when a sample has zero variance.")

    # Put the larger variance on top and keep the df in the same order.
    if var1 > var2:
        F = var1 / var2
        dfn = values1.size - 1
        dfd = values2.size - 1
    else:
        F = var2 / var1
        dfn = values2.size - 1
        dfd = values1.size - 1

    # Two-sided p-value: twice the smaller tail. The survival function is used
    # instead of 1 - cdf so that tiny upper-tail probabilities do not cancel
    # to exactly zero.
    p_value = 2 * min(f.cdf(F, dfn, dfd), f.sf(F, dfn, dfd))

    return F, dfn, dfd, p_value

# Example usage: two populations with different means and true variances
# of 4 and 5, sampled 30 times each.
mean1, var1, size1 = 10, 4, 30
mean2, var2, size2 = 15, 5, 30

# A fixed seed makes the drawn samples repeatable.
sample1, sample2 = generate_samples(
    mean1, var1, size1,
    mean2, var2, size2,
    random_state=42,
)

F_value, dfn, dfd, p_value = variance_ratio_test(sample1, sample2)
print(
    f"F-value: {F_value}",
    f"Degrees of freedom (numerator): {dfn}",
    f"Degrees of freedom (denominator): {dfd}",
    f"P-value: {p_value}",
    sep="\n",
)


F-value: 1.3378687039598216
Degrees of freedom (numerator): 29
Degrees of freedom (denominator): 29
P-value: 0.43794277355585454


Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an
F-distribution and calculates the mean and variance of the distribution. The function should return the
mean and variance as a tuple.

In [4]:
def f_distribution_mean_variance(dfn, dfd):
    """
    Compute the mean and variance of an F(dfn, dfd) distribution.

    The mean exists only when dfd > 2 and the variance only when dfd > 4;
    a moment that does not exist is reported as None.

    Parameters:
    dfn (int): Degrees of freedom for the numerator.
    dfd (int): Degrees of freedom for the denominator.

    Returns:
    tuple: (mean, variance), where either entry may be None if undefined.
    """
    # Mean: dfd / (dfd - 2), defined only for dfd > 2.
    mean = dfd / (dfd - 2) if dfd > 2 else None

    # Variance: 2*dfd^2*(dfn + dfd - 2) / (dfn*(dfd - 2)^2*(dfd - 4)),
    # defined only for dfd > 4.
    variance = None
    if dfd > 4:
        numerator = 2 * (dfd**2) * (dfn + dfd - 2)
        denominator = dfn * (dfd - 2)**2 * (dfd - 4)
        variance = numerator / denominator

    return mean, variance

# Example usage: moments of the F(5, 10) distribution.
dfn, dfd = 5, 10

# Both moments exist here because dfd exceeds 4.
mean, variance = f_distribution_mean_variance(dfn=dfn, dfd=dfd)
print(f"Mean: {mean}", f"Variance: {variance}", sep="\n")


Mean: 1.25
Variance: 1.3541666666666667
