# **Statistics Advanced 7**

Q1: Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio test. The function should return the F-value and the corresponding p-value for the test.

In [1]:
import numpy as np
from scipy.stats import f

def variance_ratio_test(array1, array2):
    """Run a two-tailed F-test for equality of two sample variances.

    The statistic is the unbiased (``ddof=1``) sample variance of ``array1``
    divided by that of ``array2``. It is referred to an F-distribution with
    ``len(array1) - 1`` numerator and ``len(array2) - 1`` denominator
    degrees of freedom.

    Args:
        array1: Observations of the first sample (numerator variance).
        array2: Observations of the second sample (denominator variance).

    Returns:
        tuple: ``(F, p_value)``, where ``p_value`` is twice the smaller of
        the two tail probabilities of ``F``.
    """
    # Unbiased sample variances
    var1 = np.var(array1, ddof=1)
    var2 = np.var(array2, ddof=1)

    # Variance ratio, first sample on top
    F = var1 / var2

    # Degrees of freedom follow the numerator / denominator samples
    df1 = len(array1) - 1
    df2 = len(array2) - 1

    # Take the upper tail from the survival function instead of 1 - cdf:
    # for a very large F the cdf rounds to 1.0 and the subtraction would
    # collapse the p-value to exactly 0.
    lower_tail = f.cdf(F, df1, df2)
    upper_tail = f.sf(F, df1, df2)
    p_value = 2 * min(lower_tail, upper_tail)  # Two-tailed test

    return F, p_value

# Example usage
array1 = [24, 25, 28, 23, 22, 20, 27]
array2 = [31, 33, 35, 30, 32, 36]
f_value, p_value = variance_ratio_test(array1, array2)
print(f"F-value: {f_value}, p-value: {p_value}")


F-value: 1.4551907719609583, p-value: 0.6974815747937484


Q2: Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [2]:
from scipy.stats import f

def critical_f_value(significance_level, df1, df2):
    """Return the upper critical F-value for a two-tailed test.

    Args:
        significance_level: Total significance level of the test; half of
            it is placed in the upper tail.
        df1: Numerator degrees of freedom.
        df2: Denominator degrees of freedom.

    Returns:
        The quantile of the F(df1, df2) distribution at probability
        ``1 - significance_level / 2``.
    """
    # A two-tailed test puts half of the significance level in each tail.
    upper_tail_area = significance_level / 2
    return f.ppf(1 - upper_tail_area, df1, df2)

# Example usage
df1, df2 = 5, 10
significance_level = 0.05
critical_value = critical_f_value(significance_level, df1, df2)
print(f"Critical F-value: {critical_value}")


Critical F-value: 4.236085668188633


Q3: Write a Python program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal. The program should output the F-value, degrees of freedom, and p-value for the test.

In [3]:
import numpy as np
from scipy.stats import f

def generate_samples_and_test(mu1, sigma1, n1, mu2, sigma2, n2):
    """Draw two normal samples and F-test them for equal variances.

    Samples are drawn with ``np.random.normal``, so results depend on
    NumPy's global random state. The F statistic puts the larger sample
    variance in the numerator, and the degrees of freedom follow whichever
    sample ended up on top.

    Args:
        mu1: Mean of the first normal distribution.
        sigma1: Standard deviation of the first normal distribution.
        n1: Number of observations drawn for the first sample.
        mu2: Mean of the second normal distribution.
        sigma2: Standard deviation of the second normal distribution.
        n2: Number of observations drawn for the second sample.

    Returns:
        tuple: ``(F, df1, df2, p_value)`` where ``df1`` / ``df2`` are the
        numerator / denominator degrees of freedom of ``F`` and ``p_value``
        is the two-tailed p-value.
    """
    # Generate random samples
    sample1 = np.random.normal(mu1, sigma1, n1)
    sample2 = np.random.normal(mu2, sigma2, n2)

    # Unbiased sample variances
    var1 = np.var(sample1, ddof=1)
    var2 = np.var(sample2, ddof=1)

    # Larger variance goes on top. The degrees of freedom must be swapped
    # together with the variances, otherwise F is compared against the
    # wrong F-distribution whenever sample 2 has the larger variance.
    if var1 > var2:
        F = var1 / var2
        df1, df2 = n1 - 1, n2 - 1
    else:
        F = var2 / var1
        df1, df2 = n2 - 1, n1 - 1

    # Two-tailed p-value; sf avoids the cancellation of 1 - cdf for large F.
    p_value = 2 * min(f.cdf(F, df1, df2), f.sf(F, df1, df2))

    return F, df1, df2, p_value

# Example usage
mu1, sigma1, n1 = 0, 1, 30
mu2, sigma2, n2 = 0, 1.5, 30
F, df1, df2, p_value = generate_samples_and_test(mu1, sigma1, n1, mu2, sigma2, n2)
print(f"F-value: {F}, df1: {df1}, df2: {df2}, p-value: {p_value}")


F-value: 1.4159065351291593, df1: 29, df2: 29, p-value: 0.35441470431965616


Q4: The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from each population. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [4]:
from scipy.stats import f

def f_test_known_variances(var1, var2, n1, n2, alpha=0.05):
    """Two-tailed F-test comparing two variances.

    The larger variance is placed in the numerator and the degrees of
    freedom are taken from the matching sample sizes.

    Args:
        var1: Variance associated with the first sample.
        var2: Variance associated with the second sample.
        n1: Number of observations in the first sample.
        n2: Number of observations in the second sample.
        alpha: Significance level of the two-tailed test.

    Returns:
        tuple: ``(F, critical_value, p_value)`` where ``critical_value`` is
        the upper ``1 - alpha / 2`` quantile of the reference
        F-distribution and ``p_value`` is the two-tailed p-value.
    """
    # Keep each variance paired with its own degrees of freedom: when the
    # second variance is the larger one it becomes the numerator, so its
    # n2 - 1 must become the numerator degrees of freedom as well.
    if var1 > var2:
        F = var1 / var2
        df_num, df_den = n1 - 1, n2 - 1
    else:
        F = var2 / var1
        df_num, df_den = n2 - 1, n1 - 1

    critical_value = f.ppf(1 - alpha / 2, df_num, df_den)

    # Two-tailed p-value; sf avoids the cancellation of 1 - cdf for large F.
    p_value = 2 * min(f.cdf(F, df_num, df_den), f.sf(F, df_num, df_den))
    return F, critical_value, p_value

# Given data
var1 = 10
var2 = 15
n1 = 12
n2 = 12

F, critical_value, p_value = f_test_known_variances(var1, var2, n1, n2)
print(f"F-value: {F}, Critical value: {critical_value}, p-value: {p_value}")


F-value: 1.5, Critical value: 3.473699051085809, p-value: 0.5123897987357995


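Q5: A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified. (Note: with a single sample compared against a claimed population variance, the appropriate procedure is the chi-square test for a variance, which is what the solution below uses.)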

In [5]:
from scipy.stats import chi2

def chi_square_test(sample_variance, claimed_variance, n, alpha=0.01):
    """Upper-tailed chi-square test of a sample variance against a claim.

    The statistic is ``(n - 1) * sample_variance / claimed_variance`` with
    ``n - 1`` degrees of freedom.

    Args:
        sample_variance: Variance observed in the sample.
        claimed_variance: Variance stated by the claim being tested.
        n: Number of observations in the sample.
        alpha: Significance level of the (one-sided, upper-tail) test.

    Returns:
        tuple: ``(chi_square_stat, critical_value, p_value)`` where
        ``critical_value`` is the ``1 - alpha`` quantile of the chi-square
        distribution and ``p_value`` is the upper-tail probability of the
        statistic.
    """
    df = n - 1
    chi_square_stat = (n - 1) * sample_variance / claimed_variance
    critical_value = chi2.ppf(1 - alpha, df)

    # Survival function instead of 1 - cdf: for a large statistic the cdf
    # rounds to 1.0 and the subtraction would return exactly 0.
    p_value = chi2.sf(chi_square_stat, df)
    return chi_square_stat, critical_value, p_value

# Given data
claimed_variance = 0.005
sample_variance = 0.006
n = 25

chi_square_stat, critical_value, p_value = chi_square_test(sample_variance, claimed_variance, n)
print(f"Chi-square statistic: {chi_square_stat}, Critical value: {critical_value}, p-value: {p_value}")


Chi-square statistic: 28.800000000000004, Critical value: 42.97982013935165, p-value: 0.22774877881074673


Q6: Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return the mean and variance as a tuple.

In [6]:
def f_distribution_mean_variance(df1, df2):
    """Return the mean and variance of an F(df1, df2) distribution.

    Args:
        df1: Numerator degrees of freedom.
        df2: Denominator degrees of freedom.

    Returns:
        tuple: ``(mean, variance)``. ``mean`` is NaN unless ``df2 > 2`` and
        ``variance`` is NaN unless ``df2 > 4``.
    """
    # The mean depends only on the denominator degrees of freedom.
    mean = df2 / (df2 - 2) if df2 > 2 else np.nan

    # Without df2 > 4 the variance is reported as NaN.
    if not df2 > 4:
        return mean, np.nan

    numerator = 2 * (df2**2) * (df1 + df2 - 2)
    denominator = df1 * (df2 - 2)**2 * (df2 - 4)
    return mean, numerator / denominator

# Example usage
df1, df2 = 5, 10
mean, variance = f_distribution_mean_variance(df1, df2)
print(f"Mean: {mean}, Variance: {variance}")


Mean: 1.25, Variance: 1.3541666666666667


Q7: A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.

In [7]:
import numpy as np
from scipy.stats import f

def f_test_sample_variances(var1, var2, n1, n2, alpha=0.10):
    """Two-tailed F-test for equality of variances from summary statistics.

    The larger sample variance is placed in the numerator and the degrees
    of freedom are taken from the matching sample sizes.

    Args:
        var1: Sample variance of the first sample.
        var2: Sample variance of the second sample.
        n1: Number of observations in the first sample.
        n2: Number of observations in the second sample.
        alpha: Significance level of the two-tailed test.

    Returns:
        tuple: ``(F, critical_value, p_value)`` where ``critical_value`` is
        the upper ``1 - alpha / 2`` quantile of the reference
        F-distribution and ``p_value`` is the two-tailed p-value.
    """
    # The degrees of freedom travel with their variance: if the second
    # sample supplies the numerator, its n2 - 1 is the numerator df.
    if var1 > var2:
        F = var1 / var2
        df_num, df_den = n1 - 1, n2 - 1
    else:
        F = var2 / var1
        df_num, df_den = n2 - 1, n1 - 1

    critical_value = f.ppf(1 - alpha / 2, df_num, df_den)

    # Two-tailed p-value; sf avoids the cancellation of 1 - cdf for large F.
    p_value = 2 * min(f.cdf(F, df_num, df_den), f.sf(F, df_num, df_den))
    return F, critical_value, p_value

# Given data
var1 = 25
var2 = 20
n1 = 10
n2 = 15

F, critical_value, p_value = f_test_sample_variances(var1, var2, n1, n2)
print(f"F-value: {F}, Critical value: {critical_value}, p-value: {p_value}")


F-value: 1.25, Critical value: 2.6457907352338195, p-value: 0.6832194382585954


Q8: The following data represent the waiting times in minutes at two different restaurants on a Saturday night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [8]:
import numpy as np
from scipy.stats import f

def f_test_waiting_times(data1, data2, alpha=0.05):
    """Two-tailed F-test for equality of variances of two raw samples.

    Unbiased (``ddof=1``) sample variances are computed from the data. The
    larger one is placed in the numerator and the degrees of freedom are
    taken from the matching sample.

    Args:
        data1: Observations of the first sample.
        data2: Observations of the second sample.
        alpha: Significance level of the two-tailed test.

    Returns:
        tuple: ``(F, critical_value, p_value)`` where ``critical_value`` is
        the upper ``1 - alpha / 2`` quantile of the reference
        F-distribution and ``p_value`` is the two-tailed p-value.
    """
    # Unbiased sample variances
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)

    # Larger variance on top; the degrees of freedom must follow the same
    # ordering, otherwise the critical value and p-value are read from the
    # wrong F-distribution when the second sample has the larger variance.
    if var1 > var2:
        F = var1 / var2
        df_num, df_den = len(data1) - 1, len(data2) - 1
    else:
        F = var2 / var1
        df_num, df_den = len(data2) - 1, len(data1) - 1

    # Critical value for a two-tailed test
    critical_value = f.ppf(1 - alpha / 2, df_num, df_den)

    # Two-tailed p-value; sf avoids the cancellation of 1 - cdf for large F.
    p_value = 2 * min(f.cdf(F, df_num, df_den), f.sf(F, df_num, df_den))

    return F, critical_value, p_value

# Given data
restaurant_A = [24, 25, 28, 23, 22, 20, 27]
restaurant_B = [31, 33, 35, 30, 32, 36]

F, critical_value, p_value = f_test_waiting_times(restaurant_A, restaurant_B)
print(f"F-value: {F}, Critical value: {critical_value}, p-value: {p_value}")


F-value: 1.4551907719609583, Critical value: 6.977701858535566, p-value: 0.6974815747937484


Q9: The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances are significantly different.

In [9]:
import numpy as np
from scipy.stats import f

def f_test_scores(data1, data2, alpha=0.01):
    """Two-tailed F-test for equality of variances of two raw samples.

    Unbiased (``ddof=1``) sample variances are computed from the data. The
    larger one is placed in the numerator and the degrees of freedom are
    taken from the matching sample.

    Args:
        data1: Observations of the first group.
        data2: Observations of the second group.
        alpha: Significance level of the two-tailed test.

    Returns:
        tuple: ``(F, critical_value, p_value)`` where ``critical_value`` is
        the upper ``1 - alpha / 2`` quantile of the reference
        F-distribution and ``p_value`` is the two-tailed p-value.
    """
    # Unbiased sample variances
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)

    # Larger variance on top; swap the degrees of freedom together with
    # the variances so the statistic is referred to the matching
    # F-distribution even when the second group has the larger variance.
    if var1 > var2:
        F = var1 / var2
        df_num, df_den = len(data1) - 1, len(data2) - 1
    else:
        F = var2 / var1
        df_num, df_den = len(data2) - 1, len(data1) - 1

    # Critical value for a two-tailed test
    critical_value = f.ppf(1 - alpha / 2, df_num, df_den)

    # Two-tailed p-value; sf avoids the cancellation of 1 - cdf for large F.
    p_value = 2 * min(f.cdf(F, df_num, df_den), f.sf(F, df_num, df_den))

    return F, critical_value, p_value

# Given data
group_A = [80, 85, 90, 92, 87, 83]
group_B = [75, 78, 82, 79, 81, 84]

F, critical_value, p_value = f_test_scores(group_A, group_B)
print(f"F-value: {F}, Critical value: {critical_value}, p-value: {p_value}")


F-value: 1.9442622950819677, Critical value: 14.939605459912224, p-value: 0.4831043549070688


# **Complete**