# Assignment

# Q1

In [3]:
import scipy.stats as stats
import numpy as np

def variance_ratio_test(data1, data2):
    # Calculate the variances and degrees of freedom for each dataset
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)
    n1 = len(data1)
    n2 = len(data2)

    # Calculate the F-value and p-value for the variance ratio test
    f_value = var1 / var2
    p_value = stats.f.cdf(f_value, n1-1, n2-1)

    return f_value, p_value


Here, we're using the scipy.stats module to calculate the cumulative distribution function (CDF) of the F-distribution, which gives us the p-value for the test. The ddof=1 parameter in the np.var() function specifies that we want to use the sample variance (with n-1 degrees of freedom) rather than the population variance (with n degrees of freedom).


# Q2

In [5]:
from scipy.stats import f

def critical_f_value(alpha, dfn, dfd):
    """
    Calculates the critical F-value for a two-tailed test
    with a given significance level, numerator degrees of freedom,
    and denominator degrees of freedom.
    
    Parameters:
    alpha (float): Significance level
    dfn (int): Degrees of freedom for the numerator
    dfd (int): Degrees of freedom for the denominator
    
    Returns:
    float: Critical F-value
    """
    return f.ppf(q=1-alpha/2, dfn=dfn, dfd=dfd)


# Q3

In [6]:
import numpy as np
from scipy.stats import f

# Set the seed for reproducibility
np.random.seed(123)

# Generate two random samples from normal distributions with known variances
n1 = 20
n2 = 25
mu1 = 10
mu2 = 12
var1 = 4
var2 = 6

sample1 = np.random.normal(mu1, np.sqrt(var1), n1)
sample2 = np.random.normal(mu2, np.sqrt(var2), n2)

# Calculate the F-value and p-value for an F-test of equality of variances
F = np.var(sample1, ddof=1) / np.var(sample2, ddof=1)
df1 = n1 - 1
df2 = n2 - 1
p = 2 * min(f.cdf(F, df1, df2), 1 - f.cdf(F, df1, df2))

# Print the results
print("F-value:", F)
print("Degrees of freedom:", df1, ",", df2)
print("p-value:", p)


F-value: 0.9629829394593775
Degrees of freedom: 19 , 24
p-value: 0.9451187849419085


# Q4

In [13]:
import numpy as np
from scipy.stats import f

# Set the significance level
alpha = 0.05

# Set the sample sizes and population variances
n1 = n2 = 12
var1 = 10
var2 = 15

# Generate random samples from the populations
np.random.seed(1)
sample1 = np.random.normal(loc=0, scale=np.sqrt(var1), size=n1)
sample2 = np.random.normal(loc=0, scale=np.sqrt(var2), size=n2)

# Calculate the sample variances
s1_squared = np.var(sample1, ddof=1)
s2_squared = np.var(sample2, ddof=1)

# Calculate the F-statistic
F = s1_squared / s2_squared

# Calculate the critical F-value
df1 = n1 - 1
df2 = n2 - 1
critical_F = f.ppf(q=1 - alpha/2, dfn=df1, dfd=df2)

# Compare the F-statistic to the critical F-value
if F > critical_F or F < 1/critical_F:
    print(f"Reject the null hypothesis. The variances are significantly different.")
else:
    print(f"Fail to reject the null hypothesis. The variances are not significantly different.")


Fail to reject the null hypothesis. The variances are not significantly different.


# Q5

To conduct the F-test, we need to set up the null and alternative hypotheses:

Null hypothesis: The variance of the diameter of the product is equal to 0.005.
Alternative hypothesis: The variance of the diameter of the product is greater than 0.005.

We can use the F-test to test the null hypothesis by calculating the test statistic and comparing it to the critical value from the F-distribution with degrees of freedom (df1 = n - 1) and (df2 = 24), where n is the sample size.

The test statistic is calculated as:

F = sample variance / population variance

In this case, the sample variance is 0.006, and the population variance is 0.005.

F = 0.006 / 0.005
F = 1.2

Using a significance level of 0.01, the critical value for the F-distribution with df1 = 24 and df2 = 24 is 2.75 (calculated using a calculator or lookup table).

Since our test statistic (F = 1.2) is less than the critical value (2.75), we fail to reject the null hypothesis. Therefore, we do not have sufficient evidence to say that the variance of the diameter of the product is greater than 0.005.

# Q6

In [14]:
from math import gamma

def f_distribution_mean_var(df1, df2):
    mean = df2 / (df2 - 2)
    var = (2 * (df2**2) * (df1 + df2 - 2)) / (df1 * (df2 - 2)**2 * (df2 - 4))
    return mean, var


The gamma function from the math module is used to calculate the gamma function, which is used in the calculation of the variance. The function returns the mean and variance as a tuple.

Note that this function assumes that dfn and dfd are both greater than or equal to 2, as an F-distribution only exists for degrees of freedom greater than or equal to 2 for both numerator and denominator.

# Q7

To determine if the variances are significantly different, we will conduct an F-test at the 10% significance level.

The null hypothesis is that the two population variances are equal:
H0: σ12 = σ22

The alternative hypothesis is that the two population variances are not equal:
Ha: σ12 ≠ σ22

The test statistic is calculated as:

F = s12 / s22

where s12 is the larger sample variance and s22 is the smaller sample variance.

In this case, s12 = 25 and s22 = 20, so:

F = 25 / 20 = 1.25

The degrees of freedom for the numerator and denominator are 9 and 14, respectively.

Using a statistical software or a table, we can find the critical F-value with 9 and 14 degrees of freedom for a 10% significance level, which is 2.31.

Since our calculated F-value of 1.25 is less than the critical F-value of 2.31, we fail to reject the null hypothesis. Therefore, we do not have sufficient evidence to conclude that the population variances are significantly different at the 10% significance level.

# Q8

In [11]:
import numpy as np

restaurant_A = np.array([24, 25, 28, 23, 22, 20, 27])
restaurant_B = np.array([31, 33, 35, 30, 32, 36])

var_A = np.var(restaurant_A, ddof=1)
var_B = np.var(restaurant_B, ddof=1)

print("Sample variance of Restaurant A:", var_A)
print("Sample variance of Restaurant B:", var_B)


Sample variance of Restaurant A: 7.80952380952381
Sample variance of Restaurant B: 5.366666666666667


In [12]:
from scipy.stats import f

f_statistic = var_A / var_B
dfn = len(restaurant_A) - 1
dfd = len(restaurant_B) - 1
p_value = 1 - f.cdf(f_statistic, dfn, dfd)

print("F-statistic:", f_statistic)
print("P-value:", p_value)


F-statistic: 1.4551907719609583
P-value: 0.3487407873968742


we fail to reject the null hypothesis and conclude that there is not enough evidence to suggest that the variances are significantly different at the 5% significance level. 

# Q9

In [7]:
import numpy as np

group_a = np.array([80, 85, 90, 92, 87, 83])
group_b = np.array([75, 78, 82, 79, 81, 84])

var_a = np.var(group_a, ddof=1)
var_b = np.var(group_b, ddof=1)

print("Sample variance of Group A:", var_a)
print("Sample variance of Group B:", var_b)


Sample variance of Group A: 19.76666666666667
Sample variance of Group B: 10.166666666666666


In [8]:
f_value = var_a / var_b
print("F-value:", f_value)


F-value: 1.9442622950819677


In [9]:
from scipy.stats import f

alpha = 0.01
dfn = 5
dfd = 5
f_crit = f.ppf(1 - alpha, dfn, dfd)

print("Critical F-value:", f_crit)


Critical F-value: 10.967020650907994


In [10]:
#  we fail to reject the null hypothesis and conclude that there is not enough evidence