In [1]:
"""Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio
test. The function should return the F-value and the corresponding p-value for the test."""

import numpy as np
from scipy import stats

def variance_ratio_test(data1, data2):
    """
    Calculates the F-value and p-value for a variance ratio test.

    Args:
        data1 (array-like): First array of data.
        data2 (array-like): Second array of data.

    Returns:
        f_value (float): F-value for the variance ratio test.
        p_value (float): p-value for the variance ratio test.
    """
    # Convert data arrays to numpy arrays
    data1 = np.array(data1)
    data2 = np.array(data2)

    # Calculate variances
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)

    # Calculate F-value and p-value
    f_value = var1 / var2
    p_value = stats.f.cdf(f_value, len(data1)-1, len(data2)-1)

    return f_value, p_value


In [3]:
"""Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an
F-distribution, write a Python function that returns the critical F-value for a two-tailed test."""

from scipy import stats

def critical_f_value(significance_level, df_num, df_den):
    """
    Calculates the critical F-value for a two-tailed test.

    Args:
        significance_level (float): Significance level (e.g., 0.05 for a 5% significance level).
        df_num (int): Degrees of freedom for the numerator.
        df_den (int): Degrees of freedom for the denominator.

    Returns:
        critical_f (float): Critical F-value.
    """
    # Calculate the upper tail probability
    alpha = 1 - significance_level

    # Calculate the critical F-value
    critical_f = stats.f.ppf(1 - alpha/2, df_num, df_den)

    return critical_f


significance_level = 0.05
df_num = 3
df_den = 16

critical_f = critical_f_value(significance_level, df_num, df_den)
print("Critical F-value:", critical_f)


Critical F-value: 0.8741985756574948


In [4]:
"""Q3. Write a Python program that generates random samples from two normal distributions with known
variances and uses an F-test to determine if the variances are equal. The program should output the Fvalue, degrees of freedom, 
and p-value for the test."""

import numpy as np
from scipy import stats

def f_test_equal_variances(sample1, sample2):
    """
    Performs an F-test to determine if the variances of two samples are equal.

    Args:
        sample1 (array-like): First sample.
        sample2 (array-like): Second sample.

    Returns:
        f_value (float): F-value for the F-test.
        df_num (int): Degrees of freedom for the numerator.
        df_den (int): Degrees of freedom for the denominator.
        p_value (float): p-value for the F-test.
    """
    # Convert samples to numpy arrays
    sample1 = np.array(sample1)
    sample2 = np.array(sample2)

    # Calculate variances
    var1 = np.var(sample1, ddof=1)
    var2 = np.var(sample2, ddof=1)

    # Calculate F-value and degrees of freedom
    f_value = var1 / var2
    df_num = len(sample1) - 1
    df_den = len(sample2) - 1

    # Calculate p-value
    p_value = stats.f.cdf(f_value, df_num, df_den)

    return f_value, df_num, df_den, p_value


# Generate random samples from two normal distributions
np.random.seed(42)  # Set a random seed for reproducibility
sample1 = np.random.normal(loc=0, scale=1, size=100)
sample2 = np.random.normal(loc=0, scale=1, size=120)

# Perform the F-test
f_value, df_num, df_den, p_value = f_test_equal_variances(sample1, sample2)

# Output the results
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_den)
print("p-value:", p_value)


F-value: 0.8549787324004299
Degrees of freedom (numerator): 99
Degrees of freedom (denominator): 119
p-value: 0.21060420769452098


In [5]:
"""Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from
each population. Conduct an F-test at the 5% significance level to determine if the variances are
significantly different.
Ans.: To conduct an F-test at the 5% significance level to determine if the variances of two populations with known variances are 
significantly different, we can follow these steps:

Define the null hypothesis (H0) and the alternative hypothesis (H1):

H0: The variances of the two populations are equal.
H1: The variances of the two populations are significantly different.
Set the significance level (α) to 0.05.

Calculate the F-value using the formula F = var1 / var2, where var1 and var2 are the known variances of the populations.

Determine the degrees of freedom for the numerator (df_num) and denominator (df_den):

df_num = n1 - 1, where n1 is the sample size of the first population.
df_den = n2 - 1, where n2 is the sample size of the second population.
Calculate the p-value using the F-distribution and the degrees of freedom.

p-value = P(F > F-value), where P denotes the probability.
Compare the p-value with the significance level (α) to make a decision:

If the p-value is less than α, reject the null hypothesis (H0) and conclude that the variances are significantly different.
If the p-value is greater than or equal to α, fail to reject the null hypothesis (H0) and conclude that there is not enough evidence 
to suggest a significant difference in variances.
Now, let's calculate the F-value, degrees of freedom, and p-value for the given scenario using the provided information:

Known variance of the first population: var1 = 10
Known variance of the second population: var2 = 15
Sample size of the first population: n1 = 12
Sample size of the second population: n2 = 12
Significance level: α = 0.05 """

import scipy.stats as stats

# Known variances
var1 = 10
var2 = 15

# Sample sizes
n1 = 12
n2 = 12

# Calculate F-value
f_value = var1 / var2

# Degrees of freedom
df_num = n1 - 1
df_den = n2 - 1

# Calculate p-value
p_value = 2 * (1 - stats.f.cdf(f_value, df_num, df_den))

# Compare p-value with significance level
alpha = 0.05
if p_value < alpha:
    print("Reject null hypothesis. Variances are significantly different.")
else:
    print("Fail to reject null hypothesis. Variances are not significantly different.")

# Output the F-value, degrees of freedom, and p-value
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_den)
print("p-value:", p_value)


Fail to reject null hypothesis. Variances are not significantly different.
F-value: 0.6666666666666666
Degrees of freedom (numerator): 11
Degrees of freedom (denominator): 11
p-value: 1.4876102012642005


In [6]:
"""Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25
products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance
level to determine if the claim is justified.
Ans.: To conduct an F-test at the 1% significance level to determine if the manufacturer's claim about the variance of the diameter of a
certain product is justified, we can follow these steps:

Define the null hypothesis (H0) and the alternative hypothesis (H1):

H0: The variance of the diameter of the product is 0.005 (as claimed by the manufacturer).
H1: The variance of the diameter of the product is not 0.005 (claim is not justified).
Set the significance level (α) to 0.01.

Calculate the F-value using the formula F = sample_variance / claimed_variance, where the sample_variance is the variance calculated 
from the sample and the claimed_variance is the variance claimed by the manufacturer.

Determine the degrees of freedom for the numerator (df_num) and denominator (df_den):

df_num = n - 1, where n is the sample size.
df_den = 1, as the claimed variance has only one degree of freedom.
Calculate the p-value using the F-distribution and the degrees of freedom.

p-value = P(F > F-value), where P denotes the probability.
Compare the p-value with the significance level (α) to make a decision:

If the p-value is less than α, reject the null hypothesis (H0) and conclude that the claim is not justified.
If the p-value is greater than or equal to α, fail to reject the null hypothesis (H0) and conclude that there is not enough evidence
to suggest the claim is unjustified.
Now, let's calculate the F-value, degrees of freedom, and p-value for the given scenario using the provided information:

Claimed variance: claimed_variance = 0.005
Sample variance: sample_variance = 0.006
Sample size: n = 25
Significance level: α = 0.01"""

import scipy.stats as stats

# Claimed variance
claimed_variance = 0.005

# Sample variance
sample_variance = 0.006

# Sample size
n = 25

# Calculate F-value
f_value = sample_variance / claimed_variance

# Degrees of freedom
df_num = n - 1
df_den = 1

# Calculate p-value
p_value = 2 * (1 - stats.f.cdf(f_value, df_num, df_den))

# Compare p-value with significance level
alpha = 0.01
if p_value < alpha:
    print("Reject null hypothesis. The claim is not justified.")
else:
    print("Fail to reject null hypothesis. There is not enough evidence to suggest the claim is unjustified.")

# Output the F-value, degrees of freedom, and p-value
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_den)
print("p-value:", p_value)


Fail to reject null hypothesis. There is not enough evidence to suggest the claim is unjustified.
F-value: 1.2
Degrees of freedom (numerator): 24
Degrees of freedom (denominator): 1
p-value: 1.2592199239918715


In [7]:
"""Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an
F-distribution and calculates the mean and variance of the distribution. The function should return the
mean and variance as a tuple."""

def f_distribution_mean_variance(df_num, df_den):
    """
    Calculates the mean and variance of an F-distribution given the degrees of freedom for the numerator and denominator.

    Args:
        df_num (int): Degrees of freedom for the numerator.
        df_den (int): Degrees of freedom for the denominator.

    Returns:
        mean_var_tuple (tuple): Tuple containing the mean and variance of the F-distribution.
    """
    # Calculate the mean of the F-distribution
    mean = df_den / (df_den - 2)

    # Calculate the variance of the F-distribution
    variance = (2 * (df_den**2) * (df_num + df_den - 2)) / ((df_num * (df_den - 2)**2 * (df_den - 4)))

    return mean, variance


df_num = 3
df_den = 16

mean, variance = f_distribution_mean_variance(df_num, df_den)
print("Mean:", mean)
print("Variance:", variance)


Mean: 1.1428571428571428
Variance: 1.2335600907029478


In [8]:
"""Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The
sample variance is found to be 25. Another random sample of 15 measurements is taken from another
normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test
at the 10% significance level to determine if the variances are significantly different.
Ans.: To conduct an F-test at the 10% significance level to determine if the variances of two populations with unknown variances are 
significantly different, we can follow these steps:

Define the null hypothesis (H0) and the alternative hypothesis (H1):

H0: The variances of the two populations are equal.
H1: The variances of the two populations are significantly different.
Set the significance level (α) to 0.10.

Calculate the F-value using the formula F = sample_variance1 / sample_variance2, where sample_variance1 and sample_variance2 are the 
sample variances obtained from the two samples.

Determine the degrees of freedom for the numerator (df_num) and denominator (df_den):

df_num = n1 - 1, where n1 is the sample size of the first population.
df_den = n2 - 1, where n2 is the sample size of the second population.
Calculate the p-value using the F-distribution and the degrees of freedom.

p-value = P(F > F-value), where P denotes the probability.
Compare the p-value with the significance level (α) to make a decision:

If the p-value is less than α, reject the null hypothesis (H0) and conclude that the variances are significantly different.
If the p-value is greater than or equal to α, fail to reject the null hypothesis (H0) and conclude that there is not enough evidence 
to suggest a significant difference in variances.
Now, let's calculate the F-value, degrees of freedom, and p-value for the given scenario using the provided information:

Sample variance of the first sample: sample_variance1 = 25
Sample size of the first sample: n1 = 10
Sample variance of the second sample: sample_variance2 = 20
Sample size of the second sample: n2 = 15
Significance level: α = 0.10 """

import scipy.stats as stats

# Sample variances
sample_variance1 = 25
sample_variance2 = 20

# Sample sizes
n1 = 10
n2 = 15

# Calculate F-value
f_value = sample_variance1 / sample_variance2

# Degrees of freedom
df_num = n1 - 1
df_den = n2 - 1

# Calculate p-value
p_value = 2 * (1 - stats.f.cdf(f_value, df_num, df_den))

# Compare p-value with significance level
alpha = 0.10
if p_value < alpha:
    print("Reject null hypothesis. Variances are significantly different.")
else:
    print("Fail to reject null hypothesis. Variances are not significantly different.")

# Output the F-value, degrees of freedom, and p-value
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_den)
print("p-value:", p_value)


Fail to reject null hypothesis. Variances are not significantly different.
F-value: 1.25
Degrees of freedom (numerator): 9
Degrees of freedom (denominator): 14
p-value: 0.6832194382585954


In [9]:
"""Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday
night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
significance level to determine if the variances are significantly different.
Ans.: To conduct an F-test at the 5% significance level to determine if the variances of the waiting times at two different restaurants 
are significantly different, we can follow these steps:

Define the null hypothesis (H0) and the alternative hypothesis (H1):

H0: The variances of the waiting times at the two restaurants are equal.
H1: The variances of the waiting times at the two restaurants are significantly different.
Set the significance level (α) to 0.05.

Calculate the sample variances for each restaurant.

Determine the degrees of freedom for the numerator (df_num) and denominator (df_den):

df_num = n1 - 1, where n1 is the number of data points for Restaurant A.
df_den = n2 - 1, where n2 is the number of data points for Restaurant B.
Calculate the F-value using the formula F = sample_variance_A / sample_variance_B, where sample_variance_A and sample_variance_B are 
the sample variances for Restaurant A and Restaurant B, respectively.

Calculate the p-value using the F-distribution and the degrees of freedom.

p-value = P(F > F-value), where P denotes the probability.
Compare the p-value with the significance level (α) to make a decision:

If the p-value is less than α, reject the null hypothesis (H0) and conclude that the variances are significantly different.
If the p-value is greater than or equal to α, fail to reject the null hypothesis (H0) and conclude that there is not enough evidence 
to suggest a significant difference in variances.
Now, let's calculate the F-value, degrees of freedom, and p-value for the given data using the provided information:

Waiting times at Restaurant A: [24, 25, 28, 23, 22, 20, 27]
Waiting times at Restaurant B: [31, 33, 35, 30, 32, 36]
Significance level: α = 0.05 """

import scipy.stats as stats
import numpy as np

# Waiting times at Restaurant A
data_A = np.array([24, 25, 28, 23, 22, 20, 27])
n1 = len(data_A)

# Waiting times at Restaurant B
data_B = np.array([31, 33, 35, 30, 32, 36])
n2 = len(data_B)

# Calculate sample variances
sample_variance_A = np.var(data_A, ddof=1)
sample_variance_B = np.var(data_B, ddof=1)

# Calculate F-value
f_value = sample_variance_A / sample_variance_B

# Degrees of freedom
df_num = n1 - 1
df_den = n2 - 1

# Calculate p-value
p_value = 2 * (1 - stats.f.cdf(f_value, df_num, df_den))

# Compare p-value with significance level
alpha = 0.05
if p_value < alpha:
    print("Reject null hypothesis. Variances are significantly different.")
else:
    print("Fail to reject null hypothesis. Variances are not significantly different.")

# Output the F-value, degrees of freedom, and p-value
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_den)
print("p-value:", p_value)



Fail to reject null hypothesis. Variances are not significantly different.
F-value: 1.4551907719609583
Degrees of freedom (numerator): 6
Degrees of freedom (denominator): 5
p-value: 0.6974815747937484


In [10]:
"""Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83;
Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances
are significantly different.
Ans.: To conduct an F-test at the 1% significance level to determine if the variances of the test scores of two groups of students are 
significantly different, we can follow these steps:

Define the null hypothesis (H0) and the alternative hypothesis (H1):

H0: The variances of the test scores of the two groups are equal.
H1: The variances of the test scores of the two groups are significantly different.
Set the significance level (α) to 0.01.

Calculate the sample variances for each group.

Determine the degrees of freedom for the numerator (df_num) and denominator (df_den):

df_num = n1 - 1, where n1 is the number of data points for Group A.
df_den = n2 - 1, where n2 is the number of data points for Group B.
Calculate the F-value using the formula F = sample_variance_A / sample_variance_B, where sample_variance_A and sample_variance_B are the 
sample variances for Group A and Group B, respectively.

Calculate the p-value using the F-distribution and the degrees of freedom.

p-value = P(F > F-value), where P denotes the probability.
Compare the p-value with the significance level (α) to make a decision:

If the p-value is less than α, reject the null hypothesis (H0) and conclude that the variances are significantly different.
If the p-value is greater than or equal to α, fail to reject the null hypothesis (H0) and conclude that there is not enough evidence to 
suggest a significant difference in variances.
Now, let's calculate the F-value, degrees of freedom, and p-value for the given data using the provided information:

Test scores of Group A: [80, 85, 90, 92, 87, 83]
Test scores of Group B: [75, 78, 82, 79, 81, 84]
Significance level: α = 0.01 """

import scipy.stats as stats
import numpy as np

# Test scores of Group A
data_A = np.array([80, 85, 90, 92, 87, 83])
n1 = len(data_A)

# Test scores of Group B
data_B = np.array([75, 78, 82, 79, 81, 84])
n2 = len(data_B)

# Calculate sample variances
sample_variance_A = np.var(data_A, ddof=1)
sample_variance_B = np.var(data_B, ddof=1)

# Calculate F-value
f_value = sample_variance_A / sample_variance_B

# Degrees of freedom
df_num = n1 - 1
df_den = n2 - 1

# Calculate p-value
p_value = 2 * (1 - stats.f.cdf(f_value, df_num, df_den))

# Compare p-value with significance level
alpha = 0.01
if p_value < alpha:
    print("Reject null hypothesis. Variances are significantly different.")
else:
    print("Fail to reject null hypothesis. Variances are not significantly different.")

# Output the F-value, degrees of freedom, and p-value
print("F-value:", f_value)
print("Degrees of freedom (numerator):", df_num)
print("Degrees of freedom (denominator):", df_den)
print("p-value:", p_value)


Fail to reject null hypothesis. Variances are not significantly different.
F-value: 1.9442622950819677
Degrees of freedom (numerator): 5
Degrees of freedom (denominator): 5
p-value: 0.4831043549070688
