In [2]:
#Q1
import numpy as np
import scipy.stats as stats

def calculate_f_value(data1, data2):
    """
    Calculate the F-value and p-value for a variance ratio test (ANOVA).

    Parameters:
    - data1: Array-like or list of values for the first group.
    - data2: Array-like or list of values for the second group.

    Returns:
    - f_value: The F-value statistic.
    - p_value: The corresponding p-value for the test.
    """

    f_value, p_value = stats.f_oneway(data1, data2)

    return f_value, p_value

group1_data = [20, 25, 30, 35, 40]
group2_data = [15, 22, 28, 32, 38]

f_value, p_value = calculate_f_value(group1_data, group2_data)
print("F-value:", f_value)
print("p-value:", p_value)


F-value: 0.31802120141342755
p-value: 0.5882550250918399


In [3]:
#Q2
import scipy.stats as stats

def calculate_critical_f_value(alpha, df_num, df_denom):
    """
    Calculate the critical F-value for a two-tailed test given a significance level (alpha) and
    degrees of freedom for the numerator and denominator of an F-distribution.

    Parameters:
    - alpha: Significance level (e.g., 0.05 for a 95% confidence level).
    - df_num: Degrees of freedom for the numerator.
    - df_denom: Degrees of freedom for the denominator.

    Returns:
    - critical_f_value: The critical F-value for the specified alpha and degrees of freedom.
    """

    # Calculate the critical F-value using the percent point function (ppf)
    # The ppf function returns the value x such that P(X <= x) = alpha/2 for a two-tailed test
    critical_f_value = stats.f.ppf(1 - alpha / 2, df_num, df_denom)

    return critical_f_value

alpha = 0.05
df_num = 3
df_denom = 20

critical_f_value = calculate_critical_f_value(alpha, df_num, df_denom)
print("Critical F-value:", critical_f_value)


Critical F-value: 3.8586986662732143


In [4]:
#Q3
import numpy as np
import scipy.stats as stats

mean1 = 0  # Mean for the first distribution
mean2 = 0  # Mean for the second distribution
variance1 = 1  # Variance for the first distribution (known)
variance2 = 2  # Variance for the second distribution (known)

np.random.seed(0)  # For reproducibility
sample_size = 30  # Sample size for each distribution
sample1 = np.random.normal(mean1, np.sqrt(variance1), sample_size)
sample2 = np.random.normal(mean2, np.sqrt(variance2), sample_size)

# Perform an F-test to compare variances
f_statistic = np.var(sample1, ddof=1) / np.var(sample2, ddof=1)
df1 = sample_size - 1  # Degrees of freedom for the numerator
df2 = sample_size - 1  # Degrees of freedom for the denominator
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))

print("F-value:", f_statistic)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)

# Check if the variances are equal (use a significance level, e.g., 0.05)
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("Fail to reject the null hypothesis: Variances are equal.")


F-value: 0.7242619039631234
Degrees of freedom (numerator, denominator): 29 29
p-value: 0.3901538481296291
Fail to reject the null hypothesis: Variances are equal.


In [5]:
#Q4
import scipy.stats as stats

# Given variances
variance1 = 10
variance2 = 15

# Sample sizes
n1 = 12
n2 = 12

# Calculate the F-statistic
if variance1 > variance2:
    f_statistic = variance1 / variance2
    df1 = n1 - 1
    df2 = n2 - 1
else:
    f_statistic = variance2 / variance1
    df1 = n2 - 1
    df2 = n1 - 1

# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))

# Significance level
alpha = 0.05

# Print the results
print("F-statistic:", f_statistic)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)

# Make a decision
if p_value < alpha:
    print("Reject the null hypothesis: The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: The variances are not significantly different.")


F-statistic: 1.5
Degrees of freedom (numerator, denominator): 11 11
p-value: 0.5123897987357995
Fail to reject the null hypothesis: The variances are not significantly different.


In [6]:
#Q5
import scipy.stats as stats

# Given claimed variance and sample variance
claimed_variance = 0.005
sample_variance = 0.006

# Sample size
n = 25

# Calculate the F-statistic
f_statistic = sample_variance / claimed_variance

# Degrees of freedom
df1 = n - 1
df2 = 1

# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))

# Significance level
alpha = 0.01

# Print the results
print("F-statistic:", f_statistic)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)

# Make a decision
if p_value < alpha:
    print("Reject the null hypothesis: The claim is not justified.")
else:
    print("Fail to reject the null hypothesis: The claim is justified.")


F-statistic: 1.2
Degrees of freedom (numerator, denominator): 24 1
p-value: 0.7407800760081285
Fail to reject the null hypothesis: The claim is justified.


In [7]:
#Q6
def calculate_f_distribution_mean_variance(df1, df2):
    """
    Calculate the mean and variance of an F-distribution given degrees of freedom 
    for the numerator and denominator.

    Parameters:
    - df1: Degrees of freedom for the numerator.
    - df2: Degrees of freedom for the denominator.

    Returns:
    - (mean, variance): A tuple containing the mean and variance of the F-distribution.
    """

    # Check if degrees of freedom are valid
    if df1 <= 0 or df2 <= 0:
        raise ValueError("Degrees of freedom must be positive.")

    # Calculate the mean of the F-distribution
    if df2 > 2:
        mean = df2 / (df2 - 2)
    else:
        mean = None  # Mean is undefined for df2 <= 2

    # Calculate the variance of the F-distribution
    if df2 > 4:
        variance = (2 * df2 ** 2 * (df1 + df2 - 2)) / (df1 * (df2 - 2) ** 2 * (df2 - 4))
    else:
        variance = None  # Variance is undefined for df2 <= 4

    return mean, variance

# Example usage:
df1 = 3
df2 = 20
mean, variance = calculate_f_distribution_mean_variance(df1, df2)
print("Mean:", mean)
print("Variance:", variance)


Mean: 1.1111111111111112
Variance: 1.0802469135802468


In [8]:
#Q7
import scipy.stats as stats

# Given sample variances and sample sizes
sample_variance1 = 25
sample_size1 = 10
sample_variance2 = 20
sample_size2 = 15

# Calculate the F-statistic
if sample_variance1 > sample_variance2:
    f_statistic = sample_variance1 / sample_variance2
    df1 = sample_size1 - 1
    df2 = sample_size2 - 1
else:
    f_statistic = sample_variance2 / sample_variance1
    df1 = sample_size2 - 1
    df2 = sample_size1 - 1

# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))

# Significance level
alpha = 0.10

# Print the results
print("F-statistic:", f_statistic)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)

# Make a decision
if p_value < alpha:
    print("Reject the null hypothesis: The variances are not equal.")
else:
    print("Fail to reject the null hypothesis: The variances are equal.")


F-statistic: 1.25
Degrees of freedom (numerator, denominator): 9 14
p-value: 0.6832194382585954
Fail to reject the null hypothesis: The variances are equal.


In [11]:
#Q8
import scipy.stats as stats

# Waiting times data for Restaurant A and Restaurant B
waiting_times_A = [24, 25, 28, 23, 22, 20, 27]
waiting_times_B = [31, 33, 35, 30, 32, 36]

# Calculate sample variances
sample_variance_A = sum((x - sum(waiting_times_A) / len(waiting_times_A))**2 
                        for x in waiting_times_A) / (len(waiting_times_A) - 1)
sample_variance_B = sum((x - sum(waiting_times_B) / len(waiting_times_B))**2 
                        for x in waiting_times_B) / (len(waiting_times_B) - 1)

# Calculate the F-statistic
if sample_variance_A > sample_variance_B:
    f_statistic = sample_variance_A / sample_variance_B
    df1 = len(waiting_times_A) - 1
    df2 = len(waiting_times_B) - 1
else:
    f_statistic = sample_variance_B / sample_variance_A
    df1 = len(waiting_times_B) - 1
    df2 = len(waiting_times_A) - 1

# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))

# Significance level
alpha = 0.05

# Print the results
print("F-statistic:", f_statistic)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)

# Make a decision
if p_value < alpha:
    print("Reject the null hypothesis: The variances are not equal.")
else:
    print("Fail to reject the null hypothesis: The variances are equal.")


F-statistic: 1.4551907719609583
Degrees of freedom (numerator, denominator): 6 5
p-value: 0.6974815747937484
Fail to reject the null hypothesis: The variances are equal.


In [12]:
#Q9
import scipy.stats as stats

# Test scores data for Group A and Group B
test_scores_A = [80, 85, 90, 92, 87, 83]
test_scores_B = [75, 78, 82, 79, 81, 84]

# Calculate sample variances
sample_variance_A = sum((x - sum(test_scores_A) / len(test_scores_A))**2
                        for x in test_scores_A) / (len(test_scores_A) - 1)
sample_variance_B = sum((x - sum(test_scores_B) / len(test_scores_B))**2 
                        for x in test_scores_B) / (len(test_scores_B) - 1)

# Calculate the F-statistic
if sample_variance_A > sample_variance_B:
    f_statistic = sample_variance_A / sample_variance_B
    df1 = len(test_scores_A) - 1
    df2 = len(test_scores_B) - 1
else:
    f_statistic = sample_variance_B / sample_variance_A
    df1 = len(test_scores_B) - 1
    df2 = len(test_scores_A) - 1

# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))

# Significance level
alpha = 0.01

# Print the results
print("F-statistic:", f_statistic)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)

# Make a decision
if p_value < alpha:
    print("Reject the null hypothesis: The variances are not equal.")
else:
    print("Fail to reject the null hypothesis: The variances are equal.")


F-statistic: 1.9442622950819677
Degrees of freedom (numerator, denominator): 5 5
p-value: 0.4831043549070688
Fail to reject the null hypothesis: The variances are equal.
