# `Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio test. The function should return the F-value and the corresponding p-value for the test.`

In [6]:
import scipy.stats as stats

def calculate_f_and_p_value(data1, data2):  # data1: First array of data & data2: Second array of data.
    
    
    #f_value: The calculated F-value.
    #p_value: The corresponding p-value.
    
    f_value, p_value = stats.f_oneway(data1, data2)
    
    return f_value, p_value

# Example usage:
data_group1 = [28, 32, 24, 30, 25]
data_group2 = [37, 40, 45, 36, 39]

f_value, p_value = calculate_f_and_p_value(data_group1, data_group2)
print("F-value:", f_value)
print("p-value:", p_value)


F-value: 28.629787234042553
p-value: 0.000685289458963983


# `Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution, write a Python function that returns the critical F-value for a two-tailed test.`

In [23]:
import scipy.stats as stats

def critical_f_values(alpha, dfn, dfd):
   
    # alpha: Significance level (e.g., 0.05 for 5% significance).
    # dfn: Degrees of freedom for the numerator.
    # dfd: Degrees of freedom for the denominator.

   
    alpha_over_2 = alpha / 2  # For a two-tailed test, divide alpha by 2

    # Calculate lower and upper critical F-values
    lower_critical_f = stats.f.ppf(alpha_over_2, dfn, dfd)
    upper_critical_f = stats.f.ppf(1 - alpha_over_2, dfn, dfd)

    return lower_critical_f, upper_critical_f

# Example usage:
alpha = 0.05
dfn = 3  # Degrees of freedom for the numerator
dfd = 20  # Degrees of freedom for the denominator

lower_critical_f, upper_critical_f = critical_f_values(alpha, dfn, dfd)
print("Lower Critical F-value:", lower_critical_f)
print("Upper Critical F-value:", upper_critical_f)


Lower Critical F-value: 0.07058467426541322
Upper Critical F-value: 3.8586986662732143


# Q3. Write a Python program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal. The program should output the F-value, degrees of freedom, and p-value for the test.

In [33]:
import numpy as np
import scipy.stats as stats

# Set random seed for reproducibility
np.random.seed(0)

# Generate random samples from two normal distributions with known variances
variance1 = 4.0  # Variance of the first distribution
variance2 = 9.0  # Variance of the second distribution
sample_size1 = 30  # Sample size for the first distribution
sample_size2 = 40  # Sample size for the second distribution

data1 = np.random.normal(loc=0, scale=np.sqrt(variance1), size=sample_size1)
data2 = np.random.normal(loc=0, scale=np.sqrt(variance2), size=sample_size2)

# Perform an F-test for equality of variances
f_value = np.var(data1, ddof=1) / np.var(data2, ddof=1)
dfn = sample_size1 - 1  # Degrees of freedom for the numerator
dfd = sample_size2 - 1  # Degrees of freedom for the denominator
p_value = 2 * min(stats.f.cdf(f_value, dfn, dfd), 1 - stats.f.cdf(f_value, dfn, dfd))

# Output the results
print("F-value:", f_value)
print("Degrees of Freedom (Numerator, Denominator):", dfn, dfd)
print("p-value:", p_value)

# Perform a significance test at a specific alpha level (e.g., 0.05)
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("Fail to reject the null hypothesis: Variances are equal.")


F-value: 0.7087959958093473
Degrees of Freedom (Numerator, Denominator): 29 39
p-value: 0.3382337757135684
Fail to reject the null hypothesis: Variances are equal.


array([ 1.45818112,  0.25796582,  2.27880137, -2.46965164,  0.80468328,
       -1.36962018, -1.7415943 , -1.15769933, -0.62310506,  0.11233068,
       -2.33029968,  1.80165297,  0.93132488, -3.07248737,  2.97650439,
        3.79177835,  2.35755914, -0.35984967, -2.14150524,  2.10890345,
       -0.80635389,  2.44489014,  0.41654996,  1.95327807,  0.71273279,
        1.41314634,  0.02100004,  3.57174099,  0.25382419,  0.80397873])

# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from each population. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [35]:
# To conduct an F-test to determine if the variances of two populations are significantly different

In [34]:
import scipy.stats as stats

# Known population variances
variance1 = 10
variance2 = 15

# Sample sizes
n1 = 12
n2 = 12

# Calculate F-statistic
f_statistic = variance1 / variance2

# Degrees of freedom
dfn = n1 - 1
dfd = n2 - 1

# Calculate the p-value for the F-test
p_value = 2 * min(stats.f.cdf(f_statistic, dfn, dfd), 1 - stats.f.cdf(f_statistic, dfn, dfd))

# Significance level
alpha = 0.05

# Output results
print("F-statistic:", f_statistic)
print("Degrees of Freedom (Numerator, Denominator):", dfn, dfd)
print("p-value:", p_value)

# Perform the significance test
if p_value < alpha:
    print("Reject the null hypothesis: Variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: Variances are not significantly different.")


F-statistic: 0.6666666666666666
Degrees of Freedom (Numerator, Denominator): 11 11
p-value: 0.5123897987357996
Fail to reject the null hypothesis: Variances are not significantly different.


# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified.

In [37]:
import scipy.stats as stats

# Manufacturer's claim about the variance
claimed_variance = 0.005

# Sample variance
sample_variance = 0.006

# Sample size
sample_size = 25

# Calculate F-statistic
f_statistic = sample_variance / claimed_variance

# Degrees of freedom
dfn = sample_size - 1
dfd = float('inf')  # Assuming infinite degrees of freedom for the claimed variance

# Calculate the p-value for the F-test
p_value = 2 * min(stats.f.cdf(f_statistic, dfn, dfd), 1 - stats.f.cdf(f_statistic, dfn, dfd))

# Significance level
alpha = 0.01

# Output results
print("F-statistic:", f_statistic)
print("Degrees of Freedom (Numerator, Denominator):", dfn, dfd)
print("p-value:", p_value)

# Perform the significance test
if p_value < alpha:
    print("Reject the null hypothesis: The claim is not justified.")
else:
    print("Fail to reject the null hypothesis: The claim is justified.")


F-statistic: 1.2
Degrees of Freedom (Numerator, Denominator): 24 inf
p-value: 0.0
Reject the null hypothesis: The claim is not justified.


# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return the mean and variance as a tuple

In [47]:
def f_distribution_mean_variance(dfn, dfd):
    
#dfn: Degrees of freedom for the numerator.
#dfd: Degrees of freedom for the denominator.

    if dfd > 2:
        mean = dfd / (dfd - 2)
    else:
        mean = float('NaN')

    if dfd > 4:
        variance = (2 * dfd**2 * (dfn + dfd - 2)) / (dfn * (dfd - 2)**2 * (dfd - 4))
    else:
        variance = float('NaN')

    return mean, variance

# Example usage:
dfn = 5  # Degrees of freedom for the numerator
dfd = 10  # Degrees of freedom for the denominator

mean, variance = f_distribution_mean_variance(dfn, dfd)
print("Mean of F-distribution:", mean)
print("Variance of F-distribution:", variance)


Mean of F-distribution: 1.25
Variance of F-distribution: 1.3541666666666667


# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.

In [48]:
import scipy.stats as stats

# Sample variances
sample_variance1 = 25
sample_variance2 = 20

# Sample sizes
sample_size1 = 10
sample_size2 = 15

# Calculate F-statistic
f_statistic = sample_variance1 / sample_variance2

# Degrees of freedom
dfn1 = sample_size1 - 1
dfn2 = sample_size2 - 1
dfd1 = dfn1
dfd2 = dfn2

# Calculate the p-value for the F-test
p_value = 2 * min(stats.f.cdf(f_statistic, dfn1, dfd1), 1 - stats.f.cdf(f_statistic, dfn2, dfd2))

# Significance level
alpha = 0.10

# Output results
print("F-statistic:", f_statistic)
print("Degrees of Freedom (Numerator 1, Denominator 1):", dfn1, dfd1)
print("Degrees of Freedom (Numerator 2, Denominator 2):", dfn2, dfd2)
print("p-value:", p_value)

# Perform the significance test
if p_value < alpha:
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("Fail to reject the null hypothesis: Variances are equal.")


F-statistic: 1.25
Degrees of Freedom (Numerator 1, Denominator 1): 9 9
Degrees of Freedom (Numerator 2, Denominator 2): 14 14
p-value: 0.682051981692404
Fail to reject the null hypothesis: Variances are equal.


## `Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.`

In [49]:
import scipy.stats as stats

# Waiting times data for Restaurant A and Restaurant B
data_a = [24, 25, 28, 23, 22, 20, 27]
data_b = [31, 33, 35, 30, 32, 36]

# Calculate sample variances
sample_variance_a = sum((x - sum(data_a) / len(data_a))**2 for x in data_a) / (len(data_a) - 1)
sample_variance_b = sum((x - sum(data_b) / len(data_b))**2 for x in data_b) / (len(data_b) - 1)

# Calculate F-statistic
f_statistic = sample_variance_a / sample_variance_b

# Degrees of freedom
dfn = len(data_a) - 1
dfd = len(data_b) - 1

# Calculate the p-value for the F-test
p_value = 2 * min(stats.f.cdf(f_statistic, dfn, dfd), 1 - stats.f.cdf(f_statistic, dfn, dfd))

# Significance level
alpha = 0.05

# Output results
print("F-statistic:", f_statistic)
print("Degrees of Freedom (Numerator, Denominator):", dfn, dfd)
print("p-value:", p_value)

# Perform the significance test
if p_value < alpha:
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("Fail to reject the null hypothesis: Variances are equal.")


F-statistic: 1.4551907719609583
Degrees of Freedom (Numerator, Denominator): 6 5
p-value: 0.6974815747937484
Fail to reject the null hypothesis: Variances are equal.


## `Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances are significantly different.`

The formula for the sample variance is:
s2(S square)=1/n-1 E(i=1)(xi -ˉxˉ)2                    *2 squares each of these differences
 
 
 
s2i s the sample variance.
n is the sample size.
xi: represents each individual data point in the sample.
ˉxˉis the sample mean.

#### sample_variance_a = sum((x - sum(group_a_scores) / len(group_a_scores))**2 for x in group_a_scores) / (len(group_a_scores) - 1)



Step1:-Define the null and alternative hypotheses:

Null Hypothesis (H0): The variances of the two populations are equal
Alternative Hypothesis (H1): The variances of the two populations are not equal

Step2:-
Set the significance level (alpha) to 0.01 (1%).

Step3:-
Calculate the F-statistic using the formula:
F = (sample variance of Group A) / (sample variance of Group B)


Step4:-
Determine the degrees of freedom for the numerator (dfn) and denominator (dfd) for the F-distribution:

dfn = sample size of Group A - 1
dfd = sample size of Group B - 1

Step5:-
Use the F-distribution to calculate the p-value associated with the F-statistic.

Step6:-
Compare the p-value to the significance level (alpha):

If p-value < alpha, reject the null hypothesis.
If p-value >= alpha, fail to reject the null hypothesis.




In [51]:
import scipy.stats as stats

# Test scores data for Group A and Group B
group_a_scores = [80, 85, 90, 92, 87, 83]
group_b_scores = [75, 78, 82, 79, 81, 84]

# Calculate sample variances
sample_variance_a = sum((x - sum(group_a_scores) / len(group_a_scores))**2 for x in group_a_scores) / (len(group_a_scores) - 1)
sample_variance_b = sum((x - sum(group_b_scores) / len(group_b_scores))**2 for x in group_b_scores) / (len(group_b_scores) - 1)

# Calculate F-statistic
f_statistic = sample_variance_a / sample_variance_b

# Degrees of freedom
dfn = len(group_a_scores) - 1
dfd = len(group_b_scores) - 1

# Calculate the p-value for the F-test
p_value = 2 * min(stats.f.cdf(f_statistic, dfn, dfd), 1 - stats.f.cdf(f_statistic, dfn, dfd))

# Significance level
alpha = 0.01

# Output results
print("F-statistic:", f_statistic)
print("Degrees of Freedom (Numerator, Denominator):", dfn, dfd)
print("p-value:", p_value)

# Perform the significance test
if p_value < alpha:
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("Fail to reject the null hypothesis: Variances are equal.")


F-statistic: 1.9442622950819677
Degrees of Freedom (Numerator, Denominator): 5 5
p-value: 0.4831043549070688
Fail to reject the null hypothesis: Variances are equal.
