# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio test. The function should return the F-value and the corresponding p-value for the test.


In [1]:
from scipy.stats import f
import math
import numpy as np

In [2]:
def variance_ratio_test( sample1, sample2 ):
    "This function take two arrays of data and calculate the F-value for a variance ratio test"
    #sample standard deviation
    var1 = np.var( sample1, ddof = 1 )
    var2 = np.var( sample2, ddof = 1 )
    
    #f value
    if var1 >= var2:
        f_value = var1/var2
        dfn = len(sample1)-1
        dfd = len(sample2)-1
    else :
        f_value = var2/var1
        dfn = len(sample2)-1
        dfd = len(sample1)-1
    
    #p value
    p_value =  1 - f.cdf( f_value, dfn = dfn, dfd = dfd )
    
    return f_value, p_value

In [3]:
print("F value and p value for two random sample using f test are")
variance_ratio_test( np.random.randint(10,30,10), np.random.randint(10,30,10) )

F value and p value for two random sample using f test are


(1.0012391573729862, 0.49927919244216246)

# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [4]:
def variance_ratio_crit( dfn, dfd ):
    "This function return critical value for a two-tailed test"
    #significance level
    alpha = 0.05
    # decision bondary
    f_crit1 = f.ppf( q = alpha/2, dfn = dfn, dfd = dfd)
    f_crit2 = f.ppf( q = 1 - alpha/2, dfn = dfn, dfd = dfd )
    
    return f_crit1, f_crit2

In [5]:
print("Critical value for (5,7) of degree of freedom with 0.05 significance level are")
variance_ratio_crit( 5,7)

Critical value for (5,7) of degree of freedom with 0.05 significance level are


(0.14591988388835198, 5.285236851504277)

# Q3. Write a Python program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal. The program should output the Fvalue, degrees of freedom, and p-value for the test.


In [6]:
sample1 = np.random.normal( 10, 2, 30)
sample2 = np.random.normal( 10, 2.7, 30 )

Ho = "Variance of two samples are similar"
Ha = "Variance of two samples are different"

#significance value
alpha = 0.05
#degree of freedom
dfn = len(sample1)-1
dfd = len(sample2)-1
#standard deviation
var1 = np.var(sample1, ddof = 1 )
var2 = np.var(sample2, ddof = 1)

print(f"Sample 1: Mean1= {np.mean(sample1)}, Variance1 = {var1}, dof = {dfn}")
print(f"Sample 2: Mean2= {np.mean(sample2)}, Variance2 = {var2}, dof = {dfd}" )

#f test statistic
f_test = var1/var2
#decision bondary
f_crit1 = f.ppf( alpha/2, dfn, dfd )
f_crit2 = f.ppf( 1-alpha/2, dfn, dfd )
#p value
p_value = 2*min( f.cdf(f_test, dfn, dfd), 1 - f.cdf(f_test, dfn, dfd))

print(f"_F-value = {f_test}")
print(f"_F_crit1 = {f_crit1}, F_crit2 = {f_crit2}")
print(f"_p-value = {p_value}")
if f_test < f_crit1 or f_test > f_crit2:
    print("Reject Ho.", Ha)
else:
    print("Fail to reject Ho.", Ho)


Sample 1: Mean1= 10.177525253477778, Variance1 = 8.624817642782604, dof = 29
Sample 2: Mean2= 10.303628386417323, Variance2 = 7.876141853010648, dof = 29
_F-value = 1.0950561586807601
_F_crit1 = 0.4759647743100316, F_crit2 = 2.100995817284211
_p-value = 0.8084995728624329
Fail to reject Ho. Variance of two samples are similar


# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from each population. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.


In [7]:
Ho = "Variance are similar"
Ha = "Variance are different"

#variance of sample
var1 = 10
var2 = 15
#sample size
n1 = n2 = 12
#significance level
alpha = 0.05
#f test
f_test = var2/var1
#degree of freedom
df = n1-1
#critical value
f_crit1 = f.ppf( alpha/2, df, df)
f_crit2 = f.ppf( 1-alpha/2, df, df )
# Calculate the p-value
p_value = 2 * min( f.cdf(f_test, df, df), 1 - f.cdf( f_test, df, df) ) 


print(f"_F-value = {f_test}")
print(f"_F_crit1 = {f_crit1}, F_crit2 = {f_crit2}")
print(f"_p-value = {p_value}")
if f_test < f_crit1 or f_test > f_crit2:
    print("Reject Ho.",Ha)
else:
    print("Fail to reject Ho.",Ho)

    

_F-value = 1.5
_F_crit1 = 0.28787755798459863, F_crit2 = 3.473699051085809
_p-value = 0.5123897987357999
Fail to reject Ho. Variance are similar


# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified.
 

In [8]:
Ho = "The claim is justified"
Ha = "The calim is injustified"

#variance of sample
var1 = 0.006
var2 = 0.005
#sample size
n1 = n2 = 25
#significance level
alpha = 0.01
#f test
f_test = var1/var2
#degree of freedom
df = n1-1
#critical value
f_crit1 = f.ppf( alpha/2, df, df)
f_crit2 = f.ppf( 1-alpha/2, df, df )
# Calculate the p-value
p_value = 2 * min( f.cdf(f_test, df, df), 1 - f.cdf( f_test, df, df) ) 


print(f"_F-value = {f_test}")
print(f"_F_crit1 = {f_crit1}, F_crit2 = {f_crit2}")
print(f"_p-value = {p_value}")
if f_test < f_crit1 or f_test > f_crit2:
    print("Reject Ho.",Ha)
else:
    print("Fail to reject Ho.",Ho)

    

_F-value = 1.2
_F_crit1 = 0.3370701342685674, F_crit2 = 2.966741631292762
_p-value = 0.6587309365634488
Fail to reject Ho. The claim is justified


# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return the mean and variance as a tuple.
 

* for degree of freedom of denominator greater than 2, the mean is 
<img src="img/esp.svg">

* for degree of freedom of denominator greater than 4, variance is given by
<img src="img/var.svg">

In [9]:
def mean_var( dfn, dfd ):
    """This function take degree of freedom for the numerator and denominator of an F-distribution and calculates the mean
    and variance"""
    # for d > 2
    mean = dfd / (dfd - 2)
    
    # for d > 4 
    variance = (2 * (dfd ** 2) * (dfn + dfd - 2)) / ((dfn * (dfd - 2) ** 2 * (dfd - 4)))
    
    return (mean, variance)
    

In [10]:
mean_var( 20,10 )

(1.25, 0.7291666666666666)

# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.
  


In [11]:
Ho = "Variance of two samples are similar"
Ha = "Variance of two samples are different"

#sample variance
var1 = 25
var2 = 20
#sample size
n1 = 10
n2 = 15
#significance level
alpha = 0.1
#f test
f_test = var1/var2
#degree of freedom
dfn = n1-1
dfd = n2-1
#critical value
f_crit1 = f.ppf( alpha/2, dfn, dfd)
f_crit2 = f.ppf( 1-alpha/2, dfn ,dfd)
#p value
p_value = 2 * min( f.cdf(f_test, dfn, dfd), 1 - f.cdf( f_test, dfn, dfd ) ) 

print(f"_F-value = {f_test}")
print(f"_F_crit1 = {f_crit1}, F_crit2 = {f_crit2}")
print(f"_p-value = {p_value}")
if f_test < f_crit1 or f_test > f_crit2:
    print("Reject Ho.",Ha)
else:
    print("Fail to reject Ho.",Ho)



_F-value = 1.25
_F_crit1 = 0.3305268601412525, F_crit2 = 2.6457907352338195
_p-value = 0.6832194382585952
Fail to reject Ho. Variance of two samples are similar


# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.
 


In [12]:
Ho = "Variance of two samples are similar"
Ha = "Variance of two samples are different"

restA = [24, 25, 28, 23, 22, 20, 27]
restB = [31, 33, 35, 30, 32, 36]

#variance
var1 = np.var(restA, ddof = 1)
var2 = np.var(restB, ddof = 1)
#size
n1 = len(restA)
n2 = len(restB)
#f test and dof 
if var1 >= var2 :
    f_test = var1/var2
    dfn = n1-1
    dfd = n2-1
else:
    f_test = var2/var1
    dfn = n2-1
    dfd = n1-1
    
#alpha
alpha = 0.05
#critical value
f_crit1 = f.ppf( alpha/2, dfn, dfd)
f_crit2 = f.ppf( 1-alpha/2, dfn, dfd)
#p value
p_value = 2*min( f.cdf(f_test, dfn, dfd), 1-f.cdf(f_test, dfn, dfd) )

print(f"_F-value = {f_test}")
print(f"_F_crit1 = {f_crit1}, F_crit2 = {f_crit2}")
print(f"_p-value = {p_value}")
if f_test < f_crit1 or f_test > f_crit2:
    print("Reject Ho.",Ha)
else:
    print("Fail to reject Ho.",Ho)

_F-value = 1.4551907719609583
_F_crit1 = 0.16701279718024772, F_crit2 = 6.977701858535566
_p-value = 0.6974815747937484
Fail to reject Ho. Variance of two samples are similar


# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances are significantly different. 

In [13]:
Ho = "Variance of two samples are similar"
Ha = "Variance of two samples are different"

groupA = [80,85,90,92,87,83]
groupB = [75,78,82,79,81,84]

#variance
var1 = np.var(groupA, ddof = 1)
var2 = np.var(groupB, ddof = 1)
#size
n1 = len(groupA)
n2 = len(groupB)
#f test 
f_test = var1/var2 if var1/var2 >= 1 else var2/var1
#degree of freedom
dfn = dfd = n1-1
#alpha
alpha = 0.01
#critical value
f_crit1 = f.ppf( alpha/2, dfn, dfd)
f_crit2 = f.ppf( 1-alpha/2, dfn, dfd)
#p value
p_value = 2*min( f.cdf(f_test, dfn, dfd), 1-f.cdf(f_test, dfn, dfd) )

print(f"_F-value = {f_test}")
print(f"_F_crit1 = {f_crit1}, F_crit2 = {f_crit2}")
print(f"_p-value = {p_value}")
if f_test < f_crit1 or f_test > f_crit2:
    print("Reject Ho.",Ha)
else:
    print("Fail to reject Ho.",Ho)

_F-value = 1.9442622950819677
_F_crit1 = 0.06693617195469603, F_crit2 = 14.939605459912219
_p-value = 0.4831043549070688
Fail to reject Ho. Variance of two samples are similar
