# ANOVA tests



In [2]:
import numpy as np
from scipy import stats


##### One-Factor ANOVA F Test Example 01
You want to see if three 
different golf clubs yield 
different distances. You 
randomly select five 
measurements from 
trials on an automated 
driving machine for each 
club. At the .05 
significance level, is 
there a difference in 
mean distance?

In [21]:
group1 = np.array([254, 263, 241, 237, 251])
group2 = np.array([234, 218, 235, 227, 216])
group3 = np.array([200, 222, 206, 197, 204])

num_of_group = 3
num_of_all_data = 15
level_of_significance = 0.05
level_of_confidence = 1 - level_of_significance
size_of_g1 = len(group1)
mean_of_g1 = np.mean(group1)
mean_of_g2 = np.mean(group2)
mean_of_g3 = np.mean(group3)
grant_mean = np.mean([mean_of_g1, mean_of_g2, mean_of_g3])

sum_of_sqaure_1 = (group1[0] - mean_of_g1)**2 + (group1[1] - mean_of_g1)**2 + (group1[2] - mean_of_g1)**2 + (group1[3] - mean_of_g1)**2 + (group1[4] - mean_of_g1)**2
sum_of_sqaure_2 = (group2[0] - mean_of_g2)**2 + (group2[1] - mean_of_g2)**2 + (group2[2] - mean_of_g2)**2 + (group2[3] - mean_of_g2)**2 + (group2[4] - mean_of_g2)**2
sum_of_sqaure_3 = (group3[0] - mean_of_g3)**2 + (group3[1] - mean_of_g3)**2 + (group3[2] - mean_of_g3)**2 + (group3[3] - mean_of_g3)**2 + (group3[4] - mean_of_g3)**2

sum_of_sqaure_within = sum_of_sqaure_1 + sum_of_sqaure_2 + sum_of_sqaure_3
sum_of_sqaure_between = size_of_g1 * (mean_of_g1 - grant_mean)**2 + size_of_g1 * (mean_of_g2 - grant_mean)**2 + size_of_g1 * (mean_of_g3 - grant_mean)**2
sum_of_sqaure_total = sum_of_sqaure_within + sum_of_sqaure_between

degree_of_freedom_between = num_of_group - 1
degree_of_freedom_within = num_of_all_data - num_of_group

mean_square_between = sum_of_sqaure_between / degree_of_freedom_between
mean_square_within = sum_of_sqaure_within / degree_of_freedom_within

f_value = mean_square_between / mean_square_within
f_critical = stats.f.ppf(level_of_confidence, degree_of_freedom_between, degree_of_freedom_within)

print("sum of sqaure within: ", sum_of_sqaure_within)
print("sum of sqaure between: ", sum_of_sqaure_between)
print("sum of sqaure total: ", sum_of_sqaure_total)
print("mean square between: ", mean_square_between)
print("mean square within: ", mean_square_within)
print("F value: ", f_value)
print("F critical: ", f_critical)
if f_value > f_critical:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")

sum of sqaure within:  1119.6000000000001
sum of sqaure between:  4716.399999999995
sum of sqaure total:  5835.999999999995
mean square between:  2358.1999999999975
mean square within:  93.30000000000001
F value:  25.27545551982848
F critical:  3.8852938346523946
Reject the null hypothesis


##### One-Factor ANOVA F Test Example 02
Is there a statistically significant difference in the mean values between Group 1, Group 
2, and Group 3?

In [22]:
group1 = np.array([0, 6, 2, 4, 3])
group2 = np.array([1, 4, 3, 2, 0])
group3 = np.array([5, 6, 10, 8, 6])

num_of_group = 3
num_of_all_data = 15
level_of_significance = 0.05
level_of_confidence = 1 - level_of_significance
size_of_g1 = len(group1)
mean_of_g1 = np.mean(group1)
mean_of_g2 = np.mean(group2)
mean_of_g3 = np.mean(group3)
grant_mean = np.mean([mean_of_g1, mean_of_g2, mean_of_g3])

sum_of_sqaure_1 = (group1[0] - mean_of_g1)**2 + (group1[1] - mean_of_g1)**2 + (group1[2] - mean_of_g1)**2 + (group1[3] - mean_of_g1)**2 + (group1[4] - mean_of_g1)**2
sum_of_sqaure_2 = (group2[0] - mean_of_g2)**2 + (group2[1] - mean_of_g2)**2 + (group2[2] - mean_of_g2)**2 + (group2[3] - mean_of_g2)**2 + (group2[4] - mean_of_g2)**2
sum_of_sqaure_3 = (group3[0] - mean_of_g3)**2 + (group3[1] - mean_of_g3)**2 + (group3[2] - mean_of_g3)**2 + (group3[3] - mean_of_g3)**2 + (group3[4] - mean_of_g3)**2

sum_of_sqaure_within = sum_of_sqaure_1 + sum_of_sqaure_2 + sum_of_sqaure_3
sum_of_sqaure_between = size_of_g1 * (mean_of_g1 - grant_mean)**2 + size_of_g1 * (mean_of_g2 - grant_mean)**2 + size_of_g1 * (mean_of_g3 - grant_mean)**2
sum_of_sqaure_total = sum_of_sqaure_within + sum_of_sqaure_between

degree_of_freedom_between = num_of_group - 1
degree_of_freedom_within = num_of_all_data - num_of_group

mean_square_between = sum_of_sqaure_between / degree_of_freedom_between
mean_square_within = sum_of_sqaure_within / degree_of_freedom_within

f_value = mean_square_between / mean_square_within
f_critical = stats.f.ppf(level_of_confidence, degree_of_freedom_between, degree_of_freedom_within)

print("sum of sqaure within: ", sum_of_sqaure_within)
print("sum of sqaure between: ", sum_of_sqaure_between)
print("sum of sqaure total: ", sum_of_sqaure_total)
print("mean square between: ", mean_square_between)
print("mean square within: ", mean_square_within)
print("F value: ", f_value)
print("F critical: ", f_critical)
if f_value > f_critical:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")

sum of sqaure within:  46.0
sum of sqaure between:  70.0
sum of sqaure total:  116.0
mean square between:  35.0
mean square within:  3.8333333333333335
F value:  9.130434782608695
F critical:  3.8852938346523946
Reject the null hypothesis


##### One-Factor ANOVA F Test Example 03
Is there a significant difference in the average test scores of students across three 
different teaching methods?

In [23]:
group1 = np.array([78, 84, 76, 82, 75])
group2 = np.array([85, 88, 80, 89, 87])
group3 = np.array([92, 90, 85, 91, 86])

num_of_group = 3
num_of_all_data = 15
level_of_significance = 0.05
level_of_confidence = 1 - level_of_significance
size_of_g1 = len(group1)
mean_of_g1 = np.mean(group1)
mean_of_g2 = np.mean(group2)
mean_of_g3 = np.mean(group3)
grant_mean = np.mean([mean_of_g1, mean_of_g2, mean_of_g3])

sum_of_sqaure_1 = (group1[0] - mean_of_g1)**2 + (group1[1] - mean_of_g1)**2 + (group1[2] - mean_of_g1)**2 + (group1[3] - mean_of_g1)**2 + (group1[4] - mean_of_g1)**2
sum_of_sqaure_2 = (group2[0] - mean_of_g2)**2 + (group2[1] - mean_of_g2)**2 + (group2[2] - mean_of_g2)**2 + (group2[3] - mean_of_g2)**2 + (group2[4] - mean_of_g2)**2
sum_of_sqaure_3 = (group3[0] - mean_of_g3)**2 + (group3[1] - mean_of_g3)**2 + (group3[2] - mean_of_g3)**2 + (group3[3] - mean_of_g3)**2 + (group3[4] - mean_of_g3)**2

sum_of_sqaure_within = sum_of_sqaure_1 + sum_of_sqaure_2 + sum_of_sqaure_3
sum_of_sqaure_between = size_of_g1 * (mean_of_g1 - grant_mean)**2 + size_of_g1 * (mean_of_g2 - grant_mean)**2 + size_of_g1 * (mean_of_g3 - grant_mean)**2
sum_of_sqaure_total = sum_of_sqaure_within + sum_of_sqaure_between

degree_of_freedom_between = num_of_group - 1
degree_of_freedom_within = num_of_all_data - num_of_group

mean_square_between = sum_of_sqaure_between / degree_of_freedom_between
mean_square_within = sum_of_sqaure_within / degree_of_freedom_within

f_value = mean_square_between / mean_square_within
f_critical = stats.f.ppf(level_of_confidence, degree_of_freedom_between, degree_of_freedom_within)

print("sum of sqaure within: ", sum_of_sqaure_within)
print("sum of sqaure between: ", sum_of_sqaure_between)
print("sum of sqaure total: ", sum_of_sqaure_total)
print("mean square between: ", mean_square_between)
print("mean square within: ", mean_square_within)
print("F value: ", f_value)
print("F critical: ", f_critical)
if f_value > f_critical:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")

sum of sqaure within:  149.6
sum of sqaure between:  252.13333333333318
sum of sqaure total:  401.7333333333332
mean square between:  126.06666666666659
mean square within:  12.466666666666667
F value:  10.112299465240635
F critical:  3.8852938346523946
Reject the null hypothesis


##### One-Factor ANOVA F Test Example 04
Do three different fertilizers affect the growth of plants differently?



In [24]:
group1 = np.array([14, 16, 15, 13, 14])
group2 = np.array([18, 17, 19, 18, 20])
group3 = np.array([22, 24, 21, 23, 20])

num_of_group = 3
num_of_all_data = 15
level_of_significance = 0.05
level_of_confidence = 1 - level_of_significance
size_of_g1 = len(group1)
mean_of_g1 = np.mean(group1)
mean_of_g2 = np.mean(group2)
mean_of_g3 = np.mean(group3)
grant_mean = np.mean([mean_of_g1, mean_of_g2, mean_of_g3])

sum_of_sqaure_1 = (group1[0] - mean_of_g1)**2 + (group1[1] - mean_of_g1)**2 + (group1[2] - mean_of_g1)**2 + (group1[3] - mean_of_g1)**2 + (group1[4] - mean_of_g1)**2
sum_of_sqaure_2 = (group2[0] - mean_of_g2)**2 + (group2[1] - mean_of_g2)**2 + (group2[2] - mean_of_g2)**2 + (group2[3] - mean_of_g2)**2 + (group2[4] - mean_of_g2)**2
sum_of_sqaure_3 = (group3[0] - mean_of_g3)**2 + (group3[1] - mean_of_g3)**2 + (group3[2] - mean_of_g3)**2 + (group3[3] - mean_of_g3)**2 + (group3[4] - mean_of_g3)**2

sum_of_sqaure_within = sum_of_sqaure_1 + sum_of_sqaure_2 + sum_of_sqaure_3
sum_of_sqaure_between = size_of_g1 * (mean_of_g1 - grant_mean)**2 + size_of_g1 * (mean_of_g2 - grant_mean)**2 + size_of_g1 * (mean_of_g3 - grant_mean)**2
sum_of_sqaure_total = sum_of_sqaure_within + sum_of_sqaure_between

degree_of_freedom_between = num_of_group - 1
degree_of_freedom_within = num_of_all_data - num_of_group

mean_square_between = sum_of_sqaure_between / degree_of_freedom_between
mean_square_within = sum_of_sqaure_within / degree_of_freedom_within

f_value = mean_square_between / mean_square_within
f_critical = stats.f.ppf(level_of_confidence, degree_of_freedom_between, degree_of_freedom_within)

print("sum of sqaure within: ", sum_of_sqaure_within)
print("sum of sqaure between: ", sum_of_sqaure_between)
print("sum of sqaure total: ", sum_of_sqaure_total)
print("mean square between: ", mean_square_between)
print("mean square within: ", mean_square_within)
print("F value: ", f_value)
print("F critical: ", f_critical)
if f_value > f_critical:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")

sum of sqaure within:  20.4
sum of sqaure between:  144.5333333333333
sum of sqaure total:  164.9333333333333
mean square between:  72.26666666666665
mean square within:  1.7
F value:  42.50980392156862
F critical:  3.8852938346523946
Reject the null hypothesis


# Two way ANOVA

#### Question 1:
A researcher wants to study the effects of Diet Type (Factor A) and Exercise Type 
(Factor B) on weight loss

In [45]:
group1 = np.array([2, 3, 4, 5])
group2 = np.array([4 ,5, 6, 7])
group3 = np.array([6, 7, 8, 9])

num_of_group = 3
num_of_all_data = 12
level_of_significance = 0.05
level_of_confidence = 1 - level_of_significance
size_of_g1 = len(group1)
degree_of_freedom_between = num_of_group - 1
degree_of_freedom_within = num_of_all_data - num_of_group
degree_of_freedom_error = degree_of_freedom_within * degree_of_freedom_between
degree_of_freedom_total = num_of_all_data - 1

sum_of_row1 = np.sum(group1)
sum_of_row2 = np.sum(group2)
sum_of_row3 = np.sum(group3)

sum_ofcol1 = group1[0] + group2[0] + group3[0]
sum_ofcol2 = group1[1] + group2[1] + group3[1]
sum_ofcol3 = group1[2] + group2[2] + group3[2]
sum_ofcol4 = group1[3] + group2[3] + group3[3]

sum_of_all_data = sum_of_row1 + sum_of_row2 + sum_of_row3

sum_of_squares = (group1[0]**2 + group1[1]**2 + group1[2]**2 + group1[3]**2) + (group2[0]**2 + group2[1]**2 + group2[2]**2 + group2[3]**2) + (group3[0]**2 + group3[1]**2 + group3[2]**2 + group3[3]**2)
corrction_factor = sum_of_all_data**2 / num_of_all_data

SST = sum_of_squares - corrction_factor

SSC = ((sum_ofcol1**2)/3 + (sum_ofcol2**2)/3 + (sum_ofcol3**2)/3  + (sum_ofcol4**2/3)) - corrction_factor
SSR = ((sum_of_row1**2)/4 + (sum_of_row2**2)/4 + (sum_of_row3**2)/4) - corrction_factor
SSE = SST - SSC - SSR

MSC = SSC / degree_of_freedom_between
MSR = SSR / degree_of_freedom_within
MSE = SSE / degree_of_freedom_error

f1_value = MSC / MSE
f2_value = MSR / MSE

f1_critical = stats.f.ppf(level_of_confidence, degree_of_freedom_between, degree_of_freedom_error)
f2_critical = stats.f.ppf(level_of_confidence, degree_of_freedom_within, degree_of_freedom_error)

print("sum of sqaure total: ", sum_of_all_data)
print("sum of square of total: ", SST)
print("sum of square of columns: ", SSC)
print("sum of square of rows: ", SSR)
print("sum of square of error: ", SSE)
print("mean square of columns: ", MSC)
print("mean square of rows: ", MSR)
print("mean square of error: ", MSE)
print("F1 value: ", f1_value)
print("F2 value: ", f2_value)
print("F1 critical: ", f1_critical)
print("F2 critical: ", f2_critical)
if f1_value > f1_critical:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")
if f2_value > f2_critical:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")

    

sum of sqaure total:  66
sum of square of total:  47.0
sum of square of columns:  15.0
sum of square of rows:  32.0
sum of square of error:  0.0
mean square of columns:  7.5
mean square of rows:  3.5555555555555554
mean square of error:  0.0
F1 value:  inf
F2 value:  inf
F1 critical:  3.554557145661787
F2 critical:  2.4562811491592678
Reject the null hypothesis
Reject the null hypothesis


  f1_value = MSC / MSE
  f2_value = MSR / MSE


### Question 3:
A company wants to analyze the impact of two factors, advertising method (Factor A) and 
product type (Factor B), on sales. The goal is to determine:
- If there is a significant effect of the advertising method.
- If there is a significant effect of the product type.
- If there is an interaction effect between the two factor

In [47]:
group1 = np.array([2500, 2300, 2400])
group2 = np.array([2200, 2100, 2150])
group3 = np.array([2300, 2200, 2250])

num_of_group = 3
num_of_all_data = 9
level_of_significance = 0.05
level_of_confidence = 1 - level_of_significance
size_of_g1 = len(group1)
degree_of_freedom_between = num_of_group - 1
degree_of_freedom_within = num_of_all_data - num_of_group
degree_of_freedom_error = degree_of_freedom_within * degree_of_freedom_between
degree_of_freedom_total = num_of_all_data - 1

sum_of_row1 = np.sum(group1)
sum_of_row2 = np.sum(group2)
sum_of_row3 = np.sum(group3)

sum_ofcol1 = group1[0] + group2[0] + group3[0]
sum_ofcol2 = group1[1] + group2[1] + group3[1]
sum_ofcol3 = group1[2] + group2[2] + group3[2]

sum_of_all_data = sum_of_row1 + sum_of_row2 + sum_of_row3

sum_of_squares = (group1[0]**2 + group1[1]**2 + group1[2]**2) + (group2[0]**2 + group2[1]**2 + group2[2]**2) + (group3[0]**2 + group3[1]**2 + group3[2]**2)
corrction_factor = sum_of_all_data**2 / num_of_all_data

SST = sum_of_squares - corrction_factor

SSC = ((sum_ofcol1**2)/3 + (sum_ofcol2**2)/3 + (sum_ofcol3**2)/3 ) - corrction_factor
SSR = ((sum_of_row1**2)/3 + (sum_of_row2**2)/3 + (sum_of_row3**2)/3) - corrction_factor
SSE = SST - SSC - SSR

MSC = SSC / degree_of_freedom_between
MSR = SSR / degree_of_freedom_within
MSE = SSE / degree_of_freedom_error

f1_value = MSC / MSE
f2_value = MSR / MSE

f1_critical = stats.f.ppf(level_of_confidence, degree_of_freedom_between, degree_of_freedom_error)
f2_critical = stats.f.ppf(level_of_confidence, degree_of_freedom_within, degree_of_freedom_error)

print("sum of sqaure total: ", sum_of_all_data)
print("sum of square of total: ", SST)
print("sum of square of columns: ", SSC)
print("sum of square of rows: ", SSR)
print("sum of square of error: ", SSE)
print("mean square of columns: ", MSC)
print("mean square of rows: ", MSR)
print("mean square of error: ", MSE)
print("F1 value: ", f1_value)
print("F2 value: ", f2_value)
print("F1 critical: ", f1_critical)
print("F2 critical: ", f2_critical)
if f1_value > f1_critical:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")
if f2_value > f2_critical:
    print("Reject the null hypothesis")
else:
    print("Fail to reject the null hypothesis")

    

sum of sqaure total:  20400
sum of square of total:  125000.0
sum of square of columns:  26666.666666671634
sum of square of rows:  95000.0
sum of square of error:  3333.3333333283663
mean square of columns:  13333.333333335817
mean square of rows:  15833.333333333334
mean square of error:  277.77777777736384
F1 value:  48.00000000008047
F2 value:  57.000000000084945
F1 critical:  3.8852938346523946
F2 critical:  2.9961203775171077
Reject the null hypothesis
Reject the null hypothesis


In [None]:
group1 = np.array([2500, 2300, 2400])
group2 = np.array([2200, 2100, 2150])
group3 = np.array([2300, 2200, 2250])

#sum_of_all_data = np.sum(group1) + np.sum(group2)+ np.sum(group3)
#print(sum_of_all_data)

sum_of_all_data = np.sum([group1,group3,group2])
print(sum_of_all_data)

sum_of_group1 = http://172.17.0.2:3000