# Type 1 ANOVA

SS(A) for factor A
SS(B | A) for factor B
SS(AB | A, B) for interaction AB

SS(A)
SS(B | A) = SS (A, B) – SS(A)
SS(AB|A,B) = SS (AB, A, B) – SS(A, B)

# Type 2 ANOVA

SS(A | B) for factor A
SS(B | A) for factor B
SS(AB | A, B) for interaction AB

SS(A|B) = SS (A, B) – SS(B)
SS(B|A) = SS (A, B) – SS(A)
SS(AB|A,B) = SS (AB, A, B) – SS(A, B)

# Type 3 ANOVA

SS(A | B, AB) for factor A
SS(B | A, AB) for factor B
SS(AB | A, B) for interaction AB

SS(A|B,AB) = SS (AB, A, B) – SS(AB, B)
SS(B|A,AB)= SS (AB, A, B) – SS(AB, A)
SS(AB|A,B)= SS (AB, A, B) – SS(A, B)

# Step 1: Generating data


In [1]:
import numpy as np
import pandas as pd

Brands = np.repeat(['Brand_A','Brand_B'],20)
print(Brands)

['Brand_A' 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_A'
 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_A'
 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_A' 'Brand_B'
 'Brand_B' 'Brand_B' 'Brand_B' 'Brand_B' 'Brand_B' 'Brand_B' 'Brand_B'
 'Brand_B' 'Brand_B' 'Brand_B' 'Brand_B' 'Brand_B' 'Brand_B' 'Brand_B'
 'Brand_B' 'Brand_B' 'Brand_B' 'Brand_B' 'Brand_B']


In [2]:
Stores = np.tile(['Store_1','Store_2'], 20)
print(Stores)

['Store_1' 'Store_2' 'Store_1' 'Store_2' 'Store_1' 'Store_2' 'Store_1'
 'Store_2' 'Store_1' 'Store_2' 'Store_1' 'Store_2' 'Store_1' 'Store_2'
 'Store_1' 'Store_2' 'Store_1' 'Store_2' 'Store_1' 'Store_2' 'Store_1'
 'Store_2' 'Store_1' 'Store_2' 'Store_1' 'Store_2' 'Store_1' 'Store_2'
 'Store_1' 'Store_2' 'Store_1' 'Store_2' 'Store_1' 'Store_2' 'Store_1'
 'Store_2' 'Store_1' 'Store_2' 'Store_1' 'Store_2']


In [3]:
mu_1, sigma_1 = 5, 1 
mu_2, sigma_2 = 8, 1 
Sales=np.concatenate((np.random.normal(mu_1, sigma_1, 20),np.random.normal(mu_2, sigma_2, 20)))
print(Sales)

[3.56435299 4.33986453 4.23246001 6.83866871 2.42707948 5.47867456
 4.57077591 5.65497171 5.76071917 6.1884872  3.196842   6.47970742
 6.33950437 3.69842303 5.23070843 4.97884916 4.26785467 5.70779457
 4.08605187 3.59486698 7.18681043 7.53004382 8.15901984 9.1813522
 9.23949979 8.04039463 8.15324758 7.45596201 8.33283073 6.44446531
 9.95784578 6.86450825 8.21507338 6.96188718 9.21363376 7.40707548
 7.06536653 7.49478703 6.05952939 7.64696114]


In [4]:
df_sales=pd.DataFrame({'Brands':Brands, 'Stores': Stores, 'Sales': Sales})
print(df_sales)

     Brands   Stores     Sales
0   Brand_A  Store_1  3.564353
1   Brand_A  Store_2  4.339865
2   Brand_A  Store_1  4.232460
3   Brand_A  Store_2  6.838669
4   Brand_A  Store_1  2.427079
5   Brand_A  Store_2  5.478675
6   Brand_A  Store_1  4.570776
7   Brand_A  Store_2  5.654972
8   Brand_A  Store_1  5.760719
9   Brand_A  Store_2  6.188487
10  Brand_A  Store_1  3.196842
11  Brand_A  Store_2  6.479707
12  Brand_A  Store_1  6.339504
13  Brand_A  Store_2  3.698423
14  Brand_A  Store_1  5.230708
15  Brand_A  Store_2  4.978849
16  Brand_A  Store_1  4.267855
17  Brand_A  Store_2  5.707795
18  Brand_A  Store_1  4.086052
19  Brand_A  Store_2  3.594867
20  Brand_B  Store_1  7.186810
21  Brand_B  Store_2  7.530044
22  Brand_B  Store_1  8.159020
23  Brand_B  Store_2  9.181352
24  Brand_B  Store_1  9.239500
25  Brand_B  Store_2  8.040395
26  Brand_B  Store_1  8.153248
27  Brand_B  Store_2  7.455962
28  Brand_B  Store_1  8.332831
29  Brand_B  Store_2  6.444465
30  Brand_B  Store_1  9.957846
31  Bran

# Step 2: Use statsmodels

In [5]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

model = ols('Sales ~ C(Brands)*C(Stores)', data=df_sales).fit()
Type1 = sm.stats.anova_lm(model, typ=1)
print(Type1)

                       df     sum_sq    mean_sq          F        PR(>F)
C(Brands)             1.0  89.920930  89.920930  79.085902  1.294847e-10
C(Stores)             1.0   0.186123   0.186123   0.163696  6.881700e-01
C(Brands):C(Stores)   1.0   6.272148   6.272148   5.516385  2.444815e-02
Residual             36.0  40.932118   1.137003        NaN           NaN


In [6]:
Type2 = sm.stats.anova_lm(model, typ=2)
print(Type2)

                        sum_sq    df          F        PR(>F)
C(Brands)            89.920930   1.0  79.085902  1.294847e-10
C(Stores)             0.186123   1.0   0.163696  6.881700e-01
C(Brands):C(Stores)   6.272148   1.0   5.516385  2.444815e-02
Residual             40.932118  36.0        NaN           NaN


In [7]:
Type3 = sm.stats.anova_lm(model, typ=3)
print(Type3)

                         sum_sq    df           F        PR(>F)
Intercept            190.762345   1.0  167.776426  4.066664e-15
C(Brands)             71.845169   1.0   63.188181  1.947851e-09
C(Stores)              4.309595   1.0    3.790310  5.938497e-02
C(Brands):C(Stores)    6.272148   1.0    5.516385  2.444815e-02
Residual              40.932118  36.0         NaN           NaN
