In [42]:
import numpy as np
import pandas as pd

import scipy.stats as stats

import statsmodels.api as sm
from statsmodels.formula.api import ols

# One-Way ANOVA

In [14]:
# Toy data set for the one-way ANOVA: three sales observations per store.
data = dict(
    Store=["A"] * 3 + ["B"] * 3 + ["C"] * 3,
    Sales=[
        200, 220, 215,  # store A
        250, 260, 255,  # store B
        230, 225, 240,  # store C
    ],
)

In [16]:
# Tabular view of the one-way data: one row per observation, one column per dict key.
df1 = pd.DataFrame(data=data)

In [34]:
# Split the sales by store. Selecting with the list ['Sales'] keeps each subset
# as a one-column DataFrame (not a Series), exactly as before.
Store_A, Store_B, Store_C = (
    df1.loc[df1['Store'] == label, ['Sales']] for label in ('A', 'B', 'C')
)

In [36]:
# Show each store's subset; the extra '\n' argument leaves a blank line between them.
for store_sales in (Store_A, Store_B, Store_C):
    print(store_sales, '\n')

   Sales
0    200
1    220
2    215 

   Sales
3    250
4    260
5    255 

   Sales
6    230
7    225
8    240 



In [48]:
# Perform One-Way ANOVA
# Each group is flattened to a 1-D sample first. Passing the one-column
# DataFrames directly makes f_oneway treat them as 2-D input and return
# one-element arrays (printed as "[22.08...]") rather than plain scalars,
# which then leak into the p-value comparison further down.
# np.ravel accepts both a one-column DataFrame and a Series.
f_statistic, p_value = stats.f_oneway(
    np.ravel(Store_A),
    np.ravel(Store_B),
    np.ravel(Store_C),
)

In [50]:
# Report the test statistic and its p-value on a single line.
anova_summary = f"f_statistic: {f_statistic} | p_value: {p_value}"
print(anova_summary)

f_statistic: [22.08695652] | p_value: [0.00171009]


In [54]:
# Interpretation: compare the p-value against the 0.05 significance threshold
# and print the matching conclusion.
verdict = (
    "There is a significant difference between at least one pair of stores."
    if p_value < 0.05
    else "No significant difference in sales between stores."
)
print(verdict)

There is a significant difference between at least one pair of stores.


# Two-Way ANOVA

In [46]:
# Toy data set for the two-way ANOVA, written one observation per row:
# (store, advertisement type, sales).
observations = [
    ("A", "Online", 200),
    ("A", "Offline", 220),
    ("A", "Online", 210),
    ("B", "Online", 250),
    ("B", "Offline", 265),
    ("B", "Online", 255),
    ("C", "Offline", 230),
    ("C", "Online", 225),
    ("C", "Offline", 240),
]

# Transpose the rows into the column-oriented dict of lists used downstream.
data = {
    column: list(values)
    for column, values in zip(("Store", "Ad_Type", "Sales"), zip(*observations))
}

In [8]:
# Build the two-way frame; the column order is spelled out and follows the dict's key order.
df = pd.DataFrame(data, columns=list(data))

In [12]:
# Preview the first five rows of the two-way data.
df.head(n=5)

Unnamed: 0,Store,Ad_Type,Sales
0,A,Online,200
1,A,Offline,220
2,A,Online,210
3,B,Online,250
4,B,Offline,265


In [64]:
# Model formula: Sales explained by Store, Ad_Type and their interaction.
formula = "Sales ~ C(Store) + C(Ad_Type) + C(Store):C(Ad_Type)"

# `fomula` is the original (misspelled) name; it stays as an alias so any
# code that already refers to it continues to work.
fomula = formula
formula

'Sales ~ C(Store) + C(Ad_Type) + C(Store):C(Ad_Type)'

In [60]:
# Categorical variables are wrapped in C() to indicate that they are categorical, not numeric.
# 💡 The interaction effect (C(Store):C(Ad_Type)) tells us if the impact of Ad Type depends on the Store.

# Fit from the formula string built in the formula cell (bound to `fomula`)
# rather than repeating the literal here, so the two cannot drift apart.
model = ols(fomula, data=df).fit()

# typ=2 requests the Type II ANOVA table.
anova_table = sm.stats.anova_lm(model, typ=2)

print(anova_table)

                          sum_sq   df          F    PR(>F)
C(Store)             3327.916667  2.0  44.372222  0.005913
C(Ad_Type)            312.500000  1.0   8.333333  0.063180
C(Store):C(Ad_Type)     8.333333  2.0   0.111111  0.898356
Residual              112.500000  3.0        NaN       NaN
