In [6]:
# (Run this once at the top)
!pip install scipy statsmodels





[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import pandas as pd
import numpy as np

from scipy.stats import f_oneway
import statsmodels.api as sm
from statsmodels.formula.api import ols


In [8]:
# Build a small synthetic sales dataset. The global NumPy seed is fixed so
# every Restart & Run All produces the same rows.
np.random.seed(42)

N_ROWS = 50
REGIONS = ['North', 'South', 'East', 'West']
PRODUCT_CATEGORIES = ['A', 'B', 'C']

# NOTE: the order of the random draws below determines the generated values;
# keyword arguments are evaluated left to right, so keep this ordering.
data = dict(
    Advertising_Spend=np.random.uniform(10, 100, N_ROWS),
    Region=np.random.choice(REGIONS, N_ROWS),
    Sales=np.random.uniform(20, 200, N_ROWS),
    Time_Period=np.arange(1, N_ROWS + 1),
)

# Second categorical factor, drawn after all the columns above.
data['Product_Category'] = np.random.choice(PRODUCT_CATEGORIES, N_ROWS)

df = pd.DataFrame(data)
df.head(3)


Unnamed: 0,Advertising_Spend,Region,Sales,Time_Period,Product_Category
0,43.708611,East,151.22129,1,B
1,95.564288,South,158.828662,2,A
2,75.879455,South,33.328037,3,A


In [9]:
# Persist the frame to disk and load it straight back, to demonstrate the
# CSV round trip. The index is not written, so read_csv assigns a fresh
# default index on the way back in.
csv_path = 'sales_data.csv'
df.to_csv(csv_path, index=False)

# From here on `df` is the frame as parsed from the file.
df = pd.read_csv(csv_path)
df.head(3)


Unnamed: 0,Advertising_Spend,Region,Sales,Time_Period,Product_Category
0,43.708611,East,151.22129,1,B
1,95.564288,South,158.828662,2,A
2,75.879455,South,33.328037,3,A


In [10]:
# Collect one array of Sales values per Region level.
sales_by_region = df.groupby("Region")["Sales"]
groups = [region_sales.values for _, region_sales in sales_by_region]

# One-way ANOVA via SciPy: each group array is passed as its own argument.
f_stat, p_val = f_oneway(*groups)
print(f"SciPy one-way ANOVA → F = {f_stat:.4f}, p = {p_val:.4f}")



SciPy one-way ANOVA → F = 0.1538, p = 0.9267


In [11]:
# One-way model: Sales explained by Region, with Region wrapped in C(...)
# so the formula treats it as a categorical factor.
one_way_formula = "Sales ~ C(Region)"
model_one = ols(one_way_formula, data=df).fit()

# ANOVA table for the fitted model (typ=2 requests Type II sums of squares).
anova_one = sm.stats.anova_lm(model_one, typ=2)

# Report the regression summary first, then the ANOVA table; each print
# emits a header line followed by the object on the next line.
print("----- OLS Summary (One‑Way) -----", model_one.summary(), sep="\n")
print("\n----- ANOVA Table (One‑Way) -----", anova_one, sep="\n")


----- OLS Summary (One‑Way) -----
                            OLS Regression Results                            
Dep. Variable:                  Sales   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:                 -0.055
Method:                 Least Squares   F-statistic:                    0.1538
Date:                Sun, 18 May 2025   Prob (F-statistic):              0.927
Time:                        12:22:17   Log-Likelihood:                -267.95
No. Observations:                  50   AIC:                             543.9
Df Residuals:                      46   BIC:                             551.5
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
In

In [12]:
# Two-way model: main effects of Region and Product_Category plus their
# interaction term, all treated as categorical factors.
two_way_formula = "Sales ~ C(Region) + C(Product_Category) + C(Region):C(Product_Category)"
model_two = ols(two_way_formula, data=df).fit()

# ANOVA table for the fitted model (typ=2 requests Type II sums of squares).
anova_two = sm.stats.anova_lm(model_two, typ=2)

# Header line followed by the table on the next line.
print("----- ANOVA Table (Two‑Way + Interaction) -----", anova_two, sep="\n")


----- ANOVA Table (Two‑Way + Interaction) -----
                                      sum_sq    df         F    PR(>F)
C(Region)                        1740.455552   3.0  0.208419  0.889941
C(Product_Category)              9048.966150   2.0  1.625415  0.210217
C(Region):C(Product_Category)   17371.833733   6.0  1.040135  0.415003
Residual                       105776.290153  38.0       NaN       NaN
