In [None]:
import pandas as pd
from scipy.stats import f_oneway

# Read the pricing extract, then convert the 'date' column to datetimes
df = pd.read_csv("AugmentedData.product_pricing1.csv")
df = df.assign(date=pd.to_datetime(df['date']))

# Holiday dates used as anchors for the proximity flag
holidays = {
    'GoodFriday': pd.Timestamp('2025-04-18'),
    'EasterMonday': pd.Timestamp('2025-04-21'),
    'ANZACDay': pd.Timestamp('2025-04-25'),
    'EOFY': pd.Timestamp('2025-06-30'),
}
# Half-width of the window, in calendar days, on either side of each holiday
window = 7

# Flag holiday proximity: True when a row's date falls within `window`
# calendar days (inclusive) of any holiday above.
# Vectorized instead of a per-row lambda. Truncating to midnight first keeps
# the comparison in whole calendar days even if 'date' carries a time of day.
# Missing dates (NaT) compare as False, so they are flagged as not near a holiday.
_calendar_day = df['date'].dt.normalize()
_max_gap = pd.Timedelta(days=window)
_near_any = pd.Series(False, index=df.index)
for _holiday_date in holidays.values():
    _near_any |= (_calendar_day - _holiday_date).abs() <= _max_gap
df['near_holiday'] = _near_any

# Per-product one-way ANOVA comparing prices on rows flagged 'near_holiday'
# against prices on the remaining rows. One result row is collected per
# product that has enough data in both groups.
result_columns = [
    'product_id',
    'f_stat',
    'p_value',
    'holiday_count',
    'non_holiday_count',
]
results = []

for pid, group in df.groupby('product_id'):
    # Split prices on the flag. Missing prices are dropped so that a single
    # NaN does not turn the product's F statistic and p-value into NaN, and
    # so the counts reflect the observations actually tested.
    is_near = group['near_holiday']
    holiday_prices = group.loc[is_near, 'price'].dropna()
    non_holiday_prices = group.loc[~is_near, 'price'].dropna()

    # Skip products with fewer than two observations in either group
    if len(holiday_prices) < 2 or len(non_holiday_prices) < 2:
        continue

    f_stat, p_value = f_oneway(holiday_prices, non_holiday_prices)
    results.append({
        'product_id': pid,
        'f_stat': f_stat,
        'p_value': p_value,
        'holiday_count': len(holiday_prices),
        'non_holiday_count': len(non_holiday_prices),
    })

# Create DataFrame of results and sort by significance.
# Explicit columns keep the sort from raising KeyError when no product
# qualified and `results` is empty.
anova_df = pd.DataFrame(results, columns=result_columns)
anova_df = anova_df.sort_values('p_value')

# Show top products with lowest p-values
print(anova_df.head(10))


                              product_id    f_stat   p_value  holiday_count  \
3   4ae84bdf-f3a5-45ef-817b-d2a69204a251  0.558115  0.457488              8   
8   7a4fb5ec-2e60-429c-a284-a688a7f07387  0.282634  0.596639              8   
1   3b3e2c46-9899-4f14-8f8f-74ec3a1d31a7  0.220146  0.640366              8   
20  e79a6d75-9fed-4859-a342-cedc1fa5a132  0.209151  0.648830              8   
11  b9935dad-617a-4bf2-9dff-5f01faf98355  0.208360  0.649450              8   
6   5875ebb3-bba7-4356-bb0e-9f3c7679c975  0.143218  0.706232              8   
0   1f59c631-ca0d-47d9-aac2-8da2dbc1f134  0.137991  0.711392              8   
22  feaf28a3-4c63-40e2-9b33-a5a00273f91b  0.112561  0.738237              8   
15  ca79f7e0-b57e-471e-962e-6bd2c77eac2f  0.069869  0.792293              8   
12  ba6585d8-4881-4569-a4ef-e458cb9dabd6  0.057368  0.811395              8   

    non_holiday_count  
3                  65  
8                  65  
1                  65  
20                 65  
11        