In [4]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so the simulated samples are identical on every run
np.random.seed(42)

# Draw 30 normally distributed purchase amounts per customer segment.
# The draw order (Bronze, Silver, Gold) fixes which random values each segment receives.
bronze_purchases = np.random.normal(50, 10, 30)
silver_purchases = np.random.normal(65, 12, 30)
gold_purchases = np.random.normal(80, 15, 30)

# Long-format table: one row per purchase, labelled with its segment
df_anova = pd.DataFrame(
    {
        'Segment': [name for name in ('Bronze', 'Silver', 'Gold') for _ in range(30)],
        'Purchase_Amount': np.concatenate(
            (bronze_purchases, silver_purchases, gold_purchases)
        ),
    }
)
df_anova

Unnamed: 0,Segment,Purchase_Amount
0,Bronze,54.967142
1,Bronze,48.617357
2,Bronze,56.476885
3,Bronze,65.230299
4,Bronze,47.658466
...,...,...
85,Gold,72.473644
86,Gold,93.731032
87,Gold,84.931267
88,Gold,72.053597


In [10]:
from scipy.stats import f_oneway

# One Series of purchase amounts per segment, selected with a boolean row mask
bronze = df_anova.loc[df_anova['Segment'] == 'Bronze', 'Purchase_Amount']
silver = df_anova.loc[df_anova['Segment'] == 'Silver', 'Purchase_Amount']
gold = df_anova.loc[df_anova['Segment'] == 'Gold', 'Purchase_Amount']

# One-way ANOVA across the three segments; unpack the F statistic and the p-value
f_stat, p_value = f_oneway(bronze, silver, gold)
f_stat

54.20471673148835

In [12]:
# Show the p-value returned by f_oneway alongside f_stat
p_value

5.163653889699995e-16

In [14]:
# p_value < 0.05, so I interpret it as follows: if the null hypothesis (all three segment means are equal) were true,
# there would be less than a 5% chance of seeing differences between the group means at least as extreme as these
# by chance alone. I therefore reject the null hypothesis and conclude that at least one segment mean differs
# from the others.

In [18]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Tukey HSD pairwise comparison of purchase amounts between segments at alpha = 0.05
tukey = pairwise_tukeyhsd(
    endog=df_anova['Purchase_Amount'],
    groups=df_anova['Segment'],
    alpha=0.05,
)
print(tukey)

Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj  lower    upper  reject
----------------------------------------------------
Bronze   Gold  32.0747   0.0  24.7274  39.422   True
Bronze Silver  15.4275   0.0   8.0802 22.7748   True
  Gold Silver -16.6472   0.0 -23.9945 -9.2999   True
----------------------------------------------------


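Since every pairwise comparison is rejected at a family-wise error rate of 0.05, we conclude that the mean Gold purchase amount is significantly greater than the mean Bronze purchase amount (difference ≈ 32.07),
the mean Silver purchase amount is significantly greater than the mean Bronze purchase amount (difference ≈ 15.43),
and the mean Gold purchase amount is significantly greater than the mean Silver purchase amount (difference ≈ 16.65); these differences are unlikely to be due to random variation alone.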