In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Build the working frame: the four Iris measurements plus the class label
# (stored in the "target" column) taken from the sklearn bunch.
iris = load_iris()
df = pd.DataFrame(
    iris.data,
    columns=["sepalLength", "sepalWidth", "petalLength", "petalWidth"],
).assign(target=iris.target)

# Per-class average of sepalWidth, as a quick look before any testing.
print(df.groupby("target")["sepalWidth"].mean())

# One-way ANOVA on sepalWidth across the classes found in `target`.
# Null hypothesis (H0): every class has the same mean sepalWidth.
ALPHA = 0.05  # significance level used for the ANOVA decision below

# One sample of sepalWidth values per class label; groupby yields the groups
# in sorted label order, so this works for any number of classes.
samples = [widths for _, widths in df.groupby("target")["sepalWidth"]]
F_value, P_value = stats.f_oneway(*samples)
print("F_value =", F_value, ", P_value =", P_value)

# The conclusion is driven by the p-value only. Comparing the F statistic to 1
# is not a significance test (F must be judged against its null distribution,
# which is exactly what the p-value does), and a non-significant result means
# we fail to reject H0 -- it does not show that the means are equal.
if P_value < ALPHA:
    print("****** SAMPLES HAVE DIFFERENT MEAN ******")
    print("****** REJECT NULL HYPOTHESIS ******")
else:
    print("****** NO EVIDENCE THAT SAMPLE MEANS DIFFER ******")
    print("****** FAIL TO REJECT NULL HYPOTHESIS ******")

# Post-hoc step: Tukey HSD compares sepalWidth between every pair of classes,
# with alpha passed as 0.05.
tukey = pairwise_tukeyhsd(
    endog=df["sepalWidth"],
    groups=df["target"],
    alpha=0.05,
)
print(tukey)

target
0    3.428
1    2.770
2    2.974
Name: sepalWidth, dtype: float64
F_value = 49.160040089612075 , P_value = 4.49201713330911e-17
****** SAMPLES HAVE DIFFERENT MEAN ******
****** REJECT NULL HYPOTHESIS ******
Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
     0      1   -0.658    0.0 -0.8189 -0.4971   True
     0      2   -0.454    0.0 -0.6149 -0.2931   True
     1      2    0.204 0.0088  0.0431  0.3649   True
----------------------------------------------------
