# Statistical Analysis: ANOVA with Tukey HSD

This notebook compares the performance of different driver identification algorithms. It applies Tukey's Honestly Significant Difference (HSD) test (the post-hoc pairwise comparison that normally follows an ANOVA) and, as a non-parametric alternative, pairwise Kruskal-Wallis tests.

Reference: https://scipy.github.io/devdocs/reference/generated/scipy.stats.tukey_hsd.html

In [None]:
# Import required libraries
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from scipy.stats import tukey_hsd
from statannotations.Annotator import Annotator

%matplotlib inline

## Load Experimental Data

Load the algorithm execution results from the Excel file containing performance metrics.

In [None]:
# Read the experimental results
executions = pd.read_excel("data/executions.xlsx")

# Fail fast, with a readable message, if the sheet lacks a column that the
# later cells index by name. The ".1" columns hold the concept-drift metrics.
required_columns = ["algorithm", "R_HV", "R_AV", "R_W", "R_HV.1", "R_AV.1", "R_W.1"]
missing_columns = [col for col in required_columns if col not in executions.columns]
if missing_columns:
    raise KeyError(
        f"data/executions.xlsx is missing expected columns: {missing_columns}"
    )

# Preview the first rows only instead of dumping the whole table
executions.head()

In [None]:
# Build the per-algorithm GroupBy once; the metric-extraction cells pull each
# algorithm's rows out of it with get_group()
test_data = executions.groupby(by="algorithm")

In [None]:
# Collect, for every algorithm, the array of values of each recall metric
# R_HV: Recall for Human Vehicles
# R_AV: Recall for Autonomous Vehicles
# R_W: Weighted Recall
# Each result is a dict {algorithm name -> numpy array}; keys follow the order
# in which the algorithms first appear in `executions`.
R_HV, R_AV, R_W = (
    {
        name: test_data.get_group(name)[metric].values
        for name in executions["algorithm"].unique()
    }
    for metric in ("R_HV", "R_AV", "R_W")
)

R_HV

## Statistical Testing: Tukey HSD and Kruskal-Wallis Test

### Test Executions

#### Human Vehicle (HV) Recall Analysis and Visualization

In [None]:
# Run Tukey's HSD once per recall metric (HV, AV, weighted).
# Each call compares the three algorithms pairwise; the argument order
# (biLSTM, Li_et_al + our features, Li_et_al) fixes the row/column order of
# the matrices in the returned result objects.
res_HV, res_AV, res_W = (
    tukey_hsd(
        metric["biLSTM"],
        metric["Li_et_al + our features"],
        metric["Li_et_al"],
    )
    for metric in (R_HV, R_AV, R_W)
)

In [None]:
# Display p-values from Tukey HSD test for HV recall
# Matrix showing pairwise comparison significance: entry [i, j] is the p-value
# for group i vs. group j, with groups numbered in the order they were passed
# to tukey_hsd (0 = biLSTM, 1 = Li_et_al + our features, 2 = Li_et_al)
res_HV.pvalue

In [None]:
# Perform Kruskal-Wallis test for pairwise algorithm comparisons (HV recall)
# This non-parametric test is used when normality assumptions are not met
# NOTE: each pair is tested independently; the p-values are not corrected for
# multiple comparisons.
algorithm_names = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

# Build the table in one step from a list of records (one per unordered pair)
# instead of enlarging an empty DataFrame row by row.
kruskal_results = pd.DataFrame(
    [
        {
            "algorithm1": first,
            "algorithm2": second,
            "pvalue": stats.kruskal(R_HV[first], R_HV[second]).pvalue,
        }
        for first, second in itertools.combinations(algorithm_names, 2)
    ],
    columns=["algorithm1", "algorithm2", "pvalue"],
)

kruskal_results

In [None]:
# Visualize HV recall distribution across algorithms using box plots
# The tick labels below are assigned by position, so the boxes must be drawn
# in a known, fixed order; otherwise a box can end up under the wrong label.
algorithm_order = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

ax = sns.boxplot(data=executions, x="algorithm", y="R_HV", order=algorithm_order)

# Define algorithm pairs for statistical annotation, in the row order of
# `kruskal_results` (only consumed by the optional annotation block below)
pairs = list(zip(kruskal_results["algorithm1"], kruskal_results["algorithm2"]))
p_values = kruskal_results["pvalue"].tolist()

# Set custom labels for better readability; pin the tick positions first so
# the labels are attached to a fixed set of ticks.
# Assumes "proposed" denotes the biLSTM model (first entry of algorithm_order)
# -- TODO confirm.
ax.set_xticks(range(len(algorithm_order)))
ax.set_xticklabels(["proposed", "Li_et_al + our features", "Li_et_al"])
# Raw string: "\o" is not a valid Python escape sequence
ax.set_ylabel(r"$\overline{R_{HV}}$")

# Statistical annotations are commented out for cleaner visualization
# annotator = Annotator(ax, pairs, data=executions, x="algorithm", y="R_HV")
# annotator.configure(text_format="simple", loc="inside")
# annotator.set_pvalues_and_annotate(p_values)

plt.show()

#### Autonomous Vehicle (AV) Recall Analysis and Visualization

In [None]:
# Perform Kruskal-Wallis test for AV recall metric, one test per algorithm pair
# NOTE: each pair is tested independently; the p-values are not corrected for
# multiple comparisons.
algorithm_names = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

# Build the table in one step from a list of records (one per unordered pair)
# instead of enlarging an empty DataFrame row by row.
kruskal_results = pd.DataFrame(
    [
        {
            "algorithm1": first,
            "algorithm2": second,
            "pvalue": stats.kruskal(R_AV[first], R_AV[second]).pvalue,
        }
        for first, second in itertools.combinations(algorithm_names, 2)
    ],
    columns=["algorithm1", "algorithm2", "pvalue"],
)

kruskal_results

In [None]:
# Visualize AV recall distribution across algorithms
# The tick labels below are assigned by position, so the boxes must be drawn
# in a known, fixed order; otherwise a box can end up under the wrong label.
algorithm_order = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

# Create box plot without outliers for cleaner visualization
ax = sns.boxplot(
    data=executions,
    x="algorithm",
    y="R_AV",
    order=algorithm_order,
    showfliers=False,
)

# Algorithm pairs and their p-values, in the row order of `kruskal_results`
# (only consumed by the optional annotation block below)
pairs = list(zip(kruskal_results["algorithm1"], kruskal_results["algorithm2"]))
p_values = kruskal_results["pvalue"].tolist()

# Pin the tick positions before labelling them so the labels are attached to
# a fixed set of ticks
ax.set_xticks(range(len(algorithm_order)))
ax.set_xticklabels(["biLSTM", "Li_et_al + our features", "Li_et_al"])
ax.set_ylabel("Mean R_AV")

# Statistical annotations are commented out for cleaner visualization
# annotator = Annotator(ax, pairs, data=executions, x="algorithm", y="R_AV")
# annotator.configure(text_format="simple", loc="inside")
# annotator.set_pvalues_and_annotate(p_values)

plt.show()

#### Weighted Recall (W) Analysis and Visualization

In [None]:
# Perform Kruskal-Wallis test for weighted recall metric, one test per
# algorithm pair
# NOTE: each pair is tested independently; the p-values are not corrected for
# multiple comparisons.
algorithm_names = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

# Build the table in one step from a list of records (one per unordered pair)
# instead of enlarging an empty DataFrame row by row.
kruskal_results = pd.DataFrame(
    [
        {
            "algorithm1": first,
            "algorithm2": second,
            "pvalue": stats.kruskal(R_W[first], R_W[second]).pvalue,
        }
        for first, second in itertools.combinations(algorithm_names, 2)
    ],
    columns=["algorithm1", "algorithm2", "pvalue"],
)

kruskal_results

In [None]:
# Create publication-ready visualization for weighted recall

# Define custom plot parameters for professional appearance
custom_params = {
    "axes.spines.right": False,
    "axes.spines.top": False,
    "figure.figsize": (12, 9),
    "text.color": "black",
    "xtick.color": "black",
    "ytick.color": "black",
    "axes.facecolor": "white",
    "axes.edgecolor": "black",
    "axes.labelcolor": "black",
}

# The tick labels below are assigned by position, so the boxes must be drawn
# in a known, fixed order; otherwise a box can end up under the wrong label.
algorithm_order = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

# NOTE: set_theme changes the global plotting defaults, so every figure drawn
# after this cell has run inherits this style as well.
sns.set_theme(style="ticks", rc=custom_params, font="Times New Roman", font_scale=2.7)

ax = sns.boxplot(data=executions, x="algorithm", y="R_W", order=algorithm_order)

# Pin the tick positions before labelling them so the labels are attached to
# a fixed set of ticks.
# Assumes "our" denotes the biLSTM model and "Literature" denotes Li_et_al
# -- TODO confirm.
ax.set_xticks(range(len(algorithm_order)))
ax.set_xticklabels(["our", "Literature + our features", "Literature"])
# Raw string: "\o" is not a valid Python escape sequence
ax.set_ylabel(r"$\overline{R_{W}}$")
ax.set_xlabel("Algorithm")

# Save figure with transparent background for publication
fig = ax.get_figure()
fig.savefig("Rw_test.png", transparent=True)

plt.show()

### Concept Drift Scenario Executions

#### Human Vehicle (HV) Recall Analysis with Concept Drift

In [None]:
# Extract concept drift performance metrics (columns with .1 suffix)
# These metrics evaluate algorithm performance under distribution shift conditions
# CAUTION: this rebinds R_HV / R_AV / R_W. Once this cell has run, re-running
# any earlier cell that reads those names will silently use the concept-drift
# values instead of the test-execution values.
R_HV, R_AV, R_W = (
    {
        name: test_data.get_group(name)[metric].values
        for name in executions["algorithm"].unique()
    }
    for metric in ("R_HV.1", "R_AV.1", "R_W.1")
)

R_HV

In [None]:
# Perform Kruskal-Wallis test for HV recall under concept drift conditions,
# one test per algorithm pair
# NOTE: each pair is tested independently; the p-values are not corrected for
# multiple comparisons.
algorithm_names = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

# Build the table in one step from a list of records (one per unordered pair)
# instead of enlarging an empty DataFrame row by row.
kruskal_results = pd.DataFrame(
    [
        {
            "algorithm1": first,
            "algorithm2": second,
            "pvalue": stats.kruskal(R_HV[first], R_HV[second]).pvalue,
        }
        for first, second in itertools.combinations(algorithm_names, 2)
    ],
    columns=["algorithm1", "algorithm2", "pvalue"],
)

kruskal_results

In [None]:
# Visualize HV recall under concept drift conditions
# The tick labels below are assigned by position, so the boxes must be drawn
# in a known, fixed order; otherwise a box can end up under the wrong label.
algorithm_order = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

ax = sns.boxplot(data=executions, x="algorithm", y="R_HV.1", order=algorithm_order)

# Algorithm pairs and their p-values, in the row order of `kruskal_results`
# (only consumed by the optional annotation block below)
pairs = list(zip(kruskal_results["algorithm1"], kruskal_results["algorithm2"]))
p_values = kruskal_results["pvalue"].tolist()

# Pin the tick positions before labelling them so the labels are attached to
# a fixed set of ticks
ax.set_xticks(range(len(algorithm_order)))
ax.set_xticklabels(["biLSTM", "Li_et_al + our features", "Li_et_al"])
ax.set_ylabel("Mean R_HV")

# Statistical annotations are commented out for cleaner visualization
# annotator = Annotator(ax, pairs, data=executions, x="algorithm", y="R_HV.1")
# annotator.configure(text_format="simple", loc="inside")
# annotator.set_pvalues_and_annotate(p_values)

plt.show()

#### Autonomous Vehicle (AV) Recall Analysis with Concept Drift

In [None]:
# Perform Kruskal-Wallis test for AV recall under concept drift conditions,
# one test per algorithm pair
# NOTE: each pair is tested independently; the p-values are not corrected for
# multiple comparisons.
algorithm_names = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

# Build the table in one step from a list of records (one per unordered pair)
# instead of enlarging an empty DataFrame row by row.
kruskal_results = pd.DataFrame(
    [
        {
            "algorithm1": first,
            "algorithm2": second,
            "pvalue": stats.kruskal(R_AV[first], R_AV[second]).pvalue,
        }
        for first, second in itertools.combinations(algorithm_names, 2)
    ],
    columns=["algorithm1", "algorithm2", "pvalue"],
)

kruskal_results

In [None]:
# Visualize AV recall under concept drift conditions
# The tick labels below are assigned by position, so the boxes must be drawn
# in a known, fixed order; otherwise a box can end up under the wrong label.
algorithm_order = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

ax = sns.boxplot(data=executions, x="algorithm", y="R_AV.1", order=algorithm_order)

# Algorithm pairs and their p-values, in the row order of `kruskal_results`
# (only consumed by the optional annotation block below)
pairs = list(zip(kruskal_results["algorithm1"], kruskal_results["algorithm2"]))
p_values = kruskal_results["pvalue"].tolist()

# Pin the tick positions before labelling them so the labels are attached to
# a fixed set of ticks
ax.set_xticks(range(len(algorithm_order)))
ax.set_xticklabels(["biLSTM", "Li_et_al + our features", "Li_et_al"])
ax.set_ylabel("Mean R_AV")

# Statistical annotations are commented out for cleaner visualization
# annotator = Annotator(ax, pairs, data=executions, x="algorithm", y="R_AV.1")
# annotator.configure(text_format="simple", loc="inside")
# annotator.set_pvalues_and_annotate(p_values)

plt.show()

#### Weighted Recall (W) Analysis with Concept Drift

In [None]:
# Perform Kruskal-Wallis test for weighted recall under concept drift
# conditions, one test per algorithm pair
# NOTE: each pair is tested independently; the p-values are not corrected for
# multiple comparisons.
algorithm_names = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

# Build the table in one step from a list of records (one per unordered pair)
# instead of enlarging an empty DataFrame row by row.
kruskal_results = pd.DataFrame(
    [
        {
            "algorithm1": first,
            "algorithm2": second,
            "pvalue": stats.kruskal(R_W[first], R_W[second]).pvalue,
        }
        for first, second in itertools.combinations(algorithm_names, 2)
    ],
    columns=["algorithm1", "algorithm2", "pvalue"],
)

kruskal_results

In [None]:
# Create publication-ready visualization for weighted recall under concept drift

# Define custom plot parameters for professional appearance
custom_params = {
    "axes.spines.right": False,
    "axes.spines.top": False,
    "figure.figsize": (12, 9),
    "text.color": "black",
    "xtick.color": "black",
    "ytick.color": "black",
    "axes.facecolor": "white",
    "axes.edgecolor": "black",
    "axes.labelcolor": "black",
}

# The tick labels below are assigned by position, so the boxes must be drawn
# in a known, fixed order; otherwise a box can end up under the wrong label.
algorithm_order = ["biLSTM", "Li_et_al + our features", "Li_et_al"]

# NOTE: set_theme changes the global plotting defaults, so every figure drawn
# after this cell has run inherits this style as well.
sns.set_theme(style="ticks", rc=custom_params, font="Times New Roman", font_scale=2.7)

# Create box plot without outliers for cleaner visualization
ax = sns.boxplot(
    data=executions,
    x="algorithm",
    y="R_W.1",
    order=algorithm_order,
    showfliers=False,
)

# Pin the tick positions before labelling them so the labels are attached to
# a fixed set of ticks.
# Assumes "our" denotes the biLSTM model and "Literature" denotes Li_et_al
# -- TODO confirm.
ax.set_xticks(range(len(algorithm_order)))
ax.set_xticklabels(["our", "Literature + our features", "Literature"])
# Raw string: "\o" is not a valid Python escape sequence
ax.set_ylabel(r"$\overline{R_{W}}$")
ax.set_xlabel("Algorithm")

# Save figure with transparent background for publication
fig = ax.get_figure()
fig.savefig("Rw_cd.png", transparent=True)

plt.show()