In [45]:
import pandas as pd

vivit_metrics = pd.read_csv("vivit_metrics_new/vivit_validation_metrics_all_runs.csv")
vivit_metrics.fillna(0, inplace=True)

vivit_precision_metrics = vivit_metrics[vivit_metrics["Metric"] == "Precision"]
vivit_recall_metrics = vivit_metrics[vivit_metrics["Metric"] == "Recall"]
vivit_accuracy_metrics = vivit_metrics[vivit_metrics["Metric"] == "Accuracy"]

convnext_metrics = pd.read_csv("convnext_metrics_new/convnext_validation_metrics_all_runs.csv")
convnext_metrics.fillna(0, inplace=True)

convnext_precision_metrics = convnext_metrics[convnext_metrics["Metric"] == "Precision"]
convnext_recall_metrics = convnext_metrics[convnext_metrics["Metric"] == "Recall"]
convnext_accuracy_metrics = convnext_metrics[convnext_metrics["Metric"] == "Accuracy"]

In [46]:
from scipy.stats import shapiro, levene
import numpy as np
from matplotlib.colors import ListedColormap
from matplotlib.colors import LinearSegmentedColormap


def interpret_pvalue(pvalue: float, threshold: float = 0.05) -> str:
    """Interpret p-value against significance threshold"""
    return "PASSED" if pvalue > threshold else "FAILED"


def check_normality(metric_name: str, values: np.ndarray) -> None:
    """Test and interpret normality for a given metric"""
    statistic, pvalue = shapiro(values)
    result = interpret_pvalue(pvalue)
    print(f"{metric_name:10} {result:8} (p={pvalue:.4f})")
    if result == "FAILED":
        print(f"          WARNING: {metric_name} is not normally distributed!")


def check_equal_variances(
    sample1: np.ndarray,
    sample2: np.ndarray,
    metric_name : str,
) -> None:
    """Test and interpret equality of variances across metrics"""
    statistic, pvalue = levene(sample1, sample2)
    result = interpret_pvalue(pvalue)
    print(f"\nEqual Variances Test: {metric_name} {result} (p={pvalue:.4f})")
    if result == "FAILED":
        print(f"WARNING: {metric_name} Metrics have significantly different variances!")

# Statistics

## Normality

### Vivit

In [47]:
# Vivit
vivit_accuracy_values = pd.to_numeric(vivit_accuracy_metrics["Value"], errors='coerce')
vivit_recall_values = pd.to_numeric(vivit_recall_metrics["Value"], errors='coerce')
vivit_precision_values = pd.to_numeric(vivit_precision_metrics["Value"], errors='coerce')

print("\nNormality Tests (Shapiro-Wilk):")
print("-" * 40)
check_normality("Accuracy", vivit_accuracy_values)
check_normality("Recall", vivit_recall_values)
check_normality("Precision", vivit_precision_values)

print("\nEquality of Variance Tests (Levene):")
print("-" * 40)
vivit_precision_values


Normality Tests (Shapiro-Wilk):
----------------------------------------
Accuracy   PASSED   (p=0.2045)
Recall     PASSED   (p=0.9055)
Precision  PASSED   (p=0.9133)

Equality of Variance Tests (Levene):
----------------------------------------


1     0.750000
4     0.714286
7     0.636364
10    0.750000
13    0.500000
16    1.000000
19    0.625000
22    0.800000
25    0.615385
28    0.888889
Name: Value, dtype: float64

### Convnext

In [None]:
# Vivit
convnext_accuracy_values = convnext_accuracy_metrics["Value"].astype(float)
convnext_recall_values = convnext_recall_metrics["Value"].astype(float)
convnext_precision_values = convnext_precision_metrics["Value"].astype(float).reset_index(drop=True)

print("\nNormality Tests (Shapiro-Wilk):")
print("-" * 40)
check_normality("Accuracy", convnext_accuracy_values)
check_normality("Recall", convnext_recall_values)
check_normality("Precision", convnext_precision_values)
convnext_accuracy_values


Normality Tests (Shapiro-Wilk):
----------------------------------------
Accuracy   PASSED   (p=0.0549)
Recall     PASSED   (p=0.1621)
Precision  FAILED   (p=0.0084)


0     0.48
3     0.64
6     0.56
9     0.60
12    0.64
15    0.68
18    0.64
21    0.48
24    0.60
27    0.64
Name: Value, dtype: float64

## Variances

In [39]:
print("\nEquality of Variance Tests (Levene):")
print("-" * 40)

check_equal_variances(vivit_accuracy_metrics, convnext_accuracy_metrics, "Accuracy")
check_equal_variances(vivit_recall_metrics, convnext_recall_metrics, "Recall")
check_equal_variances(vivit_precision_metrics, convnext_precision_metrics, "Precision")



Equality of Variance Tests (Levene):
----------------------------------------


TypeError: ufunc 'divide' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


def create_metrics_boxplot(metrics: pd.DataFrame, name: str) -> None:
    """Create a box plot comparing accuracy, precision and recall"""
    plt.rcParams["font.family"] = "sans-serif"
    plt.rcParams["font.size"] = 12

    plt.figure(figsize=(12, 8))

    colors = sns.color_palette("mako", n_colors=3)

    sns.boxplot(
        x="Metric",
        y="Value",
        hue="Metric",
        data=metrics,
        palette=colors,
        legend=False,
    )

    plt.title(f"Distribution of {name} Performance Metrics", pad=20, fontsize=14)
    plt.xlabel("Metric", fontsize=12)
    plt.ylabel("Score", fontsize=12)
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.xticks(rotation=0)

    plt.tight_layout()
    plt.show()


# Create and display the plot
create_metrics_boxplot(vivit_metrics, name="ViViT")

KeyboardInterrupt: 