In [None]:
from pymoo.operators.selection.tournament import TournamentSelection

# Instantiate pymoo's TournamentSelection with its default arguments. The
# instance is not bound to a name, so nothing later in this file can use it.
# NOTE(review): presumably a scratch notebook cell that only displays the
# object or checks that it can be constructed -- confirm whether it is
# still needed.
TournamentSelection()

In [None]:
import numpy as np
from scipy.stats import wilcoxon


def compare_architectures(scores_a, scores_b, alpha=0.05, architecture_names=None):
    """
    Compare two lists of paired performance scores using Wilcoxon Signed-Rank Test.

    The test decides *whether* the paired scores differ significantly; the
    direction of a significant difference is taken from the sign of the
    signed-rank sum (W+ - W-), i.e. from the same quantity the test is built
    on, so the reported winner can never contradict the test.

    Parameters:
    -----------
    scores_a : array-like
        Performance scores for architecture A (e.g., accuracy, F1-score).
        Higher is assumed to be better.
    scores_b : array-like
        Performance scores for architecture B (must be same length as scores_a)
    alpha : float, default=0.05
        Significance level for the statistical test
    architecture_names : tuple, default=None
        Optional names for the architectures (name_a, name_b)

    Returns:
    --------
    str
        "<name_a> is better", "<name_b> is better", or "Can't tell", where the
        names default to "A" and "B". "Can't tell" is returned when the test
        is not significant, when all paired differences are zero, or when the
        scores contain NaN.

    Raises:
    -------
    ValueError
        If the inputs are not one-dimensional, have different lengths, or
        contain fewer than 3 paired observations
    """
    # Local import: only needed to derive the direction of a significant result.
    from scipy.stats import rankdata

    # Work in floating point so that subtraction cannot wrap around for
    # unsigned integer inputs.
    scores_a = np.asarray(scores_a, dtype=float)
    scores_b = np.asarray(scores_b, dtype=float)

    # Validation
    if scores_a.ndim != 1 or scores_b.ndim != 1:
        raise ValueError("Both score lists must be one-dimensional")

    if len(scores_a) != len(scores_b):
        raise ValueError("Both score lists must have the same length")

    if len(scores_a) < 3:
        raise ValueError("Need at least 3 paired observations for meaningful results")

    # Set default names if not provided
    if architecture_names is None:
        name_a, name_b = "A", "B"
    else:
        name_a, name_b = architecture_names

    diffs = scores_a - scores_b

    # Degenerate inputs are handled explicitly instead of relying on SciPy:
    # depending on the SciPy version, all-zero differences either raise or
    # yield a NaN p-value, and NaN scores propagate into a NaN p-value.
    if np.isnan(diffs).any() or not diffs.any():
        return "Can't tell"

    # alternative='two-sided' tests whether the paired differences are
    # symmetric about zero.
    _, p_value = wilcoxon(scores_a, scores_b, alternative="two-sided")

    # A NaN p-value compares False against everything, so test it explicitly;
    # otherwise it would silently fall through to the "significant" branch.
    if np.isnan(p_value) or p_value >= alpha:
        return "Can't tell"

    # Direction of the effect: rank the non-zero |differences| (zeros are
    # discarded by the default zero_method='wilcox') and sum the signed ranks.
    nonzero = diffs[diffs != 0]
    signed_rank_sum = float(np.sum(np.sign(nonzero) * rankdata(np.abs(nonzero))))

    if signed_rank_sum > 0:
        return f"{name_a} is better"
    if signed_rank_sum < 0:
        return f"{name_b} is better"
    return "Can't tell"


# Example usage and demonstration
if __name__ == "__main__":
    # Example 1: ten paired runs, one (architecture A, architecture B) score
    # tuple per run. The differences are mixed in sign (A is ahead in seven
    # runs, B in three), so this is not a clear-cut win for either side.
    paired_runs = [
        (75.80645161290323, 74.19354838709677),
        (80.64516129032258, 67.74193548387096),
        (48.38709677419355, 45.16129032258065),
        (41.935483870967744, 38.70967741935484),
        (85.48387096774194, 83.87096774193549),
        (75.80645161290323, 70.96774193548387),
        (79.03225806451613, 82.25806451612904),
        (72.58064516129032, 75.80645161290323),
        (72.58064516129032, 70.96774193548387),
        (66.12903225806451, 79.03225806451613),
    ]

    # Split the pairs back into one list per architecture.
    scores_arch_a, scores_arch_b = (list(column) for column in zip(*paired_runs))

    result = compare_architectures(scores_arch_a, scores_arch_b)
    print(f"Example 1: {result}")

Example 1: Can't tell
