In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import Tuple

In [18]:
# Problem dimensionalities to process; the driver loop at the end of the
# notebook builds one ECDF CSV per entry.
DIMENSIONS: Tuple[int, ...] = (5, 15, 30)

In [8]:
def generate_thresholds_F(dimensions, num_thresholds=41, steps_per_decade=10):
    """Generate log-spaced thresholds for function-evaluation budgets.

    Threshold ``i`` is ``dimensions * 10 ** (i / steps_per_decade)``. With the
    defaults this yields 41 values running from ``dimensions`` up to
    ``dimensions * 10 ** 4`` in exponent steps of 0.1.

    Args:
        dimensions: Multiplier applied to every power of ten.
        num_thresholds: Number of thresholds to produce (default 41).
        steps_per_decade: Thresholds per factor of ten (default 10, i.e. an
            exponent step of 0.1).

    Returns:
        list: The thresholds, in increasing order for a positive
        ``dimensions``. A value within 1e-7 of a whole number is returned as
        that ``int``; every other value is returned as computed.
    """
    thresholds = []
    for i in range(num_thresholds):
        # Derive the exponent from the index instead of accumulating
        # ``w += 0.1``: repeated float addition drifts (ten additions of 0.1
        # give 0.9999999999999999, not 1.0) and that error leaked into every
        # non-integer threshold.
        threshold = dimensions * (10 ** (i / steps_per_decade))
        nearest = round(threshold)
        # Snap values that are whole numbers up to float noise to a real int.
        thresholds.append(nearest if abs(threshold - nearest) <= 1e-7 else threshold)
    return thresholds

In [9]:
def generate_thresholds_J(num_thresholds=51, start_exponent=-8, steps_per_decade=5):
    """Generate log-spaced thresholds for solution quality.

    Threshold ``i`` is ``10 ** (start_exponent + i / steps_per_decade)``. With
    the defaults this yields 51 values from ``10 ** -8`` up to ``10 ** 2`` in
    exponent steps of 0.2.

    Args:
        num_thresholds: Number of thresholds to produce (default 51).
        start_exponent: Base-10 exponent of the first threshold (default -8).
        steps_per_decade: Thresholds per factor of ten (default 5, i.e. an
            exponent step of 0.2).

    Returns:
        list: The thresholds in increasing order. A value within 1e-9 of a
        non-zero whole number is returned as that ``int``; every other value
        is returned as computed.
    """
    thresholds = []
    first_step = start_exponent * steps_per_decade
    for i in range(num_thresholds):
        # One division per exponent instead of accumulating ``w += 0.2``,
        # so rounding error does not build up across iterations.
        exponent = (first_step + i) / steps_per_decade
        threshold = 10 ** exponent
        nearest = round(threshold)
        # The tolerance is absolute, so without the ``nearest`` check any
        # threshold below 1e-9 would be "snapped" to 0.
        if nearest and abs(threshold - nearest) <= 1e-9:
            thresholds.append(nearest)
        else:
            thresholds.append(threshold)
    return thresholds

In [10]:
def count_exceeding_thresholds(value, quality_thresholds):
    """Count the quality thresholds that lie strictly above ``value``.

    Args:
        value: The value to compare against each threshold.
        quality_thresholds: Any iterable of thresholds; order does not matter.

    Returns:
        int: How many thresholds ``t`` satisfy ``value < t``. A threshold
        equal to ``value`` is not counted, and a NaN ``value`` yields 0
        because every comparison with NaN is false.
    """
    # No reversed(): the count is order-independent, and reversed() rejected
    # generators and other one-shot iterables.
    return sum(1 for threshold in quality_thresholds if value < threshold)

In [11]:
def process_ecdf(raw_data_path, output_path, dimensions, sheet_name=None):
    """Convert raw per-run fitness histories into a threshold-count table and save it as CSV.

    For every evaluation threshold from ``generate_thresholds_F(dimensions)``
    and every column of the worksheet, the smallest value among the first
    ``int(threshold)`` rows is taken and passed to
    ``count_exceeding_thresholds`` together with the thresholds from
    ``generate_thresholds_J()``. The output has one row per evaluation
    threshold and one column (``Run_1``, ``Run_2``, ...) per worksheet column.

    Args:
        raw_data_path: Path of the Excel workbook holding the raw data.
            Presumably each column is one run and each row one function
            evaluation -- confirm against whatever produces the workbook.
        output_path: Destination of the CSV file.
        dimensions: Problem dimensionality; scales the evaluation thresholds
            and selects the default worksheet.
        sheet_name: Worksheet to read. Defaults to ``rosenbrock_<dimensions>``.

    Raises:
        ValueError: If a worksheet name is given (or defaulted) that the
            workbook does not contain; the message lists the available sheets.
    """
    if sheet_name is None:
        sheet_name = f"rosenbrock_{dimensions}"

    # Generate thresholds
    eval_thresholds = generate_thresholds_F(dimensions)
    quality_thresholds = generate_thresholds_J()

    # Load raw data. Check the sheet name up front so a mismatch reports which
    # worksheets actually exist instead of only "not found".
    with pd.ExcelFile(raw_data_path) as workbook:
        if isinstance(sheet_name, str) and sheet_name not in workbook.sheet_names:
            raise ValueError(
                f"Worksheet named '{sheet_name}' not found in {raw_data_path}; "
                f"available worksheets: {workbook.sheet_names}"
            )
        df = workbook.parse(sheet_name=sheet_name)
    num_runs = df.shape[1]

    # One output row per evaluation threshold, one entry per run.
    ecdf_data = []
    for eval_threshold in eval_thresholds:
        budget = int(eval_threshold)
        eval_results = []
        for _, run_values in df.items():
            # Series.min() skips NaN, whereas the built-in min() over a list
            # returns an order-dependent result when NaN is present (e.g. runs
            # of unequal length padded with empty cells). iloc makes the
            # "first `budget` rows" slice explicitly positional.
            best_value = run_values.iloc[:budget].min()
            if pd.isna(best_value):
                # Empty or all-NaN slice: no value available, so no threshold
                # can be counted.
                best_value = float('inf')

            eval_results.append(count_exceeding_thresholds(best_value, quality_thresholds))

        ecdf_data.append(eval_results)

    # Convert results to DataFrame
    ecdf_df = pd.DataFrame(ecdf_data, columns=[f"Run_{i+1}" for i in range(num_runs)])

    # Save the processed ECDF data
    ecdf_df.to_csv(output_path, index=False, header=True)
    print(f"ECDF data saved to {output_path}")

In [12]:
def plot_ecdf(data_path):
    """Draw every column of a processed ECDF CSV as a line on a single figure.

    Args:
        data_path: Path of a CSV file readable by ``pandas.read_csv``. Each
            column becomes one labelled line, plotted against the row index.
    """
    run_counts = pd.read_csv(data_path)

    # One line per column (i.e. per run), all on the same axes.
    fig, ax = plt.subplots(figsize=(10, 6))
    for run_name in run_counts.columns:
        ax.plot(run_counts[run_name], label=run_name)

    ax.set_xlabel('Function Evaluation Threshold')
    ax.set_ylabel('Cumulative Count (Exceeding Quality Threshold)')
    ax.set_title('Empirical Cumulative Distribution Function (ECDF)')
    # Anchor the legend outside the axes so it does not cover the curves.
    ax.legend(title='Runs', bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()

    plt.show()

In [20]:
# Build one ECDF CSV per problem dimension. The workbook path does not depend
# on the dimension, so it is set once before the loop.
raw_data_path = "results.xlsx"
for dim in DIMENSIONS:
    output_path = f"ecdf_data_dim_{dim}.csv"
    print(f"Processing ECDF for dimension {dim}...")
    process_ecdf(raw_data_path, output_path, dim)

Processing ECDF for dimension 5...


ValueError: Worksheet named 'rosenbrock_5' not found