In [2]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab-only API). The input and output
# CSV paths used by the main-execution cell live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import os

In [4]:
def load_relert_data(file_path):
    """Read the relERT CSV at ``file_path`` into a DataFrame.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The loaded DataFrame with its 'ERT' column converted to numeric
        (values that cannot be parsed become NaN), or None when the file
        does not exist, a required column is absent, or reading fails.
        Every failure is reported with a printed message.
    """
    if not os.path.exists(file_path):
        print(f"Error: File {file_path} does not exist.")
        return None

    expected = ('Algorithm', 'Function', 'Instance', 'Dimension', 'ERT')
    try:
        frame = pd.read_csv(file_path)
        if any(name not in frame.columns for name in expected):
            print(f"Error: Missing required columns in {file_path}. Found: {frame.columns.tolist()}")
            return None
        # Non-numeric ERT entries are coerced to NaN instead of raising.
        frame['ERT'] = pd.to_numeric(frame['ERT'], errors='coerce')
    except Exception as e:
        print(f"Error loading data: {e}")
        return None
    return frame

In [5]:
def select_top_algorithms(data, top_n=10):
    """Rank algorithms by mean ERT and return the names of the best ``top_n``.

    Positive-infinite ERT values are counted as 1e6 when averaging. The
    substitution is applied to a derived Series only, so ``data`` itself is
    left untouched and later steps still see the original ERT values.
    NaN ERT values are ignored by the mean (pandas default).

    Args:
        data: DataFrame with 'Algorithm' and 'ERT' columns, or None.
        top_n: Maximum number of algorithm names to return.

    Returns:
        List of algorithm names ordered by ascending mean ERT (ties keep the
        sorted-by-name order produced by the group-by), or None when ``data``
        is None or a required column is missing.
    """
    if data is None:
        return None
    try:
        # Substitute on a new Series; assigning back into ``data`` would
        # silently rewrite the caller's frame (and anything saved from it).
        ert = data['ERT'].replace([float('inf')], 1e6)
        # Mean ERT per algorithm, indexed by algorithm name.
        mean_ert = ert.groupby(data['Algorithm']).mean()
        # Stable sort so that equal means are ordered deterministically.
        ranked = mean_ert.sort_values(kind='mergesort')
        return ranked.head(top_n).index.tolist()
    except KeyError as e:
        print(f"Error: Missing column {e}")
        return None

In [6]:
def filter_data_by_algorithms(data, top_algorithms):
    """Keep only the rows whose 'Algorithm' value is in ``top_algorithms``.

    Args:
        data: DataFrame with an 'Algorithm' column, or None.
        top_algorithms: Collection of algorithm names to keep, or None.

    Returns:
        The matching rows of ``data`` (original index preserved), or None
        when either argument is None or the 'Algorithm' column is missing.
    """
    if data is None:
        return None
    if top_algorithms is None:
        return None
    try:
        keep_mask = data['Algorithm'].isin(top_algorithms)
        selected_rows = data[keep_mask]
    except KeyError as e:
        print(f"Error: Missing column {e}")
        return None
    return selected_rows

In [7]:
def save_results(filtered_data, output_path):
    """Write ``filtered_data`` to ``output_path`` as CSV and print a summary.

    Args:
        filtered_data: DataFrame with an 'Algorithm' column, or None.
        output_path: Destination of the CSV file (written without the index).

    Returns:
        None. When ``filtered_data`` is None nothing is written and a notice
        is printed instead.
    """
    if filtered_data is None:
        print("No results to save.")
        return

    filtered_data.to_csv(output_path, index=False)
    print(f"Filtered data for top 10 algorithms saved to {output_path}")
    row_count = len(filtered_data)
    print(f"Number of rows in filtered data: {row_count}")
    algorithm_names = filtered_data['Algorithm'].unique().tolist()
    print(f"Algorithms included: {algorithm_names}")

In [8]:
# Main execution: load the relERT table, pick the 10 algorithms with the
# lowest mean ERT, and save only their rows to a new CSV on the mounted Drive.
if __name__ == "__main__":
    relert_file = '/content/drive/MyDrive/Wail-Projet-F/Data-00/optimization_data_relert.csv'
    output_file = '/content/drive/MyDrive/Wail-Projet-F/Data-02/C5_top_10_algorithms_data.csv'

    data = load_relert_data(relert_file)
    if data is None:
        print("Failed to load data.")
    else:
        top_algorithms = select_top_algorithms(data, top_n=10)
        if top_algorithms is None:
            print("Failed to select top algorithms.")
        else:
            print(f"Top 10 Algorithms by Mean ERT: {top_algorithms}")
            filtered_data = filter_data_by_algorithms(data, top_algorithms)
            save_results(filtered_data, output_file)

Top 10 Algorithms by Mean ERT: ['CMAES-APOP-KMA_Nguyen', 'DE-BFGS_voglis_noiseless', 'ad-CMA-ES_Gissler', 'adm-CMA-ES_Gissler', 's-CMA-ES_Gissler', 'dm-CMA-ES_Gissler', 'a-CMA-ES', 'a-CMA-ES_Gissler', 'sd-CMA-ES_Gissler', 'BIPOP-CMA-ES']
Filtered data for top 10 algorithms saved to /content/drive/MyDrive/Wail-Projet-F/Data-02/C5_top_10_algorithms_data.csv
Number of rows in filtered data: 4800
Algorithms included: ['BIPOP-CMA-ES', 'CMAES-APOP-KMA_Nguyen', 'DE-BFGS_voglis_noiseless', 'a-CMA-ES', 'a-CMA-ES_Gissler', 'ad-CMA-ES_Gissler', 'adm-CMA-ES_Gissler', 'dm-CMA-ES_Gissler', 's-CMA-ES_Gissler', 'sd-CMA-ES_Gissler']
