In [None]:
# Mount Google Drive at /content/drive. The algorithm folders read later and
# the Excel file written at the end all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import re
import pandas as pd

In [None]:
# Header lines that open a new instance section (both spellings are accepted).
_HEADER_PREFIXES = ('% f evaluations', '% function evaluation')

# "Fopt (<number>)" in a header line. Accepts plain integers, decimals and
# scientific notation (e.g. 79.48, -3, 7.948000000000e+01, 1E+02).
_FOPT_PATTERN = re.compile(r'Fopt \(([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\)')


def _section_rows(section, fopt, instance, algorithm, function, dimension, filter_last):
    """Convert one instance's (evaluations, fitness) pairs into output row dicts."""
    # With filter_last only the final recorded evaluation of the instance is kept.
    kept = section[-1:] if filter_last else section
    return [{
        'Algorithm': algorithm,
        'Function': function,
        'Dimension': dimension,
        'Instance': instance,
        'Function Evaluations': evals,
        'Fopt': fopt,
        'Measured Fitness': fitness,
    } for evals, fitness in kept]


def parse_dat_file(file_path, algorithm, function, dimension, filter_last=False, num_instances=5):
    """Parse a .dat file into one row per recorded evaluation.

    The file is read as a sequence of sections. A section starts at a header
    line (a line beginning with '% f evaluations' or '% function evaluation')
    that carries the instance's optimum as ``Fopt (<number>)``; the following
    non-comment lines are data lines whose 1st whitespace-separated field is
    the evaluation count and whose 4th field is the measured fitness.

    Exactly ``num_instances`` instances are returned: extra sections are
    ignored, and a file with fewer usable sections is rejected as a whole.
    A section is usable only if it has at least one parsable data line and
    its header contains a parsable Fopt.

    Args:
        file_path: Path of the .dat file to read.
        algorithm: Value stored in the 'Algorithm' column of every row.
        function: Value stored in the 'Function' column of every row.
        dimension: Value stored in the 'Dimension' column of every row.
        filter_last: If True, keep only the last data line of each instance.
        num_instances: Number of instances required/kept (default 5).

    Returns:
        A list of dicts with keys 'Algorithm', 'Function', 'Dimension',
        'Instance' (1-based), 'Function Evaluations', 'Fopt' and
        'Measured Fitness'; an empty list (after printing an error) if the
        file yields fewer than ``num_instances`` usable instances.
    """
    data = []
    current_fopt = None
    current_instance = 0
    current_section = []

    with open(file_path, 'r') as f:
        for line in f:
            # Stop as soon as the required number of instances has been stored.
            if current_instance >= num_instances:
                break

            if line.startswith(_HEADER_PREFIXES):
                # A new header closes the previous section; store it if usable.
                if current_section and current_fopt is not None:
                    data.extend(_section_rows(
                        current_section, current_fopt, current_instance + 1,
                        algorithm, function, dimension, filter_last))
                    current_instance += 1
                current_section = []

                match = _FOPT_PATTERN.search(line)
                if match:
                    current_fopt = float(match.group(1))
                else:
                    # Forget the previous Fopt: otherwise this section would be
                    # saved with the optimum of the preceding instance.
                    current_fopt = None
                    print(f"Warning: Invalid Fopt in header of {file_path}")
            elif not line.startswith('%') and line.strip():
                parts = line.split()
                if len(parts) >= 4:
                    try:
                        current_section.append((int(parts[0]), float(parts[3])))
                    except (ValueError, TypeError):
                        # Best effort: skip data lines with non-numeric fields.
                        continue

        # The final section has no following header to trigger its save.
        if current_section and current_fopt is not None and current_instance < num_instances:
            data.extend(_section_rows(
                current_section, current_fopt, current_instance + 1,
                algorithm, function, dimension, filter_last))
            current_instance += 1

    # Validate instance count
    if current_instance < num_instances:
        print(f"Error: {file_path} has only {current_instance} instances, expected {num_instances}. Skipping file.")
        return []

    return data

In [None]:
def generate_excel(algorithm_folders, output_file, filter_last=False,
                   dimensions=(2, 3, 5, 10), functions=range(1, 25)):
    """Collect the .dat results of several algorithms into one Excel sheet.

    For every algorithm folder, the sub-folder ``data_f<N>`` is searched for
    ``bbobexp_f<N>_DIM<D>_i1.dat`` or, failing that, ``bbobexp_f<N>_DIM<D>.dat``
    for each function number N and dimension D. Each file found is parsed with
    ``parse_dat_file``; files it rejects contribute no rows. Missing folders
    and files are reported with a printed warning and skipped.

    Args:
        algorithm_folders: Iterable of folder paths, one per algorithm. The
            last path component is used as the algorithm name.
        output_file: Path of the .xlsx file to write.
        filter_last: Passed through to ``parse_dat_file``; if True only the
            last evaluation of each instance is exported.
        dimensions: Dimensions to look for (default 2, 3, 5, 10).
        functions: Function numbers to look for (default 1..24).

    Returns:
        None. The sorted table is written to ``output_file`` in the sheet
        'Optimization Data' and a confirmation line is printed.
    """
    all_data = []

    for algo_folder in algorithm_folders:
        # normpath drops a trailing separator; without it basename() of
        # "path/to/ALGO/" is '' and every row would get a blank algorithm name.
        algo_name = os.path.basename(os.path.normpath(algo_folder))

        for func_num in functions:
            func_folder = os.path.join(algo_folder, f"data_f{func_num}")
            if not os.path.exists(func_folder):
                print(f"Warning: {func_folder} not found.")
                continue

            for dim in dimensions:
                # Two naming schemes exist; the "_i1" variant takes precedence.
                candidates = (
                    os.path.join(func_folder, f"bbobexp_f{func_num}_DIM{dim}_i1.dat"),
                    os.path.join(func_folder, f"bbobexp_f{func_num}_DIM{dim}.dat"),
                )
                file_path = next((p for p in candidates if os.path.exists(p)), None)
                if file_path is None:
                    print(f"Warning: No file found for f{func_num}_DIM{dim} in {func_folder}.")
                    continue

                # The function is stored as its number (1..24), not as "f1".
                # parse_dat_file returns [] for rejected files, so extend() is
                # a no-op in that case.
                all_data.extend(
                    parse_dat_file(file_path, algo_name, func_num, dim, filter_last))

    # Explicit columns keep the header row even when nothing was parsed.
    df = pd.DataFrame(all_data, columns=[
        'Algorithm', 'Function', 'Dimension', 'Instance',
        'Function Evaluations', 'Fopt', 'Measured Fitness'
    ])

    # Sort for consistency
    df = df.sort_values(['Algorithm', 'Function', 'Dimension', 'Instance', 'Function Evaluations'])

    # Save to Excel
    df.to_excel(output_file, index=False, sheet_name='Optimization Data')
    print(f"Excel file saved: {output_file}")

In [None]:
# Algorithm result folders to process, one folder per algorithm.
algorithm_folders = [
    "/content/drive/MyDrive/merged_folder/BIPOP-CMA-ES",
    "/content/drive/MyDrive/merged_folder/BIPOP-aCMA-STEP_loshchilov_noiseless",
    "/content/drive/MyDrive/merged_folder/BIPOPsaACM_loshchilov_noiseless",
    "/content/drive/MyDrive/merged_folder/CMA-CSA_Atamna",
    # TODO: list the remaining algorithm folders here. They were previously
    # elided with bare "." lines, which are a SyntaxError inside a list literal
    # and prevented this cell from running.
]

In [None]:
# Export every recorded evaluation (filter_last=False) of all listed
# algorithms into a single workbook on Drive.
output_file = "/content/drive/MyDrive/merged_folder/optimization_data.xlsx"
generate_excel(
    algorithm_folders=algorithm_folders,
    output_file=output_file,
    filter_last=False,
)

Excel file saved: /content/drive/MyDrive/merged_folder/optimization_data.xlsx
