In [None]:
import os
import pandas as pd
import yaml

def _read_run_args(args_file):
    """Return the selected training arguments stored in *args_file*.

    Returns an empty dict when the file does not exist or does not contain
    a YAML mapping (for example an empty file, for which ``yaml.safe_load``
    returns ``None``). Otherwise every summary column is present and holds
    ``None`` for keys missing from the YAML.
    """
    if not os.path.exists(args_file):
        return {}

    with open(args_file, 'r', encoding='utf-8') as f:
        args = yaml.safe_load(f)

    if not isinstance(args, dict):
        return {}

    # summary column name -> key inside args.yaml
    wanted = {
        'epochs_arg': 'epochs',
        'optimizer': 'optimizer',
        'lr0': 'lr0',
        'momentum': 'momentum',
        'weight_decay': 'weight_decay',
        'warmup_epochs': 'warmup_epochs',
        'imgsz': 'imgsz',
        'batch': 'batch',
        'device': 'device',
        'model_name': 'name',
    }
    return {column: args.get(key) for column, key in wanted.items()}


def create_detailed_summary_with_args(base_folder):
    """Summarise the first and last epoch of every training run in a folder.

    Each sub-folder of *base_folder* whose name starts with ``"train_"`` is
    treated as one run. For every run that has a non-empty ``results.csv``,
    the first and the last row of that file are collected (a single-row file
    therefore contributes the same row twice), extended with a ``run_name``
    column and, when ``args.yaml`` is readable, with selected training
    arguments.

    Args:
        base_folder: Directory that contains the run folders.

    Returns:
        A ``pandas.DataFrame`` with ``run_name`` as the first column, or
        ``None`` when no run yielded any rows.
    """
    all_rows = []  # One pandas Series per collected epoch row

    # sorted(): os.listdir returns entries in arbitrary order, which would
    # make the row order of the summary differ between machines.
    for folder_name in sorted(os.listdir(base_folder)):
        if not folder_name.startswith("train_"):  # change this to your own train path
            continue

        full_folder_path = os.path.join(base_folder, folder_name)
        results_file = os.path.join(full_folder_path, "results.csv")
        args_file = os.path.join(full_folder_path, "args.yaml")

        if not os.path.exists(results_file):
            print(f"No results.csv found in {folder_name}, skipping...")
            continue

        try:
            df = pd.read_csv(results_file)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header to parse; treat it like a
            # file without rows instead of aborting the whole scan.
            continue
        if df.empty:
            continue

        # Headers may be padded with whitespace (e.g. "   epoch"), which
        # would break name-based column lookups on the summary.
        df.columns = df.columns.str.strip()

        args_data = _read_run_args(args_file)

        # First epoch row, then last epoch row
        for row in (df.iloc[0].copy(), df.iloc[-1].copy()):
            for key, value in args_data.items():
                row[key] = value
            row['run_name'] = folder_name
            all_rows.append(row)

    if not all_rows:
        print("No valid result files found.")
        return None

    final_df = pd.DataFrame(all_rows)
    # Move 'run_name' to be the first column
    cols = ['run_name'] + [col for col in final_df.columns if col != 'run_name']
    return final_df[cols]

# Usage: build the summary for every run folder under base_folder.
base_folder = "../YOLO/material_koulutus"
summary_table = create_detailed_summary_with_args(base_folder)

# The helper returns None when no run produced usable results.
if summary_table is not None:
    print(summary_table)
    # Optional: save it as CSV in the current working directory (row index omitted)
    summary_table.to_csv("full_training_summary_with_args.csv", index=False)


In [None]:
def find_best_run(summary_table):
    """Print and return the run whose last recorded epoch has the best mAP50.

    For every ``run_name`` only the last row present in *summary_table* is
    considered; the run with the highest ``metrics/mAP50(B)`` among those
    rows wins. Non-numeric mAP50 values are treated as missing. The input
    table is not modified.

    Args:
        summary_table: DataFrame with at least the columns ``run_name``,
            ``epoch``, ``metrics/mAP50(B)`` and ``metrics/mAP50-95(B)``.
            Training-argument columns are optional and reported as
            ``N/A`` when absent. ``None`` is accepted and treated as
            "no runs".

    Returns:
        The winning row as a ``pandas.Series``, or ``None`` when there is
        no run with a numeric final mAP50 value.
    """
    map50_col = 'metrics/mAP50(B)'

    if summary_table is None or summary_table.empty:
        print("No runs to evaluate.")
        return None

    # Ensure metrics/mAP50(B) is a float. assign() builds a new frame, so
    # the caller's table keeps its original values.
    table = summary_table.assign(
        **{map50_col: pd.to_numeric(summary_table[map50_col], errors='coerce')}
    )

    # Only consider the last row of each run. tail(1) keeps the grouping
    # column in the result; groupby.apply is avoided because newer pandas
    # versions deprecate passing the grouping column into the applied
    # function. The stable sort orders the rows by run name, so ties on the
    # best score resolve to the alphabetically first run.
    last_epochs = (
        table.groupby('run_name', sort=False)
        .tail(1)
        .sort_values('run_name', kind='stable')
        .reset_index(drop=True)
    )

    scores = last_epochs[map50_col]
    if scores.isna().all():
        print("No run has a numeric final mAP50(B) value.")
        return None

    # Find the run with maximum mAP50 (NaN scores are skipped by idxmax)
    best_row = last_epochs.loc[scores.idxmax()]

    print("\n🏆 Best Run Found:")
    print(f"Run Name: {best_row['run_name']}")
    print(f"Final Epoch: {int(best_row['epoch'])}")
    print(f"Final mAP50(B): {best_row['metrics/mAP50(B)']:.5f}")
    print(f"Final mAP50-95(B): {best_row['metrics/mAP50-95(B)']:.5f}")
    print("\nTraining Arguments:")
    print(f"Optimizer: {best_row.get('optimizer', 'N/A')}")
    print(f"Learning Rate (lr0): {best_row.get('lr0', 'N/A')}")
    print(f"Momentum: {best_row.get('momentum', 'N/A')}")
    print(f"Weight Decay: {best_row.get('weight_decay', 'N/A')}")
    print(f"Warmup Epochs: {best_row.get('warmup_epochs', 'N/A')}")
    print(f"Batch Size: {best_row.get('batch', 'N/A')}")
    print(f"Image Size: {best_row.get('imgsz', 'N/A')}")
    print(f"Device: {best_row.get('device', 'N/A')}")
    print(f"Model Name: {best_row.get('model_name', 'N/A')}")

    return best_row

# Usage: report the best run from the summary table built earlier.
# NOTE(review): summary_table is None when no result files were found —
# confirm find_best_run copes with that before relying on this cell.
best_run = find_best_run(summary_table)
