# Description: 
##### This script implements a robust, manually controlled 5-fold cross-validation.
* For each of the 5 predefined folds, it performs the following steps:
    1. Trains all models on `train.csv`.
    2. Selects the best models based on performance on `valid.csv`.
    3. Retrains EACH model on the combined `train.csv` + `valid.csv`.
    4. Evaluates EACH retrained model on `test.csv` to get its test performance.
    
    Finally, it averages the test performance for each model type across all 5 folds.

# Instructions:
1. Make sure you have PyCaret and its dependencies installed.
If not, uncomment the line below and run it once.
> !pip install pycaret pandas

2. Place this script in the same directory as your 90 CSV data files.

3. Run the script.

In [1]:
import pandas as pd
from pycaret.classification import *
import os

In [2]:
!pwd

/Users/leechangmin/Desktop/Project/ETRI-Emotion/cardio_exp1_given_data2


In [None]:
def run_strict_manual_5_fold_experiment():
    """
    This function runs the experiment with a strict, manual 5-fold
    process that honors the predefined train, validation, and test files.
    """
    # --- Configuration ---
    TARGET_COLUMN_NAME = 'label'
    GROUPS = ['Total', 'High', 'Low']
    VARIABLES = ['arousal', 'valence']
    FOLDS = range(1, 6)
    
    final_averaged_results = {}

    print("Starting PyCaret Classification with Manual 5-Fold Cross-Validation...")

    for group in GROUPS:
        for variable in VARIABLES:
            experiment_name = f"{group}_{variable}"
            print(f"\n--- Starting Experiment: [{experiment_name}] ---")

            # This list will store the test performance DataFrames from each of the 5 folds.
            all_folds_test_performance = []

            for fold in FOLDS:
                try:
                    print(f"  - Processing Fold {fold}...")
                    # 1. Load the strictly separated datasets
                    train_df = pd.read_csv(f'./data/fold{fold}_{group}_{variable}_train.csv')
                    valid_df = pd.read_csv(f'./data/fold{fold}_{group}_{variable}_valid.csv')
                    test_df = pd.read_csv(f'./data/fold{fold}_{group}_{variable}_test.csv')

                    # 2. Setup PyCaret to train on `train_df` and use `valid_df` as the hold-out
                    #    set for initial model ranking.
                    s = setup(data=train_df,
                              test_data=valid_df,
                              target=TARGET_COLUMN_NAME,
                              index=False,
                              session_id=123,
                              verbose=False)

                    # 3. Compare all models to get a performance grid on the validation set.
                    compare_models(verbose=False)
                    validation_grid = pull() # This contains all model types and their valid scores.

                    # This list will store the test results for ALL models within THIS fold.
                    current_fold_test_results = []
                    
                    # 4. Iterate through every model type, finalize it, and evaluate on the test set.
                    print(f"    > Evaluating all models on Fold {fold} test set...")
                    for model_id in validation_grid.index:
                        # Create the model instance trained on train_df
                        model = create_model(model_id, verbose=False)
                        # Retrain on combined train_df + valid_df
                        final_model = finalize_model(model)
                        # Predict on the unseen test_df
                        test_predictions = predict_model(final_model, data=test_df, verbose=False)
                        # Extract and store the test metrics
                        test_metrics = pull()
                        # Manually add the 'Model' column since the metrics row doesn't have it.
                        test_metrics['Model'] = validation_grid.loc[model_id, 'Model']
                        
                        current_fold_test_results.append(test_metrics)
                    
                    # Combine all model results for the current fold and add to the main list
                    all_folds_test_performance.append(pd.concat(current_fold_test_results))

                except Exception as e:
                    print(f"  - An error occurred in Fold {fold}: {e}")

            # After all 5 folds are processed, average the results
            if all_folds_test_performance:
                # Combine the performance DataFrames from all 5 folds
                full_results_df = pd.concat(all_folds_test_performance)
                
                # Calculate the mean test performance for each model type across the 5 folds
                numeric_cols = full_results_df.select_dtypes(include='number').columns
                average_results = full_results_df.groupby('Model')[numeric_cols].mean().sort_values('Accuracy', ascending=False)
                
                final_averaged_results[experiment_name] = average_results
                print(f"  > Finished experiment [{experiment_name}]. Averaged test results from 5 folds.")

    # --- Print and Save Final Averaged Results ---
    if final_averaged_results:
        print("\n--- Final Averaged Test Performance Across All Experiments ---")
        for name, result_df in final_averaged_results.items():
            print(f"\n[{name}] - Top 5 Models by Averaged Test Performance:")
            print(result_df.head())
            output_filename = f'results_{name}_manual_5fold_summary.csv'
            result_df.to_csv(output_filename)
            print(f"> Results saved to '{output_filename}'")
    else:
        print("\nNo results were processed.")

In [8]:
# This ensures the script runs when executed directly.
if __name__ == '__main__':
    run_strict_manual_5_fold_experiment()

Starting PyCaret Classification with Manual 5-Fold Cross-Validation...

--- Starting Experiment: [Total_arousal] ---
  - Processing Fold 1...
    > Evaluating all models on Fold 1 test set...
  - Processing Fold 2...
    > Evaluating all models on Fold 2 test set...
  - Processing Fold 3...
    > Evaluating all models on Fold 3 test set...
  - Processing Fold 4...
    > Evaluating all models on Fold 4 test set...
  - Processing Fold 5...
    > Evaluating all models on Fold 5 test set...


KeyError: 'Accuracy'

### Load Results

In [3]:
def load_all_results():
    """
    Read 'results_{group}_{variable}_classification_summary.csv' formatted files
    and return a dictionary of DataFrames.
    """
    # --- Configuration (can be adjusted as needed) ---
    groups = ['Total', 'High', 'Low']
    variables = ['arousal', 'valence']
    
    # dictionary to hold all results
    all_results = {}
    
    print("Reading all result CSV files...")

    # Iterate through each group and variable to construct filenames
    for group in groups:
        for variable in variables:
            # Construct the filename based on the group and variable
            experiment_name = f"{group}_{variable}"
            filename = f"./res/results_{experiment_name}_classification_summary.csv"
            
            try:
                # CSV file into DataFrame
                # index_col=0 to use the first column as index
                df = pd.read_csv(filename, index_col=0)
                
                # Store the DataFrame in the dictionary with the experiment name as key
                all_results[experiment_name] = df
                print(f"  - Successfully loaded: {filename}")
                
            except FileNotFoundError:
                print(f"  - File not found, skipping: {filename}")
                
    return all_results

In [4]:
if __name__ == '__main__':
    loaded_results = load_all_results()
    print("\n--- Summary of Loaded Results ---")
    
    if not loaded_results:
        print("No result files were found.")
    else:
        # Display the top 5 models for each loaded result
        for name, result_df in loaded_results.items():
            print(f"\n--- Top 5 Models for [{name}] ---")
            print(result_df.head())

Reading all result CSV files...
  - Successfully loaded: ./res/results_Total_arousal_classification_summary.csv
  - Successfully loaded: ./res/results_Total_valence_classification_summary.csv
  - Successfully loaded: ./res/results_High_arousal_classification_summary.csv
  - Successfully loaded: ./res/results_High_valence_classification_summary.csv
  - Successfully loaded: ./res/results_Low_arousal_classification_summary.csv
  - Successfully loaded: ./res/results_Low_valence_classification_summary.csv

--- Summary of Loaded Results ---

--- Top 5 Models for [Total_arousal] ---
                                 Accuracy      AUC   Recall    Prec.       F1  \
Model                                                                           
Gradient Boosting Classifier      0.95500  0.99220  0.95500  0.95646  0.95466   
Random Forest Classifier          0.95374  0.99182  0.95374  0.95572  0.95346   
CatBoost Classifier               0.95166  0.99230  0.95166  0.95406  0.95124   
Ada Boost Cl