In [1]:
import os
import pandas as pd
import numpy as np

# Define the CSV file path relative to this script.
csv_filepath = os.path.join(os.path.dirname(__file__), "Classifier", "hyper_results.csv")

if not os.path.exists(csv_filepath):
    print(f"CSV file not found at: {csv_filepath}")
    exit(1)

# Load CSV file
df = pd.read_csv(csv_filepath)
print(f"Loaded {len(df)} experiment results from: {csv_filepath}")

# ------------------------------
# Aggregation and reporting
# ------------------------------
# Average results over seeds for identical hyperparameter settings (excluding 'seed')
group_cols = ['test_idx', 'hbr_multiplier', 'hbr_shift', 'window_length', 'add_hbr']
grouped = df.groupby(group_cols)['accuracy'].mean().reset_index()
grouped.rename(columns={'accuracy': 'avg_accuracy'}, inplace=True)

# Sort to get top 5 hyperparameter configurations (by average accuracy)
sorted_results = grouped.sort_values(by='avg_accuracy', ascending=False)
top_5 = sorted_results.head(5)

print("\nTop 5 hyperparameter configurations (averaged over seeds):")
for index, row in top_5.iterrows():
    print(f"test_idx={row['test_idx']}, hbr_multiplier={row['hbr_multiplier']}, " +
          f"hbr_shift={row['hbr_shift']}, window_length={row['window_length']}, " +
          f"add_hbr={row['add_hbr']} -> Average Val Accuracy: {row['avg_accuracy']*100:.2f}%")

# Determine which test_idx has the highest overall average validation accuracy
test_idx_groups = df.groupby('test_idx')['accuracy'].mean()
best_test_idx = test_idx_groups.idxmax()
best_test_idx_accuracy = test_idx_groups.loc[best_test_idx]
print(f"\nTest subject index with highest average validation accuracy: {best_test_idx} " +
      f"(Average Accuracy: {best_test_idx_accuracy*100:.2f}%)")

# Full summary of aggregated results
print("\nFull aggregated results (each row corresponds to one hyperparameter configuration):")
print(grouped.to_string(index=False))

NameError: name '__file__' is not defined