# Pain Classification Pipeline - Multi-class LSTM

Clean pipeline for pain classification using an LSTM on time series data.

## 1. Setup

In [None]:
# Environment setup for the notebook: imports, warning policy, RNG seeding
# and the output directory used by the plotting cells.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os
from pathlib import Path
import warnings
# Suppress every warning from here on. This runs before the scikit-learn and
# TensorFlow imports below, so warnings emitted while importing them are
# hidden as well.
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    classification_report, confusion_matrix, f1_score,
    roc_curve, auc, precision_recall_curve, average_precision_score)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks, optimizers
from tensorflow.keras.utils import to_categorical

# Set random seeds for reproducibility (NumPy and TensorFlow global seeds)
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Create figures directory (no error if it already exists)
Path('./figures').mkdir(exist_ok=True)

# Report library versions so a run can be reproduced later
print(f"TensorFlow version: {tf.__version__}")
print(f"NumPy version: {np.__version__}")
print(f"Pandas version: {pd.__version__}")

## 2. Data Loading

In [None]:
# Read the four CSV inputs: training features, training labels, test
# features and the submission template.
train_df = pd.read_csv('pirate_pain_train.csv')
labels_df = pd.read_csv('pirate_pain_train_labels.csv')
test_df = pd.read_csv('pirate_pain_test.csv')
sample_submission = pd.read_csv('sample_submission.csv')

# Attach the label to every training row (left join on the sequence id).
train_df = pd.merge(train_df, labels_df, on='sample_index', how='left')

# Put the rows of each sequence in chronological order and renumber the index.
sequence_order = ['sample_index', 'time']
train_df = train_df.sort_values(by=sequence_order).reset_index(drop=True)
test_df = test_df.sort_values(by=sequence_order).reset_index(drop=True)

print("Data loaded successfully")
for caption, frame in (
    ("Train", train_df),
    ("Test", test_df),
    ("Sample submission", sample_submission),
):
    print(f"{caption} shape: {frame.shape}")

In [None]:
# Summary statistics: label distribution and sequence lengths.
print("\n=== Label Distribution ===")

# The label column is taken to be the first column that exists only in the
# training frame (the sequence id is excluded explicitly).
test_columns = test_df.columns
train_only_cols = [
    name for name in train_df.columns
    if name != 'sample_index' and name not in test_columns
]
label_col = train_only_cols[0]
print(f"Label column: {label_col}")

# One label per sequence: take the first row of each sequence.
per_sequence_label = train_df.groupby('sample_index')[label_col].first()
print(per_sequence_label.value_counts().sort_index())

num_classes = train_df[label_col].nunique()
print(f"\nNumber of classes: {num_classes}")

# Number of rows (timesteps) per sequence in each split.
train_seq_lengths = train_df.groupby('sample_index').size()
test_seq_lengths = test_df.groupby('sample_index').size()

for title, lengths in (("Training", train_seq_lengths), ("Test", test_seq_lengths)):
    print(f"\n=== {title} Sequence Lengths ===")
    print(f"Min: {lengths.min()}")
    print(f"Median: {lengths.median():.0f}")
    print(f"Max: {lengths.max()}")
    print(f"Total sequences: {len(lengths)}")

## 3. Feature Groups Definition

In [None]:
# Partition the feature columns into JOINT / SURVEY / BODY groups.
exclude_cols = {'sample_index', 'time', label_col}

all_feature_cols = []
joint_cols = []    # columns whose name starts with 'joint_'
survey_cols = []   # columns whose name starts with 'pain_survey_'
body_cols = []     # every remaining feature column
for name in train_df.columns:
    if name in exclude_cols:
        continue
    all_feature_cols.append(name)
    if name.startswith('joint_'):
        joint_cols.append(name)
    elif name.startswith('pain_survey_'):
        survey_cols.append(name)
    else:
        body_cols.append(name)

# BODY columns stored as pandas 'object' dtype are treated as categorical,
# the others as continuous.
body_cat_cols = []
body_cont_cols = []
for name in body_cols:
    target = body_cat_cols if train_df[name].dtype == 'object' else body_cont_cols
    target.append(name)

print("=== Feature Groups ===")
print(f"JOINT features ({len(joint_cols)}): {joint_cols[:3] if len(joint_cols) >= 3 else joint_cols}...{joint_cols[-3:] if len(joint_cols) >= 3 else []}")
print(f"SURVEY features ({len(survey_cols)}): {survey_cols}")
print(f"BODY categorical ({len(body_cat_cols)}): {body_cat_cols}")
print(f"BODY continuous ({len(body_cont_cols)}): {body_cont_cols}")
print(f"\nTotal features: {len(all_feature_cols)}")

## 4. Preprocessing

In [None]:
def preprocess_data(df, is_train=True, scaler_joint=None, scaler_survey=None, body_cat_medians=None):
    """
    Clean and scale one split of the data.

    Relies on the module-level column groups ``joint_cols``, ``survey_cols``,
    ``body_cols``, ``body_cat_cols`` and ``body_cont_cols``.

    Steps:
    1. Linearly interpolate NaN values in JOINT columns, per sequence.
    2. Map categorical BODY strings ('zero'..'three') to integers; any other
       value becomes NaN.
    3. Fill NaN in categorical BODY columns with the training-set median.
    4. Broadcast each BODY column's first value over its whole sequence.
    5. Standard-scale JOINT + continuous BODY columns and MinMax-scale the
       SURVEY columns to [0, 1].

    Args:
        df: Input dataframe with a 'sample_index' column; it is not modified.
        is_train: When True, medians and scalers are fitted on ``df``.
            When False, the supplied ones are applied.
        scaler_joint: Fitted StandardScaler, used only when ``is_train`` is
            False. If None, that scaling step is skipped.
        scaler_survey: Fitted MinMaxScaler, used only when ``is_train`` is
            False. If None, that scaling step is skipped.
        body_cat_medians: Dict of column -> fill value, used only when
            ``is_train`` is False. None is treated as "nothing to fill".

    Returns:
        Tuple ``(df, scaler_joint, scaler_survey, body_cat_medians)`` holding
        the processed copy and the fitted (or passed-through) objects.
    """
    df = df.copy()

    # 1. Interpolate JOINT features (only NaN, not zeros); limit_direction='both'
    #    also fills NaN at the start and end of a sequence.
    for col in joint_cols:
        if col in df.columns:
            df[col] = df.groupby('sample_index')[col].transform(
                lambda x: x.interpolate(method='linear', limit_direction='both')
            )

    # 2. Map categorical BODY features to numeric
    body_cat_mapping = {
        'zero': 0, 'one': 1, 'two': 2, 'three': 3
    }
    present_cat_cols = [col for col in body_cat_cols if col in df.columns]
    for col in present_cat_cols:
        df[col] = df[col].map(body_cat_mapping)

    # 3. Fill missing BODY categorical values with the training-set median.
    #    The median is taken over one value per sequence so long sequences do
    #    not weigh more than short ones.
    if is_train:
        body_cat_medians = {
            col: df.groupby('sample_index')[col].first().median()
            for col in present_cat_cols
        }
    # Tolerate a missing medians dict at inference time instead of raising
    # TypeError on the membership test below.
    fill_values = body_cat_medians if body_cat_medians is not None else {}
    for col in present_cat_cols:
        if col in fill_values:
            df[col] = df[col].fillna(fill_values[col])

    # 4. Replicate BODY features across all timesteps (they are static per sequence)
    for col in body_cols:
        if col in df.columns:
            df[col] = df.groupby('sample_index')[col].transform('first')

    # 5. Scaling
    scale_cols_joint_body = joint_cols + body_cont_cols

    if is_train:
        # Fit scalers on training data
        if scale_cols_joint_body:
            scaler_joint = StandardScaler()
            df[scale_cols_joint_body] = scaler_joint.fit_transform(df[scale_cols_joint_body])

        # MinMax scale SURVEY to preserve ordinality in [0, 1]
        if survey_cols:
            scaler_survey = MinMaxScaler(feature_range=(0, 1))
            df[survey_cols] = scaler_survey.fit_transform(df[survey_cols])
    else:
        # Transform using the scalers fitted on the training data
        if scale_cols_joint_body and scaler_joint is not None:
            df[scale_cols_joint_body] = scaler_joint.transform(df[scale_cols_joint_body])

        if survey_cols and scaler_survey is not None:
            df[survey_cols] = scaler_survey.transform(df[survey_cols])

    return df, scaler_joint, scaler_survey, body_cat_medians


# Preprocess training data (fits the medians and scalers)
train_df, scaler_joint, scaler_survey, body_cat_medians = preprocess_data(
    train_df, is_train=True
)

# Preprocess test data using the statistics fitted on the training data
test_df, _, _, _ = preprocess_data(
    test_df, is_train=False,
    scaler_joint=scaler_joint,
    scaler_survey=scaler_survey,
    body_cat_medians=body_cat_medians
)

print("Preprocessing completed")
print(f"Training data shape: {train_df.shape}")
print(f"Test data shape: {test_df.shape}")

## 5. Sequence Length Uniformization

In [None]:
def create_sequences(df, label_col=None, T_target=None):
    """
    Convert a long-format dataframe into an array of fixed-length sequences.

    Relies on the module-level column groups ``joint_cols``, ``survey_cols``
    and ``body_cols``; the feature axis of the result follows exactly that
    order (joint, then survey, then body).

    - A sequence shorter than T_target is edge-padded (its last frame is repeated).
    - A sequence longer than T_target keeps its first T_target frames.

    Args:
        df: Dataframe with 'sample_index', 'time' and the feature columns.
        label_col: Name of the label column, or None when there are no labels.
        T_target: Target number of timesteps. If None, the longest sequence
            in ``df`` is used.

    Returns:
        Tuple ``(X, y, T_target, sample_indices)``. ``X`` has shape
        (n_sequences, T_target, n_features); ``y`` holds the first label of
        each sequence, or is None when ``label_col`` is None;
        ``sample_indices`` lists the sequence ids in the order used for ``X``.

    Raises:
        ValueError: If ``df`` has no rows or ``T_target`` is below 1.
    """
    if len(df) == 0:
        raise ValueError("create_sequences received an empty dataframe")

    feature_cols = joint_cols + survey_cols + body_cols

    # Calculate T_target from the data if not provided
    if T_target is None:
        seq_lengths = df.groupby('sample_index').size()
        T_target = int(seq_lengths.max())
        print(f"T_target automatically set to: {T_target}")
    if T_target < 1:
        raise ValueError(f"T_target must be >= 1, got {T_target}")

    X_list = []
    y_list = []
    sample_indices = []

    # A single groupby pass replaces one full-frame boolean mask per sequence.
    # sort=False keeps the sequences in order of first appearance.
    for sample_idx, seq_df in df.groupby('sample_index', sort=False):
        seq_df = seq_df.sort_values('time')

        seq_features = seq_df[feature_cols].values
        seq_len = len(seq_features)

        if seq_len < T_target:
            # Edge padding: repeat the last frame
            padding = np.repeat(seq_features[-1:], T_target - seq_len, axis=0)
            seq_features = np.vstack([seq_features, padding])
        elif seq_len > T_target:
            # Drop the trailing frames
            seq_features = seq_features[:T_target]

        X_list.append(seq_features)
        sample_indices.append(sample_idx)

        # The label is constant within a sequence, so the first row is enough
        if label_col is not None:
            y_list.append(seq_df[label_col].iloc[0])

    X = np.array(X_list)

    # Internal sanity check on the stacked shape
    expected_shape = (len(X), T_target, len(feature_cols))
    assert X.shape == expected_shape, f"Shape mismatch: {X.shape} != {expected_shape}"
    print(f"Sequences created: {X.shape}")

    y = np.array(y_list) if label_col is not None else None
    return X, y, T_target, sample_indices


# Create training sequences (determine T_target from training data only)
X_train_full, y_train_full, T_target, train_sample_indices = create_sequences(
    train_df, label_col=label_col, T_target=None
)

# Create test sequences using the same T_target
X_test, _, _, test_sample_indices = create_sequences(
    test_df, label_col=None, T_target=T_target
)

print(f"\n=== Sequence Shapes ===")
print(f"T_target: {T_target}")
print(f"Training sequences: {X_train_full.shape}")
print(f"Test sequences: {X_test.shape}")
print(f"Number of features: {X_train_full.shape[2]}")

In [None]:
# Hold out 20% of the sequences for validation, stratified on the label.
split_parts = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.2,
    random_state=SEED,
    stratify=y_train_full,
)
X_train, X_val, y_train, y_val = split_parts

print(f"Training set: {X_train.shape}, {y_train.shape}")
print(f"Validation set: {X_val.shape}, {y_val.shape}")
print(f"Test set: {X_test.shape}")

## 6. Model Architecture

In [None]:
def build_model(input_shape, num_classes, units1=128, units2=64, head_units=64,
                lr=6e-4, weight_decay=1e-4):
    """
    Assemble and compile the stacked-LSTM classifier.

    Layout: spatial dropout -> two (LSTM + LayerNormalization) stages that
    keep the time axis -> concatenated global average / max pooling ->
    dense ReLU head with dropout -> softmax over ``num_classes``.

    Args:
        input_shape: Shape of one input sample, passed to ``layers.Input``.
        num_classes: Size of the softmax output layer.
        units1: Units of the first LSTM layer.
        units2: Units of the second LSTM layer.
        head_units: Units of the dense head.
        lr: AdamW learning rate.
        weight_decay: AdamW weight decay.

    Returns:
        The compiled Keras model (sparse categorical cross-entropy loss,
        accuracy metric, gradient-norm clipping at 1.0).
    """
    inputs = layers.Input(shape=input_shape)

    # Spatial dropout on the input sequence
    features = layers.SpatialDropout1D(0.15)(inputs)

    # Two recurrent stages, each followed by layer normalization
    for lstm_units in (units1, units2):
        features = layers.LSTM(
            lstm_units,
            return_sequences=True,
            dropout=0.2,
            recurrent_dropout=0.1,
        )(features)
        features = layers.LayerNormalization()(features)

    # Collapse the time axis with both average and max pooling
    pooled_mean = layers.GlobalAveragePooling1D()(features)
    pooled_max = layers.GlobalMaxPooling1D()(features)
    pooled = layers.Concatenate()([pooled_mean, pooled_max])

    # Dense head
    hidden = layers.Dense(head_units, activation='relu')(pooled)
    hidden = layers.Dropout(0.3)(hidden)

    # Class probabilities
    outputs = layers.Dense(num_classes, activation='softmax')(hidden)

    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=optimizers.AdamW(
            learning_rate=lr,
            weight_decay=weight_decay,
            clipnorm=1.0,
        ),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


# Build one model with the default hyper-parameters and show its layers
test_model = build_model(
    input_shape=(T_target, X_train.shape[2]),
    num_classes=num_classes,
)
test_model.summary()

## 7. Grid Search

In [None]:
# Hyper-parameter grid: one row per configuration, columns in the order
# given by config_fields.
config_fields = ('units1', 'units2', 'head_units', 'lr', 'wd')
config_rows = (
    (96, 48, 64, 6e-4, 1e-4),
    (128, 64, 64, 6e-4, 1e-4),
    (160, 80, 96, 5e-4, 1e-4),
    (128, 64, 96, 7e-4, 2e-4),
    (128, 48, 64, 6e-4, 2e-4),
)
grid_configs = [dict(zip(config_fields, row)) for row in config_rows]

print(f"Testing {len(grid_configs)} configurations")
print("Each configuration will be evaluated on validation set using F1 macro score")

In [None]:
# Grid search: train one model per configuration and keep the one with the
# highest macro-F1 on the validation set.
best_f1_macro = 0
best_config = None
best_model = None
grid_results = []

for idx, config in enumerate(grid_configs):
    print(f"\n{'='*60}")
    print(f"Testing config {idx+1}/{len(grid_configs)}: {config}")
    print('='*60)

    # Reset seeds so every configuration starts from the same RNG state
    np.random.seed(SEED)
    tf.random.set_seed(SEED)

    # Build model ('wd' in the config maps to build_model's weight_decay)
    model = build_model(
        input_shape=(T_target, X_train.shape[2]),
        num_classes=num_classes,
        units1=config['units1'],
        units2=config['units2'],
        head_units=config['head_units'],
        lr=config['lr'],
        weight_decay=config['wd']
    )

    # Stop when val_loss has not improved for 10 epochs and roll back to the
    # best weights seen
    early_stop = callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=0
    )

    # Halve the learning rate after 4 epochs without val_loss improvement
    reduce_lr = callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=4,
        min_lr=1e-5,
        verbose=0
    )

    # Train
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=100,
        batch_size=32,
        callbacks=[early_stop, reduce_lr],
        verbose=0
    )

    # Evaluate on the validation split
    y_val_pred = model.predict(X_val, verbose=0).argmax(axis=1)

    f1_macro = f1_score(y_val, y_val_pred, average='macro')
    f1_per_class = f1_score(y_val, y_val_pred, average=None)
    accuracy = (y_val_pred == y_val).mean()

    result = {
        'config': config,
        'f1_macro': f1_macro,
        'f1_per_class': f1_per_class.tolist(),
        'accuracy': accuracy,
        'epochs_trained': len(history.history['loss'])
    }
    grid_results.append(result)

    print(f"F1 Macro: {f1_macro:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 per class: {[f'{f:.4f}' for f in f1_per_class]}")
    print(f"Epochs trained: {result['epochs_trained']}")

    # Update best model. The `best_model is None` test guarantees that a
    # model is selected even if every configuration scores exactly 0.0;
    # otherwise best_model/best_config would stay None and later cells
    # would fail.
    if best_model is None or f1_macro > best_f1_macro:
        best_f1_macro = f1_macro
        best_config = config
        best_model = model
        print("*** New best model! ***")

print(f"\n{'='*60}")
print("Grid search completed")
print(f"Best config: {best_config}")
print(f"Best F1 Macro: {best_f1_macro:.4f}")

# Save best config; the metric is cast to a plain float so the JSON encoder
# never sees a NumPy scalar type.
with open('best_config.json', 'w') as f:
    json.dump({
        'config': best_config,
        'f1_macro': float(best_f1_macro),
        'T_target': T_target
    }, f, indent=2)

print("\nBest config saved to best_config.json")

## 8. Final Training with Best Model

In [None]:
# The best model is already trained from grid search.
# Run the validation forward pass once and derive the hard predictions from
# the probabilities instead of calling predict() twice.
y_val_proba = best_model.predict(X_val, verbose=0)
y_val_pred = y_val_proba.argmax(axis=1)

print("Best model ready for evaluation")
print(f"Validation accuracy: {(y_val_pred == y_val).mean():.4f}")
print(f"Validation F1 macro: {f1_score(y_val, y_val_pred, average='macro'):.4f}")

## 9. Evaluation and Visualization

In [None]:
# Retrain the best configuration from scratch to get a clean history for plots
np.random.seed(SEED)
tf.random.set_seed(SEED)

# The grid configs store weight decay under 'wd', while build_model's
# parameter is named 'weight_decay'. Unpacking best_config directly would
# raise TypeError, so translate that key first.
build_kwargs = {
    ('weight_decay' if key == 'wd' else key): value
    for key, value in best_config.items()
}

final_model = build_model(
    input_shape=(T_target, X_train.shape[2]),
    num_classes=num_classes,
    **build_kwargs
)

# Stop after 10 epochs without val_loss improvement and restore the best weights
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

# Halve the learning rate after 4 epochs without val_loss improvement
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=4,
    min_lr=1e-5,
    verbose=1
)

history = final_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)

# From here on, evaluation and test prediction use the retrained model
best_model = final_model

In [None]:
# Training curves: loss on the left panel, accuracy on the right panel.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (train history key, val history key, train legend, val legend, y label, title)
panels = [
    ('loss', 'val_loss', 'Train Loss', 'Val Loss',
     'Loss', 'Training and Validation Loss'),
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Val Accuracy',
     'Accuracy', 'Training and Validation Accuracy'),
]

for ax, (train_key, val_key, train_label, val_label, y_label, title) in zip(axes, panels):
    ax.plot(history.history[train_key], label=train_label, linewidth=2)
    ax.plot(history.history[val_key], label=val_label, linewidth=2)
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('./figures/training_curves.png', dpi=300, bbox_inches='tight')
plt.show()
print("Training curves saved to ./figures/training_curves.png")

In [None]:
# Confusion matrix of the current best model on the validation split.
y_val_pred = best_model.predict(X_val, verbose=0).argmax(axis=1)
cm = confusion_matrix(y_val, y_val_pred)

class_ticks = range(num_classes)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_ticks,
    yticklabels=class_ticks,
    cbar_kws={'label': 'Count'},
    ax=ax,
)
ax.set_xlabel('Predicted Label', fontsize=12)
ax.set_ylabel('True Label', fontsize=12)
ax.set_title('Confusion Matrix - Validation Set', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.savefig('./figures/confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()
print("Confusion matrix saved to ./figures/confusion_matrix.png")

In [None]:
# Text report plus per-class and macro F1 on the validation predictions.
print("\n=== Classification Report ===")
report = classification_report(y_val, y_val_pred, digits=4)
print(report)

# These two values are reused by the final summary cell.
f1_macro = f1_score(y_val, y_val_pred, average='macro')
f1_per_class = f1_score(y_val, y_val_pred, average=None)

print("\n=== F1 Scores ===")
for class_id, class_f1 in enumerate(f1_per_class):
    print(f"Class {class_id}: {class_f1:.4f}")
print(f"\nMacro F1: {f1_macro:.4f}")

In [None]:
# One-vs-rest ROC (left) and precision-recall (right) curves per class.
y_val_proba = best_model.predict(X_val, verbose=0)
y_val_binary = to_categorical(y_val, num_classes=num_classes)

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
roc_ax, pr_ax = axes[0], axes[1]

for class_id in range(num_classes):
    truth = y_val_binary[:, class_id]     # 1 where the sample belongs to this class
    scores = y_val_proba[:, class_id]     # predicted probability of this class

    fpr, tpr, _ = roc_curve(truth, scores)
    roc_auc = auc(fpr, tpr)
    roc_ax.plot(fpr, tpr, label=f'Class {class_id} (AUC={roc_auc:.3f})', linewidth=2)

    precision, recall, _ = precision_recall_curve(truth, scores)
    avg_precision = average_precision_score(truth, scores)
    pr_ax.plot(recall, precision, label=f'Class {class_id} (AP={avg_precision:.3f})', linewidth=2)

# Chance diagonal, drawn after the class curves
roc_ax.plot([0, 1], [0, 1], 'k--', linewidth=1, label='Random')

axis_text = [
    (roc_ax, 'False Positive Rate', 'True Positive Rate', 'ROC Curves (One-vs-Rest)'),
    (pr_ax, 'Recall', 'Precision', 'Precision-Recall Curves (One-vs-Rest)'),
]
for ax, x_label, y_label, title in axis_text:
    ax.set_xlabel(x_label, fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('./figures/roc_pr_curves.png', dpi=300, bbox_inches='tight')
plt.show()
print("ROC and PR curves saved to ./figures/roc_pr_curves.png")

In [None]:
# Histogram of the predicted probability of each class, split by whether the
# sample truly belongs to that class.
n_cols = (num_classes + 1) // 2
fig, axes = plt.subplots(2, n_cols, figsize=(16, 8))
axes = axes.flatten()

for class_id, ax in zip(range(num_classes), axes):
    is_member = y_val == class_id
    true_class_probs = y_val_proba[is_member, class_id]
    false_class_probs = y_val_proba[~is_member, class_id]

    ax.hist(true_class_probs, bins=30, alpha=0.6, label=f'True Class {class_id}', color='green')
    ax.hist(false_class_probs, bins=30, alpha=0.6, label='Other Classes', color='red')
    ax.set_xlabel('Predicted Probability', fontsize=10)
    ax.set_ylabel('Frequency', fontsize=10)
    ax.set_title(f'Class {class_id} Probabilities', fontsize=11, fontweight='bold')
    ax.legend(fontsize=9)
    ax.grid(True, alpha=0.3)

# Blank out the grid cells that have no class (odd number of classes)
for spare_ax in axes[num_classes:]:
    spare_ax.axis('off')

plt.tight_layout()
plt.savefig('./figures/probability_distributions.png', dpi=300, bbox_inches='tight')
plt.show()
print("Probability distributions saved to ./figures/probability_distributions.png")

In [None]:
# Feature distribution visualization: histograms of a few scaled features.
# Sample a few features to visualize (at most 3 JOINT and 2 SURVEY columns).
sample_joint_cols = joint_cols[:3]
sample_survey_cols = survey_cols[:2]

# The last axis of X_train follows the order used by create_sequences
# (joint_cols + survey_cols + body_cols), which is not necessarily the
# dataframe column order held in all_feature_cols. Look the column up in
# that order so the histogram shows the feature named in its title.
feature_order = joint_cols + survey_cols + body_cols

fig, axes = plt.subplots(2, 3, figsize=(16, 10))

# Plot scaled JOINT features (top row)
for i, col in enumerate(sample_joint_cols):
    col_idx = feature_order.index(col)
    feature_data = X_train[:, :, col_idx].flatten()
    axes[0, i].hist(feature_data, bins=50, alpha=0.7, color='blue', edgecolor='black')
    axes[0, i].set_title(f'{col} (Scaled)', fontsize=11, fontweight='bold')
    axes[0, i].set_xlabel('Value', fontsize=10)
    axes[0, i].set_ylabel('Frequency', fontsize=10)
    axes[0, i].grid(True, alpha=0.3)

# Plot scaled SURVEY features (bottom row)
for i, col in enumerate(sample_survey_cols):
    col_idx = feature_order.index(col)
    feature_data = X_train[:, :, col_idx].flatten()
    axes[1, i].hist(feature_data, bins=20, alpha=0.7, color='orange', edgecolor='black')
    axes[1, i].set_title(f'{col} (MinMax Scaled)', fontsize=11, fontweight='bold')
    axes[1, i].set_xlabel('Value', fontsize=10)
    axes[1, i].set_ylabel('Frequency', fontsize=10)
    axes[1, i].grid(True, alpha=0.3)

# Hide unused subplots in both rows
for i in range(len(sample_joint_cols), 3):
    axes[0, i].axis('off')
for i in range(len(sample_survey_cols), 3):
    axes[1, i].axis('off')

plt.suptitle('Feature Distributions After Scaling', fontsize=14, fontweight='bold', y=1.00)
plt.tight_layout()
plt.savefig('./figures/feature_distributions.png', dpi=300, bbox_inches='tight')
plt.show()
print("Feature distributions saved to ./figures/feature_distributions.png")

## 10. Test Predictions and Submission

In [None]:
# Class probabilities for every test sequence, then the most likely class.
test_proba = best_model.predict(X_test, verbose=0)
test_pred = np.argmax(test_proba, axis=1)

print(f"Test predictions shape: {test_pred.shape}")
print(f"Test probabilities shape: {test_proba.shape}")

# Compare the number of predicted sequences with the submission template
n_test_sequences = len(test_sample_indices)
n_submission_rows = len(sample_submission)
print(f"\nTest sample indices: {n_test_sequences}")
print(f"Sample submission rows: {n_submission_rows}")

In [None]:
# Create submission file matching the sample_submission format.
# Start from the template so column names and row order are preserved.
submission_df = sample_submission.copy()

# Get the column names from sample_submission
id_col = sample_submission.columns[0]       # Usually 'id' or 'sample_index'
label_cols = sample_submission.columns[1:]  # Prediction columns

# Map each test sequence id to its predicted class
pred_map = dict(zip(test_sample_indices, test_pred))

# Rows whose id has a prediction. Rows without one keep the template value,
# but are reported so a silent id mismatch does not go unnoticed.
has_prediction = submission_df[id_col].isin(list(pred_map))
n_unmatched = int((~has_prediction).sum())
if n_unmatched:
    print(f"WARNING: {n_unmatched} submission rows have no matching test prediction; "
          "their template values are left unchanged")

# Fill predictions in one vectorized assignment per label column instead of
# a row-by-row iterrows/.loc loop.
if has_prediction.any():
    matched_preds = submission_df.loc[has_prediction, id_col].map(pred_map)
    for col in label_cols:
        submission_df.loc[has_prediction, col] = matched_preds

# Save submission
submission_df.to_csv('submission.csv', index=False)
print("Submission saved to submission.csv")
print(f"Submission shape: {submission_df.shape}")
print(f"\nFirst few rows:")
print(submission_df.head())

In [None]:
# Second output file: one probability column per class next to the sequence id.
prob_cols = [f'prob_class_{i}' for i in range(num_classes)]

probability_columns = {
    col: test_proba[:, class_id] for class_id, col in enumerate(prob_cols)
}
submission_probs_df = pd.DataFrame({id_col: test_sample_indices, **probability_columns})

submission_probs_df.to_csv('submission_probs.csv', index=False)
print("Submission with probabilities saved to submission_probs.csv")
print(f"Submission probabilities shape: {submission_probs_df.shape}")
print(f"\nFirst few rows:")
print(submission_probs_df.head())

## Final Summary

In [None]:
# Recap of the run: sequence length, chosen hyper-parameters, validation
# metrics and the files written by the notebook.
rule = "=" * 60
print(rule)
print("FINAL SUMMARY")
print(rule)

print(f"\nT_target: {T_target}")

print("\nBest Configuration:")
for name, setting in best_config.items():
    print(f"  {name}: {setting}")

val_accuracy = (y_val_pred == y_val).mean()
print("\nValidation Metrics:")
print(f"  Macro F1: {f1_macro:.4f}")
print(f"  Accuracy: {val_accuracy:.4f}")

print("\nF1 Scores per Class:")
for class_id, class_f1 in enumerate(f1_per_class):
    print(f"  Class {class_id}: {class_f1:.4f}")

output_files = (
    "submission.csv",
    "submission_probs.csv",
    "best_config.json",
    "./figures/training_curves.png",
    "./figures/confusion_matrix.png",
    "./figures/roc_pr_curves.png",
    "./figures/probability_distributions.png",
    "./figures/feature_distributions.png",
)
print("\nOutput Files:")
for produced in output_files:
    print(f"  - {produced}")

print(rule)