In [None]:
# Cell 1: Imports and Setup
"""
Neural Network Model for MONK Dataset Classification
Author: Gabriele Righi
Date: November 26, 2025
"""

import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, StratifiedKFold
from scipy.stats import uniform
import os
import time
import itertools
import matplotlib.pyplot as plt
import warnings
# NOTE(review): with no category/module filter this silences every warning for
# the rest of the notebook. Presumably that includes scikit-learn convergence
# warnings -- the recorded runs below report "Iterations: 2000" with
# max_iter=2000, so the fits appear to hit the iteration cap. Consider
# narrowing the filter to the specific categories that are known to be noise.
warnings.filterwarnings('ignore')

print("✓ All libraries imported successfully")

✓ All libraries imported successfully


In [None]:
# Cell 2: Data Loading Function
def load_monk_data(train_path, test_path, shuffle=True, random_state=42):
    """
    Read a MONK train/test file pair and split each into features and labels.

    Parameters:
    -----------
    train_path : str
        Location of the training file
    test_path : str
        Location of the test file
    shuffle : bool, default=True
        When True, the training rows are permuted (the test rows never are)
    random_state : int, default=42
        Seed used for the permutation of the training rows

    Returns:
    --------
    X_train, y_train, X_test, y_test
        Feature frames hold columns a1..a6; label series hold the 'class' column.
    """
    def _read_split(path):
        # Space-separated file without a header row; the trailing 'Id' column
        # is a row identifier and is discarded right away.
        frame = pd.read_csv(
            path,
            sep=' ',
            names=['class', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'Id'],
            skipinitialspace=True,
        )
        return frame.drop(columns='Id')

    train_frame = _read_split(train_path)
    test_frame = _read_split(test_path)

    if shuffle:
        # frac=1 returns every row in random order; the index is rebuilt so it
        # runs 0..n-1 again after the permutation.
        train_frame = train_frame.sample(frac=1, random_state=random_state).reset_index(drop=True)

    return (
        train_frame.drop(columns='class'),
        train_frame['class'],
        test_frame.drop(columns='class'),
        test_frame['class'],
    )

print("✓ load_monk_data() defined")

✓ load_monk_data() defined


In [None]:
# Cell 3: Preprocessing Function
def preprocess_data(X_train, X_test):
    """
    One-hot encode categorical features, fitting the encoder on X_train only.

    Parameters:
    -----------
    X_train : pd.DataFrame
        Training features; the encoder learns its categories from this frame
    X_test : pd.DataFrame
        Features to transform with the already-fitted encoder

    Returns:
    --------
    X_train_encoded, X_test_encoded, encoder
        Dense encoded matrices plus the fitted OneHotEncoder.
    """
    # handle_unknown='ignore': a category that appears in X_test but was absent
    # from X_train (possible when fitting on a small split) is encoded as an
    # all-zero group for that feature instead of raising at transform time.
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    X_train_encoded = encoder.fit_transform(X_train)
    X_test_encoded = encoder.transform(X_test)

    return X_train_encoded, X_test_encoded, encoder

print("✓ preprocess_data() defined")

✓ preprocess_data() defined


In [None]:
# Cell 4: Training Function
def train_neural_network(X_train, y_train, **mlp_params):
    """
    Fit an MLPClassifier on the given data.

    Parameters:
    -----------
    X_train : np.ndarray
        Feature matrix to fit on
    y_train : np.ndarray or pd.Series
        Target labels
    **mlp_params : dict
        MLPClassifier keyword arguments; each one replaces the baseline value
        of the same name listed below

    Returns:
    --------
    mlp : MLPClassifier
        The fitted classifier
    """
    # Baseline hyperparameters used for anything the caller does not specify.
    baseline = dict(
        hidden_layer_sizes=(100,),
        activation='relu',
        solver='adam',
        alpha=0.0001,
        batch_size='auto',
        learning_rate='constant',
        learning_rate_init=0.001,
        power_t=0.5,
        max_iter=1000,
        shuffle=True,
        random_state=42,
        tol=1e-4,
        verbose=False,
        warm_start=False,
        momentum=0.9,
        nesterovs_momentum=True,
        early_stopping=False,
        validation_fraction=0.1,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-8,
        n_iter_no_change=10,
        max_fun=15000,
    )

    # Caller-supplied keywords take precedence over the baseline.
    settings = {**baseline, **mlp_params}

    classifier = MLPClassifier(**settings)
    classifier.fit(X_train, y_train)
    return classifier

print("✓ train_neural_network() defined")

✓ train_neural_network() defined


In [None]:
# Cell 5: Evaluation Function
def evaluate_model(model, X_test, y_test, dataset_name="Test", verbose=True):
    """
    Score a fitted model on a labelled dataset.

    Parameters:
    -----------
    model : MLPClassifier
        Fitted model exposing `predict`
    X_test : np.ndarray
        Features to predict on
    y_test : np.ndarray or pd.Series
        Ground-truth labels for X_test
    dataset_name : str
        Prefix used in the printed headings
    verbose : bool
        When True, the accuracy, confusion matrix and classification report
        are printed

    Returns:
    --------
    accuracy : float
        Accuracy of the predictions against y_test
    y_pred : np.ndarray
        Labels predicted for X_test
    """
    predictions = model.predict(X_test)
    score = accuracy_score(y_test, predictions)

    # Quiet mode: hand back the numbers without printing anything.
    if not verbose:
        return score, predictions

    print(f"\n{dataset_name} Accuracy: {score:.4f}")

    print(f"\n{dataset_name} Confusion Matrix:")
    matrix = confusion_matrix(y_test, predictions)
    print(matrix)

    print(f"\n{dataset_name} Classification Report:")
    report = classification_report(y_test, predictions)
    print(report)

    return score, predictions

print("✓ evaluate_model() defined")

✓ evaluate_model() defined


In [None]:
# Cell 6: Plotting Functions
def plot_learning_curves(model, X_train, y_train, X_val, y_val, X_test, y_test, dataset_name, save_dir='plots'):
    """
    Plot the training loss curve and the final train/validation/test accuracies.

    Parameters:
    -----------
    model : MLPClassifier
        Fitted model; `solver`, `loss_curve_` and `predict` are read from it,
        and `validation_scores_` is used when present
    X_train, y_train : array-like
        Data behind the "Final Train" reference line
    X_val, y_val : array-like
        Data behind the "Final Val" reference line
    X_test, y_test : array-like
        Data behind the "Final Test" reference line
    dataset_name : str
        Used in the plot titles and in the output file name
    save_dir : str, default='plots'
        Directory (created if missing) that receives
        `<dataset_name>_learning_curves.pdf`

    Returns:
    --------
    None
        Returns early, without plotting, when the solver is not 'sgd'/'adam'
        or when the model has no loss curve.
    """
    os.makedirs(save_dir, exist_ok=True)

    if model.solver not in ['sgd', 'adam']:
        print(f"Learning curves not available for '{model.solver}' solver.")
        return

    if getattr(model, 'loss_curve_', None) is None:
        print(f"Loss curve attribute not found for {dataset_name}.")
        return

    # Per-epoch validation scores are optional on the model; None means absent.
    val_scores = getattr(model, 'validation_scores_', None)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Panel 1: loss curve
    epochs = range(1, len(model.loss_curve_) + 1)
    ax1.plot(epochs, model.loss_curve_, 'b-', linewidth=2, label='Training Loss')

    if val_scores is not None:
        # 1 - score is an error rate, not a loss value, so it is labelled as
        # such rather than as "Validation Loss".
        val_error = [1 - score for score in val_scores]
        ax1.plot(range(1, len(val_error) + 1), val_error, 'r-', linewidth=2,
                 label='Validation Error (1 - accuracy)')

    ax1.set_xlabel('Epoch', fontsize=12)
    ax1.set_ylabel('Loss', fontsize=12)
    ax1.set_title(f'{dataset_name} - Learning Curves (Loss)', fontsize=14, fontweight='bold')
    ax1.legend(fontsize=10)
    ax1.grid(True, alpha=0.3)

    # Panel 2: accuracy
    if val_scores is not None:
        val_epochs = range(1, len(val_scores) + 1)
        ax2.plot(val_epochs, val_scores, 'r-', linewidth=2,
                 label='Validation Accuracy')

    train_acc = accuracy_score(y_train, model.predict(X_train))
    val_acc = accuracy_score(y_val, model.predict(X_val))
    test_acc = accuracy_score(y_test, model.predict(X_test))

    # One horizontal reference line per evaluated split. The validation line
    # was previously computed but never drawn.
    ax2.axhline(y=train_acc, color='b', linestyle='--', linewidth=2,
                label=f'Final Train ({train_acc:.4f})')
    ax2.axhline(y=val_acc, color='r', linestyle='--', linewidth=2,
                label=f'Final Val ({val_acc:.4f})')
    ax2.axhline(y=test_acc, color='g', linestyle='--', linewidth=2,
                label=f'Final Test ({test_acc:.4f})')

    ax2.set_xlabel('Epoch', fontsize=12)
    ax2.set_ylabel('Accuracy', fontsize=12)
    ax2.set_title(f'{dataset_name} - Final Accuracies', fontsize=14, fontweight='bold')
    ax2.legend(fontsize=10, loc='lower right')
    ax2.grid(True, alpha=0.3)
    ax2.set_ylim([0, 1.05])

    plt.tight_layout()
    plt.savefig(f'{save_dir}/{dataset_name}_learning_curves.pdf', dpi=300, bbox_inches='tight')
    plt.show()

def plot_accuracy_comparison(all_results, save_dir='plots'):
    """
    Draw a grouped bar chart of validation versus test accuracy per dataset.

    `all_results` maps a dataset name to a dict holding the keys
    'validation_accuracy' and 'test_accuracy'. The figure is written to
    `<save_dir>/accuracy_comparison.pdf` (the directory is created if needed)
    and then shown.
    """
    os.makedirs(save_dir, exist_ok=True)

    names = list(all_results.keys())

    def _column(key):
        # One value per dataset, in the same order as `names`.
        return [all_results[name][key] for name in names]

    positions = np.arange(len(names))
    bar_width = 0.35

    fig, ax = plt.subplots(figsize=(10, 6))

    # The two series sit side by side, each shifted half a bar width from the tick.
    validation_bars = ax.bar(positions - bar_width/2, _column('validation_accuracy'), bar_width,
                             label='Validation', color='skyblue', edgecolor='black')
    test_bars = ax.bar(positions + bar_width/2, _column('test_accuracy'), bar_width,
                       label='Test', color='lightcoral', edgecolor='black')

    ax.set_xlabel('Dataset', fontsize=12, fontweight='bold')
    ax.set_ylabel('Accuracy', fontsize=12, fontweight='bold')
    ax.set_title('Validation vs Test Accuracy', fontsize=14, fontweight='bold')
    ax.set_xticks(positions)
    ax.set_xticklabels(names)
    ax.legend(fontsize=11)
    ax.grid(True, axis='y', alpha=0.3)
    ax.set_ylim([0, 1.05])

    # Print each bar's value just above it, centred on the bar.
    for rect in [*validation_bars, *test_bars]:
        top = rect.get_height()
        ax.text(rect.get_x() + rect.get_width()/2., top,
                f'{top:.4f}', ha='center', va='bottom', fontsize=9)

    plt.tight_layout()
    plt.savefig(f'{save_dir}/accuracy_comparison.pdf', dpi=300, bbox_inches='tight')
    plt.show()

def plot_confusion_matrices(all_models, all_test_data, save_dir='plots'):
    """
    Plot one test-set confusion matrix per model, side by side.

    Parameters:
    -----------
    all_models : dict
        Maps a dataset name to a fitted model exposing `predict`
    all_test_data : dict
        Maps the same names to an `(X_test, y_test)` pair
    save_dir : str, default='plots'
        Directory (created if missing) that receives `confusion_matrices.pdf`

    Returns:
    --------
    None
        Nothing is plotted when `all_models` is empty.
    """
    from sklearn.metrics import ConfusionMatrixDisplay
    os.makedirs(save_dir, exist_ok=True)

    n_panels = len(all_models)
    if n_panels == 0:
        print("No models provided; skipping confusion matrix plot.")
        return

    # Labels are binary (0/1). Passing them explicitly keeps every matrix 2x2
    # even when only one class shows up in y_test and y_pred, so the matrix
    # always matches the two display labels.
    class_labels = [0, 1]

    # One panel per model instead of a fixed three; squeeze=False keeps `axes`
    # two-dimensional even for a single panel. Three models give the original
    # 15x4 figure.
    fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)
    axes = axes[0]

    for idx, (name, model) in enumerate(all_models.items()):
        X_test, y_test = all_test_data[name]
        y_pred = model.predict(X_test)

        cm = confusion_matrix(y_test, y_pred, labels=class_labels)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
        disp.plot(ax=axes[idx], cmap='Blues', values_format='d')
        axes[idx].set_title(f'{name}\nTest Confusion Matrix', fontsize=12, fontweight='bold')

    plt.tight_layout()
    plt.savefig(f'{save_dir}/confusion_matrices.pdf', dpi=300, bbox_inches='tight')
    plt.show()

def plot_roc_curves(all_models, all_test_data, save_dir='plots'):
    """
    Plot one ROC curve per model, each in its own panel.

    Parameters:
    -----------
    all_models : dict
        Maps a dataset name to a fitted model exposing `predict_proba`
        (or, failing that, `decision_function`)
    all_test_data : dict
        Maps the same names to an `(X_test, y_test)` pair
    save_dir : str, default='plots'
        Directory (created if missing) that receives `roc_curves.pdf`

    Returns:
    --------
    None
        Nothing is plotted when `all_models` is empty.
    """
    from sklearn.metrics import roc_curve, auc
    os.makedirs(save_dir, exist_ok=True)

    n_panels = len(all_models)
    if n_panels == 0:
        print("No models provided; skipping ROC curve plot.")
        return

    # One panel per model instead of a fixed three; squeeze=False keeps `axes`
    # two-dimensional even for a single panel. Three models give the original
    # 18x5 figure.
    fig, axes = plt.subplots(1, n_panels, figsize=(6 * n_panels, 5), squeeze=False)
    axes = axes[0]
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c']

    for idx, (name, model) in enumerate(all_models.items()):
        X_test, y_test = all_test_data[name]

        # Scores for the positive class (second probability column).
        if hasattr(model, 'predict_proba'):
            y_scores = model.predict_proba(X_test)[:, 1]
        else:
            y_scores = model.decision_function(X_test)

        fpr, tpr, thresholds = roc_curve(y_test, y_scores)
        roc_auc = auc(fpr, tpr)

        ax = axes[idx]
        # The palette is reused cyclically when there are more models than colours.
        ax.plot(fpr, tpr, color=colors[idx % len(colors)], lw=2.5,
                label=f'ROC curve (AUC = {roc_auc:.4f})')
        ax.plot([0, 1], [0, 1], color='gray', lw=1.5, linestyle='--',
                label='Random Classifier')

        # Mark the operating point that maximises TPR - FPR.
        optimal_idx = np.argmax(tpr - fpr)
        optimal_threshold = thresholds[optimal_idx]
        ax.plot(fpr[optimal_idx], tpr[optimal_idx], 'ro', markersize=8,
                label=f'Optimal (th={optimal_threshold:.3f})')

        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
        ax.set_xlabel('False Positive Rate', fontsize=11, fontweight='bold')
        ax.set_ylabel('True Positive Rate', fontsize=11, fontweight='bold')
        ax.set_title(f'{name} ROC Curve', fontsize=13, fontweight='bold')
        ax.grid(True, alpha=0.3)
        # Single legend call, issued after all three artists exist.
        ax.legend(loc='lower right', fontsize=9)

    plt.tight_layout()
    plt.savefig(f'{save_dir}/roc_curves.pdf', dpi=300, bbox_inches='tight')
    plt.show()

    print(f"\n✓ ROC curves saved to {save_dir}/roc_curves.pdf")

def plot_combined_roc_curves(all_models, all_test_data, save_dir='plots'):
    """
    Plot every model's ROC curve on one shared axis for comparison.

    Parameters:
    -----------
    all_models : dict
        Maps a dataset name to a fitted model exposing `predict_proba`
        (or, failing that, `decision_function`)
    all_test_data : dict
        Maps the same names to an `(X_test, y_test)` pair
    save_dir : str, default='plots'
        Directory (created if missing) that receives `roc_curves_combined.pdf`
    """
    from sklearn.metrics import roc_curve, auc
    os.makedirs(save_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(10, 8))
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c']

    for idx, (name, model) in enumerate(all_models.items()):
        X_test, y_test = all_test_data[name]

        # Scores for the positive class (second probability column).
        if hasattr(model, 'predict_proba'):
            y_scores = model.predict_proba(X_test)[:, 1]
        else:
            y_scores = model.decision_function(X_test)

        fpr, tpr, _ = roc_curve(y_test, y_scores)
        roc_auc = auc(fpr, tpr)

        # The palette is reused cyclically, so more than three models no
        # longer raises IndexError.
        ax.plot(fpr, tpr, color=colors[idx % len(colors)], lw=2.5,
                label=f'{name} (AUC = {roc_auc:.4f})')

    # Chance-level diagonal
    ax.plot([0, 1], [0, 1], color='gray', lw=1.5, linestyle='--',
            label='Random Classifier')

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate', fontsize=13, fontweight='bold')
    ax.set_ylabel('True Positive Rate', fontsize=13, fontweight='bold')
    ax.set_title('ROC Curves - All MONK Datasets', fontsize=15, fontweight='bold')
    ax.legend(loc='lower right', fontsize=11)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(f'{save_dir}/roc_curves_combined.pdf', dpi=300, bbox_inches='tight')
    plt.show()

    print(f"✓ Combined ROC curve saved to {save_dir}/roc_curves_combined.pdf")

print("✓ Plotting functions defined")

✓ Plotting functions defined


In [None]:
# Cell 7: SIMPLIFIED APPROACH - Model Selection with Validation Set
"""
SIMPLIFIED APPROACH: Simplified but rigorous model selection for MONK datasets.
- Model selection on validation set (hold-out from training data)
- Simpler network architectures (appropriate for problem complexity)
- Final model assessment on independent test set
- No early stopping to ensure full convergence
"""

def simplified_monk_pipeline(monk_num, random_state=42, data_dir='monk_dataset'):
    """
    Hold-out model selection, full-data retraining and test assessment for one
    MONK dataset.

    Steps: load the data, hold out 20% of the training rows for validation,
    one-hot encode, train every candidate configuration on the remaining 80%,
    keep the one with the highest validation accuracy, retrain it on the full
    training set and score it on the test set.

    Parameters:
    -----------
    monk_num : int
        MONK dataset number (1, 2, or 3)
    random_state : int
        Seed used for shuffling, splitting and model initialisation
    data_dir : str, default='monk_dataset'
        Directory containing `monks-<n>.train` and `monks-<n>.test`

    Returns:
    --------
    best_model_final : MLPClassifier
        Selected configuration retrained on the full training set. The
        validation accuracy that won the selection is stored on it as
        `selection_val_accuracy_`.
    best_config : dict
        Keyword arguments of the selected configuration
    test_acc : float
        Accuracy of `best_model_final` on the test set
    test_data : tuple
        `(X_test_enc, y_test)`
    train_data : tuple
        `(X_train_full_enc, y_train_full)`
    """
    print(f"\n{'='*70}")
    print(f"SIMPLIFIED APPROACH - MONK-{monk_num}")
    print(f"{'='*70}\n")

    # Step 1: Load data
    X_train_full, y_train_full, X_test, y_test = load_monk_data(
        f'{data_dir}/monks-{monk_num}.train',
        f'{data_dir}/monks-{monk_num}.test',
        shuffle=True,
        random_state=random_state
    )

    print(f"Dataset sizes - Train: {len(X_train_full)}, Test: {len(X_test)}")

    # Step 2: Hold out 20% of the training data for model selection
    X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
        X_train_full, y_train_full, test_size=0.20,
        random_state=random_state, stratify=y_train_full
    )

    print(f"Split for model selection - Train: {len(X_train_split)}, Validation: {len(X_val_split)}")

    # Step 3: One-hot encode. Two independent encoders are fitted: one on the
    # 80% split (selection phase) and one on the full training set (final phase).
    X_train_enc, X_val_enc, _ = preprocess_data(X_train_split, X_val_split)
    X_train_full_enc, X_test_enc, _ = preprocess_data(X_train_full, X_test)
    print(f"Features after one-hot encoding: {X_train_enc.shape[1]}\n")

    # Step 4: Candidate configurations (small architectures only)
    configs = [
        # Single hidden layer networks
        {'hidden_layer_sizes': (3,), 'activation': 'tanh', 'alpha': 0.0001},
        {'hidden_layer_sizes': (4,), 'activation': 'tanh', 'alpha': 0.0001},
        {'hidden_layer_sizes': (5,), 'activation': 'tanh', 'alpha': 0.001},
        {'hidden_layer_sizes': (6,), 'activation': 'relu', 'alpha': 0.0001},
        {'hidden_layer_sizes': (8,), 'activation': 'relu', 'alpha': 0.0001},
        {'hidden_layer_sizes': (10,), 'activation': 'relu', 'alpha': 0.001},

        # Two hidden layer networks
        {'hidden_layer_sizes': (4, 3), 'activation': 'tanh', 'alpha': 0.0001},
        {'hidden_layer_sizes': (5, 3), 'activation': 'tanh', 'alpha': 0.001},
        {'hidden_layer_sizes': (6, 4), 'activation': 'relu', 'alpha': 0.0001},
        {'hidden_layer_sizes': (8, 4), 'activation': 'relu', 'alpha': 0.0001},
    ]

    print("="*70)
    print("PHASE 1: MODEL SELECTION (using validation set)")
    print("="*70)

    # Start below any reachable accuracy so the first candidate is always
    # recorded, even if its validation accuracy is 0; otherwise best_config
    # could stay None and the report below would fail.
    best_val_acc = float('-inf')
    best_model_candidate = None
    best_config = None

    # Step 5: Train each configuration and score it on the validation split
    for i, config in enumerate(configs):
        params = {
            'hidden_layer_sizes': config['hidden_layer_sizes'],
            'activation': config['activation'],
            'solver': 'adam',
            'alpha': config['alpha'],
            'learning_rate_init': 0.001,
            'max_iter': 2000,
            'random_state': random_state,
            'early_stopping': False,
            'tol': 1e-6
        }

        model = train_neural_network(X_train_enc, y_train_split, **params)

        train_acc = accuracy_score(y_train_split, model.predict(X_train_enc))
        val_acc = accuracy_score(y_val_split, model.predict(X_val_enc))

        # Strict comparison: on a tie the earlier configuration is kept.
        is_new_best = val_acc > best_val_acc
        status = "✓ NEW BEST" if is_new_best else " "
        print(f"{status} Config {i+1}/{len(configs)}: "
              f"layers={config['hidden_layer_sizes']}, "
              f"activation={config['activation']}, "
              f"alpha={config['alpha']:.4f}")
        # n_iter_ equal to max_iter means the fit stopped at the iteration cap.
        print(f"  Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, "
              f"Iterations: {model.n_iter_}")

        if is_new_best:
            best_val_acc = val_acc
            best_model_candidate = model
            best_config = params.copy()

        print()

    # Step 6: Report the selected configuration
    print(f"\n{'='*70}")
    print(f"BEST MODEL SELECTED (via validation)")
    print(f"{'='*70}")
    print(f"Architecture: {best_config['hidden_layer_sizes']}")
    print(f"Activation: {best_config['activation']}")
    print(f"Regularization (alpha): {best_config['alpha']:.4f}")
    print(f"Validation Accuracy: {best_val_acc:.4f} ({best_val_acc*100:.1f}%)")
    print(f"Iterations to converge: {best_model_candidate.n_iter_}\n")

    # Step 7: Retrain the selected configuration on train + validation
    print("="*70)
    print("PHASE 2: FINAL MODEL TRAINING (on full training set)")
    print("="*70)
    print("Retraining best configuration on complete training data...\n")

    best_model_final = train_neural_network(X_train_full_enc, y_train_full, **best_config)

    # Keep the selection-time validation accuracy with the model so callers
    # can report it; the return tuple itself is unchanged.
    best_model_final.selection_val_accuracy_ = best_val_acc

    # Step 8: Final assessment on the test set
    print("="*70)
    print("PHASE 3: MODEL ASSESSMENT (on independent test set)")
    print("="*70)

    final_train_acc = accuracy_score(y_train_full, best_model_final.predict(X_train_full_enc))
    test_acc = accuracy_score(y_test, best_model_final.predict(X_test_enc))

    print(f"Final model trained on {len(X_train_full)} samples")
    print(f"Training Accuracy: {final_train_acc:.4f}")
    print(f"Test Accuracy: {test_acc:.4f} ({test_acc*100:.1f}%)")
    print(f"Iterations: {best_model_final.n_iter_}\n")

    # Detailed metrics
    evaluate_model(best_model_final, X_test_enc, y_test, dataset_name="Test", verbose=True)

    return best_model_final, best_config, test_acc, (X_test_enc, y_test), (X_train_full_enc, y_train_full)

# Execute simplified pipeline for all MONK datasets
print("\n" + "="*70)
print("SIMPLIFIED MONK PIPELINE - All Datasets")
print("="*70)
print("Strategy: Proper model selection with validation set")
print("  1. Split training data into train/validation")
print("  2. Select best model using validation accuracy")
print("  3. Retrain on full training set")
print("  4. Final assessment on independent test set\n")

RANDOM_STATE = 42  # Fixed seed for reproducibility
results_summary = {}
all_models = {}
all_test_data = {}
all_train_data = {}

for monk_num in [1, 2, 3]:
    model, config, test_acc, test_data, train_data = simplified_monk_pipeline(monk_num, RANDOM_STATE)
    dataset_key = f'MONK-{monk_num}'
    results_summary[dataset_key] = {
        'test_accuracy': test_acc,
        # Prefer a validation accuracy recorded on the model (attribute
        # `selection_val_accuracy_`) when one is present; otherwise fall back
        # to the test accuracy so the comparison plot still finds both keys.
        'validation_accuracy': getattr(model, 'selection_val_accuracy_', test_acc),
        'architecture': config['hidden_layer_sizes'],
        'activation': config['activation'],
        'alpha': config['alpha']
    }
    all_models[dataset_key] = model
    all_test_data[dataset_key] = test_data
    all_train_data[dataset_key] = train_data
    print("\n" + "-"*70 + "\n")

# Final comprehensive summary
print("\n" + "="*70)
print("FINAL SUMMARY - MODEL SELECTION RESULTS")
print("="*70)
print(f"\nRandom seed: {RANDOM_STATE}")
print(f"Validation strategy: 80/20 train-validation split")
print(f"Final model: Retrained on full training set\n")

for dataset, results in results_summary.items():
    test_acc = results['test_accuracy']
    status = "✓ PERFECT" if test_acc == 1.0 else "✗ SUBOPTIMAL" if test_acc < 0.95 else "✓ GOOD"
    print(f"{status} {dataset}:")
    print(f"  - Test Accuracy: {test_acc:.4f} ({test_acc*100:.1f}%)")
    print(f"  - Architecture: {results['architecture']}")
    print(f"  - Activation: {results['activation']}")
    print(f"  - Regularization: α={results['alpha']:.4f}")
    print()

print("="*70)
print("✓ Model selection pipeline completed successfully!")
print("="*70)

# Generate plots
print("\n" + "="*70)
# Learning curves for each dataset
print("\n--- Learning Curves ---")
for name, model in all_models.items():
    X_train_full, y_train_full = all_train_data[name]
    X_test, y_test = all_test_data[name]

    # Create a small validation split for plotting purposes.
    # NOTE: this split is carved out of the data the final model was trained
    # on (all_train_data holds the full training set), so its "validation"
    # part is not a true hold-out and will score like training data.
    X_train_plot, X_val_plot, y_train_plot, y_val_plot = train_test_split(
        X_train_full, y_train_full, test_size=0.15,
        random_state=RANDOM_STATE, stratify=y_train_full
    )

    plot_learning_curves(model, X_train_plot, y_train_plot,
                        X_val_plot, y_val_plot, X_test, y_test, name)

# Accuracy comparison
print("\n--- Accuracy Comparison ---")
plot_accuracy_comparison(results_summary)

# Confusion matrices
print("\n--- Confusion Matrices ---")
plot_confusion_matrices(all_models, all_test_data)

# ROC curves (individual subplots)
print("\n--- ROC Curves (Individual) ---")
plot_roc_curves(all_models, all_test_data)

# ROC curves (combined plot)
print("\n--- ROC Curves (Combined) ---")
plot_combined_roc_curves(all_models, all_test_data)

# Closing banner and notes. The statements below were previously in reverse
# order, so bullets printed before their headings and some were duplicated.
print("\n" + "="*70)
print("✓ All visualizations completed!")
print("="*70)
print("\nNote: For academic presentation, this demonstrates:")
print("  • Proper train/validation/test split methodology")
print("  • Model selection based on validation performance")
print("  • Final model assessment on unseen test data")
print("  • Hyperparameter tuning (architecture, activation, regularization)")
print("  • ROC curves and AUC analysis for model evaluation")
print("\nIf results are not perfect, consider:")
print("  • Different random_state for different data splits")
print("  • Increased max_iter (e.g., 3000-5000) for harder problems")
print("  • Additional architectures in the candidate set")


SIMPLIFIED MONK PIPELINE - All Datasets
Strategy: Proper model selection with validation set
  1. Split training data into train/validation
  2. Select best model using validation accuracy
  3. Retrain on full training set
  4. Final assessment on independent test set


SIMPLIFIED APPROACH - MONK-1

Dataset sizes - Train: 124, Test: 432
Split for model selection - Train: 99, Validation: 25
Features after one-hot encoding: 17

PHASE 1: MODEL SELECTION (using validation set)
✓ NEW BEST Config 1/10: layers=(3,), activation=tanh, alpha=0.0001
  Train Acc: 1.0000, Val Acc: 0.8000, Iterations: 2000

✓ NEW BEST Config 2/10: layers=(4,), activation=tanh, alpha=0.0001
  Train Acc: 1.0000, Val Acc: 0.9600, Iterations: 2000

  Config 3/10: layers=(5,), activation=tanh, alpha=0.0010
  Train Acc: 1.0000, Val Acc: 0.8400, Iterations: 2000

  Config 4/10: layers=(6,), activation=relu, alpha=0.0001
  Train Acc: 1.0000, Val Acc: 0.8000, Iterations: 2000

✓ NEW BEST Config 5/10: layers=(8,), activation