In [10]:
from google.colab import drive
# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder
import numpy as np
import matplotlib.pyplot as plt
from google.colab import drive
from sklearn.decomposition import PCA
import joblib

In [12]:
# Load the LabelEncoder to get algorithm names
try:
    label_encoder = joblib.load('/content/drive/MyDrive/Wail-Projet-F/MLP/Data/label_encoder.pkl')
    if not isinstance(label_encoder, LabelEncoder):
        raise ValueError("Loaded object is not a LabelEncoder. Please check the file.")
    algorithm_names = {i: name for i, name in enumerate(label_encoder.classes_)}
    print(f"Algorithm names loaded: {algorithm_names}")
except FileNotFoundError:
    print("Error: label_encoderc_top11.pkl not found. Using placeholder names.")
    algorithm_names = {i: f"Algo{i}" for i in range(10)}
except Exception as e:
    print(f"Error loading label encoder: {e}. Using placeholder names.")
    algorithm_names = {i: f"Algo{i}" for i in range(10)}

Algorithm names loaded: {0: 'BIPOP-CMA-ES', 1: 'CMA-CSA_Atamna', 2: 'CMAES-APOP-KMA_Nguyen', 3: 'DE-BFGS_voglis_noiseless', 4: 'a-CMA-ES', 5: 'ad-CMA-ES_Gissler', 6: 'adm-CMA-ES_Gissler', 7: 'dm-CMA-ES_Gissler', 8: 's-CMA-ES_Gissler', 9: 'sd-CMA-ES_Gissler'}


In [13]:
# Open file to save results
with open('/content/drive/MyDrive/Wail-Projet-F/MLP/Result/results.txt', 'w') as f:
    # Redirect print to file
    import sys
    original_stdout = sys.stdout
    sys.stdout = f

    # -----------------------------------
    # 1. Data Loading and Preprocessing
    # -----------------------------------

    # Load the dataset
    data = pd.read_csv('/content/drive/MyDrive/Wail-Projet-F/MLP/Data/normalized_dataset.csv')

    # Investigate row count
    unique_combinations = data[['Function', 'Instance', 'Dimension']].drop_duplicates()

    # Define columns to drop
    columns_to_drop = ['FID', 'IID', 'Dimension', 'ERT', 'min_ERT', 'RELERT']

    # Drop specified columns if they exist
    data = data.drop(columns=[col for col in columns_to_drop if col in data.columns], axis=1)

    # Define features and target
    X = data.drop('Best Algorithm', axis=1)
    y = data['Best Algorithm']

    # Check class distribution before SMOTE
    print("\nClass Distribution before SMOTE:")
    class_counts = pd.Series(y).value_counts().sort_index()
    print(class_counts)

    # Determine minimum class size for SMOTE k_neighbors
    min_class_size = class_counts.min()
    k_neighbors = min(3, max(1, min_class_size - 1))  # Ensure k_neighbors <= min class size

    # Apply SMOTE to balance classes
    smote = SMOTE(sampling_strategy='not majority', k_neighbors=k_neighbors, random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)

    # Split data into train, validation, and test sets
    X_train, X_temp, y_train, y_temp = train_test_split(X_resampled, y_resampled, test_size=0.4, random_state=42, shuffle=True)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, shuffle=True)

    # Print class distributions
    print("\nClass distribution in y_train:", pd.Series(y_train).value_counts())
    print("Class distribution in y_val:", pd.Series(y_val).value_counts())
    print("Class distribution in y_test:", pd.Series(y_test).value_counts())

    # Apply PCA to reduce dimensionality
    pca = PCA(n_components=50, random_state=42)
    X_train = pca.fit_transform(X_train)
    X_val = pca.transform(X_val)
    X_test = pca.transform(X_test)

    # -----------------------------------
    # 2. Model Definition
    # -----------------------------------

    # Define the MLP classifier
    model = MLPClassifier(
        hidden_layer_sizes=(128, 128, 64),
        activation='relu',
        solver='adam',
        learning_rate_init=0.0005,
        alpha=0.001,  # L2 regularization
        max_iter=200,
        batch_size=32,
        early_stopping=True,
        validation_fraction=0.1,
        n_iter_no_change=10,
        random_state=42
    )

    # -----------------------------------
    # 3. Model Training
    # -----------------------------------

    # Train the model
    model.fit(X_train, y_train)

    # Calculate and print final training accuracy and loss
    y_train_pred = model.predict(X_train)
    train_accuracy = np.mean(y_train_pred == y_train)
    train_loss = model.loss_curve_[-1] if model.loss_curve_ else float('nan')
    print(f"\nFinal Training Accuracy: {train_accuracy:.4f}")
    print(f"Final Training Loss: {train_loss:.4f}")

    # -----------------------------------
    # 4. Model Visualization and Saving
    # -----------------------------------

    # Simulate training history for plotting
    train_scores = []
    val_scores = []
    for i in range(len(model.loss_curve_)):
        train_scores.append(1 - model.loss_curve_[i])
        val_scores.append(model.validation_scores_[i] if i < len(model.validation_scores_) else val_scores[-1])

    # Plot training history
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # Accuracy plot
    ax1.plot(train_scores, label='Training Accuracy (Approximated)')
    ax1.plot(val_scores, label='Validation Accuracy')
    ax1.set_title('Model Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy')
    ax1.legend()

    # Loss plot
    ax2.plot(model.loss_curve_, label='Training Loss')
    ax2.set_title('Model Loss')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.legend()
    ax2.text(0.5, -0.1, '', transform=ax2.transAxes, ha='center')

    # Save accuracy/loss plot
    plt.savefig('/content/drive/MyDrive/Wail-Projet-F/MLP/Result/model_architecture.png')
    print(f"Accuracy and Loss plot saved to: /content/drive/MyDrive/Wail-Projet-F/MLP/Result/model_architecture.png")
    plt.close()

    # -----------------------------------
    # 5. Model Evaluation on Test Set
    # -----------------------------------

    # Evaluate on test set
    test_acc = model.score(X_test, y_test)
    print(f'Test Accuracy: {test_acc:.3f}')

    # Generate predictions
    y_pred = model.predict(X_test)

    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred)

    # Convert to percentages
    cm_percentage = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100

    # Plot confusion matrix with algorithm names
    fig, ax = plt.subplots(figsize=(12, 12), dpi=100)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm_percentage, display_labels=[algorithm_names[i] for i in range(10)])
    disp.plot(cmap=plt.cm.Blues, ax=ax, values_format=".2f")
    plt.title('Confusion Matrix with Percentages', pad=20)
    plt.xlabel('Predicted Algorithm')
    plt.ylabel('True Algorithm')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig('/content/drive/MyDrive/Wail-Projet-F/MLP/Result/confusion_matrix.png')
    print(f"Confusion Matrix plot saved to: /content/drive/MyDrive/Wail-Projet-F/MLP/Result/confusion_matrix.png")
    plt.close()

    # Print classification report
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, zero_division=0, target_names=[algorithm_names[i] for i in range(10)]))

    # Save the model
    joblib.dump(model, '/content/drive/MyDrive/Wail-Projet-F/MLP/Result/mlp_model.pkl')
    print("Model saved to: /content/drive/MyDrive/Wail-Projet-F/MLP/Result/mlp_model.pkl")

    # Restore original stdout
    sys.stdout = original_stdout

# Verify file content
with open('/content/drive/MyDrive/Wail-Projet-F/MLP/Result/results.txt', 'r') as f:
    print(f"Results saved to: /content/drive/MyDrive/Wail-Projet-F/MLP/Result/results.txt")
    print(f.read())

Results saved to: /content/drive/MyDrive/Wail-Projet-F/MLP/Result/results.txt

Class Distribution before SMOTE:
Best Algorithm
0     27
1     41
2     26
3    193
4     48
5     36
6     31
7     31
8     17
9     30
Name: count, dtype: int64

Class distribution in y_train: Best Algorithm
1    124
4    124
8    120
5    119
3    116
2    116
6    114
7    112
0    108
9    105
Name: count, dtype: int64
Class distribution in y_val: Best Algorithm
9    50
0    45
2    45
6    41
7    39
5    38
4    36
3    34
8    31
1    27
Name: count, dtype: int64
Class distribution in y_test: Best Algorithm
3    43
1    42
8    42
7    42
0    40
9    38
6    38
5    36
4    33
2    32
Name: count, dtype: int64

Final Training Accuracy: 0.9836
Final Training Loss: 0.0530
Accuracy and Loss plot saved to: /content/drive/MyDrive/Wail-Projet-F/MLP/Result/model_architecture.png
Test Accuracy: 0.847
Confusion Matrix plot saved to: /content/drive/MyDrive/Wail-Projet-F/MLP/Result/confusion_matrix.png

Class