In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from reservoirpy.nodes import Reservoir, Ridge, ESN
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [2]:
# Configuration
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 64
# EMBEDDING_SIZE = 256  # Taille des embeddings pour tous les inputs
# NUM_HEADS = 8
# NUM_LAYERS = 4
# DROPOUT = 0.1
# LEARNING_RATE = 1e-4
EMBEDDING_SIZE = 285
NUM_HEADS = 15
NUM_LAYERS = 2
DROPOUT = 0.2618574215322765
LEARNING_RATE = 0.00015775441092497324
EPOCHS = 20
PATIENCE = 3

In [3]:
def load_data():
    partOfData = 1
    X_genres = pd.read_csv("../data/train/input_genres_tags_data.csv")
    X_instruments = pd.read_csv("../data/train/input_instruments_tags_data.csv")
    X_moods = pd.read_csv("../data/train/input_moods_tags_data.csv")
    X_genres_categories = pd.read_csv("../data/train/input_genres_categories_data.csv")
    X_instruments_categories = pd.read_csv(
        "../data/train/input_instruments_categories_data.csv"
    )
    X_moods_categories = pd.read_csv("../data/train/input_moods_categories_data.csv")

    y_genres = pd.read_csv("../data/train/output_genres_tags_data.csv")
    y_instruments = pd.read_csv("../data/train/output_instruments_tags_data.csv")
    y_moods = pd.read_csv("../data/train/output_moods_tags_data.csv")

    # On peut garder seulement une partie des données
    X_genres = X_genres[: int(partOfData * len(X_genres))]
    X_instruments = X_instruments[: int(partOfData * len(X_instruments))]
    X_moods = X_moods[: int(partOfData * len(X_moods))]
    y_genres = y_genres[: int(partOfData * len(y_genres))]
    y_instruments = y_instruments[: int(partOfData * len(y_instruments))]
    y_moods = y_moods[: int(partOfData * len(y_moods))]
    X_genres_categories = X_genres_categories[
        : int(partOfData * len(X_genres_categories))
    ]
    X_instruments_categories = X_instruments_categories[
        : int(partOfData * len(X_instruments_categories))
    ]
    X_moods_categories = X_moods_categories[: int(partOfData * len(X_moods_categories))]

    return (
        X_genres,
        X_instruments,
        X_moods,
        X_genres_categories,
        X_instruments_categories,
        X_moods_categories,
    ), (y_genres, y_instruments, y_moods)


# Ensure the input data is in the correct format
def reshape_input(X):
    if isinstance(X, pd.DataFrame):
        return X.values.reshape(-1, 1, X.shape[1])  # Handles pandas DataFrame
    elif isinstance(X, np.ndarray):
        return X.reshape(-1, 1, X.shape[1])  # Handles numpy ndarray
    else:
        raise ValueError("Input must be a pandas DataFrame or a numpy ndarray")


def format_predictions(predictions):
    # Convert the list to a NumPy array
    predictions_array = np.array(predictions)

    # Reshape the array to 2-dimensional
    predictions_reshaped = predictions_array.reshape(-1, predictions_array.shape[-1])

    return predictions_reshaped

In [4]:
# Charger les données
(
    (
        X_genres,
        X_instruments,
        X_moods,
        X_genres_categories,
        X_instruments_categories,
        X_moods_categories,
    ),
    (y_genres, y_instruments, y_moods),
) = load_data()

# Train-test split
X_genres_train, X_genres_test, y_genres_train, y_genres_test = train_test_split(
    X_genres, y_genres, test_size=0.2, random_state=42
)
X_instruments_train, X_instruments_test, y_instruments_train, y_instruments_test = (
    train_test_split(X_instruments, y_instruments, test_size=0.2, random_state=42)
)
X_moods_train, X_moods_test, y_moods_train, y_moods_test = train_test_split(
    X_moods, y_moods, test_size=0.2, random_state=42
)

# Train-test split for categories
X_genres_categories_train, X_genres_categories_test = train_test_split(
    X_genres_categories, test_size=0.2, random_state=42
)
X_instruments_categories_train, X_instruments_categories_test = train_test_split(
    X_instruments_categories, test_size=0.2, random_state=42
)
X_moods_categories_train, X_moods_categories_test = train_test_split(
    X_moods_categories, test_size=0.2, random_state=42
)

In [5]:
# Préparation des données
X_genres_train = X_genres_train.drop(columns=["ChallengeID"])
X_instruments_train = X_instruments_train.drop(columns=["ChallengeID"])
X_moods_train = X_moods_train.drop(columns=["ChallengeID"])
y_genres_train = y_genres_train.drop(columns=["ChallengeID"])
y_instruments_train = y_instruments_train.drop(columns=["ChallengeID"])
y_moods_train = y_moods_train.drop(columns=["ChallengeID"])
X_genres_categories_train = X_genres_categories_train.drop(columns=["ChallengeID"])
X_instruments_categories_train = X_instruments_categories_train.drop(
    columns=["ChallengeID"]
)
X_moods_categories_train = X_moods_categories_train.drop(columns=["ChallengeID"])

X_genres_test = X_genres_test.drop(columns=["ChallengeID"])
X_instruments_test = X_instruments_test.drop(columns=["ChallengeID"])
X_moods_test = X_moods_test.drop(columns=["ChallengeID"])
y_genres_test = y_genres_test.drop(columns=["ChallengeID"])
y_instruments_test = y_instruments_test.drop(columns=["ChallengeID"])
y_moods_test = y_moods_test.drop(columns=["ChallengeID"])
X_genres_categories_test = X_genres_categories_test.drop(columns=["ChallengeID"])
X_instruments_categories_test = X_instruments_categories_test.drop(
    columns=["ChallengeID"]
)
X_moods_categories_test = X_moods_categories_test.drop(columns=["ChallengeID"])


X_train = np.concatenate(
    [
        X_genres_train,
        X_instruments_train,
        X_moods_train,
        X_genres_categories_train,
        X_instruments_categories_train,
        X_moods_categories_train,
    ],
    axis=1,
)
X_test = np.concatenate(
    [
        X_genres_test,
        X_instruments_test,
        X_moods_test,
        X_genres_categories_test,
        X_instruments_categories_test,
        X_moods_categories_test,
    ],
    axis=1,
)

y_train = np.concatenate([y_genres_train, y_instruments_train, y_moods_train], axis=1)
y_test = np.concatenate([y_genres_test, y_instruments_test, y_moods_test], axis=1)

# Convertir les données en tensors PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(DEVICE)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(DEVICE)

y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(DEVICE)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).to(DEVICE)


In [34]:
# Initialisation des réservoirs (genre, instrument, mood)
reservoir_Genre = Reservoir(
    units=100,
    sr=0,  # Spectral radius
    lr=1,  # Leak rate
    input_scaling=1.0,
)

reservoir_Instrument = Reservoir(units=100, sr=0, lr=1, input_scaling=1.0)

reservoir_Mood = Reservoir(units=100, sr=0, lr=1, input_scaling=1.0)

# Readout pour chaque réservoir
readout_Genre = Ridge(ridge=1e-4)
readout_Instrument = Ridge(ridge=1e-4)
readout_Mood = Ridge(ridge=1e-4)

# Création des modèles avec ESN (Echo State Network)
model_Genre = ESN(reservoir=reservoir_Genre, readout=readout_Genre, workers=-1)
model_Instrument = ESN(
    reservoir=reservoir_Instrument, readout=readout_Instrument, workers=-1
)
model_Mood = ESN(reservoir=reservoir_Mood, readout=readout_Mood, workers=-1)

In [35]:
X_genres_train_reshaped = reshape_input(X_genres_train)
X_instruments_train_reshaped = reshape_input(X_instruments_train)
X_moods_train_reshaped = reshape_input(X_moods_train)

y_genres_train_reshaped = reshape_input(y_genres_train)
y_instruments_train_reshaped = reshape_input(y_instruments_train)
y_moods_train_reshaped = reshape_input(y_moods_train)

X_genres_test_reshaped = reshape_input(X_genres_test)
X_instruments_test_reshaped = reshape_input(X_instruments_test)
X_moods_test_reshaped = reshape_input(X_moods_test)

y_genres_test_reshaped = reshape_input(y_genres_test)
y_instruments_test_reshaped = reshape_input(y_instruments_test)
y_moods_test_reshaped = reshape_input(y_moods_test)


# Entraîner les réservoirs
# Train the models with one line for single timestep
model_Genre.fit(X_genres_train_reshaped, y_genres_train_reshaped)
model_Instrument.fit(X_instruments_train_reshaped, y_instruments_train_reshaped)
model_Mood.fit(X_moods_train_reshaped, y_moods_train_reshaped)

Running ESN-3: 100%|██████████| 88683/88683 [00:09<00:00, 9244.72it/s] 


Fitting node ESN-3...


Running ESN-4: 100%|██████████| 88683/88683 [00:09<00:00, 8970.70it/s] 


Fitting node ESN-4...


Running ESN-5: 100%|██████████| 88683/88683 [00:08<00:00, 9944.76it/s] 


Fitting node ESN-5...


'ESN-5': ESN('Reservoir-5', 'Ridge-5')

In [6]:
# # Obtenir les sorties des réservoirs
# y_genres_train_pred = model_Genre.run(X_genres_train_reshaped)
# y_instruments_train_pred = model_Instrument.run(X_instruments_train_reshaped)
# y_moods_train_pred = model_Mood.run(X_moods_train_reshaped)

# y_genres_test_pred = model_Genre.run(X_genres_test_reshaped)
# y_instruments_test_pred = model_Instrument.run(X_instruments_test_reshaped)
# y_moods_test_pred = model_Mood.run(X_moods_test_reshaped)

# # Formater les prédictions
# y_genres_train_pred = format_predictions(y_genres_train_pred)
# y_instruments_train_pred = format_predictions(y_instruments_train_pred)
# y_moods_train_pred = format_predictions(y_moods_train_pred)

# y_genres_test_pred = format_predictions(y_genres_test_pred)
# y_instruments_test_pred = format_predictions(y_instruments_test_pred)
# y_moods_test_pred = format_predictions(y_moods_test_pred)


# Combine toutes les sorties (individuelles et croisées)
X_train_final = np.concatenate(
    [
        X_genres_train,
        X_instruments_train,
        X_moods_train,
        X_genres_categories_train,
        X_instruments_categories_train,
        X_moods_categories_train,
    ],
    axis=1,
)
X_test_final = np.concatenate(
    [
        X_genres_test,
        X_instruments_test,
        X_moods_test,
        X_genres_categories_test,
        X_instruments_categories_test,
        X_moods_categories_test,
    ],
    axis=1,
)

In [7]:
# Fraction de données pour la validation
VALIDATION_SPLIT = 0.2

# Division des données en entraînement et validation
(
    X_train_final_train,
    X_train_final_val,
    y_train_tensor_train,
    y_train_tensor_val,
) = train_test_split(
    X_train_final, y_train_tensor, test_size=VALIDATION_SPLIT, random_state=42
)

# Création des datasets
train_dataset = torch.utils.data.TensorDataset(
    torch.tensor(X_train_final_train, dtype=torch.float32).to(DEVICE),
    y_train_tensor_train.clone().detach().to(DEVICE),
)
val_dataset = torch.utils.data.TensorDataset(
    torch.tensor(X_train_final_val, dtype=torch.float32).to(DEVICE),
    y_train_tensor_val.clone().detach().to(DEVICE),
)
test_dataset = torch.utils.data.TensorDataset(
    torch.tensor(X_test_final, dtype=torch.float32).to(DEVICE),
    y_test_tensor.clone().detach().to(DEVICE),
)

# Création des loaders
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=BATCH_SIZE, shuffle=False
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=False
)

In [8]:
train_dataset.tensors[0].shape, train_dataset.tensors[1].shape

(torch.Size([70946, 289]), torch.Size([70946, 248]))

In [9]:
# Modèle de Transformeur
class MultiTaskTransformer(nn.Module):
    def __init__(
        self, input_size, embedding_size, num_heads, num_layers, num_labels, dropout
    ):
        super(MultiTaskTransformer, self).__init__()
        self.embedding = nn.Linear(input_size, embedding_size)  # Embedding Layer
        self.transformer = nn.Transformer(
            d_model=embedding_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.classifier = nn.Linear(embedding_size, num_labels)  # Final Classifier

    def forward(self, x):
        # Pass input through the embedding layer
        embedded = self.embedding(x)

        # Add a positional encoding (if needed)
        embedded = embedded.unsqueeze(1)  # Add sequence dimension

        # Transformer expects (batch, seq_len, embedding_size)
        transformer_output = self.transformer(embedded, embedded)

        # Take only the output of the first token (classification token equivalent)
        output = transformer_output[:, 0, :]  # Extract first token

        # Pass through the classifier
        predictions = self.classifier(output)
        return predictions


# Initialiser le modèle
model = MultiTaskTransformer(
    input_size=X_train_final.shape[
        1
    ],  # Taille totale des sorties concaténées des réservoirs
    embedding_size=EMBEDDING_SIZE,
    num_heads=NUM_HEADS,
    num_layers=NUM_LAYERS,
    num_labels=y_train.shape[1],  # Nombre total de catégories en sortie
    dropout=DROPOUT,
).to(DEVICE)

# Affichage du nombre de paramètres
print(f"Nombre total de paramètres : {sum(p.numel() for p in model.parameters())}")

# Optimiseur et fonction de perte
criterion = nn.BCEWithLogitsLoss()  # Fonction de perte pour les étiquettes binaires
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)



Nombre total de paramètres : 6795430


In [10]:
# Entraînement
def train_model(
    model, train_loader, val_loader, criterion, optimizer, epochs, patience
):
    """
        Entraîne un modèle avec early stopping.

        Args:
            model: Le modèle à entraîner.
            train_loader: DataLoader pour les données d'entraînement.
            val_loader: DataLoader pour les données de validation.
            criterion: Fonction de perte.
            optimizer: Optimiseur.
            epochs: Nombre maximum d'époques.
            patience: Nombre d'époques à attendre pour une amélioration avant d'arrêter l'entraînement.

        Returns:
            Le meilleur modèle basé sur les performances sur l'ensemble    macro avg     0.0000    0.0000    0.0000    636962
    weighted avg     0.0000    0.0000    0.0000    636962
     samples avg     0.0000    0.0000    0.0000  de validation.
    """
    best_loss = float("inf")
    best_model_state = None
    epochs_no_improve = 0

    for epoch in range(epochs):
        # Mode entraînement
        model.train()
        epoch_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        train_loss = epoch_loss / len(train_loader)

        # Mode validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                predictions = model(X_batch)
                loss = criterion(predictions, y_batch)
                val_loss += loss.item()
        val_loss /= len(val_loader)

        print(
            f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}"
        )

        # Early stopping check
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_state = model.state_dict()
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        if epochs_no_improve >= patience:
            print(
                f"Early stopping triggered at epoch {epoch + 1}. Best validation loss: {best_loss:.4f}"
            )
            break

    # Charger le meilleur modèle
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    return model


# Évaluation
def evaluate_model(model, test_loader, criterion):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            total_loss += loss.item()
    print(f"Test Loss: {total_loss / len(test_loader)}")

In [11]:
# Entraîner le modèle
model = train_model(
    model, train_loader, val_loader, criterion, optimizer, EPOCHS, PATIENCE
)

# Évaluer le modèle
evaluate_model(model, test_loader, criterion)


Epoch 1/20, Train Loss: 0.1168, Val Loss: 0.0880
Epoch 2/20, Train Loss: 0.0874, Val Loss: 0.0834
Epoch 3/20, Train Loss: 0.0839, Val Loss: 0.0818
Epoch 4/20, Train Loss: 0.0822, Val Loss: 0.0807
Epoch 5/20, Train Loss: 0.0810, Val Loss: 0.0801
Epoch 6/20, Train Loss: 0.0800, Val Loss: 0.0795
Epoch 7/20, Train Loss: 0.0793, Val Loss: 0.0788
Epoch 8/20, Train Loss: 0.0786, Val Loss: 0.0786
Epoch 9/20, Train Loss: 0.0780, Val Loss: 0.0784
Epoch 10/20, Train Loss: 0.0775, Val Loss: 0.0781
Epoch 11/20, Train Loss: 0.0770, Val Loss: 0.0779
Epoch 12/20, Train Loss: 0.0765, Val Loss: 0.0778
Epoch 13/20, Train Loss: 0.0760, Val Loss: 0.0777
Epoch 14/20, Train Loss: 0.0755, Val Loss: 0.0775
Epoch 15/20, Train Loss: 0.0751, Val Loss: 0.0774
Epoch 16/20, Train Loss: 0.0746, Val Loss: 0.0774
Epoch 17/20, Train Loss: 0.0742, Val Loss: 0.0773
Epoch 18/20, Train Loss: 0.0737, Val Loss: 0.0772
Epoch 19/20, Train Loss: 0.0734, Val Loss: 0.0772
Epoch 20/20, Train Loss: 0.0729, Val Loss: 0.0773
Test Loss

In [12]:
# Evaluation des performances (accuracy, precision, recall, f1-score)
def evaluate_performance(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            predictions = model(X_batch)
            predictions = torch.sigmoid(predictions)
            predictions = (predictions > 0.5).int()
            y_true.append(y_batch.cpu().numpy())
            y_pred.append(predictions.cpu().numpy())
    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)

    # Save 5% of the rows of the predictions as csv files in the data folder in predictions folder
    np.savetxt(
        "../data/predictions/train/y_true.csv",
        y_true[: int(0.05 * len(y_true))],
        delimiter=",",
    )
    np.savetxt(
        "../data/predictions/train/y_pred.csv",
        y_pred[: int(0.05 * len(y_pred))],
        delimiter=",",
    )

    # Histograms plot of the predictions and true values for each tag
    # for i in range(y_true.shape[1]):
    #     plt.hist(y_true[:, i], bins=2, alpha=0.5, label="True")
    #     plt.hist(y_pred[:, i], bins=2, alpha=0.5, label="Predicted")
    #     plt.title(f"Tag {i}")
    #     plt.legend()
    #     plt.savefig(f"../data/predictions/train/histogram_tag_{i}.png")
    #     plt.clf()

    # Accuracy
    accuracy = np.mean(y_true == y_pred)
    print(f"Accuracy: {accuracy}")

    # Precision, Recall, F1-Score
    from sklearn.metrics import classification_report

    report = classification_report(y_true, y_pred, digits=4)
    print(report)


In [16]:
# Evaluer les performances pour test_loader combiné avec le train_loader

# Combine les loaders
# combined_loader = torch.utils.data.DataLoader(
#     torch.utils.data.ConcatDataset([train_dataset, test_dataset, val_dataset]),
#     batch_size=BATCH_SIZE,
#     shuffle=False,
# )

evaluate_performance(model, test_loader)


Accuracy: 0.9719198720793364
              precision    recall  f1-score   support

           0     0.4444    0.1026    0.1667       195
           1     0.6010    0.3935    0.4756      1202
           2     0.4808    0.1004    0.1661       249
           3     1.0000    0.0370    0.0714        81
           4     0.6327    0.4627    0.5345        67
           5     0.5376    0.1887    0.2793       265
           6     0.5882    0.2703    0.3704       111
           7     0.5734    0.1617    0.2523       507
           8     0.5965    0.3920    0.4731      1056
           9     0.5702    0.2974    0.3909       232
          10     0.8229    0.6255    0.7108       713
          11     0.7547    0.6859    0.7187      2041
          12     0.6020    0.4194    0.4944       422
          13     0.6006    0.1490    0.2388      1463
          14     0.5884    0.2009    0.2996       861
          15     0.5484    0.2225    0.3166       382
          16     0.0000    0.0000    0.0000        7

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [20]:
import optuna
from optuna.trial import TrialState

# Define the objective function for Optuna
def objective(trial):
    # Suggest num_heads first
    num_heads = trial.suggest_int("num_heads", 4, 16)
    
    # Calculate valid embedding_size as multiples of num_heads within [128, 512]
    min_embed = ((128 + num_heads - 1) // num_heads) * num_heads
    max_embed = (512 // num_heads) * num_heads
    embedding_size = trial.suggest_int("embedding_size", min_embed, max_embed, step=num_heads)
    
    # Suggest remaining hyperparameters
    num_layers = trial.suggest_int("num_layers", 2, 6)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True)

    # Initialize the model with suggested hyperparameters
    model = MultiTaskTransformer(
        input_size=X_train_final.shape[1],
        embedding_size=embedding_size,
        num_heads=num_heads,
        num_layers=num_layers,
        num_labels=y_train.shape[1],
        dropout=dropout,
    ).to(DEVICE)

    # Define the optimizer and criterion
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCEWithLogitsLoss()

    # Train the model
    model = train_model(model, train_loader, val_loader, criterion, optimizer, EPOCHS, PATIENCE)

    # Evaluate the model on the validation set
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            val_loss += loss.item()
    val_loss /= len(val_loader)

    return val_loss

# Create a study object and optimize the objective function
study = optuna.create_study(storage="sqlite:///db.sqlite3",direction="minimize")
study.optimize(objective, n_trials=50)

# Print the best hyperparameters
print("Best hyperparameters: ", study.best_params)

# Train the final model with the best hyperparameters
best_params = study.best_params
model = MultiTaskTransformer(
    input_size=X_train_final.shape[1],
    embedding_size=best_params["embedding_size"],
    num_heads=best_params["num_heads"],
    num_layers=best_params["num_layers"],
    num_labels=y_train.shape[1],
    dropout=best_params["dropout"],
).to(DEVICE)

optimizer = optim.Adam(model.parameters(), lr=best_params["learning_rate"])
criterion = nn.BCEWithLogitsLoss()

# Train the final model
model = train_model(model, train_loader, val_loader, criterion, optimizer, EPOCHS, PATIENCE)

# Evaluate the final model
evaluate_model(model, test_loader, criterion)
evaluate_performance(model, test_loader)


[I 2025-01-28 15:29:52,126] A new study created in RDB with name: no-name-e82af600-5504-486b-95cb-12b9e9a1ded9


Epoch 1/20, Train Loss: 0.2204, Val Loss: 0.1301
Epoch 2/20, Train Loss: 0.1264, Val Loss: 0.1105
Epoch 3/20, Train Loss: 0.1115, Val Loss: 0.1004
Epoch 4/20, Train Loss: 0.1033, Val Loss: 0.0957
Epoch 5/20, Train Loss: 0.0985, Val Loss: 0.0933
Epoch 6/20, Train Loss: 0.0954, Val Loss: 0.0918
Epoch 7/20, Train Loss: 0.0932, Val Loss: 0.0907
Epoch 8/20, Train Loss: 0.0916, Val Loss: 0.0895
Epoch 9/20, Train Loss: 0.0904, Val Loss: 0.0890
Epoch 10/20, Train Loss: 0.0895, Val Loss: 0.0882
Epoch 11/20, Train Loss: 0.0887, Val Loss: 0.0879
Epoch 12/20, Train Loss: 0.0880, Val Loss: 0.0874
Epoch 13/20, Train Loss: 0.0874, Val Loss: 0.0865
Epoch 14/20, Train Loss: 0.0869, Val Loss: 0.0865
Epoch 15/20, Train Loss: 0.0864, Val Loss: 0.0863
Epoch 16/20, Train Loss: 0.0860, Val Loss: 0.0859
Epoch 17/20, Train Loss: 0.0857, Val Loss: 0.0855
Epoch 18/20, Train Loss: 0.0853, Val Loss: 0.0851
Epoch 19/20, Train Loss: 0.0849, Val Loss: 0.0846
Epoch 20/20, Train Loss: 0.0847, Val Loss: 0.0847


[I 2025-01-28 15:32:46,452] Trial 0 finished with value: 0.0847018271041431 and parameters: {'num_heads': 10, 'embedding_size': 490, 'num_layers': 2, 'dropout': 0.422162743964535, 'learning_rate': 1.2704484884665267e-05}. Best is trial 0 with value: 0.0847018271041431.


Epoch 1/20, Train Loss: 0.1579, Val Loss: 0.1163
Epoch 2/20, Train Loss: 0.1112, Val Loss: 0.1004
Epoch 3/20, Train Loss: 0.1013, Val Loss: 0.0961
Epoch 4/20, Train Loss: 0.0976, Val Loss: 0.0934
Epoch 5/20, Train Loss: 0.0954, Val Loss: 0.0915
Epoch 6/20, Train Loss: 0.0939, Val Loss: 0.0906
Epoch 7/20, Train Loss: 0.0928, Val Loss: 0.0899
Epoch 8/20, Train Loss: 0.0921, Val Loss: 0.0892
Epoch 9/20, Train Loss: 0.0915, Val Loss: 0.0885
Epoch 10/20, Train Loss: 0.0908, Val Loss: 0.0883
Epoch 11/20, Train Loss: 0.0904, Val Loss: 0.0874
Epoch 12/20, Train Loss: 0.0899, Val Loss: 0.0870
Epoch 13/20, Train Loss: 0.0894, Val Loss: 0.0868
Epoch 14/20, Train Loss: 0.0891, Val Loss: 0.0863
Epoch 15/20, Train Loss: 0.0888, Val Loss: 0.0860
Epoch 16/20, Train Loss: 0.0885, Val Loss: 0.0857
Epoch 17/20, Train Loss: 0.0883, Val Loss: 0.0856
Epoch 18/20, Train Loss: 0.0880, Val Loss: 0.0854
Epoch 19/20, Train Loss: 0.0878, Val Loss: 0.0851
Epoch 20/20, Train Loss: 0.0876, Val Loss: 0.0850


[I 2025-01-28 15:37:21,613] Trial 1 finished with value: 0.08504132875542847 and parameters: {'num_heads': 8, 'embedding_size': 376, 'num_layers': 4, 'dropout': 0.43435812817180464, 'learning_rate': 5.560237877967069e-05}. Best is trial 0 with value: 0.0847018271041431.


Epoch 1/20, Train Loss: 0.1116, Val Loss: 0.0892
Epoch 2/20, Train Loss: 0.0888, Val Loss: 0.0854
Epoch 3/20, Train Loss: 0.0863, Val Loss: 0.0839
Epoch 4/20, Train Loss: 0.0849, Val Loss: 0.0830
Epoch 5/20, Train Loss: 0.0840, Val Loss: 0.0825
Epoch 6/20, Train Loss: 0.0833, Val Loss: 0.0821
Epoch 7/20, Train Loss: 0.0827, Val Loss: 0.0818
Epoch 8/20, Train Loss: 0.0821, Val Loss: 0.0810
Epoch 9/20, Train Loss: 0.0817, Val Loss: 0.0810
Epoch 10/20, Train Loss: 0.0812, Val Loss: 0.0806
Epoch 11/20, Train Loss: 0.0808, Val Loss: 0.0804
Epoch 12/20, Train Loss: 0.0804, Val Loss: 0.0805
Epoch 13/20, Train Loss: 0.0800, Val Loss: 0.0801
Epoch 14/20, Train Loss: 0.0797, Val Loss: 0.0800
Epoch 15/20, Train Loss: 0.0793, Val Loss: 0.0798
Epoch 16/20, Train Loss: 0.0790, Val Loss: 0.0796
Epoch 17/20, Train Loss: 0.0786, Val Loss: 0.0792
Epoch 18/20, Train Loss: 0.0784, Val Loss: 0.0793
Epoch 19/20, Train Loss: 0.0780, Val Loss: 0.0792
Epoch 20/20, Train Loss: 0.0777, Val Loss: 0.0792


[I 2025-01-28 15:42:36,075] Trial 2 finished with value: 0.07919551542015385 and parameters: {'num_heads': 9, 'embedding_size': 477, 'num_layers': 4, 'dropout': 0.22808229001597416, 'learning_rate': 0.00014564181656591507}. Best is trial 2 with value: 0.07919551542015385.


Epoch 1/20, Train Loss: 0.1903, Val Loss: 0.1301
Epoch 2/20, Train Loss: 0.1268, Val Loss: 0.1153
Epoch 3/20, Train Loss: 0.1141, Val Loss: 0.1060
Epoch 4/20, Train Loss: 0.1071, Val Loss: 0.1005
Epoch 5/20, Train Loss: 0.1022, Val Loss: 0.0970
Epoch 6/20, Train Loss: 0.0992, Val Loss: 0.0950
Epoch 7/20, Train Loss: 0.0973, Val Loss: 0.0937
Epoch 8/20, Train Loss: 0.0961, Val Loss: 0.0928
Epoch 9/20, Train Loss: 0.0951, Val Loss: 0.0921
Epoch 10/20, Train Loss: 0.0944, Val Loss: 0.0911
Epoch 11/20, Train Loss: 0.0936, Val Loss: 0.0907
Epoch 12/20, Train Loss: 0.0931, Val Loss: 0.0902
Epoch 13/20, Train Loss: 0.0927, Val Loss: 0.0898
Epoch 14/20, Train Loss: 0.0922, Val Loss: 0.0895
Epoch 15/20, Train Loss: 0.0919, Val Loss: 0.0892
Epoch 16/20, Train Loss: 0.0915, Val Loss: 0.0887
Epoch 17/20, Train Loss: 0.0912, Val Loss: 0.0884
Epoch 18/20, Train Loss: 0.0909, Val Loss: 0.0882
Epoch 19/20, Train Loss: 0.0907, Val Loss: 0.0880
Epoch 20/20, Train Loss: 0.0905, Val Loss: 0.0878


[I 2025-01-28 15:46:43,474] Trial 3 finished with value: 0.08775576769555216 and parameters: {'num_heads': 11, 'embedding_size': 297, 'num_layers': 4, 'dropout': 0.4505155841165638, 'learning_rate': 3.4630127467468254e-05}. Best is trial 2 with value: 0.07919551542015385.


Epoch 1/20, Train Loss: 0.2758, Val Loss: 0.1528
Epoch 2/20, Train Loss: 0.1360, Val Loss: 0.1182
Epoch 3/20, Train Loss: 0.1146, Val Loss: 0.1056
Epoch 4/20, Train Loss: 0.1048, Val Loss: 0.0985
Epoch 5/20, Train Loss: 0.0987, Val Loss: 0.0938
Epoch 6/20, Train Loss: 0.0945, Val Loss: 0.0905
Epoch 7/20, Train Loss: 0.0915, Val Loss: 0.0882
Epoch 8/20, Train Loss: 0.0893, Val Loss: 0.0865
Epoch 9/20, Train Loss: 0.0877, Val Loss: 0.0853
Epoch 10/20, Train Loss: 0.0865, Val Loss: 0.0844
Epoch 11/20, Train Loss: 0.0856, Val Loss: 0.0836
Epoch 12/20, Train Loss: 0.0847, Val Loss: 0.0833
Epoch 13/20, Train Loss: 0.0841, Val Loss: 0.0826
Epoch 14/20, Train Loss: 0.0836, Val Loss: 0.0822
Epoch 15/20, Train Loss: 0.0831, Val Loss: 0.0818
Epoch 16/20, Train Loss: 0.0827, Val Loss: 0.0816
Epoch 17/20, Train Loss: 0.0823, Val Loss: 0.0814
Epoch 18/20, Train Loss: 0.0820, Val Loss: 0.0810
Epoch 19/20, Train Loss: 0.0817, Val Loss: 0.0808
Epoch 20/20, Train Loss: 0.0814, Val Loss: 0.0807


[I 2025-01-28 15:49:49,395] Trial 4 finished with value: 0.08069136896900994 and parameters: {'num_heads': 7, 'embedding_size': 259, 'num_layers': 3, 'dropout': 0.14530502458490924, 'learning_rate': 1.1147418052979788e-05}. Best is trial 2 with value: 0.07919551542015385.


Epoch 1/20, Train Loss: 0.1164, Val Loss: 0.0871
Epoch 2/20, Train Loss: 0.0868, Val Loss: 0.0829
Epoch 3/20, Train Loss: 0.0833, Val Loss: 0.0814
Epoch 4/20, Train Loss: 0.0816, Val Loss: 0.0801
Epoch 5/20, Train Loss: 0.0804, Val Loss: 0.0796
Epoch 6/20, Train Loss: 0.0795, Val Loss: 0.0790
Epoch 7/20, Train Loss: 0.0788, Val Loss: 0.0787
Epoch 8/20, Train Loss: 0.0781, Val Loss: 0.0785
Epoch 9/20, Train Loss: 0.0776, Val Loss: 0.0781
Epoch 10/20, Train Loss: 0.0770, Val Loss: 0.0779
Epoch 11/20, Train Loss: 0.0765, Val Loss: 0.0777
Epoch 12/20, Train Loss: 0.0760, Val Loss: 0.0775
Epoch 13/20, Train Loss: 0.0755, Val Loss: 0.0776
Epoch 14/20, Train Loss: 0.0750, Val Loss: 0.0773
Epoch 15/20, Train Loss: 0.0746, Val Loss: 0.0773
Epoch 16/20, Train Loss: 0.0741, Val Loss: 0.0772
Epoch 17/20, Train Loss: 0.0737, Val Loss: 0.0770
Epoch 18/20, Train Loss: 0.0732, Val Loss: 0.0771
Epoch 19/20, Train Loss: 0.0728, Val Loss: 0.0772
Epoch 20/20, Train Loss: 0.0723, Val Loss: 0.0773
Early sto

[I 2025-01-28 15:52:04,848] Trial 5 finished with value: 0.07725657431556167 and parameters: {'num_heads': 15, 'embedding_size': 285, 'num_layers': 2, 'dropout': 0.2618574215322765, 'learning_rate': 0.00015775441092497324}. Best is trial 5 with value: 0.07725657431556167.


Epoch 1/20, Train Loss: 0.2493, Val Loss: 0.1379
Epoch 2/20, Train Loss: 0.1300, Val Loss: 0.1131
Epoch 3/20, Train Loss: 0.1129, Val Loss: 0.1028
Epoch 4/20, Train Loss: 0.1043, Val Loss: 0.0967
Epoch 5/20, Train Loss: 0.0989, Val Loss: 0.0930
Epoch 6/20, Train Loss: 0.0952, Val Loss: 0.0906
Epoch 7/20, Train Loss: 0.0927, Val Loss: 0.0890
Epoch 8/20, Train Loss: 0.0908, Val Loss: 0.0875
Epoch 9/20, Train Loss: 0.0895, Val Loss: 0.0867
Epoch 10/20, Train Loss: 0.0884, Val Loss: 0.0859
Epoch 11/20, Train Loss: 0.0875, Val Loss: 0.0854
Epoch 12/20, Train Loss: 0.0869, Val Loss: 0.0848
Epoch 13/20, Train Loss: 0.0862, Val Loss: 0.0845
Epoch 14/20, Train Loss: 0.0858, Val Loss: 0.0839
Epoch 15/20, Train Loss: 0.0853, Val Loss: 0.0836
Epoch 16/20, Train Loss: 0.0849, Val Loss: 0.0834
Epoch 17/20, Train Loss: 0.0846, Val Loss: 0.0829
Epoch 18/20, Train Loss: 0.0842, Val Loss: 0.0829
Epoch 19/20, Train Loss: 0.0840, Val Loss: 0.0826
Epoch 20/20, Train Loss: 0.0837, Val Loss: 0.0824


[I 2025-01-28 15:56:14,683] Trial 6 finished with value: 0.0824053144658641 and parameters: {'num_heads': 7, 'embedding_size': 364, 'num_layers': 3, 'dropout': 0.27089571731073847, 'learning_rate': 1.0633027518029303e-05}. Best is trial 5 with value: 0.07725657431556167.


Epoch 1/20, Train Loss: 0.1513, Val Loss: 0.0992
Epoch 2/20, Train Loss: 0.0960, Val Loss: 0.0890
Epoch 3/20, Train Loss: 0.0890, Val Loss: 0.0854
Epoch 4/20, Train Loss: 0.0858, Val Loss: 0.0835
Epoch 5/20, Train Loss: 0.0838, Val Loss: 0.0822
Epoch 6/20, Train Loss: 0.0826, Val Loss: 0.0813
Epoch 7/20, Train Loss: 0.0816, Val Loss: 0.0807
Epoch 8/20, Train Loss: 0.0809, Val Loss: 0.0804
Epoch 9/20, Train Loss: 0.0803, Val Loss: 0.0799
Epoch 10/20, Train Loss: 0.0798, Val Loss: 0.0796
Epoch 11/20, Train Loss: 0.0793, Val Loss: 0.0794
Epoch 12/20, Train Loss: 0.0789, Val Loss: 0.0790
Epoch 13/20, Train Loss: 0.0785, Val Loss: 0.0789
Epoch 14/20, Train Loss: 0.0781, Val Loss: 0.0788
Epoch 15/20, Train Loss: 0.0778, Val Loss: 0.0785
Epoch 16/20, Train Loss: 0.0775, Val Loss: 0.0784
Epoch 17/20, Train Loss: 0.0771, Val Loss: 0.0783
Epoch 18/20, Train Loss: 0.0768, Val Loss: 0.0782
Epoch 19/20, Train Loss: 0.0765, Val Loss: 0.0781
Epoch 20/20, Train Loss: 0.0762, Val Loss: 0.0781


[I 2025-01-28 16:05:21,383] Trial 7 finished with value: 0.07814515282889065 and parameters: {'num_heads': 9, 'embedding_size': 504, 'num_layers': 5, 'dropout': 0.13337936290597635, 'learning_rate': 2.530806289740109e-05}. Best is trial 5 with value: 0.07725657431556167.


Epoch 1/20, Train Loss: 0.1173, Val Loss: 0.0925
Epoch 2/20, Train Loss: 0.0926, Val Loss: 0.0882
Epoch 3/20, Train Loss: 0.0899, Val Loss: 0.0868
Epoch 4/20, Train Loss: 0.0885, Val Loss: 0.0856
Epoch 5/20, Train Loss: 0.0875, Val Loss: 0.0850
Epoch 6/20, Train Loss: 0.0868, Val Loss: 0.0843
Epoch 7/20, Train Loss: 0.0861, Val Loss: 0.0843
Epoch 8/20, Train Loss: 0.0856, Val Loss: 0.0838
Epoch 9/20, Train Loss: 0.0850, Val Loss: 0.0833
Epoch 10/20, Train Loss: 0.0846, Val Loss: 0.0830
Epoch 11/20, Train Loss: 0.0842, Val Loss: 0.0827
Epoch 12/20, Train Loss: 0.0838, Val Loss: 0.0823
Epoch 13/20, Train Loss: 0.0834, Val Loss: 0.0820
Epoch 14/20, Train Loss: 0.0831, Val Loss: 0.0818
Epoch 15/20, Train Loss: 0.0827, Val Loss: 0.0818
Epoch 16/20, Train Loss: 0.0824, Val Loss: 0.0815
Epoch 17/20, Train Loss: 0.0820, Val Loss: 0.0813
Epoch 18/20, Train Loss: 0.0816, Val Loss: 0.0808
Epoch 19/20, Train Loss: 0.0811, Val Loss: 0.0807
Epoch 20/20, Train Loss: 0.0806, Val Loss: 0.0805


[I 2025-01-28 16:08:50,784] Trial 8 finished with value: 0.08048821087899825 and parameters: {'num_heads': 9, 'embedding_size': 162, 'num_layers': 4, 'dropout': 0.2536116624411341, 'learning_rate': 0.00037704316085736175}. Best is trial 5 with value: 0.07725657431556167.


Epoch 1/20, Train Loss: 0.2115, Val Loss: 0.1216
Epoch 2/20, Train Loss: 0.1145, Val Loss: 0.1013
Epoch 3/20, Train Loss: 0.1009, Val Loss: 0.0934
Epoch 4/20, Train Loss: 0.0942, Val Loss: 0.0890
Epoch 5/20, Train Loss: 0.0903, Val Loss: 0.0864
Epoch 6/20, Train Loss: 0.0877, Val Loss: 0.0848
Epoch 7/20, Train Loss: 0.0859, Val Loss: 0.0836
Epoch 8/20, Train Loss: 0.0847, Val Loss: 0.0829
Epoch 9/20, Train Loss: 0.0838, Val Loss: 0.0822
Epoch 10/20, Train Loss: 0.0831, Val Loss: 0.0816
Epoch 11/20, Train Loss: 0.0825, Val Loss: 0.0813
Epoch 12/20, Train Loss: 0.0820, Val Loss: 0.0808
Epoch 13/20, Train Loss: 0.0816, Val Loss: 0.0807
Epoch 14/20, Train Loss: 0.0812, Val Loss: 0.0804
Epoch 15/20, Train Loss: 0.0808, Val Loss: 0.0802
Epoch 16/20, Train Loss: 0.0805, Val Loss: 0.0801
Epoch 17/20, Train Loss: 0.0803, Val Loss: 0.0800
Epoch 18/20, Train Loss: 0.0800, Val Loss: 0.0797
Epoch 19/20, Train Loss: 0.0798, Val Loss: 0.0795
Epoch 20/20, Train Loss: 0.0796, Val Loss: 0.0792


[I 2025-01-28 16:11:35,877] Trial 9 finished with value: 0.0792430375816582 and parameters: {'num_heads': 10, 'embedding_size': 450, 'num_layers': 2, 'dropout': 0.19859796322956355, 'learning_rate': 1.1900639463338494e-05}. Best is trial 5 with value: 0.07725657431556167.


Epoch 1/20, Train Loss: 0.1326, Val Loss: 0.1294
Epoch 2/20, Train Loss: 0.1283, Val Loss: 0.1291
Epoch 3/20, Train Loss: 0.1281, Val Loss: 0.1289
Epoch 4/20, Train Loss: 0.1280, Val Loss: 0.1283
Epoch 5/20, Train Loss: 0.1279, Val Loss: 0.1282


[W 2025-01-28 16:12:55,897] Trial 10 failed with parameters: {'num_heads': 16, 'embedding_size': 208, 'num_layers': 6, 'dropout': 0.3519335601598031, 'learning_rate': 0.0008672706583596746} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/elouarn/Documents/Projet_IA/challenge-data-music-catalogs/music-catalogs-classifier/.venv/lib/python3.12/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_12345/4229465671.py", line 34, in objective
    model = train_model(model, train_loader, val_loader, criterion, optimizer, EPOCHS, PATIENCE)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_12345/1285253702.py", line 34, in train_model
    loss.backward()
  File "/home/elouarn/Documents/Projet_IA/challenge-data-music-catalogs/music-catalogs-classifier/.venv/lib/python3.12/

KeyboardInterrupt: 

In [21]:
# Print the best hyperparameters
print("Best hyperparameters: ", study.best_params)

# Train the final model with the best hyperparameters
best_params = study.best_params
model = MultiTaskTransformer(
    input_size=X_train_final.shape[1],
    embedding_size=best_params["embedding_size"],
    num_heads=best_params["num_heads"],
    num_layers=best_params["num_layers"],
    num_labels=y_train.shape[1],
    dropout=best_params["dropout"],
).to(DEVICE)

optimizer = optim.Adam(model.parameters(), lr=best_params["learning_rate"])
criterion = nn.BCEWithLogitsLoss()

# Train the final model
model = train_model(model, train_loader, val_loader, criterion, optimizer, EPOCHS, PATIENCE)

# Evaluate the final model
evaluate_model(model, test_loader, criterion)
evaluate_performance(model, test_loader)

Best hyperparameters:  {'num_heads': 15, 'embedding_size': 285, 'num_layers': 2, 'dropout': 0.2618574215322765, 'learning_rate': 0.00015775441092497324}




Epoch 1/20, Train Loss: 0.1162, Val Loss: 0.0874
Epoch 2/20, Train Loss: 0.0868, Val Loss: 0.0830
Epoch 3/20, Train Loss: 0.0834, Val Loss: 0.0812
Epoch 4/20, Train Loss: 0.0816, Val Loss: 0.0802
Epoch 5/20, Train Loss: 0.0804, Val Loss: 0.0797
Epoch 6/20, Train Loss: 0.0796, Val Loss: 0.0791
Epoch 7/20, Train Loss: 0.0788, Val Loss: 0.0786
Epoch 8/20, Train Loss: 0.0782, Val Loss: 0.0785
Epoch 9/20, Train Loss: 0.0776, Val Loss: 0.0783
Epoch 10/20, Train Loss: 0.0771, Val Loss: 0.0780
Epoch 11/20, Train Loss: 0.0765, Val Loss: 0.0778
Epoch 12/20, Train Loss: 0.0760, Val Loss: 0.0776
Epoch 13/20, Train Loss: 0.0755, Val Loss: 0.0774
Epoch 14/20, Train Loss: 0.0751, Val Loss: 0.0773
Epoch 15/20, Train Loss: 0.0746, Val Loss: 0.0773
Epoch 16/20, Train Loss: 0.0742, Val Loss: 0.0772
Epoch 17/20, Train Loss: 0.0737, Val Loss: 0.0773
Epoch 18/20, Train Loss: 0.0733, Val Loss: 0.0773
Epoch 19/20, Train Loss: 0.0728, Val Loss: 0.0774
Early stopping triggered at epoch 19. Best validation loss:

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [22]:
evaluate_performance(model, train_loader)

Accuracy: 0.9740548922715414
              precision    recall  f1-score   support

           0     0.5071    0.1880    0.2744       569
           1     0.6560    0.3902    0.4893      3983
           2     0.7778    0.1034    0.1826       812
           3     0.8095    0.0667    0.1232       255
           4     0.7545    0.4150    0.5355       200
           5     0.6587    0.2345    0.3459       938
           6     0.7072    0.3184    0.4391       402
           7     0.5363    0.2528    0.3436      1606
           8     0.6254    0.3883    0.4791      3237
           9     0.6746    0.1597    0.2582       714
          10     0.8491    0.7270    0.7833      2392
          11     0.7634    0.7369    0.7499      6648
          12     0.6857    0.4658    0.5547      1447
          13     0.6480    0.2444    0.3549      4505
          14     0.7051    0.2321    0.3493      2925
          15     0.5903    0.3142    0.4101      1165
          16     0.5357    0.0679    0.1205       22

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
import optuna
from optuna.trial import TrialState

# Define the objective function for Optuna
def objective(trial):
    # Suggest num_heads first
    num_heads = trial.suggest_int("num_heads", 4, 16)
    
    # Calculate valid embedding_size as multiples of num_heads within [128, 512]
    min_embed = ((128 + num_heads - 1) // num_heads) * num_heads
    max_embed = (512 // num_heads) * num_heads
    embedding_size = trial.suggest_int("embedding_size", min_embed, max_embed, step=num_heads)
    
    # Suggest remaining hyperparameters
    num_layers = trial.suggest_int("num_layers", 2, 6)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True)

    # Initialize the model with suggested hyperparameters
    model = MultiTaskTransformer(
        input_size=X_train_final.shape[1],
        embedding_size=embedding_size,
        num_heads=num_heads,
        num_layers=num_layers,
        num_labels=y_train.shape[1],
        dropout=dropout,
    ).to(DEVICE)

    # Define the optimizer and criterion
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCEWithLogitsLoss()

    # Train the model
    model = train_model(model, train_loader, val_loader, criterion, optimizer, EPOCHS, PATIENCE)

    # Evaluate the model on the validation set
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            val_loss += loss.item()
    val_loss /= len(val_loader)

    return val_loss

# Load the existing study from the SQLite database
study = optuna.load_study(
    study_name="no-name-e82af600-5504-486b-95cb-12b9e9a1ded9",  # Replace with your study name
    storage="sqlite:///db.sqlite3",
)

# Continue optimizing the objective function
study.optimize(objective, n_trials=50)



Epoch 1/20, Train Loss: 0.1305, Val Loss: 0.0924
Epoch 2/20, Train Loss: 0.0910, Val Loss: 0.0850


[W 2025-01-28 16:53:59,928] Trial 24 failed with parameters: {'num_heads': 16, 'embedding_size': 176, 'num_layers': 2, 'dropout': 0.3735526181976074, 'learning_rate': 0.0001856201922305069} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/elouarn/Documents/Projet_IA/challenge-data-music-catalogs/music-catalogs-classifier/.venv/lib/python3.12/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_158452/3144562053.py", line 34, in objective
    model = train_model(model, train_loader, val_loader, criterion, optimizer, EPOCHS, PATIENCE)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_158452/1285253702.py", line 35, in train_model
    optimizer.step()
  File "/home/elouarn/Documents/Projet_IA/challenge-data-music-catalogs/music-catalogs-classifier/.venv/lib/python3.

KeyboardInterrupt: 

In [19]:
# Print the best hyperparameters
print("Best hyperparameters: ", study.best_params)

# Train the final model with the best hyperparameters
best_params = study.best_params
model = MultiTaskTransformer(
    input_size=X_train_final.shape[1],
    embedding_size=best_params["embedding_size"],
    num_heads=best_params["num_heads"],
    num_layers=best_params["num_layers"],
    num_labels=y_train.shape[1],
    dropout=best_params["dropout"],
).to(DEVICE)

optimizer = optim.Adam(model.parameters(), lr=best_params["learning_rate"])
criterion = nn.BCEWithLogitsLoss()

# Train the final model
model = train_model(model, train_loader, val_loader, criterion, optimizer, EPOCHS, PATIENCE)

# Evaluate the final model
evaluate_model(model, test_loader, criterion)
evaluate_performance(model, test_loader)

Best hyperparameters:  {'num_heads': 15, 'embedding_size': 285, 'num_layers': 2, 'dropout': 0.2618574215322765, 'learning_rate': 0.00015775441092497324}




Epoch 1/20, Train Loss: 0.1162, Val Loss: 0.0874
Epoch 2/20, Train Loss: 0.0869, Val Loss: 0.0832
Epoch 3/20, Train Loss: 0.0835, Val Loss: 0.0813
Epoch 4/20, Train Loss: 0.0817, Val Loss: 0.0803
Epoch 5/20, Train Loss: 0.0805, Val Loss: 0.0796
Epoch 6/20, Train Loss: 0.0796, Val Loss: 0.0792
Epoch 7/20, Train Loss: 0.0789, Val Loss: 0.0788
Epoch 8/20, Train Loss: 0.0782, Val Loss: 0.0786
Epoch 9/20, Train Loss: 0.0776, Val Loss: 0.0781
Epoch 10/20, Train Loss: 0.0771, Val Loss: 0.0780
Epoch 11/20, Train Loss: 0.0766, Val Loss: 0.0777
Epoch 12/20, Train Loss: 0.0761, Val Loss: 0.0777
Epoch 13/20, Train Loss: 0.0756, Val Loss: 0.0775
Epoch 14/20, Train Loss: 0.0751, Val Loss: 0.0774
Epoch 15/20, Train Loss: 0.0747, Val Loss: 0.0773
Epoch 16/20, Train Loss: 0.0742, Val Loss: 0.0773
Epoch 17/20, Train Loss: 0.0737, Val Loss: 0.0771
Epoch 18/20, Train Loss: 0.0733, Val Loss: 0.0772
Epoch 19/20, Train Loss: 0.0729, Val Loss: 0.0773
Epoch 20/20, Train Loss: 0.0724, Val Loss: 0.0773
Early sto

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [20]:
# Sauvegarder le modèle et les ESNs dans le dossier models et un sous dossier qui s'incrémente
import os
import pickle

# Créer un dossier models s'il n'existe pas
if not os.path.exists("../models"):
    os.makedirs("../models")

# Créer un sous-dossier pour les modèles
sub_folder = 0
while os.path.exists(f"../models/model_{sub_folder}"):
    sub_folder += 1
os.makedirs(f"../models/model_{sub_folder}")

# Sauvegarder le modèle
torch.save(model.state_dict(), f"../models/model_{sub_folder}/transformer_weights.pth")

with open(f"../models/model_{sub_folder}/transformer.pkl", "wb") as f:
    pickle.dump(model, f)

# Sauvegarder les ESNs
with open(f"../models/model_{sub_folder}/esn_Genre.pkl", "wb") as f:
    pickle.dump(model_Genre, f)

with open(f"../models/model_{sub_folder}/esn_Instrument.pkl", "wb") as f:
    pickle.dump(model_Instrument, f)

with open(f"../models/model_{sub_folder}/esn_Mood.pkl", "wb") as f:
    pickle.dump(model_Mood, f)

# with open(f"../models/model_{sub_folder}/esn_Genre_Instrument.pkl", "wb") as f:
#     pickle.dump(model_Genre_Instrument, f)

# with open(f"../models/model_{sub_folder}/esn_Genre_Mood.pkl", "wb") as f:
#     pickle.dump(model_Genre_Mood, f)

# with open(f"../models/model_{sub_folder}/esn_Instrument_Mood.pkl", "wb") as f:
#     pickle.dump(model_Instrument_Mood, f)


print("Transformer et ESNs sauvegardés avec succès !")

Transformer et ESNs sauvegardés avec succès !
