# Final model training and comparison of its results on different datasets

In [1]:
from torch.utils.data import DataLoader
import torch

In [2]:
from src.data.make_dataset import AudioDataset

In [3]:
import optuna

In [4]:
from src.models.final_model import CNN_Seq2Seq
from src.models.train import train_model
from src.models.tuning_train import tuning_train_model
from src.models.evaluate import evaluate_model

In [5]:
# Run on the GPU when PyTorch reports CUDA support, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Device:', device)

Device: cuda


## Load the data

In [6]:
# Train and test splits are read from the same cqt_bins_24 feature directory
train_dataset = AudioDataset('../Data/processed/final_data/cqt_bins_24/', 'train.h5')
test_dataset = AudioDataset('../Data/processed/final_data/cqt_bins_24/', 'test.h5')

# Only the test split gets a loader here; it is iterated in a fixed (unshuffled) order
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=512,
    num_workers=16,
    shuffle=False,
)

## Hyperparameter tuning with Optuna and evaluating the results

In [7]:
def objective(trial):
    """Optuna objective: train a fresh CNN_Seq2Seq with sampled hyperparameters.

    Args:
        trial: Optuna trial used to sample the learning rate, batch size,
            number of epochs and optimizer name.

    Returns:
        The value returned by ``tuning_train_model`` (bound to ``avg_f1``).
        The study that runs this objective is created with
        ``direction="maximize"``, so a larger value is better.
    """
    # Search space: log-scaled learning rate, two batch sizes, 2-4 epochs, three optimizers
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [256, 512])
    num_epochs = trial.suggest_int("num_epochs", 2, 4)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])

    # A new model per trial, so no weights carry over from a previous trial
    model = CNN_Seq2Seq().to(device)

    # NOTE(review): every trial passes the same save_dir, so a checkpoint written by one
    # trial can presumably be overwritten by a later (possibly worse) trial — confirm
    # how tuning_train_model names the files it saves.
    avg_f1 = tuning_train_model(model, train_dataset, device, num_epochs=num_epochs,
                                batch_size=batch_size, learning_rate=learning_rate,
                                optimizer_name=optimizer_name,
                                save_dir='../Models/final_model/cqt_bins_24')

    # Higher is better: the study maximizes the returned value
    return avg_f1

In [8]:
# Run the hyperparameter search: four trials, keeping the trial with the largest objective value
study = optuna.create_study(direction="maximize")
study.optimize(func=objective, n_trials=4)

[I 2023-11-28 18:31:26,351] A new study created in memory with name: no-name-b6bae265-14ef-4206-9667-95fa1e21662d
Epoch 1/4 - Training, Loss: 0.002: 100%|██████████| 51795/51795 [27:16<00:00, 31.64it/s]


Test Accuracy: 0.7587003149435274
Test Recall: 0.6257473926610304
Test Precision: 0.8691199387758639
Test F1: 0.7221590470368521
Epoch [1/4] - Validation F1: 0.722
Saved best model state to ../Models/final_model/cqt_bins_24\best_model_state.pth


Epoch 2/4 - Training, Loss: 0.001: 100%|██████████| 51795/51795 [26:20<00:00, 32.77it/s]


Test Accuracy: 0.7992038173327541
Test Recall: 0.7354459106521701
Test Precision: 0.8964040447263598
Test F1: 0.8058528069921088
Epoch [2/4] - Validation F1: 0.806
Saved best model state to ../Models/final_model/cqt_bins_24\best_model_state.pth


Epoch 3/4 - Training, Loss: 0.001: 100%|██████████| 51795/51795 [26:18<00:00, 32.82it/s]


Test Accuracy: 0.8245038281928757
Test Recall: 0.7909734449892902
Test Precision: 0.9063117646089277
Test F1: 0.8436786040231886
Epoch [3/4] - Validation F1: 0.844
Saved best model state to ../Models/final_model/cqt_bins_24\best_model_state.pth


Epoch 4/4 - Training, Loss: 0.001: 100%|██████████| 51795/51795 [27:09<00:00, 31.78it/s]


Test Accuracy: 0.8436583677237185
Test Recall: 0.8234736073505018
Test Precision: 0.9150839737993609
Test F1: 0.8660265952083842
Epoch [4/4] - Validation F1: 0.866
Saved best model state to ../Models/final_model/cqt_bins_24\best_model_state.pth


[I 2023-11-28 20:48:03,804] Trial 0 finished with value: 0.8660265952083842 and parameters: {'learning_rate': 0.0001263808620892193, 'batch_size': 256, 'num_epochs': 4, 'optimizer': 'RMSprop'}. Best is trial 0 with value: 0.8660265952083842.
Epoch 1/4 - Training, Loss: 0.693:   8%|▊         | 2048/25898 [02:36<30:22, 13.09it/s] 
[W 2023-11-28 20:50:41,118] Trial 1 failed with parameters: {'learning_rate': 0.001178671106733721, 'batch_size': 512, 'num_epochs': 4, 'optimizer': 'SGD'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\Users\sokos\anaconda3\envs\torch_gpu\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\sokos\AppData\Local\Temp\ipykernel_20504\3268455426.py", line 12, in objective
    avg_f1 = tuning_train_model(model, train_dataset, device, num_epochs=num_epochs,
  File "C:\Users\sokos\DataspellProjects\Music_Transcriptor\src\models\tuning_train.py", lin

KeyboardInterrupt: 

In [9]:
# Report the best trial: its objective value, then one line per tuned hyperparameter
print("Best trial:")
trial = study.best_trial
summary_lines = [f" Value: {trial.value}", " Params: "]
summary_lines.extend(f"    {key}: {value}" for key, value in trial.params.items())
print("\n".join(summary_lines))

Best trial:
 Value: 0.8660265952083842
 Params: 
    learning_rate: 0.0001263808620892193
    batch_size: 256
    num_epochs: 4
    optimizer: RMSprop


In [14]:
# Rebuild the architecture on the selected device and restore saved weights into it.
model = CNN_Seq2Seq().to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint that was saved
# from a CUDA run still loads when this notebook falls back to the CPU.
# NOTE(review): this path points at ../Models/final_model/, while the tuning and training
# cells save to ../Models/final_model/cqt_bins_24/ — confirm which checkpoint is intended.
model.load_state_dict(
    torch.load("../Models/final_model/best_model_state.pth", map_location=device)
)

<All keys matched successfully>

In [15]:
# Evaluate the restored model on the test loader and keep the metrics it returns
cqt_seq2seq_metrics = evaluate_model(
    model=model,
    val_loader=test_loader,
    device=device,
    return_metrics=True,
)

Test Accuracy: 0.8958427454387489
Test Recall: 0.47523245425366467
Test Precision: 0.698409219179939
Test F1: 0.5522239688229337


## Initial model training before tuning

In [7]:
# Start from freshly initialised weights. This section is the pre-tuning baseline and
# nothing in it creates `model`; without this line the call would train whatever
# `model` an earlier cell happened to leave in the kernel.
model = CNN_Seq2Seq().to(device)

# Baseline run: 2 epochs, batch size 512, learning rate 0.001, val_ratio of 0.2;
# checkpoints are written under save_dir.
train_model(model=model, dataset=train_dataset, device=device, val_ratio=0.2,
            num_epochs=2, batch_size=512, learning_rate=0.001,
            save_dir='../Models/final_model/cqt_bins_24/')

Epoch 1/2 - Training, Loss: 0.003: 100%|██████████| 23020/23020 [29:15<00:00, 13.11it/s]
Epoch 1/2 - Validation, Loss: 0.002: 100%|██████████| 5755/5755 [06:36<00:00, 14.51it/s]


Epoch [1/2] - Train Loss: 0.003, Validation Loss: 0.002
Saved best model state to ../Models/final_model/cqt_bins_24/best_model_state.pth
Saved latest model checkpoint to ../Models/final_model/cqt_bins_24/latest_model_checkpoint.pth


Epoch 2/2 - Training, Loss: 0.002: 100%|██████████| 23020/23020 [28:54<00:00, 13.27it/s]
Epoch 2/2 - Validation, Loss: 0.002: 100%|██████████| 5755/5755 [06:36<00:00, 14.50it/s]

Epoch [2/2] - Train Loss: 0.002, Validation Loss: 0.002
Saved best model state to ../Models/final_model/cqt_bins_24/best_model_state.pth
Saved latest model checkpoint to ../Models/final_model/cqt_bins_24/latest_model_checkpoint.pth
Finished Training





In [8]:
# Evaluate the model trained in this section on the test loader.
# NOTE(review): this rebinds cqt_seq2seq_metrics, replacing any value stored by an
# earlier evaluation cell — confirm that overwriting is intended.
cqt_seq2seq_metrics = evaluate_model(
    model=model,
    val_loader=test_loader,
    device=device,
    return_metrics=True,
)

Test Accuracy: 0.8954539530842746
Test Recall: 0.4218126468349823
Test Precision: 0.7263166555714722
Test F1: 0.5108319877436106
