# Demo: train and test on FMA small

In [1]:
import os
os.environ["MPG123_VERBOSE"] = "0"
import inspertorchaudio.data.datasets.fma_dataset as fma_dataset
import inspertorchaudio.models.dieleman2014 as dieleman2014
import inspertorchaudio.learning.supervised as supervised_learning

from torch.utils.data import DataLoader
from torch.optim import Adam
from pathlib import Path

FMA_DIRECTORY = Path("/mnt/data2/fma")
METADATA_SUBDIRECTORY = FMA_DIRECTORY / "fma_metadata"
TRACKS_CSV_PATH = METADATA_SUBDIRECTORY / "tracks.csv"

In [2]:
train_dataset, val_dataset, test_dataset, label_encoder = fma_dataset.fma_dataset(
    tracks_csv_full_path=TRACKS_CSV_PATH,
    audio_dir_full_path=FMA_DIRECTORY / "fma_full",
    subset='small',
    target_sample_rate=16000,
)

Checking training dataset files...


100%|██████████| 6384/6384 [00:00<00:00, 83847.20it/s]


Checking validation dataset files...


100%|██████████| 799/799 [00:00<00:00, 80342.56it/s]


Checking test dataset files...


100%|██████████| 800/800 [00:00<00:00, 77328.61it/s]


In [3]:
len(train_dataset), len(val_dataset), len(test_dataset)

(844, 88, 164)

In [4]:

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=20)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=20)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=20)

In [5]:
backbone = dieleman2014.Dieleman2014(
    sample_rate = 16000,
    n_fft = 1024,
    win_length = 256,
    hop_length = 256,
    f_min = 10.0,
    f_max = 6000.0,
    n_mels = 128,
    power = 1.0,
    compression_factor = 10000,
    n_features_out = 100,
)
n_classes = len(label_encoder.classes_)

classifier = dieleman2014.DielemanClassifier(
    backbone=backbone,
    n_classes=n_classes,
)

classifier.cuda()

DielemanClassifier(
  (backbone): Dieleman2014(
    (melspectrogram): MelSpectrogram(
      (spectrogram): Spectrogram()
      (mel_scale): MelScale()
    )
    (conv1): Conv1d(128, 32, kernel_size=(8,), stride=(1,))
    (maxpool1): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv1d(32, 32, kernel_size=(8,), stride=(1,))
    (maxpool2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (fc1): Linear(in_features=32, out_features=50, bias=True)
    (fc2): Linear(in_features=50, out_features=100, bias=True)
  )
  (fc): Linear(in_features=100, out_features=8, bias=True)
)

In [6]:
optimizer = Adam(classifier.parameters(), lr=0.001)
supervised_learning.train(
    model = classifier,
    optimizer = optimizer,
    train_dataloader=train_dataloader,
    eval_dataloader=val_dataloader,
    epochs = 5,
    use_cuda = 'auto',
    use_mlflow=True,
    use_eval=True,
)

Using CUDA: True
Epoch 1/5


Training:   0%|          | 0/27 [00:00<?, ?it/s][src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
Training: 100%|██████████| 27/27 [00:02<00:00, 12.76it/s]


Average Train Loss: 2.1689


Evaluating: 100%|██████████| 3/3 [00:01<00:00,  1.88it/s]


Average Validation Loss: 1.9766, Average Validation Accuracy: 0.1319


Registered model 'BestModel' already exists. Creating a new version of this model...
Created version '4' of model 'BestModel'.


Epoch 1 completed.

Epoch 2/5


Training:   0%|          | 0/27 [00:00<?, ?it/s][src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
Training: 100%|██████████| 27/27 [00:02<00:00, 12.32it/s]


Average Train Loss: 2.0583


Evaluating: 100%|██████████| 3/3 [00:01<00:00,  2.18it/s]


Average Validation Loss: 1.9530, Average Validation Accuracy: 0.3715


Registered model 'BestModel' already exists. Creating a new version of this model...
Created version '5' of model 'BestModel'.


Epoch 2 completed.

Epoch 3/5


Training:   0%|          | 0/27 [00:00<?, ?it/s][src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
Training: 100%|██████████| 27/27 [00:02<00:00, 12.10it/s]


Average Train Loss: 2.0605


Evaluating: 100%|██████████| 3/3 [00:01<00:00,  2.21it/s]


Average Validation Loss: 2.0291, Average Validation Accuracy: 0.3715
Epoch 3 completed.

Epoch 4/5


Training:   0%|          | 0/27 [00:00<?, ?it/s][src/libmpg123/layer3.c:INT123_do_layer3():1804] error: dequantization failed!
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
[src/libmpg123/id3.c:process_comment():587] error: No comment text / valid description?
Training:  41%|████      | 11/27 [00:01<00:02,  6.92it/s]


ValueError: Caught ValueError in DataLoader worker process 11.
Original Traceback (most recent call last):
  File "/home/tiago/dev/inspertorchaudio/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/tiago/dev/inspertorchaudio/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/home/tiago/dev/inspertorchaudio/src/inspertorchaudio/data/datasets/audio_dataset.py", line 65, in __getitem__
    audio_tensor = self.loading_pipeline(file_path)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tiago/dev/inspertorchaudio/src/inspertorchaudio/data/datasets/utils.py", line 30, in __call__
    audio_tensor, sample_rate = load_sample_and_to_mono(
                                ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tiago/dev/inspertorchaudio/src/inspertorchaudio/data/datasets/utils.py", line 113, in load_sample_and_to_mono
    raise ValueError(f'Audio file {file_path} is too short!')
ValueError: Audio file /mnt/data2/fma/fma_full/011/011306.mp3 is too short!
