# Demo: train and test on FMA small

In [9]:
import os
os.environ["MPG123_VERBOSE"] = "0"

import inspertorchaudio.data.datasets.fma_dataset as fma_dataset
import inspertorchaudio.models.dieleman2014 as dieleman2014
import inspertorchaudio.learning.supervised as supervised_learning

from torch.utils.data import DataLoader
from torch.optim import Adam
from pathlib import Path

FMA_DIRECTORY = Path("/mnt/data2/fma")
METADATA_SUBDIRECTORY = FMA_DIRECTORY / "fma_metadata"
TRACKS_CSV_PATH = METADATA_SUBDIRECTORY / "tracks.csv"

In [10]:
train_dataset, val_dataset, test_dataset, label_encoder = fma_dataset.fma_dataset(
    tracks_csv_full_path=TRACKS_CSV_PATH,
    audio_dir_full_path=FMA_DIRECTORY / "fma_wav16k",
    subset='small',
    target_sample_rate=16000,
    check_dataset_files=True,
)

Checking training dataset files...


100%|██████████| 6394/6394 [00:09<00:00, 667.54it/s]


Checking validation dataset files...


100%|██████████| 800/800 [00:01<00:00, 687.26it/s]


Checking test dataset files...


100%|██████████| 800/800 [00:01<00:00, 630.59it/s]


In [11]:
len(train_dataset), len(val_dataset), len(test_dataset)

(6376, 800, 800)

In [12]:
batch_size = 128

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=20, pin_memory=True, prefetch_factor=3)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=20, pin_memory=True, prefetch_factor=3)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=20, pin_memory=True, prefetch_factor=3)

In [13]:
backbone = dieleman2014.Dieleman2014(
    sample_rate = 16000,
    n_fft = 1024,
    win_length = 256,
    hop_length = 256,
    f_min = 10.0,
    f_max = 6000.0,
    n_mels = 128,
    power = 1.0,
    compression_factor = 10000,
    n_features_out = 100,
)
n_classes = len(label_encoder.classes_)

classifier = dieleman2014.DielemanClassifier(
    backbone=backbone,
    n_classes=n_classes,
)

classifier.cuda()

DielemanClassifier(
  (backbone): Dieleman2014(
    (melspectrogram): MelSpectrogram(
      (spectrogram): Spectrogram()
      (mel_scale): MelScale()
    )
    (conv1): Conv1d(128, 32, kernel_size=(8,), stride=(1,))
    (maxpool1): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv1d(32, 32, kernel_size=(8,), stride=(1,))
    (maxpool2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (fc1): Linear(in_features=32, out_features=50, bias=True)
    (fc2): Linear(in_features=50, out_features=100, bias=True)
  )
  (fc): Linear(in_features=100, out_features=8, bias=True)
)

In [14]:
x, y = next(iter(train_dataloader))
print(x.shape, y.shape)

Traceback (most recent call last):
  File "/home/tiago/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tiago/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tiago/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tiago/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tiago/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tiago/.local/share/uv/python/cpython-3.12.9-lin

torch.Size([128, 80000]) torch.Size([128])


In [15]:
optimizer = Adam(classifier.parameters(), lr=1e-2)


In [None]:
supervised_learning.train(
    model=classifier,
    optimizer=optimizer,
    train_dataloader=train_dataloader,
    eval_dataloader=val_dataloader,
    epochs=20,
    patience_for_stop=20,
    use_cuda=True,
    use_mlflow=False,
    use_eval=True,
)

Using CUDA: True
Epoch 1/20


Training: 100%|██████████| 50/50 [00:13<00:00,  3.71it/s]


Average Train Loss: 2.7274


Evaluating: 100%|██████████| 7/7 [00:07<00:00,  1.01s/it]


Average Validation Loss: 2.1002, Average Validation Accuracy: 0.1350
Epoch 1 completed.

Epoch 2/20


Training: 100%|██████████| 50/50 [00:12<00:00,  4.04it/s]


Average Train Loss: 2.1190


Evaluating: 100%|██████████| 7/7 [00:06<00:00,  1.03it/s]


Average Validation Loss: 2.0741, Average Validation Accuracy: 0.1350
Epoch 2 completed.

Epoch 3/20


Training: 100%|██████████| 50/50 [00:12<00:00,  3.91it/s]


Average Train Loss: 2.1014


Evaluating: 100%|██████████| 7/7 [00:06<00:00,  1.12it/s]


Average Validation Loss: 2.0774, Average Validation Accuracy: 0.1350
Epoch 3 completed.

Epoch 4/20


Training: 100%|██████████| 50/50 [00:11<00:00,  4.32it/s]


Average Train Loss: 2.0978


Evaluating: 100%|██████████| 7/7 [00:06<00:00,  1.15it/s]


Average Validation Loss: 2.0772, Average Validation Accuracy: 0.1350
Epoch 4 completed.

Epoch 5/20


Training: 100%|██████████| 50/50 [00:11<00:00,  4.21it/s]


Average Train Loss: 2.0958


Evaluating: 100%|██████████| 7/7 [00:05<00:00,  1.19it/s]


Average Validation Loss: 2.0777, Average Validation Accuracy: 0.1183
Epoch 5 completed.

Epoch 6/20


Training: 100%|██████████| 50/50 [00:11<00:00,  4.54it/s]


Average Train Loss: 2.0944


Evaluating: 100%|██████████| 7/7 [00:05<00:00,  1.20it/s]

Average Validation Loss: 2.0779, Average Validation Accuracy: 0.1183
No improvement for 5 epochs, stopping training.



