# Demo: train and test on FMA small

In [1]:
import os
# Set before the audio packages below are imported.
# NOTE(review): presumably silences mpg123 decoder log output — confirm.
os.environ["MPG123_VERBOSE"] = "0"

import inspertorchaudio.data.datasets.fma_dataset as fma_dataset
import inspertorchaudio.models.dieleman2014 as dieleman2014
import inspertorchaudio.learning.supervised as supervised_learning

from torch.utils.data import DataLoader
from torch.optim import Adam
from pathlib import Path

# Root of the local FMA copy. Export FMA_DIRECTORY before starting the kernel
# to point the notebook at another location; the previous hard-coded path is
# kept as the default so existing setups keep working unchanged.
FMA_DIRECTORY = Path(os.environ.get("FMA_DIRECTORY", "/mnt/data2/fma"))
# Metadata folder inside the FMA root.
METADATA_SUBDIRECTORY = FMA_DIRECTORY / "fma_metadata"
# Track-level metadata table.
TRACKS_CSV_PATH = METADATA_SUBDIRECTORY / "tracks.csv"

In [2]:
# Build the train / validation / test splits of the FMA "small" subset,
# together with the label encoder that comes back alongside them.
fma_small_options = dict(
    tracks_csv_full_path=TRACKS_CSV_PATH,
    audio_dir_full_path=FMA_DIRECTORY / "fma_wav16k",
    subset='small',
    target_sample_rate=16000,
    check_dataset_files=True,
)
fma_splits = fma_dataset.fma_dataset(**fma_small_options)
train_dataset, val_dataset, test_dataset, label_encoder = fma_splits

Checking training dataset files...


100%|██████████| 6394/6394 [00:07<00:00, 876.35it/s] 


Checking validation dataset files...


100%|██████████| 800/800 [00:01<00:00, 686.04it/s]


Checking test dataset files...


100%|██████████| 800/800 [00:01<00:00, 650.07it/s]


In [3]:
# Number of examples per split, in (train, validation, test) order.
tuple(map(len, (train_dataset, val_dataset, test_dataset)))

(6376, 800, 800)

In [4]:
batch_size = 128

# Batching / worker / host-to-device transfer settings shared by all loaders.
loader_options = dict(
    batch_size=batch_size,
    num_workers=20,
    pin_memory=True,
    prefetch_factor=3,
)

# The training split is reshuffled every epoch; with shuffle=False each epoch
# replays the exact same mini-batches in dataset order.
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_options)
# Evaluation splits keep a fixed order so their metrics are comparable
# from one epoch (and one run) to the next.
val_dataloader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_dataloader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [5]:
# One output per class known to the label encoder.
n_classes = len(label_encoder.classes_)

# Backbone configuration: 16 kHz input, 128 mel bands, 100 output features.
backbone = dieleman2014.Dieleman2014(
    sample_rate=16_000,
    n_fft=1024,
    win_length=256,
    hop_length=256,
    f_min=10.0,
    f_max=6000.0,
    n_mels=128,
    power=1.0,
    compression_factor=10_000,
    n_features_out=100,
)

# Classification model wrapping the backbone.
classifier = dieleman2014.DielemanClassifier(backbone=backbone, n_classes=n_classes)

# Move the model to the GPU; as the cell's last expression this also
# displays the module tree.
classifier.cuda()

DielemanClassifier(
  (backbone): Dieleman2014(
    (melspectrogram): MelSpectrogram(
      (spectrogram): Spectrogram()
      (mel_scale): MelScale()
    )
    (conv1): Conv1d(128, 32, kernel_size=(8,), stride=(1,))
    (maxpool1): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv1d(32, 32, kernel_size=(8,), stride=(1,))
    (maxpool2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (fc1): Linear(in_features=32, out_features=50, bias=True)
    (fc2): Linear(in_features=50, out_features=100, bias=True)
  )
  (fc): Linear(in_features=100, out_features=8, bias=True)
)

In [6]:
# Pull one mini-batch from the training loader and show the input/target shapes.
train_batches = iter(train_dataloader)
x, y = next(train_batches)
print(x.shape, y.shape)

torch.Size([128, 80000]) torch.Size([128])


In [7]:
# Step size for Adam, named so it is easy to find and tune.
# NOTE(review): in the run recorded in this notebook the validation accuracy
# stays around 0.11-0.14 for 8 classes; this value may be worth revisiting.
LEARNING_RATE = 1e-2

optimizer = Adam(params=classifier.parameters(), lr=LEARNING_RATE)


In [None]:
# Schedule and feature switches for the training run.
training_options = dict(
    epochs=200,
    patience_for_stop=50,
    use_cuda=True,
    use_mlflow=False,
    use_eval=True,
)

# Train on the training loader, evaluating on the validation loader.
supervised_learning.train(
    model=classifier,
    optimizer=optimizer,
    train_dataloader=train_dataloader,
    eval_dataloader=val_dataloader,
    **training_options,
)

Using CUDA: True
Epoch 1/200


Training: 100%|██████████| 50/50 [00:08<00:00,  6.04it/s]


Average Train Loss: 4.2673


Evaluating: 100%|██████████| 7/7 [00:03<00:00,  2.14it/s]


Average Validation Loss: 2.0909, Average Validation Accuracy: 0.1116
Epoch 1 completed.

Epoch 2/200


Training: 100%|██████████| 50/50 [00:10<00:00,  4.91it/s]


Average Train Loss: 2.0913


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.39it/s]


Average Validation Loss: 2.0958, Average Validation Accuracy: 0.1350
Epoch 2 completed.

Epoch 3/200


Training: 100%|██████████| 50/50 [00:09<00:00,  5.31it/s]


Average Train Loss: 2.1159


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.58it/s]


Average Validation Loss: 2.0854, Average Validation Accuracy: 0.1350
Epoch 3 completed.

Epoch 4/200


Training: 100%|██████████| 50/50 [00:09<00:00,  5.21it/s]


Average Train Loss: 2.1060


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.50it/s]


Average Validation Loss: 2.0850, Average Validation Accuracy: 0.1350
Epoch 4 completed.

Epoch 5/200


Training: 100%|██████████| 50/50 [00:08<00:00,  5.57it/s]


Average Train Loss: 2.1036


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.86it/s]


Average Validation Loss: 2.0841, Average Validation Accuracy: 0.1350
Epoch 5 completed.

Epoch 6/200


Training: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s]


Average Train Loss: 2.1015


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  3.17it/s]


Average Validation Loss: 2.0834, Average Validation Accuracy: 0.1350
Epoch 6 completed.

Epoch 7/200


Training: 100%|██████████| 50/50 [00:08<00:00,  5.78it/s]


Average Train Loss: 2.0996


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.91it/s]


Average Validation Loss: 2.0827, Average Validation Accuracy: 0.1350
Epoch 7 completed.

Epoch 8/200


Training: 100%|██████████| 50/50 [00:07<00:00,  6.58it/s]


Average Train Loss: 2.0984


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.89it/s]


Average Validation Loss: 2.0818, Average Validation Accuracy: 0.1362
Epoch 8 completed.

Epoch 9/200


Training: 100%|██████████| 50/50 [00:07<00:00,  6.65it/s]


Average Train Loss: 2.0971


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  3.12it/s]


Average Validation Loss: 2.0850, Average Validation Accuracy: 0.1150
Epoch 9 completed.

Epoch 10/200


Training: 100%|██████████| 50/50 [00:07<00:00,  6.86it/s]


Average Train Loss: 2.0963


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.99it/s]


Average Validation Loss: 2.0804, Average Validation Accuracy: 0.1228
Epoch 10 completed.

Epoch 11/200


Training: 100%|██████████| 50/50 [00:07<00:00,  6.82it/s]


Average Train Loss: 2.0951


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  3.43it/s]


Average Validation Loss: 2.0800, Average Validation Accuracy: 0.1328
Epoch 11 completed.

Epoch 12/200


Training: 100%|██████████| 50/50 [00:07<00:00,  7.12it/s]


Average Train Loss: 2.0966


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  2.86it/s]


Average Validation Loss: 2.0791, Average Validation Accuracy: 0.1350
Epoch 12 completed.

Epoch 13/200


Training: 100%|██████████| 50/50 [00:06<00:00,  7.58it/s]


Average Train Loss: 2.0948


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  3.46it/s]


Average Validation Loss: 2.0786, Average Validation Accuracy: 0.1350
Epoch 13 completed.

Epoch 14/200


Training: 100%|██████████| 50/50 [00:06<00:00,  8.19it/s]


Average Train Loss: 2.0929


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  3.32it/s]


Average Validation Loss: 2.0960, Average Validation Accuracy: 0.1183
Epoch 14 completed.

Epoch 15/200


Training: 100%|██████████| 50/50 [00:06<00:00,  7.96it/s]


Average Train Loss: 2.1019


Evaluating: 100%|██████████| 7/7 [00:01<00:00,  3.71it/s]


Average Validation Loss: 2.0927, Average Validation Accuracy: 0.1350
Epoch 15 completed.

Epoch 16/200


Training: 100%|██████████| 50/50 [00:05<00:00,  8.67it/s]


Average Train Loss: 2.1102


Evaluating: 100%|██████████| 7/7 [00:02<00:00,  3.49it/s]


Average Validation Loss: 2.0774, Average Validation Accuracy: 0.1172
Epoch 16 completed.

Epoch 17/200


Training:   0%|          | 0/50 [00:00<?, ?it/s]Exception ignored in: <function _releaseLock at 0x7f2a260e0ea0>
Traceback (most recent call last):
  File "/home/tiago/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/logging/__init__.py", line 243, in _releaseLock
    def _releaseLock():
    
KeyboardInterrupt: 


In [None]:
# Take the input tensor of the first training item, add a leading batch
# dimension and move it to the GPU.
first_item = train_dataset[0]
first_input = first_item[0]
x = first_input.unsqueeze(0).cuda()
x.shape

torch.Size([1, 80000])

In [None]:
# Run only the backbone on the single-example batch and display its output.
backbone_output = classifier.backbone(x)
backbone_output

tensor([[[ 0.4901, -0.0320, -0.3615,  ..., -0.1829,  0.1387,  0.4573],
         [ 0.4884, -0.0174, -0.3327,  ..., -0.1704,  0.1437,  0.4475],
         [ 0.4630, -0.0230, -0.3686,  ..., -0.1622,  0.1093,  0.4443],
         ...,
         [ 0.4642, -0.0509, -0.3418,  ..., -0.1416,  0.1664,  0.4556],
         [ 0.4828, -0.0564, -0.3654,  ..., -0.1748,  0.1264,  0.4487],
         [ 0.4979, -0.0068, -0.3119,  ..., -0.1976,  0.1512,  0.4387]]],
       device='cuda:0', grad_fn=<ViewBackward0>)

tensor([[[ 3.3578,  0.3539,  0.2803,  ...,  9.5505, 13.0295, 18.2513],
         [ 3.3257,  0.5429,  0.3146,  ..., 13.8293, 19.3040, 19.8438],
         [ 3.5678,  0.8666,  0.3766,  ..., 20.0199, 28.7554, 24.4701],
         ...,
         [ 5.3115, 10.7055, 15.4330,  ..., 12.7678, 12.3853, 29.9546],
         [12.1376,  8.6285, 13.7737,  ..., 25.0388,  9.8453, 31.4718],
         [14.4836,  6.4290, 11.2217,  ..., 44.2849, 29.0489, 30.4768]]],
       device='cuda:0')