# Demo: train and test on FMA small

In [1]:
import os
os.environ["MPG123_VERBOSE"] = "0"

import inspertorchaudio.data.datasets.fma_dataset as fma_dataset
import inspertorchaudio.models.dieleman2014 as dieleman2014
import inspertorchaudio.learning.supervised as supervised_learning

from torch.utils.data import DataLoader
from torch.optim import Adam
from pathlib import Path

# Root of the local FMA copy. The location can be overridden through the
# FMA_DIRECTORY environment variable so the notebook is not tied to a single
# machine; when the variable is unset (or empty) the original path is used.
FMA_DIRECTORY = Path(os.environ.get("FMA_DIRECTORY") or "/mnt/data2/fma")
# Metadata folder and the tracks table, both derived from the root above.
METADATA_SUBDIRECTORY = FMA_DIRECTORY / "fma_metadata"
TRACKS_CSV_PATH = METADATA_SUBDIRECTORY / "tracks.csv"

In [2]:
# Build the train / validation / test splits of the FMA "small" subset,
# together with the label encoder returned by the same factory call.
# NOTE(review): the recorded run checks 6394 training files but the training
# split later reports 6376 items - presumably check_dataset_files discards
# unusable entries; confirm against fma_dataset.fma_dataset.
dataset_options = dict(
    tracks_csv_full_path=TRACKS_CSV_PATH,
    audio_dir_full_path=FMA_DIRECTORY / "fma_wav16k",
    subset='small',
    target_sample_rate=16000,
    check_dataset_files=True,
)
splits_and_encoder = fma_dataset.fma_dataset(**dataset_options)
train_dataset, val_dataset, test_dataset, label_encoder = splits_and_encoder

Checking training dataset files...


100%|██████████| 6394/6394 [00:07<00:00, 879.98it/s]


Checking validation dataset files...


100%|██████████| 800/800 [00:00<00:00, 852.08it/s]


Checking test dataset files...


100%|██████████| 800/800 [00:00<00:00, 897.06it/s]


In [3]:
# Number of items in each split, displayed as a (train, val, test) tuple.
tuple(len(split) for split in (train_dataset, val_dataset, test_dataset))

(6376, 800, 800)

In [14]:
# One DataLoader per split, all sharing the same batch size and worker options.
batch_size = 256
kwargs = dict(num_workers=1, pin_memory=True, prefetch_factor=3)

# Only the training split is reshuffled; validation and test keep dataset order.
train_dataloader, val_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size, shuffle=reshuffle, **kwargs)
    for split, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

In [None]:
# Hyper-parameters forwarded unchanged to the Dieleman2014 backbone.
backbone_config = dict(
    sample_rate=16000,
    n_fft=1024,
    win_length=256,
    hop_length=256,
    f_min=10.0,
    f_max=6000.0,
    n_mels=128,
    power=1.0,
    compression_factor=1,
    n_features_out=100,
)
backbone = dieleman2014.Dieleman2014(**backbone_config)

# One output class per label known to the encoder.
n_classes = len(label_encoder.classes_)
classifier = dieleman2014.DielemanClassifier(backbone=backbone, n_classes=n_classes)

# Move the model to the GPU; as the last expression this also displays its
# module summary.
classifier.cuda()

DielemanClassifier(
  (backbone): Dieleman2014(
    (melspectrogram): MelSpectrogram(
      (spectrogram): Spectrogram()
      (mel_scale): MelScale()
    )
    (conv1): Conv1d(128, 32, kernel_size=(8,), stride=(1,))
    (maxpool1): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv1d(32, 32, kernel_size=(8,), stride=(1,))
    (maxpool2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (fc1): Linear(in_features=32, out_features=50, bias=True)
    (fc2): Linear(in_features=50, out_features=100, bias=True)
  )
  (fc): Linear(in_features=100, out_features=8, bias=True)
)

In [11]:
# DEBUG
# Push a single training batch through the classifier with debug=True
# (per the recorded output this prints the intermediate tensor shapes).

x, y = next(iter(train_dataloader))
# The DataLoader yields CPU tensors while the classifier may already have been
# moved to the GPU; run the forward pass on whatever device the model lives on
# so this cell also works after a fresh "Restart & Run All".
model_device = next(classifier.parameters()).device
y_pred = classifier(x.to(model_device), debug=True)


torch.Size([2, 128, 313])
torch.Size([2, 32, 306])
torch.Size([2, 32, 76])
torch.Size([2, 32, 69])
torch.Size([2, 32, 17])
torch.Size([2, 17, 32])
torch.Size([2, 17, 50])
torch.Size([2, 17, 100])
torch.Size([2, 17, 100])
torch.Size([2, 100])


In [6]:
# Fetch one training batch and print the shapes of its inputs and labels.
first_batch = next(iter(train_dataloader))
x, y = first_batch
print(*(tensor.shape for tensor in (x, y)))

torch.Size([128, 80000]) torch.Size([128])


In [9]:
# Adam over every classifier parameter. The optimizer holds references to the
# parameters that exist right now, so re-run this cell whenever the classifier
# is rebuilt.
learning_rate = 1e-4
optimizer = Adam(params=classifier.parameters(), lr=learning_rate)


In [15]:
# Guard against notebook hidden state: if the classifier cell was re-run after
# the optimizer cell, the optimizer still updates the parameters of the old
# model and the current one never learns.
# NOTE(review): the recorded run shows a flat loss and accuracy over 11 epochs,
# which is consistent with (but not proven to be) this situation - confirm.
tracked_param_ids = {
    id(param) for group in optimizer.param_groups for param in group["params"]
}
if any(id(param) not in tracked_param_ids for param in classifier.parameters()):
    raise RuntimeError(
        "optimizer does not track the current classifier's parameters; "
        "re-run the optimizer cell after rebuilding the classifier."
    )

# Train for up to 200 epochs, evaluating on the validation loader;
# patience_for_stop is forwarded as the early-stopping patience
# (semantics defined by supervised_learning.train).
supervised_learning.train(
    model=classifier,
    optimizer=optimizer,
    train_dataloader=train_dataloader,
    eval_dataloader=val_dataloader,
    epochs=200,
    patience_for_stop=50,
    use_cuda=True,
    use_mlflow=False,
    use_eval=True,
)

Using CUDA: True
Epoch 1/200


Training: 100%|██████████| 25/25 [00:15<00:00,  1.63it/s]


Average Train Loss: 2.0958


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.09it/s]


Average Validation Loss: 2.1292, Average Validation Accuracy: 0.1045
Epoch 1 completed.

Epoch 2/200


Training: 100%|██████████| 25/25 [00:14<00:00,  1.78it/s]


Average Train Loss: 2.0957


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.00it/s]


Average Validation Loss: 2.1284, Average Validation Accuracy: 0.1045
Epoch 2 completed.

Epoch 3/200


Training: 100%|██████████| 25/25 [00:13<00:00,  1.79it/s]


Average Train Loss: 2.0956


Evaluating: 100%|██████████| 4/4 [00:02<00:00,  1.84it/s]


Average Validation Loss: 2.1295, Average Validation Accuracy: 0.1045
Epoch 3 completed.

Epoch 4/200


Training: 100%|██████████| 25/25 [00:13<00:00,  1.85it/s]


Average Train Loss: 2.0955


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.08it/s]


Average Validation Loss: 2.1292, Average Validation Accuracy: 0.1045
Epoch 4 completed.

Epoch 5/200


Training: 100%|██████████| 25/25 [00:13<00:00,  1.82it/s]


Average Train Loss: 2.0957


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.46it/s]


Average Validation Loss: 2.1296, Average Validation Accuracy: 0.1045
Epoch 5 completed.

Epoch 6/200


Training: 100%|██████████| 25/25 [00:13<00:00,  1.79it/s]


Average Train Loss: 2.0957


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.15it/s]


Average Validation Loss: 2.1292, Average Validation Accuracy: 0.1045
Epoch 6 completed.

Epoch 7/200


Training: 100%|██████████| 25/25 [00:13<00:00,  1.85it/s]


Average Train Loss: 2.0956


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.33it/s]


Average Validation Loss: 2.1294, Average Validation Accuracy: 0.1045
Epoch 7 completed.

Epoch 8/200


Training: 100%|██████████| 25/25 [00:12<00:00,  2.03it/s]


Average Train Loss: 2.0956


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.02it/s]


Average Validation Loss: 2.1295, Average Validation Accuracy: 0.1045
Epoch 8 completed.

Epoch 9/200


Training: 100%|██████████| 25/25 [00:14<00:00,  1.77it/s]


Average Train Loss: 2.0955


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.24it/s]


Average Validation Loss: 2.1290, Average Validation Accuracy: 0.1045
Epoch 9 completed.

Epoch 10/200


Training: 100%|██████████| 25/25 [00:12<00:00,  2.07it/s]


Average Train Loss: 2.0956


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.54it/s]


Average Validation Loss: 2.1291, Average Validation Accuracy: 0.1045
Epoch 10 completed.

Epoch 11/200


Training: 100%|██████████| 25/25 [00:12<00:00,  2.01it/s]


Average Train Loss: 2.0955


Evaluating: 100%|██████████| 4/4 [00:01<00:00,  2.25it/s]


Average Validation Loss: 2.1287, Average Validation Accuracy: 0.1045
Epoch 11 completed.

Epoch 12/200


Training:   8%|▊         | 2/25 [00:01<00:16,  1.40it/s]

Training interrupted by user.





In [None]:
# Take the input tensor of the first training item, add a leading batch axis
# and move it to the GPU; the resulting shape is displayed.
first_item = train_dataset[0]
first_input = first_item[0]
x = first_input.unsqueeze(0).cuda()
x.shape

NameError: name 'train_dataset' is not defined

In [2]:
# Run only the backbone (without the classification head) on the single-item
# batch prepared above and display its output.
backbone_output = classifier.backbone(x)
backbone_output

NameError: name 'classifier' is not defined

tensor([[[ 3.3578,  0.3539,  0.2803,  ...,  9.5505, 13.0295, 18.2513],
         [ 3.3257,  0.5429,  0.3146,  ..., 13.8293, 19.3040, 19.8438],
         [ 3.5678,  0.8666,  0.3766,  ..., 20.0199, 28.7554, 24.4701],
         ...,
         [ 5.3115, 10.7055, 15.4330,  ..., 12.7678, 12.3853, 29.9546],
         [12.1376,  8.6285, 13.7737,  ..., 25.0388,  9.8453, 31.4718],
         [14.4836,  6.4290, 11.2217,  ..., 44.2849, 29.0489, 30.4768]]],
       device='cuda:0')