In [None]:
!pip install mirdata -q

In [None]:
import os
import pandas as pd
from torch.utils.data import Dataset
from torchvision import datasets
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import torch
import torchaudio
import numpy as np
import random

In [None]:
import mirdata

# Create the mirdata loader object for the 'egfxset' dataset and download its
# files. NOTE(review): later cells read from /root/mir_datasets/egfxset/, so the
# download is presumably expected to land there — confirm.
data = mirdata.initialize('egfxset')
data.download()

In [None]:
# Random 80/20 split of the tracks with a fixed seed (42); the result is indexed
# by the split names passed in ('Train' / 'Test').
data_dictionary = data.get_random_track_splits([0.8, 0.2], seed=42, split_names=['Train', 'Test'])

trainset = data_dictionary['Train']
testset = data_dictionary['Test']

# Total number of tracks across both splits, before any filtering.
print(f'Original data: {len(trainset) + len(testset)}')

Original data: 8970


In [None]:
def obtain_path(label: str):
  """Turn a track label into the path of its .wav file.

  The label is cut at every underscore. The first piece becomes the directory
  under the dataset root and the second piece becomes the file stem; any
  further pieces are ignored. A label without an underscore raises IndexError.
  """
  pieces = label.split('_')
  return f"/root/mir_datasets/egfxset/{pieces[0]}/{pieces[1]}.wav"

# Sanity check: resolve one test label to its path. The index is drawn at random
# from 3..100 (inclusive) and is not seeded, so the printed path varies per run.
print(f'Random path from label: {obtain_path(testset[random.randint(3, 100)])}')

Random path from label: /root/mir_datasets/egfxset/TubeScreamer/Middle-Neck/4-16.wav


In [None]:
# Only clips with exactly this many samples are kept, so every item the dataset
# later yields has the same length.
EXPECTED_NUM_SAMPLES = 240000


def _filter_by_num_samples(labels, expected_num_samples=EXPECTED_NUM_SAMPLES):
  """Return the labels whose audio file has exactly `expected_num_samples` samples.

  Args:
    labels: iterable of track labels accepted by `obtain_path`.
    expected_num_samples: required size of the loaded tensor along dimension 1
      (the axis this notebook treats as the sample axis).

  Returns:
    A new list with the matching labels, in their original order.
  """
  kept = []
  for label in labels:
    # The second value returned by torchaudio.load is not needed here.
    audio, _ = torchaudio.load(obtain_path(label))
    if audio.size()[1] == expected_num_samples:
      kept.append(label)
  return kept


# The same filter is applied to both splits (previously two copy-pasted loops).
filt_trainset = _filter_by_num_samples(trainset)
filt_testset = _filter_by_num_samples(testset)

print(f'New filtered data: {len(filt_trainset) + len(filt_testset)}')

New filtered data: 8864


In [None]:
## Map each effect (pedal) name to its effect type.
## The spelling "distorsion" is kept as-is: these strings are used as lookup
## keys elsewhere in the notebook.

effect_map = {
    "TubeScreamer": "distorsion",
    "RAT": "distorsion",
    "TapeEcho": "delay",
    "Spring Reverb": "reverb",
    "Sweep Echo": "delay",
    "Flanger": "modulation",
    "Hall Reverb": "reverb",
    "Phaser": "modulation",
    "Plate Reverb": "reverb",
    "Chorus": "modulation",
    "Clean": "clean",
    "Digital Delay": "delay",
    "BluesDriver": "distorsion",
}

In [None]:
# Integer class id for every effect type (single effects first, then
# combinations). The id of a name is its position in the list below.
effect_to_int = {
    effect_type: class_id
    for class_id, effect_type in enumerate([
        "modulation",
        "reverb",
        "delay",
        "distorsion",
        "clean",
        "distorsion-delay",
        "distorsion-reverb",
        "distorsion-modulation",
        "delay-reverb",
        "delay-modulation",
        "reverb-modulation",
        "distorsion-delay-reverb",
        "distorsion-delay-modulation",
        "distorsion-reverb-modulation",
        "delay-reverb-modulation",
        "distorsion-delay-reverb-modulation",
    ])
}

In [None]:
class CustomMusicDataset(Dataset):
    """Dataset of fixed-length audio excerpts with integer effect-type labels.

    Each item is `(audio, label)`: `audio` is the loaded waveform cut to the
    sample range [SEGMENT_START, SEGMENT_END) along dimension 1, and `label`
    is the integer id (from `effect_to_int`) of the effect type that
    `effect_map` assigns to the part of the track label before the first
    underscore.

    Args:
        annotations_file: file name of the metadata CSV inside `music_dir`.
        music_dir: dataset root directory.
        tdata: indexable sequence of track labels that make up this split.
    """

    # Slice bounds, in samples, applied to every clip (96000 samples kept).
    # NOTE(review): presumably seconds 1-3 of 48 kHz audio — the sample rate
    # returned by torchaudio.load is not checked here; confirm.
    SEGMENT_START = 48000
    SEGMENT_END = 48000 * 3

    def __init__(self, annotations_file, music_dir, tdata):
        annotation_path = os.path.join(music_dir, annotations_file)
        # The CSV is read without a header row; it is stored but not used by
        # __len__/__getitem__.
        self.csv_data = pd.read_csv(annotation_path, header=None)
        # Class names ordered by their integer id, so labels[i] is the name of
        # class i. The previous hand-written list started with "clean" while
        # effect_to_int assigns id 0 to "modulation", so decoding a predicted
        # id through `labels` gave the wrong name.
        self.labels = sorted(effect_to_int, key=effect_to_int.get)
        self.tdata = tdata
        self.music_dir = music_dir

    def __len__(self):
        """Number of track labels in this split."""
        return len(self.tdata)

    def __getitem__(self, idx):
        """Load clip `idx`, cut it to the fixed segment and return (audio, label id)."""
        track = self.tdata[idx]
        audio, _ = torchaudio.load(obtain_path(track))
        audio = audio[:, self.SEGMENT_START:self.SEGMENT_END]
        # Effect name -> effect type -> integer class id.
        effect_name = track.split('_')[0]
        label = effect_to_int[effect_map[effect_name]]

        return audio, label

# Both splits use the same metadata CSV and dataset root; they differ only in
# the length-filtered label list they are given.
train_set = CustomMusicDataset('egfxset_metadata.csv', '/root/mir_datasets/egfxset/', filt_trainset)
test_set = CustomMusicDataset('egfxset_metadata.csv', '/root/mir_datasets/egfxset/', filt_testset)

In [None]:
# Batches of 100 items; only the training loader is shuffled.
BATCH_SIZE = 100
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Pull a single batch from the training loader; later cells use `x` to inspect
# shapes and to smoke-test the model.
x, y = next(iter(train_loader))

In [None]:
# Shape of the sampled input batch.
x.size()

torch.Size([100, 1, 96000])

In [None]:
# Channel count of the sampled batch; passed to the network explicitly below.
in_channels = x.size()[1]


class ConvNet1D(torch.nn.Module):
    """Small 1-D CNN classifier over raw waveforms.

    Three Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d(3) stages followed by a
    two-layer fully connected head.

    Args:
        in_channels: number of channels of the input waveform. Defaults to 1,
            which matches the batch shape shown in this notebook.
        num_classes: size of the output layer. Defaults to 16.

    The first fully connected layer is fixed at 2664 input features, which is
    what the convolutional stack produces for 96000-sample inputs (3 channels
    x 888 time steps); other input lengths will fail in the first Linear layer.
    """

    def __init__(self, in_channels=1, num_classes=16):
        super().__init__()
        self.conv = torch.nn.Sequential(
            torch.nn.Conv1d(in_channels, 3, 3),
            torch.nn.BatchNorm1d(3),
            torch.nn.ReLU(),
            torch.nn.MaxPool1d(3),
            torch.nn.Conv1d(3, 6, 3, stride=2),
            torch.nn.BatchNorm1d(6),
            torch.nn.ReLU(),
            torch.nn.MaxPool1d(3),
            torch.nn.Conv1d(6, 3, 3, stride=2),
            torch.nn.BatchNorm1d(3),
            torch.nn.ReLU(),
            torch.nn.MaxPool1d(3)
        )
        # Length through the stack for a 96000-sample input:
        # 96000 -> 95998 -> 31999 -> 15999 -> 5333 -> 2666 -> 888, times 3 channels = 2664.
        self.FC = torch.nn.Sequential(
            torch.nn.Linear(2664, 1200),
            torch.nn.ReLU(),
            torch.nn.Linear(1200, num_classes)
        )

    def forward(self, x):
        """Return raw class scores of shape (batch, num_classes)."""
        x = self.conv(x)
        # Merge the channel and time axes into one feature vector per item.
        x = x.flatten(start_dim=1)
        x = self.FC(x)

        return x


model = ConvNet1D(in_channels=in_channels)

In [None]:
# Smoke-test the forward pass on the sampled batch and show the output shape.
model(x).size()

torch.Size([100, 16])

In [None]:
# Cross-entropy objective. The training function reads this module-level
# `loss` name, so it must be defined before training starts.
loss = torch.nn.CrossEntropyLoss()
# Adam over all model parameters with learning rate 0.001.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Number of items in the dataset behind the test loader.
len(test_loader.dataset)

1778

In [None]:
def get_accuracy(model, dataloader):
    """Fraction of items in `dataloader` whose arg-max prediction equals the target.

    The model is switched to eval mode (and left there) and gradients are
    disabled while predicting. The number of hits is divided by
    `len(dataloader.dataset)`; once at least one batch has been seen the
    result is a 0-dim tensor.
    """
    model.eval()
    hits = 0
    with torch.no_grad():
        for inputs, targets in dataloader:
            predicted = model(inputs).argmax(dim=1)
            hits += (predicted == targets).sum()
    return hits / len(dataloader.dataset)

In [None]:
def train(model, optimizer, trainloader, testloader, epochs, criterion=None):
    """Train `model` and checkpoint it whenever test accuracy improves.

    Before every epoch the model is evaluated on `testloader`; if the accuracy
    beats the best seen so far, the whole model object is saved to
    'best_model<N>.pt', where N is the number of epochs completed at that
    point. After the last epoch the model is evaluated once more and, unlike
    before, also checkpointed if it is the best — previously the final model
    could never be saved.

    Args:
        model: network to train (updated in place).
        optimizer: optimizer already bound to the model's parameters.
        trainloader: iterable of (inputs, targets) training batches.
        testloader: loader passed to `get_accuracy` for evaluation.
        epochs: number of passes over `trainloader`.
        criterion: loss callable `criterion(outputs, targets)`. When None, the
            module-level `loss` object is used, as the original code did.

    NOTE(review): checkpoints are selected on the test loader, so the reported
    test accuracy of the "best" model is optimistically biased.
    """
    if criterion is None:
        criterion = loss

    best_accuracy = -1
    for epoch in range(epochs):
        current_accuracy = get_accuracy(model, testloader)
        print("Test accuracy: ", current_accuracy)

        if current_accuracy > best_accuracy:
            # torch.save on the module pickles the full object; only load such
            # files from trusted sources.
            torch.save(model, 'best_model' + str(epoch) + '.pt')
            best_accuracy = current_accuracy

        # get_accuracy leaves the model in eval mode; switch back for training.
        model.train()
        print("Epoch: ", epoch)
        for x, y in trainloader:
            out = model(x)
            l = criterion(out, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

    final_accuracy = get_accuracy(model, testloader)
    print("Final accuracy: ", final_accuracy)
    # Give the fully trained model the same chance to become the best checkpoint.
    if final_accuracy > best_accuracy:
        torch.save(model, 'best_model' + str(epochs) + '.pt')

In [None]:
# Train for 30 epochs; test accuracy is printed before each epoch and once at the end.
train(model, optimizer, train_loader, test_loader, epochs=30)

Test accuracy:  tensor(0.0692)
Epoch:  0
Test accuracy:  tensor(0.5647)
Epoch:  1
Test accuracy:  tensor(0.5861)
Epoch:  2
Test accuracy:  tensor(0.5771)
Epoch:  3
Test accuracy:  tensor(0.5607)
Epoch:  4
Test accuracy:  tensor(0.6232)
Epoch:  5
Test accuracy:  tensor(0.6777)
Epoch:  6
Test accuracy:  tensor(0.6794)
Epoch:  7
Test accuracy:  tensor(0.7272)
Epoch:  8
Test accuracy:  tensor(0.5799)
Epoch:  9
Test accuracy:  tensor(0.7064)
Epoch:  10
Test accuracy:  tensor(0.7413)
Epoch:  11
Test accuracy:  tensor(0.7171)
Epoch:  12
Test accuracy:  tensor(0.7227)
Epoch:  13
Test accuracy:  tensor(0.7463)
Epoch:  14
Test accuracy:  tensor(0.7058)
Epoch:  15
Test accuracy:  tensor(0.6485)
Epoch:  16
Test accuracy:  tensor(0.7317)
Epoch:  17
Test accuracy:  tensor(0.5096)
Epoch:  18
Test accuracy:  tensor(0.7289)
Epoch:  19
Test accuracy:  tensor(0.6856)
Epoch:  20
Test accuracy:  tensor(0.7345)
Epoch:  21
Test accuracy:  tensor(0.6963)
Epoch:  22
Test accuracy:  tensor(0.7238)
Epoch:  23
Te