# Distortion Classification

In [1]:
import torch
import torchaudio
import os

### Define transformation

In [2]:
# Audio settings shared by every dataset built in this notebook.
SAMPLE_RATE = 22050
NUM_SAMPLES = SAMPLE_RATE * 3  # three seconds' worth of samples at SAMPLE_RATE

# MFCC feature extractor handed to the datasets as their transform.
# hop_length equals n_fft and center is False, so consecutive analysis frames
# do not overlap and the signal is not padded at its edges.
# NOTE(review): with exactly NUM_SAMPLES input samples this should give
# 1 + (66150 - 1024) // 1024 = 64 frames, i.e. a 64 x 64 feature map per
# channel -- confirm against what GtFxDataset actually feeds the transform.
mfcc = torchaudio.transforms.MFCC(
    sample_rate=SAMPLE_RATE,
    n_mfcc=64,
    melkwargs=dict(n_fft=1024, hop_length=1024, n_mels=64, center=False),
)

#### Functions for training

In [3]:
from src.gtfxdataset import GtFxDataset
from src.util import plot_spectrogram
from src.extrector import train
from src.extrector import model
from torch import nn

# Root folder of the GT-FX-C53 dataset; both annotation CSVs below are resolved inside it.
AUDIO_DIR = "_assets/DATASET/GT-FX-C53/"
# Annotation file used by load_train_data.
ANNOTATIONS_FILE = os.path.join(AUDIO_DIR, "train.csv")
# Annotation file used by load_evaluation_data.
EVU_ANNOTATIONS_FILE = os.path.join(AUDIO_DIR, "evaluation.csv")
# Effect names; the loader functions below take an integer index into this list.
EFFECT_MAP = ["distortion", "chorus", "tremolo", "delay", "reverb"]

def load_train_data(effect):
    """Build the training dataset for a single effect.

    Args:
        effect: Integer index into EFFECT_MAP; the matching effect name is
            passed to GtFxDataset as its ``effect`` keyword.

    Returns:
        The GtFxDataset constructed from ANNOTATIONS_FILE and AUDIO_DIR with
        the notebook-level ``mfcc`` transform, SAMPLE_RATE and NUM_SAMPLES.

    Note:
        ``device`` is not a parameter. It is looked up as a notebook global at
        call time, so the cell that assigns it must have run before this
        function is called.
    """
    return GtFxDataset(
        ANNOTATIONS_FILE,
        AUDIO_DIR,
        mfcc,
        SAMPLE_RATE,
        NUM_SAMPLES,
        device,
        effect=EFFECT_MAP[effect],
    )

def load_evaluation_data(effect):
    """Build a data loader over the evaluation set for a single effect.

    Args:
        effect: Integer index into EFFECT_MAP; the matching effect name is
            passed to GtFxDataset as its ``effect`` keyword.

    Returns:
        Whatever ``train.create_data_loader`` returns for the evaluation
        dataset and the computed batch size.

    Note:
        ``device`` is not a parameter. It is looked up as a notebook global at
        call time, so the cell that assigns it must have run before this
        function is called.
    """
    evaluation_set = GtFxDataset(
        EVU_ANNOTATIONS_FILE,
        AUDIO_DIR,
        mfcc,
        SAMPLE_RATE,
        NUM_SAMPLES,
        device,
        effect=EFFECT_MAP[effect],
    )

    # Size batches so the set is covered in roughly 1500 steps. Clamp to 1:
    # for fewer than ~750 items the rounding yields 0, which is not a usable
    # batch size (presumably it is forwarded to a torch DataLoader, which
    # rejects non-positive values -- confirm in train.create_data_loader).
    batch_size = max(1, round(len(evaluation_set) / 1500))
    return train.create_data_loader(evaluation_set, batch_size)

def split_data(data, generator=None):
    """Randomly split a dataset 90/10 and wrap both parts in data loaders.

    Args:
        data: Sized dataset to split; ``len(data)`` drives the batch size.
        generator: Optional ``torch.Generator`` forwarded to
            ``torch.utils.data.random_split`` so the split can be reproduced.
            When omitted, the call is made exactly as before and uses
            PyTorch's default random generator.

    Returns:
        Tuple ``(train_dataloader, test_dataloader)`` as produced by
        ``train.create_data_loader`` for the 90% and 10% subsets.
    """
    # Size batches from the full dataset so it is covered in roughly 1500
    # steps. Clamp to 1: for fewer than ~750 items the rounding yields 0,
    # which is not a usable batch size (presumably forwarded to a torch
    # DataLoader -- confirm in train.create_data_loader).
    batch_size = max(1, round(len(data) / 1500))

    # Fractional lengths are resolved to item counts by random_split itself.
    split_ratio = [0.9, 0.1]
    # Only pass `generator` when the caller supplied one, so the default
    # behaviour stays identical to the original call.
    split_kwargs = {} if generator is None else {"generator": generator}
    train_set, test_set = torch.utils.data.random_split(
        data, lengths=split_ratio, **split_kwargs
    )

    train_dataloader = train.create_data_loader(train_set, batch_size)
    test_dataloader = train.create_data_loader(test_set, batch_size)

    return train_dataloader, test_dataloader

#### Add Tensorboard to record data

In [4]:
from torch.utils.tensorboard import SummaryWriter

# Name of this run: it selects the TensorBoard log folder and is reused later
# in the notebook to name the saved weights and the evaluation array.
EXPERIMENT_NAME = "c53_parameter"
LOG_DIR = "_log/" + EXPERIMENT_NAME
EVU_DIR = "_log/Evaluation/"

# exist_ok=True makes directory creation idempotent: re-running the cell is
# harmless and there is no window between an existence check and the creation.
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(EVU_DIR, exist_ok=True)

# TensorBoard writer shared by the training cell; that cell closes it when done.
log_writer = SummaryWriter(LOG_DIR)


#### Model training

In [5]:
from src.util import plot_violin
import numpy as np

WEIGHTS_DIR = "_weights/"
LEARNING_RATE = 0.001
EPOCHS = 15

# Prefer the GPU when one is visible. load_train_data / load_evaluation_data
# read this global, so it has to be assigned before they are called below.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device {device}")

# Index (into EFFECT_MAP) of the effect this run trains on.
fx = EFFECT_MAP.index("tremolo")

# Passed to train.train below; presumably where the trained weights are
# written -- confirm in src.extrector.train.
WEIGHTS_PATH = os.path.join(WEIGHTS_DIR, f"{EXPERIMENT_NAME}_{fx}")

# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs(WEIGHTS_DIR, exist_ok=True)

fxData = load_train_data(fx)
# fxData, _ = torch.utils.data.random_split(fxData, lengths=[0.01, 0.99])

train_dataloader, test_dataloader = split_data(fxData)
val_dataloader = load_evaluation_data(fx)

# construct model and assign it to device
cnn = model.Extractor().to(device)

# if fx == 0:
#     signal, _, _, _, _ = fxData[0]
#     print(f"There are {len(fxData)} samples in the dataset.")
#     print(f"Shape of signal: {signal.shape}")

#     print("input feature:")
#     log_writer.add_figure("Input Feature", plot_spectrogram(signal[0], title="MFCC"))
#     log_writer.add_graph(cnn, signal.unsqueeze_(0))

# initialise loss function + optimiser
loss_fn = nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(cnn.parameters(), lr=LEARNING_RATE)

# train model
train.train(cnn,
            train_dataloader,
            test_dataloader,
            loss_fn,
            optimiser,
            device,
            log_writer,
            EPOCHS,
            WEIGHTS_PATH,
            effect=fx)

# Evaluate on the held-out set and keep the fourth field of every log record.
# NOTE(review): that field is presumably the per-sample error -- confirm
# against the record layout returned by train.test.
_, _, log = train.test(cnn, val_dataloader, device, effect=fx)
error = [entry[3] for entry in log]

# Persist the collected values so they can be analysed outside this notebook.
arr = np.array(error)
np.save(os.path.join(EVU_DIR, f"{EXPERIMENT_NAME}_{fx}_evaluation.npy"), arr)

# log_writer.add_figure("Error Box", 
#                       plot_violin(error, title="Error", labels=EFFECT_MAP, ylabel="parameter value", outlier=True))

log_writer.close()

Using device cpu
Epoch 1
loss: 0.087666  [  0/170046]
loss: 0.311293  [2520/170046]
loss: 0.297591  [5040/170046]
loss: 0.067609  [7560/170046]
loss: 0.056309  [10080/170046]
loss: 0.058064  [12600/170046]
loss: 0.029724  [15120/170046]
loss: 0.022559  [17640/170046]
loss: 0.017389  [20160/170046]
loss: 0.016694  [22680/170046]
loss: 0.018302  [25200/170046]
loss: 0.017817  [27720/170046]
loss: 0.013206  [30240/170046]
loss: 0.011643  [32760/170046]
loss: 0.009713  [35280/170046]
loss: 0.010264  [37800/170046]
loss: 0.010293  [40320/170046]
loss: 0.019304  [42840/170046]
loss: 0.010920  [45360/170046]
loss: 0.006517  [47880/170046]
loss: 0.010229  [50400/170046]
loss: 0.009471  [52920/170046]
loss: 0.013455  [55440/170046]
loss: 0.006518  [57960/170046]
loss: 0.010180  [60480/170046]
loss: 0.009495  [63000/170046]
loss: 0.007485  [65520/170046]
loss: 0.008946  [68040/170046]
loss: 0.008431  [70560/170046]
loss: 0.006278  [73080/170046]
loss: 0.006704  [75600/170046]
loss: 0.007561  [78