# Parameter extraction

In [5]:
import torch
from torch import nn
import torchaudio
import os
import numpy as np
import matplotlib.pyplot as plt

### Load dataset

In [6]:
from src.gtfxdataset import GtFxDataset

# --- Dataset location --------------------------------------------------------
AUDIO_DIR = "_assets/DATASET/GT-FX-C51/"
ANNOTATIONS_FILE = os.path.join(AUDIO_DIR, "annotation.csv")

# --- Audio framing -----------------------------------------------------------
SAMPLE_RATE = 22050
NUM_SAMPLES = SAMPLE_RATE * 3  # three seconds' worth of samples at SAMPLE_RATE

# --- Target effect -----------------------------------------------------------
# EFFECT is an index into EFFECT_MAP (1 -> "chorus"); the name is handed to the
# dataset, the index is reused later in the notebook.
EFFECT_MAP = ["distortion", "chorus", "tremolo", "delay", "reverb"]
EFFECT = 1

# --- Compute device ----------------------------------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device {device}")

# --- Feature front-ends ------------------------------------------------------
# Only `mfcc` is passed to the dataset below; the two spectrogram transforms
# are defined as alternatives and are not used in this cell.
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    n_mels=64,
    n_fft=512,
    hop_length=1050,
    sample_rate=SAMPLE_RATE,
    # power=2
)

spectrogram = torchaudio.transforms.Spectrogram(
    n_fft=127,
    win_length=127,
    hop_length=1040,
    power=2,
    normalized=True,
)

# With center=False, n_fft=1024 and hop_length=1030, a NUM_SAMPLES-long input
# gives 64 frames, i.e. a 64 x 64 MFCC map (presumably what the dataset emits;
# see the shape printed at the end of this cell).
mfcc_mel_options = {
    "n_fft": 1024,
    "hop_length": 1030,
    "n_mels": 64,
    "center": False,
}
mfcc = torchaudio.transforms.MFCC(
    sample_rate=SAMPLE_RATE,
    n_mfcc=64,
    melkwargs=mfcc_mel_options,
)

# --- Dataset -----------------------------------------------------------------
fxData = GtFxDataset(
    ANNOTATIONS_FILE,
    AUDIO_DIR,
    mfcc,
    SAMPLE_RATE,
    NUM_SAMPLES,
    device,
    EFFECT_MAP[EFFECT],
)

# Sanity check: each item is a 5-tuple whose first element is the feature
# tensor; the remaining four fields are not needed here.
signal, _, _, _, _ = fxData[0]
print(f"There are {len(fxData)} samples in the dataset.")
print(f"Shape of signal: {signal.shape}")
    

Using device cpu
There are 5220 samples in the dataset.
Shape of signal: torch.Size([1, 64, 64])


#### Split dataset into train, test and validation sets

In [7]:
from src.extrector import train

BATCH_SIZE = 32

# Fixed seed so the split is identical on every run. Without it, restarting the
# kernel and evaluating previously saved weights would draw a *different* split,
# and the held-out set could contain samples the saved model was trained on.
SPLIT_SEED = 42

# Fractions of the dataset assigned to (train, test, val), in that order.
split_ratio = [0.8, 0.1, 0.1]
train_set, test_set, val_set = torch.utils.data.random_split(
    fxData,
    lengths=split_ratio,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# NOTE: in this notebook `test_dataloader` is handed to `train.train` alongside
# the training loader, while `val_dataloader` is the one passed to `train.test`
# for the final evaluation.
train_dataloader = train.create_data_loader(train_set, BATCH_SIZE)
test_dataloader = train.create_data_loader(test_set, BATCH_SIZE)
val_dataloader = train.create_data_loader(val_set, BATCH_SIZE)


#### Model training

In [12]:
from src.extrector import model

# Optimisation hyper-parameters.
LEARNING_RATE = 0.0005
EPOCHS = 10

# Weights are stored per effect index, e.g. "_weights/c51_parameter_1.pth".
WEIGHTS_DIR = "_weights/"
WEIGHTS_FILE = os.path.join(WEIGHTS_DIR, "c51_parameter_" + str(EFFECT) + ".pth")

# Create the output directory if needed; exist_ok avoids the check-then-create
# race and makes re-running this cell a no-op.
os.makedirs(WEIGHTS_DIR, exist_ok=True)

# construct model and assign it to device
cnn = model.Extractor().to(device)

# initialise loss function + optimiser
loss_fn = nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(cnn.parameters(), lr=LEARNING_RATE)

# train model (the test loader is passed in alongside the training loader)
train.train(cnn, train_dataloader, test_dataloader, loss_fn, optimiser, device, EPOCHS, effect=EFFECT)

# save model
torch.save(cnn.state_dict(), WEIGHTS_FILE)
print("Trained feed forward net saved at %s" %(WEIGHTS_FILE))

_weights/c51_parameter_1.pth


#### Evaluation

In [9]:
import csv

# Rebuild the network and restore the weights saved by the training cell.
cnn = model.Extractor().to(device)

# map_location lets weights saved on a GPU machine be loaded on a CPU-only one
# (and vice versa) instead of failing during deserialisation.
state_dict = torch.load(WEIGHTS_FILE, map_location=device)
cnn.load_state_dict(state_dict)

# Evaluate on the held-out loader; `log` is indexed by position below.
log = train.test(cnn, val_dataloader, device, effect=EFFECT)

# Show at most the first 10 entries; the bound guards against an IndexError
# when the evaluation set yields fewer than 10 rows.
for i in range(min(10, len(log))):
    print(log[i])

# Optional: dump the full evaluation log to CSV (requires `import csv`).
# with open('report.csv', 'w', newline='') as report_file:
#     csv.writer(report_file).writerows(log)


avg MSE: 0.007226
['c51_325_cs_1', 0.25, 0.29]
['c51_222_cs_2', 0.53, 0.51]
['c51_384_cs_4', 0.83, 0.88]
['c51_530_cs_2', 0.35, 0.46]
['c51_989_cs_2', 0.49, 0.53]
['c51_495_cs_1', 0.19, 0.32]
['c51_339_cs_4', 0.88, 0.91]
['c51_318_cs_2', 0.48, 0.53]
['c51_536_cs_2', 0.47, 0.5]
['c51_523_cs_1', 0.24, 0.32]
