# Signal Chain Reconstruction

In [1]:
import torch
from torch import nn
import torchaudio
import os
import numpy as np
import matplotlib.pyplot as plt

### Load dataset

In [2]:
from src.gtfxdataset import GtFxDataset

# Location of the effected-guitar dataset and of its evaluation annotations.
AUDIO_DIR = "_assets/DATASET/GT-FX-C53/"
EVU_ANNOTATIONS_FILE = os.path.join(AUDIO_DIR, "evaluation.csv")

# Audio settings handed to the dataset: 22.05 kHz, three seconds per clip.
SAMPLE_RATE = 22050
NUM_SAMPLES = SAMPLE_RATE * 3

# Effect names, listed in the order used for the per-effect indices.
EFFECT_MAP = ["distortion", "chorus", "tremolo", "delay", "reverb"]

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device {device}")

# MFCC front end: 64 coefficients from 64 mel bands, with non-overlapping
# 1024-sample frames (hop == n_fft) and no centre padding.
mfcc = torchaudio.transforms.MFCC(
    sample_rate=SAMPLE_RATE,
    n_mfcc=64,
    melkwargs=dict(n_fft=1024, hop_length=1024, n_mels=64, center=False),
)

fxData = GtFxDataset(
    EVU_ANNOTATIONS_FILE,
    AUDIO_DIR,
    mfcc,
    SAMPLE_RATE,
    NUM_SAMPLES,
    device,
)
    

Using device cpu


In [3]:
from src.classifier import model as cModel
from src.classifier import train as cTrain
from src.extrector import model as eModel
from src.extrector import train as eTrain
from src.effectapplier import EffectApplier
from src.util import similarity_percentage

def signal_chain_reconstruction(test_signal, sample_file, dry_dir, weights_dir, device="cpu"):
    """Rebuild the effect chain of one sample and score the remix against it.

    The classifier predicts which effects are active; for every effect whose
    ground-truth state is on, the matching extractor predicts its parameter
    and the effect is applied to the dry recording. The resulting remix is
    then compared with the original effected audio in MFCC space.

    Also reads the module-level ``AUDIO_DIR`` (folder holding ``sample_file``)
    and ``mfcc`` (feature transform used for the similarity score).

    Args:
        test_signal: One dataset item. ``test_signal[1][fx]`` is used as the
            ground-truth on/off state of effect ``fx`` and
            ``test_signal[3][fx]`` as its ground-truth parameter value. The
            whole item is also passed to the classifier/extractor helpers.
        sample_file: File name of the effected sample, e.g.
            ``"C53-0556-3-10011-30031.wav"``. Characters 4..7 select the
            matching dry file ``"C50-<id>.wav"``.
        dry_dir: Directory containing the dry recordings.
        weights_dir: Directory containing the ``c53_*.pth`` checkpoints.
        device: Device the models are moved to and the checkpoints mapped to.

    Returns:
        tuple: ``(remix, waveform, log)`` where ``remix`` is the output of
        ``EffectApplier.generate()``, ``waveform`` is the audio loaded from
        ``sample_file`` and ``log`` is a flat list:
        ``[sample name, (state * predicted state, parameter error) per effect
        ..., similarity]``. The parameter error is ``-1.0`` for effects whose
        ground-truth state is off.
    """
    # map_location lets checkpoints that were saved on a GPU load on a
    # CPU-only machine (and vice versa).
    classify_weights = os.path.join(weights_dir, "c53_classify_15.pth")
    classifier = cModel.Classifier().to(device)
    classifier.load_state_dict(torch.load(classify_weights, map_location=device))
    classify_log = cTrain.test_single(classifier, test_signal)

    dry_file = "C50-" + sample_file[4:8] + ".wav"
    eff = EffectApplier(dry_dir, dry_file)

    log = [sample_file[:-4]]  # sample name without the ".wav" suffix
    extractor = eModel.Extractor().to(device)
    for fx, pred_state in enumerate(classify_log):

        # Read the labels from the argument, not from a notebook global, so
        # the function works for any caller and any variable name.
        state = test_signal[1][fx]
        log.append(state * pred_state)

        # NOTE(review): the effect is applied whenever the ground-truth state
        # is on, regardless of the classifier's prediction - confirm intended.
        if state > 0:
            weights_file = os.path.join(weights_dir, "c53_parameter_" + str(fx) + "_15.pth")
            extractor.load_state_dict(torch.load(weights_file, map_location=device))
            pred_val = eTrain.test_single(extractor, test_signal, fx)
            eff.addEffect(fx, pred_val, mode="manual")
            val = round(test_signal[3][fx].item(), 2)
            error = round(pred_val - val, 2)
        else:
            error = -1.0  # sentinel: no parameter to compare for a bypassed effect
        log.append(error)

    remix = eff.generate()
    waveform, _ = torchaudio.load(os.path.join(AUDIO_DIR, sample_file))
    # Assumes both signals are 44.1 kHz and are brought down to the 22.05 kHz
    # rate the MFCC transform was configured with - TODO confirm.
    resampler = torchaudio.transforms.Resample(44100, 22050)

    # The reshape requires each feature map to hold exactly 64 * 64 values.
    waveform_feature = resampler(waveform)
    waveform_feature = mfcc(waveform_feature).reshape(64, 64)
    remix_feature = resampler(remix)
    remix_feature = mfcc(remix_feature).reshape(64, 64)
    similarity = similarity_percentage(waveform_feature, remix_feature)

    log.append(round(similarity, 1))
    return remix, waveform, log

#### Signal Chain Reconstruction (single)

In [4]:
from src.util import play_audio
import pandas as pd
import random

# Row labels for the result table: effect name plus the parameter being estimated.
EFFECT_MAP2 = ["Distortion(Gain)", "Chorus(Depth)", "Tremolo(Rate)", "Delay(Delay Time)", "Reverb(Decay Time)"]

# Rate used for audio playback below. NOTE: this rebinds the SAMPLE_RATE name
# that was set to 22050 when the dataset was created.
SAMPLE_RATE = 44100

_WEIGHTS_DIR = "_weights/Legacy/"
_DRY_SAMPLE_DIR = "_assets/DATASET/GT-FX-DRY"

# random.randint is inclusive on both ends, so randint(0, len(fxData)) could
# return len(fxData), one past the last valid index. randrange excludes it.
index = random.randrange(len(fxData))

test_data = fxData[index]
sample_file = fxData.get_audio_sample_filename(index) + ".wav"
remix, waveform, log = signal_chain_reconstruction(test_data, sample_file,
                                        _DRY_SAMPLE_DIR, _WEIGHTS_DIR)

# log layout: [name, (state * predicted state, parameter error) per effect, similarity]
table = []

for i in range(len(EFFECT_MAP2)):

    effect_state = bool(test_data[1][i] > 0.0)
    if effect_state:
        true_value = test_data[3][i].item()
        parameter_value = str(round(true_value, 2))
        # The log stores (prediction - truth); add the truth back to show the prediction.
        predicted_value = str(round(log[i*2 + 2] + true_value, 2))
    else:
        parameter_value = "N/A"
        predicted_value = "N/A"
    table.append(["Activate" if log[i*2 + 1] == 1.0 else "Bypass",
                  "Activate" if effect_state else "Bypass",
                  predicted_value,
                  parameter_value])

print(f"Test file: {sample_file}")
play_audio(waveform, SAMPLE_RATE)

print("Remix by CNN Model:")
play_audio(remix, SAMPLE_RATE)

# The similarity score is the last entry appended to the log.
print(f"Similarity Percentage: {log[-1]:.2f}%")

# First Predicted/Expected pair: effect on/off state; second pair: parameter value.
df = pd.DataFrame(table,
                  columns = ["Predicted", "Expected", "Predicted", "Expected"],
                  index = EFFECT_MAP2)

df

Test file: C53-0556-3-10011-30031.wav


Remix by CNN Model:


Similarity Percentage: 69.70%


Unnamed: 0,Predicted,Expected,Predicted.1,Expected.1
Distortion(Gain),Activate,Activate,0.81,0.84
Chorus(Depth),Bypass,Bypass,,
Tremolo(Rate),Bypass,Bypass,,
Delay(Delay Time),Activate,Activate,0.79,0.8
Reverb(Decay Time),Activate,Activate,0.25,0.23


#### Evaluate Signal Chain Reconstruction

In [5]:
# Run the reconstruction over every sample of the evaluation set and store
# the per-sample logs as one array for later analysis.
EXPERIMENT_NAME = "c53_signal_chain"
EVU_DIR = "_log/Evaluation/"

_WEIGHTS_DIR = "_weights/Legacy/"
_DRY_SAMPLE_DIR = "_assets/DATASET/GT-FX-DRY"

# exist_ok avoids the separate existence check (and the race between the
# check and the creation).
os.makedirs(EVU_DIR, exist_ok=True)

total = len(fxData)
table = []
for index in range(total):
    test_data = fxData[index]
    sample_file = fxData.get_audio_sample_filename(index) + ".wav"
    _, _, log = signal_chain_reconstruction(test_data, sample_file,
                                            _DRY_SAMPLE_DIR, _WEIGHTS_DIR)
    table.append(log)

    # Lightweight progress report every 1500 samples.
    if index % 1500 == 0:
        print(f"Progress: {index}/{total}")

arr = np.array(table)
np.save(os.path.join(EVU_DIR, EXPERIMENT_NAME + "_evaluation.npy"), arr)

Progress: 0/39000
Progress: 1500/39000
Progress: 3000/39000
Progress: 4500/39000
Progress: 6000/39000
Progress: 7500/39000
Progress: 9000/39000
Progress: 10500/39000
Progress: 12000/39000
Progress: 13500/39000
Progress: 15000/39000
Progress: 16500/39000
Progress: 18000/39000
Progress: 19500/39000
Progress: 21000/39000
Progress: 22500/39000
Progress: 24000/39000
Progress: 25500/39000
Progress: 27000/39000
Progress: 28500/39000
Progress: 30000/39000
Progress: 31500/39000
Progress: 33000/39000
Progress: 34500/39000
Progress: 36000/39000
Progress: 37500/39000
