# Signal Chain Reconstruction

In [7]:
import torch
from torch import nn
import torchaudio
import os
import numpy as np
import matplotlib.pyplot as plt

### Load dataset

In [8]:
from src.gtfxdataset import GtFxDataset

# Directory holding the audio clips, and the CSV file that annotates them.
AUDIO_DIR = "_assets/DATASET/GT-FX-C53/"
ANNOTATIONS_FILE = os.path.join(AUDIO_DIR, "evaluation.csv")

# Sample rate and clip length handed to the dataset: 3 * 22050 samples,
# i.e. three seconds at SAMPLE_RATE.
SAMPLE_RATE = 22050
NUM_SAMPLES = 3 * SAMPLE_RATE

# Effect names; the position in this list is the effect index used to look
# the name up later in the notebook.
EFFECT_MAP = ["distortion", "chorus", "tremolo", "delay", "reverb"]

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device {device}")

# MFCC front end: 64 coefficients computed from 64 mel bands, with
# hop_length equal to n_fft and centring disabled.
mfcc = torchaudio.transforms.MFCC(
    sample_rate=SAMPLE_RATE,
    n_mfcc=64,
    melkwargs=dict(n_fft=1024, hop_length=1024, n_mels=64, center=False),
)

# NOTE(review): arguments are positional; presumably the dataset applies
# `mfcc` to every clip and moves the result to `device` -- confirm in
# src/gtfxdataset.py.
fxData = GtFxDataset(
    ANNOTATIONS_FILE,
    AUDIO_DIR,
    mfcc,
    SAMPLE_RATE,
    NUM_SAMPLES,
    device,
)
    

Using device cpu


#### Reconstruct Signal Chain

In [34]:
from src.classifier import model as cModel
from src.classifier import train as cTrain
from src.extrector import model as eModel
from src.extrector import train as eTrain
from src.effectapplier import EffectApplier
from src.util import play_audio
import pandas as pd
import random

# Where the trained weights and the unprocessed (dry) recordings live.
_WEIGHTS_DIR = "_weights/"
_DRY_SAMPLE_DIR = "_assets/DATASET/GT-FX-DRY"

CLASSIFY_WEIGHTS = os.path.join(_WEIGHTS_DIR, "c53_classify_15.pth")

# Pick one random item of the dataset. randrange() excludes its upper bound,
# whereas randint(0, len(fxData)) is inclusive on both ends and could return
# len(fxData), which is not a valid index.
index = random.randrange(len(fxData))
test_data = fxData[index]

sample_file = fxData.get_audio_sample_filename(index) + ".wav"
# The dry file name reuses characters 4..7 of the sample name
# (e.g. "0425" in "C53-0425-3-01110-02230.wav") behind a "C50-" prefix.
dry_file = "C50-" + sample_file[4:8] + ".wav"

# Effect classifier. map_location makes the checkpoint load on whatever
# device was selected, even if it was saved from a different one
# (e.g. saved on CUDA, loaded on a CPU-only machine).
classify = cModel.Classifier().to(device)
classify.load_state_dict(torch.load(CLASSIFY_WEIGHTS, map_location=device))

# NOTE(review): `log` is iterated later as one state per effect, with 1
# meaning "active" -- confirm against src/classifier/train.py.
log = cTrain.test_single(classify, test_data)

# Parameter extractor (weights are loaded per effect later) and the object
# that collects the predicted effects for the dry sample.
extract = eModel.Extractor().to(device)
eff = EffectApplier(_DRY_SAMPLE_DIR, dry_file)

# Rows of the report table; each row is [effect, item, expected, predicted].
table = []
# Display name of the parameter estimated for each effect, indexed like
# EFFECT_MAP.
para_list = ['Gain', 'Depth', 'Rate', 'Delay Time', 'Decay']
for fx, state in enumerate(log):
    # First row per effect: on/off state.
    # NOTE(review): test_data[1] is presumably the ground-truth activation
    # vector -- confirm against the dataset class.
    table.append([EFFECT_MAP[fx],
                 "State",
                 "Activate" if test_data[1][fx] > 0.0 else "Bypass",
                 "Activate" if state == 1 else "Bypass"])

    if state == 1:
        # Each effect has its own extractor checkpoint. map_location lets it
        # load on the selected device regardless of where it was saved.
        WEIGHTS_FILE = os.path.join(_WEIGHTS_DIR, f"c53_parameter_{fx}_20.pth")
        extract.load_state_dict(torch.load(WEIGHTS_FILE, map_location=device))
        val = eTrain.test_single(extract, test_data, fx)
        eff.addEffect(fx, val, mode="manual")
    else:
        # Negative sentinel: no parameter is predicted for a bypassed effect.
        val = -1.0

    # Second row per effect: parameter value, or "N/A" when not applicable.
    # NOTE(review): test_data[3] is presumably the ground-truth parameter
    # vector -- confirm against the dataset class.
    table.append([EFFECT_MAP[fx],
                  para_list[fx],
                  "N/A" if test_data[1][fx] < 1 else round(test_data[3][fx].item(), 2),
                  "N/A" if val < 0 else val])
# Generate the reconstructed audio from the collected effects and load the
# reference clip it will be compared with.
# NOTE(review): presumably generate() renders the dry sample through the
# added effects -- confirm in src/effectapplier.py.
remix = eff.generate()
waveform, sr = torchaudio.load(os.path.join(AUDIO_DIR, sample_file))

# Play the reference first, then the reconstruction; both are played at the
# sample rate reported for the loaded file.
for caption, clip in (
    (f"Test file: {sample_file}", waveform),
    ("Remix by CNN Model:", remix),
):
    print(caption)
    play_audio(clip, sr)

from fastdtw import fastdtw

# Resample both signals to the rate the MFCC transform was configured with.
# The source rate is the one reported by torchaudio.load rather than a
# hard-coded 44100 Hz, so a file at another rate is still handled correctly.
# NOTE(review): `remix` is assumed to share that rate (it is played back
# with `sr`) -- confirm in src/effectapplier.py.
resampler = torchaudio.transforms.Resample(sr, SAMPLE_RATE)

# NOTE(review): reshape(64, 64) assumes a mono clip that yields exactly 64
# frames; rows are presumably MFCC coefficients and columns time frames, so
# the DTW below aligns rows, not frames -- confirm this is intended.
waveform = resampler(waveform)
waveform = mfcc(waveform).reshape(64, 64)
remix = resampler(remix)
remix = mfcc(remix).reshape(64, 64)

def mfcc_dist(a, b):
    """Return the Euclidean distance between two equally sized vectors.

    Used as the pairwise cost function for DTW. The computation is a single
    vectorised NumPy expression instead of a Python-level loop over the
    elements, which matters because the function is called once per
    evaluated DTW cell.

    Args:
        a: Sequence or array of numbers.
        b: Sequence or array of numbers with the same shape as ``a``.

    Returns:
        ``sqrt(sum((a - b) ** 2))`` reduced over the first axis: a scalar
        for 1-D input, and 0.0 for empty input.
    """
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    # Reducing over axis 0 mirrors iterating the inputs element by element.
    return np.sqrt(np.sum(diff * diff, axis=0))

# Accumulated DTW cost between the two MFCC matrices, with mfcc_dist as the
# cost of matching one row against another.
distance = fastdtw(waveform, remix, dist=mfcc_dist)[0]

# Map the cost onto a percentage.
# NOTE(review): 64 * 64 is a fixed scale, not a proven upper bound on the
# DTW cost, so the percentage can become negative for very different clips.
max_possible_distance = 64 * 64
normalized_distance = distance / max_possible_distance
similarity_percentage = 100 * (1 - normalized_distance)
print(f"Similarity Percentage: {similarity_percentage:.2f}%")

# Expected-vs-predicted table, grouped by effect and item; the final
# expression is left bare so the notebook displays it.
df = pd.DataFrame(table)
df.columns = ["Effect", "Item", "Expected", "Predicted"]
df2 = df.groupby(["Effect", "Item"])
df2.first()


Test file: C53-0425-3-01110-02230.wav


Remix by CNN Model:


Similarity Percentage: 46.70%


Unnamed: 0_level_0,Unnamed: 1_level_0,Expected,Predicted
Effect,Item,Unnamed: 2_level_1,Unnamed: 3_level_1
chorus,Depth,0.53,0.65
chorus,State,Activate,Activate
delay,Delay Time,0.79,0.81
delay,State,Activate,Activate
distortion,Gain,,
distortion,State,Bypass,Bypass
reverb,Decay,,
reverb,State,Bypass,Bypass
tremolo,Rate,0.53,0.51
tremolo,State,Activate,Activate
