In [9]:
import os, sys, wave, struct

import numpy as np

import torch
import torchaudio
from scipy.io import wavfile
import IPython

In [10]:
def gain(xs, sr, min_dB=-12, max_dB=12):
    """Apply one randomly drawn SoX ``gain`` to every tensor in ``xs``.

    A single gain value is sampled uniformly from ``[min_dB, max_dB)`` and is
    shared by all items, so every tensor receives the same level change.

    Args:
        xs: Mutable sequence of audio tensors handed to SoX with
            ``channels_first=True``.
        sr: Sample rate passed to SoX for every item.
        min_dB: Lower bound of the sampled gain.
        max_dB: Upper bound of the sampled gain.

    Returns:
        The same ``xs`` object, with each entry replaced by its processed
        version (the update happens in place).
    """
    draw = torch.rand(1).item()
    span_dB = max_dB - min_dB
    level_dB = (draw * span_dB) + min_dB

    # One effect chain, reused for every item.
    sox_chain = [["gain", f"{level_dB}"]]

    for position, signal in enumerate(xs):
        processed, _rate = torchaudio.sox_effects.apply_effects_tensor(
            signal, sr, sox_chain, channels_first=True
        )
        xs[position] = processed

    return xs

In [11]:
def peaking_filter(xs, sr=44100, frequency=1000, width_q=0.707, gain_db=12):
    """Apply a SoX ``equalizer`` (peaking EQ) to every tensor in ``xs``.

    Args:
        xs: Mutable sequence of audio tensors handed to SoX with
            ``channels_first=True``.
        sr: Sample rate passed to SoX for every item.
        frequency: First ``equalizer`` argument (centre frequency).
        width_q: Second ``equalizer`` argument (band width; the name suggests
            it is meant as a Q factor).
        gain_db: Third ``equalizer`` argument (boost or cut).

    Returns:
        The same ``xs`` object, with each entry replaced by its processed
        version (the update happens in place).
    """
    # NOTE: random sampling of gain_db / width_q / frequency (with a random
    # sign flip on the gain) used to live here and is currently disabled, so
    # the arguments are used exactly as given.
    effects = [["equalizer", f"{frequency}", f"{width_q}", f"{gain_db}"]]

    for idx, x in enumerate(xs):
        # Discard the returned rate instead of rebinding ``sr``: every item must
        # be processed at the caller's rate, not at whatever the previous call
        # reported.
        y, _ = torchaudio.sox_effects.apply_effects_tensor(
            x, sr, effects, channels_first=True
        )
        xs[idx] = y

    return xs

In [12]:
def pitch_shift(xs, min_shift=-200, max_shift=200, sr=44100):
    """Pitch-shift every tensor in ``xs`` by one randomly drawn amount.

    A single shift is sampled uniformly from ``[min_shift, max_shift)`` and
    passed to the SoX ``pitch`` effect (which takes its shift in cents), so all
    items are shifted identically.

    Args:
        xs: Mutable sequence of audio tensors handed to SoX with
            ``channels_first=True``.
        min_shift: Lower bound of the sampled shift.
        max_shift: Upper bound of the sampled shift.
        sr: Sample rate of the input; the output is resampled back to it.

    Returns:
        The same ``xs`` object, with each entry replaced by its processed
        version (the update happens in place).
    """
    shift = min_shift + torch.rand(1).item() * (max_shift - min_shift)

    # SoX ``pitch`` changes the rate of the signal it emits, and torchaudio does
    # not append the ``rate`` effect that the ``sox`` command line adds
    # automatically. Without it the audio comes back at a different rate than
    # the caller assumes, so resample to ``sr`` explicitly.
    effects = [["pitch", f"{shift}"], ["rate", f"{int(sr)}"]]

    for idx, x in enumerate(xs):
        # Discard the returned rate rather than rebinding ``sr``, so every item
        # is processed (and resampled) at the caller's rate.
        y, _ = torchaudio.sox_effects.apply_effects_tensor(
            x, sr, effects, channels_first=True
        )
        xs[idx] = y

    return xs

In [13]:
def time_stretch(xs, min_stretch=0.8, max_stretch=1.2, sr=44100):
    """Change the tempo of every tensor in ``xs`` by one randomly drawn factor.

    A single factor is sampled uniformly from ``[min_stretch, max_stretch)``
    and passed to the SoX ``tempo`` effect, so all items are stretched
    identically.

    Args:
        xs: Mutable sequence of audio tensors handed to SoX with
            ``channels_first=True``.
        min_stretch: Lower bound of the sampled tempo factor.
        max_stretch: Upper bound of the sampled tempo factor.
        sr: Sample rate passed to SoX for every item.

    Returns:
        The same ``xs`` object, with each entry replaced by its processed
        version (the update happens in place).
    """
    stretch = min_stretch + torch.rand(1).item() * (max_stretch - min_stretch)

    effects = [["tempo", f"{stretch}"]]

    for idx, x in enumerate(xs):
        # Discard the returned rate instead of rebinding ``sr``: every item must
        # be processed at the caller's rate, not at whatever the previous call
        # reported.
        y, _ = torchaudio.sox_effects.apply_effects_tensor(
            x, sr, effects, channels_first=True
        )
        xs[idx] = y

    return xs

In [14]:
def lowpass_filter(xs, sr=44100, frequency=4000):
    """Apply the SoX ``lowpass`` effect to every tensor in ``xs``.

    Args:
        xs: Mutable sequence of audio tensors handed to SoX with
            ``channels_first=True``.
        sr: Sample rate passed to SoX for every item.
        frequency: Argument given to ``lowpass`` (the cutoff frequency).

    Returns:
        The same ``xs`` object, with each entry replaced by its processed
        version (the update happens in place).
    """
    effects = [["lowpass", f"{frequency}"]]

    for idx, x in enumerate(xs):
        # Discard the returned rate instead of rebinding ``sr``: every item must
        # be processed at the caller's rate, not at whatever the previous call
        # reported.
        y, _ = torchaudio.sox_effects.apply_effects_tensor(
            x, sr, effects, channels_first=True
        )
        xs[idx] = y

    return xs

In [15]:
def apply(xs, sr, augmentations):
    """Run the requested augmentations over ``xs`` in dictionary order.

    Args:
        xs: Sequence of audio tensors; it is handed to each augmentation in
            turn and the value returned by the last one is returned here.
        sr: Sample rate forwarded to every augmentation as ``sr``.
        augmentations: Mapping from augmentation name to a dict of keyword
            arguments for it. Recognised names are ``"gain"``, ``"peak"``,
            ``"lowpass"``, ``"pitch"`` and ``"tempo"``. A value of ``None`` is
            treated as "no extra arguments".

    Returns:
        ``xs`` after all augmentations have been applied.

    Raises:
        RuntimeError: If a name in ``augmentations`` is not recognised.
    """
    # iterate over augmentation dict
    for aug, params in augmentations.items():
        kwargs = params or {}
        if aug == "gain":
            xs = gain(xs, sr=sr, **kwargs)
        elif aug == "peak":
            xs = peaking_filter(xs, sr=sr, **kwargs)
        elif aug == "lowpass":
            xs = lowpass_filter(xs, sr=sr, **kwargs)
        elif aug == "pitch":
            xs = pitch_shift(xs, sr=sr, **kwargs)
        elif aug == "tempo":
            xs = time_stretch(xs, sr=sr, **kwargs)
        else:
            # f-string so the offending name actually appears in the message.
            raise RuntimeError(f"Invalid augmentation: {aug}")

    return xs

In [16]:
# Read the source recording; wavfile.read returns (sample rate, samples).
samplerate, data = wavfile.read('guitare.wav')

# Wrap the samples in an int16 tensor.
# NOTE(review): assumes the file is stored as 16-bit PCM -- other sample
# formats would be cast to int16 here; confirm against the actual file.
data = torch.tensor(data, dtype=torch.int16)

# NOTE(review): assumes a mono file, so the result is (1, samples); a
# multi-channel array would gain a leading axis instead -- TODO confirm.
data = data.unsqueeze(0)  # Add channel dimension

# The augmentation helpers take a list of tensors.
xs = [data]
# The augmentation demo below is disabled by wrapping it in a bare string
# literal. Being the last expression of the cell, that string is what the cell
# echoes as its output.
'''
x_filtered = apply(xs, samplerate, {
    #'gain': {'min_dB': -12, 'max_dB': 12},
    #'peak': {'frequency': 5000, 'width_q': 0.707, 'gain_db': 10},
    #'lowpass': {'frequency': 4000},
    'pitch': {'min_shift': -200, 'max_shift': 200},
    'tempo': {'min_stretch': 0.8, 'max_stretch': 1.2}
    })
new_data = x_filtered[0].squeeze(0).numpy()
wavfile.write('guitare_filtered.wav', int(samplerate), new_data)
'''

"\nx_filtered = apply(xs, samplerate, {\n    #'gain': {'min_dB': -12, 'max_dB': 12},\n    #'peak': {'frequency': 5000, 'width_q': 0.707, 'gain_db': 10},\n    #'lowpass': {'frequency': 4000},\n    'pitch': {'min_shift': -200, 'max_shift': 200},\n    'tempo': {'min_stretch': 0.8, 'max_stretch': 1.2}\n    })\nnew_data = x_filtered[0].squeeze(0).numpy()\nwavfile.write('guitare_filtered.wav', int(samplerate), new_data)\n"

In [17]:
# Project-local DSP modules (not part of torch/torchaudio).
from peq import ParametricEQ
from compressor import Compressor

# Compressor configured for the sample rate of the WAV file read earlier
# (`samplerate` comes from the wavfile.read cell).
compressor = Compressor(sample_rate=samplerate)

In [18]:
# Parametric EQ configured for the sample rate of the WAV file read earlier.
peq = ParametricEQ(sample_rate=samplerate)

# Random control vector in [0, 1), one value per EQ control parameter.
# No seed is set, so the settings differ on every run.
params = torch.rand(peq.num_control_params)

# The EQ is called with NumPy arrays: audio reshaped to (1, 1, samples) and
# parameters reshaped to (1, num_params).
# NOTE(review): `data` is still int16 at this point -- confirm ParametricEQ
# accepts integer audio.
target_audio = peq(
    data.view(1, 1, -1).numpy(),
    params.view(1, -1).numpy(),
    sample_rate=samplerate
)
# Back to a tensor for the next processing stage.
target_audio = torch.tensor(target_audio)

In [19]:
# Random control vector in [0, 1) for the compressor (reuses the name `params`
# from the EQ cell).
params = torch.rand(compressor.num_control_params)
# Pin the last control parameter to the middle of its range.
# NOTE(review): which control this is depends on Compressor -- TODO confirm.
params[-1] = 0.5
# Compress the EQ'd audio; inputs are passed as NumPy arrays shaped
# (1, 1, samples) and (1, num_params).
target_audio = compressor(
    target_audio.view(1, 1, -1).numpy(),
    params.view(1, -1).numpy(),
    sample_rate=samplerate
)
# Back to a tensor, flattened to (1, samples).
target_audio = torch.tensor(target_audio).view(1, -1)

# Drop the leading axis and write the EQ + compressor result to disk.
compressed_data = target_audio.squeeze(0).numpy()
wavfile.write('guitare_compressed_eq.wav', int(samplerate), compressed_data)

In [20]:
# Inline audio player for the EQ + compressor result written by the previous cell.
IPython.display.Audio('guitare_compressed_eq.wav')

In [21]:
# Loading model
from utils import DSPMode
from system import System

#logdir = os.path.dirname(os.path.dirname('checkpoints/style/jamendo/autodiff/lightning_logs/version_0/checkpoints/epoch=362-step=1210241-val-jamendo-autodiff.ckpt'))
pckpts = 'checkpoints'
peq_ckpt = 'test/jamendo-peq.ckpt' #os.path.join(pckpts, "proxies/jamendo/peq/lightning_logs/version_0/checkpoints/epoch=326-step=204374-val-jamendo-peq.ckpt" )
comp_ckpt = 'test/jamendo-comp.ckpt'#os.path.join(pckpts, "proxies/jamendo/comp/lightning_logs/version_0/checkpoints/epoch=274-step=171874-val-jamendo-comp.ckpt" )
proxy_ckpts = [peq_ckpt, comp_ckpt]
dsp_mode = DSPMode.INFER
logdir = 'test/jamendo-autodiff.ckpt'
system = System.load_from_checkpoint(logdir, dsp_mode=dsp_mode, proxy_ckpts=proxy_ckpts,map_location='cpu').eval()

In [22]:
import torchaudio.transforms as T
# Rate the audio is resampled to before being fed to the model.
sr = 24000
# Input (x) and style reference (r) are loaded from the same file here.
x, x_sr = torchaudio.load("guitare.wav")
r, s_sr = torchaudio.load("guitare.wav")
# A single resampler built from x's rate is used for both signals; `s_sr` is
# not used in this cell.
resampler = T.Resample(orig_freq=x_sr, new_freq=sr)
# Flatten each signal to one row.
# NOTE(review): assumes mono input -- with several channels .view(1, -1) would
# concatenate them end to end; TODO confirm.
x_24000 = resampler(x).view(1, -1)
r_24000 = resampler(r).view(1, -1)


# Normalize to -12 dB
def normalize(z, sample_rate=24000, seconds=5, peak_db=-12.0):
    """Crop to the first channel and leading excerpt, then peak-normalise.

    Args:
        z: 2-D audio tensor indexed as (channel, sample).
        sample_rate: Sample rate used to convert ``seconds`` into a sample
            count.
        seconds: Length of the excerpt kept from the start of the signal.
        peak_db: Level, in dB relative to full scale, that the absolute peak of
            the excerpt is scaled to.

    Returns:
        A new tensor of shape ``(1, 1, n)`` whose absolute peak equals
        ``10 ** (peak_db / 20)``. The input tensor is left unmodified. An
        all-zero excerpt is returned as zeros rather than NaN.
    """
    # The slice is a view of the caller's tensor, so everything below must be
    # out of place; in-place ops here would silently rewrite the input.
    excerpt = z[0:1, : int(sample_rate * seconds)]
    peak = excerpt.abs().max()

    if peak > 0:
        excerpt = excerpt / peak * 10 ** (peak_db / 20.0)
    else:
        # Silence: dividing by a zero peak would produce NaN, keep the zeros.
        excerpt = excerpt.clone()

    return excerpt.reshape(1, 1, -1)

# Crop and level-match both the input and the reference before inference.
x_24000=normalize(x_24000)
r_24000=normalize(r_24000)



# Stages inside `system`, as described by the original author:
#   system.encoder(x)                    -> embedding e
#   system.controller(e_x, e_y, z=None)  -> parameters p (z is only useful for
#                                           non-baseline controllers)
#   system.processor()                   -> processed audio y_hat
with torch.no_grad():
    # Sanity test: the input is also used as the reference, i.e. nothing has
    # been done to the original signal. Apply the compressor to x and r to
    # change that. Note that r_24000 is prepared above but not passed here.
    y_hat, p, e = system(x_24000, x_24000)  #TEST when nothing has happened to the original signal. Apply compressor to x and r to change
    # Flatten the output to (1, samples) so it can be saved.
    y_hat=y_hat.view(1,-1)
    # Predicted control parameters.
    print(p)
    #y_hat_ch = system.processor(x_24000[:, :, :],p,epsilon=system.eps_scheduler.epsilon,dsp_mode=dsp_mode,sample_rate=sr)
print(y_hat.shape)
# Write the model output at the resampled rate.
torchaudio.save('guitare_estimated.wav',y_hat,int(sr))

tensor([[0.6219, 0.2000, 0.0482, 0.5691, 0.2637, 0.0046, 0.5530, 0.5770, 0.1346,
         0.6368, 0.5223, 0.2072, 0.3200, 0.3552, 0.3666, 0.4462, 0.6071, 0.0761,
         0.6249, 0.1923, 0.9985, 0.4800, 0.0106, 0.5473]])
torch.Size([1, 120000])


In [23]:
# Inline audio player for the model output written by the previous cell.
IPython.display.Audio('guitare_estimated.wav')