In [457]:
import os, sys, wave, struct

import numpy as np

import torch
import torchaudio
from scipy.io import wavfile
import IPython

In [458]:
def gain(xs, sr, min_dB=-12, max_dB=12):
    """Apply one randomly drawn SoX ``gain`` effect to every item of ``xs``.

    A single gain value is sampled uniformly from ``[min_dB, max_dB)`` per
    call and shared by all items, so every item receives the same gain.

    Args:
        xs: Indexable collection of audio tensors; each item is handed to
            ``torchaudio.sox_effects.apply_effects_tensor`` with
            ``channels_first=True``.
        sr: Sample rate passed to the SoX effect chain.
        min_dB: Lower bound of the gain range, in dB.
        max_dB: Upper bound of the gain range, in dB.

    Returns:
        The same ``xs`` object; its items are replaced in place.
    """
    span_dB = max_dB - min_dB
    gain_dB = min_dB + span_dB * torch.rand(1).item()
    chain = [["gain", f"{gain_dB}"]]

    for position in range(len(xs)):
        # The rate returned by SoX is not needed here, so it is discarded.
        xs[position], _ = torchaudio.sox_effects.apply_effects_tensor(
            xs[position], sr, chain, channels_first=True
        )

    return xs

In [459]:
def peaking_filter(xs, sr=44100, frequency=1000, width_q=0.707, gain_db=12):
    """Apply one SoX ``equalizer`` effect to every item of ``xs``.

    The filter parameters are deterministic: they are used exactly as given
    and are not randomized inside this function.

    Args:
        xs: Indexable collection of audio tensors; each item is handed to
            ``torchaudio.sox_effects.apply_effects_tensor`` with
            ``channels_first=True``.
        sr: Sample rate passed to the SoX effect chain for every item.
        frequency: First argument of the SoX ``equalizer`` effect.
        width_q: Second argument of the SoX ``equalizer`` effect.
        gain_db: Third argument of the SoX ``equalizer`` effect.

    Returns:
        The same ``xs`` object; its items are replaced in place.
    """
    effects = [["equalizer", f"{frequency}", f"{width_q}", f"{gain_db}"]]

    for idx, x in enumerate(xs):
        # Discard the returned rate: rebinding ``sr`` here would feed the
        # rate reported for item N into the call for item N+1.
        y, _ = torchaudio.sox_effects.apply_effects_tensor(
            x, sr, effects, channels_first=True
        )
        xs[idx] = y

    return xs

In [460]:
def pitch_shift(xs, min_shift=-200, max_shift=200, sr=44100):
    """Pitch-shift every item of ``xs`` by one randomly drawn amount.

    A single shift is sampled uniformly from ``[min_shift, max_shift)`` per
    call and shared by all items. The value is passed to the SoX ``pitch``
    effect, followed by a ``rate`` effect that resamples back to ``sr``.

    Args:
        xs: Indexable collection of audio tensors; each item is handed to
            ``torchaudio.sox_effects.apply_effects_tensor`` with
            ``channels_first=True``.
        min_shift: Lower bound of the shift passed to SoX ``pitch``.
        max_shift: Upper bound of the shift passed to SoX ``pitch``.
        sr: Sample rate of the input, also used as the output rate.

    Returns:
        The same ``xs`` object; its items are replaced in place.
    """
    shift = min_shift + torch.rand(1).item() * (max_shift - min_shift)

    # SoX's ``pitch`` effect alters the stream's nominal sample rate. The sox
    # command line appends a ``rate`` effect automatically, but torchaudio
    # applies only the effects it is given, so ``rate`` is added explicitly
    # to keep the output at the caller's sample rate.
    effects = [["pitch", f"{shift}"], ["rate", f"{sr}"]]

    for idx, x in enumerate(xs):
        # Discard the returned rate: rebinding ``sr`` here would feed the
        # rate reported for item N into the call for item N+1.
        y, _ = torchaudio.sox_effects.apply_effects_tensor(
            x, sr, effects, channels_first=True
        )
        xs[idx] = y

    return xs

In [461]:
def time_stretch(xs, min_stretch=0.8, max_stretch=1.2, sr=44100):
    """Apply one randomly drawn SoX ``tempo`` effect to every item of ``xs``.

    A single factor is sampled uniformly from ``[min_stretch, max_stretch)``
    per call and shared by all items.

    Args:
        xs: Indexable collection of audio tensors; each item is handed to
            ``torchaudio.sox_effects.apply_effects_tensor`` with
            ``channels_first=True``.
        min_stretch: Lower bound of the factor passed to SoX ``tempo``.
        max_stretch: Upper bound of the factor passed to SoX ``tempo``.
        sr: Sample rate passed to the SoX effect chain for every item.

    Returns:
        The same ``xs`` object; its items are replaced in place.
    """
    stretch = min_stretch + torch.rand(1).item() * (max_stretch - min_stretch)
    effects = [["tempo", f"{stretch}"]]

    for idx, x in enumerate(xs):
        # Discard the returned rate: rebinding ``sr`` here would feed the
        # rate reported for item N into the call for item N+1.
        y, _ = torchaudio.sox_effects.apply_effects_tensor(
            x, sr, effects, channels_first=True
        )
        xs[idx] = y

    return xs

In [462]:
def lowpass_filter(xs, sr=44100, frequency=4000):
    """Apply one SoX ``lowpass`` effect to every item of ``xs``.

    Args:
        xs: Indexable collection of audio tensors; each item is handed to
            ``torchaudio.sox_effects.apply_effects_tensor`` with
            ``channels_first=True``.
        sr: Sample rate passed to the SoX effect chain for every item.
        frequency: Argument of the SoX ``lowpass`` effect.

    Returns:
        The same ``xs`` object; its items are replaced in place.
    """
    effects = [["lowpass", f"{frequency}"]]

    for idx, x in enumerate(xs):
        # Discard the returned rate: rebinding ``sr`` here would feed the
        # rate reported for item N into the call for item N+1.
        y, _ = torchaudio.sox_effects.apply_effects_tensor(
            x, sr, effects, channels_first=True
        )
        xs[idx] = y

    return xs

In [463]:
def apply(xs, sr, augmentations):
    """Run a sequence of named augmentations over ``xs``.

    Augmentations are applied in the iteration order of ``augmentations``;
    each one receives the output of the previous one.

    Args:
        xs: Collection of audio items forwarded to each augmentation.
        sr: Sample rate forwarded to each augmentation as ``sr``.
        augmentations: Mapping from augmentation name to a dict of keyword
            arguments for it. Recognised names are ``"gain"``, ``"peak"``,
            ``"lowpass"``, ``"pitch"`` and ``"tempo"``.

    Returns:
        Whatever the last augmentation returned, or ``xs`` unchanged when
        ``augmentations`` is empty.

    Raises:
        RuntimeError: If a key of ``augmentations`` is not a recognised name.
    """
    for aug, params in augmentations.items():
        if aug == "gain":
            xs = gain(xs, sr=sr, **params)
        elif aug == "peak":
            xs = peaking_filter(xs, sr=sr, **params)
        elif aug == "lowpass":
            xs = lowpass_filter(xs, sr=sr, **params)
        elif aug == "pitch":
            xs = pitch_shift(xs, sr=sr, **params)
        elif aug == "tempo":
            xs = time_stretch(xs, sr=sr, **params)
        else:
            # f-string so the message names the offending key.
            raise RuntimeError(f"Invalid augmentation: {aug}")

    return xs

In [None]:
# Load the source recording as (sample rate, samples).
samplerate, data = wavfile.read('guitare.wav')

# Cast to int16 and prepend a channel axis, since the effects are called with
# channels_first=True.
# NOTE(review): assumes a mono file so `data` is 1-D before unsqueeze — confirm.
data = torch.tensor(data, dtype=torch.int16).unsqueeze(0)

# Augmentations run in dict order. apply() also accepts 'gain', 'peak' and
# 'lowpass', e.g.:
#   'gain': {'min_dB': -12, 'max_dB': 12}
#   'peak': {'frequency': 5000, 'width_q': 0.707, 'gain_db': 10}
#   'lowpass': {'frequency': 4000}
augmentations = {
    'pitch': {'min_shift': -200, 'max_shift': 200},
    'tempo': {'min_stretch': 0.8, 'max_stretch': 1.2},
}

# The helpers replace list items in place, so `xs` is modified by this call;
# `data` itself still refers to the original tensor.
xs = [data]
x_augmented = apply(xs, samplerate, augmentations)

# Drop the channel axis again and write the result next to the input file.
new_data = x_augmented[0].squeeze(0).numpy()
wavfile.write('guitare_augmented.wav', int(samplerate), new_data)

In [465]:
# Compressor and ParametricEQ come from the `compressor` and `peq` modules,
# which are not shown here (presumably project-local — confirm).
from compressor import Compressor
from peq import ParametricEQ

# Build the compressor for the sample rate of the WAV file read earlier.
compressor = Compressor(sample_rate=samplerate)

In [466]:
# Parametric EQ driven by uniformly random control values (unseeded, so the
# result differs on every run).
peq = ParametricEQ(sample_rate=samplerate)
params = torch.rand(peq.num_control_params)

# The processor is called with NumPy inputs: audio reshaped to (1, 1, N) and
# controls reshaped to (1, num_params).
peq_input = data.view(1, 1, -1).numpy()
peq_controls = params.view(1, -1).numpy()
target_audio = torch.tensor(peq(peq_input, peq_controls, sample_rate=samplerate))

In [467]:
# Draw random compressor controls, then pin the last one to 0.5.
# NOTE(review): what the last control means depends on Compressor's parameter
# layout, which is not visible here — confirm.
params = torch.rand(compressor.num_control_params)
params[-1] = 0.5

# NOTE: this cell reads and reassigns `target_audio`, so re-running it feeds
# the already-compressed signal through the compressor again.
compressor_input = target_audio.view(1, 1, -1).numpy()
compressor_controls = params.view(1, -1).numpy()
compressed = compressor(compressor_input, compressor_controls, sample_rate=samplerate)
target_audio = torch.tensor(compressed).view(1, -1)

# Drop the leading axis and write the processed signal to disk.
compressed_data = target_audio.squeeze(0).numpy()
wavfile.write('guitare_compressed_eq.wav', int(samplerate), compressed_data)

In [468]:
# Show an inline audio player for the processed file written by the previous cell.
IPython.display.Audio('guitare_compressed_eq.wav')