In [None]:
import os

from pydub import AudioSegment, effects

# Normalize the separated vocal stem with pydub and write the result as WAV.
normalized_path = "../data/out/normalized_audio/vocals.wav"

# Load audio file
audio = AudioSegment.from_file("../data/out/htdemucs_ft/Avicii - Wake Me Up/vocals.wav")
normalized = effects.normalize(audio)

# Nothing else in the notebook creates this directory, so make sure it exists
# before exporting (otherwise a fresh run fails on the export).
os.makedirs(os.path.dirname(normalized_path), exist_ok=True)

# export() returns the file object it wrote to. Close it explicitly: left as
# the cell's last expression, the kernel would keep the open handle alive in
# its output history.
normalized.export(normalized_path, format="wav").close()

In [None]:
from pedalboard import Pedalboard, NoiseGate, Compressor, LowpassFilter

# Effects for the vocal stem, named individually and then handed to the
# Pedalboard in this order: noise gate, low-pass filter, compressor.
# NOTE(review): a 500 Hz low-pass cutoff is very low for a vocal track —
# confirm that a low-pass (rather than a high-pass) is what was intended.
vocal_gate = NoiseGate(threshold_db=-18.0, attack_ms=500.0, release_ms=1500.0)
vocal_lowpass = LowpassFilter(cutoff_frequency_hz=500.0)
vocal_compressor = Compressor(threshold_db=-6.0, ratio=5)

board = Pedalboard([vocal_gate, vocal_lowpass, vocal_compressor])

In [None]:
import os

from pedalboard.io import AudioFile

# Stream the separated vocal stem through `board` chunk by chunk and write the
# processed audio to a new file with the same sample rate and channel count.
source_path = "../data/out/htdemucs_ft/Idina Menzel - Let It Go/vocals.wav"
effected_path = "../data/out/optimized_audio/frozen_vocals.wav"

# Opening the output file for writing fails if its directory does not exist.
os.makedirs(os.path.dirname(effected_path), exist_ok=True)

# Chunks are processed with reset=False so plugin state carries across chunk
# boundaries. Clear that state once up front so re-running this cell does not
# start from whatever the previous run left behind in the plugins.
board.reset()

# Load audio file
with AudioFile(source_path) as audio:
    with AudioFile(effected_path, 'w', audio.samplerate, audio.num_channels) as output:
        # Loop over file and apply pedalboard effects
        while audio.tell() < audio.frames:
            # Read `samplerate` frames (one second of audio) per iteration.
            chunk = audio.read(int(audio.samplerate))
            effected = board.process(chunk, audio.samplerate, reset=False)
            output.write(effected)

In [None]:
# Process using speechbrain ML model: metricgan-plus-voicebank
import os

from speechbrain.pretrained import SpectralMaskEnhancement
import torchaudio
import torch

model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="../models/metricgan-plus-voicebank",
)

# Named *_path rather than `input`/`output`: binding `input` at notebook level
# would shadow the builtin input() for the rest of the kernel session.
noisy_path = "../data/out/htdemucs_ft/Avicii - Wake Me Up/vocals.wav"
enhanced_path = "../data/out/optimized_audio/vocals_enhanced_metricgan.wav"

# Load the stem through the model's own loader and add a leading batch axis.
noisy = model.load_audio(noisy_path).unsqueeze(0)

# A single-item batch with relative length 1.0 — presumably marks the whole
# signal as valid (no padding); confirm against the speechbrain docs.
enhanced = model.enhance_batch(noisy, lengths=torch.tensor([1.]))

# torchaudio.save cannot create missing directories, so make sure the target
# directory exists before writing.
os.makedirs(os.path.dirname(enhanced_path), exist_ok=True)

# Saving enhanced signal on disk
# NOTE(review): 16000 is assumed to be the model's output sample rate — confirm.
torchaudio.save(enhanced_path, enhanced.cpu(), 16000)

In [None]:
# Process using speechbrain ML model: mtl-mimic-voicebank
import os

import torchaudio
from speechbrain.pretrained import WaveformEnhancement

# Named *_path rather than `input`/`output`: binding `input` at notebook level
# would shadow the builtin input() for the rest of the kernel session.
vocals_path = "../data/out/htdemucs_ft/Avicii - Wake Me Up/vocals.wav"
enhanced_path = "../data/out/optimized_audio/vocals_enhanced_mtl.wav"

# NOTE(review): this savedir is relative to the working directory
# ("pretrained_models/..."), whereas the metricgan cell stores its model under
# "../models/..." — confirm whether a single model directory is intended.
enhance_model = WaveformEnhancement.from_hparams(
    source="speechbrain/mtl-mimic-voicebank",
    savedir="pretrained_models/mtl-mimic-voicebank",
)
enhanced = enhance_model.enhance_file(vocals_path)

# torchaudio.save cannot create missing directories, so make sure the target
# directory exists before writing.
os.makedirs(os.path.dirname(enhanced_path), exist_ok=True)

# Saving enhanced signal on disk; unsqueeze(0) adds the leading axis that
# torchaudio.save expects in front of the time axis.
# NOTE(review): 16000 is assumed to be the model's output sample rate — confirm.
torchaudio.save(enhanced_path, enhanced.unsqueeze(0).cpu(), 16000)