In [None]:
import torch
from denoiser import Denoiser
from layers import TacotronSTFT, STFT
from hparams import create_hparams
from utils import load_wav_to_torch
import IPython.display as ipd

In [None]:
# Hyperparameter object shared by the rest of the notebook. Later cells read
# filter_length, hop_length, win_length, n_mel_channels, sampling_rate,
# mel_fmin, mel_fmax and max_wav_value from it.
hparams = create_hparams()

In [None]:
# Mel-spectrogram extractor; every setting is taken from hparams so it stays
# in sync with the rest of the notebook's configuration.
stft = TacotronSTFT(
    hparams.filter_length,
    hparams.hop_length,
    hparams.win_length,
    hparams.n_mel_channels,
    hparams.sampling_rate,
    hparams.mel_fmin,
    hparams.mel_fmax,
)
def load_mel(path):
    """Load a wav file and compute its mel spectrogram with the notebook's `stft`.

    Args:
        path: Location of the wav file, passed straight to `load_wav_to_torch`.

    Returns:
        The result of `stft.mel_spectrogram` for the normalized audio, which is
        given a leading dimension of size 1 before the call.

    Raises:
        ValueError: If the file's sampling rate differs from
            `stft.sampling_rate`.
    """
    audio, sampling_rate = load_wav_to_torch(path)
    if sampling_rate != stft.sampling_rate:
        # Three placeholders need three arguments: file, its SR, target SR.
        # Supplying only two made str.format raise IndexError instead of
        # reporting the mismatch.
        raise ValueError("{} {} SR doesn't match target {} SR".format(
            path, sampling_rate, stft.sampling_rate))
    # Scale raw sample values by the configured maximum wav value.
    audio_norm = audio / hparams.max_wav_value
    audio_norm = audio_norm.unsqueeze(0)
    # Legacy autograd wrapper, kept as-is; it marks the input as not needing
    # gradients. NOTE(review): likely removable on current PyTorch -- confirm.
    audio_norm = torch.autograd.Variable(audio_norm, requires_grad=False)
    melspec = stft.mel_spectrogram(audio_norm)
    return melspec

In [None]:
waveglow_path = 'WAVEGLOW PATH HERE'

# NOTE(review): torch.load unpickles the checkpoint, which can execute
# arbitrary code -- only point this at checkpoints from a trusted source.
waveglow = torch.load(waveglow_path)['model']

# Move the model to the GPU, switch it to eval mode, and cast it to half
# precision. The return values are not needed: the object is updated in place.
waveglow.cuda()
waveglow.eval()
waveglow.half()

# The entries of `convinv` are cast back to float32 after the model-wide
# half() call.
for inv_conv in waveglow.convinv:
    inv_conv.float()

denoiser = Denoiser(waveglow)

In [None]:
file_path = 'WAV PATH HERE'
mel_outputs_postnet = load_mel(file_path).cuda().half()

# Sigma values handed to waveglow.infer, in playback order. Edit this tuple to
# audition other settings instead of copy-pasting the inference lines.
sigmas = (0.5, 0.7, 0.8, 0.9, 1.0)

# One generated result per sigma, kept for the denoising cell below.
audio = []
with torch.no_grad():
    for sigma_ in sigmas:
        audio.append(waveglow.infer(mel_outputs_postnet, sigma=sigma_))
        print("sigma = {}".format(sigma_))
        # Play the first item of the newest result. Under no_grad the tensor
        # carries no autograd history, so the legacy `.data` access is not
        # needed before converting to NumPy.
        ipd.display(ipd.Audio(audio[-1][0].cpu().numpy(), rate=hparams.sampling_rate))

In [None]:
# Denoised

In [None]:
# Strength value forwarded to the denoiser for every generated sample.
denoise_strength = 0.01

# Run each result from the sigma sweep through the denoiser and play it back,
# in the same order as above.
for generated in audio:
    audio_denoised = denoiser(generated, strength=denoise_strength)[:, 0]
    player = ipd.Audio(audio_denoised.cpu().numpy(), rate=hparams.sampling_rate)
    ipd.display(player)