In [2]:
import librosa
import numpy as np
import tensorflow as tf

Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


In [None]:
# CPU Version
# melgan_two_outputs: shape [2, audio_length]
def denoiser_cpu(melgan_two_outputs, param_denoise):
    """Denoise a waveform by spectral subtraction using librosa/NumPy.

    The magnitude spectrogram of the bias audio, scaled by ``param_denoise``,
    is subtracted from the magnitude spectrogram of the real audio. The
    result is clamped at zero, recombined with the phase of the real audio
    and inverted back to a waveform.

    Args:
        melgan_two_outputs: Indexable pair of 1-D waveforms; element 0 is the
            audio to clean and element 1 is the bias audio whose spectrum is
            subtracted (expected shape ``[2, audio_length]``).
        param_denoise: Scalar multiplier applied to the bias magnitudes
            before subtraction.

    Returns:
        The 1-D waveform returned by ``librosa.istft`` for the denoised
        spectrogram.
    """
    real_audio = melgan_two_outputs[0]
    bias_audio = melgan_two_outputs[1]

    # Both signals must be analysed with identical STFT settings.
    stft_kwargs = dict(n_fft=2048, hop_length=200, win_length=800)
    audio_spec, audio_angles = librosa.magphase(librosa.stft(real_audio, **stft_kwargs))
    bias_spec, _ = librosa.magphase(librosa.stft(bias_audio, **stft_kwargs))

    # Spectral subtraction; magnitudes cannot be negative, so clamp at zero.
    audio_spec_denoised = audio_spec - bias_spec * param_denoise
    audio_spec_denoised = np.clip(audio_spec_denoised, 0.0, 999999999.9)
    # `np.complex` was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin `complex` selects the same complex128 dtype.
    S_complex = audio_spec_denoised.astype(complex)

    # Re-attach the original phase and invert (n_fft is inferred from the
    # spectrogram's number of frequency bins).
    audio_denoised = librosa.istft(S_complex * audio_angles, hop_length=200, win_length=800)
    return audio_denoised

In [None]:
# GPU Version
# melgan_two_outputs: shape [2, audio_length, 1]
def denoiser_gpu(melgan_two_outputs, param_denoise):
    """Denoise a waveform by spectral subtraction using TensorFlow ops.

    Mirrors ``denoiser_cpu``: the magnitude spectrogram of the bias audio,
    scaled by ``param_denoise``, is subtracted from that of the real audio,
    clamped at zero, recombined with the phase of the real audio and
    inverted back to a waveform.

    Args:
        melgan_two_outputs: Tensor indexable along its first axis; element 0
            is the audio to clean and element 1 is the bias audio. Each
            element must have a size-1 axis at position 1 (expected overall
            shape ``[2, audio_length, 1]``).
        param_denoise: Python scalar multiplier applied to the bias
            magnitudes before subtraction.

    Returns:
        1-D tensor holding the denoised waveform.
    """
    # tf.signal superseded tf.contrib.signal, and tf.contrib does not exist
    # in TF 2.x; prefer the new location and fall back for old TF 1.x.
    signal = tf.signal if hasattr(tf, 'signal') else tf.contrib.signal
    # Likewise, variable_scope lives under tf.compat.v1 in TF 2.x.
    compat_v1 = getattr(getattr(tf, 'compat', None), 'v1', None)
    variable_scope = getattr(compat_v1, 'variable_scope', None) or tf.variable_scope

    frame_length, frame_step, fft_length = 800, 200, 2048

    with variable_scope('denoiser'):
        # Drop the trailing channel axis and add a leading batch axis.
        bias_audio = tf.identity(tf.squeeze(melgan_two_outputs[1], 1))
        bias_audio = tf.expand_dims(bias_audio, 0)

        real_audio = tf.identity(tf.squeeze(melgan_two_outputs[0], 1))
        real_audio = tf.expand_dims(real_audio, 0)

        bias_spec = tf.abs(signal.stft(bias_audio, frame_length, frame_step, fft_length))

        audio_stft = signal.stft(real_audio, frame_length, frame_step, fft_length)
        audio_spec = tf.abs(audio_stft)
        # Unit-magnitude phase factor; the 1e-8 floor avoids division by zero.
        audio_angles = audio_stft / tf.cast(tf.maximum(1e-8, audio_spec), tf.complex64)

        # Spectral subtraction; magnitudes cannot be negative, so clamp at zero.
        param = tf.constant([param_denoise], dtype=tf.float32)
        audio_spec_denoised = tf.subtract(audio_spec, tf.multiply(bias_spec, param))
        audio_spec_denoised = tf.clip_by_value(audio_spec_denoised, 0.0, 999999999.9)
        S_complex = tf.cast(audio_spec_denoised, dtype=tf.complex64)

        # The synthesis window must complement the Hann analysis window used
        # by stft(); with the default Hann synthesis window the overlap-add
        # of the squared windows is not unity and the output comes back
        # scaled relative to the input.
        denoiser_audio = signal.inverse_stft(
            S_complex * audio_angles, frame_length, frame_step, fft_length,
            window_fn=signal.inverse_stft_window_fn(frame_step))
        return tf.squeeze(denoiser_audio, 0)