In [1]:
import matplotlib.pyplot as plt
import librosa as dsp
import librosa.display as dsp_plot
import numpy as np
import math
import IPython.display

from params import Hyperparams as hp

Audio front-end parameters: pre-emphasis 0.97; frame length 50 ms; frame shift 12.5 ms; window type Hann.

In [2]:

def load_wave(audiofilepath):
    """Read an audio file as a mono signal.

    Delegates to ``dsp.load`` with ``mono=True`` and no explicit sample rate,
    so the rate is whatever the loader chooses by default.

    Returns the ``(samples, sample_rate)`` pair produced by the loader.
    """
    samples, sample_rate = dsp.load(audiofilepath, mono=True)
    return samples, sample_rate

def do_preemphasis(wave, pre_emphasis=0.97):
    """Apply a first-order pre-emphasis filter to a 1-D signal.

    Computes ``out[n] = wave[n] - pre_emphasis * wave[n - 1]`` for ``n >= 1``
    and keeps the first sample unchanged, so the output has the same length
    as the input.
    """
    first_sample = wave[0]
    # Each sample minus the scaled previous sample.
    emphasized_tail = wave[1:] - pre_emphasis * wave[:-1]
    return np.append(first_sample, emphasized_tail)

def _seconds_to_samples(length, sr):
    """Return ``length`` as a whole number of samples.

    Integer values are taken to already be sample counts and are returned
    unchanged; any other numeric value is interpreted as a duration in seconds
    and converted with ``int(length * sr)`` (truncating, like the conversion in
    ``test_spectrogram``).
    """
    if isinstance(length, (int, np.integer)):
        return int(length)
    return int(length * sr)


def do_spectrogram(y=None, sr=16000,win_length=0.05,hop_length=0.0125, n_fft=2048,n_mels=80):
    """Compute log-magnitude linear and mel spectrograms of a waveform (in dB).

    Parameters
    ----------
    y : waveform passed to ``dsp.stft``.
    sr : sample rate in Hz; used for the mel filter bank and to convert
        second-valued lengths to samples.
    win_length, hop_length : analysis window length and frame shift. Floats are
        interpreted as seconds (the defaults are 50 ms and 12.5 ms); integers
        are interpreted as sample counts and used as-is.
    n_fft : FFT size.
    n_mels : number of mel bands.

    Behaviour also depends on the global ``hp``:
    * ``hp.use_ref_db``: subtract ``hp.ref_level_db`` from both spectrograms.
    * ``hp.normalize``: map ``[hp.min_level_db, 0]`` dB onto ``[0, 1]`` and clip.

    Returns
    -------
    (S_lin, S_mel) : the linear-frequency and mel-frequency spectrograms.
    """
    # lengths: seconds -> samples (the STFT needs integer sample counts)
    win_length = _seconds_to_samples(win_length, sr)
    hop_length = _seconds_to_samples(hop_length, sr)

    # The STFT is computed explicitly so that win_length can be passed through.
    stft = dsp.stft(y=y, n_fft=n_fft, win_length=win_length, hop_length=hop_length)
    magnitude = np.abs(stft)

    #####################
    # keithito version
    # Keyword arguments keep this call valid on librosa releases where the
    # filter-bank parameters are keyword-only.
    mels = dsp.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
    s_mel = np.dot(mels, magnitude)
    # 20*log10(|x|) == 10*log10(|x|^2): amplitude -> dB, floored at 1e-8 to avoid log(0)
    S_mel = 20 * np.log10(np.maximum(1e-8, s_mel))
    S_lin = 20 * np.log10(np.maximum(1e-8, magnitude))
    if hp.use_ref_db:
        print("Using reference db..")
        S_mel = S_mel - hp.ref_level_db
        S_lin = S_lin - hp.ref_level_db

    # normalize between 0-1 (https://github.com/keithito/tacotron/issues/38)
    if hp.normalize:
        print("Normalizing..")
        S_mel = np.clip((S_mel - hp.min_level_db) / -hp.min_level_db, 0, 1)
        S_lin = np.clip((S_lin - hp.min_level_db) / -hp.min_level_db, 0, 1)

    return S_lin, S_mel

def spect2wav(S,denormalize=True, use_ref_db=True,win_length=128,hop_length=32, n_fft=2048):
    '''Convert a dB-scaled magnitude spectrogram back to a waveform.

    Steps:
    1. If ``denormalize``: clip ``S`` to [0, 1] and map it back to
       ``[hp.min_level_db, 0]`` dB (inverse of the normalisation in
       ``do_spectrogram``).
    2. If ``use_ref_db``: add ``hp.ref_level_db`` back.
    3. Convert dB to amplitude (``10 ** (S / 20)``) and raise it to the
       power 1.5.
    4. Start from uniformly random phases and run ``hp.griffin_lim_iters``
       rounds of STFT -> keep phase -> ISTFT with the fixed magnitudes.

    ``win_length`` and ``hop_length`` are forwarded unchanged to
    ``dsp.stft`` / ``dsp.istft``; ``n_fft`` is used for the forward STFT only.
    The initial phases come from ``np.random.rand`` and are not seeded here,
    so repeated calls give different waveforms unless the caller seeds NumPy.

    Returns the waveform from the last ISTFT.
    '''
    # Convert back to linear
    if denormalize:
        print("Denormalizing..")
        S = (np.clip(S, 0, 1) * -hp.min_level_db) + hp.min_level_db
    print(S.shape)
    if use_ref_db:
        print("Adding ref db..")
        S = S + hp.ref_level_db

    # db -> amp
    S = np.power(10.0, S * 0.05)
    # amplitude raised to 1.5 as in keithito's implementation (the paper uses 1.2)
    S = S**(1.5) #(1.2)

    # Based on https://github.com/librosa/librosa/issues/434
    angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
    # np.complex was only an alias of the builtin and has been removed from
    # NumPy; np.complex128 is the same dtype and exists on every version.
    S_complex = np.abs(S).astype(np.complex128)
    y = dsp.istft(S_complex * angles, hop_length=hop_length, win_length=win_length)
    print("Start looping for",hp.griffin_lim_iters,"iteration...")
    for i in range(hp.griffin_lim_iters):
        # Re-analyse the current estimate and keep only its phase.
        ft = dsp.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
        angles = np.exp(1j * np.angle(ft))
        # Re-synthesise with the target magnitudes and the updated phase.
        y = dsp.istft(S_complex * angles, hop_length=hop_length, win_length=win_length)
    return y


In [3]:
%matplotlib inline
y, sr = load_wave(dsp.util.example_audio_file())
y=y[1:500000]
#test_spectrogram(y,sr)

hp.use_ref_db=False
hp.normalize=False
hp.griffin_lim_iters=5
S_lin, S_mel = do_spectrogram(y=y,
                           sr=sr,
                           win_length=hp.win_length,
                           hop_length=hp.hop_length,
                           n_fft=hp.n_fft,
                           n_mels=hp.n_mels
                          )

S= S_lin

IPython.display.Audio(data=y, rate=sr)


In [4]:

# Baseline: convert dB back to amplitude and invert with a plain ISTFT,
# without any phase estimation.
print("db->amp->ISTFT directly")
IPython.display.Audio(
    data=dsp.istft(
        np.power(10.0, S * 0.05),
        hop_length=hp.hop_length,
        win_length=hp.win_length
    ),
    rate=sr
)


db->amp->ISTFT directly


In [40]:
# Invert the linear spectrogram with a single phase-refinement iteration.
print("lin2wav 1 griffin lim iter")
hp.griffin_lim_iters = 1
wav_1iter = spect2wav(
    S,
    denormalize=hp.normalize, use_ref_db=hp.use_ref_db,
    win_length=hp.win_length, hop_length=hp.hop_length,
    n_fft=hp.n_fft
)
IPython.display.Audio(data=wav_1iter, rate=sr)

lin2wav 1 griffin lim iter
(1025, 4928)
Start looping for 1 iteration...


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


In [35]:

# Same inversion as the 1-iteration case, but with 100 phase-refinement
# iterations for comparison.
hp.griffin_lim_iters = 100
print("lin2wav",100,"griffin lim iter")
wav_100iter = spect2wav(
    S,
    denormalize=hp.normalize, use_ref_db=hp.use_ref_db,
    win_length=hp.win_length, hop_length=hp.hop_length,
    n_fft=hp.n_fft
)
IPython.display.Audio(data=wav_100iter, rate=sr)

lin2wav 100 griffin lim iter
(1025, 401)
Start looping for 100 iteration...


In [15]:
def test_spectrogram(y,sr):
    """Plot three spectrogram recipes side by side for one waveform.

    For the signal ``y`` at sample rate ``sr`` this draws six stacked panels,
    a linear and a mel spectrogram for each recipe:

    * "Barron": natural log of the STFT magnitude and of the mel projection.
    * "Keithito": the dB spectrograms returned by ``do_spectrogram``.
    * "kyubyong": raw STFT magnitude and a mel spectrogram of the power.

    ``hp.hop_length`` and ``hp.win_length`` are multiplied by ``sr`` here,
    i.e. they are treated as durations in seconds.
    NOTE(review): other cells pass the same ``hp`` values directly as sample
    counts -- confirm which unit ``hp`` actually stores.

    Nothing is returned; the figure is drawn on the current matplotlib state.
    """
    # seconds -> samples
    hop_length = int(hp.hop_length*sr)
    win_length = int(hp.win_length*sr)
    n_fft=2048
    n_mels=80

    ft = dsp.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    #####################
    #barronalex version
    # The complex STFT is projected onto the mel basis; magnitudes are taken afterwards.
    S_bar_amp = dsp.feature.melspectrogram(S=ft, n_mels=n_mels)

    stft_bar_log = np.log(np.abs(ft) + 1e-8)
    S_bar_log = np.log(np.abs(S_bar_amp) + 1e-8)
    S_bar = (stft_bar_log,S_bar_log)

    print("barron: ", len(S_bar))
    #####################
    #keithito version
    # Pass the already-converted sample counts so this recipe uses the same
    # framing as the STFT above, and request hp.n_mels mel bands (the FFT size
    # was previously passed here by mistake).
    S_kei = do_spectrogram(y=y,
                           sr=sr,
                           win_length=win_length,
                           hop_length=hop_length,
                           n_fft=hp.n_fft,
                           n_mels=hp.n_mels
                          )
    print("keithito: ", len(S_kei))

    #####################
    #kyubyong
    # magnitude spectrogram -> linear one
    magnitude = np.abs(ft) #(1+n_fft/2, T)
    # power spectrogram
    power = magnitude**2 #(1+n_fft/2, T)
    # mel spectrogram
    S_kyu = dsp.feature.melspectrogram(S=power, n_mels=80) #(n_mels, T)


    plt.figure(figsize=(15, 20))

    plt.subplot(6, 1, 1)
    dsp_plot.specshow(stft_bar_log, sr=sr,fmax=2048, y_axis='log',x_axis='time')
    plt.colorbar()
    plt.title('Barron spectrogram linear')
    plt.subplot(6, 1, 2)
    dsp_plot.specshow(S_bar_log, sr=sr,fmax=2048, y_axis='mel',x_axis='time')
    plt.colorbar()
    plt.title('Barron spectrogram mel')

    plt.subplot(6, 1, 3)
    dsp_plot.specshow(S_kei[0], y_axis='log',fmax=2048, x_axis='time')
    plt.colorbar()#plt.colorbar(format='%+2.0f dB')
    plt.title('Keithito spectrogram linear')
    plt.subplot(6, 1, 4)
    dsp_plot.specshow(S_kei[1],y_axis='mel',fmax=2048, x_axis='time')
    plt.colorbar()#plt.colorbar(format='%+2.0f dB')
    plt.title('Keithito spectrogram mel')

    plt.subplot(6, 1, 5)
    dsp_plot.specshow(magnitude,fmax=2048,y_axis="log", x_axis='time')
    plt.colorbar()#plt.colorbar(format='%+2.0f dB')
    plt.title('kyubyong spectrogram linear')
    plt.subplot(6, 1, 6)
    dsp_plot.specshow(S_kyu,y_axis='mel',fmax=2048, x_axis='time')
    plt.colorbar()#plt.colorbar(format='%+2.0f dB')
    plt.title('kyubyong spectrogram mel')
    plt.tight_layout()
