In [1]:
import os
import pickle
import numpy as np
import soundfile as sf
import librosa
from scipy import signal
from scipy.signal import get_window
from librosa.filters import mel
from librosa.util import normalize
from numpy.random import RandomState

In [2]:
import torch
import IPython.display as ipd

In [3]:
# Input: folder that holds the source audio, one sub-folder per group of files.
rootDir = './test'
# Output: folder that receives the computed spectrograms.
targetDir = './spmel'

In [4]:
# Only the first os.walk() entry is needed: the root path itself and the names
# of its immediate sub-folders. Files sitting directly in the root are ignored.
top_level_entry = next(os.walk(rootDir))
dirName, subdirList, _ = top_level_entry
print('Found directory: %s' % dirName)

Found directory: ./test


In [5]:
def mel_gan_handler(x, fft_length = 1024, hop_length = 256,sr = 22050):
    """Compute a log10-compressed mel spectrogram of a waveform.

    The waveform is normalized with ``librosa.util.normalize``, reflect-padded
    by ``(fft_length - hop_length) // 2`` samples on each side, transformed
    with a non-centered Hann-window STFT, projected onto 80 mel bands and
    finally log10-compressed with a floor of 1e-5.

    Args:
        x: Audio samples (assumed to be a 1-D mono array -- TODO confirm
            against callers).
        fft_length: FFT size, used both for the STFT and for the mel filter
            bank so that the two always have matching frequency-bin counts.
        hop_length: Hop between successive STFT frames, in samples.
        sr: Sampling rate of ``x`` in Hz, used to build the mel filter bank.

    Returns:
        A float32 array with 80 rows (mel bands) and one column per STFT frame.
    """
    wav = normalize(x)
    # Pad manually (and disable librosa's own centering below) so the amount
    # of padding is controlled here rather than by the STFT.
    pad = (fft_length - hop_length) // 2
    wav = np.squeeze(np.pad(wav, (pad, pad), "reflect"))
    fft = librosa.stft(
        wav,
        n_fft=fft_length,
        hop_length=hop_length,
        window='hann',
        center=False,
    )
    # Magnitude of the complex STFT: sqrt(real**2 + imag**2).
    mag = np.abs(fft)
    # The filter bank must be built for the same FFT size as the STFT; keyword
    # arguments are used because newer librosa releases reject positional ones.
    mel_basis = mel(sr=sr, n_fft=fft_length, fmin=0.0, fmax=None, n_mels=80)
    mel_output = np.dot(mel_basis, mag)
    # Floor at 1e-5 before the log to avoid -inf on silent bins.
    log_mel_spec = np.log10(np.maximum(1e-5, mel_output)).astype(np.float32)
    return log_mel_spec

In [6]:
# Sampling rate (Hz) that every recording is resampled to before feature extraction.
target_fs = 22050

In [7]:
# Convert every audio file under each sub-folder of the input root into a
# log-mel spectrogram and store it as a .npy file that mirrors the input layout.
for subdir in sorted(subdirList):
    print(subdir)
    os.makedirs(os.path.join(targetDir, subdir), exist_ok=True)
    _, _, fileList = next(os.walk(os.path.join(dirName, subdir)))
    for fileName in sorted(fileList):
        # Read audio file
        x, fs = sf.read(os.path.join(dirName, subdir, fileName))
        # Multi-channel (e.g. stereo) input: keep only the first channel
        if x.ndim > 1:
            x = x[:, 0]
        # Resample to the target rate; keyword arguments are used because
        # newer librosa releases reject positional sample rates.
        x = librosa.resample(x, orig_sr=fs, target_sr=target_fs)
        # Build the mel filter bank for the rate the audio was just resampled
        # to, then transpose so that frames run along the first axis.
        S = mel_gan_handler(x, sr=target_fs).T
        # Save the spectrogram. np.save appends ".npy" itself, so strip the
        # whole extension (dot included): slicing off a fixed three characters
        # left the dot behind ("a.wav" -> "a..npy") and broke for extensions
        # that are not exactly three characters long.
        np.save(os.path.join(targetDir, subdir, os.path.splitext(fileName)[0]),
                S.astype(np.float32), allow_pickle=False)

p225
p226
p227
