### mel scale and frequency

In [None]:
from matplotlib import pyplot as plt
import numpy as np

# Plot the Hz -> mel mapping mel(f) = 2595 * log10(1 + f / 700) on 0..5000 Hz
# and mark one reference point with dashed guide lines to both axes.
x = np.linspace(0, 5000, num=50000)
y = 2595 * np.log10(1 + x / 700)

# Reference frequency (Hz) and its mel value.
x0 = 1000
y0 = 2595 * np.log10(1 + x0 / 700)

plt.plot(x, y)
plt.scatter(x0, y0)
# Vertical guide: (x0, 0) -> (x0, y0).
plt.plot([x0, x0], [0, y0], 'k--')
# Horizontal guide: (0, y0) -> (x0, y0). Both y coordinates must be y0;
# the previous version started the line at y = x0, mixing up the axes.
plt.plot([0, x0], [y0, y0], 'k--')
plt.xlabel('f(hz)')
plt.ylabel('Mel(f)')
plt.title('relationship between linear and mel scale')
# Clamp both axes to the plotted range so the curve starts at the origin.
plt.xlim(0, x[-1])
plt.ylim(0, y[-1])

# plt.savefig('mel_vs_f.png')
plt.show()

### how to use librosa to draw the waveform of an audio file

In [None]:
from matplotlib import pyplot as plt
import numpy as np
import librosa

# Read the audio with librosa. No `sr` is passed, so `sr` below is whatever
# rate librosa.load returns by default.
inputWavPath = "/home/newdisk/XWLB-GNLBKX/dataset_02/wav/000012.wav"
y, sr = librosa.load(inputWavPath)
yNum = np.arange(len(y))

# Keep only the first `sampleDuration` seconds of the signal. The same value
# drives the subplot title, so the excerpt and its label cannot drift apart
# (previously the slice used 0.8 s while the comment and title said 0.3 s).
sampleDuration = 0.3
sampleSignal = y[0:int(sr * sampleDuration)]
sampleNum = np.arange(len(sampleSignal))

plt.figure(figsize=(11, 7), dpi=500)

# Top panel: full waveform in black with the excerpt overlaid in blue.
# Sample indices are divided by sr to get a time axis in seconds.
plt.subplot(211)
plt.plot(yNum / sr, y, color='black')
plt.plot(sampleNum / sr, sampleSignal, color='blue')
plt.xlabel('Time(sec)')
plt.ylabel('Amplitude')
plt.title('Waveform')

# Bottom panel: the excerpt on its own.
plt.subplot(212)
plt.plot(sampleNum / sr, sampleSignal, color='blue')
plt.xlabel('Time(sec)')
plt.ylabel('Amplitude')
plt.title(f'0~{sampleDuration}s waveform')
plt.tight_layout()
# plt.savefig('.\\temp\\waveform.png', dpi=500)
plt.show()

### how to draw mel spectrum

In [None]:
import librosa
from matplotlib import pyplot as plt
import numpy as np

# Configuration shared by the mel-spectrogram helpers in this cell.
inputWavPath = "/home/newdisk/XWLB-GNLBKX/dataset_02/wav/000012.wav"
sampleRate = 44100      # sampling rate (Hz) used to derive every size below
preemphasis = 0.97      # coefficient of the first-order pre-emphasis filter

nFft = 2205             # FFT size passed to librosa.stft
frameLength = 0.05      # analysis window length in seconds
frameShift = 0.01       # hop between consecutive frames in seconds

fMin = 0                # lowest frequency covered by the mel filter bank
fMax = sampleRate / 2   # highest frequency: half the sampling rate

eps = 1e-10             # floor applied before log10 to avoid log(0)
nMle = 80               # number of mel bands (name kept as-is for existing users)

# Window and hop expressed in samples.
winLength = int(sampleRate * frameLength)
hopLength = int(sampleRate * frameShift)

# Keyword arguments are required: librosa >= 0.10 rejects positional
# sr / n_fft / n_mels, and the keyword form also works on older releases.
melBasis = librosa.filters.mel(
    sr=sampleRate, n_fft=nFft, n_mels=nMle, fmin=fMin, fmax=fMax)


def getSpectrogram(inputWavPath):
    """Compute a log10 mel spectrogram for the audio file at *inputWavPath*.

    The file is loaded at the module-level ``sampleRate`` so that the audio
    matches the rate that ``melBasis``, ``winLength`` and ``hopLength`` were
    derived from. Processing steps: pre-emphasis, STFT magnitude, projection
    onto ``melBasis``, flooring at ``eps`` and ``log10``.

    Args:
        inputWavPath: Path of the audio file to read.

    Returns:
        A float32 array holding the transposed log-mel matrix (the result of
        ``melBasis @ |STFT|`` with its two axes swapped).
    """
    # Load at the configured rate; without sr= librosa resamples to its own
    # default, which would not match the 44.1 kHz-based frame/filter sizes.
    y, _ = librosa.load(inputWavPath, sr=sampleRate)

    # Pre-emphasis: y[n] - preemphasis * y[n-1], keeping the first sample.
    y = np.append(y[0], y[1:] - preemphasis * y[:-1])

    linear = librosa.stft(
        y=y, n_fft=nFft, hop_length=hopLength, win_length=winLength)
    mag = np.abs(linear)

    # Project the magnitude spectrum onto the mel filter bank, then take the
    # log with a small floor so silent bins do not produce -inf.
    mel = np.dot(melBasis, mag)
    mel = np.log10(np.maximum(eps, mel))

    mel = mel.T.astype(np.float32)
    return mel

def plotSpectrogram(spectrogram, filePath):
    """Render *spectrogram* as an image, save it to *filePath*, and show it.

    The array is transposed before drawing, so its first axis runs along the
    horizontal ('frames') axis of the picture.

    Args:
        spectrogram: 2-D array to display.
        filePath: Destination image path; missing parent directories are
            created so that saving does not fail on a fresh checkout.
    """
    import os  # local import keeps this helper self-contained

    # savefig raises if the target directory is absent (e.g. '../temp').
    outDir = os.path.dirname(filePath)
    if outDir:
        os.makedirs(outDir, exist_ok=True)

    plt.figure(figsize=(16, 9))
    # origin='lower' puts row 0 of the transposed array at the bottom.
    plt.imshow(spectrogram.T, aspect='auto', origin='lower')
    plt.colorbar()
    plt.xlabel('frames')
    plt.tight_layout()
    plt.savefig(filePath, dpi=500)
    plt.show()


# Compute the log-mel features for the configured file and save/show them.
# `melspec` stays a module-level name because the following cells read it.
melSpecFigurePath = '../temp/mel_spectrogram.png'
melspec = getSpectrogram(inputWavPath)
plotSpectrogram(melspec, melSpecFigurePath)



### extract MFCC

In [None]:
import librosa
from matplotlib import pyplot as plt
import numpy as np
from scipy.fftpack import dct

# Number of cepstral coefficients to keep (the original note suggests 2~13).
num_ceps = 12

# Orthonormal type-II DCT along axis 1 of melspec; column 0 is discarded and
# the next num_ceps columns are kept.
cepstra = dct(melspec, type=2, axis=1, norm='ortho')
mfcc = cepstra[:, 1:num_ceps + 1]
plotSpectrogram(mfcc, '../temp/mfcc.png')

# Sinusoidal liftering: each coefficient column i is scaled by
# 1 + (cep_lifter / 2) * sin(pi * i / cep_lifter).
nframes, ncoeff = mfcc.shape
cep_lifter = 22
n = np.arange(ncoeff)
lift = 1 + (cep_lifter / 2) * np.sin(np.pi * n / cep_lifter)
# In-place multiply so mfcc keeps its existing dtype and buffer.
np.multiply(mfcc, lift, out=mfcc)
plotSpectrogram(mfcc, '../temp/mfcc_list.png')


### 均值方差归一化

In [None]:
# Statistics of melspec taken over axis 0, one value per column.
frame_num = len(melspec)
cep_sum = melspec.sum(axis=0)
cep_squ_sum = (melspec * melspec).sum(axis=0)
cep_mean = cep_sum / frame_num
# mean(x^2) - mean(x)^2 per column.
# NOTE(review): this expression is the variance; no square root is taken
# here despite the `cep_std` name — confirm how it is consumed downstream.
cep_std = cep_squ_sum / frame_num - cep_mean * cep_mean