## Feature Extraction

In [1]:
from python_speech_features import mfcc, delta
import scipy.io.wavfile as wav
from sklearn import preprocessing
import os
import numpy as np
import wave
import librosa
import librosa
import librosa.display
import matplotlib.pyplot as plt 

### Loading

<b>With Librosa

In [2]:
plt.figure(figsize=(10,7))
data,sample_rate=librosa.load('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav')
#librosa.display.waveplot(data,sr=sample_rate)
print(sample_rate)

22050


<Figure size 720x504 with 0 Axes>

<b> With Scipy

In [3]:
from scipy.io import wavfile as wav
rate,audio=wav.read('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav')
print(audio.shape)
print(rate)

(52480,)
16000


### MFCC

In [5]:
def MFCC(filename):
    
    (rate,sig) = wav.read(os.path.join(filename))
    mfcc_feature = mfcc(sig,rate, lowfreq=0,)
    mfcc_feature  = preprocessing.scale(mfcc_feature)
    deltas        = delta(mfcc_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((mfcc_feature, deltas, double_deltas))
    return combined_feature

In [7]:
MFCC('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav').shape

(327, 39)

### Filterbank

In [8]:
from python_speech_features import fbank

In [9]:
def filterbank(filename):   
    (rate,sig) = wav.read(os.path.join(filename))
    fb_feature=fbank(audio,rate,lowfreq=0,)
    fb_feature  = preprocessing.scale(fb_feature[0])
    deltas        = delta(fb_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((fb_feature, deltas, double_deltas))
    return combined_feature

In [11]:
filterbank('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav').shape

(295, 78)

## Spafe
[SPAFE](https://github.com/SuperKogito/spafe) has a collection of different feature extractors implemented. Various examples on how to use spafe filter banks or feature extraction techniques are available under [examples](https://github.com/SuperKogito/spafe/tree/master/examples).

In [15]:
pip install spafe

Note: you may need to restart the kernel to use updated packages.


### Linear Predictive Coding (LPC)

In [40]:
from spafe.features.lpc import lpc,lpcc

def lpc_spafe(filename):   
    (rate,sig) = wav.read(os.path.join(filename))
    lpc_feature=lpc(audio,rate) #or lpcc(audio,rate)
    lpc_feature
    lpc_feature  = preprocessing.scale(lpc_feature)
    deltas        = delta(lpc_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((lpc_feature, deltas, double_deltas))
    return combined_feature

In [33]:
lpc_spafe('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav').shape

(302, 39)

### Bark Frequency Cepstral Coefficient (BFCC)

In [41]:
from spafe.features.bfcc import bfcc

def bfcc_spafe(filename):   
    (rate,sig) = wav.read(os.path.join(filename))
    bfcc_feature=bfcc(audio,rate,low_freq=0)
    bfcc_feature
    bfcc_feature  = preprocessing.scale(bfcc_feature)
    deltas        = delta(bfcc_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((bfcc_feature, deltas, double_deltas))
    return combined_feature

In [43]:
bfcc_spafe('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav')

array([[-1.42943682, -0.6450405 ,  0.60054584, ...,  0.12046899,
        -0.01962624,  0.02168859],
       [-1.38444159, -0.3518967 ,  0.4842991 , ...,  0.29461665,
         0.0912562 ,  0.05895109],
       [-1.63192793, -0.92039695,  0.54013201, ...,  0.31955457,
         0.16139025,  0.0458816 ],
       ...,
       [-1.45688349, -0.86768371,  0.7054231 , ..., -0.11864573,
        -0.01228361, -0.07109461],
       [-1.43669033, -0.76434432,  0.66945078, ..., -0.13705815,
        -0.01466392, -0.07000513],
       [-1.37364182, -0.63061857,  0.66986331, ..., -0.08221595,
        -0.01608087, -0.03467854]])