## Feature Extraction

In [3]:
from python_speech_features import mfcc, delta
import scipy.io.wavfile as wav
from sklearn import preprocessing
import os
import numpy as np
import wave
import librosa
import librosa
import librosa.display
import matplotlib.pyplot as plt 

## A. Data Loading

<b> a. With Librosa

In [2]:
plt.figure(figsize=(10,7))
data,sample_rate=librosa.load('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav')
#librosa.display.waveplot(data,sr=sample_rate)
print(sample_rate)

22050


<Figure size 720x504 with 0 Axes>

<b> b. With Scipy

In [3]:
from scipy.io import wavfile as wav
rate,audio=wav.read('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav')
print(audio.shape)
print(rate)

(52480,)
16000


## B. Feature Extraction

### 1. MFCC

In [5]:
def MFCC(filename):
    
    (rate,sig) = wav.read(os.path.join(filename))
    mfcc_feature = mfcc(sig,rate, lowfreq=0,)
    mfcc_feature  = preprocessing.scale(mfcc_feature)
    deltas        = delta(mfcc_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((mfcc_feature, deltas, double_deltas))
    return combined_feature

In [7]:
MFCC('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav').shape

(327, 39)

### 2. Filterbank

In [8]:
from python_speech_features import fbank

In [9]:
def filterbank(filename):   
    (rate,sig) = wav.read(os.path.join(filename))
    fb_feature=fbank(audio,rate,lowfreq=0,)
    fb_feature  = preprocessing.scale(fb_feature[0])
    deltas        = delta(fb_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((fb_feature, deltas, double_deltas))
    return combined_feature

In [11]:
filterbank('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav').shape

(295, 78)

## Spafe
[SPAFE](https://github.com/SuperKogito/spafe) has a collection of different feature extractors implemented. Various examples on how to use spafe filter banks or feature extraction techniques are available under [examples](https://github.com/SuperKogito/spafe/tree/master/examples).

In [15]:
pip install spafe

Note: you may need to restart the kernel to use updated packages.


### 3. Linear Predictive Coding (LPC)

In [40]:
from spafe.features.lpc import lpc,lpcc

def lpc_spafe(filename):   
    (rate,sig) = wav.read(os.path.join(filename))
    lpc_feature=lpc(audio,rate) #or lpcc(audio,rate)
    lpc_feature
    lpc_feature  = preprocessing.scale(lpc_feature)
    deltas        = delta(lpc_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((lpc_feature, deltas, double_deltas))
    return combined_feature

In [33]:
lpc_spafe('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav').shape

(302, 39)

### 4. Bark Frequency Cepstral Coefficient (BFCC)

In [4]:
from spafe.features.bfcc import bfcc

def bfcc_spafe(filename):   
    (rate,sig) = wav.read(os.path.join(filename))
    bfcc_feature=bfcc(sig,rate,low_freq=0)
    bfcc_feature
    bfcc_feature  = preprocessing.scale(bfcc_feature)
    deltas        = delta(bfcc_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((bfcc_feature, deltas, double_deltas))
    return combined_feature

In [5]:
bfcc_spafe('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav')

array([[-1.3648115 , -0.2000881 ,  0.70963554, ...,  0.1227477 ,
        -0.01772412, -0.07713884],
       [-1.44031137, -0.13492665,  0.76560042, ...,  0.18222279,
        -0.02055833, -0.07142815],
       [-1.44524298, -0.48402188,  0.33931808, ...,  0.1587244 ,
        -0.0112999 , -0.05122105],
       ...,
       [-1.27162822, -0.11282422,  0.74014279, ...,  0.05455623,
         0.11598528,  0.08536465],
       [-1.27221153,  0.20528767,  0.76921206, ...,  0.10516836,
         0.14359657,  0.11064443],
       [-1.28802288,  0.37646174,  0.91929387, ...,  0.06560141,
         0.09067088,  0.066169  ]])

### 5. Linear Frequency Cepstral Coefficient (LFCC)

In [7]:
from spafe.features.lfcc import lfcc

def lfcc_spafe(filename):   
    (rate,sig) = wav.read(os.path.join(filename))
    lfcc_feature=lfcc(sig,rate,low_freq=0)
    lfcc_feature
    lfcc_feature  = preprocessing.scale(lfcc_feature)
    deltas        = delta(lfcc_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((lfcc_feature, deltas, double_deltas))
    return combined_feature

In [8]:
lfcc_spafe('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav')

array([[-1.10182334, -0.22626736, -0.82609706, ...,  0.00599013,
        -0.0116137 , -0.07004604],
       [-1.17102277, -0.14641885, -1.06846761, ...,  0.05309552,
        -0.05709961, -0.10156803],
       [-1.13740565, -0.19952466, -1.09553133, ...,  0.10487634,
        -0.09038651, -0.10326062],
       ...,
       [-1.06682431, -0.23435093, -0.76934373, ...,  0.029299  ,
         0.06508178, -0.12630983],
       [-1.12495651, -0.13154205, -0.69080501, ...,  0.01929752,
         0.11047756, -0.1057171 ],
       [-1.14462474, -0.10105876, -0.53588648, ...,  0.01706412,
         0.0626383 , -0.05995113]])

### 6. Revising Perceptual Linear Prediction (RPLP)

In [6]:
from spafe.features.rplp import rplp

def rplp_spafe(filename):   
    (rate,sig) = wav.read(os.path.join(filename))
    rplp_feature=rplp(sig,rate)
    rplp_feature
    rplp_feature  = preprocessing.scale(rplp_feature)
    deltas        = delta(rplp_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((rplp_feature, deltas, double_deltas))
    return combined_feature

In [8]:
rplp_spafe('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav')

array([[-4.60114708e-03, -1.24285261e-03, -1.01255624e-02, ...,
        -8.96320077e-05, -3.36838135e-04, -7.70508338e-04],
       [-4.60114708e-03, -1.24285261e-03, -1.01255624e-02, ...,
        -3.54048771e-04, -9.33315601e-04, -2.06573062e-03],
       [-4.60114708e-03, -1.24285261e-03, -1.01255624e-02, ...,
        -1.25465503e-03, -1.82416846e-03, -2.91928640e-03],
       ...,
       [-2.46302022e-02, -1.99864367e-02, -5.73396137e-02, ...,
        -4.68732173e-04, -1.90659204e-04,  3.36183047e-03],
       [-4.15632952e-02, -4.18575739e-02, -1.10418961e-01, ...,
        -1.10201811e-04, -1.37676073e-03, -1.25136881e-03],
       [-5.59372248e-02, -6.00970979e-02, -1.55123426e-01, ...,
         7.93909168e-04, -9.75189970e-04, -3.38316013e-03]])

### 7. Normlized Grammarchirp Cepstral Coefficient (NGCC)

In [9]:
from spafe.features.ngcc import ngcc

def ngcc_spafe(filename):   
    (rate,sig) = wav.read(os.path.join(filename))
    ngcc_feature=ngcc(sig,rate)
    ngcc_feature
    ngcc_feature  = preprocessing.scale(ngcc_feature)
    deltas        = delta(ngcc_feature, 2)
    double_deltas = delta(deltas, 2)
    combined_feature      = np.hstack((ngcc_feature, deltas, double_deltas))
    return combined_feature

In [10]:
ngcc_spafe('ST-AEDS-20180100_1-OS/m0005_us_m0005_00422.wav')

array([[-1.20880921, -0.35442179,  0.35208147, ...,  0.0415037 ,
         0.09338459, -0.0584096 ],
       [-1.1940691 , -0.10078899,  0.57252485, ...,  0.19156198,
         0.20660415,  0.03373905],
       [-1.26432874, -0.58355093,  0.25806344, ...,  0.30123794,
         0.2223628 ,  0.13312751],
       ...,
       [-1.12361714, -0.01716192,  0.60810753, ..., -0.02335087,
        -0.04803602,  0.3138255 ],
       [-1.12540512,  0.09232404,  0.49288214, ..., -0.01234885,
        -0.06409792,  0.2549323 ],
       [-1.09371381,  0.29495027,  0.52461615, ...,  0.00150291,
        -0.03852663,  0.08108373]])