In [2]:
from glob import glob
import os
from scipy.io import wavfile
import python_speech_features
import numpy as np
import sounddevice as sd
import time
from sklearn.svm import SVC
import joblib
from sklearn.decomposition import PCA

Read the `.wav` files in the given directory whose names start with a specific prefix.

In [3]:
def read_files(direction, name=''):
    """Return the ``.wav`` files in a directory whose names start with a prefix.

    Parameters
    ----------
    direction : str
        Directory to search (sub-directories are not searched).
    name : str, optional
        File-name prefix. The default ``''`` matches every ``.wav`` file.

    Returns
    -------
    list of str
        Matching paths in sorted order, so repeated runs see the files in the
        same sequence regardless of the file system's listing order.

    Raises
    ------
    FileNotFoundError
        If ``direction`` is not an existing directory.
    """
    if not os.path.isdir(direction):
        # Raising a plain string is itself a TypeError in Python 3; raise a
        # real exception so the caller sees the actual problem.
        raise FileNotFoundError('Directory is not available: {}'.format(direction))
    pattern = os.path.join(direction, '{}*.wav'.format(name))
    return sorted(glob(pattern))

```extract()``` extracts Mel-Frequency Cepstral Coefficient (MFCC) features from a single file.

```batch_extract()``` extracts all MFCC features from training files.

In [4]:
def extract(audio):
    """Build a fixed-length feature vector for one ``.wav`` file.

    The vector is the concatenation, in this order, of summary statistics for
    the MFCCs, their first differences, their second differences, the
    per-frame energy and the per-frame zero-crossing rate. Each summary is
    ``[min, max, mean, var]`` taken over frames, giving
    ``13 * 4 * 3 + 4 + 4 = 164`` values.

    Parameters
    ----------
    audio : str
        Path of the ``.wav`` file to read.

    Returns
    -------
    numpy.ndarray
        1-D feature vector.
    """
    FRAME_SECONDS = 0.025
    OVERLAP_SECONDS = 0.015
    PRE_EMPH = 0.97
    WINDOW_LENGTH = 0.025
    WINDOW_STEP = 0.010
    WINDOW_FUNCTION = np.hamming
    NUM_CEPSTRA = 13

    def summarize(feats):
        """Return per-column min, max, mean and variance as one flat array."""
        return np.concatenate([feats.min(axis=0), feats.max(axis=0),
                               feats.mean(axis=0), feats.var(axis=0)])

    def frame_energy(frames):
        """Mean squared amplitude of every frame."""
        return np.array([1 / len(frame) * np.sum(np.power(frame, 2))
                         for frame in frames])

    def zero_crossing_rate(frames):
        """Fraction of adjacent-sample sign changes in every frame.

        Zero counts as positive. Vectorised equivalent of counting
        ``abs(sign(x[i]) - sign(x[i - 1])) / 2`` sample by sample.
        """
        frames = np.asarray(frames)
        signs = np.where(frames >= 0, 1, -1)
        crossings = np.abs(np.diff(signs, axis=1)).sum(axis=1) / 2
        return crossings / frames.shape[1]

    rate, signal = wavfile.read(audio)

    # Derive the frame geometry from the file's own sample rate instead of a
    # hard-coded 11025 Hz, so frames stay 25 ms long for any input rate.
    frame_length = int(rate * FRAME_SECONDS)
    frame_step = frame_length - int(rate * OVERLAP_SECONDS)

    signal_frames = python_speech_features.sigproc.framesig(signal, frame_len=frame_length,
                                                            frame_step=frame_step,
                                                            winfunc=WINDOW_FUNCTION)
    # NOTE(review): mfcc() receives the already framed and windowed array
    # rather than the raw 1-D signal. Kept unchanged so the features remain
    # compatible with previously saved PCA/SVM models -- confirm this is intended.
    mfcc = python_speech_features.mfcc(signal_frames, rate, winlen=WINDOW_LENGTH,
                                       winstep=WINDOW_STEP, numcep=NUM_CEPSTRA,
                                       preemph=PRE_EMPH, winfunc=WINDOW_FUNCTION)

    # Differences are zero-padded at the start so they keep one row per frame.
    delta = np.concatenate([np.zeros(shape=(1, NUM_CEPSTRA)), np.diff(mfcc, n=1, axis=0)])
    delta_delta = np.concatenate([np.zeros(shape=(2, NUM_CEPSTRA)), np.diff(mfcc, n=2, axis=0)])

    energies = frame_energy(signal_frames).reshape(-1, 1)
    zcrs = zero_crossing_rate(signal_frames).reshape(-1, 1)

    return np.concatenate([summarize(mfcc), summarize(delta), summarize(delta_delta),
                           summarize(energies), summarize(zcrs)])

def batch_extract(direction):
    """Extract features and labels for every training file in a directory.

    Files whose names start with ``ABM`` are labelled ``1`` and files whose
    names start with ``NABM`` are labelled ``-1``. The ``ABM`` samples come
    first in the returned arrays.

    Parameters
    ----------
    direction : str
        Directory holding the training ``.wav`` files.

    Returns
    -------
    tuple of numpy.ndarray
        ``(feats, labels)`` with one row / entry per file.
    """
    feats = []
    labels = []

    for prefix, label in (('ABM', 1), ('NABM', -1)):
        for wav_path in read_files(direction, name=prefix):
            feats.append(extract(wav_path))
            labels.append(label)

    return np.asarray(feats), np.asarray(labels)

```record()``` records a single voice from the user.

```batch_record()``` records a batch of voice samples from the user in order to create training or test files.

In [5]:
def record(samplerate=11025, duration=2, play_rec=False, flag='test'):
    """Record a mono clip from the default input device.

    Parameters
    ----------
    samplerate : int, optional
        Sampling rate in Hz.
    duration : float, optional
        Length of the recording in seconds.
    play_rec : bool, optional
        When ``True``, play the recording back before continuing.
    flag : str, optional
        ``'test'`` writes the clip to ``temp.wav`` and returns ``None``;
        ``'train'`` returns the clip to the caller without writing it.
        Any other value records (and optionally plays) the clip but
        neither saves nor returns it.

    Returns
    -------
    tuple or None
        ``(voice, samplerate)`` when ``flag == 'train'``, otherwise ``None``.
    """
    FRAMES = int(duration * samplerate)
    print('Recording Started')
    # np.float was only an alias of the builtin float (float64) and has been
    # removed from NumPy; name the dtype explicitly.
    voice = sd.rec(FRAMES, samplerate, dtype=np.float64, channels=1, mapping=None, blocking=True)
    print('Recording Ended')

    if play_rec is True:
        sd.play(voice, samplerate, blocking=True)

    if flag == 'test':
        wavfile.write(filename='temp.wav', rate=samplerate, data=voice)
        time.sleep(1)
    elif flag == 'train':
        return voice, samplerate
    
def batch_record(direction, count, name):
    """Record ``count`` new clips and save them as numbered ``.wav`` files.

    Files are written as ``<direction>/<name><index>.wav``. Numbering starts
    at the number of files already in ``direction`` whose names start with
    ``name``, so repeated calls extend the existing set.

    Parameters
    ----------
    direction : str
        Output directory; created if it does not exist.
    count : int
        Number of clips to record.
    name : str
        File-name prefix for the clips.
    """
    os.makedirs(direction, exist_ok=True)

    last_file = len(read_files(direction, name))
    for cnt in range(last_file, last_file + count):
        file_name = '{}/{}.wav'.format(direction, name + str(cnt))
        voice, samplerate = record(flag='train')
        # The original passed the undefined name `filename` here (NameError).
        wavfile.write(file_name, samplerate, data=voice)
        # Short pause between consecutive recordings.
        time.sleep(0.5)

```create_classifier()``` creates a classifier, fits it with the features and labels returned from ```batch_extract()```, and finally saves the classifier to a file.

```predict()``` if the test file is verified it returns ```1``` else returns ```-1```.

```single_test()``` records a voice sample from the user and reports whether the speaker is verified or not.

```calculate_accuracy()``` predicts all the test files in the directory and returns the accuracy of classifier.

In [6]:
def create_classifier(feats, labels, pca_flag=False):
    """Train an SVM on the given features and save it to ``SVM.pkl``.

    Parameters
    ----------
    feats : array-like
        Training feature matrix, one row per sample.
    labels : array-like
        Class label of every row in ``feats``.
    pca_flag : bool, optional
        When ``True``, project ``feats`` with the PCA model stored in
        ``pca.pkl`` before fitting.
    """
    train_feats = feats
    if pca_flag is True:
        train_feats = joblib.load('pca.pkl').transform(feats)

    svm = SVC(C=1, max_iter=-1, gamma='auto')
    svm.fit(train_feats, labels)

    joblib.dump(svm, filename='SVM.pkl')
    time.sleep(1)
    
def predict(clf, audio, pca_flag=False):
    """Classify one audio file and print the verdict.

    Parameters
    ----------
    clf : object
        Fitted classifier exposing ``predict``.
    audio : str
        Path of the ``.wav`` file to classify.
    pca_flag : bool, optional
        When ``True``, project the features with the PCA model stored in
        ``pca.pkl`` before classifying.

    Returns
    -------
    object
        Whatever ``clf.predict`` returns for the single sample; the value
        ``1`` is treated as "verified".
    """
    # The classifier expects a 2-D array: one row holding this sample.
    sample = extract(audio).reshape(1, -1)
    if pca_flag is True:
        sample = joblib.load('pca.pkl').transform(sample)

    ans = clf.predict(sample)
    print('\n\nAli Bayat Mokhtari Verified' if ans == 1 else '\n\nNot Verified!')

    return ans

def single_test():
    """Record a clip from the user, classify it and return the prediction.

    The clip is played back after recording and written to ``temp.wav``,
    then classified with the model stored in ``SVM.pkl`` after PCA projection.
    """
    record(play_rec=True)
    res = predict(joblib.load('SVM.pkl'), 'temp.wav', pca_flag=True)
    print(res)
    return res

def calculate_accuracy(direction, name, expected_label=-1):
    """Classify every matching test file and report the accuracy.

    Parameters
    ----------
    direction : str
        Directory holding the test ``.wav`` files.
    name : str
        File-name prefix selecting the files to evaluate.
    expected_label : int, optional
        Label counted as a correct prediction for these files. The default
        ``-1`` keeps the original behaviour of scoring a set of files that
        should all be rejected; pass ``1`` for files that should be verified.

    Returns
    -------
    float
        Percentage of files predicted as ``expected_label`` (also printed).

    Raises
    ------
    ValueError
        If no test file matches, since the accuracy is then undefined.
    """
    # The original called `clf.load(...)` on an undefined name (NameError).
    clf = joblib.load('SVM.pkl')
    test_files = read_files(direction, name)
    if not test_files:
        raise ValueError('No test files found in {!r} with prefix {!r}'.format(direction, name))

    corrects = 0
    for test_file in test_files:
        ans = predict(clf, audio=test_file, pca_flag=True)
        if ans[0] == expected_label:
            corrects += 1

    accuracy = (corrects / len(test_files)) * 100
    print(accuracy)
    return accuracy

```create_pca()``` fits a PCA model that reduces the dimensionality of the features in order to get better accuracy, and saves the model to a file.

In [7]:
def create_pca(feats, n_feats):
    """Fit a whitening PCA on the features and save it to ``pca.pkl``.

    Parameters
    ----------
    feats : array-like
        Feature matrix to fit on, one row per sample.
    n_feats : int
        Number of principal components to keep.
    """
    reducer = PCA(n_components=n_feats, whiten=True).fit(feats)
    joblib.dump(reducer, filename='pca.pkl')

The main region of code.

In [8]:
# Training pipeline. The order matters: create_pca() writes 'pca.pkl', which
# create_classifier(pca_flag=True) then loads to project the features.
feats, labels = batch_extract(direction='./train')
# Fit a PCA keeping 40 components and save it.
create_pca(feats, n_feats=40)
# Train the SVM on the PCA-projected features and save it to 'SVM.pkl'.
create_classifier(feats, labels, pca_flag=True)

(560, 164)


In [15]:
# Record a clip from the microphone and classify it with the saved models.
ans = single_test()

Recording Started
Recording Ended


Ali Bayat Mokhtari Verified
[1]
