In [11]:
from glob import glob
import os
from scipy.io import wavfile
import python_speech_features
import numpy as np
import sounddevice as sd
import time
import joblib
from sklearn.decomposition import PCA
import tensorflow

In [12]:
tensorflow.compat.v1.logging.set_verbosity(tensorflow.compat.v1.logging.ERROR) # Ignore tensorflow warnings

In [13]:
def read_files(direction, name=''):
    if not os.path.exists(direction):
        raise 'Directory is not available'
    files = '{}/{}*.wav'.format(direction, name)
    return glob(files)

In [44]:
def extract(audio):
    SAMPLE_RATE = 11025
    FRAME_LENGTH = int(SAMPLE_RATE * 0.025)
    FRAME_STEP = FRAME_LENGTH - int(SAMPLE_RATE * 0.015)
    PRE_EMPH = 0.97
    WINDOW_LENGTH = 0.025
    WINDOW_STEP = 0.010
    WINDOW_FUNCTION = np.hamming
    f = []
    
    def calculate_all(feats):
            f.extend(feats.min(axis=0))
            f.extend(feats.max(axis=0))
            f.extend(feats.mean(axis=0))
            f.extend(feats.var(axis=0))
    
    def calculate_energy(frames):
        energies = []
        for frame in frames:
            energy = 1 / len(frame) * np.sum(np.power(frame, 2))
            energies.append(energy)
        return energies
    
    def calculate_zcr(frames):
        def sign(x):
            return 1 if x >= 0 else -1
        
        zc_rates = []
        for frame in frames:
            zc_rate = 0
            for i in range(1, len(frame)):
                zc_rate += abs(sign(frame[i]) - sign(frame[i - 1])) / 2
            zc_rates.append(zc_rate / len(frame))
        return zc_rates
    
    rate, signal = wavfile.read(audio)
    signal_frames = python_speech_features.sigproc.framesig(signal, frame_len=FRAME_LENGTH,
                                                            frame_step=FRAME_STEP,
                                                            winfunc=WINDOW_FUNCTION)
    mfcc = python_speech_features.mfcc(signal_frames, rate, winlen=WINDOW_LENGTH,
                                      winstep=WINDOW_STEP, numcep=13, preemph=PRE_EMPH,
                                      winfunc=WINDOW_FUNCTION)
    delta = np.concatenate([np.zeros(shape=(1, 13)), np.diff(mfcc, n=1, axis=0)])
    delta_delta = np.concatenate([np.zeros(shape=(2, 13)), np.diff(mfcc, n=2, axis=0)])
    
    calculate_all(mfcc)
    calculate_all(delta)
    calculate_all(delta_delta)
    
    energies = calculate_energy(signal_frames)
    calculate_all(np.array([energies]).reshape(len(energies), 1))
    
    zcrs = calculate_zcr(signal_frames)
    calculate_all(np.array([zcrs]).reshape(len(zcrs), 1))
    
    return np.asarray(f)


def batch_extract(direction):
    feats, labels = [], []
    
    valid_samples = read_files(direction, name='ABM')
    for sample in valid_samples:
        sample_feats = extract(sample)
        feats.append(sample_feats)
        labels.append(1)
    
    invalid_samples = read_files(direction, name='NABM')
    for sample in invalid_samples:
        sample_feats = extract(sample)
        feats.append(sample_feats)
        labels.append(0)
    
    return np.asarray(feats), np.asarray(labels)

In [45]:
def record(samplerate=11025, duration=2, play_rec=False, flag='test'):
    FRAMES = int(duration * samplerate)
    print('Recording Started')
    voice = sd.rec(FRAMES, samplerate, dtype=np.float, channels=1, mapping=None, blocking=True)
    print('Recording Ended')
    
    if play_rec is True:
        sd.play(voice, samplerate, blocking=True)
    
    if flag == 'test':
        wavfile.write(filename='temp.wav', rate=samplerate, data=voice)
        time.sleep(1)
    elif flag == 'train':
        return voice, samplerate
    
    
def batch_record(direction, count, name):
    if not os.path.exists(direction):
        os.makedirs(direction)
    
    last_file = len(read_files(direction, name))
    for cnt in range(last_file, last_file + count):
        file_name = '{}/{}.wav'.format(direction, name + str(cnt))
        voice, samplerate = record(flag='train')
        wavfile.write(filename, samplerate, data=voice)
        time.sleep(0.5)

In [58]:
def create_network(feats, labels, n_hiddenlayers=3, _epochs=20, pca_flag=False):
    net = tensorflow.keras.models.Sequential()
    
    net.add(tensorflow.keras.layers.Flatten())
    for _ in range(n_hiddenlayers):
        net.add(tensorflow.keras.layers.Dense(units=128, activation=tensorflow.nn.relu))
    
    net.add(tensorflow.keras.layers.Dense(units=2, activation=tensorflow.nn.softmax))
    net.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    if pca_flag is True:
        pca = joblib.load('pca_dnn.pkl')
        feats = pca.transform(feats)
    
    net.fit(feats, labels, epochs=_epochs)
    net.save('net.h5')
    
    
def predict(net, audio, pca_flag=False):
    feats = extract(audio)
    
    if pca_flag is True:
        pca = joblib.load('pca_dnn.pkl')
        feats = pca.transform(feats.reshape(1, -1))
        
    score = net.predict(feats.reshape(1, -1))
    res_class = net.predict_classes(feats.reshape(1, -1))
    
    if res_class == 1:
        print('\n\nAli Bayat Mokhtari Verified.')
    elif res_class == 0:
        print('\n\nNot Verified!')
        
    print('score is: {}'.format(score))
    return res_class


def single_test():
    record(play_rec=True)
    net = tensorflow.keras.models.load_model('net.h5')
    res = predict(net, 'temp.wav', pca_flag=True)
    print(res)
    return res


def calculate_accuracy(direction, pcaflag, name, cls):
    net = tensorflow.keras.models.load_model('net.h5')
    files = read_files(direction, name=name)
    cnt = 0
    for _file in files:
        test_class = predict(net, audio=_file, pca_flag=pcaflag)
        print(_file, test_class)
        if test_class == cls:
            cnt += 1
    print('\n\naccuracy is: {}'.format((cnt / len(files)) * 100))

In [51]:
def create_pca(feats, n_feats):
    pca = PCA(n_components=n_feats, whiten=True)
    pca.fit(feats)
    joblib.dump(pca, filename='pca_dnn.pkl')

In [25]:
feats, labels = batch_extract(direction='./train')
create_pca(feats, n_feats=40)
create_network(feats, labels, n_hiddenlayers=2, _epochs=10, pca_flag=True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [43]:
ans = single_test()

Recording Started
Recording Ended


Ali Bayat Mokhtari Verified.
score is: [[2.641026e-04 9.997359e-01]]
[1]


In [60]:
calculate_accuracy(direction='./train', pcaflag=True, name='NABM', cls=0)



Not Verified!
score is: [[0.9985997  0.00140027]]
./train/NABM_Bye16.wav [0]


Not Verified!
score is: [[9.9967849e-01 3.2153432e-04]]
./train/NABM_Hi80.wav [0]


Not Verified!
score is: [[0.99754214 0.00245785]]
./train/NABM_Hi94.wav [0]


Not Verified!
score is: [[0.99532473 0.00467528]]
./train/NABM_Hi43.wav [0]


Not Verified!
score is: [[0.99441236 0.00558768]]
./train/NABM_Hi57.wav [0]


Not Verified!
score is: [[9.9998224e-01 1.7794531e-05]]
./train/NABM_Hi56.wav [0]


Not Verified!
score is: [[9.990126e-01 9.873934e-04]]
./train/NABM_Hi42.wav [0]


Not Verified!
score is: [[9.9927539e-01 7.2458194e-04]]
./train/NABM_Hi95.wav [0]


Not Verified!
score is: [[0.9588058  0.04119423]]
./train/NABM_Hi81.wav [0]


Not Verified!
score is: [[9.999994e-01 5.509705e-07]]
./train/NABM_Bye17.wav [0]


Not Verified!
score is: [[9.9999964e-01 3.2590708e-07]]
./train/NABM_Bye15.wav [0]


Not Verified!
score is: [[9.997552e-01 2.447608e-04]]
./train/NABM_Bye29.wav [0]


Not Verified!
score is



Not Verified!
score is: [[9.9909043e-01 9.0954243e-04]]
./train/NABM_Hi27.wav [0]


Not Verified!
score is: [[9.9956709e-01 4.3285484e-04]]
./train/NABM_Hi33.wav [0]


Not Verified!
score is: [[9.9962389e-01 3.7603604e-04]]
./train/NABM_Bye107.wav [0]


Not Verified!
score is: [[9.9986541e-01 1.3455504e-04]]
./train/NABM_Bye113.wav [0]


Not Verified!
score is: [[9.9986684e-01 1.3312430e-04]]
./train/NABM_Bye99.wav [0]


Not Verified!
score is: [[0.99618196 0.00381809]]
./train/NABM_Hi123.wav [0]


Not Verified!
score is: [[9.9971300e-01 2.8693778e-04]]
./train/NABM_Bye66.wav [0]


Not Verified!
score is: [[9.9998057e-01 1.9441937e-05]]
./train/NABM_Bye72.wav [0]


Not Verified!
score is: [[9.9999869e-01 1.3061222e-06]]
./train/NABM_Hi137.wav [0]


Not Verified!
score is: [[0.98558366 0.01441635]]
./train/NABM_Bye58.wav [0]


Not Verified!
score is: [[9.9981529e-01 1.8475478e-04]]
./train/NABM_Hi109.wav [0]


Not Verified!
score is: [[9.9983430e-01 1.6566493e-04]]
./train/NABM_Bye64.



Not Verified!
score is: [[0.9929322  0.00706781]]
./train/NABM_Hi128.wav [0]


Not Verified!
score is: [[9.9980336e-01 1.9667036e-04]]
./train/NABM_Bye45.wav [0]


Not Verified!
score is: [[0.99082243 0.00917756]]
./train/NABM_Hi100.wav [0]


Not Verified!
score is: [[9.992617e-01 7.382913e-04]]
./train/NABM_Hi114.wav [0]


Not Verified!
score is: [[0.95115596 0.04884404]]
./train/NABM_Bye51.wav [0]


Not Verified!
score is: [[9.999589e-01 4.114780e-05]]
./train/NABM_Bye86.wav [0]


Not Verified!
score is: [[9.9999976e-01 2.3869310e-07]]
./train/NABM_Bye5.wav [0]


Not Verified!
score is: [[0.9899951  0.01000488]]
./train/NABM_Bye92.wav [0]


Not Verified!
score is: [[9.9999499e-01 4.9929363e-06]]
./train/NABM_Bye124.wav [0]


Not Verified!
score is: [[9.999528e-01 4.724879e-05]]
./train/NABM_Hi38.wav [0]


Not Verified!
score is: [[9.9981612e-01 1.8394306e-04]]
./train/NABM_Bye130.wav [0]


Not Verified!
score is: [[0.9977336  0.00226634]]
./train/NABM_Bye118.wav [0]


Not Verified!