In [11]:
import glob
import random
from os import listdir
from os.path import basename, isfile, join

import librosa
import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import LSTM, Dense, Dropout, Flatten
from keras.models import Sequential
from keras.models import model_from_json
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Folder with the recordings; file names are parsed as "<prefix>_<label>.<ext>"
# (e.g. "right_Emily.wav").
DATA_DIR = 'C:\\wamp64\\www\\Multi-modal-authentification\\voices\\'

# Plain files only; sub-directories are skipped.
files = [f for f in listdir(DATA_DIR) if isfile(join(DATA_DIR, f))]

# Files whose prefix is "left" form the training set, everything else is
# used for validation.
X_train = [name for name in files if name.split("_")[0] == "left"]
X_val = [name for name in files if name.split("_")[0] != "left"]

# Distinct labels found in the training files.  They are sorted on purpose:
# listdir() returns entries in arbitrary order, whereas LabelBinarizer orders
# its classes by sorting them, so a sorted list keeps labels[i] aligned with
# column i of the one-hot encoding (and therefore of the model output).
labels = sorted({name.split('_')[1].split('.')[0] for name in X_train})
print(labels)

# Number of frames every MFCC matrix is padded to.
max_length = 435

label_binarizer = LabelBinarizer()
label_binarizer.fit(labels)

def one_hot_encode(x):
    """Return the binarized encoding of the labels in ``x``.

    Delegates to the module-level ``label_binarizer``, which is fitted on the
    training labels before this function is used.
    """
    encoded = label_binarizer.transform(x)
    return encoded

def prepare(path):
    """Load an audio file and return its MFCC matrix with exactly ``max_length`` frames.

    Parameters
    ----------
    path : str
        Audio file to read with ``librosa.load`` (down-mixed to mono).

    Returns
    -------
    numpy.ndarray
        2-D MFCC matrix whose second axis has length ``max_length``: shorter
        clips are zero-padded on the right, longer clips are cut off.
    """
    wave, sr = librosa.load(path, mono=True)
    # Keyword arguments work on every librosa release; recent releases reject
    # the signal and sample rate when passed positionally.
    mfcc = librosa.feature.mfcc(y=wave, sr=sr)
    # Clip first so the pad width below can never become negative
    # (np.pad raises on negative widths, which used to crash on long clips).
    mfcc = mfcc[:, :max_length]
    mfcc = np.pad(mfcc, ((0, 0), (0, max_length - mfcc.shape[1])), mode='constant', constant_values=0)
    return np.array(mfcc)


def batch_generator(data, batch_size=16):
    """Yield an endless stream of random (features, labels) mini-batches.

    Parameters
    ----------
    data : list of str
        File names relative to ``DATA_DIR``, of the form
        ``<prefix>_<label>.<ext>``.  The list is not modified.
    batch_size : int, optional
        Number of clips per batch.  When ``data`` holds fewer files, every
        batch contains all of them instead of failing.

    Yields
    ------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` stacks one MFCC matrix per clip (each padded or
        cut to ``max_length`` frames) and ``y`` is ``one_hot_encode`` applied
        to the clip labels.

    Raises
    ------
    ValueError
        On first iteration, if ``data`` is empty.
    """
    if len(data) == 0:
        raise ValueError("batch_generator needs at least one file")
    n_samples = min(batch_size, len(data))
    while 1:
        X, y = [], []
        # random.sample draws without replacement and leaves the caller's
        # list untouched (shuffling in place reordered X_train / X_val).
        for wav in random.sample(data, n_samples):
            wave, sr = librosa.load(join(DATA_DIR, wav), mono=True)
            y.append(wav.split('_')[1].split('.')[0])
            # Keyword arguments: accepted by old and new librosa alike.
            mfcc = librosa.feature.mfcc(y=wave, sr=sr)
            # Clip before padding so the pad width is never negative.
            mfcc = mfcc[:, :max_length]
            mfcc = np.pad(mfcc, ((0, 0), (0, max_length - mfcc.shape[1])), mode='constant', constant_values=0)
            X.append(np.array(mfcc))
        yield np.array(X), np.array(one_hot_encode(y))


# Read the saved architecture description, then rebuild the network from it.
with open("C:\\wamp64\\www\\Multi-modal-authentification\\" + './model.json', 'r') as f:
    architecture = f.read()
model = model_from_json(architecture)

# Restore the trained weights from the stored checkpoint.
model.load_weights(
    "C:\\wamp64\\www\\Multi-modal-authentification\\" + './checkpoints_old/voice_recognition_best_model_05.hdf5'
)

#model = tf.keras.models.load_model("./checkpoints_old/voice_recognition_best_model_05.hdf5")


['Abigail', 'Adam', 'Alexander', 'Amelia', 'Arthur', 'Ava', 'Avery', 'Charlie', 'Charlotte', 'Daniel', 'David', 'Dylan', 'Edward', 'Elizabeth', 'Elliott', 'Emily', 'Ethan', 'Evelyn', 'Frederick', 'George', 'Harper', 'Harrison', 'Harry', 'Harvey', 'Henry', 'Isabella', 'Jack', 'Jake', 'James', 'Joseph', 'Louis', 'Lucy', 'Madison', 'Max', 'Mia', 'Noah', 'Oliver', 'Olivia', 'Oscar', 'Samuel', 'Sebastian', 'Sophia', 'Theo', 'Thomas', 'Victoria', 'William']


In [84]:
import json

# MFCC features of the clip to identify.
data = prepare("C:\\wamp64\\www\\Multi-modal-authentification\\" + "right_Emily.wav")

# Add a leading batch axis of size 1 in front of the 2-D feature matrix.
b = data.reshape(-1, data.shape[0], data.shape[1])

prediction = model.predict(b)
probabilities = prediction[0]

# Training targets are produced by label_binarizer, so output column i belongs
# to label_binarizer.classes_[i].  Using classes_ (rather than the `labels`
# list) keeps the mapping right regardless of the order files were listed in.
# NOTE(review): this assumes the binarizer was fitted on the same label set
# the loaded weights were trained with.
class_names = [str(name) for name in label_binarizer.classes_]
if len(class_names) != len(probabilities):
    raise ValueError(
        "model outputs {} classes but {} labels are known".format(
            len(probabilities), len(class_names)))

# Position of the most likely class (first one wins on ties).
index = int(np.argmax(probabilities))
highest_probability = probabilities[index]

final_predictions = {
    'predictions': {name: str(p) for name, p in zip(class_names, probabilities)},
    'results': {
        "label": class_names[index],
        "probability": str(highest_probability),
    },
}

json_data = json.dumps(final_predictions)

print(json_data)


{"predictions": {"Abigail": "0.0063126483", "Adam": "0.05075737", "Alexander": "0.11355051", "Amelia": "0.0023061077", "Arthur": "0.012582032", "Ava": "0.017087925", "Avery": "0.00071791175", "Charlie": "0.00043110887", "Charlotte": "0.0029428164", "Daniel": "0.00026786522", "David": "0.060302373", "Dylan": "0.0006901187", "Edward": "0.00517013", "Elizabeth": "0.0015940629", "Elliott": "0.01374901", "Emily": "0.33593005", "Ethan": "0.00061023916", "Evelyn": "0.00515447", "Frederick": "0.03377318", "George": "0.0044793147", "Harper": "0.028340064", "Harrison": "0.005109241", "Harry": "0.00074405794", "Harvey": "0.0073210513", "Henry": "0.005617719", "Isabella": "0.009576508", "Jack": "0.00068354496", "Jake": "0.0030357144", "James": "0.03508899", "Joseph": "0.003288122", "Louis": "0.0015190764", "Lucy": "0.0042081433", "Madison": "0.11422729", "Max": "0.0042268783", "Mia": "0.0005174069", "Noah": "0.0022254295", "Oliver": "0.0012295869", "Olivia": "0.005747381", "Oscar": "0.0011674549",

In [None]:
# Seed shared by the train/validation split and the batch sampling.
SEED = 2017
DATA_DIR = '../Data/spoken_numbers_pcm/'

# batch_generator draws its batches through the `random` module, so the seed
# has to be applied here as well for reruns to see the same batches.  NumPy is
# seeded too for any NumPy-based randomness further down.
random.seed(SEED)
np.random.seed(SEED)

In [None]:
# glob returns matches in a filesystem-dependent order; sorting first makes
# the seeded split below identical on every machine.
files = sorted(glob.glob(DATA_DIR + "*.wav"))
X_train, X_val = train_test_split(files, test_size=0.2, random_state=SEED)

print('# Training examples: {}'.format(len(X_train)))
print('# Validation examples: {}'.format(len(X_val)))

In [None]:
# The label is the second "_"-separated field of the file name.  basename()
# strips the directory with the platform's own separator rules; splitting on
# "/" alone left the folder name attached on Windows, so the "_" inside
# "spoken_numbers_pcm" was parsed as part of the file name.
# Sorted so the list lines up with the class order used by LabelBinarizer.
labels = sorted({basename(path).split('_')[1] for path in X_train})
print(labels)

In [None]:
# Encoder fitted on the distinct training labels.
label_binarizer = LabelBinarizer().fit(list(set(labels)))


def one_hot_encode(x):
    """Return the binarized encoding of the labels in ``x``.

    Uses the module-level ``label_binarizer`` fitted just above.
    """
    encoded = label_binarizer.transform(x)
    return encoded

In [None]:
# First axis of the model input; presumably the number of MFCC coefficients
# librosa returns by default — TODO confirm against the librosa version in use.
n_features = 20
# Number of frames every MFCC matrix is zero-padded to in batch_generator.
max_length = 80
# One output unit per distinct training label.
n_classes = len(labels)

In [None]:
def batch_generator(data, batch_size=16):
    """Yield an endless stream of random (features, labels) mini-batches.

    Parameters
    ----------
    data : list of str
        Paths of the audio files; the label is the second "_"-separated field
        of each file name.  The list is not modified.
    batch_size : int, optional
        Number of clips per batch.  When ``data`` holds fewer files, every
        batch contains all of them instead of failing.

    Yields
    ------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` stacks one MFCC matrix per clip (each padded or
        cut to ``max_length`` frames) and ``y`` is ``one_hot_encode`` applied
        to the clip labels.

    Raises
    ------
    ValueError
        On first iteration, if ``data`` is empty.
    """
    if len(data) == 0:
        raise ValueError("batch_generator needs at least one file")
    n_samples = min(batch_size, len(data))
    while 1:
        X, y = [], []
        # random.sample draws without replacement and leaves the caller's
        # list untouched (shuffling in place reordered X_train / X_val).
        for wav in random.sample(data, n_samples):
            wave, sr = librosa.load(wav, mono=True)
            # basename() handles the platform's separators; splitting on "/"
            # alone kept the directory attached on Windows.
            y.append(basename(wav).split('_')[1])
            # Keyword arguments: accepted by old and new librosa alike.
            mfcc = librosa.feature.mfcc(y=wave, sr=sr)
            # Clip before padding so the pad width is never negative.
            mfcc = mfcc[:, :max_length]
            mfcc = np.pad(mfcc, ((0, 0), (0, max_length - mfcc.shape[1])), mode='constant', constant_values=0)
            X.append(np.array(mfcc))
        yield np.array(X), np.array(one_hot_encode(y))

In [None]:
# Optimizer step size handed to Adam.
learning_rate = 0.001
# Clips per training batch (the validation generator uses its own size).
batch_size = 64
# Upper bound on training epochs; early stopping may end the run sooner.
n_epochs = 50
# Drop rate shared by the LSTM layer and the Dropout layer.
dropout = 0.5

# Per-sample input shape, matching the MFCC matrices built by batch_generator.
# NOTE(review): Keras recurrent layers read this as (timesteps, features), so
# the LSTM steps over the n_features axis — confirm this orientation is intended.
input_shape = (n_features, max_length)
# Number of generator batches that make up one epoch.
steps_per_epoch = 50

In [None]:
# LSTM over the MFCC input, flattened into a dense classifier that ends in a
# softmax with one unit per class.
model = Sequential([
    LSTM(256, return_sequences=True, input_shape=input_shape, dropout=dropout),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(dropout),
    Dense(n_classes, activation='softmax'),
])

In [None]:
# The learning rate is passed positionally: its keyword was renamed from `lr`
# to `learning_rate` across Keras releases, while it has stayed the first
# argument of Adam, so this form works on old and new versions alike.
opt = Adam(learning_rate)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.summary()

In [None]:
# Write a checkpoint file (epoch number in its name) only when the run improves.
checkpoint_cb = ModelCheckpoint(
    'checkpoints/voice_recognition_best_model_{epoch:02d}.hdf5',
    save_best_only=True,
)
# Stop once 'val_acc' has gone 2 epochs without improving.
early_stopping_cb = EarlyStopping(monitor='val_acc', patience=2)

callbacks = [checkpoint_cb, early_stopping_cb]

In [None]:
# Endless batch streams for training and validation.
train_batches = batch_generator(X_train, batch_size)
val_batches = batch_generator(X_val, 32)

# Train from the generators; `history` keeps the per-epoch metrics.
history = model.fit_generator(
    generator=train_batches,
    steps_per_epoch=steps_per_epoch,
    epochs=n_epochs,
    verbose=1,
    validation_data=val_batches,
    validation_steps=5,
    callbacks=callbacks,
)