# Voice fingerprint model training using Gaussian Mixture Models (GMMs)

In [1]:
import os
import time
import pickle
import numpy as np
from sklearn import preprocessing
from scipy.io.wavfile import read
import python_speech_features as mfcc
from sklearn.mixture import GaussianMixture 

## Functions

In [2]:
# function to calculate and return the delta of a given feature vector matrix
def calculate_delta(array):
    """Return the delta (frame-to-frame slope) of a feature matrix.

    For every row ``i`` the result is

        (array[i+1] - array[i-1] + 2 * (array[i+2] - array[i-2])) / 10

    where row indices that fall outside the matrix are clamped to the first
    or last row. The divisor 10 is ``2 * (1**2 + 2**2)``, i.e. the
    normalisation of a regression window of ``N = 2`` rows on each side.

    Parameters
    ----------
    array : numpy.ndarray
        2-D matrix of shape ``(rows, cols)``, one feature vector per row.

    Returns
    -------
    numpy.ndarray
        Float matrix of shape ``(rows, cols)`` holding the deltas. The width
        follows the input instead of being fixed to 20 columns.
    """
    rows, cols = array.shape
    N = 2
    deltas = np.zeros((rows, cols))
    frame = np.arange(rows)
    for j in range(1, N + 1):
        # Clamp neighbours at the edges so the first/last rows reuse themselves.
        ahead = np.minimum(frame + j, rows - 1)
        behind = np.maximum(frame - j, 0)
        deltas += j * (array[ahead] - array[behind])
    return deltas / 10

In [3]:
# function to extract mfcc and delta features from audio and return them in a combined matrix
def extract_features(audio,rate):
    """Build the per-frame feature matrix for one audio signal.

    The signal is converted to MFCCs (0.025 s window, 0.01 s step, 20
    coefficients, 1200-point FFT, energy appended), the MFCC matrix is
    standardised with ``preprocessing.scale``, and its deltas are appended
    column-wise.

    Parameters
    ----------
    audio : array-like
        Audio samples, as returned by ``scipy.io.wavfile.read``.
    rate : int
        Sampling rate of ``audio`` in Hz.

    Returns
    -------
    numpy.ndarray
        Matrix whose rows are frames: the scaled MFCC columns followed by
        their delta columns.
    """
    cepstra = mfcc.mfcc(
        audio,
        rate,
        winlen=0.025,
        winstep=0.01,
        numcep=20,
        nfft=1200,
        appendEnergy=True,
    )
    scaled = preprocessing.scale(cepstra)
    return np.hstack((scaled, calculate_delta(scaled)))

In [4]:
# function to train the models
def train_model(data_set_name , model_name, samples_number = 5, random_state = None):
    r"""Fit one GMM per group of consecutive training samples and pickle it.

    The listing file ``data_set\<data_set_name>_addition.txt`` holds one
    relative wav path per line (``<Label>\<file>.wav``). Lines are consumed
    in order; after every ``samples_number`` samples the stacked features are
    used to fit a 10-component diagonal-covariance ``GaussianMixture``. The
    model is written to ``trained_models\<model_name>\<Label>.gmm``, where
    ``<Label>`` is the part before the first backslash of the LAST path in
    the group, so the listing must keep each label's samples contiguous.

    Parameters
    ----------
    data_set_name : str
        Name of the folder under ``data_set`` that holds the wav files, and
        prefix of the ``_addition.txt`` listing file.
    model_name : str
        Name of the folder under ``trained_models`` that receives the models.
    samples_number : int, optional
        Number of consecutive listed samples used per model (default 5).
    random_state : int or None, optional
        Seed forwarded to ``GaussianMixture`` for reproducible fits. The
        default ``None`` keeps the unseeded behaviour.

    Raises
    ------
    ValueError
        If ``samples_number`` is smaller than 1.
    """
    if samples_number < 1:
        raise ValueError("samples_number must be at least 1")

    source = f"data_set\\{data_set_name}\\"                    # training set path
    dest = f"trained_models\\{model_name}\\"                   # destination path to store trained models
    train_file = f"data_set\\{data_set_name}_addition.txt"     # training file with sample names

    batch = []  # feature matrices of the samples collected for the current model
    with open(train_file, 'r') as file_paths:
        for path in file_paths:
            path = path.strip()
            if not path:
                # Blank lines (e.g. a trailing newline) are not sample names.
                continue
            print(path)
            sr, audio = read(source + path)
            print(sr)
            batch.append(extract_features(audio, sr))

            if len(batch) == samples_number:
                # Stack once per model instead of re-stacking on every sample.
                features = np.vstack(batch)
                gmm = GaussianMixture(n_components = 10, max_iter = 300,
                                      covariance_type = 'diag', n_init = 1,
                                      random_state = random_state)
                gmm.fit(features)

                # dumping the trained gaussian model
                print(path)
                picklefile = path.split("\\")[0] + ".gmm"
                print(picklefile)

                target = dest + picklefile
                parent = os.path.dirname(target)
                if parent:
                    # Create the output folder on first use instead of failing.
                    os.makedirs(parent, exist_ok=True)
                with open(target, 'wb') as model_file:
                    pickle.dump(gmm, model_file)
                print('+ modeling completed for speaker:',picklefile," with data point = ",features.shape)
                batch = []

    if batch:
        # An incomplete trailing group is not trained; say so rather than
        # dropping the samples silently.
        print(f"! {len(batch)} trailing sample(s) ignored: fewer than {samples_number} in the last group")

In [5]:
# function to test models and predict the speaker or sentence
def test_model(model_name, threshold = -24):
    r"""Score every listed test sample against all models in a model folder.

    Each line of ``data_set\testing_set_addition.txt`` names a wav file
    relative to ``data_set\testing_set``. For every file the features are
    scored with each ``.gmm`` model found in ``trained_models\<model_name>``;
    the per-model scores, the best-scoring model name and the best score are
    printed, followed by ``Accepted`` when the best score exceeds
    ``threshold`` and ``Other`` otherwise.

    Parameters
    ----------
    model_name : str
        Name of the folder under ``trained_models`` that holds the models.
    threshold : float, optional
        Minimum best score (the value returned by ``GaussianMixture.score``)
        for a sample to be accepted (default -24).

    Raises
    ------
    ValueError
        If the model folder contains no ``.gmm`` file.
    """
    source = "data_set\\testing_set\\"                   # path of test samples
    modelpath = f"trained_models\\{model_name}\\"        # path of trained models
    test_file = "data_set\\testing_set_addition.txt"     # test samples names

    gmm_names = [fname for fname in os.listdir(modelpath) if fname.endswith('.gmm')]
    if not gmm_names:
        raise ValueError(f"no .gmm models found in {modelpath}")

    # Load the trained Gaussian mixture models.
    # NOTE: pickle.load can execute arbitrary code; only load model files
    # produced by a trusted training run.
    models = []
    for fname in gmm_names:
        with open(os.path.join(modelpath, fname), 'rb') as model_file:
            models.append(pickle.load(model_file))
    speakers = [fname.split(".gmm")[0] for fname in gmm_names]

    # Read the test listing and score each test audio file
    with open(test_file, 'r') as file_paths:
        for path in file_paths:
            path = path.strip()
            if not path:
                # Blank lines (e.g. a trailing newline) are not sample names.
                continue
            print(f"Test File : {path}\n")
            sr, audio = read(source + path)
            vector = extract_features(audio, sr)
            log_likelihood = np.zeros(len(models))
            for i, gmm in enumerate(models):  # checking with each model one by one
                score = float(gmm.score(vector))
                print(gmm_names[i])
                print(score)
                log_likelihood[i] = score
            winner = np.argmax(log_likelihood)
            # Take the best score from the winner itself; a fixed starting
            # value of -100 misreports samples whose scores are all lower.
            max_score = float(log_likelihood[winner])
            print("\tdetected as - ", speakers[winner])
            print(f"MaxScore = {max_score}")
            print("-"*50)
            # detecting by threshold score
            if max_score > threshold:
                print("Accepted")
            else:
                print("Other")
            print("#"*50)
            time.sleep(1.0)


## Model Training

In [6]:
# Train the team models: reads the sample list
# data_set\teams_training_set_addition.txt and pickles the fitted GMMs
# into trained_models\Team_Verification\.
train_model("teams_training_set" , "Team_Verification")

Neveen\open_door_neveen-sample1.wav
44100
Neveen\open_door_neveen-sample2.wav
44100
Neveen\open_door_neveen-sample3.wav
44100
Neveen\open_door_neveen-sample4.wav
44100
Neveen\open_door_neveen-sample5.wav
44100
Neveen\open_door_neveen-sample5.wav
Neveen.gmm
+ modeling completed for speaker: Neveen.gmm  with data point =  (995, 40)
Nurhan\open_the_door_nourhan-sample1.wav
44100
Nurhan\open_the_door_nourhan-sample2.wav
44100
Nurhan\open_the_door_nourhan-sample3.wav
44100
Nurhan\open_the_door_nourhan-sample4.wav
44100
Nurhan\open_the_door_nourhan-sample5.wav
44100
Nurhan\open_the_door_nourhan-sample5.wav
Nurhan.gmm
+ modeling completed for speaker: Nurhan.gmm  with data point =  (995, 40)
Omar\omar_open_door-sample0.wav
44100
Omar\omar_open_door-sample1.wav
44100
Omar\omar_open_door-sample2.wav
44100
Omar\omar_open_door-sample3.wav
44100
Omar\Open_The_Door_Omar-sample7.wav
44100
Omar\Open_The_Door_Omar-sample7.wav
Omar.gmm
+ modeling completed for speaker: Omar.gmm  with data point =  (995

In [7]:
# Train the sentence models: reads the sample list
# data_set\sentence_training_set_addition.txt and pickles the fitted GMMs
# into trained_models\Sentence_Verification\.
train_model("sentence_training_set" , "Sentence_Verification")

Neveen_Close_The_Door\close_door_neveen-sample0.wav
44100
Neveen_Close_The_Door\close_door_neveen-sample1.wav
44100
Neveen_Close_The_Door\close_door_neveen-sample2.wav
44100
Neveen_Close_The_Door\close_door_neveen-sample3.wav
44100
Neveen_Close_The_Door\close_door_neveen-sample4.wav
44100
Neveen_Close_The_Door\close_door_neveen-sample4.wav
Neveen_Close_The_Door.gmm
+ modeling completed for speaker: Neveen_Close_The_Door.gmm  with data point =  (995, 40)
Neveen_Close_The_Window\close_window_neveen-sample0.wav
44100
Neveen_Close_The_Window\close_window_neveen-sample1.wav
44100
Neveen_Close_The_Window\close_window_neveen-sample2.wav
44100
Neveen_Close_The_Window\close_window_neveen-sample3.wav
44100
Neveen_Close_The_Window\close_window_neveen-sample4.wav
44100
Neveen_Close_The_Window\close_window_neveen-sample4.wav
Neveen_Close_The_Window.gmm
+ modeling completed for speaker: Neveen_Close_The_Window.gmm  with data point =  (995, 40)
Neveen_Open_The_Door\open_door_neveen-sample0.wav
44100


## Model Testing

In [8]:
# Score every sample listed in data_set\testing_set_addition.txt against
# each .gmm model stored in trained_models\Team_Verification\.
test_model("Team_Verification")

Test File : Neven\close_door_neveen-sample18.wav

Neveen.gmm
-21.895330557241095
Nurhan.gmm
-26.680078346651644
Omar.gmm
-25.76609851787727
Others_1.gmm
-30.513643444226254
Other_2.gmm
-25.252634657113198
Other_3.gmm
-27.21934480228752
Salman.gmm
-26.796677704050012
	detected as -  Neveen
MaxScore = -21.895330557241095
--------------------------------------------------
Accepted
##################################################
Test File : Neven\close_door_neveen-sample19.wav

Neveen.gmm
-20.900064840045857
Nurhan.gmm
-27.53337978257185
Omar.gmm
-27.14774480948544
Others_1.gmm
-31.02929591211509
Other_2.gmm
-25.18863885137989
Other_3.gmm
-27.688856395865262
Salman.gmm
-27.185617680296545
	detected as -  Neveen
MaxScore = -20.900064840045857
--------------------------------------------------
Accepted
##################################################
Test File : Neven\close_door_neven.wav

Neveen.gmm
-23.983217743966684
Nurhan.gmm
-29.845814546358753
Omar.gmm
-27.042147090990287
Others_

In [9]:
# Score every sample listed in data_set\testing_set_addition.txt against
# each .gmm model stored in trained_models\Sentence_Verification\.
test_model("Sentence_Verification")

Test File : Neven\close_door_neveen-sample18.wav

Neveen_Close_The_Door.gmm
-19.67967009135858
Neveen_Close_The_Window.gmm
-22.013328369893255
Neveen_Open_The_Door.gmm
-22.566884461253018
Neveen_Open_The_Window.gmm
-22.924086120967488
Nur_Close_The_Door.gmm
-29.477156717862396
Nur_Close_The_Window.gmm
-27.71550844570354
Nur_Open_The_Door.gmm
-26.287227143952865
Nur_Open_The_Window.gmm
-25.551354353596544
Omar_Close_The_Door.gmm
-30.18744020419865
Omar_Close_The_Window.gmm
-26.615646857030537
Omar_Open_The_Door.gmm
-25.899947603640705
Omar_Open_The_Window.gmm
-27.10388047731759
Salman_Close_The_Door.gmm
-27.412418739090818
Salman_Close_The_Window.gmm
-26.107234306197718
Salman_Open_The_Door.gmm
-27.425213380609197
Salman_Open_The_Window.gmm
-30.290168827082482
	detected as -  Neveen_Close_The_Door
MaxScore = -19.67967009135858
--------------------------------------------------
Accepted
##################################################
Test File : Neven\close_door_neveen-sample19.wav

N