In [1]:
import os
import wave
import time
import pickle
import pyaudio
import warnings
import numpy as np
from sklearn import preprocessing
from scipy.io.wavfile import read
import python_speech_features as mfcc
from sklearn.mixture import GaussianMixture 

In [2]:
def calculate_delta(array):
    """Compute first-order delta (time-derivative) features for a 2-D array.

    For every row ``i`` the delta is the weighted difference

        (a[i+1] - a[i-1] + 2 * (a[i+2] - a[i-2])) / 10

    where row indices that fall outside ``[0, rows - 1]`` are clamped to the
    nearest valid row, so the first and last frames reuse the edge rows.

    Parameters
    ----------
    array : numpy.ndarray
        2-D array of shape ``(rows, cols)``; each row is one frame.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape ``(rows, cols)`` as the input.
        (The output width follows the input instead of being fixed at 20,
        so inputs with any number of columns are supported.)
    """
    rows, cols = array.shape
    N = 2  # half-width of the regression window (frames on each side)

    frame_idx = np.arange(rows)
    deltas = np.zeros((rows, cols))
    for j in range(1, N + 1):
        # Clamp neighbours to the valid row range at both ends.
        ahead = np.minimum(frame_idx + j, rows - 1)
        behind = np.maximum(frame_idx - j, 0)
        deltas += j * (array[ahead] - array[behind])

    # Normaliser 2 * sum(j^2 for j in 1..N); equals 10 for N = 2.
    denominator = 2 * sum(j * j for j in range(1, N + 1))
    return deltas / denominator

In [3]:
def extract_features(audio,rate):
    """Build the per-frame feature matrix for one audio signal.

    Computes 20 MFCC coefficients per frame (25 ms window, 10 ms step,
    1200-point FFT, energy appended), scales them with
    ``preprocessing.scale``, and appends the delta features produced by
    ``calculate_delta``.

    Parameters
    ----------
    audio : array-like
        Audio samples, as returned by ``scipy.io.wavfile.read``.
    rate : int
        Sample rate of ``audio`` in Hz.

    Returns
    -------
    numpy.ndarray
        Scaled MFCCs and their deltas joined column-wise, one row per frame.
    """
    cepstra = mfcc.mfcc(
        audio,
        rate,
        winlen=0.025,
        winstep=0.01,
        numcep=20,
        nfft=1200,
        appendEnergy=True,
    )
    scaled_cepstra = preprocessing.scale(cepstra)
    cepstra_delta = calculate_delta(scaled_cepstra)
    # Side-by-side join: static coefficients first, then their deltas.
    return np.concatenate((scaled_cepstra, cepstra_delta), axis=1)

In [4]:
def train_model():
    """Train one Gaussian mixture model per speaker and pickle it to disk.

    Sample file names are read line by line from
    ``training_set_addition.txt`` and loaded from the ``training_set``
    directory. The feature matrices of every ``SAMPLES_NUMBER`` consecutive
    samples are stacked and used to fit a 10-component diagonal-covariance
    ``GaussianMixture``. Each fitted model is written to
    ``trained_models/<prefix>.gmm``, where ``<prefix>`` is the part of the
    group's last file name before the first ``"-"``.

    Samples belonging to one speaker must therefore be listed consecutively
    and in groups of exactly ``SAMPLES_NUMBER``. Blank lines in the list file
    are ignored. Trailing samples that do not fill a complete group are not
    modelled; a notice is printed for them.
    """
    SAMPLES_NUMBER = 5                          # samples per speaker model
    source = "training_set"                     # directory with training WAVs
    dest = "trained_models"                     # directory for pickled models
    train_file = "training_set_addition.txt"    # list of training sample names

    # Make sure the output directory exists before any (slow) fitting starts.
    os.makedirs(dest, exist_ok=True)

    pending = []  # feature matrices collected for the current speaker
    with open(train_file, 'r') as file_paths:
        for line in file_paths:
            path = line.strip()
            if not path:
                # Skip blank lines instead of trying to read the directory.
                continue
            print(path)
            sr, audio = read(os.path.join(source, path))
            print(sr)
            pending.append(extract_features(audio, sr))

            if len(pending) == SAMPLES_NUMBER:
                # Stack once per speaker rather than after every sample.
                features = np.vstack(pending)
                gmm = GaussianMixture(n_components = 10, max_iter = 300, covariance_type='diag',n_init = 1)
                gmm.fit(features)

                # Dump the trained Gaussian mixture model.
                picklefile = path.split("-")[0]+".gmm"
                with open(os.path.join(dest, picklefile), 'wb') as model_file:
                    pickle.dump(gmm, model_file)
                print('+ modeling completed for speaker:',picklefile," with data point = ",features.shape)
                pending = []

    if pending:
        # An incomplete group was previously dropped silently; report it.
        print(f"! {len(pending)} trailing sample(s) ignored: fewer than {SAMPLES_NUMBER} in the last group")

In [None]:
def test_model():
    """Score every test sample against all trained speaker models.

    Loads every ``*.gmm`` pickle from ``trained_models``, then for each file
    name listed in ``testing_set_addition.txt`` (loaded from ``testing_set``)
    extracts features and evaluates ``GaussianMixture.score`` under each
    model. For every sample it prints the per-model scores, the speaker whose
    model scored highest, that best score, and ``"In Group"`` if the best
    score exceeds ``-24`` or ``"Other"`` otherwise.

    Blank lines in the list file are ignored. The function pauses one second
    after each sample.

    Raises
    ------
    ValueError
        If no ``.gmm`` model files are found in the model directory.
    """
    source = "testing_set"                      # directory with test WAVs
    modelpath = "trained_models"                # directory with pickled models
    test_file = "testing_set_addition.txt"      # list of test sample names
    in_group_threshold = -24                    # best score above this => known speaker

    # Sorted so the printed model order is stable across runs and platforms.
    gmm_files = sorted(
        os.path.join(modelpath, fname)
        for fname in os.listdir(modelpath)
        if fname.endswith('.gmm')
    )
    if not gmm_files:
        raise ValueError(f"no .gmm model files found in {modelpath!r}")

    # Load the Gaussian mixture speaker models.
    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files that were produced by a trusted training run.
    models = []
    for fname in gmm_files:
        with open(fname, 'rb') as model_file:
            models.append(pickle.load(model_file))
    # Speaker label = model file name without directory and extension.
    speakers = [os.path.splitext(os.path.basename(fname))[0] for fname in gmm_files]

    # Read the list of test audio files and classify each one.
    with open(test_file, 'r') as file_paths:
        for line in file_paths:
            path = line.strip()
            if not path:
                continue
            print(f"Test File : {path}\n")
            sr, audio = read(os.path.join(source, path))
            vector = extract_features(audio, sr)

            log_likelihood = np.zeros(len(models))
            for i, gmm in enumerate(models):
                # score() already returns a single scalar for the sample.
                score = gmm.score(vector)
                print(os.path.basename(gmm_files[i]))
                print(score)
                log_likelihood[i] = score

            winner = np.argmax(log_likelihood)
            # Take the true maximum; a fixed -100 starting value would
            # misreport the best score whenever all models score lower.
            max_score = log_likelihood[winner]
            print("\tdetected as - ", speakers[winner])
            print(f"MaxScore = {max_score}")
            print("-"*50)
            if max_score > in_group_threshold:
                print("In Group")
            else:
                print("Other")
            print("#"*50)
            time.sleep(1.0)
