In [1]:
import os
import wave
import time
import pickle
import numpy as np
from sklearn import preprocessing
from scipy.io.wavfile import read
import python_speech_features as mfcc
from sklearn.mixture import GaussianMixture 
import librosa

In [2]:
def calculate_delta(array):
   
    rows,cols = array.shape
    deltas = np.zeros((rows,20))
    N = 2
    for i in range(rows):
        index = []
        j = 1
        while j <= N:
            if i-j < 0:
              first =0
            else:
              first = i-j
            if i+j > rows-1:
                second = rows-1
            else:
                second = i+j 
            index.append((second,first))
            j+=1
        deltas[i] = ( array[index[0][0]]-array[index[0][1]] + (2 * (array[index[1][0]]-array[index[1][1]])) ) / 10
    return deltas

In [3]:
def extract_features(audio,rate):
       
    mfcc_feature = mfcc.mfcc(audio,rate, 0.0125, 0.005,20,nfft = 1200, appendEnergy = True)    
    mfcc_feature = preprocessing.scale(mfcc_feature)
    delta = calculate_delta(mfcc_feature)
    combined = np.hstack((mfcc_feature,delta)) 
    return combined

In [5]:
def train_model_voc():

    source   = r"D:\Projects\Voice-Passsword-Checker\ShaabanVoice"
    dest = r"D:\Projects\Voice recognition\models_vo"
    train_file = r"D:\Projects\Voice recognition\training_set_addition.txt"     
    file_paths = open(train_file,'r')
    count = 1
    features = np.asarray(())
    for path in file_paths:    
        path = path.strip()   

        audio,sr = librosa.load(source +"\\"+ path)
        vector   = extract_features(audio,sr)
    
        if features.size == 0:
            features = vector
        else:
            features = np.vstack((features, vector))

        if count == 6:    
            gmm = GaussianMixture(n_components = 6, max_iter = 200, covariance_type='diag',n_init = 3)
            gmm.fit(features)
            
            # dumping the trained gaussian model
            picklefile = path.split("-")[0]+"-"+path.split("-")[1].split()[0]+".gmm"
            pickle.dump(gmm,open(dest +"\\"+ picklefile,'wb'))
            print('+ modeling completed for speaker:',picklefile," with data point = ",features.shape)   
            features = np.asarray(())
            count = 0
        count = count + 1

In [6]:
train_model_voc()

+ modeling completed for speaker: Shaaban-Close.gmm  with data point =  (3208, 40)
+ modeling completed for speaker: Shaaban-Open.gmm  with data point =  (3106, 40)
+ modeling completed for speaker: Shaaban-Push.gmm  with data point =  (3191, 40)
+ modeling completed for speaker: Shaaban-Window.gmm  with data point =  (3174, 40)


In [6]:
def test_model(Audiofile):

    modelpath = r"D:\Projects\Voice recognition\models_vo"
    gmm_files = [os.path.join(modelpath,fname) for fname in
                  os.listdir(modelpath) if fname.endswith('.gmm')]
     
    #Load the Gaussian gender Models
    models    = [pickle.load(open(fname,'rb')) for fname in gmm_files]
    speakers   = [fname.split("\\")[-1].split(".gmm")[0] for fname 
                  in gmm_files]
     
    # Read the test directory and get the list of test audio files 
    audio,sr = librosa.load(Audiofile)
    vector   = extract_features(audio,sr)

    log_likelihood = np.zeros(len(models)) 

    for i in range(len(models)):
        gmm    = models[i]  #checking with each model one by one
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()
         
        winner = np.argmax(log_likelihood)
        print("\tdetected as - ", speakers[winner])
        print(log_likelihood)

In [7]:
test_model(r"D:\Projects\Voice-Passsword-Checker\audio.wav")

	detected as -  Dina-Open
[-26.90699712   0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.        ]
	detected as -  Dina-Push
[-26.90699712 -24.92036505   0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.        ]
	detected as -  Dina-Window
[-26.90699712 -24.92036505 -26.14490822   0.           0.
   0.           0.           0.           0.           0.
   0.           0.        ]
	detected as -  Romaisaa-Close
[-26.90699712 -24.92036505 -26.14490822 -24.57014779   0.
   0.           0.           0.           0.           0.
   0.           0.        ]
	detected as -  Romaisaa-Open
[-26.90699712 -24.92036505 -26.14490822 -24.57014779 -26.26822274
   0.           0.           0.           0.           0.
   0.           0.        ]
	detected as -  Romaisaa-Push
[-26.90699712 -24.92036505 -26.14490822 -24.57014779 -26.26822274
 -25.33970673   0.           0.    