In [13]:
import os
import _pickle as cPickle
import numpy as np
from scipy.io.wavfile import read
import python_speech_features as mfcc
from sklearn import preprocessing
import warnings
warnings.filterwarnings("ignore")
import python_speech_features as mfcc
from sklearn import mixture
import time

In [25]:
 class MFCC:
     """Extract MFCC-based feature matrices from a raw audio signal.

     Relies on the notebook-level imports ``python_speech_features as mfcc``,
     ``sklearn.preprocessing`` and ``numpy as np``.
     """

     def __init__(self, audio, sample_rate=16000):
         """Store the signal and its sample rate.

         Args:
             audio: audio samples, passed unchanged to ``mfcc.mfcc``.
             sample_rate: sampling rate of ``audio`` in Hz (default 16000).
         """
         self.audio = audio
         self.sr = sample_rate

     def get_features(self):
         """Return scaled 13-coefficient MFCC frames with NaN frames removed.

         Returns:
             2-D array with one row per valid frame and 13 columns, passed
             through ``preprocessing.scale``.

         Raises:
             ValueError: if every extracted frame contains a NaN.
         """
         features = mfcc.mfcc(self.audio, self.sr, 0.025, 0.01, 13, appendEnergy=False)
         # Drop every frame (row) containing a NaN in one vectorised step.
         # A boolean mask keeps the result 2-D even when a single frame
         # survives, and avoids re-copying the matrix once per frame.
         valid_rows = ~np.isnan(features).any(axis=1)
         features = features[valid_rows]
         if features.shape[0] == 0:
             raise ValueError("no NaN-free MFCC frames could be extracted from the audio")
         return preprocessing.scale(features)

     def get_combined_features(self):
         """Return scaled 20-coefficient MFCCs stacked side by side with their deltas.

         Returns:
             2-D array with one row per frame and 40 columns
             (20 scaled MFCCs followed by their 20 deltas).
         """
         features = mfcc.mfcc(self.audio, self.sr, 0.025, 0.01, 20, appendEnergy=False)
         mfcc_feat = preprocessing.scale(features)
         delta = self.calculate_delta(mfcc_feat)
         return np.hstack((mfcc_feat, delta))

     def calculate_delta(self, array):
         """Calculate and return the delta of the given feature-vector matrix.

         For frame ``i`` the delta is
         ``(a[i+1] - a[i-1] + 2 * (a[i+2] - a[i-2])) / 10``,
         where frame indices that fall outside the matrix are clamped to the
         first / last frame.

         Args:
             array: 2-D matrix of shape (frames, coefficients); any number of
                 coefficient columns is accepted.

         Returns:
             Float array with the same shape as ``array``.

         Raises:
             ValueError: if ``array`` is not 2-D.
         """
         array = np.asarray(array)
         rows, cols = array.shape  # unpacking enforces a 2-D input
         last = rows - 1
         frame = np.arange(rows)

         # Clamp neighbouring frame indices to the valid range [0, rows - 1].
         prev1 = np.maximum(frame - 1, 0)
         next1 = np.minimum(frame + 1, last)
         prev2 = np.maximum(frame - 2, 0)
         next2 = np.minimum(frame + 2, last)

         deltas = (array[next1] - array[prev1] + (2 * (array[next2] - array[prev2]))) / 10
         return deltas.astype(float, copy=False)

In [26]:
class Gender_Classification:
    """Pick the best-scoring gender model for an audio clip.

    Every ``*.gmm`` file in ``model_path`` is unpickled into a model; the file
    name without its extension is used as that model's gender label.
    Each model must expose ``score(features)``.
    NOTE(review): presumably these are scikit-learn mixture models - confirm.
    """

    def __init__(self, sample_rate=16000, model_path=".\\pygender\\"):
        """Load all pickled models found in ``model_path``.

        Args:
            sample_rate: sampling rate (Hz) of the audio that will be classified.
            model_path: directory containing the ``*.gmm`` pickle files.
        """
        self.sr = sample_rate
        self.model_path = model_path
        self.models = []
        self.genders = []
        # Sorted so model order (and therefore tie-breaking) is deterministic;
        # os.listdir returns entries in arbitrary order.
        for fname in sorted(os.listdir(model_path)):
            if not fname.endswith('.gmm'):
                continue
            # SECURITY: unpickling runs arbitrary code; only load model files
            # from a trusted location.
            with open(os.path.join(model_path, fname), 'rb') as model_file:
                self.models.append(cPickle.load(model_file))
            # Label comes from the bare file name, independent of the OS path
            # separator.
            self.genders.append(os.path.splitext(fname)[0])

    def classify(self, audio):
        """Return the label of the model that scores ``audio`` highest.

        Args:
            audio: audio samples recorded at ``self.sr``.

        Returns:
            The gender label (model file name without ``.gmm``).

        Raises:
            ValueError: if no models were loaded.
        """
        if not self.models:
            raise ValueError("no .gmm models were loaded from %r" % (self.model_path,))
        features = MFCC(audio, self.sr).get_features()
        # np.sum works whether score() returns a scalar or per-frame values.
        log_likelihood = np.array([np.sum(gmm.score(features)) for gmm in self.models])
        winner = int(np.argmax(log_likelihood))
        return self.genders[winner]
        

In [27]:
class Speaker_Identification:
    """Identify which enrolled speaker, if any, produced an audio clip.

    Every ``*.gmm`` file in ``model_path`` is unpickled into a model; the file
    name without its extension is used as that speaker's name.
    Each model must expose ``score(features)``.
    NOTE(review): presumably these are scikit-learn mixture models - confirm.
    """

    def __init__(self, sample_rate=16000, model_path=r".\speaker_models\\", threshold=-24.50):
        """Load all pickled speaker models found in ``model_path``.

        Args:
            sample_rate: sampling rate (Hz) of the audio that will be identified.
            model_path: directory containing the ``*.gmm`` pickle files.
            threshold: the best model score must exceed this value for a
                speaker to be reported.
                NOTE(review): -24.50 looks empirically tuned - confirm.
        """
        self.sr = sample_rate
        self.model_path = model_path
        self.threshold = threshold
        self.models = []
        self.speakers = []
        # Sorted so model order (and therefore tie-breaking) is deterministic;
        # os.listdir returns entries in arbitrary order.
        for fname in sorted(os.listdir(model_path)):
            if not fname.endswith('.gmm'):
                continue
            # SECURITY: unpickling runs arbitrary code; only load model files
            # from a trusted location.
            with open(os.path.join(model_path, fname), 'rb') as model_file:
                self.models.append(cPickle.load(model_file))
            # Name comes from the bare file name, independent of the OS path
            # separator.
            self.speakers.append(os.path.splitext(fname)[0])

    def identify(self, audio):
        """Return the best-matching speaker name, or ``"Speaker undetected"``.

        Args:
            audio: audio samples recorded at ``self.sr``.

        Returns:
            The speaker name whose model scores highest, provided that score is
            greater than ``self.threshold``; otherwise ``"Speaker undetected"``.

        Raises:
            ValueError: if no models were loaded.
        """
        if not self.models:
            raise ValueError("no .gmm models were loaded from %r" % (self.model_path,))
        vector = MFCC(audio, self.sr).get_combined_features()
        # np.sum works whether score() returns a scalar or per-frame values.
        log_likelihood = np.array([np.sum(gmm.score(vector)) for gmm in self.models])
        winner = int(np.argmax(log_likelihood))
        # Comparing the winner's own score (rather than Python max()) keeps the
        # outcome well-defined if a score is NaN: the comparison is False.
        if log_likelihood[winner] > self.threshold:
            return self.speakers[winner]
        return "Speaker undetected"

In [29]:
# Demo 1: gender classification.
# `read` yields the file's sample rate and its samples; the classifier is built
# with that sample rate so feature extraction matches the recording.
sr, audio  = read(r".\test_audio\0bIb6Pjzk1g.wav")
gc = Gender_Classification(sr)
yg = gc.classify(audio)
print("Gender of the speaker:", yg)

# Demo 2: speaker identification on a second recording.
# Note: `sr`, `audio` and `yg` are reassigned here, so after this cell `yg`
# holds the speaker result, not the gender label.
sr, audio  = read(r".\test_audio\anthonyschaller-20071221-\a0491.wav")
si = Speaker_Identification(sr)
yg = si.identify(audio)
print("Speaker's name:", yg)

Gender of the speaker: female
Speaker's name: anthonyschaller
