In [1]:
import numpy as np
import pandas as pd
import os
import glob
import python_speech_features as mfcc
from scipy.io.wavfile import read
from matplotlib import pyplot
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import re
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
%matplotlib inline

In [2]:
pwd = os.getcwd() #this is the current working directory
#1st row of confusion matrix will represent male and 2nd will represent female
genderDict = {'M':0,'F':1}
speakerDict = {}


In [3]:
#pushing the indexes to 0 to redue the size of one hot vector so 1st speaker will have 0th index and so on
# in the confusion matrix and classification report
for i in range(1,143):
    speakerDict[i] = i-1 

In [4]:
class dataLoader(object):
    '''data loader class to load the data from the dictionary for both problems'''
    
    def __init__(self,problem,scaling=0):
        '''initializing directories based on problem and specifying whether scaling is required or not'''
        self.problem = problem 
        if self.problem==0:
            #if problem is of gender recognition
            self.DIR = "\\Gender_Recognition\\"
        elif self.problem==1:
            #if problem is of speaker recognition
            self.DIR = "\\Speaker_Recognition\\"
        else:
            #if problem is none raise error
            print("Values can only be 0 or 1")
            raise NotImplementedError(0)
        #setting the scaling variable
        self.scaling=scaling
        
    def returnData(self):
        '''function to return the processed data and scale it if scaling is set'''
        ###Naming conventions for train/test/val data XTrain/XTest
        self.XTrain,self.YTrain = self.dataLoader("Train",self.problem)
        self.XTest,self.YTest = self.dataLoader("Test",self.problem)

        #Doing feature scaling if self.scaling is true 
        #Ignoring the bias term as that will just 0 it
        mean = np.mean(self.XTrain[:,1:],axis=0)
        std = np.std(self.XTrain[:,1:],axis=0)
        #if scaling needs to be done
        if self.scaling:
            self.XTrain[:,1:] = (self.XTrain[:,1:]-mean)/std
            self.XTest[:,1:] = (self.XTest[:,1:]-mean)/std
        #converting to one hot enocding vectors
        return (self.XTrain,self.YTrain,self.XTest,self.YTest)

    def returnHotEncodings(self):
        '''function to return one hot encodings of the data'''
        return (self.YTrainOne,self.YTestOne,self.YValOne)

    def get_MFCC(self,audio, sr):
        '''blackbox function provided by the TA'''
        features = mfcc.mfcc(audio, sr, 0.025, 0.01, 13, appendEnergy = True)
        return np.mean(features, axis=0)

    def dataLoader(self,dirName,problem):
        '''function to load data depending on the problem'''
        #if problem is gender recognition
        if problem==0:
            labels = []
            data = []
            names = []
            for name in glob.glob(pwd+self.DIR+str(dirName)+"\\*"):
               #label is M or F which is last word of file
                label = name[-1]
                #reading the wave files in the directory
                for subnames in glob.glob(name+"\\*"):
                    #convertin the wave files to features and appending
                    sr, audio = read(subnames)
                    features =self.get_MFCC(audio, sr) #adding the bias term
                    data.append(features)
                    #converting labels to 0/1 depending upon genderDict
                    labels.append(genderDict[label])
                    names.append(subnames)
        #if problem is speaker recognition
        if problem==1:
            labels = []
            data = []
            names = []
            for name in glob.glob(pwd+self.DIR+str(dirName)+"\\*"):
                #using regex capture groups to read the speaker identity
                pattern = re.search("([0-9]{3})",name)
                label = int(pattern.group(1))
                #reading the wave files in the directory
                for subnames in glob.glob(name+"\\*"):
                    #converting to features and appending
                    sr, audio = read(subnames)
                    features =np.append(1 ,self.get_MFCC(audio, sr)) #adding the bias term
                    data.append(features)
                    #converting to labels depending upon the speaker dict
                    labels.append(speakerDict[label])
                    names.append(subnames)
        #returning the data
        return (np.array(data),np.array(labels))

In [5]:
#Data loading for gender recognition problem
loaderp1 = dataLoader(0)
XTrainp1,YTrainp1,XTestp1,YTestp1 = loaderp1.returnData()

In [6]:
#data loading for speaker recognition problem
loaderp2 = dataLoader(1)
XTrainp2,YTrainp2,XTestp2,YTestp2 = loaderp2.returnData()

In [7]:
class Trainer(object):
    '''class to train mlp,svm and gnb'''
    
    def __init__(self,XTrain,YTrain,XTest,YTest,problem):
        '''function to set variables for project part 3'''
        self.XTrain = XTrain
        self.YTrain = YTrain
        self.XTest = XTest
        self.YTest = YTest
        #problem 0 is the gender problem and 1 is the speaker problem
        self.problem = problem
    
    def doMLP(self):
        '''function to train mlp and tune using grid search and get results on the testing data'''
        #initializing the parameters as asked in the manual
        parameters = {
        'random_state':[1],
        'activation':['logistic'],
        'solver':['sgd'],
        'max_iter':[5000],
        'learning_rate_init':[0.4,0.1,0.01],
        'hidden_layer_sizes':[(128,64),(64,),(64,32),(32,)],
        
        }
        #setting the instance
        mlp = MLPClassifier()
        #initializing the girdi search with the asked variables
        clf = GridSearchCV(mlp, param_grid=parameters,scoring='f1_macro',cv=3,n_jobs=-1)
        #fitting the mlp and tunning on training data
        clf.fit(self.XTrain,self.YTrain)
        #getting prediction from the best estimator on the testing data
        self.ypred = clf.predict(self.XTest)
        #pritnting the stats using preicitons
        self.printStats(self.ypred)
        
    
    def doSVM(self,max_iter):
        '''function to train linear SVM and get results on the testing data'''
        #initializing the linear svm instance
        svm = LinearSVC(random_state=0, verbose=1, max_iter=max_iter,dual=False)
        #fitting on the training data
        svm.fit(self.XTrain,self.YTrain)
        #getting prediction on the testing data
        self.ypredsvm = svm.predict(self.XTest)
        #printing stats using obtained predictions
        self.printStats(self.ypredsvm)
        
    def doGNB(self):
        '''function to train gaussian naive bayes and get results on the testing data'''
        #initializing the gaussian naive bayes instance
        mnb = GaussianNB()
        #fitting on the training data
        mnb.fit(self.XTrain,self.YTrain)
        #getting predictions on the testing data
        self.ypredgnb = mnb.predict(self.XTest)
        #pritnting stats obtained using predictions
        self.printStats(self.ypredgnb)
    
    def printStats(self,ypred):
        '''function to print stats using predictions as asked in the project phase 3 manual'''
        
        #printing accuracy
        print("Accuracy is:",accuracy_score(ypred,self.YTest))
        print()
        
        #depending on the problem printing the classification report
        if self.problem==0:
            target_names = ['M','F']
            print("Printing classification report:")
            print(classification_report(ypred,self.YTest,target_names=target_names))
        if self.problem==1:
            #indexes are pushed by negative one behind
            print(classification_report(ypred,self.YTest))
            
        print()
        print("Printing confusion matrix")
        print(confusion_matrix(ypred,self.YTest))
        

In [8]:
#initializing the training classes 
trainer0 = Trainer(XTrain=XTrainp1,YTrain=YTrainp1,XTest=XTestp1,YTest=YTestp1,problem=0)
trainer1 = Trainer(XTrain=XTrainp2,YTrain=YTrainp2,XTest=XTestp2,YTest=YTestp2,problem=1)

In [9]:
#training and testing mlp for gender problem
trainer0.doMLP()

Accuracy is: 0.8705882352941177

Printing classification report:
              precision    recall  f1-score   support

           M       0.93      0.90      0.92       134
           F       0.68      0.75      0.71        36

    accuracy                           0.87       170
   macro avg       0.80      0.83      0.81       170
weighted avg       0.88      0.87      0.87       170


Printing confusion matrix
[[121  13]
 [  9  27]]


In [10]:
#training and testing mlp for svm problem
trainer0.doSVM(max_iter=20000)

[LibLinear]Accuracy is: 0.8352941176470589

Printing classification report:
              precision    recall  f1-score   support

           M       0.92      0.87      0.90       138
           F       0.55      0.69      0.61        32

    accuracy                           0.84       170
   macro avg       0.74      0.78      0.75       170
weighted avg       0.85      0.84      0.84       170


Printing confusion matrix
[[120  18]
 [ 10  22]]


In [11]:
#training and testing gnb for gender problem
trainer0.doGNB()

Accuracy is: 0.8529411764705882

Printing classification report:
              precision    recall  f1-score   support

           M       0.92      0.89      0.91       135
           F       0.62      0.71      0.67        35

    accuracy                           0.85       170
   macro avg       0.77      0.80      0.79       170
weighted avg       0.86      0.85      0.86       170


Printing confusion matrix
[[120  15]
 [ 10  25]]


In [12]:
#training and testing mlp for speaker problem
trainer1.doMLP()



Accuracy is: 0.9507042253521126

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         2
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         2
           6       1.00      1.00      1.00         2
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2
           9       1.00      1.00      1.00         2
          10       1.00      1.00      1.00         2
          11       1.00      1.00      1.00         2
          12       1.00      1.00      1.00         2
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         2
          15       1.00      1.00      1.00         2
          16       1.00      1.00      1.00     

  'recall', 'true', average, warn_for)


In [13]:
#training and testing svm for speaker problem
trainer1.doSVM(max_iter=2000)

[LibLinear]Accuracy is: 0.8450704225352113

              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      1.00      1.00         2
           2       0.50      1.00      0.67         1
           3       0.00      0.00      0.00         0
           4       1.00      1.00      1.00         2
           5       1.00      0.67      0.80         3
           6       1.00      0.33      0.50         6
           7       0.50      1.00      0.67         1
           8       1.00      0.50      0.67         4
           9       1.00      1.00      1.00         2
          10       1.00      1.00      1.00         2
          11       1.00      1.00      1.00         2
          12       1.00      0.67      0.80         3
          13       1.00      0.67      0.80         3
          14       1.00      1.00      1.00         2
          15       1.00      1.00      1.00         2
          16       1.00      0.67    

  'recall', 'true', average, warn_for)


In [14]:
#training and testing mlp for gnb problem
trainer1.doGNB()

Accuracy is: 0.9225352112676056

              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      1.00      1.00         2
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         2
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         2
           6       1.00      1.00      1.00         2
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2
           9       1.00      1.00      1.00         2
          10       1.00      1.00      1.00         2
          11       1.00      1.00      1.00         2
          12       1.00      1.00      1.00         2
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         2
          15       1.00      1.00      1.00         2
          16       1.00      0.40      0.57     

  'recall', 'true', average, warn_for)
