In [8]:
import os
import glob

import numpy as np
from scipy.io import wavfile
from hmmlearn import hmm
from python_speech_features import mfcc

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

print("Import OK!")

Import OK!


Wrapper class for Hidden Markov clasiffier.

In [9]:
# Class to handle all HMM related processing
class HMMTrainer(object):
    def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=1000, seed=42):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        self.models = []

        if self.model_name == 'GaussianHMM':
            self.model = hmm.GaussianHMM(n_components=self.n_components, 
                    covariance_type=self.cov_type, n_iter=self.n_iter, random_state=seed)
        else:
            raise TypeError('Invalid model type')

    # X is a 2D numpy array where each row is 13D
    def train(self, X):
        np.seterr(all='ignore')
        self.models.append(self.model.fit(X))

    # Run the model on input data
    def get_score(self, input_data):
        return self.model.score(input_data)

Train HMMs on audio data

In [13]:
def extractMFCCFromDir(dirName):
    X = np.array([])
    for f in glob.glob(targetTrainDir + '/*.wav'):
        fs, s = wavfile.read(f)
        mfcc_features = mfcc(s, fs)
        
        if len(X) == 0:
            X = mfcc_features
        else:
            X = np.append(X, mfcc_features, axis=0)
    
    return X

In [15]:
targetTrainDir = "target_train"
nonTargetTrainDir = "non_target_train"

hmm_target = None
hmm_nonTarget = None

y = []

#Train target (1) HMM
X = extractMFCCFromDir(targetTrainDir)
y.append([1 for i in range(len(X))])

hmm_trainer = HMMTrainer()
hmm_trainer.train(X)
hmm_target = hmm_trainer
hmm_trainer = None

#Train nonTarget (0) HMM
X = extractMFCCFromDir(nonTargetTrainDir)
    
hmm_trainer = HMMTrainer()
hmm_trainer.train(X)
hmm_nonTarget = hmm_trainer
hmm_trainer = None

print("Training OK!")

Training OK!


Evaluate on Dev audio data

In [17]:
targetTestDir = "target_dev"
nonTargetTestDir = "non_target_dev"

correct = 0
incorrect = 0

yTest = []
predictions = []

for f in glob.glob(targetTestDir + '/*.wav'):
    yTest.append(1)
    fs, s = wavfile.read(f)
    mfcc_features = mfcc(s, fs)
    
    targetScore = hmm_target.get_score(mfcc_features)
    nonTargetScore = hmm_nonTarget.get_score(mfcc_features)
    
    if(targetScore >= nonTargetScore):
        predictions.append(1)
        # print(f'Correct as Target: Target={targetScore}, Non Target={nonTargetScore}')
        correct += 1
    else:
        predictions.append(0)
        #print(f'Incorrect, should be Target: Target={targetScore}, Non Target={nonTargetScore}')
        incorrect += 1

for f in glob.glob(nonTargetTestDir + '/*.wav'):
    yTest.append(0)
    fs, s = wavfile.read(f)
    mfcc_features = mfcc(s, fs)
    
    targetScore = hmm_target.get_score(mfcc_features)
    nonTargetScore = hmm_nonTarget.get_score(mfcc_features)
    
    if(targetScore < nonTargetScore):
        predictions.append(0)
        #print(f'Correct as Non Target: Target={targetScore}, Non Target={nonTargetScore}')
        correct += 1
    else:
        predictions.append(1)
        #print(f'Incorrect, should be Non Target: Target={targetScore}, Non Target={nonTargetScore}')
        incorrect += 1

print(f"{(correct)/(correct+incorrect)*100}%; {correct} correct, {incorrect} incorrect.")

print(yTest)
print(predictions)
print(accuracy_score(yTest, predictions))
print(confusion_matrix(yTest, predictions))

print("Evaluation OK!")

14.285714285714285%; 10 correct, 60 incorrect.
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
0.14285714285714285
[[ 0 60]
 [ 0 10]]
Evaluation OK!
