In [5]:
import os
import glob

import numpy as np
from scipy.io import wavfile
from hmmlearn import hmm
from python_speech_features import mfcc

# Sanity check: this line is only reached if every import above succeeded.
print("Import OK!")

Import OK!


Wrapper class for the Hidden Markov Model (HMM) classifier.

In [6]:
# Class to handle all HMM related processing
class HMMTrainer(object):
    """Thin wrapper around an hmmlearn HMM: fit it on feature frames, then score new data.

    Parameters
    ----------
    model_name : str
        Kind of HMM to build. Only ``'GaussianHMM'`` is supported.
    n_components : int
        Number of hidden states.
    cov_type : str
        Covariance type forwarded to hmmlearn as ``covariance_type``.
    n_iter : int
        Maximum number of EM iterations.
    random_state : int, RandomState or None
        Seed forwarded to hmmlearn so that training is reproducible.
        ``None`` (the default) keeps the previous unseeded behaviour.

    Raises
    ------
    TypeError
        If ``model_name`` is not a supported model.
    """

    def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag',
                 n_iter=1000, random_state=None):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        self.random_state = random_state
        # History of fitted models, one entry per call to train().
        self.models = []

        if self.model_name == 'GaussianHMM':
            self.model = hmm.GaussianHMM(n_components=self.n_components,
                                         covariance_type=self.cov_type,
                                         n_iter=self.n_iter,
                                         random_state=self.random_state)
        else:
            raise TypeError('Invalid model type')

    def train(self, X, lengths=None):
        """Fit the wrapped model on ``X``.

        Parameters
        ----------
        X : 2D numpy array
            One feature vector per row (13-dimensional MFCC frames in this notebook).
        lengths : sequence of int, optional
            Number of rows belonging to each individual sequence when ``X`` is a
            concatenation of several sequences. ``None`` treats ``X`` as one
            single sequence, which is the original behaviour.
        """
        # Silence floating-point warnings only while fitting, instead of changing
        # NumPy's process-wide error handling for the rest of the session.
        with np.errstate(all='ignore'):
            fitted = self.model.fit(X, lengths=lengths)
        self.models.append(fitted)

    def get_score(self, input_data, lengths=None):
        """Return the model's score (log-likelihood) for ``input_data``.

        ``lengths`` has the same meaning as in :meth:`train`.
        """
        # Scoring used to run with warnings globally disabled by train();
        # keep it quiet here without leaking that state.
        with np.errstate(all='ignore'):
            return self.model.score(input_data, lengths=lengths)

Train HMMs on audio data

In [8]:
targetTrainDir = "target_train"
nonTargetTrainDir = "non_target_train"


def load_mfcc_frames(wav_dir):
    """Read every ``*.wav`` file in ``wav_dir`` and stack their MFCC frames.

    Returns
    -------
    (frames, n_files)
        ``frames`` is a single 2D array holding the MFCC rows of all files, in
        sorted file-name order; ``n_files`` is the number of files read.

    Raises
    ------
    FileNotFoundError
        If the directory contains no ``.wav`` files, so training cannot proceed.
    """
    per_file = []
    # Sorted so the frame order (and therefore training) is deterministic.
    for path in sorted(glob.glob(wav_dir + '/*.wav')):
        fs, s = wavfile.read(path)
        per_file.append(mfcc(s, fs))

    if not per_file:
        raise FileNotFoundError(f"No .wav files found in '{wav_dir}'")

    # One concatenation at the end instead of re-copying the array per file.
    return np.concatenate(per_file, axis=0), len(per_file)


# Train target (1) HMM
X, n_files = load_mfcc_frames(targetTrainDir)
y = [1] * n_files  # one label per training file
hmm_target = HMMTrainer()
hmm_target.train(X)

# Train nonTarget (0) HMM
X, n_files = load_mfcc_frames(nonTargetTrainDir)
y = [0] * n_files  # one label per training file
hmm_nonTarget = HMMTrainer()
hmm_nonTarget.train(X)

print("Training OK!")

Training OK!


Evaluate on Dev audio data

In [9]:
targetTestDir = "target_dev"
nonTargetTestDir = "non_target_dev"

correct = 0
incorrect = 0

# Each entry: (directory, label used in the log line, whether its files are true targets).
dev_sets = (
    (targetTestDir, 'Target', True),
    (nonTargetTestDir, 'Non Target', False),
)

for wav_dir, label, is_target in dev_sets:
    # Sorted so the log is in the same order on every run.
    for f in sorted(glob.glob(wav_dir + '/*.wav')):
        fs, s = wavfile.read(f)
        mfcc_features = mfcc(s, fs)

        targetScore = hmm_target.get_score(mfcc_features)
        nonTargetScore = hmm_nonTarget.get_score(mfcc_features)

        # A file is classified as target when the target model scores it at
        # least as high as the non-target model (ties go to target).
        if is_target:
            is_correct = targetScore >= nonTargetScore
        else:
            is_correct = targetScore < nonTargetScore

        if is_correct:
            print(f'Correct as {label}: Target={targetScore}, Non Target={nonTargetScore}')
            correct += 1
        else:
            print(f'Incorrect, should be {label}: Target={targetScore}, Non Target={nonTargetScore}')
            incorrect += 1

total = correct + incorrect
if total == 0:
    # Fail with a clear message instead of a ZeroDivisionError below.
    raise FileNotFoundError(
        f"No .wav files found in '{targetTestDir}' or '{nonTargetTestDir}'")

print(f"{(correct)/(total)*100}%; {correct} correct, {incorrect} incorrect.")
print("Evaluation OK!")

Incorrect, should be Target: Target=-30977.466761692842, Non Target=-29553.702472552293
Incorrect, should be Target: Target=-36183.05850890484, Non Target=-35316.21983860671
Incorrect, should be Target: Target=-38593.1828540726, Non Target=-37905.03709356334
Incorrect, should be Target: Target=-61577.633526465805, Non Target=-61258.89854393109
Incorrect, should be Target: Target=-53296.04252445513, Non Target=-52311.97092274584
Incorrect, should be Target: Target=-40640.60301224684, Non Target=-39479.10158917778
Incorrect, should be Target: Target=-40071.71907996566, Non Target=-39270.28988456595
Incorrect, should be Target: Target=-44374.683695206855, Non Target=-43740.70619126588
Incorrect, should be Target: Target=-47591.7120054944, Non Target=-46912.795851192284
Incorrect, should be Target: Target=-39187.65366579915, Non Target=-37828.11664225013
Correct as Non Target: Target=-69439.9608612688, Non Target=-67060.84285209308
Correct as Non Target: Target=-40610.057285864495, Non Tar