In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
import os
# !pip install hmmlearn
# !pip install features
from hmmlearn import hmm #importing GaussianHMM 
import librosa  # reading wave files
from librosa.feature import mfcc  # to extract MFCC features

In [2]:
input_folder = 'hmm-speech-recognition-0.1/audio'
# List the available word labels: each sub-directory of input_folder is one word.
for dirname in os.listdir(input_folder):
    subfolder = os.path.join(input_folder, dirname)
    # Plain files sitting next to the word folders are not labels.
    if not os.path.isdir(subfolder):
        continue
    # basename() honours the platform separator; searching for '/' by hand
    # yields 'audio\\apple' on Windows, where os.path.join() inserts '\\'.
    label = os.path.basename(subfolder)
    print(label)

apple
kiwi
lime
banana
pineapple
orange
peach


In [3]:
class HMMTrainer(object):
    """Thin wrapper around an hmmlearn HMM, used as a per-word model.

    Parameters
    ----------
    model_name : str
        Kind of HMM to build. Only 'GaussianHMM' is supported.
    n_components : int
        Number of hidden states passed to the underlying HMM.

    Raises
    ------
    ValueError
        If ``model_name`` is anything other than 'GaussianHMM'.
    """

    def __init__(self, model_name='GaussianHMM', n_components=4):
        self.model_name = model_name
        self.n_components = n_components

        # One entry is appended per train() call.
        self.models = []

        # Fail fast: previously this only printed a hint and left self.model
        # undefined, so the error surfaced later as an AttributeError.
        if self.model_name != 'GaussianHMM':
            raise ValueError(
                "Please choose GaussianHMM (got model_name=%r)" % (model_name,)
            )
        # Honour the requested state count instead of a hard-coded 4.
        self.model = hmm.GaussianHMM(n_components=self.n_components)

    def train(self, X):
        """Fit the HMM on ``X`` and record the result in ``self.models``.

        ``X`` is handed to ``self.model.fit`` unchanged.
        """
        self.models.append(self.model.fit(X))

    def get_score(self, input_data):
        """Return ``self.model.score(input_data)`` for the fitted model."""
        return self.model.score(input_data)

In [4]:
# Train one HMM per word; every sub-directory of input_folder is one word class.
N_FRAMES = 15  # number of leading MFCC frames kept from each recording

hmm_models = []
for dirname in os.listdir(input_folder):
    # Get the name of the subfolder
    subfolder = os.path.join(input_folder, dirname)
    if not os.path.isdir(subfolder):
        continue
    # Extract the label (basename() also copes with Windows separators)
    label = os.path.basename(subfolder)

    # Sort so that the recording left out of training is always the same one.
    # os.listdir() order is arbitrary, so an unsorted [:-1] drops a random file.
    # NOTE(review): presumably the left-out file is meant to be <word>15.wav,
    # which the classification cell reads; that holds only if the file names
    # are zero-padded so that 15 sorts last - TODO confirm.
    wav_files = sorted(x for x in os.listdir(subfolder) if x.endswith('.wav'))

    # Initialize variables
    feature_chunks = []
    y_words = []
    for filename in wav_files[:-1]:
        # Read the input file
        filepath = os.path.join(subfolder, filename)
        try:
            y, sr = librosa.load(filepath, duration=3, offset=0.5)
            # Extract MFCC features
            mfcc_features = mfcc(y=y, sr=sr, n_mfcc=40)
        except Exception as exc:
            # Report the unreadable file instead of silently swallowing it.
            print("Skipping %s: %s" % (filepath, exc))
            continue

        # NOTE(review): librosa documents mfcc() as returning
        # (n_mfcc, n_frames), so each row fed to the HMM is one coefficient
        # over the first N_FRAMES frames rather than one frame. The usual
        # layout is frames as rows (mfcc_features.T). Kept unchanged so the
        # trained models stay compatible with existing scoring code - confirm.
        chunk = mfcc_features[:, :N_FRAMES]
        if chunk.shape[1] != N_FRAMES:
            # Too short to stack with the other recordings.
            print("Skipping %s: only %d MFCC frames" % (filepath, chunk.shape[1]))
            continue
        feature_chunks.append(chunk)
        # Append the label (one per recording actually used)
        y_words.append(label)

    if not feature_chunks:
        print("No usable recordings for '%s'; no model trained" % label)
        continue

    # Stack once instead of growing the array with np.append on every file.
    X = np.concatenate(feature_chunks, axis=0)
    hmm_trainer = HMMTrainer()
    hmm_trainer.train(X)
    hmm_models.append((hmm_trainer, label))
    hmm_trainer = None




In [5]:
# Recordings to classify: sample number 15 of four of the words.
input_files = [
    './hmm-speech-recognition-0.1/audio/{0}/{0}15.wav'.format(word)
    for word in ('pineapple', 'orange', 'apple', 'kiwi')
]

In [6]:
# Classify input data
for input_file in input_files:
    # Read the file being classified. Loading `filepath` here would re-read
    # the last training recording for every entry of input_files.
    y, sr = librosa.load(input_file, duration=3, offset=0.5)
    # Extract MFCC features
    mfcc_features = mfcc(y=y, sr=sr, n_mfcc=40)
    # The models are fitted on the first 15 MFCC frames of each recording,
    # so score the same slice; a different width does not match the models.
    mfcc_features = mfcc_features[:, :15]

    # Define variables
    max_score = None
    output_label = None

    # Iterate through all HMM models and pick
    # the one with the highest score
    for hmm_model, label in hmm_models:
        score = hmm_model.get_score(mfcc_features)
        # `score > None` raises TypeError on Python 3, so test for None first.
        if max_score is None or score > max_score:
            max_score = score
            output_label = label

    # Print the output once per file, after every model has been scored
    print("Predicted:", output_label)

Predicted: pineapple
Predicted: orange
Predicted: apple
Predicted: kiwi
