In [13]:
# Import libraries
import os
import numpy as np
import librosa
from hmmlearn import hmm
from librosa.feature import mfcc

In [14]:
# Function to extract MFCC features from the audio file
class HMMTrainer(object):
    def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=1000):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        self.models = []

        if self.model_name == 'GaussianHMM':
            self.model = hmm.GaussianHMM(n_components=self.n_components, 
                    covariance_type=self.cov_type, n_iter=self.n_iter)
        else:
            raise TypeError('Invalid model type')

    # X is a 2D numpy array where each row is 13D
    def train(self, X):
        np.seterr(all='ignore')
        self.models.append(self.model.fit(X))

    # Run the model on input data
    def get_score(self, input_data):
        return self.model.score(input_data)

In [15]:
# Define the path to the audio files
input_folder = './fruits_audio/'
os.listdir(input_folder)

['apple', 'banana', 'kiwi', 'lime', 'orange', 'peach', 'pineapple']

In [16]:
for dirname in os.listdir(input_folder):
      # Get the name of the subfolder 
      subfolder = os.path.join(input_folder, dirname)
      #print(subfolder)
      label = subfolder[subfolder.rfind('/') + 1:]
      print(label)

apple
banana
kiwi
lime
orange
peach
pineapple


In [17]:
hmm_models = []

# Iterate through each subfolder
for dirname in os.listdir(input_folder):
    subfolder = os.path.join(input_folder, dirname)
    if not os.path.isdir(subfolder): 
         continue
    label = subfolder[subfolder.rfind('/') + 1:]
    X = np.array([])
    y_words = []
    
    # Extract MFCC features
    for filename in [x for x in os.listdir(subfolder) if x.endswith('.wav')][:-1]:
        filepath = os.path.join(subfolder, filename)
        sampling_freq, audio = librosa.load(filepath)            
        mfcc_features = mfcc(y=sampling_freq, sr=audio)
        if len(X) == 0:
            X = mfcc_features[:,:15]
        else:
            X = np.append(X, mfcc_features[:,:15], axis=0)            
        y_words.append(label)
    print('X.shape =', X.shape)
    
    hmm_trainer = HMMTrainer()
    hmm_trainer.train(X)
    hmm_models.append((hmm_trainer, label))
    hmm_trainer = None

X.shape = (280, 15)
X.shape = (280, 15)


X.shape = (280, 15)
X.shape = (280, 15)
X.shape = (280, 15)
X.shape = (280, 15)
X.shape = (280, 15)


In [18]:
# Test files
input_files = [
  './fruits_audio/pineapple/pineapple01.wav',
  './fruits_audio/orange/orange01.wav',
  './fruits_audio/apple/apple01.wav',
  './fruits_audio/kiwi/kiwi01.wav',
  './fruits_audio/banana/banana01.wav',
  './fruits_audio/peach/peach01.wav',
  './fruits_audio/pineapple/pineapple10.wav',
  './fruits_audio/orange/orange10.wav',
  './fruits_audio/apple/apple10.wav',
  './fruits_audio/kiwi/kiwi10.wav',
  './fruits_audio/banana/banana10.wav',
  './fruits_audio/peach/peach10.wav',
  './fruits_audio/pineapple/pineapple15.wav',
  './fruits_audio/orange/orange15.wav',
  './fruits_audio/apple/apple15.wav',
  './fruits_audio/kiwi/kiwi15.wav',
  './fruits_audio/banana/banana15.wav',
  './fruits_audio/peach/peach15.wav'
]


# Classify input data
for input_file in input_files:
  if not os.path.exists(input_file):
    print(f"File {input_file} does not exist.")
    continue
  sampling_freq, audio = librosa.load(input_file)

  # Extract MFCC features
  mfcc_features = mfcc(y=sampling_freq, sr=audio)
  mfcc_features=mfcc_features[:,:15]

  scores=[]
  for item in hmm_models:
    hmm_model, label = item

    score = hmm_model.get_score(mfcc_features)
    scores.append(score)

  # Get the index of the item with max score
  index=np.array(scores).argmax()

  # get axis of the max score
  axis = np.argmax(scores)
  
  print("\nTrue:", input_file[input_file.find('/')+1:input_file.rfind('/')])
  print("Predicted:", hmm_models[index][1])
  print("Axi:", axis)


True: fruits_audio/pineapple
Predicted: pineapple
Axi: 6

True: fruits_audio/orange
Predicted: orange
Axi: 4

True: fruits_audio/apple
Predicted: apple
Axi: 0

True: fruits_audio/kiwi
Predicted: kiwi
Axi: 2

True: fruits_audio/banana
Predicted: banana
Axi: 1

True: fruits_audio/peach
Predicted: peach
Axi: 5

True: fruits_audio/pineapple
Predicted: pineapple
Axi: 6

True: fruits_audio/orange
Predicted: orange
Axi: 4

True: fruits_audio/apple
Predicted: apple
Axi: 0

True: fruits_audio/kiwi
Predicted: kiwi
Axi: 2

True: fruits_audio/banana
Predicted: banana
Axi: 1

True: fruits_audio/peach
Predicted: peach
Axi: 5

True: fruits_audio/pineapple
Predicted: pineapple
Axi: 6

True: fruits_audio/orange
Predicted: orange
Axi: 4

True: fruits_audio/apple
Predicted: apple
Axi: 0

True: fruits_audio/kiwi
Predicted: kiwi
Axi: 2

True: fruits_audio/banana
Predicted: banana
Axi: 1

True: fruits_audio/peach
Predicted: peach
Axi: 5
