In [None]:
pip install hmmlearn librosa numpy scikit-learn

Code for GMMHMMs

In [2]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from hmmlearn.hmm import GaussianHMM
from sklearn.metrics import classification_report, accuracy_score

# Configuration
NUM_MFCC = 13  # Number of MFCC features
NUM_GAUSSIANS = 5  # Number of Gaussians in GMM
NUM_HMM_STATES = 3  # Number of states in HMM
AUDIO_PATH = "C:/Users/Natasha/Desktop/research_module/Musical_Instrument_Data"

# Step 1: Feature Extraction
def extract_features(file_path):
    """Extract MFCC + Delta + Delta-Delta features for a given audio file."""
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=NUM_MFCC)
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    return np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0).T  # Transpose for (timesteps, features)

# Step 2: Load Dataset
def load_dataset(audio_path):
    """Load audio dataset and extract features."""
    features, labels = [], []
    for instrument_folder in os.listdir(audio_path):
        folder_path = os.path.join(audio_path, instrument_folder)
        if os.path.isdir(folder_path):
            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)
                if file_name.endswith('.wav') or file_name.endswith('.mp3'):
                    try:
                        feature = extract_features(file_path)
                        features.append(feature)
                        labels.append(instrument_folder)
                    except Exception as e:
                        print(f"Error processing {file_path}: {e}")
    return features, labels

# Step 3: Train GMM-HMM Models
def train_gmm_hmm(features, labels):
    """
    Train one GMM-HMM per class (instrument).
    Returns the trained models and the label encoder.
    """
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    models = []
    for label in np.unique(labels_encoded):
        # Get features for this class
        class_features = [features[i] for i in range(len(features)) if labels_encoded[i] == label]
        class_features = np.vstack(class_features)  # Combine all sequences into one array
        
        # Train GMM-HMM
        hmm = GaussianHMM(n_components=NUM_HMM_STATES, covariance_type='diag', n_iter=100)
        hmm.fit(class_features)
        models.append(hmm)
    
    return models, label_encoder

# Step 4: Predict Class for an Audio File
def predict_class(models, label_encoder, feature):
    """
    Predict the class of a given feature sequence using GMM-HMMs.
    """
    log_likelihoods = [model.score(feature) for model in models]
    predicted_label = np.argmax(log_likelihoods)  # Class with the highest log-likelihood
    return label_encoder.inverse_transform([predicted_label])[0]

# Step 5: Evaluate Models
def evaluate_gmm_hmm(models, label_encoder, features, labels):
    """
    Evaluate the GMM-HMM models on a test dataset.
    """
    y_true = labels
    y_pred = [predict_class(models, label_encoder, feature) for feature in features]
    print("Classification Report:")
    print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.2f}")

# Main Workflow
if __name__ == "__main__":
    # Load dataset
    features, labels = load_dataset(AUDIO_PATH)
    
    # Split dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, stratify=labels, random_state=42)
    
    # Train GMM-HMM models
    models, label_encoder = train_gmm_hmm(X_train, y_train)
    
    # Evaluate GMM-HMM models
    evaluate_gmm_hmm(models, label_encoder, X_test, y_test)

Classification Report:
                 precision    recall  f1-score   support

Acoustic_guitar       0.43      0.33      0.38         9
      Bass_drum       0.90      1.00      0.95         9
          Cello       0.50      0.89      0.64         9
       Clarinet       0.67      0.44      0.53         9
    Double_bass       0.54      0.78      0.64         9
          Flute       0.50      0.78      0.61         9
         Hi_hat       0.90      1.00      0.95         9
      Saxophone       0.67      0.67      0.67         9
     Snare_drum       1.00      0.33      0.50         9
         Violin       0.50      0.18      0.27        11

       accuracy                           0.63        92
      macro avg       0.66      0.64      0.61        92
   weighted avg       0.66      0.63      0.60        92

Accuracy: 0.63


Ensemble Techniques for GMM-HMMs

1. Generate Log-Likelihood Features

In [3]:
def extract_log_likelihood_features(models, features):
    """
    Generate log-likelihood features for each input sequence using trained GMM-HMMs.
    """
    log_likelihood_features = []
    for feature in features:
        log_likelihood = [model.score(feature) for model in models]
        log_likelihood_features.append(log_likelihood)
    return np.array(log_likelihood_features)

2. Train and Evaluate SVM

In [4]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

# Extract log-likelihood features
X_train_ll = extract_log_likelihood_features(models, X_train)
X_test_ll = extract_log_likelihood_features(models, X_test)

# Standardize features
scaler = StandardScaler()
X_train_ll = scaler.fit_transform(X_train_ll)
X_test_ll = scaler.transform(X_test_ll)

# Train SVM
svm = SVC(kernel='linear', C=1.0)
svm.fit(X_train_ll, y_train)

# Evaluate SVM
y_pred = svm.predict(X_test_ll)
print("SVM Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
print(f"SVM Accuracy: {accuracy_score(y_test, y_pred):.2f}")

SVM Classification Report:
                 precision    recall  f1-score   support

Acoustic_guitar       1.00      0.11      0.20         9
      Bass_drum       0.89      0.89      0.89         9
          Cello       0.50      0.78      0.61         9
       Clarinet       0.60      0.33      0.43         9
    Double_bass       0.23      0.78      0.36         9
          Flute       0.38      0.67      0.48         9
         Hi_hat       0.40      0.67      0.50         9
      Saxophone       1.00      0.11      0.20         9
     Snare_drum       0.00      0.00      0.00         9
         Violin       1.00      0.09      0.17        11

       accuracy                           0.43        92
      macro avg       0.60      0.44      0.38        92
   weighted avg       0.61      0.43      0.38        92

SVM Accuracy: 0.43


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
