# 06 - Classical ML Models (SVM, Random Forest, GMM)

Train and evaluate classical machine learning models for audio classification.

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

print("✓ Imports complete")

## Prepare Flattened Features

In [None]:
# Classical estimators expect an (n_samples, n_features) matrix, so collapse
# every per-sample feature array into a single row; the sample axis is kept.
X_train_flat, X_val_flat, X_test_flat = (
    split.reshape(split.shape[0], -1) for split in (X_train, X_val, X_test)
)

# Report the resulting matrix shape of each split
print(
    "Flattened feature shapes:",
    f"  X_train_flat: {X_train_flat.shape}",
    f"  X_val_flat: {X_val_flat.shape}",
    f"  X_test_flat: {X_test_flat.shape}",
    sep="\n",
)

## Support Vector Machine (SVM)

In [None]:
# Section banner
print("\n" + "="*70, "SUPPORT VECTOR MACHINE (SVM)", "="*70, sep="\n")

# Fit an RBF-kernel SVM on the flattened training features.
# probability=True additionally enables predict_proba on the fitted model.
print("Training SVM...")
svm_model = SVC(
    kernel='rbf',
    C=1.0,
    gamma='scale',
    probability=True,
    random_state=42,
).fit(X_train_flat, y_train)
print("✓ SVM training complete\n")

# Held-out validation split: accuracy only
predictions_svm_val = svm_model.predict(X_val_flat)
accuracy_svm_val = accuracy_score(y_val, predictions_svm_val)

# Test split: accuracy plus support-weighted precision / recall / F1
predictions_svm = svm_model.predict(X_test_flat)
accuracy_svm = accuracy_score(y_test, predictions_svm)
precision_svm, recall_svm, f1_svm = (
    metric(y_test, predictions_svm, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

# Kept for the confusion-matrix plot drawn later in the notebook
cm_svm = confusion_matrix(y_test, predictions_svm)

print(
    "SVM Results:",
    f"  Validation Accuracy: {accuracy_svm_val:.4f}",
    f"  Test Accuracy:  {accuracy_svm:.4f}",
    f"  Precision: {precision_svm:.4f}",
    f"  Recall:    {recall_svm:.4f}",
    f"  F1-score:  {f1_svm:.4f}",
    sep="\n",
)

## Random Forest

In [None]:
# Section banner
print("\n" + "="*70, "RANDOM FOREST", "="*70, sep="\n")

# Fit a 100-tree forest on the flattened training features;
# n_jobs=-1 lets scikit-learn use every available core.
print("Training Random Forest...")
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
).fit(X_train_flat, y_train)
print("✓ Random Forest training complete\n")

# Held-out validation split: accuracy only
predictions_rf_val = rf_model.predict(X_val_flat)
accuracy_rf_val = accuracy_score(y_val, predictions_rf_val)

# Test split: accuracy plus support-weighted precision / recall / F1
predictions_rf = rf_model.predict(X_test_flat)
accuracy_rf = accuracy_score(y_test, predictions_rf)
precision_rf, recall_rf, f1_rf = (
    metric(y_test, predictions_rf, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

# Kept for the confusion-matrix plot drawn later in the notebook
cm_rf = confusion_matrix(y_test, predictions_rf)

print(
    "Random Forest Results:",
    f"  Validation Accuracy: {accuracy_rf_val:.4f}",
    f"  Test Accuracy:  {accuracy_rf:.4f}",
    f"  Precision: {precision_rf:.4f}",
    f"  Recall:    {recall_rf:.4f}",
    f"  F1-score:  {f1_rf:.4f}",
    sep="\n",
)

## Gaussian Mixture Model (GMM)

In [None]:
print("\n" + "="*70)
print("GAUSSIAN MIXTURE MODEL (GMM)")
print("="*70)

# Apply PCA: project the flattened features down to a fixed number of
# components before fitting one density model per class.
n_components_pca = 128
# The message reads the variable so it cannot drift from the value actually used
print(f"Applying PCA (n_components={n_components_pca})...")
pca = PCA(n_components=n_components_pca, random_state=42)

# Fit the projection on the training split only, then reuse it for val/test
X_train_pca = pca.fit_transform(X_train_flat)
X_val_pca = pca.transform(X_val_flat)
X_test_pca = pca.transform(X_test_flat)
print(f"✓ PCA complete. Shape: {X_train_pca.shape}\n")

# Train per-class GMM: a single diagonal-covariance Gaussian for each label
print("Training GMM models for each class...")
gmm_models = {}

# y_train may be a pandas Series or a plain NumPy array (e.g. the output of a
# label encoder). ndarray has no .unique(), so normalise to an array first;
# pd.unique keeps first-appearance order, matching Series.unique().
y_train_labels = np.asarray(y_train)

for class_name in pd.unique(y_train_labels):
    # Boolean mask selects the PCA rows belonging to this class
    X_train_class = X_train_pca[y_train_labels == class_name]
    gmm = GaussianMixture(n_components=1, covariance_type='diag', random_state=42)
    gmm.fit(X_train_class)
    gmm_models[class_name] = gmm

print(f"✓ Trained GMM for {len(gmm_models)} classes\n")

# Maximum-likelihood classifier over the per-class GMMs (used for both validation and test predictions)
def predict_gmm(X, gmm_models):
    """Assign each row of ``X`` to the class whose model scores it highest.

    Args:
        X: 2-D array-like of shape (n_samples, n_features); each row is one
            sample in the feature space the models were fitted on.
        gmm_models: Mapping of class label -> fitted model exposing
            ``score_samples(X)``, which returns one log-likelihood per row
            (e.g. scikit-learn ``GaussianMixture`` instances).

    Returns:
        np.ndarray of length n_samples with the predicted class label for
        each row. An empty ``X`` yields an empty array.

    Raises:
        ValueError: If ``X`` has samples but ``gmm_models`` is empty.
    """
    X = np.asarray(X)
    if X.shape[0] == 0:
        # Nothing to score; same result as collecting zero predictions.
        return np.array([])
    if not gmm_models:
        raise ValueError("gmm_models must contain at least one fitted model")

    labels = list(gmm_models)
    # One vectorised call per class instead of one call per (sample, class)
    # pair: columns follow the mapping's insertion order.
    log_likelihoods = np.column_stack(
        [gmm_models[label].score_samples(X) for label in labels]
    )
    # argmax returns the first maximum, so ties go to the earliest-inserted
    # class, the same rule max() over the dict applied.
    best = np.argmax(log_likelihoods, axis=1)
    return np.array(labels)[best]

# Validation split: accuracy only
predictions_gmm_val = predict_gmm(X_val_pca, gmm_models)
accuracy_gmm_val = accuracy_score(y_val, predictions_gmm_val)

# Test split: accuracy plus support-weighted precision / recall / F1
predictions_gmm = predict_gmm(X_test_pca, gmm_models)
accuracy_gmm = accuracy_score(y_test, predictions_gmm)
precision_gmm, recall_gmm, f1_gmm = (
    metric(y_test, predictions_gmm, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

# Kept for the confusion-matrix plot drawn later in the notebook
cm_gmm = confusion_matrix(y_test, predictions_gmm)

print(
    "GMM Results:",
    f"  Validation Accuracy: {accuracy_gmm_val:.4f}",
    f"  Test Accuracy:  {accuracy_gmm:.4f}",
    f"  Precision: {precision_gmm:.4f}",
    f"  Recall:    {recall_gmm:.4f}",
    f"  F1-score:  {f1_gmm:.4f}",
    sep="\n",
)

## Model Comparison Summary

In [None]:
# Gather every model's headline metrics into one table, one row per model.
# Each list is ordered SVM, Random Forest, GMM to line up with the 'Model' column.
comparison_columns = {
    'Model': ['SVM', 'Random Forest', 'GMM'],
    'Val Accuracy': [accuracy_svm_val, accuracy_rf_val, accuracy_gmm_val],
    'Test Accuracy': [accuracy_svm, accuracy_rf, accuracy_gmm],
    'Precision': [precision_svm, precision_rf, precision_gmm],
    'Recall': [recall_svm, recall_rf, recall_gmm],
    'F1-Score': [f1_svm, f1_rf, f1_gmm],
}
comparison_df = pd.DataFrame(comparison_columns)

# Framed plain-text report; index=False hides the integer row index
print(
    "\n" + "="*70,
    "CLASSICAL ML MODELS - COMPARISON",
    "="*70,
    comparison_df.to_string(index=False),
    "="*70,
    sep="\n",
)

## Visualize Results

In [None]:
class_labels = label_encoder.classes_

# Plot confusion matrices: one heatmap per model, side by side
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# (confusion matrix, panel title, colour map) for each subplot, left to right
cm_panels = (
    (cm_svm, 'SVM Confusion Matrix', 'Blues'),
    (cm_rf, 'Random Forest Confusion Matrix', 'Greens'),
    (cm_gmm, 'GMM Confusion Matrix', 'Oranges'),
)

for panel_ax, (panel_cm, panel_title, panel_cmap) in zip(axes, cm_panels):
    # Integer-annotated cells, no colour bar, class names on both axes
    sns.heatmap(
        panel_cm,
        annot=True,
        fmt='d',
        cmap=panel_cmap,
        ax=panel_ax,
        cbar=False,
        xticklabels=class_labels,
        yticklabels=class_labels,
    )
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel('True Label')
    panel_ax.set_xlabel('Predicted Label')

plt.tight_layout()
plt.show()

# Accuracy comparison: grouped bars, validation next to test for each model
fig, ax = plt.subplots(figsize=(10, 5))

x = np.arange(3)   # one group position per model
width = 0.35       # width of a single bar within a group

val_accs = [accuracy_svm_val, accuracy_rf_val, accuracy_gmm_val]
test_accs = [accuracy_svm, accuracy_rf, accuracy_gmm]

# Validation bars sit half a bar-width left of the tick, test bars half a width right
bar_series = (
    (x - width/2, val_accs, 'Validation', 'skyblue', 'navy'),
    (x + width/2, test_accs, 'Test', 'coral', 'darkred'),
)
for bar_pos, bar_heights, bar_label, bar_face, bar_edge in bar_series:
    ax.bar(bar_pos, bar_heights, width, label=bar_label, color=bar_face, edgecolor=bar_edge)

ax.set_ylabel('Accuracy')
ax.set_title('Classical ML Models - Validation vs Test Accuracy')
ax.set_xticks(x)
ax.set_xticklabels(['SVM', 'Random Forest', 'GMM'])
ax.legend()
# Accuracy tops out at 1.0; the extra headroom keeps tall bars clear of the frame
ax.set_ylim([0, 1.1])
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()