In [9]:
############################################################
# Audio Classification Pipeline
# Features: MFCC (40-d), Mel-Spectrogram (128-d), OpenL3 (1024-d)
# Models: Random Forest, XGBoost, Logistic Regression (ElasticNet),
# SVM, and MLP (for high-dimensional embeddings)
############################################################


import glob
import os
import librosa
import numpy as np
import openl3
import joblib


############################################################
# Step 1: Load all audio files and their labels
############################################################
# Recursively load all .wav files inside BabyCryingSounds directory
files = glob.glob(r"C:\Users\akars\OneDrive\Desktop\Akarsh\Akarsh\BabyCryingSounds\**\*.wav", recursive=True)


# Labels are taken from the parent folder name of each file
labels = [os.path.basename(os.path.dirname(f)) for f in files]

In [10]:
joblib.dump(labels, "label_encoder.pkl")


['label_encoder.pkl']

In [5]:
############################################################
# Step 2: Feature Extraction
# - MFCC (low-dimensional, 40)
# - Mel-spectrogram (medium-dimensional, 128)
# - OpenL3 embeddings (high-dimensional, 1024)
############################################################
features1 = [] # MFCC features
features2 = [] # Mel-Spectrogram features
features3 = [] # OpenL3 embeddings


for f in files:
    # Load audio file
    y, sr = librosa.load(f, sr=None, mono=True)
    # --- MFCC (basic features) ---
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    vec1 = np.mean(mfcc.T, axis=0) # take mean across time frames
    features1.append(vec1)
    # --- Mel-Spectrogram (medium features) ---
    melspc = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    vec2 = np.mean(melspc.T, axis=0)
    features2.append(vec2)
    # --- OpenL3 embeddings (semantic, high-dim) ---
    emb, ts = openl3.get_audio_embedding(y, sr, content_type="env", embedding_size=512)
    vec3 = emb.mean(axis=0) # average across time
    features3.append(vec3)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 903ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 894ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 836ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 842ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 827ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 840ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 835ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 837ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 829ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 838ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 812ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 785ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 886ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [7]:
############################################################
# Step 2.5: Save Extracted Features (Optional)
# Save embeddings to disk before moving to next step
############################################################

save_dir = r"C:\Users\akars\OneDrive\Desktop\Akarsh\Akarsh\SavedFeatures"
os.makedirs(save_dir, exist_ok=True)

# Save all feature arrays and labels
np.save(os.path.join(save_dir, "features_mfcc.npy"), np.vstack(features1))
np.save(os.path.join(save_dir, "features_mel.npy"), np.vstack(features2))
np.save(os.path.join(save_dir, "features_openl3.npy"), np.vstack(features3))
np.save(os.path.join(save_dir, "labels.npy"), np.array(labels))

print("✅ All feature files saved successfully at:", save_dir)


NameError: name 'features1' is not defined

In [4]:
############################################################
# Step 2: Load Saved Features and Encode Labels
############################################################
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder

# Path where Step 1 saved the features
save_dir = r"C:\Users\akars\OneDrive\Desktop\Akarsh\Akarsh\SavedFeatures"

# ✅ Load saved feature arrays
X1 = np.load(os.path.join(save_dir, "features_mfcc.npy"))        # MFCC matrix
X2 = np.load(os.path.join(save_dir, "features_mel.npy"))         # Mel-Spectrogram matrix
X3 = np.load(os.path.join(save_dir, "features_openl3.npy"))      # OpenL3 embeddings
y  = np.load(os.path.join(save_dir, "labels.npy"))               # Labels

# ✅ Encode labels (text → numeric)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

print("✅ Loaded and encoded features successfully!")
print(f"MFCC shape: {X1.shape}, Mel shape: {X2.shape}, OpenL3 shape: {X3.shape}")
print(f"Total samples: {len(y_encoded)}, Unique classes: {len(np.unique(y_encoded))}")


✅ Loaded and encoded features successfully!
MFCC shape: (1197, 40), Mel shape: (1197, 128), OpenL3 shape: (1197, 512)
Total samples: 1197, Unique classes: 8


In [5]:
############################################################
# Step 4: Train/Test Split + Standardization
############################################################
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


# Split separately for each feature type
X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y_encoded, test_size=0.2, stratify=y_encoded)
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y_encoded, test_size=0.2, stratify=y_encoded)
X3_train, X3_test, y3_train, y3_test = train_test_split(X3, y_encoded, test_size=0.2, stratify=y_encoded)


# Scale features (important for Logistic Regression & SVM)
scaler1, scaler2, scaler3 = StandardScaler(), StandardScaler(), StandardScaler()


X1_train_scaled = scaler1.fit_transform(X1_train)
X1_test_scaled = scaler1.transform(X1_test)


X2_train_scaled = scaler2.fit_transform(X2_train)
X2_test_scaled = scaler2.transform(X2_test)


X3_train_scaled = scaler3.fit_transform(X3_train)
X3_test_scaled = scaler3.transform(X3_test)

In [6]:
############################################################
# Step 5: Classification on MFCC Features (X1)
############################################################
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, f1_score
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# --- Random Forest ---
rf = RandomForestClassifier(max_depth=None, n_estimators=400, random_state=42)
rf.fit(X1_train, y1_train)
rf_preds = rf.predict(X1_test)
print("Random Forest (MFCC)")
print(classification_report(y1_test, rf_preds))
rf_f1 = f1_score(y1_test, rf_preds, average="weighted")

# --- XGBoost ---
xbg = XGBClassifier(
    n_estimators=400, learning_rate=0.1, max_depth=10,
    random_state=42, use_label_encoder=False, eval_metric='mlogloss'
)
xbg.fit(X1_train, y1_train)
xbg_preds = xbg.predict(X1_test)
print("XGBoost (MFCC)")
print(classification_report(y1_test, xbg_preds))
xbg_f1 = f1_score(y1_test, xbg_preds, average="weighted")

# --- Logistic Regression (Elastic Net) ---
logreg = LogisticRegression(
    penalty="elasticnet", solver="saga", l1_ratio=0.5,
    max_iter=1000, random_state=42
)
logreg.fit(X1_train_scaled, y1_train)
logreg_preds = logreg.predict(X1_test_scaled)
print("Logistic Regression (MFCC)")
print(classification_report(y1_test, logreg_preds))
logreg_f1 = f1_score(y1_test, logreg_preds, average="weighted")

# --- SVM (RBF kernel) ---
svm_rbf = SVC(kernel="rbf", C=1.0, probability=True, gamma="scale", random_state=42)
svm_rbf.fit(X1_train_scaled, y1_train)
svm_preds = svm_rbf.predict(X1_test_scaled)
print("SVM (MFCC)")
print(classification_report(y1_test, svm_preds))
svm_f1 = f1_score(y1_test, svm_preds, average="weighted")

############################################################
# Step 6: Compare and Save Best Model
############################################################
models = {
    "RandomForest": (rf, rf_f1),
    "XGBoost": (xbg, xbg_f1),
    "LogisticRegression": (logreg, logreg_f1),
    "SVM_RBF": (svm_rbf, svm_f1)
}

best_model_name = max(models, key=lambda x: models[x][1])
best_model, best_score = models[best_model_name]

print("\n✅ Best Model:", best_model_name)
print("Weighted F1-Score:", best_score)

# Save best model
joblib.dump(best_model, f"best_model_{best_model_name}.pkl")
print(f"Model saved as best_model_{best_model_name}.pkl")


Random Forest (MFCC)
              precision    recall  f1-score   support

           0       0.23      0.20      0.21        25
           1       0.16      0.14      0.15        22
           2       0.00      0.00      0.00        20
           3       0.08      0.07      0.08        27
           4       0.10      0.12      0.11        76
           5       1.00      1.00      1.00        22
           6       1.00      1.00      1.00        22
           7       0.05      0.04      0.04        26

    accuracy                           0.27       240
   macro avg       0.33      0.32      0.32       240
weighted avg       0.27      0.27      0.27       240



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost (MFCC)
              precision    recall  f1-score   support

           0       0.21      0.20      0.20        25
           1       0.16      0.14      0.15        22
           2       0.00      0.00      0.00        20
           3       0.11      0.11      0.11        27
           4       0.08      0.09      0.08        76
           5       1.00      1.00      1.00        22
           6       1.00      1.00      1.00        22
           7       0.05      0.04      0.04        26

    accuracy                           0.26       240
   macro avg       0.33      0.32      0.32       240
weighted avg       0.26      0.26      0.26       240

Logistic Regression (MFCC)
              precision    recall  f1-score   support

           0       0.35      0.28      0.31        25
           1       0.67      0.18      0.29        22
           2       0.00      0.00      0.00        20
           3       0.00      0.00      0.00        27
           4       0.36      0.75   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [11]:
############################################################
# Step 6: Classification on Mel-Spectrogram Features (X2)
############################################################
# --- Random Forest ---
rf2 = RandomForestClassifier(max_depth=None, n_estimators=400, random_state=42)
rf2.fit(X2_train, y2_train)
print("Random Forest (Mel)")
print(classification_report(y2_test, rf2.predict(X2_test)))


# --- XGBoost ---
xgb2 = XGBClassifier(n_estimators=400, learning_rate=0.1, max_depth=10,
random_state=42, use_label_encoder=False, eval_metric='mlogloss')
xgb2.fit(X2_train, y2_train)
print("XGBoost (Mel)")
print(classification_report(y2_test, xgb2.predict(X2_test)))


# --- Logistic Regression (Elastic Net) ---
logreg2 = LogisticRegression(penalty="elasticnet", solver="saga", l1_ratio=0.5, max_iter=1000, random_state=42)
logreg2.fit(X2_train_scaled, y2_train)
print("Logistic Regression (Mel)")
print(classification_report(y2_test, logreg2.predict(X2_test_scaled)))

Random Forest (Mel)
              precision    recall  f1-score   support

           0       0.43      0.36      0.39        25
           1       0.29      0.18      0.22        22
           2       0.00      0.00      0.00        20
           3       0.10      0.07      0.09        27
           4       0.18      0.24      0.20        76
           5       1.00      0.95      0.98        22
           6       1.00      1.00      1.00        22
           7       0.08      0.08      0.08        26

    accuracy                           0.33       240
   macro avg       0.38      0.36      0.37       240
weighted avg       0.33      0.33      0.32       240



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost (Mel)
              precision    recall  f1-score   support

           0       0.43      0.36      0.39        25
           1       0.31      0.18      0.23        22
           2       0.00      0.00      0.00        20
           3       0.12      0.11      0.11        27
           4       0.16      0.20      0.18        76
           5       1.00      0.95      0.98        22
           6       0.96      1.00      0.98        22
           7       0.08      0.08      0.08        26

    accuracy                           0.32       240
   macro avg       0.38      0.36      0.37       240
weighted avg       0.32      0.32      0.32       240

Logistic Regression (Mel)
              precision    recall  f1-score   support

           0       0.47      0.36      0.41        25
           1       0.33      0.18      0.24        22
           2       0.00      0.00      0.00        20
           3       0.20      0.04      0.06        27
           4       0.35      0.68     



In [23]:
############################################################
# Step 7: Classification on OpenL3 Embeddings (X3)
# PCA + MLP / RandomForest / SVM
############################################################
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

############################################################
# Step 7.1: Scale the OpenL3 Embeddings
############################################################
# Suppose you already have: X3 (OpenL3 embeddings), y_encoded (encoded labels)
scaler = StandardScaler()
X3_scaled = scaler.fit_transform(X3)

############################################################
# Step 7.2: Split data properly (train/test)
############################################################
X3_train_scaled, X3_test_scaled, y_train, y_test = train_test_split(
    X3_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

############################################################
# Step 7.3: PCA Dimensionality Reduction (256D)
############################################################
pca = PCA(n_components=256)
X3_train_pca = pca.fit_transform(X3_train_scaled)
X3_test_pca = pca.transform(X3_test_scaled)

############################################################
# Step 7.4: Convert to PyTorch tensors
############################################################
X_train_tensor = torch.tensor(X3_train_pca, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X3_test_pca, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

############################################################
# Step 7.5: Define MLP
############################################################
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim // 2, num_classes)
        )

    def forward(self, x):
        return self.model(x)

mlp = MLP(input_dim=256, hidden_dim=512, num_classes=len(np.unique(y_encoded)))
print(mlp)

############################################################
# Step 7.6: Random Forest (baseline on reduced embeddings)
############################################################
rf3 = RandomForestClassifier(n_estimators=200, random_state=42)
rf3.fit(X3_train_pca, y_train)
y_pred_rf3 = rf3.predict(X3_test_pca)
print("\nRandom Forest (OpenL3 PCA):")
print(classification_report(y_test, y_pred_rf3))

############################################################
# Step 7.7: SVM (Linear kernel on reduced embeddings)
############################################################
svm3 = SVC(kernel='linear', C=1.0, probability=True)
svm3.fit(X3_train_pca, y_train)
y_pred_svm3 = svm3.predict(X3_test_pca)
print("\nSVM (OpenL3 PCA):")
print(classification_report(y_test, y_pred_svm3))


MLP(
  (model): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=512, out_features=256, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=256, out_features=8, bias=True)
  )
)

Random Forest (OpenL3 PCA):
              precision    recall  f1-score   support

           0       0.39      0.28      0.33        25
           1       0.32      0.32      0.32        22
           2       0.00      0.00      0.00        20
           3       0.05      0.04      0.04        27
           4       0.16      0.21      0.18        76
           5       1.00      1.00      1.00        22
           6       1.00      1.00      1.00        22
           7       0.04      0.04      0.04        26

    accuracy                           0.32       240
   macro avg       0.37      0.36      0.36       240
weighted avg       0.31      0.32      0.31    

In [25]:
# ✅ Save trained SVM model
import joblib
joblib.dump(svm3, "svm_openl3.pkl")

############################################################
# (Optional) Save PCA and Scaler for future inference
############################################################
joblib.dump(pca, "pca_openl3.pkl")
joblib.dump(scaler, "scaler_openl3.pkl")

['scaler_openl3.pkl']