In [None]:
import glob
import os
import librosa
import numpy as np
import pandas as pd
import openl3

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt


import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


In [None]:
# Root folder of the dataset. Set the BABY_CRY_DATA_DIR environment variable to
# run the notebook on another machine; the default keeps the original location.
DATA_DIR = os.environ.get(
    "BABY_CRY_DATA_DIR",
    r"C:\Users\Asus\Desktop\Akarsh\BabyCryingSounds",
)

# Recursively load all .wav files inside the dataset directory.
# glob returns paths in arbitrary (filesystem-dependent) order, so sort them to
# keep the sample order -- and every later split -- reproducible.
files = sorted(glob.glob(os.path.join(DATA_DIR, "**", "*.wav"), recursive=True))
if not files:
    # Fail here with a clear message instead of later inside np.vstack([]).
    raise FileNotFoundError(f"No .wav files found under {DATA_DIR!r}")

# Labels are taken from the parent folder name of each file
labels = [os.path.basename(os.path.dirname(f)) for f in files]

print(f"Loaded {len(files)} audio files with {len(set(labels))} classes.")

# Encode the string labels as integers (LabelEncoder is imported in the first cell).
le = LabelEncoder()

y = np.array(labels)
y_encoded = le.fit_transform(y)

In [None]:
features1 = []  # MFCC features
features2 = []  # Mel-Spectrogram features
features3 = []  # OpenL3 embeddings

# Load the OpenL3 network once. Without an explicit `model`, get_audio_embedding
# builds and loads the network again on every call, i.e. once per file.
# "mel256" is the input representation OpenL3 falls back to when none is given.
openl3_model = openl3.models.load_audio_embedding_model(
    input_repr="mel256", content_type="env", embedding_size=512
)

for f in files:
    # `audio` (not `y`) so the label array `y` from the loading cell is not overwritten.
    # sr=None keeps each file's native sampling rate; mono=True mixes down to one channel.
    audio, sr = librosa.load(f, sr=None, mono=True)

    # --- MFCC ---
    # 40 coefficients per frame, averaged over time -> one 40-D vector per file
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
    vec1 = np.mean(mfcc.T, axis=0)
    features1.append(vec1)

    # --- Mel-Spectrogram ---
    # 128 mel bands per frame, averaged over time -> one 128-D vector per file
    melspc = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
    vec2 = np.mean(melspc.T, axis=0)
    features2.append(vec2)

    # --- OpenL3 embeddings ---
    # content_type / embedding_size are fixed by the preloaded model.
    # The returned timestamps are not needed because frames are averaged.
    emb, _ = openl3.get_audio_embedding(audio, sr, model=openl3_model)
    vec3 = emb.mean(axis=0)
    features3.append(vec3)

print("Feature extraction complete.")

X1 = np.vstack(features1)  # MFCC matrix
X2 = np.vstack(features2)  # Mel-Spectrogram matrix
X3 = np.vstack(features3)  # OpenL3 embeddings matrix

# One feature row per audio file in every representation.
assert X1.shape[0] == X2.shape[0] == X3.shape[0] == len(files)


In [None]:
# Split separately for each feature type.
# The same seed is used for all three calls: with identical labels, test_size and
# random_state, train_test_split selects the same rows each time, so the three
# representations share one reproducible train/test partition.
SPLIT_SEED = 42
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y_encoded, test_size=0.2, stratify=y_encoded, random_state=SPLIT_SEED)
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2, y_encoded, test_size=0.2, stratify=y_encoded, random_state=SPLIT_SEED)
X3_train, X3_test, y3_train, y3_test = train_test_split(
    X3, y_encoded, test_size=0.2, stratify=y_encoded, random_state=SPLIT_SEED)

# Sanity check: every representation was partitioned identically.
assert np.array_equal(y1_train, y2_train) and np.array_equal(y1_train, y3_train)
assert np.array_equal(y1_test, y2_test) and np.array_equal(y1_test, y3_test)

# Scale features: fit each scaler on the training split only, then apply the
# same transform to the test split so no test statistics leak into training.
scaler1, scaler2, scaler3 = StandardScaler(), StandardScaler(), StandardScaler()

X1_train_scaled = scaler1.fit_transform(X1_train)
X1_test_scaled  = scaler1.transform(X1_test)

X2_train_scaled = scaler2.fit_transform(X2_train)
X2_test_scaled  = scaler2.transform(X2_test)

X3_train_scaled = scaler3.fit_transform(X3_train)
X3_test_scaled  = scaler3.transform(X3_test)

print("Data split and standardized.")


In [None]:
def cluster_and_visualize(X_scaled, name, k=12):
    """
    Cluster a feature matrix with KMeans and show the result.

    Prints the silhouette score of the clustering, then draws a scatter plot
    of the samples projected onto their first two principal components,
    one colour per cluster.

    X_scaled : feature matrix handed to KMeans, silhouette_score and PCA.
    name     : text used in the printed line and the plot title.
    k        : number of clusters (default 12).
    """
    # Group the samples into k clusters (seeded, 15 restarts)
    clusterer = KMeans(n_clusters=k, init='k-means++', n_init=15, random_state=42)
    cluster_labels = clusterer.fit_predict(X_scaled)

    # Report how well separated the clusters are
    sil_score = silhouette_score(X_scaled, cluster_labels)
    print(f"{name} Silhouette Score for k={k}: {sil_score:.3f}")

    # Project onto two principal components purely for plotting
    coords = PCA(n_components=2).fit_transform(X_scaled)

    # One scatter series per cluster id so each gets its own legend entry
    plt.figure(figsize=(7,5))
    for cluster in np.unique(cluster_labels):
        members = cluster_labels == cluster
        plt.scatter(coords[members, 0], coords[members, 1],
                    label=f"Cluster {cluster}", alpha=0.6)
    plt.title(f"{name} Clustering (PCA 2D)")
    plt.xlabel("PCA 1")
    plt.ylabel("PCA 2")
    plt.legend()
    plt.show()

# Cluster each standardized training feature set in turn
for _features, _title in (
    (X1_train_scaled, "MFCC Features"),
    (X2_train_scaled, "Mel-Spectrogram Features"),
    (X3_train_scaled, "OpenL3 Embeddings"),
):
    cluster_and_visualize(_features, _title)

In [None]:
def reduced_corr_heatmap(X_scaled, name, n_components=10):
    """
    Reduce feature dimensions with PCA, compute correlation, and plot heatmap.

    Parameters
    ----------
    X_scaled : array-like
        Feature matrix handed to ``PCA.fit_transform`` (callers pass
        standardized training features).
    name : str
        Label used in the figure title.
    n_components : int, default=10
        Number of principal components to keep.

    Notes
    -----
    The heatmap is drawn with matplotlib only. The earlier ``sns.heatmap`` call
    raised ``NameError`` because seaborn is never imported in this notebook.

    Principal-component scores of the data the PCA was fitted on are
    uncorrelated by construction, so off-diagonal cells are expected to be ~0.
    """
    # PCA reduction
    pca = PCA(n_components=n_components, random_state=42)
    X_reduced = pca.fit_transform(X_scaled)

    # Convert to DataFrame; name columns after what PCA actually returned
    columns = [f"PC{i+1}" for i in range(X_reduced.shape[1])]
    df_reduced = pd.DataFrame(X_reduced, columns=columns)

    # Compute correlation
    corr = df_reduced.corr()
    values = corr.to_numpy()

    # Plot heatmap. Correlations lie in [-1, 1], so a symmetric colour range
    # centres the diverging 'coolwarm' map on 0.
    fig, ax = plt.subplots(figsize=(10, 8))
    image = ax.imshow(values, cmap='coolwarm', vmin=-1.0, vmax=1.0)
    fig.colorbar(image, ax=ax)

    positions = list(range(len(columns)))
    ax.set_xticks(positions)
    ax.set_xticklabels(columns)
    ax.set_yticks(positions)
    ax.set_yticklabels(columns)

    # Annotate every cell with its value (two decimals)
    for row in positions:
        for col in positions:
            ax.text(col, row, f"{values[row, col]:.2f}", ha="center", va="center")

    ax.set_title(f"{name} Feature Correlation (PCA {n_components}D)")
    plt.show()

In [None]:
# NOTE: this cell defines reduced_corr_heatmap again; the definition in the
# previous cell has the same name, and the later definition is the one in effect.
def reduced_corr_heatmap(X_scaled, name, n_components=10):
    """
    Reduce feature dimensions with PCA, compute correlation, and plot heatmap.

    Parameters
    ----------
    X_scaled : array-like
        Feature matrix handed to ``PCA.fit_transform`` (callers pass
        standardized training features).
    name : str
        Label used in the figure title.
    n_components : int, default=10
        Number of principal components to keep.

    Notes
    -----
    The heatmap is drawn with matplotlib only. The earlier ``sns.heatmap`` call
    raised ``NameError`` because seaborn is never imported in this notebook.

    Principal-component scores of the data the PCA was fitted on are
    uncorrelated by construction, so off-diagonal cells are expected to be ~0.
    """
    # PCA reduction
    pca = PCA(n_components=n_components, random_state=42)
    X_reduced = pca.fit_transform(X_scaled)

    # Convert to DataFrame; name columns after what PCA actually returned
    columns = [f"PC{i+1}" for i in range(X_reduced.shape[1])]
    df_reduced = pd.DataFrame(X_reduced, columns=columns)

    # Compute correlation
    corr = df_reduced.corr()
    values = corr.to_numpy()

    # Plot heatmap. Correlations lie in [-1, 1], so a symmetric colour range
    # centres the diverging 'coolwarm' map on 0.
    fig, ax = plt.subplots(figsize=(10, 8))
    image = ax.imshow(values, cmap='coolwarm', vmin=-1.0, vmax=1.0)
    fig.colorbar(image, ax=ax)

    positions = list(range(len(columns)))
    ax.set_xticks(positions)
    ax.set_xticklabels(columns)
    ax.set_yticks(positions)
    ax.set_yticklabels(columns)

    # Annotate every cell with its value (two decimals)
    for row in positions:
        for col in positions:
            ax.text(col, row, f"{values[row, col]:.2f}", ha="center", va="center")

    ax.set_title(f"{name} Feature Correlation (PCA {n_components}D)")
    plt.show()

# Draw one correlation heatmap per standardized training feature set
for _features, _title, _n_pcs in (
    (X1_train_scaled, "MFCC Features", 10),
    (X2_train_scaled, "Mel-Spectrogram Features", 10),
    (X3_train_scaled, "OpenL3 Embeddings", 15),
):
    reduced_corr_heatmap(_features, _title, n_components=_n_pcs)

In [None]:
# Split separately for each feature type.
# This cell repeats the split/scale step performed earlier in the notebook.
# A fixed seed makes it idempotent: every run yields the same partition, and the
# three representations share it, because identical labels, test_size and
# random_state select the same rows in each call.
SPLIT_SEED = 42
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y_encoded, test_size=0.2, stratify=y_encoded, random_state=SPLIT_SEED)
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2, y_encoded, test_size=0.2, stratify=y_encoded, random_state=SPLIT_SEED)
X3_train, X3_test, y3_train, y3_test = train_test_split(
    X3, y_encoded, test_size=0.2, stratify=y_encoded, random_state=SPLIT_SEED)

# Sanity check: every representation was partitioned identically.
assert np.array_equal(y1_train, y2_train) and np.array_equal(y1_train, y3_train)
assert np.array_equal(y1_test, y2_test) and np.array_equal(y1_test, y3_test)

# Scale features: fit each scaler on the training split only, then apply the
# same transform to the test split so no test statistics leak into training.
scaler1, scaler2, scaler3 = StandardScaler(), StandardScaler(), StandardScaler()

X1_train_scaled = scaler1.fit_transform(X1_train)
X1_test_scaled  = scaler1.transform(X1_test)

X2_train_scaled = scaler2.fit_transform(X2_train)
X2_test_scaled  = scaler2.transform(X2_test)

X3_train_scaled = scaler3.fit_transform(X3_train)
X3_test_scaled  = scaler3.transform(X3_test)

print("Data split and standardized.")


In [None]:
# Classifiers on the MFCC features.
# Tree ensembles are trained on the raw features; logistic regression and the
# SVM are trained on the standardized ones.

# --- Random Forest ---
rf = RandomForestClassifier(max_depth=None, n_estimators=400, random_state=42)
rf.fit(X1_train, y1_train)
print("Random Forest (MFCC)")
print(classification_report(y1_test, rf.predict(X1_test)))

# --- XGBoost ---
xbg = XGBClassifier(n_estimators=400, learning_rate=0.1, max_depth=10,
                    random_state=42, use_label_encoder=False, eval_metric='mlogloss')
xbg.fit(X1_train, y1_train)
print("XGBoost (MFCC)")
print(classification_report(y1_test, xbg.predict(X1_test)))

# --- Logistic Regression (Elastic Net) ---
# penalty="elasticnet" needs an explicit l1_ratio; without it scikit-learn raises
# ValueError at fit time. 0.5 (equal L1/L2 mix) matches the Mel-spectrogram cell.
logreg = LogisticRegression(penalty="elasticnet", solver="saga", l1_ratio=0.5,
                            max_iter=1000, random_state=42)
logreg.fit(X1_train_scaled, y1_train)
print("Logistic Regression (MFCC)")
print(classification_report(y1_test, logreg.predict(X1_test_scaled)))

# --- SVM (RBF kernel) ---
svm_rbf = SVC(kernel="rbf", C=1.0, probability=True, gamma="scale", random_state=42)
svm_rbf.fit(X1_train_scaled, y1_train)
print("SVM (MFCC)")
print(classification_report(y1_test, svm_rbf.predict(X1_test_scaled)))


In [None]:
# Classifiers on the Mel-spectrogram features: build the three estimators,
# then fit and report each one in the same order.
rf2 = RandomForestClassifier(n_estimators=400, max_depth=None, random_state=42)

xgb2 = XGBClassifier(
    n_estimators=400,
    learning_rate=0.1,
    max_depth=10,
    random_state=42,
    use_label_encoder=False,
    eval_metric='mlogloss',
)

# Elastic-net logistic regression with an equal L1/L2 mix
logreg2 = LogisticRegression(
    penalty="elasticnet",
    solver="saga",
    l1_ratio=0.5,
    max_iter=1000,
    random_state=42,
)

# (report title, estimator, training features, evaluation features)
# Tree ensembles get the raw features, logistic regression the standardized ones.
for _title, _estimator, _X_fit, _X_eval in (
    ("Random Forest (Mel)", rf2, X2_train, X2_test),
    ("XGBoost (Mel)", xgb2, X2_train, X2_test),
    ("Logistic Regression (Mel)", logreg2, X2_train_scaled, X2_test_scaled),
):
    _estimator.fit(_X_fit, y2_train)
    print(_title)
    print(classification_report(y2_test, _estimator.predict(_X_eval)))


In [None]:
# Reduce high-dimensional embeddings to 256D.
# random_state pins the result when scikit-learn picks its randomized SVD solver.
pca = PCA(n_components=256, random_state=42)
X3_train_pca = pca.fit_transform(X3_train_scaled)

# Project the held-out split with the PCA fitted on the training split, so that
# models trained on X3_train_pca can be evaluated on unseen data.
X3_test_pca = pca.transform(X3_test_scaled)

print("PCA completed on OpenL3 embeddings.")


In [None]:
# Convert to PyTorch tensors.
# X3_train_pca holds the shuffled training rows produced by train_test_split, so
# its labels are y3_train -- not y_encoded, which covers the whole dataset in
# original file order and has a different length.
X_tensor = torch.tensor(X3_train_pca, dtype=torch.float32)
Y_tensor = torch.tensor(y3_train, dtype=torch.long)

# Held-out split: project with the PCA fitted on the training split.
X3_test_pca = pca.transform(X3_test_scaled)
X_test_tensor = torch.tensor(X3_test_pca, dtype=torch.float32)
Y_test_tensor = torch.tensor(y3_test, dtype=torch.long)

# Datasets follow the existing train/test partition instead of slicing the
# training matrix at a fixed row index.
train_dataset = TensorDataset(X_tensor, Y_tensor)
test_dataset  = TensorDataset(X_test_tensor, Y_test_tensor)
train_loader  = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader   = DataLoader(test_dataset, batch_size=32)

# Simple multi-layer perceptron classifier
class MLP(nn.Module):
    """
    Feed-forward network: input_dim -> hidden_dim -> hidden_dim // 2 -> num_classes.

    Each of the two hidden layers is followed by ReLU and Dropout(0.3).
    The output layer has no activation.
    """

    def __init__(self, input_dim, hidden_dim, num_classes):
        super().__init__()
        half_dim = hidden_dim // 2
        # Layer order is kept as-is so the Sequential indices (0, 3, 6 for the
        # Linear layers) and therefore the state_dict keys stay the same.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, half_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(half_dim, num_classes),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack and return the output of the last Linear layer."""
        logits = self.model(x)
        return logits

# Seed PyTorch so the randomly initialised weights are the same on every run.
torch.manual_seed(42)

# Take the input width from the PCA-reduced features rather than repeating the
# literal 256, so the network stays in sync if the PCA size changes.
mlp = MLP(
    input_dim=X3_train_pca.shape[1],
    hidden_dim=512,
    num_classes=len(np.unique(y_encoded)),
)
print("MLP model initialized.")


In [None]:
# Classifiers on the PCA-reduced OpenL3 embeddings.
# X3_train_pca contains the shuffled training rows, so it is paired with
# y3_train. y_encoded[:960] would take labels in original file order, which do
# not belong to these rows.

# Held-out split, projected with the PCA fitted on the training split.
# y_pred_rf3 / y_pred_svm3 below are therefore aligned with y3_test.
X3_test_pca = pca.transform(X3_test_scaled)

# Random Forest
rf3 = RandomForestClassifier(n_estimators=200, random_state=42)
rf3.fit(X3_train_pca, y3_train)
y_pred_rf3 = rf3.predict(X3_test_pca)

# SVM (seeded: probability=True uses internal randomised cross-validation)
svm3 = SVC(kernel='linear', C=1.0, probability=True, random_state=42)
svm3.fit(X3_train_pca, y3_train)
y_pred_svm3 = svm3.predict(X3_test_pca)

print("Random Forest & SVM trained on PCA-reduced OpenL3 embeddings.")
