In [20]:
import os
import numpy as np
import pandas as pd
import librosa
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
def extract_features(audio_path, sr=22050):
    y, _ = librosa.load(audio_path, sr=sr)

    # MFCCs
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfccs_mean = np.mean(mfccs, axis=1)
    mfccs_std = np.std(mfccs, axis=1)

    # Pitch (F0)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    f0 = [np.max(pitches[:, i]) if np.max(magnitudes[:, i]) > 0 else 0 for i in range(pitches.shape[1])]
    f0_mean = np.mean([pitch for pitch in f0 if pitch > 0])
    f0_std = np.std([pitch for pitch in f0 if pitch > 0])

    # Spectral Centroid
    spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_centroid_mean = np.mean(spectral_centroids)
    spectral_centroid_std = np.std(spectral_centroids)

    # Pack features into a flat array
    features = np.concatenate([mfccs_mean, mfccs_std, [f0_mean, f0_std, spectral_centroid_mean, spectral_centroid_std]])
    
    return features

In [5]:
data_dir = 'Multi_Class_Classification'
folders = ['frustrated', 'delighted', 'dysregulated', 'request', 'selftalk']

# Initialize lists to store features and labels
feature_list = []
labels = []

# Loop over each folder to load and label data
for label in folders:
    folder_path = os.path.join(data_dir, label)
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.wav'):  # Assuming audio files are .wav
            file_path = os.path.join(folder_path, file_name)
            features = extract_features(file_path)
            feature_list.append(features)
            labels.append(label)

# Convert to arrays
X = np.array(feature_list)
y = np.array(labels)



In [6]:
param_grid = {
    'n_estimators': [200, 300, 500],          # Increase number of trees
    'max_depth': [None, 10, 20, 30],          # Maximum depth of each tree
    'min_samples_split': [2, 5, 10],          # Minimum samples required to split a node
    'min_samples_leaf': [1, 2, 4],            # Minimum samples required at a leaf node
    'max_features': ['sqrt', 'log2'],         # Number of features to consider at each split
    'class_weight': ['balanced']              # Adjusting class weights to handle imbalance
}

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
rf = RandomForestClassifier(random_state=42)

# Perform grid search with cross-validation
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits


In [10]:
best_rf = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

Best Parameters: {'class_weight': 'balanced', 'max_depth': 30, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 300}


In [11]:
y_pred = best_rf.predict(X_test)

# Print evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.6125429553264605
Classification Report:
               precision    recall  f1-score   support

   delighted       0.54      0.49      0.51       257
dysregulated       0.70      0.61      0.65       132
  frustrated       0.72      0.66      0.69       326
     request       0.48      0.20      0.28        80
    selftalk       0.57      0.75      0.65       369

    accuracy                           0.61      1164
   macro avg       0.60      0.54      0.56      1164
weighted avg       0.61      0.61      0.60      1164

Confusion Matrix:
 [[125   5  27  12  88]
 [  2  81  25   0  24]
 [ 32   9 214   2  69]
 [ 18   6  11  16  29]
 [ 53  15  21   3 277]]


In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [15]:
param_grid = {
    'C': [0.1, 1, 10, 100],               # Regularization parameter
    'gamma': ['scale', 'auto', 0.01, 0.001],  # Kernel coefficient
    'kernel': ['rbf']                     # Using RBF kernel
}

svm = SVC()

# Set up GridSearchCV for SVM
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# Get the best model from the grid search
best_svm = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best Parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [16]:
y_pred = best_svm.predict(X_test)

# Calculate accuracy and print evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.6460481099656358
Classification Report:
               precision    recall  f1-score   support

   delighted       0.56      0.60      0.58       257
dysregulated       0.68      0.66      0.67       132
  frustrated       0.73      0.71      0.72       326
     request       0.46      0.29      0.35        80
    selftalk       0.65      0.70      0.67       369

    accuracy                           0.65      1164
   macro avg       0.62      0.59      0.60      1164
weighted avg       0.64      0.65      0.64      1164

Confusion Matrix:
 [[153   7  28  15  54]
 [  8  87  17   0  20]
 [ 30  18 230   4  44]
 [ 17   7  10  23  23]
 [ 63   9  30   8 259]]


In [19]:
rf = RandomForestClassifier(
    n_estimators=300, max_depth=None, max_features='sqrt', 
    min_samples_leaf=2, min_samples_split=2, class_weight='balanced', random_state=42
)
gb = GradientBoostingClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
)
svm = SVC(C=1, gamma='scale', kernel='rbf', probability=True, random_state=42)

# Define the stacking classifier with a logistic regression meta-classifier
stacking_clf = StackingClassifier(
    estimators=[
        ('rf', rf),
        ('gb', gb),
        ('svm', svm)
    ],
    final_estimator=LogisticRegression(),  # Meta-classifier
    cv=5,  # Cross-validation folds for stacking
    n_jobs=-1  # Use all processors
)

# Fit the stacking classifier on the training set
stacking_clf.fit(X_train, y_train)

# Predict on the test set
y_pred = stacking_clf.predict(X_test)

# Evaluate the stacking classifier's performance
accuracy = accuracy_score(y_test, y_pred)
print("Stacking Classifier Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Stacking Classifier Accuracy: 0.6486254295532646
Classification Report:
               precision    recall  f1-score   support

   delighted       0.57      0.55      0.56       257
dysregulated       0.71      0.64      0.67       132
  frustrated       0.75      0.70      0.72       326
     request       0.47      0.20      0.28        80
    selftalk       0.62      0.77      0.69       369

    accuracy                           0.65      1164
   macro avg       0.62      0.57      0.59      1164
weighted avg       0.64      0.65      0.64      1164

Confusion Matrix:
 [[141   6  28  12  70]
 [  4  85  21   0  22]
 [ 30  11 229   2  54]
 [ 23   9   7  16  25]
 [ 50   9  22   4 284]]


In [21]:
cross_val_scores = cross_val_score(stacking_clf, X_train, y_train, cv=5, scoring='accuracy')
print(f"Cross-Validation Accuracy: {cross_val_scores.mean()} ± {cross_val_scores.std()}")

Cross-Validation Accuracy: 0.6588600533592045 ± 0.010898275879810121


In [22]:
rf = RandomForestClassifier(
    n_estimators=300, max_depth=None, max_features='sqrt', 
    min_samples_leaf=2, min_samples_split=2, class_weight='balanced', random_state=42
)
gb = GradientBoostingClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
)
svm = SVC(C=1, gamma='scale', kernel='rbf', probability=True, class_weight='balanced', random_state=42)

# Define the stacking classifier with a logistic regression meta-classifier
stacking_clf = StackingClassifier(
    estimators=[
        ('rf', rf),
        ('gb', gb),
        ('svm', svm)
    ],
    final_estimator=LogisticRegression(),  # Meta-classifier
    cv=5,  # Cross-validation folds for stacking
    n_jobs=-1  # Use all processors
)

# Use cross-validation to evaluate the stacking classifier
cross_val_scores = cross_val_score(stacking_clf, X_train, y_train, cv=5, scoring='accuracy')
print(f"Cross-Validation Accuracy: {cross_val_scores.mean()} ± {cross_val_scores.std()}")

# Fit the stacking classifier on the training set
stacking_clf.fit(X_train, y_train)

# Predict on the test set
y_pred = stacking_clf.predict(X_test)

# Evaluate the stacking classifier's performance on the test set
accuracy = accuracy_score(y_test, y_pred)
print("Stacking Classifier Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Cross-Validation Accuracy: 0.6577857085109087 ± 0.010472725648827614
Stacking Classifier Accuracy: 0.6391752577319587
Classification Report:
               precision    recall  f1-score   support

   delighted       0.55      0.53      0.54       257
dysregulated       0.66      0.65      0.66       132
  frustrated       0.76      0.69      0.72       326
     request       0.41      0.23      0.29        80
    selftalk       0.63      0.75      0.68       369

    accuracy                           0.64      1164
   macro avg       0.60      0.57      0.58      1164
weighted avg       0.64      0.64      0.63      1164

Confusion Matrix:
 [[137   6  26  19  69]
 [  5  86  21   0  20]
 [ 29  14 225   3  55]
 [ 24   9   8  18  21]
 [ 56  15  16   4 278]]
