In [1]:
import os
import numpy as np
import tensorflow as tf
import xgboost as xgb
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.model_selection import RandomizedSearchCV
import matplotlib.pyplot as plt

In [2]:
# -----------------------------
# Parameters and Dataset Loading
# -----------------------------
# Root folder of the spectrogram images; one subfolder per class is expected.
data_dir = "EmoDB_mel_spectrograms"
batch_size = 32
# 299x299 is the spatial size the InceptionV3 feature extractor is built with.
img_size = (299, 299)
seed = 123

# Keyword arguments common to both subsets of the 80/20 split. Both calls use
# the same seed and validation fraction so they refer to the same partition.
split_options = dict(
    validation_split=0.2,
    seed=seed,
    image_size=img_size,
    batch_size=batch_size,
)
load_split = tf.keras.preprocessing.image_dataset_from_directory
raw_train_ds = load_split(data_dir, subset="training", **split_options)
raw_val_ds = load_split(data_dir, subset="validation", **split_options)

# Read the class names from the raw dataset, before it is wrapped by
# cache()/prefetch() below.
class_names = raw_train_ds.class_names
print("Class names:", class_names)

# Cache decoded batches and prefetch them for faster iteration.
AUTOTUNE = tf.data.AUTOTUNE
train_ds, val_ds = (
    split.cache().prefetch(buffer_size=AUTOTUNE)
    for split in (raw_train_ds, raw_val_ds)
)


Found 535 files belonging to 7 classes.
Using 428 files for training.
Found 535 files belonging to 7 classes.
Using 107 files for validation.
Class names: ['anger', 'anxiety', 'boredom', 'disgust', 'happiness', 'neutral', 'sadness']


In [3]:
# -----------------------------
# CNN for Feature Extraction using InceptionV3
# -----------------------------
# ImageNet-pretrained InceptionV3 backbone with the classification head removed.
input_shape = (img_size[0], img_size[1], 3)
base_model = InceptionV3(
    include_top=False,
    weights="imagenet",
    input_shape=input_shape,
)
# Global average pooling collapses the spatial dimensions of the final
# convolutional feature map, leaving one flat embedding vector per image.
pooled_features = GlobalAveragePooling2D()(base_model.output)
feature_extractor = Model(inputs=base_model.input, outputs=pooled_features)


In [4]:
# -----------------------------
# Function to Extract Embeddings
# -----------------------------
def get_embeddings(dataset, model):
    """
    Extract CNN embeddings and labels from a batched dataset.

    Each batch of images is passed through ``preprocess_input`` (the
    InceptionV3 preprocessing imported at the top of the notebook) and then
    through ``model``; the per-batch results are concatenated.

    Args:
        dataset: Iterable yielding ``(batch_images, batch_labels)`` pairs,
            where ``batch_labels`` exposes ``.numpy()``.
        model: Keras model used as the feature extractor.

    Returns:
        Tuple ``(embeddings, labels)`` of NumPy arrays: the per-batch
        embeddings stacked along axis 0 and the labels concatenated in the
        same batch order.

    Raises:
        ValueError: If ``dataset`` yields no batches.
    """
    embeddings = []
    labels = []
    for batch_images, batch_labels in dataset:
        # Preprocess images for InceptionV3
        batch_images = preprocess_input(batch_images)
        # predict_on_batch runs a single forward pass on the in-memory batch.
        # Calling predict() here would set up a new prediction loop (and
        # progress bar) for every batch.
        batch_emb = model.predict_on_batch(batch_images)
        embeddings.append(np.asarray(batch_emb))
        labels.append(batch_labels.numpy())
    if not embeddings:
        # Fail with an explicit message instead of the generic error that
        # np.vstack raises on an empty list.
        raise ValueError("Cannot extract embeddings: the dataset yielded no batches.")
    return np.vstack(embeddings), np.hstack(labels)

# Run the feature extractor over each split, announcing progress first.
print("Extracting features for training set...")
X_train, y_train = get_embeddings(dataset=train_ds, model=feature_extractor)
print("Extracting features for validation set...")
X_val, y_val = get_embeddings(dataset=val_ds, model=feature_extractor)

# Cast the labels of both splits to 32-bit integers.
y_train, y_val = (split_labels.astype(np.int32) for split_labels in (y_train, y_val))

print("Training embeddings shape:", X_train.shape)
print("Validation embeddings shape:", X_val.shape)


Extracting features for training set...
Extracting features for validation set...
Training embeddings shape: (428, 2048)
Validation embeddings shape: (107, 2048)


In [None]:
# -----------------------------
# XGBoost Classification with Hyperparameter Optimization via RandomizedSearchCV
# -----------------------------
num_classes = len(class_names)

# Candidate values for each hyperparameter; the random search samples
# combinations from these lists.
param_dist = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.3],
    'max_depth': [5, 7, 9, 11],
    'n_estimators': [200, 300, 500, 1000],
    'subsample': [0.5, 0.7, 0.9, 1.0],
    'colsample_bytree': [0.5, 0.7, 0.9, 1.0],
    'gamma': [0, 0.1, 0.5, 1, 5]
}

# Initialize the base XGBoost classifier
base_clf = xgb.XGBClassifier(
    objective="multi:softmax",
    num_class=num_classes,
    eval_metric="mlogloss",
    # NOTE(review): use_label_encoder is deprecated in recent xgboost releases;
    # kept for compatibility with older versions -- confirm against the
    # installed version and drop if it only produces a warning.
    use_label_encoder=False,
    # Parallelism is handled by RandomizedSearchCV (n_jobs=-1) below; keeping
    # each individual fit single-threaded avoids oversubscribing the CPU.
    n_jobs=1,
    # Row/column subsampling is part of the search space, so seed the booster
    # to make the search reproducible.
    random_state=seed
)

# Setup RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=base_clf,
    param_distributions=param_dist,
    n_iter=50,                # Number of parameter settings that are sampled
    scoring='accuracy',
    cv=5,                     # 5-fold cross-validation
    verbose=1,
    random_state=seed,
    n_jobs=-1
)

print("Starting hyperparameter optimization with RandomizedSearchCV...")
random_search.fit(X_train, y_train)

print("\nBest Hyperparameters Found:")
print(random_search.best_params_)

# Best candidate found by the search (RandomizedSearchCV refits it on the full
# training data by default).
best_clf = random_search.best_estimator_


Starting hyperparameter optimization with RandomizedSearchCV...
Fitting 5 folds for each of 50 candidates, totalling 250 fits


In [None]:

# -----------------------------
# Evaluation on Validation Set
# -----------------------------
y_pred = best_clf.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("\nValidation Accuracy: {:.2f}%".format(accuracy * 100))

# Pin the label set to every class index. Without this, a class that appears in
# neither y_val nor y_pred would make classification_report reject target_names
# (length mismatch) and would shrink the confusion matrix so that it no longer
# lines up with the class-name tick labels.
tick_marks = np.arange(len(class_names))

print("\nClassification Report:")
print(classification_report(y_val, y_pred, labels=tick_marks, target_names=class_names))

# Plot the confusion matrix (rows: true labels, columns: predicted labels)
cm = confusion_matrix(y_val, y_pred, labels=tick_marks)
plt.figure(figsize=(8, 6))
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.colorbar()
plt.xticks(tick_marks, class_names, rotation=45)
plt.yticks(tick_marks, class_names)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.show()
