In [None]:
import os
import glob
import numpy as np
import tensorflow as tf
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, RNN, LSTMCell
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [None]:
def load_data(dataset_path, img_size=(128, 128)):
    """
    Loads precomputed mel-spectrogram images and extracts labels from folder names.

    Every sub-directory of ``dataset_path`` is treated as one class. Each
    ``*.png`` file directly inside it is read as a grayscale image, resized,
    and divided by 255. Entries of ``dataset_path`` that are not directories
    are ignored, and image files that OpenCV cannot decode are skipped with
    a warning instead of aborting the whole load.

    Args:
        dataset_path: Directory containing one sub-directory per class.
        img_size: Target size passed to ``cv2.resize`` as ``dsize``, which
            OpenCV interprets as ``(width, height)``.

    Returns:
        Tuple ``(X, y)`` where ``X`` is a float32 array of the loaded images
        scaled by 1/255 and ``y`` is an array holding the folder name of each
        image. Both arrays are empty when no readable image is found.
    """
    X = []
    y = []
    class_names = sorted(os.listdir(dataset_path))  # Get emotion categories

    for label in class_names:
        class_path = os.path.join(dataset_path, label)
        if not os.path.isdir(class_path):
            continue

        # Escape the directory part so class names containing glob
        # metacharacters ('[', '*', '?') are matched literally, and sort the
        # result because glob order is filesystem-dependent: a stable sample
        # order keeps any seeded train/validation split reproducible.
        pattern = os.path.join(glob.escape(class_path), '*.png')
        files = sorted(glob.glob(pattern))
        print(f"Found {len(files)} images for class '{label}'.")

        for file in files:
            img = cv2.imread(file, cv2.IMREAD_GRAYSCALE)  # Load in grayscale
            if img is None:
                # cv2.imread signals an unreadable/corrupt file by returning
                # None rather than raising; resizing it would crash.
                print(f"Warning: could not read image '{file}', skipping.")
                continue
            X.append(cv2.resize(img, img_size))  # Resize to standard size
            y.append(label)

    X = np.array(X, dtype=np.float32) / 255.0  # Normalize pixel values
    y = np.array(y)
    return X, y


In [None]:
def preprocess_labels(y):
    """
    Turn an array of string labels into one-hot rows.

    A fresh ``LabelEncoder`` is fitted on ``y`` to obtain integer class ids,
    which are then expanded with ``to_categorical``.

    Returns:
        Tuple ``(one_hot, encoder)``: the one-hot matrix and the fitted
        encoder, so callers can map class ids back to label names.
    """
    encoder = LabelEncoder()
    one_hot = to_categorical(encoder.fit_transform(y))
    return one_hot, encoder


In [None]:
def build_model(input_shape, num_classes):
    """
    Assemble the (uncompiled) recurrent classifier.

    The recurrent layers are ``RNN`` wrappers around ``LSTMCell`` rather than
    the fused ``LSTM`` layer; per the original author's note this forces the
    non-cuDNN implementation so the model can run under DirectML on AMD GPUs.

    Args:
        input_shape: Shape of one sample, forwarded to the first layer.
        num_classes: Width of the final softmax layer.

    Returns:
        A ``Sequential`` model; compiling it is left to the caller.
    """
    stack = [
        # Recurrent encoder: the first layer emits the full sequence so the
        # second one can consume it and reduce it to a single vector.
        RNN(LSTMCell(128), return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        RNN(LSTMCell(64)),
        Dropout(0.2),
        # Classification head.
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)
    return model


In [None]:
def scheduler(epoch, lr):
    """
    Step-decay schedule: halve ``lr`` on every 100th epoch after epoch 0.

    On all other epochs the incoming learning rate is returned unchanged.
    """
    # Epoch 0 is a multiple of 100 too, but the initial rate must be kept.
    if epoch <= 0 or epoch % 100 != 0:
        return lr
    return lr * 0.5


In [None]:
def main(dataset_path='RAVDESS_mel_spectrograms', seed=42):
    """
    Train and evaluate the LSTM emotion classifier on spectrogram images.

    Steps: seed the random generators, load the images, one-hot encode the
    folder-name labels, make a stratified 80/20 train/validation split,
    build and compile the model, train with early stopping and learning-rate
    callbacks, then report loss and accuracy on the validation split.

    Args:
        dataset_path: Directory with one sub-directory of PNG spectrograms
            per class. Update the default with your dataset path.
        seed: Seed used for NumPy, TensorFlow and the train/validation
            split, so repeated runs start from the same state. Bit-exact
            reproducibility on GPU is not guaranteed by seeding alone.

    Raises:
        ValueError: If no image could be loaded from ``dataset_path``.
    """
    # Without this only the split is seeded; weight initialisation, dropout
    # masks and batch shuffling would differ on every run.
    np.random.seed(seed)
    tf.random.set_seed(seed)

    X, y = load_data(dataset_path)
    if len(X) == 0:
        # Fail here with a clear message instead of an obscure error from
        # the label encoder or train_test_split further down.
        raise ValueError(
            f"No images were loaded from '{dataset_path}'. Expected one "
            "sub-directory per class containing *.png files."
        )
    # For the LSTM model, each spectrogram is treated as a sequence of 128 timesteps with 128 features.
    # No extra channel dimension is added.

    y_onehot, le = preprocess_labels(y)
    # Record which softmax index corresponds to which label.
    print(f"Classes: {list(le.classes_)}")

    X_train, X_val, y_train, y_val = train_test_split(
        X, y_onehot, test_size=0.2, random_state=seed, stratify=np.argmax(y_onehot, axis=1)
    )

    input_shape = X_train.shape[1:]  # Expected to be (128, 128)
    num_classes = y_onehot.shape[1]
    model = build_model(input_shape, num_classes)

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    early_stop = EarlyStopping(monitor='val_loss', patience=200, restore_best_weights=True)
    # Two learning-rate controls are active: `scheduler` halves the rate it
    # is given every 100 epochs, and ReduceLROnPlateau halves it after 100
    # epochs without val_loss improvement, so the reductions compound.
    lr_scheduler = LearningRateScheduler(scheduler)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=100, min_lr=1e-6)

    model.fit(
        X_train, y_train,
        epochs=2000,
        batch_size=32,
        validation_data=(X_val, y_val),
        callbacks=[early_stop, lr_scheduler, reduce_lr]
    )

    # NOTE: this is the same split that drove early stopping, so the figures
    # below are validation metrics, not an unbiased test estimate.
    loss, acc = model.evaluate(X_val, y_val)
    print(f"Validation Loss: {loss:.4f}, Validation Accuracy: {acc:.4f}")


In [None]:
# Entry point: start the training pipeline only when this code is executed
# directly (script run or notebook cell), not when it is imported.
if __name__ == '__main__':
    main()
