In [1]:
import librosa
import soundfile
import os
import time
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
import keras
import tensorflow as tf
import random
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.utils import to_categorical
from keras.layers import Input, Flatten, Dropout, Activation
from keras.models import Model
from keras.layers import (
    Conv1D,
    Activation,
    Dropout,
    MaxPooling1D,
    Flatten,
    Dense,
    Input,
)
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from IPython.display import clear_output
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score

In [2]:
# Function to recursively get all audio files from a directory
def get_audio_files(base_path):
    audio_files = []
    for root, _, files in os.walk(base_path):
        for file in files:
            if file.endswith(".wav"):
                audio_files.append(os.path.join(root, file))
    return audio_files


# Base path for TESS dataset
base_path_TESS = "../data/raw/TESS_Toronto_emotional_speech_set_data"

# Get all audio files from TESS datasets
audio_files_tess = get_audio_files(base_path_TESS)
# Save audio files paths
if not os.path.exists("../data/processed/audio_files_tess.txt"):
    with open("../data/processed/audio_files_tess.txt", "w") as f:
        for item in audio_files_tess:
            f.write("%s\n" % item)
    print("../data/processed/audio_files_tess.txt already exists is created")
else:
    print("../data/processed/audio_files_tess.txt already exists")

../data/processed/audio_files_tess.txt already exists


In [3]:
# Function to extract mfcc, chroma, mel, and contrast features from audio files
def extract_features(
    file_path,
    include_mfcc=True,
    include_chroma=True,
    include_mel=True,
    include_contrast=True,
    sample_rate=22050,
):
    try:
        audio, sr = librosa.load(file_path, sr=sample_rate)
        features = []

        if include_mfcc:
            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
            features.append(np.mean(mfccs, axis=1))

        if include_chroma:
            chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
            features.append(np.mean(chroma, axis=1))

        if include_mel:
            mel = librosa.feature.melspectrogram(y=audio, sr=sr)
            features.append(np.mean(mel, axis=1))

        if include_contrast:
            contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
            features.append(np.mean(contrast, axis=1))

        return np.hstack(features)
    except Exception:
        print(f"Error encountered while parsing file: {file_path}")
        return None


# Load audio files from the text file
audio_files_tess = []
with open("../data/processed/audio_files_tess.txt", "r") as file:
    audio_files_tess = [line.strip() for line in file.readlines()]


# Function to create datasets excluding one feature each time
def create_datasets():
    features_labels = []

    feature_sets = {
        "all_features": (True, True, True, True),
        "no_mfcc": (False, True, True, True),
        "no_chroma": (True, False, True, True),
        "no_mel": (True, True, False, True),
        "no_contrast": (True, True, True, False),
    }
    

    for set_name, (mfcc, chroma, mel, contrast) in feature_sets.items():
        features = []
        labels = []
        label_map_tess = {
            "OAF_angry": 4,
            "OAF_disgust": 6,
            "OAF_Fear": 5,
            "OAF_happy": 2,
            "OAF_Pleasant_surprise": 7,
            "OAF_Sad": 3,
            "OAF_neutral": 0,
            "YAF_angry": 4,
            "YAF_disgust": 6,
            "YAF_fear": 5,
            "YAF_happy": 2,
            "YAF_pleasant_surprised": 7,
            "YAF_sad": 3,
            "YAF_neutral": 0,
        }
        label_map_ravdess = {
            "01": 0,
            "02": 1,
            "03": 2,
            "04": 3,
            "05": 4,
            "06": 5,
            "07": 6,
            "08": 7,
        }
        for idx, file in enumerate(audio_files_tess, start=1):
            feature = extract_features(
                file,
                include_mfcc=mfcc,
                include_chroma=chroma,
                include_mel=mel,
                include_contrast=contrast,
            )
            if feature is not None:
                features.append(feature)
                if "audio_speech_actors_01-24" in file:
                    # Extract label from RAVDESS file name
                    label = file.split(os.sep)[-1].split("-")[2]
                    labels.append(label_map_ravdess[label])
                else:
                    # Extract label from TESS file path
                    emotion = file.split(os.sep)[-2]
                    if emotion in label_map_tess:
                        labels.append(label_map_tess[emotion])
                    else:
                        print(f"Skipping {file} with unrecognized emotion: {emotion}")
                        features.pop()  # Remove the feature if label is not recognized
            print(
                f"Processing file {idx} of {len(audio_files_tess)} for set {set_name}"
            )

        features = np.array(features)
        labels = np.array(labels)
        features_labels.append((features, labels, set_name))

    return features_labels


# Create and save datasets
if not os.path.exists("../data/processed/features_labels_datasets.npy"):
    datasets = create_datasets()
    np.save("../data/processed/features_labels_datasets.npy", datasets)
else:
    datasets = np.load(
        "../data/processed/features_labels_datasets.npy", allow_pickle=True
    )
    print("Datasets loaded")

EOFError: No data left in file

In [4]:
# Function to train and evaluate the model for each dataset
def train_evaluate_model(features, labels, set_name):
    print(f"Training and evaluating for feature set: {set_name}")
    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42, stratify=labels
    )

    # Reshape data to fit the model: (num_samples, timesteps, num_features)
    X_train = np.expand_dims(X_train, axis=-1)
    X_test = np.expand_dims(X_test, axis=-1)

    # Initialize a sequential model
    model = Sequential()

    # First convolutional layer with 64 filters, kernel size 5, 'same' padding, and ReLU activation
    model.add(
        Conv1D(64, 5, padding="same", input_shape=(X_train.shape[1], X_train.shape[2]))
    )
    model.add(Activation("relu"))
    model.add(Dropout(0.1))
    model.add(MaxPooling1D(pool_size=4))

    # Second convolutional layer with 128 filters, kernel size 5, 'same' padding, and ReLU activation
    model.add(Conv1D(128, 5, padding="same"))
    model.add(Activation("relu"))
    model.add(Dropout(0.1))
    model.add(MaxPooling1D(pool_size=4))

    # Third convolutional layer with 256 filters, kernel size 5, 'same' padding, and ReLU activation
    model.add(Conv1D(256, 5, padding="same"))
    model.add(Activation("relu"))
    model.add(Dropout(0.1))

    # Flatten the output for the dense layer
    model.add(Flatten())

    # Dense layer with 8 units and softmax activation for multi-class classification
    model.add(Dense(8))
    model.add(Activation("softmax"))

    # Define the RMSprop optimizer with a lower learning rate
    opt = RMSprop(learning_rate=0.001)

    # Compile the model with categorical crossentropy loss and the defined optimizer
    model.compile(
        loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"]
    )

    # Train the model
    history = model.fit(
        X_train,
        y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_test, y_test),
        verbose=1,
    )

    # Evaluate the model
    score = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test accuracy for {set_name}: {score[1]}")

    # Generate classification report and confusion matrix
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)

    print(classification_report(y_test, y_pred_classes))

    # Plot confusion matrix
    confusion_mtx = confusion_matrix(y_test, y_pred_classes)
    plt.figure(figsize=(10, 8))
    sn.heatmap(confusion_mtx, annot=True, fmt="d")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title(f"Confusion Matrix for {set_name}")
    plt.show()


# Train and evaluate the model for each dataset
for features, labels, set_name in datasets:
    train_evaluate_model(features, labels, set_name)

NameError: name 'datasets' is not defined