In [1]:
# Import necessary libraries for data analysis and visualization
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import librosa
import librosa.display
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import umap.umap_ as umap
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Function to recursively get all audio files from a directory
def get_audio_files(base_path):
    """Recursively collect the paths of all ``.wav`` files below ``base_path``.

    Args:
        base_path: Directory to search. Every sub-directory is visited
            with ``os.walk``.

    Returns:
        A sorted list of paths, each built by joining the visited directory
        with the file name. The list is empty when nothing matches (which
        includes the case where ``base_path`` does not exist, because
        ``os.walk`` then yields nothing).
    """
    audio_files = [
        os.path.join(root, name)
        for root, _, names in os.walk(base_path)
        for name in names
        if name.endswith(".wav")
    ]
    # os.walk yields entries in a filesystem-dependent order; sorting makes
    # the listing (and anything cached from it) reproducible across machines.
    audio_files.sort()
    return audio_files


# Base path for the TESS dataset
base_path_TESS = "../data/raw/TESS_Toronto_emotional_speech_set_data"

# Get all audio files from the TESS dataset
audio_files_tess = get_audio_files(base_path_TESS)

# Save the audio file paths (one per line). An existing listing is left
# untouched so later cells keep seeing the same file order.
audio_files_tess_txt = "../data/processed/audio_files_tess.txt"
if not os.path.exists(audio_files_tess_txt):
    # Make sure the output directory is there on a fresh checkout.
    os.makedirs(os.path.dirname(audio_files_tess_txt), exist_ok=True)
    with open(audio_files_tess_txt, "w") as f:
        f.writelines(f"{item}\n" for item in audio_files_tess)
    print(f"{audio_files_tess_txt} is created")
else:
    print(f"{audio_files_tess_txt} already exists")

../data/processed/audio_files_tess.txt already exists


In [3]:
# Function to extract mfcc, chroma, mel, and contrast features from audio files
def extract_features(file_path, sample_rate=22050):
    """Summarise one audio file as a single 1-D feature vector.

    The file is loaded with ``librosa.load(file_path, sr=sample_rate)``.
    Four time-frequency representations are computed (40 MFCCs, chroma,
    mel spectrogram, spectral contrast), each is averaged along axis 1,
    and the four averages are concatenated.

    Args:
        file_path: Path of the audio file to read.
        sample_rate: Sampling rate passed to ``librosa.load`` as ``sr``.

    Returns:
        The concatenated feature vector, or ``None`` when anything in the
        loading/extraction step raises. The failure is printed, not raised,
        so a batch run can continue past unreadable files.
    """
    try:
        audio, sr = librosa.load(file_path, sr=sample_rate)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
        chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
        mel = librosa.feature.melspectrogram(y=audio, sr=sr)
        contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
        # Collapse the second axis of each representation to its mean.
        return np.hstack(
            [np.mean(block, axis=1) for block in (mfccs, chroma, mel, contrast)]
        )
    except Exception as exc:
        # Best-effort by design, but report *why* the file failed so that a
        # missing file, a corrupt file and a programming error can be told apart.
        print(
            f"Error encountered while parsing file: {file_path} "
            f"({type(exc).__name__}: {exc})"
        )
        return None


# Read the cached listing back: one audio path per line, surrounding
# whitespace (including the trailing newline) removed.
with open("../data/processed/audio_files_tess.txt", "r") as path_listing:
    audio_files_tess = [raw_line.strip() for raw_line in path_listing]

# Accumulators filled by the feature-extraction loop
features, labels = [], []

# TESS: folder name -> numeric emotion code. Folder names are listed exactly
# as spelled here (capitalisation differs between the OAF and YAF sets).
label_map_tess = {
    folder: code
    for code, folders in {
        0: ("OAF_neutral", "YAF_neutral"),
        2: ("OAF_happy", "YAF_happy"),
        3: ("OAF_Sad", "YAF_sad"),
        4: ("OAF_angry", "YAF_angry"),
        5: ("OAF_Fear", "YAF_fear"),
        6: ("OAF_disgust", "YAF_disgust"),
        7: ("OAF_Pleasant_surprise", "YAF_pleasant_surprised"),
    }.items()
    for folder in folders
}

# RAVDESS: two-digit code "01".."08" -> 0..7
label_map_ravdess = {f"{code + 1:02d}": code for code in range(8)}
features_tess_path = "../data/processed/features_tess.npy"
labels_tess_path = "../data/processed/labels_tess.npy"

# Recompute unless BOTH cache files are present: the else-branch loads both,
# so checking only the features file would crash when the labels are missing.
if not (os.path.exists(features_tess_path) and os.path.exists(labels_tess_path)):
    for idx, file in enumerate(audio_files_tess, start=1):
        feature = extract_features(file)
        if feature is None:
            # extract_features has already reported the failure.
            continue

        if "audio_speech_actors_01-24" in file:
            # Extract label from RAVDESS file name (third dash-separated field)
            label = label_map_ravdess[os.path.basename(file).split("-")[2]]
        else:
            # Extract label from TESS file path (name of the containing folder)
            emotion = os.path.basename(os.path.dirname(file))
            label = label_map_tess.get(emotion)
            if label is None:
                print(f"Skipping {file} with unrecognized emotion: {emotion}")

        # Store feature and label together so the two lists can never drift
        # apart. `is not None` matters: 0 is a valid label.
        if label is not None:
            features.append(feature)
            labels.append(label)
        print(f"Processing file {idx} of {len(audio_files_tess)}")

    print("Feature extraction complete.")
    features_tess = np.array(features)
    labels_tess = np.array(labels)
    np.save(features_tess_path, features_tess)
    np.save(labels_tess_path, labels_tess)
else:
    # Load features and labels
    features_tess = np.load(features_tess_path)
    labels_tess = np.load(labels_tess_path)

# Every feature row needs exactly one label, whether freshly computed or cached.
assert len(features_tess) == len(labels_tess), "features/labels are misaligned"

In [4]:
# Hold out 20% of the rows as a test set. The split is seeded (42) and
# stratified on the labels.
split_parts = train_test_split(
    features_tess,
    labels_tess,
    test_size=0.2,
    random_state=42,
    stratify=labels_tess,
)
X_train, X_test, y_train, y_test = split_parts

In [5]:
# Augmentation pipeline: four transforms chained with Compose, each one
# configured with p=0.5.
waveform_transforms = [
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    Shift(min_shift=-0.5, max_shift=0.5, p=0.5),
]
augment = Compose(waveform_transforms)

In [6]:
# Apply augmentations to the training data
def augment_audio_features(features, sample_rate=22050):
    """Augment audio and extract one summary feature vector per item.

    ``augment`` is a waveform pipeline (noise, time-stretch, pitch-shift,
    shift), so every item must be *audio*: either a path to an audio file,
    which is loaded with ``librosa.load(..., sr=sample_rate)``, or a 1-D
    array of samples. Already-extracted feature vectors are still accepted
    for backward compatibility, but their values are then treated as audio
    samples and the result carries no acoustic meaning.

    Args:
        features: Iterable of audio file paths and/or 1-D sample arrays.
        sample_rate: Sampling rate used for loading, for the augmentation
            pipeline and for the librosa feature functions.

    Returns:
        2-D array with one row per input item, in input order. Each row
        concatenates the axis-1 means of 40 MFCCs, chroma, mel spectrogram
        and spectral contrast (same recipe as ``extract_features``).
    """
    augmented_features = []
    for item in features:
        if isinstance(item, (str, os.PathLike)):
            samples, _ = librosa.load(item, sr=sample_rate)
        else:
            samples = item
        augmented_audio = augment(samples=samples, sample_rate=sample_rate)
        mfccs = librosa.feature.mfcc(y=augmented_audio, sr=sample_rate, n_mfcc=40)
        chroma = librosa.feature.chroma_stft(y=augmented_audio, sr=sample_rate)
        mel = librosa.feature.melspectrogram(y=augmented_audio, sr=sample_rate)
        contrast = librosa.feature.spectral_contrast(y=augmented_audio, sr=sample_rate)
        augmented_features.append(
            np.hstack(
                [np.mean(block, axis=1) for block in (mfccs, chroma, mel, contrast)]
            )
        )
    return np.array(augmented_features)


# X_train holds extracted feature vectors, not waveforms, so it cannot be fed
# to the waveform augmentations. Recover the audio files behind the training
# rows by repeating the same seeded, stratified split on row indices.
if len(audio_files_tess) != len(features_tess):
    raise ValueError(
        "audio_files_tess and features_tess differ in length (files were "
        "skipped during extraction?); cannot map training rows back to files"
    )
train_indices, _ = train_test_split(
    np.arange(len(labels_tess)),
    test_size=0.2,
    random_state=42,
    stratify=labels_tess,
)
# Guard against the two splits ever diverging (e.g. changed split parameters).
if not np.array_equal(features_tess[train_indices], X_train):
    raise ValueError("index split does not reproduce X_train; check split parameters")

# Rows come back in the same order as X_train, so y_train still labels them.
X_train_augmented = augment_audio_features(
    [audio_files_tess[i] for i in train_indices]
)



In [7]:
# Combine the original and augmented training data. The augmented rows are
# produced one per training row, in the same order, so the labels are simply
# repeated.
X_train_combined = np.vstack((X_train, X_train_augmented))
y_train_combined = np.hstack((y_train, y_train))

# One label per feature row, otherwise everything downstream is misaligned.
assert X_train_combined.shape[0] == y_train_combined.shape[0]

print(f"Original training data size: {X_train.shape}")
print(f"Augmented training data size: {X_train_augmented.shape}")
print(f"Combined training data size: {X_train_combined.shape}")
print(f"Combined training labels size: {y_train_combined.shape}")

Original training data size: (2240, 187)
Augmented training data size: (2240, 187)
Combined training data size: (4480, 187)
Combined training data size: (4480,)


In [8]:
# Standardize the complete feature matrix (used by the whole-dataset plots)
scaler = StandardScaler()
scaler.fit(features_tess)
features_scaled = scaler.transform(features_tess)

In [9]:
# Numeric emotion code -> emotion name; a name's position in the list is its code
label_mapping = dict(
    enumerate(
        [
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        ]
    )
)

# Translate each numeric label array into a list of emotion names
emotion_labels = list(map(label_mapping.__getitem__, labels_tess))
emotion_labels_train = list(map(label_mapping.__getitem__, y_train_combined))
emotion_labels_test = list(map(label_mapping.__getitem__, y_test))

In [10]:
# Reduce the standardized features to two principal components
pca = PCA(n_components=2)
pca_result = pca.fit_transform(features_scaled)

# Table of the two components plus the emotion name of every row
pca_df = pd.DataFrame(pca_result, columns=["PC1", "PC2"]).assign(label=emotion_labels)

# Scatter plot of the projection, one colour per emotion
pca_scatter_options = dict(
    x="PC1",
    y="PC2",
    color=pca_df["label"].astype(str),  # force categorical colouring
    title="PCA of Audio Features",
    labels={"color": "Label"},
)
fig_PCA = px.scatter(pca_df, **pca_scatter_options)

fig_PCA.show()

In [11]:
# Embed the standardized features in two dimensions with t-SNE (seeded)
tsne = TSNE(n_components=2, random_state=42)
tsne_result = tsne.fit_transform(features_scaled)

# Table of the t-SNE coordinates plus the emotion name of every row
tsne_df = pd.DataFrame(tsne_result, columns=["TSNE1", "TSNE2"]).assign(
    label=emotion_labels
)

# Scatter plot of the embedding, one colour per emotion
tsne_scatter_options = dict(
    x="TSNE1",
    y="TSNE2",
    color=tsne_df["label"].astype(str),  # force categorical colouring
    title="TSNE of Audio Features",
    labels={"color": "Label"},
)
fig_TSNE = px.scatter(tsne_df, **tsne_scatter_options)

fig_TSNE.show()

In [12]:
# Fit the scaler on the combined training data only, then apply that same
# fitted scaler to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train_combined)
X_train_scaled = scaler.transform(X_train_combined)
X_test_scaled = scaler.transform(X_test)

In [13]:
# 2-D PCA and t-SNE views of the scaled training and test features.
# NOTE(review): the test-set PCA is fitted on the test data itself, so its
# axes are not the training axes and the two PCA plots are not directly
# comparable. `pca_train.transform(X_test_scaled)` would share the axes;
# confirm which is intended. (t-SNE is fitted per data set via fit_transform.)


def _embedding_frame(embedding, columns, labels):
    """Wrap a 2-D embedding in a DataFrame and attach ``labels`` as column ``label``."""
    frame = pd.DataFrame(embedding, columns=columns)
    frame["label"] = labels
    return frame


def _embedding_scatter(frame, x, y, title):
    """Scatter ``frame[x]`` against ``frame[y]``, coloured by the ``label`` column."""
    return px.scatter(
        frame,
        x=x,
        y=y,
        color=frame["label"].astype(str),
        title=title,
        labels={"color": "Label"},
    )


# PCA on training data
pca_train = PCA(n_components=2)
pca_result_train = pca_train.fit_transform(X_train_scaled)
pca_df_train = _embedding_frame(pca_result_train, ["PC1", "PC2"], emotion_labels_train)

# PCA on test data
pca_test = PCA(n_components=2)
pca_result_test = pca_test.fit_transform(X_test_scaled)
pca_df_test = _embedding_frame(pca_result_test, ["PC1", "PC2"], emotion_labels_test)

# Plot PCA results for training data
fig_PCA_train = _embedding_scatter(
    pca_df_train, "PC1", "PC2", "PCA of Training Audio Features"
)
fig_PCA_train.show()

# Plot PCA results for test data
fig_PCA_test = _embedding_scatter(
    pca_df_test, "PC1", "PC2", "PCA of Test Audio Features"
)
fig_PCA_test.show()

# t-SNE on training data
tsne_train = TSNE(n_components=2, random_state=42)
tsne_result_train = tsne_train.fit_transform(X_train_scaled)
tsne_df_train = _embedding_frame(
    tsne_result_train, ["TSNE1", "TSNE2"], emotion_labels_train
)

# t-SNE on test data
tsne_test = TSNE(n_components=2, random_state=42)
tsne_result_test = tsne_test.fit_transform(X_test_scaled)
tsne_df_test = _embedding_frame(
    tsne_result_test, ["TSNE1", "TSNE2"], emotion_labels_test
)

# Plot t-SNE results for training data
fig_TSNE_train = _embedding_scatter(
    tsne_df_train, "TSNE1", "TSNE2", "t-SNE of Training Audio Features"
)
fig_TSNE_train.show()

# Plot t-SNE results for test data
fig_TSNE_test = _embedding_scatter(
    tsne_df_test, "TSNE1", "TSNE2", "t-SNE of Test Audio Features"
)
fig_TSNE_test.show()