In [19]:
# Import necessary libraries for data analysis and visualization
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import librosa
import librosa.display
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import umap.umap_ as umap
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [20]:
# Function to recursively get all audio files from a directory
def get_audio_files(base_path):
    """Recursively collect the paths of all WAV files below ``base_path``.

    Args:
        base_path: Directory to search. A directory that does not exist
            yields an empty list, because ``os.walk`` ignores listing errors
            by default.

    Returns:
        list[str]: One path per file whose name ends in ``.wav`` (any letter
        case), each built by joining the walked directory with the file name.
        The list is sorted so its order does not depend on how the file
        system happens to enumerate directories.
    """
    audio_files = []
    for root, _, files in os.walk(base_path):
        for file in files:
            # Compare case-insensitively so e.g. "clip.WAV" is not dropped.
            if file.lower().endswith(".wav"):
                audio_files.append(os.path.join(root, file))
    # os.walk gives no ordering guarantee; sort for reproducible runs.
    return sorted(audio_files)


# Base path for RAVDESS dataset
base_path_RAVDESS = "../data/raw/audio_speech_actors_01-24"

# Get all audio files from the RAVDESS dataset
audio_files = get_audio_files(base_path_RAVDESS)

# Save the audio file paths once; an existing list file is left untouched
audio_list_path = "../data/processed/audio_files_ravdess.txt"
if not os.path.exists(audio_list_path):
    # Create the output directory first: open(..., "w") does not create
    # missing parent directories and would raise FileNotFoundError.
    os.makedirs(os.path.dirname(audio_list_path), exist_ok=True)
    with open(audio_list_path, "w") as f:
        for item in audio_files:
            f.write("%s\n" % item)
    print(f"{audio_list_path} created")
else:
    print(f"{audio_list_path} already exists")

../data/processed/audio_files_ravdess.txt already exists


In [21]:
# Function to recursively get all audio files from a directory
def get_audio_files(base_path):
    """Return the sorted paths of every WAV file found under ``base_path``.

    NOTE(review): this cell defines ``get_audio_files`` a second time; the
    definition from whichever cell ran last is the one in effect, so keep
    both copies in sync or delete one of them.

    Args:
        base_path: Root directory of the search. ``os.walk`` ignores listing
            errors by default, so a missing directory gives an empty list.

    Returns:
        list[str]: Paths of all files whose name ends in ``.wav`` regardless
        of letter case, sorted so the result is independent of the order in
        which the file system lists directory entries.
    """
    return sorted(
        os.path.join(root, name)
        for root, _, names in os.walk(base_path)
        for name in names
        # Case-insensitive match so upper-case extensions are kept too.
        if name.lower().endswith(".wav")
    )


# Base path for RAVDESS dataset
base_path_RAVDESS = "../data/raw/audio_speech_actors_01-24"

# Get all audio files from the RAVDESS dataset
audio_files_ravdess = get_audio_files(base_path_RAVDESS)

# Save the audio file paths once; an existing list file is left untouched
audio_list_path = "../data/processed/audio_files_ravdess.txt"
if not os.path.exists(audio_list_path):
    # open(..., "w") does not create missing parent directories, so make
    # sure the output directory exists before writing.
    os.makedirs(os.path.dirname(audio_list_path), exist_ok=True)
    with open(audio_list_path, "w") as f:
        for item in audio_files_ravdess:
            f.write("%s\n" % item)
    print(f"{audio_list_path} created")
else:
    print(f"{audio_list_path} already exists")

../data/processed/audio_files_ravdess.txt already exists


In [22]:
# Function to extract mfcc, chroma, mel, and contrast features from audio files
def extract_features(file_path, sample_rate=22050, n_mfcc=40):
    """Summarise one audio file as a single 1-D feature vector.

    The file is loaded with librosa, four feature matrices are computed
    (MFCC, chroma, mel spectrogram, spectral contrast), each is averaged
    along axis 1, and the four mean vectors are concatenated in that order.

    Args:
        file_path: Path of the audio file to load.
        sample_rate: Sampling rate passed to ``librosa.load`` as ``sr``.
        n_mfcc: Number of MFCC coefficients to compute (default 40, the
            value that was previously hard-coded).

    Returns:
        numpy.ndarray | None: The concatenated mean features, or ``None``
        when loading or feature computation raised an exception.
        NOTE(review): with librosa's default settings this is presumably
        ``n_mfcc`` + 12 chroma + 128 mel + 7 contrast values - confirm
        against the installed librosa version.
    """
    try:
        audio, sr = librosa.load(file_path, sr=sample_rate)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
        mel = librosa.feature.melspectrogram(y=audio, sr=sr)
        contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
        # Collapse axis 1 of every matrix so files of different length
        # produce vectors of the same size.
        features = np.hstack(
            (
                np.mean(mfccs, axis=1),
                np.mean(chroma, axis=1),
                np.mean(mel, axis=1),
                np.mean(contrast, axis=1),
            )
        )
        return features
    except Exception as exc:
        # Best effort: keep the batch running, but report why the file
        # failed instead of discarding the cause.
        print(
            f"Error encountered while parsing file: {file_path} "
            f"({type(exc).__name__}: {exc})"
        )
        return None


# Load the audio file paths from the text file (one path per line)
with open("../data/processed/audio_files_ravdess.txt", "r") as path_list_file:
    # Iterate the handle directly and drop blank lines, so an empty line is
    # never handed to the feature extractor as if it were a path.
    audio_files_ravdess = [
        line.strip() for line in path_list_file if line.strip()
    ]

# Accumulators for the extracted feature vectors and their numeric labels
features, labels = [], []

# TESS: folder name -> numeric emotion label. The keys are compared verbatim
# with the folder name, so their mixed capitalisation must be kept as is.
label_map_tess = dict(
    [
        ("OAF_angry", 4),
        ("OAF_disgust", 6),
        ("OAF_Fear", 5),
        ("OAF_happy", 2),
        ("OAF_Pleasant_surprise", 7),
        ("OAF_Sad", 3),
        ("OAF_neutral", 0),
        ("YAF_angry", 4),
        ("YAF_disgust", 6),
        ("YAF_fear", 5),
        ("YAF_happy", 2),
        ("YAF_pleasant_surprised", 7),
        ("YAF_sad", 3),
        ("YAF_neutral", 0),
    ]
)

# RAVDESS: two-digit emotion code "01".."08" -> zero-based label 0..7
label_map_ravdess = {f"{code:02d}": code - 1 for code in range(1, 9)}
features_path = "../data/processed/features_ravdess.npy"
labels_path = "../data/processed/labels_ravdess.npy"

# Re-extract unless BOTH cached arrays exist: the else-branch loads both, so
# a missing labels file must not be mistaken for a usable cache.
if not (os.path.exists(features_path) and os.path.exists(labels_path)):
    feature_rows, label_rows = [], []
    # Count the list that is actually iterated, not a variable left over
    # from another cell.
    total_files = len(audio_files_ravdess)
    for idx, file in enumerate(audio_files_ravdess, start=1):
        feature = extract_features(file)
        if feature is not None:
            if "audio_speech_actors_01-24" in file:
                # RAVDESS: the emotion code is the third dash-separated
                # field of the file name.
                name_parts = os.path.basename(file).split("-")
                emotion = name_parts[2] if len(name_parts) > 2 else None
                label = label_map_ravdess.get(emotion)
            else:
                # TESS: the emotion is the name of the parent folder.
                emotion = os.path.basename(os.path.dirname(file))
                label = label_map_tess.get(emotion)

            # Test against None explicitly: 0 is a valid label.
            if label is None:
                print(f"Skipping {file} with unrecognized emotion: {emotion}")
            else:
                # Append both together so features and labels stay aligned.
                feature_rows.append(feature)
                label_rows.append(label)
            print(f"Processing file {idx} of {total_files}")

    print("Feature extraction complete.")
    features = np.array(feature_rows)
    np.save(features_path, features)
    labels = np.array(label_rows)
    np.save(labels_path, labels)
else:
    # Load features and labels
    features = np.load(features_path)
    labels = np.load(labels_path)

In [23]:
# Standardize the features: fit the scaler on the feature matrix, then apply
# that fitted scaler to the same matrix
scaler = StandardScaler().fit(features)
features_scaled = scaler.transform(features)

In [24]:
# Numeric label -> emotion name; a name's position in the tuple is its label
label_mapping = dict(
    enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        )
    )
)

# Look up the emotion name of every numeric label, keeping the sample order
emotion_labels = [label_mapping[code] for code in labels]

In [25]:
# Apply PCA. The fixed seed keeps the projection reproducible in case
# scikit-learn's automatic solver choice picks the randomized SVD solver;
# 42 matches the seed used for t-SNE in this notebook.
pca = PCA(n_components=2, random_state=42)
pca_result = pca.fit_transform(features_scaled)

# Create a DataFrame for PCA results
pca_df = pd.DataFrame(pca_result, columns=["PC1", "PC2"])
pca_df["label"] = emotion_labels

# Plot PCA results. The "label" column already holds emotion names (strings),
# so it can be used directly as the categorical colour dimension.
fig_PCA = px.scatter(
    pca_df,
    x="PC1",
    y="PC2",
    color="label",
    title="PCA of Audio Features",
    labels={"label": "Label"},
)

fig_PCA.show()

In [26]:
# Perplexity values to try: 5 and 10, then every multiple of 10 from 20 to 100
perplexities = [5, 10, *range(20, 101, 10)]


# Function to apply t-SNE and plot the results for each perplexity value
def plot_tsne_perplexity(perplexity):
    """Embed the scaled features in 2-D with t-SNE and show a scatter plot.

    Reads the notebook-level ``features_scaled`` and ``emotion_labels``.

    Args:
        perplexity: Value passed to ``TSNE`` as ``perplexity``; it is also
            shown in the figure title.

    Returns:
        None. The figure is displayed with ``show()``.
    """
    embedding = TSNE(
        n_components=2, perplexity=perplexity, random_state=42
    ).fit_transform(features_scaled)

    # One row per sample: the two embedding coordinates plus the emotion name
    tsne_df = pd.DataFrame(embedding, columns=["TSNE1", "TSNE2"]).assign(
        label=emotion_labels
    )

    # Labels are cast to str so they are treated as categorical data
    colour_by = tsne_df["label"].astype(str)
    scatter_options = {
        "x": "TSNE1",
        "y": "TSNE2",
        "color": colour_by,
        "title": f"t-SNE of Audio Features (Perplexity={perplexity})",
        "labels": {"color": "Label"},
    }
    px.scatter(tsne_df, **scatter_options).show()


# Apply t-SNE for each perplexity value and plot the results.
# Every iteration fits a fresh t-SNE model and shows one figure, so this cell
# produces as many figures as there are entries in `perplexities`.
for perplexity in perplexities:
    plot_tsne_perplexity(perplexity)