In [1]:
# Import necessary libraries for data analysis and visualization
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import librosa
import librosa.display
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import umap.umap_ as umap
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Function to recursively get all audio files from a directory
def get_audio_files(base_path, extensions=(".wav",)):
    """Recursively collect audio file paths below ``base_path``.

    Parameters
    ----------
    base_path : str
        Directory to walk. A non-existent directory yields an empty list.
    extensions : str or iterable of str, optional
        File-name suffixes to accept, compared case-insensitively
        (so ``.WAV`` matches ``.wav``). Defaults to ``(".wav",)``.

    Returns
    -------
    list of str
        ``os.path.join(root, name)`` for every matching file, in a
        deterministic order: directories are visited in sorted order and
        the files inside each directory are sorted by name.
    """
    # Accept a single suffix passed as a plain string.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    audio_files = []
    for root, dirs, files in os.walk(base_path):
        # Sorting in place steers the order in which os.walk descends, so the
        # result does not depend on the filesystem's enumeration order.
        dirs.sort()
        audio_files.extend(
            os.path.join(root, name)
            for name in sorted(files)
            if name.lower().endswith(suffixes)
        )
    return audio_files


# Root directories of the two raw datasets
base_path_RAVDESS = "../data/raw/audio_speech_actors_01-24"
base_path_TESS = "../data/raw/TESS_Toronto_emotional_speech_set_data"

# Collect every audio file from both datasets (RAVDESS first, then TESS)
audio_files_ravdess = get_audio_files(base_path_RAVDESS)
audio_files_tess = get_audio_files(base_path_TESS)
audio_files = audio_files_ravdess + audio_files_tess

# Persist the path list once, one path per line. An existing list is left
# untouched so that later cells keep reading the same set of files.
audio_files_txt = "../data/processed/audio_files.txt"
if not os.path.exists(audio_files_txt):
    # The output directory may not exist yet on a fresh checkout.
    os.makedirs(os.path.dirname(audio_files_txt), exist_ok=True)
    with open(audio_files_txt, "w") as f:
        f.writelines(f"{item}\n" for item in audio_files)
    print(f"{audio_files_txt} is created")
else:
    print(f"{audio_files_txt} already exists")

../data/processed/audio_files.txt already exists


In [3]:
# Function to extract mfcc, chroma, mel, and contrast features from audio files
def extract_features(file_path, sample_rate=22050):
    """Summarize one audio file as a single 1-D feature vector.

    The file is loaded with librosa at ``sample_rate``; MFCC (40
    coefficients), chroma, mel-spectrogram and spectral-contrast matrices are
    computed, each is averaged along axis 1, and the four averaged vectors are
    concatenated in that order.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load.
    sample_rate : int, optional
        Sampling rate passed to ``librosa.load`` (default 22050).

    Returns
    -------
    numpy.ndarray or None
        The concatenated feature vector, or ``None`` if loading or feature
        extraction raised. The failure is reported on stdout together with
        the underlying error message.
    """
    try:
        audio, sr = librosa.load(file_path, sr=sample_rate)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
        chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
        mel = librosa.feature.melspectrogram(y=audio, sr=sr)
        contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
        # Collapse axis 1 of every matrix to its mean, leaving one value per
        # row, then join the four vectors end to end.
        return np.hstack(
            [np.mean(matrix, axis=1) for matrix in (mfccs, chroma, mel, contrast)]
        )
    except Exception as e:
        # Best-effort by design: one unreadable file must not abort a whole
        # batch, but the cause is reported so it can be diagnosed.
        print(f"Error encountered while parsing file: {file_path}, {e}")
        return None


# Load the audio file paths written by the previous cell, ignoring blank lines
with open("../data/processed/audio_files.txt", "r") as path_file:
    audio_files = [line.strip() for line in path_file if line.strip()]

# TESS: the emotion is taken from the name of the folder containing the file
label_map_tess = {
    "OAF_angry": 4,
    "OAF_disgust": 6,
    "OAF_Fear": 5,
    "OAF_happy": 2,
    "OAF_Pleasant_surprise": 7,
    "OAF_Sad": 3,
    "OAF_neutral": 0,
    "YAF_angry": 4,
    "YAF_disgust": 6,
    "YAF_fear": 5,
    "YAF_happy": 2,
    "YAF_pleasant_surprised": 7,
    "YAF_sad": 3,
    "YAF_neutral": 0,
}
# RAVDESS: the emotion is the third dash-separated field of the file name
label_map_ravdess = {
    "01": 0,
    "02": 1,
    "03": 2,
    "04": 3,
    "05": 4,
    "06": 5,
    "07": 6,
    "08": 7,
}

features_path = "../data/processed/features.npy"
labels_path = "../data/processed/labels.npy"

# The cache is only usable when BOTH arrays are present; otherwise rebuild both
# so that features and labels are guaranteed to come from the same run.
if os.path.exists(features_path) and os.path.exists(labels_path):
    features = np.load(features_path)
    labels = np.load(labels_path)
else:
    features = []
    labels = []
    for idx, file in enumerate(audio_files, start=1):
        print(f"Processing file {idx} of {len(audio_files)}")

        # Resolve the label first: a file without a usable label is skipped
        # before paying for feature extraction.
        if "audio_speech_actors_01-24" in file:
            name_fields = os.path.basename(file).split("-")
            emotion = name_fields[2] if len(name_fields) > 2 else None
            label = label_map_ravdess.get(emotion)
        else:
            emotion = os.path.basename(os.path.dirname(file))
            label = label_map_tess.get(emotion)

        # Compare against None explicitly: label 0 is a valid class id.
        if label is None:
            print(f"Skipping {file} with unrecognized emotion: {emotion}")
            continue

        feature = extract_features(file)
        if feature is None:
            # extract_features already reported the failure.
            continue

        features.append(feature)
        labels.append(label)

    print("Feature extraction complete.")
    features = np.array(features)
    labels = np.array(labels)
    os.makedirs(os.path.dirname(features_path), exist_ok=True)
    np.save(features_path, features)
    np.save(labels_path, labels)

# Every feature row must have exactly one label (also guards a stale cache).
assert len(features) == len(labels), (
    f"features ({len(features)}) and labels ({len(labels)}) are misaligned; "
    "delete the cached .npy files and re-run this cell"
)

In [4]:
# Fit a StandardScaler on the feature matrix, then apply it to that same data
scaler = StandardScaler().fit(features)
features_scaled = scaler.transform(features)

In [5]:
# Emotion names indexed by their numeric class id (0 through 7)
label_mapping = dict(
    enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        )
    )
)

# Look up the emotion name for every numeric label, preserving order
emotion_labels = []
for numeric_label in labels:
    emotion_labels.append(label_mapping[numeric_label])

In [6]:
# Reduce the standardized features to two principal components
pca = PCA(n_components=2)
pca_result = pca.fit_transform(features_scaled)

# Tabulate the 2-D projection alongside each sample's emotion name
pca_df = pd.DataFrame(pca_result, columns=["PC1", "PC2"]).assign(
    label=emotion_labels
)

# Scatter plot of the projection; the label column is cast to str so that it
# is treated as categorical data when colouring the points
fig_PCA = px.scatter(
    data_frame=pca_df,
    x="PC1",
    y="PC2",
    color=pca_df["label"].astype(str),
    labels={"color": "Label"},
    title="PCA of Audio Features",
)
fig_PCA.show()

In [7]:
# Embed the standardized features in two dimensions with t-SNE (fixed seed)
tsne = TSNE(n_components=2, random_state=42)
tsne_result = tsne.fit_transform(features_scaled)

# Tabulate the t-SNE embedding alongside each sample's emotion name
tsne_df = pd.DataFrame(tsne_result, columns=["TSNE1", "TSNE2"]).assign(
    label=emotion_labels
)

# Scatter plot of the embedding; the label column is cast to str so that it
# is treated as categorical data when colouring the points
fig_TSNE = px.scatter(
    data_frame=tsne_df,
    x="TSNE1",
    y="TSNE2",
    color=tsne_df["label"].astype(str),
    labels={"color": "Label"},
    title="TSNE of Audio Features",
)
fig_TSNE.show()

In [8]:
# Embed the standardized features in two dimensions with UMAP (fixed seed)
umap_reducer = umap.UMAP(n_components=2, random_state=42)
umap_result = umap_reducer.fit_transform(features_scaled)

# Tabulate the UMAP embedding alongside each sample's emotion name
umap_df = pd.DataFrame(umap_result, columns=["UMAP1", "UMAP2"]).assign(
    label=emotion_labels
)

# Scatter plot of the embedding; the label column is cast to str so that it
# is treated as categorical data when colouring the points
fig_UMAP = px.scatter(
    data_frame=umap_df,
    x="UMAP1",
    y="UMAP2",
    color=umap_df["label"].astype(str),
    labels={"color": "Label"},
    title="UMAP of Audio Features",
)
fig_UMAP.show()


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



In [9]:
# One example recording per emotion, keyed by emotion name. Every entry is an
# OAF_back_* file from the TESS dataset; each tuple below gives the emotion
# key, the folder holding the file, and the suffix used in the file name.
_tess_root = "../data/raw/TESS_Toronto_emotional_speech_set_data"
samples = {
    emotion: f"{_tess_root}/{folder}/OAF_back_{tag}.wav"
    for emotion, folder, tag in (
        ("angry", "OAF_angry", "angry"),
        ("disgust", "OAF_disgust", "disgust"),
        ("fear", "OAF_Fear", "fear"),
        ("happy", "OAF_happy", "happy"),
        ("neutral", "OAF_neutral", "neutral"),
        ("surprise", "OAF_Pleasant_surprise", "ps"),
        ("sad", "OAF_Sad", "sad"),
    )
}
# Function to extract mfcc, chroma, mel, and contrast features from audio files
def extract_features_vis(file_path, sample_rate=22050):
    """Compute the full (un-averaged) feature matrices for one audio file.

    The file is loaded with librosa at ``sample_rate`` and four matrices are
    computed from it: MFCC (40 coefficients), chroma, mel spectrogram and
    spectral contrast.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast)`` on success, or
        ``(None, None, None, None)`` if loading or extraction raised; the
        error is printed together with the file path.
    """
    try:
        waveform, rate = librosa.load(file_path, sr=sample_rate)
        # Every extractor receives the same signal and sampling rate.
        signal_args = {"y": waveform, "sr": rate}
        return (
            librosa.feature.mfcc(n_mfcc=40, **signal_args),
            librosa.feature.chroma_stft(**signal_args),
            librosa.feature.melspectrogram(**signal_args),
            librosa.feature.spectral_contrast(**signal_args),
        )
    except Exception as err:
        print(f"Error encountered while parsing file: {file_path}, {err}")
        return (None,) * 4


# Function to plot features
def plot_features(emotion, mfccs, chroma, mel, contrast):
    """Show the four feature matrices of one recording as stacked heatmaps.

    Rows, top to bottom: MFCC, chroma, mel spectrogram (converted to dB
    relative to its maximum) and spectral contrast. The rows share their
    x-axes and the figure is titled with ``emotion``.

    Parameters
    ----------
    emotion : str
        Emotion name inserted into the figure title.
    mfccs, chroma, mel, contrast : array-like
        Matrices drawn as heatmaps; ``mel`` is passed through
        ``librosa.power_to_db`` before plotting.
    """
    # (subplot title, y-axis title, matrix to draw), listed top to bottom
    panels = (
        ("MFCC", "MFCC Coefficients", mfccs),
        ("Chroma", "Chroma", chroma),
        ("Mel Spectrogram", "Mel", librosa.power_to_db(mel, ref=np.max)),
        ("Spectral Contrast", "Contrast", contrast),
    )
    row_numbers = range(1, len(panels) + 1)

    fig = make_subplots(
        rows=len(panels),
        cols=1,
        shared_xaxes=True,
        subplot_titles=tuple(subplot_title for subplot_title, _, _ in panels),
        vertical_spacing=0.1,
    )

    # One heatmap per row
    for row, (_, _, matrix) in zip(row_numbers, panels):
        fig.add_trace(go.Heatmap(z=matrix, colorscale="Viridis"), row=row, col=1)

    fig.update_layout(
        height=800, width=800, title_text=f"Features for {emotion} Emotion"
    )

    # Every row gets the same x-axis title
    for row in row_numbers:
        fig.update_xaxes(title_text="Time", row=row, col=1)

    # Each row gets its own y-axis title
    for row, (_, y_title, _) in zip(row_numbers, panels):
        fig.update_yaxes(title_text=y_title, row=row, col=1)

    fig.show()


# Plot the feature heatmaps of every sample; a sample for which any of the
# four matrices is missing is skipped
for emotion, file_path in samples.items():
    mfccs, chroma, mel, contrast = extract_features_vis(file_path)
    extraction_failed = any(
        matrix is None for matrix in (mfccs, chroma, mel, contrast)
    )
    if extraction_failed:
        continue
    plot_features(emotion, mfccs, chroma, mel, contrast)