In [5]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

# Root folders containing actor subfolders
root_dirs = ["input", "audio_speech_actors_01-24"]

# Emotion label mapping
emotion_map = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

metadata = []

# Step 1: Parse file metadata
for root in root_dirs:
    for subdir, _, files in os.walk(root):
        for file in files:
            if file.endswith(".wav"):
                parts = file.split("-")
                if len(parts) != 7:
                    continue  # skip malformed filenames
                emotion_id = parts[2]
                actor_id = int(parts[6].split(".")[0])
                gender = "female" if actor_id % 2 == 0 else "male"

                metadata.append({
                    "filename": file,
                    "filepath": os.path.join(subdir, file),
                    "emotion": emotion_map.get(emotion_id, "unknown"),
                    "gender": gender,
                    "actor_id": actor_id
                })

df = pd.DataFrame(metadata)

In [6]:
# Step 2: Extract MFCCs from each file
def extract_mfcc(filepath, n_mfcc=40, max_pad_len=174):
    try:
        audio, sr = librosa.load(filepath, sr=None)
        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        # Padding/truncating to a fixed length
        if mfcc.shape[1] < max_pad_len:
            pad_width = max_pad_len - mfcc.shape[1]
            mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
        else:
            mfcc = mfcc[:, :max_pad_len]
        return mfcc
    except Exception as e:
        print(f"Error processing {filepath}: {e}")
        return None

In [7]:
# Prepare data
X = []
y = []

print("Extracting MFCC features...")
for _, row in tqdm(df.iterrows(), total=len(df)):
    mfcc = extract_mfcc(row['filepath'])
    if mfcc is not None:
        X.append(mfcc)
        y.append(row['emotion'])

X = np.array(X)
y = np.array(y)

# Save data
np.save("X_mfcc.npy", X)
np.save("y_labels.npy", y)
print("✅ MFCC features and labels saved as 'X_mfcc.npy' and 'y_labels.npy'")

Extracting MFCC features...


100%|██████████| 2880/2880 [00:58<00:00, 49.26it/s] 


✅ MFCC features and labels saved as 'X_mfcc.npy' and 'y_labels.npy'
