In [None]:
from google.colab import drive
import os
import json

# Mount Google Drive into the Colab filesystem so the dataset is reachable
# under /content/drive.
drive.mount('/content/drive')
# Root folder that is searched recursively for .wav files further down.
audio_folder = "/content/drive/MyDrive/multimodal_emotion_recognition/data"

Mounted at /content/drive


In [None]:
import os
import glob
import numpy as np
import librosa
from tqdm import tqdm
from google.colab import drive

# Output location: one .npy file per audio clip is written into this folder.
mfcc_dir = "/content/drive/MyDrive/multimodal_emotion_recognition/mfccs"  # Output directory for MFCCs
# exist_ok=True makes this cell safe to re-run when the folder already exists.
os.makedirs(mfcc_dir, exist_ok=True)

# Emotion label map: two-digit emotion code (the third dash-separated field of
# a file name) -> human-readable label.
# NOTE(review): the codes look like the RAVDESS naming scheme — confirm against
# the dataset documentation.
emotion_map = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

def parse_emotion(filename):
    """Return the emotion label encoded in an audio file name.

    The label is looked up from the third dash-separated field of the file
    name (e.g. ``"03-01-05-01-02-01-12.wav"`` -> ``"angry"``).

    Args:
        filename: File name or full path of the audio clip. Only the final
            path component is inspected, so dashes in directory names cannot
            shift the field positions.

    Returns:
        The label from ``emotion_map``, or ``"unknown"`` when the name has
        fewer than three dash-separated fields or the code is not in the map.
    """
    fields = os.path.basename(filename).split("-")
    # Names that do not follow the dash-separated scheme get the same fallback
    # as an unmapped code instead of raising IndexError.
    if len(fields) < 3:
        return "unknown"
    return emotion_map.get(fields[2], "unknown")

# Collect every .wav below audio_folder (any nesting depth), in sorted order
# so the processing sequence is stable between runs.
wav_pattern = os.path.join(audio_folder, '**', '*.wav')
audio_files = glob.glob(wav_pattern, recursive=True)
audio_files = sorted(audio_files)
print(f"Found {len(audio_files)} audio files.")


# Generate and save MFCCs
#
# For every audio file: load it at 22050 Hz, compute 40 MFCCs, force the
# result to exactly `max_len` columns, and save it to mfcc_dir as
# "<emotion>_<original base name>.npy".
#
# NOTE(review): the output name uses only the file's base name, so two inputs
# with the same base name in different subfolders map to the same out_path and
# the later one overwrites the earlier — confirm this is intended.

# Number of columns (axis 1) every saved MFCC matrix is padded/truncated to.
# Assigned once here instead of on every loop iteration.
max_len = 200


def _fit_to_length(features, target_len):
    """Zero-pad or truncate a 2-D array along axis 1 to exactly ``target_len`` columns."""
    n_cols = features.shape[1]
    if n_cols < target_len:
        return np.pad(features, pad_width=((0, 0), (0, target_len - n_cols)), mode='constant')
    return features[:, :target_len]


# (path, error message) for every clip that could not be processed, so that
# failures are still visible after a long run.
failed_files = []

for file in tqdm(audio_files, desc="Generating MFCCs"):
    try:
        y, sr = librosa.load(file, sr=22050)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)

        # Normalize length
        mfcc = _fit_to_length(mfcc, max_len)

        # Save MFCC as .npy
        emotion = parse_emotion(os.path.basename(file))
        base = os.path.splitext(os.path.basename(file))[0]
        out_path = os.path.join(mfcc_dir, f"{emotion}_{base}.npy")

        np.save(out_path, mfcc)
    except Exception as e:
        # Best-effort: one unreadable clip must not abort the whole batch.
        # tqdm.write keeps the message from garbling the progress bar.
        failed_files.append((file, str(e)))
        tqdm.write(f"Error processing {file}: {e}")

if failed_files:
    print(f"{len(failed_files)} of {len(audio_files)} files failed; see failed_files for details.")


Found 2880 audio files.


Generating MFCCs: 100%|██████████| 2880/2880 [37:24<00:00,  1.28it/s]
