In [1]:
import os
from pathlib import Path
import zipfile

# Upload kaggle.json manually or mount from Google Drive
kaggle_token = '/content/kaggle.json'
!mkdir -p ~/.kaggle
!cp {kaggle_token} ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [2]:
!kaggle datasets download -d sangayb/iemocap
with zipfile.ZipFile("iemocap.zip", 'r') as zip_ref:
    zip_ref.extractall("iemocap_data")

Dataset URL: https://www.kaggle.com/datasets/sangayb/iemocap
License(s): CC0-1.0


In [8]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
import librosa

# === Updated Feature Extraction: [T, D] MFCC + Delta
def extract_features(y, sr=16000):
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    delta = librosa.feature.delta(mfcc)
    delta2 = librosa.feature.delta(mfcc, order=2)
    stacked = np.vstack([mfcc, delta, delta2])  # [39, T]
    return stacked.T  # [T, 39]

# === Audio Loader
def load_audio(file_path, sr=16000):
    y, _ = librosa.load(file_path, sr=sr)
    y = librosa.util.normalize(y)
    return y

# === Label Dictionary
def build_label_dict(eval_dir):
    label_dict = {}
    for root, _, files in os.walk(eval_dir):
        for file in files:
            if file.endswith(".txt"):
                with open(os.path.join(root, file)) as f:
                    for line in f:
                        parts = line.strip().split()
                        if len(parts) >= 5 and parts[4] != 'xxx':
                            utt_id = parts[3]
                            label = parts[4]
                            label_dict[utt_id] = label
    return label_dict

# === Dataset Loader with Padding
def load_dataset(audio_root, label_dict, max_len=300):
    X, y, speakers = [], [], []
    for subdir, _, files in os.walk(audio_root):
        for file in files:
            if file.endswith(".wav") and not file.startswith("._"):
                path = os.path.join(subdir, file)
                utt_id = file.split(".")[0]
                label = label_dict.get(utt_id)
                if label:
                    try:
                        y_audio = load_audio(path)
                        features = extract_features(y_audio)  # [T, D]

                        # Pad or truncate to max_len
                        if features.shape[0] < max_len:
                            pad = np.zeros((max_len - features.shape[0], features.shape[1]))
                            features = np.vstack([features, pad])
                        else:
                            features = features[:max_len]

                        speaker_id = utt_id.split("_")[0]
                        X.append(features)
                        y.append(label)
                        speakers.append(speaker_id)
                    except Exception as e:
                        print(f"Error processing {file}: {e}")
    return X, y, speakers

In [9]:
# Setup
dataset_root = "iemocap_data"
sessions = ["Session1", "Session2", "Session3", "Session4", "Session5"]
FEATURE_DIR = "features_seq"
os.makedirs(FEATURE_DIR, exist_ok=True)

# Build Label Dictionary
label_dict = {}
for session in sessions:
    eval_path = os.path.join(dataset_root, session, "dialog", "EmoEvaluation")
    session_labels = build_label_dict(eval_path)
    label_dict.update(session_labels)

print(f"Total labeled utterances: {len(label_dict)}")

# Extract and Process Features
X, y, speakers = [], [], []
for session in sessions:
    audio_path = os.path.join(dataset_root, session, "sentences", "wav")
    Xi, yi, si = load_dataset(audio_path, label_dict, max_len=300)
    X.extend(Xi)
    y.extend(yi)
    speakers.extend(si)

X = np.array(X)         # shape: [N, T, D]
y = np.array(y)
speakers = np.array(speakers)

# Save .npy Files
np.save(os.path.join(FEATURE_DIR, "X_seq.npy"), X)
np.save(os.path.join(FEATURE_DIR, "y_seq.npy"), y)
np.save(os.path.join(FEATURE_DIR, "speakers_seq.npy"), speakers)

print(f"Saved: X_seq.npy shape = {X.shape}, y_seq.npy, speakers_seq.npy")

Total labeled utterances: 7548
Saved: X_seq.npy shape = (7532, 300, 39), y_seq.npy, speakers_seq.npy


In [10]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
!cp -r /content/features_seq /content/drive/MyDrive/SU_Project/