In [None]:
# Mount Google Drive at /content/drive; the cells below read the RAVDESS archive
# from, and write the extracted audio and metadata to, /content/drive/MyDrive/emotional_ai.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
!pip install pandas soundfile speechbrain




In [None]:
import zipfile
from pathlib import Path

ravdess_zip = Path("/content/drive/MyDrive/emotional_ai/raw/ravdess.zip")
extract_path = Path("/content/drive/MyDrive/emotional_ai/data/audio/ravdess/wav")

# Fail early with an actionable message if the archive is not where we expect it.
if not ravdess_zip.is_file():
    raise FileNotFoundError(f"RAVDESS archive not found at {ravdess_zip}")

extract_path.mkdir(parents=True, exist_ok=True)


def _needs_extraction(member: zipfile.ZipInfo, dest: Path) -> bool:
    """Return True when `member` is missing under `dest` or its size on disk differs.

    Comparing sizes (not just existence) means a file left truncated by an
    interrupted earlier run is extracted again.
    """
    target = dest / member.filename
    if member.is_dir():
        return not target.is_dir()
    return not target.is_file() or target.stat().st_size != member.file_size


# Re-running this cell must be cheap: only extract members that are not already
# present, instead of rewriting the whole archive to Drive every time.
with zipfile.ZipFile(ravdess_zip, 'r') as zip_ref:
    pending = [m.filename for m in zip_ref.infolist() if _needs_extraction(m, extract_path)]
    zip_ref.extractall(extract_path, members=pending)

if pending:
    print(f"Extracted RAVDESS to {extract_path}")
else:
    print(f"RAVDESS already extracted at {extract_path}")


Extracted RAVDESS to /content/drive/MyDrive/emotional_ai/data/audio/ravdess/wav


In [None]:
import os
from pathlib import Path
import re
import json
import pandas as pd
import soundfile as sf
from sklearn.model_selection import train_test_split


# Project layout on Drive
BASE = Path("/content/drive/MyDrive/emotional_ai")
DATA_DIR = BASE.joinpath("data", "audio", "ravdess")
AUDIO_ROOT = DATA_DIR.joinpath("wav")    # root of the extracted wav tree
META_DIR = DATA_DIR.joinpath("meta")     # destination for the generated CSVs and labels.json

# Create the metadata folder up front; this is a no-op when it already exists.
META_DIR.mkdir(parents=True, exist_ok=True)


# RAVDESS parsing helpers
emotion_map = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised",
}

# Lookup tables for the first two filename fields, built once instead of on every call.
modality_map = {"01": "audiovisual", "02": "video", "03": "audio"}
vocal_channel_map = {"01": "speech", "02": "song"}


def parse_ravdess(fname: str):
    """Decode the fields of a RAVDESS file name that this notebook filters on.

    The stem is expected to consist of seven "-"-separated codes
    ("MM-VC-EM-IE-ST-RI-AC"). Only the first three are interpreted here:
    modality (MM), vocal channel (VC) and emotion (EM).

    Args:
        fname: File name or path; only the stem (name without extension) is parsed.

    Returns:
        A dict with the keys "emotion", "modality" and "vocal", or None when the
        stem does not have exactly seven fields. Any code that is not in the
        corresponding lookup table is reported as "unknown" (previously an
        unrecognised vocal-channel code was silently labelled "song").
    """
    parts = Path(fname).stem.split("-")
    if len(parts) != 7:
        return None
    mm, vc, em = parts[:3]
    return dict(
        emotion=emotion_map.get(em, "unknown"),
        modality=modality_map.get(mm, "unknown"),
        vocal=vocal_channel_map.get(vc, "unknown"),
    )


# Collect AUDIO + SPEECH files
#
# Two guards on top of the plain recursive glob:
# * sorted(): rglob() yields files in filesystem order, which is not guaranteed to be
#   stable; sorting keeps the row order (and therefore the seeded split) reproducible.
# * seen_names: the same recording can appear more than once under AUDIO_ROOT (e.g. an
#   archive that nests a second copy of the actor folders). Keeping every copy would
#   produce repeated IDs and let copies of one recording fall into different splits.
rows = []
seen_names = set()
duplicates_skipped = 0
for p in sorted(AUDIO_ROOT.rglob("*.wav")):
    meta = parse_ravdess(p.name)
    if not meta:
        continue
    if not (meta["modality"] == "audio" and meta["vocal"] == "speech" and meta["emotion"] != "unknown"):
        continue
    if p.name in seen_names:
        duplicates_skipped += 1
        continue
    seen_names.add(p.name)
    rows.append({"path": str(p), "emotion": meta["emotion"]})

if duplicates_skipped:
    print(f"Skipped {duplicates_skipped} duplicate recording(s) found under {AUDIO_ROOT}")

df = pd.DataFrame(rows)
if df.empty:
    raise RuntimeError(f"No RAVDESS files found under {AUDIO_ROOT}. Check the path/extraction.")

# Duration (needed by SpeechBrain)
def get_duration_sec(wav_path: str) -> float:
    """Return the length of `wav_path` in seconds.

    Computed as frame count divided by sample rate, both taken from the
    metadata object returned by `sf.info`.
    """
    header = sf.info(wav_path)
    n_frames = float(header.frames)
    rate = float(header.samplerate)
    return n_frames / rate

# One duration value per collected file.
df["duration"] = df["path"].apply(get_duration_sec)

# Stable ID for each row (SpeechBrain requires unique IDs)
def make_id(path: str) -> str:
    """Derive an ID from the file stem of `path`.

    Every run of characters other than ASCII letters and digits is replaced by a
    single underscore, e.g. Actor_01/03-01-05-01-02-01-01.wav → 03_01_05_01_02_01_01.
    Only the stem is used, so files with the same name in different folders
    produce the same ID.
    """
    stem = Path(path).stem
    sanitized = re.sub(r"[^a-zA-Z0-9]+", "_", stem)
    return sanitized

df["ID"] = df["path"].apply(make_id)

# Keep only SpeechBrain columns: ID,wav,duration,emotion
sb_df = df[["ID", "path", "duration", "emotion"]].rename(columns={"path": "wav"})

# make_id() only looks at the file stem, so the same file name found in two folders
# yields the same ID. Keep the first occurrence of each ID so that IDs are unique and
# copies of one recording cannot end up in different splits.
duplicate_id_mask = sb_df["ID"].duplicated(keep="first")
if duplicate_id_mask.any():
    print(f"Dropping {int(duplicate_id_mask.sum())} row(s) with a duplicate ID")
    sb_df = sb_df.loc[~duplicate_id_mask].reset_index(drop=True)

# Stratified split by emotion: 80% train, then the held-out 20% is halved into valid/test.
# NOTE(review): rows are split per utterance and stratified on emotion only, so the same
# actor can appear in more than one split — confirm this is the intended evaluation setup.
split_seed = 42
train_df, temp_df = train_test_split(
    sb_df,
    test_size=0.2,
    stratify=sb_df["emotion"],
    random_state=split_seed,
)
valid_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    stratify=temp_df["emotion"],
    random_state=split_seed,
)


# Save SpeechBrain CSVs
train_csv = META_DIR / "train.csv"
valid_csv = META_DIR / "valid.csv"
test_csv  = META_DIR / "test.csv"

# Write each split to its own manifest, without the pandas index column.
for split_frame, csv_path in (
    (train_df, train_csv),
    (valid_df, valid_csv),
    (test_df, test_csv),
):
    split_frame.to_csv(csv_path, index=False)

print(f"Saved:\n- {train_csv}\n- {valid_csv}\n- {test_csv}")


# Save class list for consistency
# The distinct emotion names, alphabetically sorted, are written as a JSON array.
labels = sorted(sb_df["emotion"].unique())
labels_path = META_DIR / "labels.json"
with labels_path.open("w") as f:
    json.dump(labels, f, indent=2)
print("Saved labels.json with classes:", labels)


# Quick sanity checks
def show_counts(name, d):
    """Print the number of rows in `d` and its per-emotion counts, ordered by label.

    Args:
        name: Label printed in the header line (e.g. the split name).
        d: DataFrame with an "emotion" column.
    """
    per_emotion = d["emotion"].value_counts().sort_index()
    print(f"\n{name} size: {len(d)}")
    print(per_emotion)

# Report the class balance of every split, in train/valid/test order.
for split_name, split_frame in (("Train", train_df), ("Valid", valid_df), ("Test", test_df)):
    show_counts(split_name, split_frame)

Saved:
- /content/drive/MyDrive/emotional_ai/data/audio/ravdess/meta/train.csv
- /content/drive/MyDrive/emotional_ai/data/audio/ravdess/meta/valid.csv
- /content/drive/MyDrive/emotional_ai/data/audio/ravdess/meta/test.csv
Saved labels.json with classes: ['angry', 'calm', 'disgust', 'fearful', 'happy', 'neutral', 'sad', 'surprised']

Train size: 2304
emotion
angry        308
calm         307
disgust      307
fearful      307
happy        307
neutral      154
sad          307
surprised    307
Name: count, dtype: int64

Valid size: 288
emotion
angry        38
calm         39
disgust      39
fearful      38
happy        38
neutral      19
sad          38
surprised    39
Name: count, dtype: int64

Test size: 288
emotion
angry        38
calm         38
disgust      38
fearful      39
happy        39
neutral      19
sad          39
surprised    38
Name: count, dtype: int64
