In [1]:
import sys, os, glob, re, random, json, math, subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, f1_score, accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# One seed shared by every random number generator this notebook uses.
SEED = 42

# Seed Python's stdlib RNG, NumPy's legacy global RNG, and PyTorch in turn.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x1e251e71610>

In [9]:
# Data folders, both relative to the notebook's working directory and both
# under the same ../data tree (metadata.csv is written to ../data/interim).
RAW_DIR = "../data/raw"          # put RAVDESS .wav here
INTERIM_DIR = "../data/interim"  # transcripts, merged CSVs

In [14]:
# Recursively collect every .wav under RAW_DIR. glob returns matches in an
# arbitrary, filesystem-dependent order, so sort them to keep the file list
# (and everything derived from its order) reproducible across machines.
files = sorted(glob.glob(RAW_DIR + "/**/*.wav", recursive=True))
print("Found", len(files), "wav files. Example:", files[:3])


Found 3000 wav files. Example: ['../data/raw\\Actor_01\\03-01-01-01-01-01-01.wav', '../data/raw\\Actor_01\\03-01-01-01-01-02-01.wav', '../data/raw\\Actor_01\\03-01-01-01-02-01-01.wav']


In [19]:
import pandas as pd

# Emotion code (third field of a RAVDESS filename) -> emotion name
EMO_MAP = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}

def parse_ravdess_filename(path: str):
    """
    Decode one RAVDESS filename into its meaning.
    Example filename: 03-01-05-02-02-01-12.wav

    The file stem is split on "-" and must yield exactly 7 fields. Fields
    3-7 are returned (emotion, intensity, statement, repetition, actor);
    the first two fields are not used here.

    Parameters
    ----------
    path : str
        Path (or bare filename) of the audio file. Only the final path
        component is inspected; `path` itself is returned unchanged.

    Returns
    -------
    dict or None
        A dict with keys "path", "emotion_id", "emotion", "intensity",
        "statement", "repetition" and "actor" (all values are strings), or
        None when the stem does not contain exactly 7 dash-separated fields.
        An emotion code missing from EMO_MAP is reported as "unknown".
    """
    # Drop the extension with splitext rather than str.replace(".wav", ""):
    # replace is case-sensitive and matches anywhere in the name, so a file
    # ending in ".WAV" would leave the extension glued to the actor field.
    stem, _ext = os.path.splitext(os.path.basename(path))
    parts = stem.split("-")
    if len(parts) != 7:
        return None
    return {
        "path": path,
        "emotion_id": parts[2],
        "emotion": EMO_MAP.get(parts[2], "unknown"),
        "intensity": parts[3],    # 01=normal, 02=strong
        "statement": parts[4],    # 01 / 02 (two sentences)
        "repetition": parts[5],   # 01 / 02
        "actor": parts[6]
    }

# Parse every discovered file in a single pass, keeping only the names the
# parser recognised (it returns None for anything it cannot decode).
rows = [
    record
    for record in map(parse_ravdess_filename, files)
    if record is not None
]

# One row per successfully parsed file.
df_meta = pd.DataFrame(rows)
print(df_meta.shape)
df_meta.head()


(3000, 7)


Unnamed: 0,path,emotion_id,emotion,intensity,statement,repetition,actor
0,../data/raw\Actor_01\03-01-01-01-01-01-01.wav,1,neutral,1,1,1,1
1,../data/raw\Actor_01\03-01-01-01-01-02-01.wav,1,neutral,1,1,2,1
2,../data/raw\Actor_01\03-01-01-01-02-01-01.wav,1,neutral,1,2,1,1
3,../data/raw\Actor_01\03-01-01-01-02-02-01.wav,1,neutral,1,2,2,1
4,../data/raw\Actor_01\03-01-02-01-01-01-01.wav,2,calm,1,1,1,1


In [25]:
# Persist the parsed metadata table as CSV (without the index column).
METADATA_CSV = "../data/interim/metadata.csv"

# to_csv does not create missing parent folders, so make sure the target
# directory exists; otherwise a fresh checkout fails on this cell.
os.makedirs(os.path.dirname(METADATA_CSV), exist_ok=True)
df_meta.to_csv(METADATA_CSV, index=False)

# Report the path that was actually written so message and file cannot drift.
print("Saved to", METADATA_CSV)


 Saved to data/interim/metadata.csv


In [29]:
import subprocess

def ok(cmd):
    """
    Report whether an external command can be launched and exits cleanly.

    Parameters
    ----------
    cmd : list of str
        Program and arguments, passed to subprocess.run as-is (no shell).

    Returns
    -------
    bool
        True when the process starts and finishes with return code 0.
        False when it exits non-zero or cannot be started at all.
    """
    try:
        # The output is never inspected, so discard it instead of capturing
        # and text-decoding it (which can fail on undecodable bytes).
        completed = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        # Covers FileNotFoundError (program not on PATH) as well as other
        # launch failures such as PermissionError for a non-executable file.
        return False
    return completed.returncode == 0

# Probe each required command-line tool with a harmless invocation and
# print whether it is available.
for label, probe_cmd in (
    ("ffmpeg?", ["ffmpeg", "-version"]),
    ("whisper?", ["whisper", "--help"]),
):
    print(label, ok(probe_cmd))


ffmpeg? False
whisper? False
