In [6]:
from pathlib import Path
import librosa
import numpy as np
import pandas as pd
import parselmouth
import os
from parselmouth.praat import call
import noisereduce as nr

# Path to the SAVEE dataset audio folder (adjust for your machine).
# Every *.wav file found beneath it (recursively) is processed below.
data_folder = Path(r"C:/Users/jbkee/OneDrive/Desktop/Jupyter Projects/SAVEE/AudioData")

def get_savee_label(filename):
    """Return the emotion label encoded at the start of a SAVEE file name.

    The two-character prefix is tried first so that "sa"/"su" win over any
    single-letter code; otherwise the first character alone is looked up.
    Matching is case-insensitive.

    Args:
        filename: File name such as "a01.wav" or "su12.wav". Must be
            non-empty (the first character is indexed directly).

    Returns:
        The emotion name, or "unknown" when no prefix is recognised.
    """
    emotions = {
        "a": "angry",
        "d": "disgust",
        "f": "fearful",
        "h": "happy",
        "n": "neutral",
        "sa": "sad",        # NOTE: check if SAVEE uses 'sa' or 's'
        "su": "surprised",
    }
    first_char = filename[0].lower()
    two_chars = filename[:2].lower()

    try:
        # Two-letter codes take precedence over one-letter codes.
        return emotions[two_chars]
    except KeyError:
        return emotions.get(first_char, "unknown")

def safe_formant_values(formant_obj, formant_index, duration):
    """Sample one formant track every 0.01 time units and keep usable values.

    Args:
        formant_obj: Object exposing ``get_value_at_time(formant_index, t)``.
        formant_index: Formant number forwarded to ``get_value_at_time``.
        duration: Upper bound (exclusive) of the sampling grid, which starts
            at 0 and advances in steps of 0.01.

    Returns:
        A list of the sampled values that are not ``None``, not NaN and
        strictly positive, in time order.
    """
    sample_times = np.arange(0, duration, 0.01)
    usable = []
    for t in sample_times:
        value = formant_obj.get_value_at_time(formant_index, t)
        # Discard undefined (None / NaN) and non-positive readings.
        if value is None or np.isnan(value) or value <= 0:
            continue
        usable.append(value)
    return usable

def load_and_denoise(audio_path, sr=16000):
    """Load an audio file, trim it, reduce noise and peak-normalise it.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.
        sr: Sample rate requested from ``librosa.load`` and passed on to the
            noise reducer.

    Returns:
        A ``(signal, sr)`` tuple where ``signal`` is the denoised audio
        divided by its peak absolute value (left unscaled when the peak is
        not positive).
    """
    signal, _ = librosa.load(audio_path, sr=sr)
    trimmed, _ = librosa.effects.trim(signal, top_db=20)

    # The first 0.1 s of the trimmed signal is used as the noise profile.
    # NOTE(review): this slice is taken after trimming, so it may already
    # contain speech rather than background noise - confirm this is intended.
    noise_len = int(0.1 * sr)
    noise_profile = trimmed[:noise_len]

    # Apply noise reduction using that profile.
    cleaned = nr.reduce_noise(y=trimmed, y_noise=noise_profile, sr=sr)

    # Peak-normalise; skip the division when the peak is not positive.
    peak = np.max(np.abs(cleaned))
    if peak > 0:
        cleaned = cleaned / peak
    return cleaned, sr

from scipy.signal import butter, lfilter

def bandpass_filter(y, sr, lowcut=50.0, highcut=4000.0, order=5):
    """Apply a Butterworth band-pass filter to a signal.

    Args:
        y: Input samples.
        sr: Sample rate of ``y``; half of it is used as the Nyquist frequency.
        lowcut: Lower band edge, in the same units as ``sr``.
        highcut: Upper band edge, in the same units as ``sr``.
        order: Filter order passed to ``scipy.signal.butter``.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.
    """
    nyquist = 0.5 * sr
    # butter() expects band edges as fractions of the Nyquist frequency.
    band_edges = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = butter(order, band_edges, btype='band')
    filtered = lfilter(numerator, denominator, y)
    return filtered

def normalize_rms(y, target_dBFS=-20.0):
    """Scale a signal so that its RMS level approaches ``target_dBFS``.

    Args:
        y: Array of samples.
        target_dBFS: Desired RMS level in dB; converted to a linear
            amplitude with ``10 ** (target_dBFS / 20)``.

    Returns:
        ``y`` multiplied by the computed gain. A small constant (1e-6) is
        added to the measured RMS so a silent signal does not divide by zero.
    """
    mean_square = np.mean(y * y)
    current_rms = np.sqrt(mean_square)
    target_amplitude = 10 ** (target_dBFS / 20)
    gain = target_amplitude / (current_rms + 1e-6)
    return y * gain

def extract_features(audio_path):
    """Extract a flat dictionary of acoustic features from one audio file.

    The file is loaded with librosa at 16 kHz, trimmed, level-normalised and
    summarised with MFCC, pitch, energy and spectral statistics. Formants,
    jitter, shimmer and HNR are measured with Parselmouth, which reads the
    file from disk again (i.e. not the trimmed/normalised signal).

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        A dict mapping feature names to scalar values: MFCC means and
        delta / delta-delta means, pitch / RMS / ZCR / spectral statistics,
        per-band spectral contrast mean and std, formant means (f1-f3),
        jitter, shimmer and hnr. Any Parselmouth measure that cannot be
        computed is reported as 0.
    """
    n_mfcc = 13

    y, sr = librosa.load(audio_path, sr=16000)
    y, _ = librosa.effects.trim(y, top_db=20)

    y = normalize_rms(y)

    # NOTE(review): the peak normalisation below rescales the signal again,
    # so the RMS level set above does not survive. Both steps are kept so the
    # extracted feature values stay identical to earlier runs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    # MFCC and its first/second order deltas
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfcc_delta = librosa.feature.delta(mfcc)
    mfcc_delta2 = librosa.feature.delta(mfcc, order=2)

    # Mean of each delta coefficient
    mfcc_delta_mean = [np.mean(mfcc_delta[i]) for i in range(n_mfcc)]
    mfcc_delta2_mean = [np.mean(mfcc_delta2[i]) for i in range(n_mfcc)]

    # Pitch
    pitch = librosa.yin(y, fmin=50, fmax=500, sr=sr)
    pitch_mean = np.mean(pitch)
    pitch_std = np.std(pitch)
    pitch_range = np.max(pitch) - np.min(pitch)

    # RMS Energy
    rms = librosa.feature.rms(y=y)
    rms_mean = np.mean(rms)
    rms_std = np.std(rms)
    rms_range = np.max(rms) - np.min(rms)

    # Zero Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y)
    zcr_mean = np.mean(zcr)
    zcr_std = np.std(zcr)

    # Spectral Centroid
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    centroid_mean = np.mean(centroid)
    centroid_std = np.std(centroid)

    # Spectral Bandwidth
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    bandwidth_mean = np.mean(bandwidth)
    bandwidth_std = np.std(bandwidth)

    # Spectral Rolloff
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    rolloff_mean = np.mean(rolloff)
    rolloff_std = np.std(rolloff)

    # Spectral Contrast
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    contrast_mean = np.mean(contrast, axis=1)
    contrast_std = np.std(contrast, axis=1)

    # Spectral Flatness
    flatness = librosa.feature.spectral_flatness(y=y)
    flatness_mean = np.mean(flatness)
    flatness_std = np.std(flatness)

    # Parselmouth Features
    # Defaults are bound up front so that a failure in either stage below can
    # never leave a name undefined when the result dict is built.
    f1_mean = f2_mean = f3_mean = 0
    jitter = shimmer = hnr = 0
    try:
        snd = parselmouth.Sound(str(audio_path))
        formant = snd.to_formant_burg()

        f1 = safe_formant_values(formant, 1, snd.duration)
        f2 = safe_formant_values(formant, 2, snd.duration)
        f3 = safe_formant_values(formant, 3, snd.duration)

        f1_mean = np.mean(f1) if f1 else 0
        f2_mean = np.mean(f2) if f2 else 0
        f3_mean = np.mean(f3) if f3 else 0

        try:
            point_process = call(snd, "To PointProcess (periodic, cc)", 75, 500)
            jitter = call(point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
            shimmer = call([snd, point_process], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
            harmonicity = snd.to_harmonicity_cc()
            hnr = call(harmonicity, "Get mean", 0, 0)
        except Exception as e:
            # Best effort: measures computed before the failure are kept,
            # the remaining ones stay at their default of 0.
            print(f"Error processing file {audio_path}: {e}")

    except Exception as e:
        f1_mean = f2_mean = f3_mean = 0
        jitter = shimmer = hnr = 0
        print(f"Error processing file {audio_path}: {e}")

    return {
        **{f"mfcc{i+1}_mean": mfcc[i].mean() for i in range(n_mfcc)},
        **{f"mfcc_delta{i+1}_mean": mfcc_delta_mean[i] for i in range(n_mfcc)},
        **{f"mfcc_delta2_{i+1}_mean": mfcc_delta2_mean[i] for i in range(n_mfcc)},

        "pitch_mean": pitch_mean,
        "pitch_std": pitch_std,
        "pitch_range": pitch_range,

        "rms_mean": rms_mean,
        "rms_std": rms_std,
        "rms_range": rms_range,

        "zcr_mean": zcr_mean,
        "zcr_std": zcr_std,

        "centroid_mean": centroid_mean,
        "centroid_std": centroid_std,
        "bandwidth_mean": bandwidth_mean,
        "bandwidth_std": bandwidth_std,
        "rolloff_mean": rolloff_mean,
        "rolloff_std": rolloff_std,
        "flatness_mean": flatness_mean,
        "flatness_std": flatness_std,
        **{f"contrast{i+1}_mean": contrast_mean[i] for i in range(contrast.shape[0])},
        **{f"contrast{i+1}_std": contrast_std[i] for i in range(contrast.shape[0])},

        "f1_mean": f1_mean,
        "f2_mean": f2_mean,
        "f3_mean": f3_mean,
        "jitter": jitter,
        "shimmer": shimmer,
        "hnr": hnr,
    }

# Gather all features: one dict per audio file, with its name and label.
feature_rows = []

# sorted() makes the row order reproducible; glob order depends on the
# filesystem.
for file in sorted(data_folder.glob("**/*.wav")):
    try:
        label = get_savee_label(file.name)

        feats = extract_features(file)
        feats["filename"] = file.name
        feats["label"] = label
        feature_rows.append(feats)

        print(f"Processed {file.name}")
    except Exception as e:
        # Best effort: report the file and carry on with the rest.
        print(f"Failed on {file.name}: {e}")

# Convert to DataFrame and save. The file name is held in one variable so
# the confirmation message always names the file that was actually written.
output_csv = "features_savee_05.csv"
df = pd.DataFrame(feature_rows)
df.to_csv(output_csv, index=False)
print(f"✅ Features saved to {output_csv}")

Processed a01.wav
Processed a02.wav
Processed a03.wav
Processed a04.wav
Processed a05.wav
Processed a06.wav
Processed a07.wav
Processed a08.wav
Processed a09.wav
Processed a10.wav
Processed a11.wav
Processed a12.wav
Processed a13.wav
Processed a14.wav
Processed a15.wav
Processed d01.wav
Processed d02.wav
Processed d03.wav
Processed d04.wav
Processed d05.wav
Processed d06.wav
Processed d07.wav
Processed d08.wav
Processed d09.wav
Processed d10.wav
Processed d11.wav
Processed d12.wav
Processed d13.wav
Processed d14.wav
Processed d15.wav
Processed f01.wav
Processed f02.wav
Processed f03.wav
Processed f04.wav
Processed f05.wav
Processed f06.wav
Processed f07.wav
Processed f08.wav
Processed f09.wav
Processed f10.wav
Processed f11.wav
Processed f12.wav
Processed f13.wav
Processed f14.wav
Processed f15.wav
Processed h01.wav
Processed h02.wav
Processed h03.wav
Processed h04.wav
Processed h05.wav
Processed h06.wav
Processed h07.wav
Processed h08.wav
Processed h09.wav
Processed h10.wav
Processed 