In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.utils import to_categorical
from joblib import dump
import warnings

# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Root folder that holds one sub-folder of .wav files per actor.
data_path = "dataset_files//audio_speech_actors_01-24"  # Update this path

# Maps the two-digit code found in the third dash-separated field of a
# file name to the emotion name used as the training label.
emotion_labels = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}

# Function to augment audio
def augment_audio(y, sr):
    """Return three randomly perturbed copies of the waveform ``y``.

    The result is a list ``[noisy, stretched, shifted]``:

    * ``noisy``     - ``y`` plus Gaussian noise scaled by 0.005,
    * ``stretched`` - ``y`` time-stretched by a rate drawn uniformly
      from [0.8, 1.2),
    * ``shifted``   - ``y`` pitch-shifted by an integer number of steps
      drawn from -3..3 (inclusive).

    ``sr`` is the sampling rate and is only needed by the pitch shift.
    Randomness comes from NumPy's global generator, so results are
    reproducible only if the caller seeds ``np.random``.
    """
    # The draws happen in the order noise -> rate -> steps.
    gaussian_noise = 0.005 * np.random.randn(len(y))

    stretch_rate = np.random.uniform(0.8, 1.2)
    stretched = librosa.effects.time_stretch(y, rate=stretch_rate)

    shift_steps = np.random.randint(-3, 4)  # upper bound is exclusive
    shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=shift_steps)

    noisy = y + gaussian_noise
    return [noisy, stretched, shifted]

# Extract sequential features
def extract_features_sequential(file_path, max_len=180):
    """Build frame-wise feature matrices for one audio file and its augmentations.

    The file is loaded with ``librosa.load`` (3 seconds, starting 0.5 s in),
    then the original waveform plus every variant returned by
    ``augment_audio`` is converted into MFCC (40 coefficients), chroma and
    mel-spectrogram matrices. Each matrix is padded or truncated along the
    time axis to ``max_len`` frames, the three are stacked along the feature
    axis and the result is transposed so that time comes first.

    Args:
        file_path: Path of the audio file to load.
        max_len: Number of time frames every feature matrix is forced to.

    Returns:
        A NumPy array of shape ``(n_samples, max_len, n_features)`` where
        ``n_samples`` is 1 + the number of augmented variants, or ``None``
        if anything goes wrong while loading or processing the file (the
        error is printed, not raised, so one bad file does not stop a
        batch run).
    """
    try:
        y, sr = librosa.load(file_path, duration=3, offset=0.5)
        samples = [y] + augment_audio(y, sr)
        features = []

        for sample in samples:
            mfcc = librosa.feature.mfcc(y=sample, sr=sr, n_mfcc=40)
            chroma = librosa.feature.chroma_stft(y=sample, sr=sr)
            mel = librosa.feature.melspectrogram(y=sample, sr=sr)

            # Force a common number of frames. `size` must be passed by
            # keyword: it is keyword-only in librosa >= 0.10, and a
            # positional call there raises TypeError, which the handler
            # below would swallow for every single file.
            mfcc, chroma, mel = (
                librosa.util.fix_length(matrix, size=max_len, axis=1)
                for matrix in (mfcc, chroma, mel)
            )

            # Stack along the feature axis, then put time first.
            feature_vector = np.vstack([mfcc, chroma, mel]).T
            features.append(feature_vector)

        return np.array(features)
    except Exception as e:
        # Deliberate best-effort: report and let the caller skip this file.
        print(f"Error processing {file_path}: {e}")
        return None

# Process each file in the dataset
def process_files_sequential():
    """Walk ``data_path`` and collect features and labels for every usable file.

    Every sub-directory of ``data_path`` is scanned for ``.wav`` files. The
    third dash-separated field of the file name is looked up in
    ``emotion_labels``; files whose name has no such field, whose code is
    unknown, or whose feature extraction fails are skipped.

    Directory entries are visited in sorted order so the output order (and
    therefore any seeded split performed on it later) does not depend on the
    file system's listing order.

    Returns:
        A tuple ``(features, labels)``: ``features`` is a NumPy array holding
        one entry per extracted sample (original and augmented), ``labels``
        is a list of emotion names of the same length. Both are empty when
        ``data_path`` is not a directory or nothing could be extracted.
    """
    files = []
    labels = []

    # A missing dataset folder yields an empty result so the caller can
    # report it, instead of failing inside os.listdir.
    if not os.path.isdir(data_path):
        print(f"Dataset directory not found: {data_path}")
        return np.array(files), labels

    for actor in sorted(os.listdir(data_path)):
        actor_folder = os.path.join(data_path, actor)
        if not os.path.isdir(actor_folder):
            continue

        for file_name in sorted(os.listdir(actor_folder)):
            if not file_name.endswith(".wav"):
                continue

            # Skip names that lack the emotion field rather than crashing
            # the whole run with an IndexError.
            name_fields = file_name.split("-")
            if len(name_fields) < 3:
                continue

            emotion = emotion_labels.get(name_fields[2])
            if emotion is None:
                continue

            file_path = os.path.join(actor_folder, file_name)
            features = extract_features_sequential(file_path)
            if features is None:
                continue

            files.extend(features)  # Collect all samples, including augmented
            labels.extend([emotion] * features.shape[0])

    return np.array(files), labels

# Main function to prepare and save data
def prepare_sequential_data():
    """Extract features, encode labels, split, scale and persist everything.

    Steps, in order:

    1. Collect features and labels via ``process_files_sequential``; print a
       message and return early if nothing was extracted.
    2. Integer-encode the labels, then one-hot encode them.
    3. Split 80/20 into train and test with ``random_state=42``.
    4. Fit a ``StandardScaler`` on the flattened training samples and apply
       it to both sets, restoring the original array shapes afterwards.
    5. Compute 'balanced' class weights over all encoded labels.
    6. Write ``sequential_preprocessed_data.joblib``, ``label_encoder.joblib``
       and ``scaler.joblib`` to the current working directory.

    Returns ``None`` in every case; the outputs are the files written.
    """
    features, labels = process_files_sequential()

    # Nothing to work with: report and stop
    if len(features) == 0:
        print("No features extracted. Check data paths and feature extraction.")
        return

    # Emotion names -> integer ids -> one-hot rows
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)
    labels_one_hot = to_categorical(labels_encoded)

    # NOTE(review): the feature array contains augmented copies of each
    # recording, and this split is per sample, so variants of one recording
    # can end up on both sides of the split - confirm this is intended.
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels_one_hot,
        test_size=0.2,
        random_state=42,
    )

    # StandardScaler works on 2-D input, so flatten each sample to one row,
    # scale, and then reshape back to the original layout.
    scaler = StandardScaler()
    train_shape = X_train.shape
    test_shape = X_test.shape
    flat_train = X_train.reshape(train_shape[0], -1)
    flat_test = X_test.reshape(test_shape[0], -1)
    X_train = scaler.fit_transform(flat_train).reshape(train_shape)
    X_test = scaler.transform(flat_test).reshape(test_shape)

    # Class weights are derived from every label (train and test together)
    # and keyed by position in the sorted array of encoded classes.
    encoded_classes = np.unique(labels_encoded)
    class_weights = compute_class_weight(
        'balanced', classes=encoded_classes, y=labels_encoded
    )
    class_weights_dict = dict(enumerate(class_weights))

    # Persist the arrays plus the fitted encoder and scaler
    bundle = (X_train, X_test, y_train, y_test, class_weights_dict)
    dump(bundle, 'sequential_preprocessed_data.joblib')
    dump(label_encoder, 'label_encoder.joblib')
    dump(scaler, 'scaler.joblib')
    print("Sequential data preprocessing and saving complete.")

# Run the data preparation.
# This executes as soon as the cell/script is run: it reads the dataset under
# `data_path` and writes the three .joblib files to the working directory.
prepare_sequential_data()

2024-11-14 17:49:34.513977: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-14 17:49:47.240894: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731606593.115650   16690 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731606594.430573   16690 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-14 17:50:02.558701: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr