In [2]:
import os
import librosa
import numpy as np
import pandas as pd
from joblib import Parallel, delayed  # For parallel processing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight  # For class weighting
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, Activation, Bidirectional, LSTM, Attention, Concatenate, GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Step 1: Define the Data Path and Emotion Labels
# Root folder of the extracted dataset. The default is machine-specific, so the
# RAVDESS_DATA_PATH environment variable may be set to point somewhere else
# without editing the notebook.
data_path = os.environ.get(
    "RAVDESS_DATA_PATH",
    "C:\\Users\\HP\\Downloads\\RAVDESS_Dataset",
)

# Maps the two-digit emotion code taken from a file name to its readable label.
emotion_labels = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}
def augment_audio(y, sr):
    """Produce five randomly perturbed variants of a waveform.

    Args:
        y: 1-D audio signal.
        sr: Sampling rate of ``y``; only used by the pitch shift.

    Returns:
        A list ``[noisy, stretched, pitch_shifted, rescaled, rolled]``.
        The stretched variant generally has a different length from ``y``.

    The random draws use NumPy's global random state and happen in the order
    the variants are listed above.
    """
    # Additive Gaussian noise at a fixed 0.005 amplitude.
    gaussian = np.random.randn(len(y))
    noisy = y + 0.005 * gaussian

    # Tempo change with a rate drawn uniformly from [0.8, 1.2).
    stretch_rate = np.random.uniform(0.8, 1.2)
    stretched = librosa.effects.time_stretch(y, rate=stretch_rate)

    # Pitch shift by a whole number of steps in -3..3 (upper bound 4 is exclusive).
    step_count = np.random.randint(-3, 4)
    pitch_shifted = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=step_count)

    # Volume change with a gain drawn uniformly from [0.8, 1.2).
    gain = np.random.uniform(0.8, 1.2)
    rescaled = y * gain

    # Circular shift by 1000..4999 samples; np.roll wraps the tail to the front.
    offset = np.random.randint(1000, 5000)
    rolled = np.roll(y, offset)

    return [noisy, stretched, pitch_shifted, rescaled, rolled]
def _summarize_sample(sample, sr, include_extra_features=False):
    """Reduce one waveform to a 1-D vector of time-averaged features.

    Always concatenates the per-row means of MFCC (40 coefficients), chroma
    and mel spectrogram, in that order. When ``include_extra_features`` is
    true, the means of zero-crossing rate, spectral contrast and tonnetz
    (computed on the harmonic component) are appended after them.
    """
    feature_blocks = [
        librosa.feature.mfcc(y=sample, sr=sr, n_mfcc=40),
        librosa.feature.chroma_stft(y=sample, sr=sr),
        librosa.feature.melspectrogram(y=sample, sr=sr),
    ]
    if include_extra_features:
        feature_blocks.append(librosa.feature.zero_crossing_rate(sample))
        feature_blocks.append(librosa.feature.spectral_contrast(y=sample, sr=sr))
        feature_blocks.append(
            librosa.feature.tonnetz(y=librosa.effects.harmonic(sample), sr=sr)
        )
    # Averaging over axis 1 collapses the time axis, so clips of different
    # length (e.g. the time-stretched variant) give vectors of equal size.
    return np.hstack([np.mean(block, axis=1) for block in feature_blocks])


def extract_features_with_augmentation(file_path, include_extra_features=False):
    """Extract one feature vector for a clip and for each augmented variant.

    Loads up to 3 seconds of audio starting 0.5 seconds into the file, builds
    the variants with ``augment_audio`` and summarises the original plus every
    variant.

    Previously the zero-crossing rate, spectral contrast and tonnetz features
    were computed for every sample and then discarded, because the feature
    vector was immediately rebuilt from MFCC, chroma and mel only. That wasted
    work is no longer done by default; pass ``include_extra_features=True`` to
    append those features to each vector.

    Args:
        file_path: Path of the audio file to load.
        include_extra_features: Append ZCR, spectral contrast and tonnetz
            means to each vector. Defaults to ``False``, which yields the same
            features as before.

    Returns:
        A 2-D array with one row per sample (original first, then the
        augmented variants), or ``None`` if loading or extraction failed.
    """
    try:
        y, sr = librosa.load(file_path, duration=3, offset=0.5)
        samples = [y] + augment_audio(y, sr)
        return np.vstack(
            [_summarize_sample(sample, sr, include_extra_features) for sample in samples]
        )
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole batch.
        print(f"Error extracting features from {file_path}. Error: {e}")
        return None
# Step 4: Parallel Feature Extraction
def process_file(file_path, emotion):
    """Pair every feature vector extracted from one file with its emotion.

    Args:
        file_path: Audio file handed to ``extract_features_with_augmentation``.
        emotion: Label attached to each vector from this file.

    Returns:
        A list of ``(feature_vector, emotion)`` tuples, or an empty list when
        extraction returned ``None``.
    """
    vectors = extract_features_with_augmentation(file_path)
    if vectors is None:
        return []

    labelled = []
    for vector in vectors:
        labelled.append((vector, emotion))
    return labelled

# Collect (file_path, emotion) pairs: one sub-folder per actor under data_path,
# with the third dash-separated field of each .wav file name read as the
# emotion code.
files_to_process = []
# os.listdir returns entries in arbitrary order; sorting keeps the file order,
# and therefore the downstream seeded train/test split, reproducible.
for actor_folder in sorted(os.listdir(data_path)):
    actor_path = os.path.join(data_path, actor_folder)
    if not os.path.isdir(actor_path):
        continue
    for file_name in sorted(os.listdir(actor_path)):
        if not file_name.endswith(".wav"):
            continue
        name_parts = file_name.split("-")
        if len(name_parts) <= 2:
            continue
        emotion_code = name_parts[2]
        if emotion_code in emotion_labels:
            emotion = emotion_labels[emotion_code]
            file_path = os.path.join(actor_path, file_name)
            files_to_process.append((file_path, emotion))

# One task per file across all available cores; results[i] is the list of
# (feature_vector, emotion) pairs for files_to_process[i], empty on failure.
results = Parallel(n_jobs=-1)(delayed(process_file)(file_path, emotion) for file_path, emotion in files_to_process)
# Flatten results and prepare data. `groups` records which source file each
# row came from so the split below can keep a file's rows together.
features, labels, groups = [], [], []
for file_index, result in enumerate(results):
    if result:
        for feature_vector, emotion in result:
            features.append(feature_vector)
            labels.append(emotion)
            groups.append(file_index)
if not features:
    raise ValueError("No features were extracted; check data_path and any extraction errors printed above.")

features_df = pd.DataFrame(features)
features_df['label'] = labels
label_encoder = LabelEncoder()
features_df['label'] = label_encoder.fit_transform(features_df['label'])

X = features_df.iloc[:, :-1].values
y = features_df.iloc[:, -1].values
groups = np.asarray(groups)

# Split by source file rather than by row. Each file contributes its original
# clip plus augmented copies; a row-level split would place near-duplicates of
# the same recording in both sets and inflate the test score.
train_files, test_files = train_test_split(np.unique(groups), test_size=0.2, random_state=42)
train_mask = np.isin(groups, train_files)
# Shuffle the training rows (fixed seed) so they are not ordered file by file.
train_rows = np.random.RandomState(42).permutation(np.flatnonzero(train_mask))
test_rows = np.flatnonzero(~train_mask)
X_train, X_test = X[train_rows], X[test_rows]
y_train, y_test = y[train_rows], y[test_rows]

# Scale the data: statistics are fitted on the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Add a trailing channel axis: (rows, n_features) -> (rows, n_features, 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Step 8: Convert Labels to One-Hot Encoding
# Fix the width explicitly so both sets have one column per known class even
# if the highest-numbered class is absent from one of them.
num_classes = len(label_encoder.classes_)
y_train_one_hot = to_categorical(y_train, num_classes=num_classes)
y_test_one_hot = to_categorical(y_test, num_classes=num_classes)

# Compute class weights to handle class imbalance. They are derived from the
# training labels only, so the test distribution stays unseen.
train_classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=train_classes, y=y_train)
class_weights_dict = {int(cls): float(weight) for cls, weight in zip(train_classes, class_weights)}
# Nothing is written to disk in this cell, so report completion rather than a save.
print(f"Preprocessing complete: {X_train.shape[0]} training rows, {X_test.shape[0]} test rows.")

Preprocessed data saved successfully.


In [3]:
# Save the preprocessed data
# `dump` is not among the notebook's visible imports (only Parallel and delayed
# are imported from joblib), so import it here to keep this cell runnable on a
# fresh kernel.
from joblib import dump

# Persisted as a single tuple, in this order:
# (X_train, X_test, y_train_one_hot, y_test_one_hot, class_weights_dict)
dump((X_train, X_test, y_train_one_hot, y_test_one_hot, class_weights_dict), 'preprocessed_data2.joblib')

print("Preprocessed data saved successfully.")

Preprocessed data saved successfully.
