# init

In [None]:
# IMPORT THE LIBRARIES
import os
import sys
import warnings

import pandas as pd
import numpy as np

# librosa is a Python library for analyzing audio and music.
# It can be used to extract the data from the audio files.
import librosa
import librosa.display

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Sklearn tools
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# To play the audio files
import IPython.display as ipd
from IPython.display import Audio

# Deep learning libraries
import tensorflow as tf
import keras

# Sequences / text preprocessing (correct imports!)
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Other keras layers
from tensorflow.keras.utils import to_categorical
from keras.preprocessing import sequence
from keras.layers import Dense, Embedding
from keras.layers import LSTM, BatchNormalization, GRU
from keras.layers import Input, Flatten, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import SGD

# Add Attention layers for improved architecture
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, GlobalAveragePooling1D

# For saving/loading features
import joblib
import pickle

# Suppress warnings
# Only install the blanket "ignore" filter when no warning options were
# supplied by the user (sys.warnoptions is then empty).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
# DeprecationWarning is ignored unconditionally, i.e. even when the branch
# above was skipped.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Status banner: confirms the import cell finished and shows the TF version in use.
print("✅ Imports done. TensorFlow version:", tf.__version__)
print("✅ Additional imports: Attention layers, joblib for feature saving")

In [None]:
# 🔇 COMPREHENSIVE WARNING SUPPRESSION
# Silence everything unless the user explicitly configured warning options.
if not sys.warnoptions:
    warnings.simplefilter("ignore")

# Targeted filters that stay active even when warning options were supplied.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", message=".*PySoundFile failed.*")
# Optional librosa filters, intentionally left disabled:
# warnings.filterwarnings("ignore", category=UserWarning, module="librosa")
# warnings.filterwarnings("ignore", message=".*n_fft.*too large.*")

# Lower the verbosity of TensorFlow's native logging.
# NOTE(review): tensorflow is already imported by the first cell, so this
# probably only takes full effect if it is set before `import tensorflow`
# (e.g. after a kernel restart) - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Report what this cell actually did (the import banner belongs to the first cell).
print("✅ Warning filters configured (TF_CPP_MIN_LOG_LEVEL=2)")

In [None]:
# 🔍 Verify Kaggle Input Datasets
print(" Verifying available datasets in Kaggle environment...")

import os

# Directory under which the notebook expects its input datasets.
kaggle_input = "/kaggle/input"

# Dataset folders the loading cells below read from.
expected_datasets = [
    "ravdess-emotional-speech-audio",
    "cremad",
    "toronto-emotional-speech-set-tess",
    "surrey-audiovisual-expressed-emotion-savee",
]

# isdir (rather than exists) so a stray file at that path cannot make listdir fail.
if os.path.isdir(kaggle_input):
    available_datasets = os.listdir(kaggle_input)
    print(f"✅ Available datasets: {available_datasets}")

    # Report every expected dataset individually so a missing one is easy to spot.
    attached = set(available_datasets)
    for dataset in expected_datasets:
        if dataset in attached:
            print(f"✅ {dataset}: Found")
        else:
            print(f"❌ {dataset}: Missing - Please add this dataset to your Kaggle notebook")
else:
    print("❌ /kaggle/input directory not found. Make sure you're running on Kaggle.")

# Paths used by the dataset-loading cells further down.
print("\n📁 Dataset paths for processing:")
print("RAVDESS: /kaggle/input/ravdess-emotional-speech-audio/audio_speech_actors_01-24/")
print("CREMA: /kaggle/input/cremad/AudioWAV/")
print("TESS: /kaggle/input/toronto-emotional-speech-set-tess/TESS Toronto emotional speech set data/")
print("SAVEE: /kaggle/input/surrey-audiovisual-expressed-emotion-savee/ALL/")

# Importing Data 

                                              Ravdess Dataframe
Here are the filename identifiers as per the official RAVDESS website:

* Modality (01 = full-AV, 02 = video-only, 03 = audio-only).
* Vocal channel (01 = speech, 02 = song).
* Emotion (01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised).
* Emotional intensity (01 = normal, 02 = strong). NOTE: There is no strong intensity for the 'neutral' emotion.
* Statement (01 = "Kids are talking by the door", 02 = "Dogs are sitting by the door").
* Repetition (01 = 1st repetition, 02 = 2nd repetition).
* Actor (01 to 24. Odd numbered actors are male, even numbered actors are female).

So, here's an example of an audio filename. 02-01-06-01-02-01-12.mp4 This means the meta data for the audio file is:

* Video-only (02)
* Speech (01)
* Fearful (06)
* Normal intensity (01)
* Statement "dogs" (02)
* 1st Repetition (01)
* 12th Actor (12) - Female (as the actor ID number is even)

## preprocessing

**RAVDESS**

In [None]:
import os
import pandas as pd

# Root folder of the RAVDESS speech recordings; it is listed as one sub-folder per actor.
ravdess = "/kaggle/input/ravdess-emotional-speech-audio/audio_speech_actors_01-24/"
ravdess_directory_list = os.listdir(ravdess)

file_emotion, file_path = [], []

# Visit every actor folder and read the emotion code out of each file name.
for actor_dir in ravdess_directory_list:
    actor_path = os.path.join(ravdess, actor_dir)
    for file in os.listdir(actor_path):
        # The name before the first dot is dash-separated; its third field is the emotion code.
        stem = file.partition('.')[0]
        file_emotion.append(int(stem.split('-')[2]))
        file_path.append(os.path.join(actor_path, file))

# Numeric emotion code -> label. Code 2 (calm) is mapped to 'neutral' as well.
code_to_label = {
    1: 'neutral',
    2: 'neutral',
    3: 'happy',
    4: 'sad',
    5: 'angry',
    6: 'fear',
    7: 'disgust',
    8: 'surprise'
}

# Two-column frame: emotion label and absolute file path.
ravdess_df = pd.DataFrame({'Emotions': file_emotion, 'Path': file_path})
ravdess_df['Emotions'] = ravdess_df['Emotions'].replace(code_to_label)

# Sanity check: first rows, last rows and the class distribution.
print(ravdess_df.head())
print("______________________________________________")
print(ravdess_df.tail())
print("_______________________________________________")
print(ravdess_df['Emotions'].value_counts())


**Crema DataFrame**

CREMA-D is a data set of 7,442 original clips from 91 actors. These clips were from 48 male and 43 female actors between the ages of 20 and 74 coming from a variety of races and ethnicities (African American, Asian, Caucasian, Hispanic, and Unspecified). Actors spoke from a selection of 12 sentences. The sentences were presented using one of six different emotions (Anger, Disgust, Fear, Happy, Neutral, and Sad) and four different emotion levels (Low, Medium, High, and Unspecified).

In [None]:
# Folder that is listed directly for the CREMA-D audio files.
Crema = "/kaggle/input/cremad/AudioWAV/"
crema_directory_list = os.listdir(Crema)

# Third underscore-separated field of the file name -> emotion label.
crema_labels = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral',
}

# One path and one label per file; unrecognised codes are labelled 'Unknown'.
file_path = [Crema + file for file in crema_directory_list]
file_emotion = [crema_labels.get(file.split('_')[2], 'Unknown')
                for file in crema_directory_list]

# Assemble the two columns into a single frame.
emotion_df = pd.DataFrame({'Emotions': file_emotion})
path_df = pd.DataFrame({'Path': file_path})
Crema_df = emotion_df.join(path_df)

# Preview and class distribution.
print(Crema_df.head())
print(Crema_df.Emotions.value_counts())

**TESS dataset**

A set of 200 target words was spoken in the carrier phrase "Say the word _" by two actresses (aged 26 and 64 years), and recordings were made of the set portraying each of seven emotions (anger, disgust, fear, happiness, pleasant surprise, sadness, and neutral). There are 2800 data points (audio files) in total.

The dataset is organised such that each combination of actress and emotion is contained within its own folder. Within each folder, the audio files for all 200 target words can be found. The audio files are in WAV format.

In [None]:
# Root folder of the TESS recordings; each sub-folder is listed for its audio files.
Tess = "/kaggle/input/toronto-emotional-speech-set-tess/TESS Toronto emotional speech set data/"

tess_directory_list = os.listdir(Tess)

file_emotion = []
file_path = []

# `emotion_dir` instead of `dir`: the loop variable lives at notebook scope and
# must not shadow the builtin dir().
for emotion_dir in tess_directory_list:
    folder = os.path.join(Tess, emotion_dir)
    for file in os.listdir(folder):
        # The name before the first dot is underscore-separated; its third field is the label.
        label = file.split('.')[0].split('_')[2]
        # 'ps' is renamed so it matches the 'surprise' label used by the other datasets.
        file_emotion.append('surprise' if label == 'ps' else label)
        file_path.append(os.path.join(folder, file))

# DataFrame for emotions
emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])

# DataFrame for paths
path_df = pd.DataFrame(file_path, columns=['Path'])

# Side-by-side concat gives the two-column (Emotions, Path) frame.
Tess_df = pd.concat([emotion_df, path_df], axis=1)
print(Tess_df.head())
print(Tess_df.Emotions.value_counts())

**SAVEE Dataset**

Context
The SAVEE database was recorded from four native English male speakers (identified as DC, JE, JK, KL), postgraduate students and researchers at the University of Surrey aged from 27 to 31 years. Emotion has been described psychologically in discrete categories: anger, disgust, fear, happiness, sadness and surprise. This is supported by the cross-cultural studies of Ekman [6] and studies of automatic emotion recognition tended to focus on recognizing these [12]. We added neutral to provide recordings of 7 emotion categories. The text material consisted of 15 TIMIT sentences per emotion: 3 common, 2 emotion-specific and 10 generic sentences that were different for each emotion and phonetically-balanced. The 3 common and 2 × 6 = 12 emotion-specific sentences were recorded as neutral to give 30 neutral sentences.

Content
This results in a total of 120 utterances per speaker, for example:

Common: She had your dark suit in greasy wash water all year.
Anger: Who authorized the unlimited expense account?
Disgust: Please take this dirty table cloth to the cleaners for me.
Fear: Call an ambulance for medical assistance.
Happiness: Those musicians harmonize marvelously.
Sadness: The prospect of cutting back spending is an unpleasant one for any governor.
Surprise: The carpet cleaners shampooed our oriental rug.
Neutral: The best way to learn is to solve extra problems.

In [None]:
# Folder that is listed directly for the SAVEE audio files.
Savee = "/kaggle/input/surrey-audiovisual-expressed-emotion-savee/ALL/"

savee_directory_list = os.listdir(Savee)

# Letter prefix of the second underscore-separated field -> emotion label.
# Any prefix not listed here is labelled 'surprise'.
savee_labels = {
    'a': 'angry',
    'd': 'disgust',
    'f': 'fear',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sad',
}

file_emotion, file_path = [], []

for file in savee_directory_list:
    file_path.append(Savee + file)
    # Dropping the last six characters of the second field leaves the letter prefix.
    prefix = file.split('_')[1][:-6]
    file_emotion.append(savee_labels.get(prefix, 'surprise'))

# Assemble the two columns into a single frame.
emotion_df = pd.DataFrame({'Emotions': file_emotion})
path_df = pd.DataFrame({'Path': file_path})

Savee_df = emotion_df.join(path_df)
print(Savee_df.head())
print(Savee_df.Emotions.value_counts())

**Integration**

In [None]:
# Stack the four per-dataset frames into one (Emotions, Path) table.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# every source frame contributes its own 0-based labels, so index labels repeat
# and label-based lookups (.loc) would return several rows.
data_path = pd.concat([ravdess_df, Crema_df, Tess_df, Savee_df], axis=0, ignore_index=True)
# Persist the file listing; the index is not written.
data_path.to_csv("data_path.csv", index=False)
# Last expression of the cell: the notebook renders the first rows.
data_path.head()

In [None]:
# Class balance of the combined dataset: number of clips per emotion label.
print(data_path.Emotions.value_counts())


>*                           Data Visualisation and Exploration

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart of the number of clips per emotion in the combined dataset.
plt.title('Count of Emotions', size=16)
# Pass the column as `x=`: a bare positional argument is interpreted as `data`
# by current seaborn releases and does not produce the per-label counts.
sns.countplot(x=data_path.Emotions)
plt.ylabel('Count', size=12)
plt.xlabel('Emotions', size=12)
# Remove the top and right spines, keep left and bottom.
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()

In [None]:
# Load one sample clip that the visualisation and augmentation demos below reuse.
# NOTE(review): `file_path` is whichever list the most recently executed dataset
# cell left behind (the SAVEE cell when the notebook is run top to bottom), so
# the chosen clip depends on execution order.
data,sr = librosa.load(file_path[0])
# Bare expression so the notebook displays the sampling rate returned by librosa.load.
sr

In [None]:
# Render an inline audio player for the sample clip.
ipd.Audio(data,rate=sr)

In [None]:
# CREATE LOG MEL SPECTROGRAM
plt.figure(figsize=(10, 5))
# 128-band mel power spectrogram of the sample clip, limited to 8 kHz.
spectrogram = librosa.feature.melspectrogram(y=data, sr=sr, n_mels=128,fmax=8000)
# Convert power to decibels for display.
log_spectrogram = librosa.power_to_db(spectrogram)
# Trailing semicolon keeps the notebook from echoing the returned object.
librosa.display.specshow(log_spectrogram, y_axis='mel', sr=sr, x_axis='time');
plt.title('Mel Spectrogram ')
# Colour scale labelled in dB.
plt.colorbar(format='%+2.0f dB')

In [None]:
# 30 MFCCs of the sample clip.
# NOTE(review): this module-level name `mfcc` is rebound later by the
# `def mfcc(...)` helper in the feature-extraction section.
mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=30)


# MFCC
plt.figure(figsize=(16, 10))
# Only the first slot of a 3x1 grid is drawn in this cell.
plt.subplot(3,1,1)
librosa.display.specshow(mfcc, x_axis='time')
plt.ylabel('MFCC')
plt.colorbar()

# Last expression of the cell: inline player for the same clip.
ipd.Audio(data,rate=sr)

# Data augmentation

In [None]:
# NOISE
def noise(data):
    """Return `data` with additive Gaussian noise.

    The noise amplitude is a random fraction (at most 3.5 %) of the largest
    sample value in `data`; the input array itself is not modified.
    """
    peak = np.amax(data)
    amplitude = 0.035 * np.random.uniform() * peak
    gaussian = np.random.normal(size=data.shape[0])
    return data + amplitude * gaussian

# STRETCH
def stretch(data, rate=0.8):
    """Time-stretch `data` by `rate` via librosa.effects.time_stretch.

    `rate` is forwarded unchanged. The arguments are passed by keyword
    because recent librosa releases accept `rate` as keyword-only, so the
    positional form raises a TypeError there.
    """
    return librosa.effects.time_stretch(y=data, rate=rate)
# SHIFT
def shift(data):
    """Circularly rotate `data` by a random offset.

    The offset is drawn uniformly from [-5, 5), scaled by 1000 and truncated
    to an integer number of samples; samples rolled off one end re-enter at
    the other.
    """
    offset = np.random.uniform(low=-5, high=5) * 1000
    return np.roll(data, int(offset))
# PITCH
def pitch(data, sampling_rate, pitch_factor=0.7):
    """Pitch-shift `data` via librosa.effects.pitch_shift.

    `sampling_rate` is forwarded as `sr` and `pitch_factor` as `n_steps`.
    The arguments are passed by keyword because recent librosa releases
    accept `sr` and `n_steps` as keyword-only, so the positional form raises
    a TypeError there.
    """
    return librosa.effects.pitch_shift(y=data, sr=sampling_rate, n_steps=pitch_factor)

In [None]:
# NORMAL AUDIO
# Baseline for the augmentation demos: waveform and player of the untouched sample clip.


import librosa.display
plt.figure(figsize=(12, 5))
librosa.display.waveshow(y=data, sr=sr)
# Last expression of the cell: inline audio player.
ipd.Audio(data,rate=sr)

In [None]:
# AUDIO WITH NOISE
# Apply noise() to the sample clip, then show its waveform and a player for it.
x = noise(data)
plt.figure(figsize=(12,5))
librosa.display.waveshow(y=x, sr=sr)
ipd.Audio(x, rate=sr)

In [None]:
# STRETCHED AUDIO
# Apply stretch() with its default rate, then show the waveform and a player.
x = stretch(data)
plt.figure(figsize=(12, 5))
librosa.display.waveshow(y=x, sr=sr)
ipd.Audio(x, rate=sr)

In [None]:
# SHIFTED AUDIO
# Apply shift() (random circular rotation), then show the waveform and a player.
x = shift(data)
plt.figure(figsize=(12,5))
librosa.display.waveshow(y=x, sr=sr)
ipd.Audio(x, rate=sr)

In [None]:
# AUDIO WITH PITCH
# Apply pitch() with its default pitch_factor, then show the waveform and a player.
x = pitch(data, sr)
plt.figure(figsize=(12, 5))
librosa.display.waveshow(y=x, sr=sr)
ipd.Audio(x, rate=sr)

# Feature extraction

In [None]:
def zcr(data,frame_length,hop_length):
    """Return the zero-crossing rate of `data` with size-1 axes removed.

    `frame_length` and `hop_length` are forwarded to
    librosa.feature.zero_crossing_rate.
    """
    crossings = librosa.feature.zero_crossing_rate(
        y=data,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    return np.squeeze(crossings)

def rmse(data,frame_length=2048,hop_length=512):
    """Return the RMS energy of `data` with size-1 axes removed.

    `frame_length` and `hop_length` are forwarded to librosa.feature.rms.
    """
    energy = librosa.feature.rms(
        y=data,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    return np.squeeze(energy)

def mfcc(data,sr,frame_length=2048,hop_length=512,flatten:bool=True):
    """Return the MFCCs of `data`, transposed and optionally flattened.

    Args:
        data: Audio signal passed to librosa as `y`.
        sr: Sampling rate of `data`.
        frame_length: Analysis window size, forwarded as `n_fft`.
        hop_length: Hop between frames, forwarded as `hop_length`.
        flatten: When True (default) the transposed matrix is flattened to
            1-D; otherwise it is returned with size-1 axes squeezed out.

    `frame_length` and `hop_length` used to be accepted but ignored. They are
    now forwarded, so the MFCC framing follows the same settings as the other
    features. The defaults equal the values previously used implicitly.
    """
    coeffs = librosa.feature.mfcc(y=data, sr=sr, n_fft=frame_length, hop_length=hop_length)
    frames_first = coeffs.T
    return np.ravel(frames_first) if flatten else np.squeeze(frames_first)

def extract_features(data,sr=22050,frame_length=2048,hop_length=512,debug=False):
    """Build one flat feature vector for an audio signal.

    The vector is the concatenation, in this order, of: zero-crossing rate,
    RMS energy, flattened MFCCs, spectral centroid, spectral bandwidth and
    spectral roll-off.

    Args:
        data: Audio signal.
        sr: Sampling rate of `data`.
        frame_length: Analysis window size. It is now also forwarded to the
            spectral features as `n_fft` (previously they always used the
            librosa default), so every feature uses the same framing.
        hop_length: Hop between analysis frames.
        debug: When True, print the shape of every component and of the
            final vector. Replaces the former hard-coded local flag.

    Returns:
        A 1-D float64 numpy array.
    """
    # Shared framing arguments for the three spectral descriptors.
    stft_args = dict(y=data, sr=sr, n_fft=frame_length, hop_length=hop_length)
    spectral_centroid = np.ravel(librosa.feature.spectral_centroid(**stft_args))
    spectral_bandwidth = np.ravel(librosa.feature.spectral_bandwidth(**stft_args))
    spectral_rolloff = np.ravel(librosa.feature.spectral_rolloff(**stft_args))

    # Frame-level time-domain features and MFCCs from the sibling helpers.
    zcr_feat = zcr(data,frame_length,hop_length)
    rmse_feat = rmse(data,frame_length,hop_length)
    mfcc_feat = mfcc(data,sr,frame_length,hop_length)

    if debug:
        print(f"ZCR shape: {zcr_feat.shape}, RMSE shape: {rmse_feat.shape}")
        print(f"MFCC shape: {mfcc_feat.shape}")
        print(f"Spectral shapes: {spectral_centroid.shape}, {spectral_bandwidth.shape}, {spectral_rolloff.shape}")

    parts = (zcr_feat, rmse_feat, mfcc_feat,
             spectral_centroid, spectral_bandwidth, spectral_rolloff)
    # Always hand back a 1-D float64 vector, as the original hstack onto an
    # empty float array did.
    result = np.ravel(np.hstack(parts)).astype(np.float64, copy=False)

    if debug:
        print(f"Final feature shape: {result.shape}")

    return result

In [None]:
# Extract features from the untouched recording only (no augmentation)
def get_original_features(path, duration=2.5, offset=0.6):
    """Load an audio file and return its feature vector without augmentation.

    Args:
        path: Audio file to load.
        duration: Passed to librosa.load as `duration`.
        offset: Passed to librosa.load as `offset`.

    Returns:
        The numpy array produced by extract_features for the loaded signal.
    """
    data, sr = librosa.load(path, duration=duration, offset=offset)
    # Forward the sampling rate actually returned by librosa.load instead of
    # relying on extract_features' default matching it.
    return np.asarray(extract_features(data, sr=sr))

# Function to extract augmented features (for training data only)
def get_augmented_features(path, duration=2.5, offset=0.6):
    """Load an audio file and return features for it and three augmented copies.

    Rows of the returned 2-D array, in order:
        0. original signal
        1. signal with noise
        2. pitch-shifted signal
        3. pitch-shifted signal with noise

    Use for training data only.

    Args:
        path: Audio file to load.
        duration: Passed to librosa.load as `duration`.
        offset: Passed to librosa.load as `offset`.
    """
    data, sr = librosa.load(path, duration=duration, offset=offset)

    noised_audio = noise(data)
    # The pitch shift is computed once and reused for row 3; the original
    # recomputed the identical shift a second time.
    pitched_audio = pitch(data, sr)
    pitched_noised_audio = noise(pitched_audio)

    variants = (data, noised_audio, pitched_audio, pitched_noised_audio)
    # Forward the sampling rate returned by librosa.load to the extractor.
    return np.vstack([extract_features(signal, sr=sr) for signal in variants])

In [None]:
import multiprocessing as mp
# Report how many CPUs are available before the parallel extraction below.
print("Number of processors: ", mp.cpu_count())

# Faster way to get features
***Parallel way***


Here's a breakdown of what the code does:

- The `from joblib import Parallel, delayed` statement imports the `Parallel` and `delayed` helpers from the joblib library.
- `start = timeit.default_timer()` starts a timer to measure how long the audio files take to process.
- The `process_original_feature` function processes a single audio file: it extracts features with `get_original_features` and returns them together with the emotion label and a success flag. Errors are caught and reported instead of aborting the run.
- The `paths` and `emotions` variables hold the file paths and emotion labels from the `data_path` DataFrame.
- `Parallel` runs `process_original_feature` for every audio file in parallel; `delayed` wraps each call so that it can be dispatched to a worker.
- The `results` variable contains one `(features, emotion, success)` tuple per audio file.
- The `X_original` and `Y_original` lists are filled from the successful results with `append`; failed files are collected in `failed_files`.
- If the feature vectors do not all have the same length, they are zero-padded or truncated to the most common length before being converted to a NumPy array.
- `stop = timeit.default_timer()` stops the timer, and the final `print` statements report the elapsed time and the shape of the resulting dataset.

Overall, this code demonstrates how to use the joblib library to process multiple audio files in parallel, which can significantly reduce the processing time for large datasets.

In [None]:
from joblib import Parallel, delayed
import timeit

# Start the timer for the un-augmented extraction pass (augmentation comes later,
# after the train/test split, to prevent data leakage)
start = timeit.default_timer()

def process_original_feature(path, emotion):
    """Extract un-augmented features for one file without ever raising.

    Returns a `(features, emotion, success)` tuple. On any exception the
    error is printed and `(None, emotion, False)` is returned.
    """
    try:
        features = get_original_features(path)
        # A feature vector is expected to be 1-D; report anything else.
        if features.ndim != 1:
            print(f"⚠️ Warning: Features shape {features.shape} for {path}")
        outcome = (features, emotion, True)
    except Exception as e:
        print(f"❌ Error processing {path}: {e}")
        outcome = (None, emotion, False)
    return outcome

paths = data_path.Path
emotions = data_path.Emotions

print("Extracting original features without augmentation...")
print(f"Total files to process: {len(paths)}")

# Run the extraction in parallel for original features only
results = Parallel(n_jobs=-1)(delayed(process_original_feature)(path, emotion)
                              for (path, emotion) in zip(paths, emotions))

# Collect the results with error handling.
# The result list follows the order of the submitted tasks, so zipping it
# with `paths` recovers which file each result belongs to.
X_original = []
Y_original = []
failed_files = []

for path, (features, emotion, success) in zip(paths, results):
    if success and features is not None:
        X_original.append(features)
        Y_original.append(emotion)
    else:
        # Record the path (not just the label) so the failure can be investigated.
        failed_files.append((path, emotion))

print(f"✅ Successfully processed: {len(X_original)} files")
print(f"❌ Failed files: {len(failed_files)}")

if failed_files:
    print("Failed files info:", failed_files[:5])  # Show first 5 failures
    # Skipped files make X_original shorter than data_path.
    print("⚠️ Rows of X_original no longer line up positionally with data_path "
          "because failed files were skipped")

# Check feature dimensions before conversion
if X_original:
    feature_lengths = [len(f) for f in X_original]
    unique_lengths = set(feature_lengths)
    print(f"Feature dimensions found: {unique_lengths}")

    if len(unique_lengths) > 1:
        print("⚠️ INHOMOGENEOUS FEATURES DETECTED!")
        print(f"Min length: {min(feature_lengths)}")
        print(f"Max length: {max(feature_lengths)}")

        # Normalise every vector to the most common length
        from collections import Counter
        target_length = Counter(feature_lengths).most_common(1)[0][0]
        print(f"Using target length: {target_length}")

        # Zero-pad shorter vectors, truncate longer ones
        X_original = [
            np.pad(f, (0, target_length - len(f)), 'constant')
            if len(f) < target_length else f[:target_length]
            for f in X_original
        ]
        print(f"✅ Fixed inhomogeneous features to length {target_length}")

# Convert to numpy array
X_original = np.array(X_original)
Y_original = np.array(Y_original)

stop = timeit.default_timer()
print(f'Time for original feature extraction: {stop - start:.2f} seconds')
print(f'Original dataset shape: {X_original.shape}')
print(f'Feature dimensions: {X_original.shape[1] if len(X_original.shape) > 1 else "N/A"}')

In [None]:
# 🔍 DEBUG: Test feature extraction on a single file first
print("🔍 Testing feature extraction on a single file...")

# Probe with the first row of the combined dataset
test_path, test_emotion = data_path.Path.iloc[0], data_path.Emotions.iloc[0]

print(f"Test file: {test_path}")
print(f"Test emotion: {test_emotion}")

try:
    # Single-file probe: report shape, length, type and a short preview
    features = get_original_features(test_path)
    print(f"✅ Feature extraction successful!")
    print(f"Feature shape: {features.shape}")
    print(f"Feature length: {len(features)}")
    print(f"Feature type: {type(features)}")
    print(f"Sample features (first 10): {features[:10]}")

    # Consistency probe over the first (up to) five files; each file is
    # guarded on its own so one failure does not hide the others
    print("\n🔍 Testing on 5 files for consistency...")
    for i, path in enumerate(data_path.Path.iloc[:5]):
        try:
            feat = get_original_features(path)
            print(f"File {i+1}: Shape {feat.shape}, Length {len(feat)}")
        except Exception as e:
            print(f"File {i+1}: ERROR - {e}")

except Exception as e:
    print(f"❌ Error in feature extraction: {e}")
    import traceback
    traceback.print_exc()

In [None]:
# Check the shape of our original dataset
print(f"Original features shape: {X_original.shape}")
print(f"Original labels shape: {Y_original.shape}")
print("Emotion distribution:", np.unique(Y_original, return_counts=True))

# Perform train-test split on ORIGINAL data (no augmentation yet)
from sklearn.model_selection import train_test_split

# Split the original dataset: 80/20, stratified on the emotion labels, with a
# fixed random_state so the split is reproducible.
# NOTE(review): the augmentation cell further down repeats this split on index
# positions with the same test_size / random_state / stratify arguments.
X_train_orig, X_test_orig, y_train_orig, y_test_orig = train_test_split(
    X_original, Y_original, 
    test_size=0.2, 
    random_state=42, 
    shuffle=True,
    stratify=Y_original
)

# Report the resulting partition sizes.
print(f"\nAfter train-test split (original data only):")
print(f"Train set: {X_train_orig.shape}")
print(f"Test set: {X_test_orig.shape}")
print(f"Train labels: {y_train_orig.shape}")
print(f"Test labels: {y_test_orig.shape}")

In [None]:
def process_augmented_training_feature(path, emotion):
    """Return the augmented feature rows of one training file with matching labels.

    The label list repeats `emotion` once per row returned by
    get_augmented_features.
    """
    augmented = get_augmented_features(path)
    n_rows = augmented.shape[0]
    return augmented, [emotion for _ in range(n_rows)]

In [None]:
# Simplified approach: Apply augmentation only to training data
print("Applying data augmentation to training set only...")

# Placeholders kept for backward compatibility; the actual split positions
# are train_idx / test_idx below.
train_indices = []
test_indices = []

from sklearn.model_selection import train_test_split


def _fit_to_length(vector, length):
    """Zero-pad or truncate a 1-D feature vector to exactly `length` values."""
    vector = np.asarray(vector)
    if len(vector) < length:
        return np.pad(vector, (0, length - len(vector)), 'constant')
    return vector[:length]


def _augment_or_none(path, emotion):
    """Run process_augmented_training_feature; report and return None on failure."""
    try:
        return process_augmented_training_feature(path, emotion)
    except Exception as e:
        print(f"❌ Error augmenting {path}: {e}")
        return None


# Split positions of data_path itself. The indices are applied to data_path
# below, so they must be drawn from (and stratified on) data_path rather than
# X_original, which is shorter whenever a file failed during extraction. When
# nothing failed this yields the same split as before.
indices = np.arange(len(data_path))
train_idx, test_idx = train_test_split(
    indices,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=data_path.Emotions.values
)

print(f"Training indices: {len(train_idx)}")
print(f"Testing indices: {len(test_idx)}")

# Extract training/testing paths and emotions using the positional indices
train_paths = data_path.Path.iloc[train_idx].values
train_emotions = data_path.Emotions.iloc[train_idx].values
test_paths = data_path.Path.iloc[test_idx].values
test_emotions = data_path.Emotions.iloc[test_idx].values

print(f"Training paths: {len(train_paths)}")
print(f"Training emotions: {len(train_emotions)}")

# Every vector is normalised to the width of the original feature matrix so
# the arrays built below are rectangular.
target_length = X_original.shape[1]

# Apply augmentation to training data only
start_aug = timeit.default_timer()
train_aug_results = Parallel(n_jobs=-1)(delayed(_augment_or_none)(path, emotion)
                                         for (path, emotion) in zip(train_paths, train_emotions))

# Collect augmented training results (files that failed are skipped)
X_train_augmented = []
y_train_augmented = []
for outcome in train_aug_results:
    if outcome is None:
        continue
    features, emotions_list = outcome
    for row, label in zip(features, emotions_list):
        X_train_augmented.append(_fit_to_length(row, target_length))
        y_train_augmented.append(label)

X_train_augmented = np.array(X_train_augmented)
y_train_augmented = np.array(y_train_augmented)

# Test data - extract original features only (no augmentation)
test_results = Parallel(n_jobs=-1)(delayed(process_original_feature)(path, emotion)
                                   for (path, emotion) in zip(test_paths, test_emotions))

# process_original_feature returns (features, emotion, success) triples;
# failed files are skipped and reported instead of being stored as None.
X_test_final = []
y_test_final = []
failed_test_files = []
for path, (features, emotion, success) in zip(test_paths, test_results):
    if success and features is not None:
        X_test_final.append(_fit_to_length(features, target_length))
        y_test_final.append(emotion)
    else:
        failed_test_files.append(path)

if failed_test_files:
    print(f"❌ Failed test files: {len(failed_test_files)}")

X_test_final = np.array(X_test_final)
y_test_final = np.array(y_test_final)

stop_aug = timeit.default_timer()
print(f'Time for augmentation: {stop_aug - start_aug:.2f} seconds')

print(f"\nFinal datasets (NO DATA LEAKAGE):")
print(f"Training (with 4x augmentation): {X_train_augmented.shape}")
print(f"Testing (original only): {X_test_final.shape}")
print(f"Training labels: {y_train_augmented.shape}")
print(f"Testing labels: {y_test_final.shape}")

# Set final variables
X_train_final = X_train_augmented
y_train_final = y_train_augmented

# Saving features

In [None]:
# 💾 SAVE EXTRACTED FEATURES FOR LATER USE
# This allows you to do extraction and training in different Kaggle sessions

import joblib
import pickle
import os

import json

# Create directory for saved features
features_dir = '/kaggle/working/extracted_features'
os.makedirs(features_dir, exist_ok=True)

print("💾 Saving extracted features for future sessions...")

# NOTE(review): no earlier cell of this notebook defines label_encoder /
# onehot_encoder, so look them up defensively; running the notebook top to
# bottom would otherwise stop here with a NameError. They are stored as None
# when they do not exist yet.
_label_encoder = globals().get('label_encoder')
_onehot_encoder = globals().get('onehot_encoder')

# Save the leak-free datasets
feature_data = {
    'X_train_augmented': X_train_augmented,
    'y_train_augmented': y_train_augmented,
    'X_test_final': X_test_final,
    'y_test_final': y_test_final,
    'train_idx': train_idx,
    'test_idx': test_idx,
    'label_encoder': _label_encoder,
    'onehot_encoder': _onehot_encoder
}

# Save using joblib (efficient for numpy arrays)
joblib.dump(feature_data, os.path.join(features_dir, 'leak_free_features.pkl'))

# Also save individual components for flexibility (file stem -> array)
arrays_to_save = {
    'X_train_augmented': X_train_augmented,
    'y_train_augmented': y_train_augmented,
    'X_test_final': X_test_final,
    'y_test_final': y_test_final,
    'train_indices': train_idx,
    'test_indices': test_idx,
}
for stem, array in arrays_to_save.items():
    np.save(os.path.join(features_dir, f'{stem}.npy'), array)

# Save encoders separately (only those that exist)
for stem, encoder in (('label_encoder', _label_encoder),
                      ('onehot_encoder', _onehot_encoder)):
    if encoder is None:
        print(f"⚠️ {stem} is not defined yet - skipping {stem}.pkl")
        continue
    with open(os.path.join(features_dir, f'{stem}.pkl'), 'wb') as f:
        pickle.dump(encoder, f)

print("✅ Features saved successfully!")
print(f"📁 Saved files in: /kaggle/working/extracted_features/")
print(f"📊 Training features: {X_train_augmented.shape}")
print(f"📊 Test features: {X_test_final.shape}")
print(f"📊 Training labels: {y_train_augmented.shape}")
print(f"📊 Test labels: {y_test_final.shape}")

# Class names: taken from the fitted encoder when there is one, otherwise the
# sorted unique training labels.
if _label_encoder is not None:
    class_names = _label_encoder.classes_.tolist()
else:
    class_names = np.unique(y_train_augmented).tolist()

# Save metadata for verification
metadata = {
    'extraction_timestamp': pd.Timestamp.now().isoformat(),
    'original_dataset_size': len(data_path),
    'train_size_augmented': len(X_train_augmented),
    'test_size_original': len(X_test_final),
    'feature_dimensions': X_train_augmented.shape[1],
    'augmentation_factor': 4,
    'classes': class_names,
    'train_test_split_ratio': 0.2,
    'random_state': 42
}

with open(os.path.join(features_dir, 'metadata.json'), 'w') as f:
    json.dump(metadata, f, indent=2)

print("✅ Metadata saved for verification!")

## 🔄 LOAD EXTRACTED FEATURES (For New Sessions)
**Use this section if you want to skip feature extraction and load pre-extracted features**

In [None]:
# 📂 LOAD PRE-EXTRACTED FEATURES
# Run this cell if you want to skip feature extraction and load saved features
# Make sure the extracted_features folder exists in your Kaggle input or working directory

import joblib
import pickle
import numpy as np
import json
import os

def load_extracted_features(base_path='/kaggle/input/extracted-features'):
    """
    Load pre-extracted features from a previous session

    Two on-disk layouts are recognised inside ``base_path``:
      * ``leak_free_features.pkl`` - one joblib bundle holding every array and
        both encoders (used when present), or
      * the individual ``*.npy`` arrays plus ``label_encoder.pkl`` and
        ``onehot_encoder.pkl``.
    ``metadata.json`` is optional and only printed for verification; an
    unreadable metadata file is reported but does not abort the load.

    Args:
        base_path: Path to the extracted features directory
                  For Kaggle datasets: '/kaggle/input/your-dataset-name'
                  For working directory: '/kaggle/working/extracted_features'

    Returns:
        A dict with keys 'X_train', 'y_train', 'X_test', 'y_test',
        'train_idx', 'test_idx', 'label_encoder' and 'onehot_encoder',
        or None when the directory is missing or the feature files cannot
        be loaded (the reason is printed).

    Side effects:
        Publishes X_train_final, y_train_final, X_test_final, y_test_final,
        label_encoder and onehot_encoder as module-level globals so the
        training cells can use them directly.
    """

    if not os.path.exists(base_path):
        print(f"❌ Features directory not found: {base_path}")
        print("💡 Make sure to:")
        print("   1. Save this notebook's output as a Kaggle dataset, OR")
        print("   2. Copy features from /kaggle/working/extracted_features to your input")
        return None

    print(f"📂 Loading features from: {base_path}")

    # SECURITY NOTE: joblib.load / pickle.load execute arbitrary code embedded
    # in the file. Only point base_path at feature sets you produced yourself.
    try:
        bundle_path = os.path.join(base_path, "leak_free_features.pkl")
        if os.path.exists(bundle_path):
            # Method 1: Load complete feature data (recommended)
            print("🔄 Loading complete feature dataset...")
            feature_data = joblib.load(bundle_path)

            X_train_augmented = feature_data['X_train_augmented']
            y_train_augmented = feature_data['y_train_augmented']
            X_test_final = feature_data['X_test_final']
            y_test_final = feature_data['y_test_final']
            train_idx = feature_data['train_idx']
            test_idx = feature_data['test_idx']
            label_encoder = feature_data['label_encoder']
            onehot_encoder = feature_data['onehot_encoder']

        else:
            # Method 2: Load individual components
            print("🔄 Loading individual feature files...")
            X_train_augmented = np.load(os.path.join(base_path, "X_train_augmented.npy"))
            y_train_augmented = np.load(os.path.join(base_path, "y_train_augmented.npy"))
            X_test_final = np.load(os.path.join(base_path, "X_test_final.npy"))
            y_test_final = np.load(os.path.join(base_path, "y_test_final.npy"))
            train_idx = np.load(os.path.join(base_path, "train_indices.npy"))
            test_idx = np.load(os.path.join(base_path, "test_indices.npy"))

            with open(os.path.join(base_path, "label_encoder.pkl"), 'rb') as f:
                label_encoder = pickle.load(f)
            with open(os.path.join(base_path, "onehot_encoder.pkl"), 'rb') as f:
                onehot_encoder = pickle.load(f)

        # Metadata is informational only, so a corrupt or unreadable file must
        # not throw away features that were already loaded successfully.
        metadata_path = os.path.join(base_path, "metadata.json")
        if os.path.exists(metadata_path):
            try:
                with open(metadata_path, 'r') as f:
                    metadata = json.load(f)
                print("📋 Metadata:")
                for key, value in metadata.items():
                    print(f"   {key}: {value}")
            except (OSError, ValueError, AttributeError) as e:
                print(f"⚠️ Could not read metadata (continuing without it): {e}")

        print("✅ Features loaded successfully!")
        print(f"📊 Training features: {X_train_augmented.shape}")
        print(f"📊 Test features: {X_test_final.shape}")
        print(f"📊 Training labels: {y_train_augmented.shape}")
        print(f"📊 Test labels: {y_test_final.shape}")
        print(f"📊 Classes: {label_encoder.classes_}")

        # Set global variables for use in training
        globals()['X_train_final'] = X_train_augmented
        globals()['y_train_final'] = y_train_augmented
        globals()['X_test_final'] = X_test_final
        globals()['y_test_final'] = y_test_final
        globals()['label_encoder'] = label_encoder
        globals()['onehot_encoder'] = onehot_encoder

        return {
            'X_train': X_train_augmented,
            'y_train': y_train_augmented,
            'X_test': X_test_final,
            'y_test': y_test_final,
            # The split indices were previously loaded and then dropped
            'train_idx': train_idx,
            'test_idx': test_idx,
            'label_encoder': label_encoder,
            'onehot_encoder': onehot_encoder
        }

    except Exception as e:
        # Deliberate best-effort boundary for notebook use: report and return None
        print(f"❌ Error loading features: {e}")
        return None

# Uncomment and run this line to load features:
# loaded_data = load_extracted_features('/kaggle/input/your-dataset-name')

# Usage reminder shown once the loader has been defined
for _usage_line in (
    "🔧 Function defined. To use:",
    "   loaded_data = load_extracted_features('/kaggle/input/your-dataset-name')",
    "   OR",
    "   loaded_data = load_extracted_features('/kaggle/working/extracted_features')",
):
    print(_usage_line)

In [None]:
# 🚀 QUICK START FOR NEW SESSIONS
# Uncomment the appropriate line below to load your features:

# Option 1: Load from working directory (same session)
# loaded_data = load_extracted_features('/kaggle/working/extracted_features')

# Option 2: Load from Kaggle dataset (new session)
# loaded_data = load_extracted_features('/kaggle/input/your-dataset-name')

# After loading, you can jump directly to the "Data preparation" section below
# and skip all the feature extraction cells above

# Step-by-step reminder for resuming work in a fresh session
print("\n".join([
    "💡 Instructions for new sessions:",
    "1. Save this notebook's output as a Kaggle dataset",
    "2. In new session, add that dataset as input",
    "3. Uncomment one of the load_extracted_features() lines above",
    "4. Skip to 'Data preparation' section",
    "5. Continue with model training!",
]))

# Data preparation

In [None]:
# Bind the short names used by the modelling cells to the leak-free datasets
print("Preparing data for modeling...")

# X / Y: training features and labels (with augmentation)
X, Y = X_train_final, y_train_final

# X_test / Y_test: test features and labels (original only)
X_test, Y_test = X_test_final, y_test_final

for _caption, _array in (
    ("Training data", X),
    ("Test data", X_test),
    ("Training labels", Y),
    ("Test labels", Y_test),
):
    print(f"{_caption}: {_array.shape}")

In [None]:
# One-hot encode the labels (fit on training, transform both)
# NOTE: this re-fits `label_encoder` / `onehot_encoder`, replacing any encoders
# that load_extracted_features() published as globals.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# First encode to integers
label_encoder = LabelEncoder()
y_train_int = label_encoder.fit_transform(Y)
y_test_int = label_encoder.transform(Y_test)

# Then one-hot encode. Newer scikit-learn releases renamed `sparse` to
# `sparse_output` and later dropped the old keyword; older releases only know
# `sparse`. Try the current name first and fall back for old installs.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)
Y_train_onehot = onehot_encoder.fit_transform(y_train_int.reshape(-1, 1))
Y_test_onehot = onehot_encoder.transform(y_test_int.reshape(-1, 1))

print(f"One-hot encoded training labels shape: {Y_train_onehot.shape}")
print(f"One-hot encoded test labels shape: {Y_test_onehot.shape}")
print(f"Number of classes: {Y_train_onehot.shape[1]}")
print(f"Classes: {label_encoder.classes_}")

In [None]:
# Final data preparation - NO additional train_test_split needed!
# We already have leak-free train/test split

# Assign final variables for modeling
x_train = X  # Training features (with augmentation)
x_test = X_test  # Test features (original only)
y_train = Y_train_onehot  # One-hot encoded training labels
y_test = Y_test_onehot  # One-hot encoded test labels

print(f"Final training set: {x_train.shape}")
print(f"Final test set: {x_test.shape}")
print(f"Final training labels: {y_train.shape}")
print(f"Final test labels: {y_test.shape}")

# Verify no data leakage (informational printout only - nothing is asserted)
print(f"\nData leakage check:")
print(f"Training samples: {len(x_train)}")
print(f"Test samples: {len(x_test)}")

# `data_path` is not one of the names published by load_extracted_features(),
# so it may be undefined when features were loaded from disk and the earlier
# cells were skipped. Degrade gracefully instead of raising NameError.
try:
    n_original = len(data_path)
except NameError:
    n_original = None

if n_original is None:
    print("Total original samples: unknown (data_path is not defined in this session)")
else:
    print(f"Total original samples: {n_original}")
    print(f"Expected training samples (with 4x augmentation): {n_original * 0.8 * 4}")
    print(f"Expected test samples (no augmentation): {n_original * 0.2}")

In [None]:
# Recap the shapes of the arrays that go into training
print("✅ Using leak-free train/test split from above (no additional split needed)")
for _caption, _array in (
    ("Training data", x_train),
    ("Test data", x_test),
    ("Training labels", y_train),
    ("Test labels", y_test),
):
    print(f"{_caption}: {_array.shape}")

In [None]:
# Standardise features: statistics come from the training set only and the
# same fitted scaler is then applied to both splits
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

print(f"Scaled training data shape: {x_train_scaled.shape}")
print(f"Scaled test data shape: {x_test_scaled.shape}")

# From here on the working variables refer to the scaled arrays
x_train, x_test = x_train_scaled, x_test_scaled

In [None]:
# Training callbacks - all three watch the validation accuracy
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Keep only the best-scoring model on disk
model_checkpoint = ModelCheckpoint(
    'best_model_no_leakage.h5',
    verbose=1,
    save_best_only=True,
    monitor='val_accuracy',
)

# Stop after 5 epochs without improvement and roll back to the best weights
early_stop = EarlyStopping(
    verbose=1,
    restore_best_weights=True,
    patience=5,
    mode='max',
    monitor='val_accuracy',
)

# Halve the learning rate after 3 stagnant epochs, never below 1e-5
lr_reduction = ReduceLROnPlateau(
    min_lr=0.00001,
    factor=0.5,
    verbose=1,
    patience=3,
    monitor='val_accuracy',
)

> Applying early stopping for all models


In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# This cell rebinds `model_checkpoint`, so whatever is defined here is what
# model.fit() actually uses. Keep it pointed at the same file (and with the same
# options) as the checkpoint configured earlier, instead of silently diverting
# the best model to a differently named file.
model_checkpoint = ModelCheckpoint(
    'best_model_no_leakage.h5',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1
)

In [None]:
# The monitored metric is named 'val_accuracy' (not 'val_acc') in Keras 2.x.
# These assignments rebind the early-stopping / LR callbacks used by model.fit().
early_stop = EarlyStopping(
    monitor='val_accuracy',
    mode='auto',
    patience=5,
    restore_best_weights=True,
)
lr_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=3,
    verbose=1,
    factor=0.5,
    min_lr=0.00001,
)

# CNN model

In [None]:
# Conv1D expects a trailing channel axis: append one (size 1) at position 2
x_train_cnn = x_train[:, :, np.newaxis]
x_test_cnn = x_test[:, :, np.newaxis]

for _caption, _array in (
    ("CNN training data shape", x_train_cnn),
    ("CNN test data shape", x_test_cnn),
    ("Training labels shape", y_train),
    ("Test labels shape", y_test),
):
    print(f"{_caption}: {_array.shape}")

In [None]:
import tensorflow.keras.layers as L

# One row per convolutional stage:
# (filters, kernel_size, pool_size, dropout rate or None for no dropout)
_conv_stages = (
    (256, 5, 5, None),
    (256, 5, 5, 0.2),
    (128, 3, 3, None),
    (128, 3, 3, 0.2),
    (64, 3, 3, 0.3),
)

model = tf.keras.Sequential()

# Feature extractor: Conv1D -> BatchNorm -> MaxPool (-> Dropout) per stage
for _stage_no, (_filters, _kernel, _pool, _drop) in enumerate(_conv_stages):
    _conv_kwargs = dict(kernel_size=_kernel, strides=1, padding='same', activation='relu')
    if _stage_no == 0:
        # Only the first layer declares the input shape: (n_features, 1 channel)
        _conv_kwargs['input_shape'] = (x_train_cnn.shape[1], 1)
    model.add(L.Conv1D(_filters, **_conv_kwargs))
    model.add(L.BatchNormalization())
    model.add(L.MaxPool1D(pool_size=_pool, strides=2, padding='same'))
    if _drop is not None:
        model.add(L.Dropout(_drop))

# Classifier head
model.add(L.Flatten())
model.add(L.Dense(256, activation='relu'))
model.add(L.BatchNormalization())
model.add(L.Dropout(0.4))
model.add(L.Dense(128, activation='relu'))
model.add(L.Dropout(0.3))
# Output width follows the number of one-hot label columns
model.add(L.Dense(y_train.shape[1], activation='softmax'))

# Compile model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Fit the CNN on the augmented training set.
# NOTE(review): the test split is passed as validation_data, and the callbacks
# monitor val_accuracy, so early stopping / LR scheduling / checkpointing are
# all driven by the same data that is later reported as test accuracy -
# consider a separate validation split.
print("Training CNN model with NO DATA LEAKAGE...")
print(f"Training on {len(x_train_cnn)} samples")
print(f"Validating on {len(x_test_cnn)} samples")

_fit_options = dict(
    validation_data=(x_test_cnn, y_test),
    callbacks=[early_stop, lr_reduction, model_checkpoint],
    epochs=50,
    batch_size=64,
    verbose=1,
)
history = model.fit(x_train_cnn, y_train, **_fit_options)

In [None]:
# Score the trained model on the test split (index 1 of evaluate() is accuracy)
test_accuracy = model.evaluate(x_test_cnn, y_test, verbose=0)[1]
print(f"Test Accuracy (NO DATA LEAKAGE): {test_accuracy*100:.2f}%")

# Training curves: accuracy on the left panel, loss on the right
_curves = history.history
epochs_range = range(len(_curves['accuracy']))
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

for _axis, _metric, _title in ((ax1, 'accuracy', 'Accuracy'), (ax2, 'loss', 'Loss')):
    _axis.plot(epochs_range, _curves[_metric], label=f'Training {_title}')
    _axis.plot(epochs_range, _curves[f'val_{_metric}'], label=f'Validation {_title}')
    _axis.set_title(f'Model {_title} (No Data Leakage)')
    _axis.set_xlabel('Epochs')
    _axis.set_ylabel(_title)
    _axis.legend()

plt.tight_layout()
plt.show()

print("\nFinal Results:")
print(f"Best Validation Accuracy: {max(_curves['val_accuracy'])*100:.2f}%")
print("This is a more realistic accuracy without data leakage!")

In [None]:
# Predict on the test split, then undo both encodings
# (one-hot -> integer code -> original label) for predictions and ground truth
pred_test = model.predict(x_test_cnn)
y_pred = onehot_encoder.inverse_transform(pred_test)
y_test_labels = onehot_encoder.inverse_transform(y_test)

y_pred_original = label_encoder.inverse_transform(y_pred.flatten())
y_test_original = label_encoder.inverse_transform(y_test_labels.flatten())

# Side-by-side table of predicted vs. actual labels
results_df = pd.DataFrame({
    'Predicted Labels': y_pred_original,
    'Actual Labels': y_test_original
})

print("Sample predictions (NO DATA LEAKAGE):")
print(results_df.head(10))

# Recompute the accuracy directly from the table as a cross-check
_n_correct = sum(results_df['Predicted Labels'] == results_df['Actual Labels'])
_n_total = len(results_df)
print("\nPrediction accuracy verification:")
print(f"Correct predictions: {_n_correct}")
print(f"Total predictions: {_n_total}")
print(f"Accuracy: {_n_correct/_n_total*100:.2f}%")

In [None]:
# NOTE(review): `df0` is not defined anywhere in the visible part of this
# notebook - confirm it still exists, otherwise this cell raises NameError.
df0

Some plots of multi_model
______________________________________________


# Evaluation

Results of best model

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Use the label arrays produced by the prediction cell (y_test_original /
# y_pred_original) and the fitted label encoder, rather than the names
# y_test0 / y_pred0 / encoder, which the leak-free pipeline does not define.
class_names = list(label_encoder.classes_)

# Passing labels= fixes the row/column order to match the axis tick labels
cm = confusion_matrix(y_test_original, y_pred_original, labels=class_names)
plt.figure(figsize=(12, 10))
cm = pd.DataFrame(cm, index=class_names, columns=class_names)
# To plot row-normalised rates instead of counts, uncomment the next line and
# switch fmt back to '.2f':
#cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
sns.heatmap(cm, linecolor='white', cmap='Blues', linewidth=1, annot=True, fmt='d')
plt.title('Confusion Matrix', size=20)
plt.xlabel('Predicted Labels', size=14)
plt.ylabel('Actual Labels', size=14)
plt.show()
print(classification_report(y_test_original, y_pred_original))

# Saving Best Model

In [None]:
# Persist everything needed to reuse the trained model: the full model, its
# architecture, its weights, and the three fitted preprocessing objects
import os
import pickle
from tensorflow.keras.models import Sequential, model_from_json

print("Saving model with consistent naming...")

# 1. Full model in a single file (architecture + weights)
model.save('complete_emotion_model.h5')
print("✅ Complete model saved as: complete_emotion_model.h5")

# 2. Architecture only, as JSON text
model_json = model.to_json()
with open("CNN_model.json", "w") as json_file:
    json_file.write(model_json)
print("✅ Model architecture saved as: CNN_model.json")

# 3. Weights only. NOTE(review): this is the same filename the ModelCheckpoint
# defined earlier in the notebook targets, so that file is overwritten here.
model.save_weights("best_model_no_leakage.h5")
print("✅ Model weights saved as: best_model_no_leakage.h5")

# 4. Preprocessing objects for deployment, one pickle per object
for _label, _path, _obj in (
    ("Scaler", 'scaler_final.pickle', scaler),
    ("Label encoder", 'label_encoder_final.pickle', label_encoder),
    ("OneHot encoder", 'onehot_encoder_final.pickle', onehot_encoder),
):
    with open(_path, 'wb') as f:
        pickle.dump(_obj, f)
    print(f"✅ {_label} saved as: {_path}")

print("\n🎉 MODEL SAVED SUCCESSFULLY!")
print("Files created for deployment:")
for _entry in (
    "complete_emotion_model.h5 (full model)",
    "CNN_model.json (architecture)",
    "best_model_no_leakage.h5 (weights)",
    "scaler_final.pickle",
    "label_encoder_final.pickle",
    "onehot_encoder_final.pickle",
):
    print(f"- {_entry}")

In [None]:
# IMPROVED MODEL LOADING - Consistent naming
from tensorflow.keras.models import Sequential, model_from_json

print("Loading model with consistent naming...")

# Load model architecture. The context manager closes the file even if
# read() raises, which the manual open()/close() pair did not guarantee.
with open('/kaggle/working/CNN_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Load weights with consistent naming (match the saved filename)
loaded_model.load_weights("/kaggle/working/best_model_no_leakage.h5")
print("✅ Loaded model from disk with consistent naming")

# Alternative: Load complete model directly
# from tensorflow.keras.models import load_model
# loaded_model = load_model('/kaggle/working/complete_emotion_model.h5')
# print("✅ Loaded complete model from disk")

In [None]:
# A model rebuilt from JSON + weights must be compiled before it can be evaluated
loaded_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
score = loaded_model.evaluate(x_test_cnn, y_test)
# Report the second metric (name and value) as a percentage
print(f"{loaded_model.metrics_names[1]}: {score[1]*100:.2f}%")

# Saving and Loading our Standard Scaler and encoder
* To save the StandardScaler object to use it later in a Flask API

pickle file


In [None]:
import pickle

# Write the fitted preprocessing objects under their final file names
print("Saving preprocessors with consistent naming...")

_preprocessors = (
    ("Scaler", 'scaler_final.pickle', scaler),
    ("Label encoder", 'label_encoder_final.pickle', label_encoder),
    ("OneHot encoder", 'onehot_encoder_final.pickle', onehot_encoder),
)
for _label, _path, _obj in _preprocessors:
    with open(_path, 'wb') as f:
        pickle.dump(_obj, f)
    print(f"✅ {_label} saved as: {_path}")

# Read every file back. This only confirms that unpickling succeeds; the
# reloaded objects are not compared with the originals.
_reloaded = []
for _label, _path, _obj in _preprocessors:
    with open(_path, 'rb') as f:
        _reloaded.append(pickle.load(f))
scaler_loaded, label_encoder_loaded, onehot_encoder_loaded = _reloaded

print("✅ All preprocessors saved and verified successfully!")

# Test script
* Predicts the emotion for a new audio record

In [None]:
# IMPROVED TEST SCRIPT - Load model with consistent naming
from tensorflow.keras.models import Sequential, model_from_json

print("Loading model for prediction with consistent naming...")

# Load model architecture. The context manager closes the file even if
# read() raises, which the manual open()/close() pair did not guarantee.
with open('/kaggle/working/CNN_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Load weights with consistent naming
loaded_model.load_weights("/kaggle/working/best_model_no_leakage.h5")
print("✅ Loaded model from disk for predictions")

# Compile the model for predictions
loaded_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
import pickle

# Restore the fitted preprocessing objects that were pickled after training
print("Loading preprocessors with consistent naming...")

_restored = {}
for _label, _path in (
    ("Scaler", '/kaggle/working/scaler_final.pickle'),
    ("Label encoder", '/kaggle/working/label_encoder_final.pickle'),
    ("OneHot encoder", '/kaggle/working/onehot_encoder_final.pickle'),
):
    with open(_path, 'rb') as f:
        _restored[_label] = pickle.load(f)
    print(f"✅ {_label} loaded")

scaler_loaded = _restored["Scaler"]
label_encoder_loaded = _restored["Label encoder"]
onehot_encoder_loaded = _restored["OneHot encoder"]

print("✅ All preprocessors loaded successfully for predictions!")

# Short aliases used by the prediction helpers below
scaler2, encoder2 = scaler_loaded, onehot_encoder_loaded

In [None]:
import librosa

In [None]:
# IMPROVED FEATURE EXTRACTION - Match training features exactly
def zcr(data,frame_length,hop_length):
    """Return the zero-crossing rate of *data* with singleton axes removed.

    ``frame_length`` and ``hop_length`` are forwarded unchanged to
    ``librosa.feature.zero_crossing_rate``.
    """
    crossing_rate = librosa.feature.zero_crossing_rate(
        y=data,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    return np.squeeze(crossing_rate)

def rmse(data,frame_length=2048,hop_length=512):
    """Return the RMS energy of *data* with singleton axes removed.

    ``frame_length`` and ``hop_length`` are forwarded unchanged to
    ``librosa.feature.rms``.
    """
    energy = librosa.feature.rms(
        y=data,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    return np.squeeze(energy)

def mfcc(data,sr,frame_length=2048,hop_length=512,flatten:bool=True):
    """Return the MFCCs of *data*, time-major.

    Args:
        data: Audio samples.
        sr: Sample rate of ``data``.
        frame_length: Analysis window size, forwarded as ``n_fft``.
        hop_length: Hop between successive frames.
        flatten: If True (default) return the transposed coefficient matrix
            flattened to 1-D; otherwise return it with singleton axes removed.

    ``frame_length`` and ``hop_length`` used to be accepted but silently
    ignored. They are now forwarded so the MFCC frame grid follows the same
    settings as ``zcr`` and ``rmse``. With the default arguments (2048 / 512)
    the output is unchanged, since those are also librosa's defaults.
    """
    coeffs = librosa.feature.mfcc(y=data, sr=sr, n_fft=frame_length, hop_length=hop_length)
    return np.ravel(coeffs.T) if flatten else np.squeeze(coeffs.T)

def extract_features(data,sr=22050,frame_length=2048,hop_length=512):
    """Build one flat feature vector for an audio clip.

    The vector concatenates, in this order: zero-crossing rate, RMS energy,
    flattened MFCCs, spectral centroid, spectral bandwidth and spectral
    rolloff.
    NOTE(review): this order is presumably the one used when the training
    features were extracted - confirm against the training-time extractor.
    """
    spectral_args = dict(y=data, sr=sr, hop_length=hop_length)

    pieces = [
        # Empty float64 seed, kept so the stacked result has the same dtype
        # as before
        np.array([]),
        zcr(data, frame_length, hop_length),
        rmse(data, frame_length, hop_length),
        mfcc(data, sr, frame_length, hop_length),
        np.squeeze(librosa.feature.spectral_centroid(**spectral_args)),
        np.squeeze(librosa.feature.spectral_bandwidth(**spectral_args)),
        np.squeeze(librosa.feature.spectral_rolloff(**spectral_args)),
    ]
    return np.hstack(pieces)

In [None]:
# IMPROVED PREDICTION FUNCTION - Fixed feature dimensions and consistent naming
def get_predict_feat(path):
    """Extract, scale and reshape the features of one audio file for the CNN.

    Loads 2.5 s of audio starting 0.6 s into the file, extracts the feature
    vector, scales it with the training scaler (``scaler2``) and appends a
    channel axis.

    Args:
        path: Path of the audio file to load.

    Returns:
        Array of shape (1, n_features, 1).

    Raises:
        ValueError: If the number of extracted features differs from what the
            scaler was fitted on.
    """
    d, s_rate = librosa.load(path, duration=2.5, offset=0.6)
    # Pass the sample rate that librosa actually loaded at instead of relying
    # on extract_features' default matching it.
    res = extract_features(d, sr=s_rate)
    result = np.asarray(res)

    # Check the actual feature dimensions from training
    print(f"Extracted features shape: {result.shape}")

    # One row, as many columns as there are features. The method form avoids
    # np.reshape's `newshape=` keyword, which newer NumPy releases deprecate.
    result = result.reshape(1, -1)
    print(f"Reshaped features: {result.shape}")

    # Fail with an actionable message when the vector length does not match
    # the scaler (presumably the clip is shorter than the 2.5 s window used in
    # training - confirm for the failing file).
    expected = getattr(scaler2, "n_features_in_", None)
    if expected is not None and result.shape[1] != expected:
        raise ValueError(
            f"Extracted {result.shape[1]} features from {path!r}, "
            f"but the scaler was fitted on {expected}"
        )

    # Scale using the same scaler from training
    i_result = scaler2.transform(result)

    # Expand dims for CNN input
    final_result = np.expand_dims(i_result, axis=2)
    print(f"Final features for CNN: {final_result.shape}")

    return final_result

In [None]:
# Smoke test: run the prediction feature pipeline on one file and show the
# shape of the array that would be fed to the CNN.
res=get_predict_feat("/kaggle/input/ravdess-emotional-speech-audio/Actor_01/03-01-07-01-01-01-01.wav")
print(res.shape)

In [None]:
# IMPROVED PREDICTION FUNCTION - Proper emotion mapping
def prediction(path1):
    """Predict emotion from audio file path

    Args:
        path1: Path of the audio file to classify.

    Returns:
        ``(predicted_emotion, confidence)`` where ``predicted_emotion`` is the
        original class label and ``confidence`` is the highest softmax
        probability as a percentage. On any failure the error is printed and
        ``(None, 0)`` is returned.
    """
    try:
        # Extract features
        res = get_predict_feat(path1)

        # Make prediction
        predictions = loaded_model.predict(res)
        print(f"Raw predictions shape: {predictions.shape}")
        print(f"Raw predictions: {predictions}")

        # The one-hot encoder was fitted on the label encoder's integer codes,
        # so inverting it only yields a class index. Map that index back
        # through the label encoder to get the actual emotion label.
        class_index = encoder2.inverse_transform(predictions)
        predicted_emotion = label_encoder_loaded.inverse_transform(
            class_index.ravel().astype(int)
        )[0]

        # Get confidence scores
        confidence = np.max(predictions) * 100

        print(f"🎯 Predicted Emotion: {predicted_emotion}")
        print(f"🎯 Confidence: {confidence:.2f}%")

        return predicted_emotion, confidence

    except Exception as e:
        # Deliberate best-effort boundary for interactive use: report, don't raise
        print(f"❌ Error in prediction: {str(e)}")
        return None, 0

In [None]:
# NOTE(review): in the RAVDESS file-name scheme the third field ("01" here)
# presumably encodes the emotion (01 = neutral) - confirm against the dataset docs.
prediction("/kaggle/input/ravdess-emotional-speech-audio/Actor_02/03-01-01-01-01-01-02.wav")

In [None]:
# NOTE(review): third file-name field "01" presumably means neutral - confirm.
prediction("/kaggle/input/ravdess-emotional-speech-audio/Actor_01/03-01-01-01-01-01-01.wav")

In [None]:
# NOTE(review): third file-name field "05" presumably means angry - confirm.
prediction("/kaggle/input/ravdess-emotional-speech-audio/Actor_01/03-01-05-01-02-02-01.wav")

In [None]:
# NOTE(review): third file-name field "04" presumably means sad - confirm.
prediction("/kaggle/input/ravdess-emotional-speech-audio/Actor_21/03-01-04-02-02-02-21.wav")

In [None]:
# NOTE(review): third file-name field "06" presumably means fearful - confirm.
prediction("/kaggle/input/ravdess-emotional-speech-audio/Actor_02/03-01-06-01-02-02-02.wav")

In [None]:
# NOTE(review): third file-name field "08" presumably means surprised - confirm.
prediction("/kaggle/input/ravdess-emotional-speech-audio/Actor_01/03-01-08-01-01-01-01.wav")

In [None]:
# NOTE(review): third file-name field "07" presumably means disgust - confirm.
prediction("/kaggle/input/ravdess-emotional-speech-audio/Actor_01/03-01-07-01-01-01-01.wav")