In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("zaber666/meld-dataset")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/meld-dataset


In [None]:
import pandas as pd
import numpy as np

# Load datasets
train_path = "/kaggle/input/meld-dataset/MELD-RAW/MELD.Raw/train/train_sent_emo.csv"
test_path = "/kaggle/input/meld-dataset/MELD-RAW/MELD.Raw/test_sent_emo.csv"
val_path = "/kaggle/input/meld-dataset/MELD-RAW/MELD.Raw/dev_sent_emo.csv"

train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)
val_df = pd.read_csv(val_path)

# Extract labels
train_sentiment = train_df["Sentiment"]
val_sentiment = val_df["Sentiment"]
test_sentiment = test_df["Sentiment"]

train_emotion = train_df["Emotion"]
val_emotion = val_df["Emotion"]
test_emotion = test_df["Emotion"]


In [3]:
import pickle
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

# Load Feature Data
with open('/kaggle/input/meld-dataset/MELD-Features-Models/MELD.Features.Models/features/audio_embeddings_feature_selection_emotion.pkl', 'rb') as a:
    pre_audio_features_emotion = pickle.load(a)
print(type(pre_audio_features_emotion))
print(len(pre_audio_features_emotion))

with open('/kaggle/input/meld-dataset/MELD-Features-Models/MELD.Features.Models/features/audio_emotion.pkl', 'rb') as b:
    audio_features_emotion = pickle.load(b)
print(type(audio_features_emotion))
print(len(audio_features_emotion))

with open('/kaggle/input/meld-dataset/MELD-Features-Models/MELD.Features.Models/features/audio_embeddings_feature_selection_sentiment.pkl', 'rb') as c:
    pre_audio_features_sentiment = pickle.load(c)
print(type(pre_audio_features_sentiment))
print(len(pre_audio_features_sentiment))

with open('/kaggle/input/meld-dataset/MELD-Features-Models/MELD.Features.Models/features/audio_sentiment.pkl', 'rb') as d:
    audio_features_sentiment = pickle.load(d)
print(type(audio_features_sentiment))
print(len(audio_features_sentiment))


<class 'list'>
3
<class 'list'>
3
<class 'list'>
3
<class 'list'>
3


In [4]:
# Unpack the list
X1_train, X1_val, X1_test = pre_audio_features_emotion

# Check the keys in each set
#print("Train keys:", X1_train.keys())
#print("Validation keys:", X1_val.keys())
#print("Test keys:", X1_test.keys())


In [5]:
def get_audio_features(df, feature_dict):
    utt_ids = df['Dialogue_ID'].astype(str) + "_" + df['Utterance_ID'].astype(str)
    features = np.array([feature_dict[utt_id] for utt_id in utt_ids if utt_id in feature_dict])
    return features

# Extract aligned features
X1_train_aligned = get_audio_features(train_df, X1_train)
X1_val_aligned = get_audio_features(val_df, X1_val)
X1_test_aligned = get_audio_features(test_df, X1_test)

# Check shapes
print("Train features shape:", X1_train_aligned.shape)
print("Validation features shape:", X1_val_aligned.shape)
print("Test features shape:", X1_test_aligned.shape)

# Optional: sanity check against label dataframes
print("\nTrain labels count:", len(train_df))
print("Validation labels count:", len(val_df))
print("Test labels count:", len(test_df))


Train features shape: (9989, 1611)
Validation features shape: (1109, 1611)
Test features shape: (2610, 1611)

Train labels count: 9989
Validation labels count: 1109
Test labels count: 2610


In [6]:
# Load Labels
y_train = train_df["Emotion"].values
y_val = val_df["Emotion"].values
y_test = test_df["Emotion"].values

# Encode labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

# One-hot encode for model compatibility (if needed)
#from tensorflow.keras.utils import to_categorical
#y_train_cat = to_categorical(y_train_encoded)
#y_val_cat = to_categorical(y_val_encoded)
#y_test_cat = to_categorical(y_test_encoded)


In [7]:
from keras.models import load_model
from keras.layers import GRU, Dense, Dropout, Bidirectional, Input
from keras import Sequential
import tensorflow as tf

tf.config.run_functions_eagerly(True)

# Include all possible layer types used in the model
custom_objects = {
    'GRU': GRU,
    'Dense': Dense,
    'Dropout': Dropout,
    'Bidirectional': Bidirectional,
    'Sequential': Sequential,
    'Input': Input
}

# Load the model
emotion_model = load_model(
    '/kaggle/input/meld-dataset/MELD-Features-Models/MELD.Features.Models/models/audio_weights_emotion.hdf5',
    custom_objects=custom_objects, compile=False
)


In [8]:
# Trim to make it divisible by 33
trim_size = X1_val_aligned.shape[0] - (X1_val_aligned.shape[0] % 33)
X1_val_trimmed = X1_val_aligned[:trim_size]

# Reshape to (num_sequences, 33, 1611)
X1_val_reshaped = X1_val_trimmed.reshape(-1, 33, 1611)

# Test set
trim_size_test = X1_test_aligned.shape[0] - (X1_test_aligned.shape[0] % 33)
X1_test_trimmed = X1_test_aligned[:trim_size_test]
X1_test_reshaped = X1_test_trimmed.reshape(-1, 33, 1611)


In [9]:
# Validation
# Trim features
trim_size_val = X1_val_aligned.shape[0] - (X1_val_aligned.shape[0] % 33)
X1_val_trimmed = X1_val_aligned[:trim_size_val]
X1_val_reshaped = X1_val_trimmed.reshape(-1, 33, 1611)

# Trim labels accordingly
y_val_trimmed = y_val_encoded[:trim_size_val]

# Predict
val_preds = emotion_model.predict(X1_val_reshaped)
print("val_preds.shape:", val_preds.shape)

# Reshape model predictions to (1089, 7)
val_preds_flat = val_preds.reshape(-1, val_preds.shape[-1])  # (1089, 7)
val_preds_labels = np.argmax(val_preds_flat, axis=1)

y_val_trimmed = y_val_encoded[:len(val_preds_labels)]

print("Validation Accuracy:", accuracy_score(y_val_trimmed, val_preds_labels))




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 529ms/step
val_preds.shape: (33, 33, 7)
Validation Accuracy: 0.09733700642791551


In [10]:
# Testing
# Trim features
trim_size_test = X1_test_aligned.shape[0] - (X1_test_aligned.shape[0] % 33)
X1_test_trimmed = X1_test_aligned[:trim_size_test]
X1_test_reshaped = X1_test_trimmed.reshape(-1, 33, 1611)

# Trim labels accordingly
y_test_trimmed = y_test_encoded[:trim_size_test]

# Predict
test_preds = emotion_model.predict(X1_test_reshaped)
print("test_preds.shape:", test_preds.shape)

# Reshape model predictions to (1089, 7)
test_preds_flat = test_preds.reshape(-1, test_preds.shape[-1])  # (1089, 7)
test_preds_labels = np.argmax(test_preds_flat, axis=1)

y_test_trimmed = y_test_encoded[:len(test_preds_labels)]

# Now it's safe
print("Test Accuracy:", accuracy_score(y_test_trimmed, test_preds_labels))
print("\nTest Classification Report:")
print(classification_report(y_test_trimmed, test_preds_labels, target_names=label_encoder.classes_))




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 624ms/step
test_preds.shape: (79, 33, 7)
Test Accuracy: 0.09167625623321826

Test Classification Report:
              precision    recall  f1-score   support

       anger       0.09      0.54      0.16       345
     disgust       0.00      0.00      0.00        68
        fear       0.00      0.00      0.00        50
         joy       0.00      0.00      0.00       402
     neutral       0.19      0.00      0.01      1253
     sadness       0.00      0.00      0.00       208
    surprise       0.10      0.17      0.13       281

    accuracy                           0.09      2607
   macro avg       0.06      0.10      0.04      2607
weighted avg       0.12      0.09      0.04      2607



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
# Unpack the list
X2_train, X2_val, X2_test = pre_audio_features_sentiment

# Check the keys in each set
#print("Train keys:", X2_train.keys())
#print("Validation keys:", X1_val.keys())
#print("Test keys:", X2_test.keys())


In [12]:
# Extract aligned features
X2_train_aligned = get_audio_features(train_df, X2_train)
X2_val_aligned = get_audio_features(val_df, X2_val)
X2_test_aligned = get_audio_features(test_df, X2_test)

# Check shapes
print("Train features shape:", X2_train_aligned.shape)
print("Validation features shape:", X2_val_aligned.shape)
print("Test features shape:", X2_test_aligned.shape)

# Optional: sanity check against label dataframes
print("\nTrain labels count:", len(train_df))
print("Validation labels count:", len(val_df))
print("Test labels count:", len(test_df))


Train features shape: (9989, 1422)
Validation features shape: (1109, 1422)
Test features shape: (2610, 1422)

Train labels count: 9989
Validation labels count: 1109
Test labels count: 2610


In [13]:
# Load Labels
y_train = train_df["Sentiment"].values
y_val = val_df["Sentiment"].values
y_test = test_df["Sentiment"].values

# Encode labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

# One-hot encode for model compatibility (if needed)
#from tensorflow.keras.utils import to_categorical
#y_train_cat = to_categorical(y_train_encoded)
#y_val_cat = to_categorical(y_val_encoded)
#y_test_cat = to_categorical(y_test_encoded)


In [14]:
from keras.models import load_model
from keras.layers import GRU, LSTM, Dense, Dropout, Bidirectional, Input
from keras import Sequential
import tensorflow as tf

tf.config.run_functions_eagerly(True)

# Include all possible layer types used in the model
custom_objects = {
    'GRU': GRU,
    'LSTM': LSTM,
    'Dense': Dense,
    'Dropout': Dropout,
    'Bidirectional': Bidirectional,
    'Sequential': Sequential,
    'Input': Input
}

# Load the model
sentiment_model = load_model(
    '/kaggle/input/meld-dataset/MELD-Features-Models/MELD.Features.Models/models/audio_weights_sentiment.hdf5',
    custom_objects=custom_objects, compile=False
)


In [15]:
# Trim to make it divisible by 33
trim_size = X2_val_aligned.shape[0] - (X2_val_aligned.shape[0] % 33)
X2_val_trimmed = X2_val_aligned[:trim_size]

# Reshape to (num_sequences, 33, 1611)
X2_val_reshaped = X2_val_trimmed.reshape(-1, 33, 1422)

# Test set
trim_size_test = X2_test_aligned.shape[0] - (X2_test_aligned.shape[0] % 33)
X2_test_trimmed = X2_test_aligned[:trim_size_test]
X2_test_reshaped = X2_test_trimmed.reshape(-1, 33, 1422)


In [16]:
# Validation
# Trim features
trim_size_val = X2_val_aligned.shape[0] - (X2_val_aligned.shape[0] % 33)
X2_val_trimmed = X2_val_aligned[:trim_size_val]
X2_val_reshaped = X2_val_trimmed.reshape(-1, 33, 1422)

# Trim labels accordingly
y_val_trimmed = y_val_encoded[:trim_size_val]

# Predict
val_preds = sentiment_model.predict(X2_val_reshaped)
print("val_preds.shape:", val_preds.shape)

# Reshape model predictions to (1089, 7)
val_preds_flat = val_preds.reshape(-1, val_preds.shape[-1])  # (1089, 7)
val_preds_labels = np.argmax(val_preds_flat, axis=1)

y_val_trimmed = y_val_encoded[:len(val_preds_labels)]

print("Validation Accuracy:", accuracy_score(y_val_trimmed, val_preds_labels))




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 827ms/step
val_preds.shape: (33, 33, 3)
Validation Accuracy: 0.26538108356290174


In [17]:
# Testing
# Trim features
trim_size_test = X2_test_aligned.shape[0] - (X2_test_aligned.shape[0] % 33)
X2_test_trimmed = X2_test_aligned[:trim_size_test]
X2_test_reshaped = X2_test_trimmed.reshape(-1, 33, 1422)

# Trim labels accordingly
y_test_trimmed = y_test_encoded[:trim_size_test]

# Predict
test_preds = sentiment_model.predict(X2_test_reshaped)
print("test_preds.shape:", test_preds.shape)

# Reshape model predictions to (1089, 7)
test_preds_flat = test_preds.reshape(-1, test_preds.shape[-1])  # (1089, 7)
test_preds_labels = np.argmax(test_preds_flat, axis=1)

y_test_trimmed = y_test_encoded[:len(test_preds_labels)]

# Now it's safe
print("Test Accuracy:", accuracy_score(y_test_trimmed, test_preds_labels))
print("\nTest Classification Report:")
print(classification_report(y_test_trimmed, test_preds_labels, target_names=label_encoder.classes_))




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 987ms/step
test_preds.shape: (79, 33, 3)
Test Accuracy: 0.25009589566551593

Test Classification Report:
              precision    recall  f1-score   support

    negative       0.26      0.52      0.35       833
     neutral       0.20      0.01      0.01      1253
    positive       0.23      0.41      0.29       521

    accuracy                           0.25      2607
   macro avg       0.23      0.31      0.22      2607
weighted avg       0.22      0.25      0.18      2607

