In [None]:
import pandas as pd
import seaborn as sns
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
from sklearn.model_selection import train_test_split
import torchaudio
from torch.utils.data import Dataset, DataLoader
from transformers import Wav2Vec2Model, Wav2Vec2Processor, Trainer, TrainingArguments, Wav2Vec2ForSequenceClassification
import warnings
from collections import Counter
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import accuracy_score, f1_score
import torch.nn as nn
warnings.filterwarnings('ignore')



# Location of the MELD utterance table inside the Kaggle input mount.
csv_path = '/kaggle/input/meld-data/MELD_complete_dataset.csv'
# Lines that pandas cannot parse are skipped instead of aborting the load.
df = pd.read_csv(csv_path, encoding='utf-8', on_bad_lines='skip')

# Row-wise view of the table (one dict per row) for the previews below.
data_dict = df.to_dict(orient='records')

# Print the first three raw records, one left-aligned "column: value" line
# per field.
first_records = data_dict[:3]
for i, row in enumerate(first_records):
    print(f"\n Record {i+1}:")
    for key, value in row.items():
        print(f"{key:<15}: {value}")

# Remove the columns listed here; names that are absent from the CSV are
# ignored instead of raising.
unused_columns = [
    'Speaker', 'Sentiment', 'Dialogue_ID', 'Utterance_ID', 'Season',
    'StartTime', 'EndTime', 'Episode', 'audio_path',
]
df = df.drop(columns=unused_columns, errors='ignore')

# Rebuild the row-wise view so it reflects the reduced table.
data_dict = df.to_dict(orient='records')

# Show the first record again, now limited to the remaining columns.
row = data_dict[0]
for key, value in row.items():
    print(f"{key:<15}: {value}")

# Number of rows carrying each emotion label.
emotion_counts = df['Emotion'].value_counts()
print(emotion_counts)


# Utterances and emotion labels as plain Python lists of strings.
utterance_column = df['Utterance'].astype(str)
emotion_column = df['Emotion'].astype(str)
texts = utterance_column.tolist()
labels = emotion_column.tolist()

# Tokenise by lower-casing and splitting on whitespace.
tokenized = [utterance.lower().split() for utterance in texts]

# Count every token across the whole corpus, sentence by sentence.
word_counts = Counter()
for sentence_tokens in tokenized:
    word_counts.update(sentence_tokens)

# Words receive ids 2, 3, ... in order of first appearance; ids 0 and 1 are
# reserved for the padding and unknown-word markers.
vocab = dict(zip(word_counts, range(2, len(word_counts) + 2)))
vocab.update({'<PAD>': 0, '<UNK>': 1})


# Pre-trained GloVe vectors: one "<word> <v1> ... <vN>" entry per text line.
glove_path = "/kaggle/input/embeddings/glove.6B.100d.txt" 
embedding_dim = 100  

# word -> float32 vector, for every line of the GloVe file.
embeddings_index = {}
with open(glove_path, 'r', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        # First field is the word, the remaining fields are its coefficients.
        embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')

# --- Create embedding matrix ---
# Row i holds the GloVe vector of the word whose vocabulary id is i; words
# that GloVe does not contain keep an all-zero row.
embedding_matrix = np.zeros((len(vocab), embedding_dim))
for word, i in vocab.items():
    if word in embeddings_index:
        embedding_matrix[i] = embeddings_index[word]



# Replace every token by its vocabulary id; id 1 (<UNK>) is the fallback for
# tokens that are missing from the vocabulary.
encoded_texts = []
for sentence_tokens in tokenized:
    encoded_texts.append([vocab.get(w, 1) for w in sentence_tokens])

# Class indices follow the alphabetical order of the distinct label strings.
label_names = sorted(set(labels))
idx_to_label = dict(enumerate(label_names))
label_to_idx = {name: idx for idx, name in idx_to_label.items()}
encoded_labels = [label_to_idx[name] for name in labels]

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# the same between runs.
X_train, X_test, y_train, y_test = train_test_split(
    encoded_texts,
    encoded_labels,
    test_size=0.2,
    random_state=42,
)


from torch.nn.utils.rnn import pad_sequence
import torch

# Convert each variable-length list of token ids into a 1-D int64 tensor.
X_train_tensors = []
for token_ids in X_train:
    X_train_tensors.append(torch.tensor(token_ids, dtype=torch.long))
X_test_tensors = []
for token_ids in X_test:
    X_test_tensors.append(torch.tensor(token_ids, dtype=torch.long))

# Pad with id 0 (the <PAD> index) into (num_samples, max_len) matrices. Each
# split is padded to the length of its own longest sequence.
padding_options = {'batch_first': True, 'padding_value': 0}
X_train_padded = pad_sequence(X_train_tensors, **padding_options)
X_test_padded = pad_sequence(X_test_tensors, **padding_options)

# Integer class targets in the dtype expected by nn.CrossEntropyLoss.
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

print("Train shape:", X_train_padded.shape)
print("Test shape:", X_test_padded.shape)


class CNN_LSTM(nn.Module):
    """Sequence classifier: embedding -> Conv1d + ReLU -> bidirectional LSTM -> linear.

    Token id 0 is treated as padding (``padding_idx=0``). The LSTM is fed a
    packed sequence built from each row's true length, so trailing padding
    is never processed by the recurrent layer. The logits for a sentence are
    therefore the same whether it is passed unpadded or inside a padded
    batch, provided the padding embedding row is zero, which is how
    ``nn.Embedding`` initialises it.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: width of each embedding vector.
        hidden_dim: hidden size of the LSTM, per direction.
        num_classes: number of output logits.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        # padding=1 with kernel_size=3 keeps the sequence length unchanged.
        self.conv = nn.Conv1d(embed_dim, 128, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.lstm = nn.LSTM(128, hidden_dim, batch_first=True, bidirectional=True)
        # Forward and backward final states are concatenated -> 2 * hidden_dim.
        self.fc = nn.Linear(hidden_dim * 2, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: integer tensor of token ids, shape (batch, seq_len), with
               padding id 0 after the real tokens.
        """
        # True length of each row = 1-based position of its last non-padding
        # token. Rows made only of padding are clamped to length 1 because a
        # packed sequence cannot contain empty entries.
        is_token = x != self.embedding.padding_idx
        positions = torch.arange(1, x.size(1) + 1, device=x.device)
        lengths = (is_token * positions).max(dim=1).values.clamp(min=1)

        x = self.embedding(x)            # (batch, seq_len, embed_dim)
        x = x.permute(0, 2, 1)           # Conv1d expects (batch, channels, seq_len)
        x = self.relu(self.conv(x))
        x = x.permute(0, 2, 1)           # back to (batch, seq_len, 128)

        # Packing makes the LSTM stop at each row's last real token, so the
        # final hidden states no longer depend on the amount of padding.
        # pack_padded_sequence requires the lengths on the CPU.
        packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        _, (h, _) = self.lstm(packed)
        # h[-2] / h[-1]: final states of the forward / backward direction.
        h = torch.cat((h[-2], h[-1]), dim=1)
        return self.fc(h)

vocab_size = len(vocab)
num_classes = len(label_to_idx)

# The embedding width is taken from the pre-trained matrix itself, so the
# layer and the weights copied into it cannot disagree on the dimension.
model = CNN_LSTM(
    vocab_size,
    embed_dim=embedding_matrix.shape[1],
    hidden_dim=128,
    num_classes=num_classes,
)

# Initialise the embedding layer with the GloVe-based matrix. The copy runs
# under no_grad (rather than through `.data`) and the matrix is converted to
# float32 to match the parameter dtype.
with torch.no_grad():
    model.embedding.weight.copy_(
        torch.as_tensor(embedding_matrix, dtype=torch.float32)
    )
model.embedding.weight.requires_grad = True  # allow fine-tuning

# Multi-class cross-entropy on the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
from sklearn.metrics import accuracy_score, f1_score
import torch

epochs = 5
batch_size = 32
# Chunk size for inference; evaluation results do not depend on it.
eval_batch_size = 256
# Running sums of the per-epoch metrics, averaged over epochs at the end.
val_test_avg = 0
val_train_avg = 0
f1_avg = 0


def _predict_classes(inputs, chunk_size):
    """Return the argmax class ids for `inputs` as a NumPy array.

    The model is applied to `chunk_size` rows at a time so that a large
    split is never pushed through the network in one forward pass.
    """
    chunks = [
        model(inputs[start:start + chunk_size]).argmax(dim=1)
        for start in range(0, len(inputs), chunk_size)
    ]
    return torch.cat(chunks).cpu().numpy()


for epoch in range(epochs):
    model.train()
    total_loss = 0

    # Visit the training samples in a fresh random order every epoch instead
    # of replaying the identical batch sequence each time.
    order = torch.randperm(len(X_train_padded))
    for i in range(0, len(order), batch_size):
        batch_idx = order[i:i + batch_size]
        X_batch = X_train_padded[batch_idx]
        y_batch = y_train_tensor[batch_idx]

        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        # Sum of the per-batch losses (not a per-sample average).
        total_loss += loss.item()

    # Score the held-out split and the training split after this epoch.
    model.eval()
    with torch.no_grad():
        preds = _predict_classes(X_test_padded, eval_batch_size)
        preds_train = _predict_classes(X_train_padded, eval_batch_size)
    y_true = y_test_tensor.cpu().numpy()
    y_true_train = y_train_tensor.cpu().numpy()

    acc = accuracy_score(y_true, preds)               # held-out accuracy
    acc2 = accuracy_score(y_true_train, preds_train)  # training-set accuracy
    f1 = f1_score(y_true, preds, average='weighted')
    val_test_avg += acc
    val_train_avg += acc2
    f1_avg += f1

    # "Val Acc" is the held-out split, "Val Acc2" is the training split.
    print(f"Epoch [{epoch+1}/{epochs}]  |  Loss: {total_loss:.4f}  |  Val Acc: {acc*100:.2f}% | Val Acc2: {acc2*100:.2f}%|  F1: {f1:.4f}")

# These are means over all epochs, not the metrics of the final model.
print(f"Validation test accuracy: {(val_test_avg/epochs)*100:.2f}% | Validation train Accuracy: {(val_train_avg/epochs)*100:.2f}%|  F1: {(f1_avg/epochs):.4f}")

In [None]:
def predict_emotion(sentence):
    """Predict the emotion label of a single sentence.

    The sentence is lower-cased and split on whitespace, the same
    tokenisation used to build `vocab`, and tokens missing from `vocab` map
    to the `<UNK>` id. The global `model` is switched to eval mode as a
    side effect.

    Args:
        sentence: text to classify.

    Returns:
        The label string from `idx_to_label` for the highest-scoring class.

    Raises:
        ValueError: if `sentence` contains no tokens. A zero-length sequence
            cannot pass through the model's kernel_size=3 convolution, so
            this is reported up front instead of failing inside the network.
    """
    tokens = sentence.lower().split()
    if not tokens:
        raise ValueError("predict_emotion() needs a sentence with at least one word")

    unk_id = vocab.get('<UNK>', 1)
    encoded = [vocab.get(w, unk_id) for w in tokens]

    model.eval()
    # Build the input on whatever device the model's parameters live on.
    device = next(model.parameters()).device
    with torch.no_grad():
        # Shape (1, seq_len): a batch holding just this sentence, no padding.
        x_tensor = torch.tensor(encoded, dtype=torch.long, device=device).unsqueeze(0)
        output = model(x_tensor)
        pred = torch.argmax(output, dim=1).item()

    return idx_to_label[pred]

# Example usage: classify one hand-written sentence and show the result.
test_sentence = "I am really Happy today"
predicted_emotion = predict_emotion(test_sentence)
print(
    f"Sentence: {test_sentence}",
    f"Predicted Emotion: {predicted_emotion}",
    sep="\n",
)
