In [None]:
import os
from google.colab import drive

# Locations of the transcript folder and of the JSON file inside it,
# both under the Google Drive mount point.
transcript_dir = "/content/drive/MyDrive/multimodal_emotion_recognition/transcripts"
transcript_file = "/content/drive/MyDrive/multimodal_emotion_recognition/transcripts/transcripts.json"

# Attach Google Drive at /content/drive so the paths above resolve.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Emotion name -> integer class id; ids follow the order of the list below.
emotion_map = {
    name: class_id
    for class_id, name in enumerate(
        ["neutral", "calm", "happy", "sad", "angry", "fearful", "disgust", "surprised"]
    )
}

import json

# Load the transcript records. The encoding is given explicitly so the JSON
# text is decoded as UTF-8 regardless of the platform's locale default.
with open(transcript_file, "r", encoding="utf-8") as file:
    transcript_data = json.load(file)

# Split the records into two parallel lists: one text and one label per item.
transcripts = [item["transcript"] for item in transcript_data]
labels = [item["label"] for item in transcript_data]

In [None]:
!pip install torch==2.0.1 torchtext==0.15.2 --force-reinstall

Collecting torch==2.0.1
  Downloading torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl.metadata (24 kB)
Collecting torchtext==0.15.2
  Downloading torchtext-0.15.2-cp311-cp311-manylinux1_x86_64.whl.metadata (7.4 kB)
Collecting filelock (from torch==2.0.1)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting typing-extensions (from torch==2.0.1)
  Downloading typing_extensions-4.13.2-py3-none-any.whl.metadata (3.0 kB)
Collecting sympy (from torch==2.0.1)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch==2.0.1)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch==2.0.1)
  Downloading jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch==2.0.1)
  Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==2.0.1)
  Downloading nvidia_cuda_runtime

In [None]:
from sklearn.preprocessing import LabelEncoder
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Tokenizer applied to every transcript.
tokenizer = get_tokenizer("basic_english")

# Turn the raw labels into integer class ids; the fitted encoder is kept so
# its classes_ can be read later.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)


# Build vocabulary
def yield_tokens(texts):
    """Lazily yield the token list of each string in ``texts``."""
    yield from (tokenizer(text) for text in texts)


special_tokens = ["<unk>", "<pad>"]
vocab = build_vocab_from_iterator(yield_tokens(transcripts), specials=special_tokens)

# Tokens missing from the vocabulary resolve to the <unk> index.
unk_index = vocab["<unk>"]
vocab.set_default_index(unk_index)


In [None]:
import torch

def text_pipeline(text):
    """Tokenize ``text`` and look the tokens up in the vocabulary.

    Returns the result of calling ``vocab`` on the token list; the padding
    step downstream treats it as a list of token ids.
    """
    tokens = tokenizer(text)
    return vocab(tokens)

max_len = 50  # Or determine based on dataset


def pad_sequence(seq, max_len):
    """Bring ``seq`` to ``max_len`` items.

    Sequences at or above ``max_len`` are cut to their first ``max_len``
    items; shorter ones are extended on the right with the vocabulary's
    ``<pad>`` index.
    """
    missing = max_len - len(seq)
    if missing > 0:
        return seq + [vocab["<pad>"]] * missing
    return seq[:max_len]

# Encode every transcript as token ids, then pad or truncate each to max_len.
padded_inputs = [
    pad_sequence(token_ids, max_len)
    for token_ids in map(text_pipeline, transcripts)
]


In [None]:
from torch.utils.data import Dataset, DataLoader

class TextEmotionDataset(Dataset):
    """Dataset pairing each token-id sequence with its integer label."""

    def __init__(self, inputs, labels):
        """Store ``inputs`` and ``labels`` as int64 tensors."""
        long_kwargs = {"dtype": torch.long}
        self.inputs = torch.tensor(inputs, **long_kwargs)
        self.labels = torch.tensor(labels, **long_kwargs)

    def __len__(self):
        """Number of samples, taken from the label tensor."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(input, label)`` pair stored at ``idx``."""
        sample = self.inputs[idx]
        target = self.labels[idx]
        return sample, target

dataset = TextEmotionDataset(padded_inputs, encoded_labels)

from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

# Split sample indices instead of the Dataset object: passing the Dataset
# itself makes sklearn copy it element by element into plain Python lists.
# Subset keeps each split as a view onto `dataset`.
train_indices, val_indices = train_test_split(
    list(range(len(dataset))), test_size=0.2, random_state=42
)
train_data = Subset(dataset, train_indices)
val_data = Subset(dataset, val_indices)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32)



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.5 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-package

In [None]:
import torch.nn as nn

class TextLSTM(nn.Module):
    """Embedding -> LSTM -> linear classifier over token-id sequences.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state.
        output_dim: number of output logits.
        pad_idx: token id used for padding. It is passed to ``nn.Embedding``
            as ``padding_idx`` and is excluded when measuring sequence
            lengths. ``None`` disables the length handling.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim, pad_idx):
        super(TextLSTM, self).__init__()
        self.pad_idx = pad_idx
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Compute logits from the LSTM state at each sequence's last real token.

        Args:
            x: integer tensor of token ids, ``(batch, seq_len)`` or
                ``(seq_len,)``. Padding is assumed to sit at the end of each
                sequence.

        Returns:
            Logits of shape ``(batch, output_dim)``, or ``(output_dim,)`` for
            a 1-D input.
        """
        unbatched = x.dim() == 1
        if unbatched:
            x = x.unsqueeze(0)

        embedded = self.embedding(x)

        if self.pad_idx is None:
            _, (hidden, _) = self.lstm(embedded)
        else:
            # Without packing, the final hidden state is taken after the LSTM
            # has stepped through every trailing pad position, so the output
            # would depend on how much padding was appended.
            # Clamp to 1 because packing rejects zero-length sequences.
            lengths = (x != self.pad_idx).sum(dim=1).clamp(min=1).cpu()
            packed = nn.utils.rnn.pack_padded_sequence(
                embedded, lengths, batch_first=True, enforce_sorted=False
            )
            _, (hidden, _) = self.lstm(packed)

        out = self.fc(hidden[-1])
        return out.squeeze(0) if unbatched else out


In [None]:
# Seed before the weights are created so initialisation is repeatable across runs.
torch.manual_seed(42)

model = TextLSTM(
    vocab_size=len(vocab),
    embed_dim=100,
    hidden_dim=128,
    output_dim=len(label_encoder.classes_),
    pad_idx=vocab["<pad>"],
)
criterion = nn.CrossEntropyLoss()
# Learning rate lowered from 1e-2 to 1e-3: the recorded run at 1e-2 shows the
# loss jumping (about 2.07 -> 2.26) rather than decreasing.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)


In [None]:
import torch
import torch.nn as nn

def train(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: module called on each input batch to produce logits.
        dataloader: iterable of ``(inputs, labels)`` tensor batches.
        criterion: loss function; assumed to return the mean loss of the
            batch (the default reduction of ``nn.CrossEntropyLoss``).
        optimizer: optimizer stepping ``model``'s parameters.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy)``: the loss averaged over samples and the
        fraction of correctly predicted samples.

    Raises:
        ZeroDivisionError: if ``dataloader`` yields no samples.
    """
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight each batch's mean loss by its size so a smaller final batch
        # does not count as much as a full one.
        total_loss += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        total += batch_size
        correct += (predicted == labels).sum().item()

    avg_loss = total_loss / total
    accuracy = correct / total
    return avg_loss, accuracy


In [None]:
def evaluate(model, dataloader, criterion, device):
    """Score ``model`` on ``dataloader`` without updating its parameters.

    Args:
        model: module called on each input batch to produce logits.
        dataloader: iterable of ``(inputs, labels)`` tensor batches.
        criterion: loss function; assumed to return the mean loss of the
            batch (the default reduction of ``nn.CrossEntropyLoss``).
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy)``: the loss averaged over samples and the
        fraction of correctly predicted samples.

    Raises:
        ZeroDivisionError: if ``dataloader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            batch_size = labels.size(0)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Weight each batch's mean loss by its size so a smaller final
            # batch does not count as much as a full one.
            total_loss += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    avg_loss = total_loss / total
    accuracy = correct / total
    return avg_loss, accuracy


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = model.to(device)

num_epochs = 10
separator = "-" * 50

# One training pass followed by one validation pass per epoch, with the
# metrics of both printed after each epoch.
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = evaluate(model, val_loader, criterion, device)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}")
    print(separator)


Epoch 1/10
Train Loss: 2.0788, Accuracy: 0.1254
Val   Loss: 2.0648, Accuracy: 0.1198
--------------------------------------------------
Epoch 2/10
Train Loss: 2.0904, Accuracy: 0.1241
Val   Loss: 2.0726, Accuracy: 0.1198
--------------------------------------------------
Epoch 3/10
Train Loss: 2.0731, Accuracy: 0.1289
Val   Loss: 2.0601, Accuracy: 0.1441
--------------------------------------------------
Epoch 4/10
Train Loss: 2.0689, Accuracy: 0.1350
Val   Loss: 2.0660, Accuracy: 0.1406
--------------------------------------------------
Epoch 5/10
Train Loss: 2.0663, Accuracy: 0.1372
Val   Loss: 2.0638, Accuracy: 0.1181
--------------------------------------------------
Epoch 6/10
Train Loss: 2.2625, Accuracy: 0.1337
Val   Loss: 2.3195, Accuracy: 0.1441
--------------------------------------------------
Epoch 7/10
Train Loss: 2.2337, Accuracy: 0.1350
Val   Loss: 2.1197, Accuracy: 0.1181
--------------------------------------------------
Epoch 8/10
Train Loss: 2.0894, Accuracy: 0.1328
