In [1]:
import pandas as pd
import os
import torch
import torch.nn as nn
from transformers import AutoTokenizer
from torch.utils.data import DataLoader
from models.EmoTimeBert import EmotionalTimeBert
from tqdm import tqdm
from data_handling.datasets.EmpatheticDialogues import EmpatheticDialoguesDataset
from utils.utils import train_model, validate_model, test_model, collate_conversations
# Turn off the tokenizers library's own parallelism via its environment switch.
# NOTE(review): presumably set because the DataLoaders below spawn worker
# processes (num_workers=8) — confirm this is still needed.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Run-mode switch: when True the training cell runs and writes a checkpoint;
# when False the model cell loads previously saved weights from disk instead.
TRAINING = False

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the train / validation splits. Rows pandas cannot parse are dropped
# (on_bad_lines="skip") instead of aborting the load.
csv_options = {"on_bad_lines": "skip"}
empath_data = pd.read_csv("empatheticdialogues/train.csv", **csv_options)
val_empath_data = pd.read_csv("empatheticdialogues/valid.csv", **csv_options)

# One group per conversation id, for each split.
grouped, val_grouped = (
    frame.groupby("conv_id") for frame in (empath_data, val_empath_data)
)

# Optional sanity check — distribution of distinct speakers per conversation:
#   empath_data.groupby("conv_id")["speaker_idx"].nunique().value_counts()

In [3]:
def build_conversations(grouped_frames, label_to_id: dict) -> list:
    """Turn per-conversation DataFrame groups into a list of conversation dicts.

    Args:
        grouped_frames: iterable of ``(conv_id, DataFrame)`` pairs, e.g. the
            result of ``DataFrame.groupby("conv_id")``. Each frame must have
            the columns ``utterance``, ``context``, ``utterance_idx`` and
            ``speaker_idx``.
        label_to_id: mapping from the emotion string in ``context`` to its
            integer class id.

    Returns:
        One dict per conversation with the keys ``texts``, ``labels``,
        ``timestamps`` and ``speakers``; every value is a list with one entry
        per utterance row.

    Raises:
        KeyError: if a ``context`` value is missing from ``label_to_id``.
    """
    built = []
    for _, df_conv in grouped_frames:
        texts = df_conv["utterance"].tolist()
        # Alternative input that prepends the prompt to every utterance:
        # texts = (df_conv["prompt"] + "[SEP]" + df_conv["utterance"]).tolist()
        labels = [label_to_id[emotion] for emotion in df_conv["context"]]
        timestamps = df_conv["utterance_idx"].tolist()
        # Dense rank minus one renumbers the speaker ids that occur in this
        # conversation as 0, 1, ... in ascending id order.
        speakers = (
            df_conv["speaker_idx"]
            .rank(method="dense")
            .astype(int)
            .sub(1)
            .tolist()
        )
        built.append({
            "texts": texts,
            "labels": labels,
            "timestamps": timestamps,
            "speakers": speakers,
        })
    return built


# Class ids follow the order in which emotions first appear in the training
# split; the validation split is encoded with the same mapping.
emotion_labels = empath_data["context"].unique().tolist()
emotion_to_id = {emotion: idx for idx, emotion in enumerate(emotion_labels)}

# Both splits go through the same helper so their preprocessing cannot drift.
conversations = build_conversations(grouped, emotion_to_id)
val_conversations = build_conversations(val_grouped, emotion_to_id)


In [7]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Wrap each split's conversation list in the project dataset class.
dataset, val_dataset = (
    EmpatheticDialoguesDataset(split, tokenizer)
    for split in (conversations, val_conversations)
)

# Options common to both loaders.
# pin_memory / persistent_workers are intentionally left at their defaults.
shared_loader_kwargs = dict(batch_size=32, num_workers=8)

# Training batches are reshuffled every epoch.
loader = DataLoader(
    dataset,
    shuffle=True,
    collate_fn=lambda batch: collate_conversations(batch, tokenizer),
    **shared_loader_kwargs,
)

# Validation order stays fixed (shuffle=False) so results are reproducible —
# do not change this.
val_loader = DataLoader(
    val_dataset,
    shuffle=False,
    collate_fn=lambda batch: collate_conversations(batch, tokenizer),
    **shared_loader_kwargs,
)


In [10]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    print("Device:", torch.cuda.get_device_name(0))
    print("CUDA Enabled!")

# The model is built from the local "./medbert_4_epochs" checkpoint directory
# with one output class per emotion label.
model = EmotionalTimeBert("./medbert_4_epochs", num_labels=len(emotion_labels)).to(device)

if not TRAINING:
    # map_location remaps tensors that were saved on a different device
    # (e.g. a CUDA checkpoint) onto `device`, so the weights also load on a
    # CPU-only machine instead of raising during deserialization.
    checkpoint_state = torch.load(
        "emotional_time_bert_5_to_show.pt", map_location=device
    )
    model.load_state_dict(checkpoint_state)

# Targets equal to -1 are excluded from the loss.
# NOTE(review): presumably -1 marks padded utterance slots — confirm in the collate function.
criterion = nn.CrossEntropyLoss(ignore_index=-1)

# Only the parameters listed here are optimized: the last two encoder layers
# at a small learning rate, and the other listed modules at a larger one.
optimizer = torch.optim.AdamW([
    {"params": model.encoder.encoder.layer[-2:].parameters(), "lr": 1e-5},
    {"params": model.temporal_transformer.parameters(), "lr": 3e-4},
    {"params": model.time_embed.parameters(), "lr": 3e-4},
    {"params": model.speakers_embed.parameters(), "lr": 3e-4},
    {"params": model.head_emotions.parameters(), "lr": 3e-4},
])

Some weights of BertModel were not initialized from the model checkpoint at ./medbert_4_epochs and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Device: NVIDIA GeForce RTX 5080
CUDA Enabled!


In [11]:
if TRAINING:
    num_of_epochs = 5
    for epoch in range(num_of_epochs):
        # One pass over the training loader, then one over the validation
        # loader; each loader is wrapped in its own progress bar.
        avg_loss = train_model(
            model, optimizer, device, criterion, tqdm(loader, total=len(loader))
        )
        val_loss, val_f1 = validate_model(
            model, device, criterion, tqdm(val_loader, total=len(val_loader))
        )
        # Per-epoch summary, one metric per line.
        print(
            f"Epoch {epoch+1}: train loss = {avg_loss:.4f}",
            f"Epoch {epoch+1}: val loss = {val_loss:.4f}",
            f"Epoch {epoch+1}: val F1 = {val_f1:.4f}",
            sep="\n",
        )

    # Save the weights once, after the final epoch; the file name carries the
    # epoch count.
    torch.save(model.state_dict(), f"emotional_time_bert_{num_of_epochs}.pt")


In [12]:
# Load the test split (unparseable rows are skipped) and group it by conversation.
test_empath_data = pd.read_csv("empatheticdialogues/test.csv", on_bad_lines="skip")
test_grouped = test_empath_data.groupby("conv_id")

# Build one record per conversation, encoding emotions with the existing
# emotion_to_id mapping.
test_conversations = []
for _, conv_frame in test_grouped:
    test_conversations.append({
        # Alternative input that prepends the prompt to every utterance:
        # (conv_frame["prompt"] + "[SEP]" + conv_frame["utterance"]).tolist()
        "texts": conv_frame["utterance"].tolist(),
        "labels": [emotion_to_id[emotion] for emotion in conv_frame["context"]],
        "timestamps": conv_frame["utterance_idx"].tolist(),
        # Dense rank minus one renumbers this conversation's speaker ids as 0, 1, ...
        "speakers": (
            conv_frame["speaker_idx"].rank(method="dense").astype(int) - 1
        ).tolist(),
    })

In [14]:
# Wrap the test conversations and serve them in a fixed order (no shuffling).
test_dataset = EmpatheticDialoguesDataset(test_conversations, tokenizer)
test_loader_kwargs = dict(batch_size=32, shuffle=False, num_workers=8)
test_loader = DataLoader(
    test_dataset,
    collate_fn=lambda batch: collate_conversations(batch, tokenizer),
    **test_loader_kwargs,
)

In [15]:
# Evaluate on the test split; emotion_labels supplies the class names for the
# per-emotion report that test_model prints.
test_f1 = test_model(
    emotion_labels=emotion_labels,
    device=device,
    dataloader=test_loader,
    model=model,
)

  output = torch._nested_tensor_from_mask(
Testing: 100%|██████████| 80/80 [00:06<00:00, 13.12it/s]

Test Macro F1: 0.4205

Per-emotion results:
              precision    recall  f1-score   support

 sentimental      0.403     0.415     0.409       205
      afraid      0.309     0.262     0.284       164
       proud      0.630     0.308     0.413       221
    faithful      0.733     0.282     0.407       117
   terrified      0.326     0.548     0.409       155
      joyful      0.311     0.171     0.221       187
       angry      0.252     0.149     0.188       181
         sad      0.356     0.477     0.408       195
     jealous      0.564     0.579     0.571       183
    grateful      0.583     0.380     0.460       221
    prepared      0.450     0.595     0.512       173
 embarrassed      0.584     0.642     0.612       179
     excited      0.389     0.624     0.479       202
     annoyed      0.385     0.616     0.474       198
      lonely      0.545     0.772     0.639       171
     ashamed      0.315     0.161     0.213       143
      guilty      0.385     0.638    


