In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import RobertaModel, RobertaTokenizer
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import torch.optim as optim
import json

In [11]:
import sys
print(sys.executable)


from transformers import logging
logging.set_verbosity_error()

/home/payalsaha/python311/bin/python3.11


In [8]:

class ConversationDataset(Dataset):
    """
    Dataset for conversation-level emotion causal detection.

    Each sample is a full conversation and includes:
      - conv_id: Conversation ID.
      - utterances: Tensor of token IDs for all utterances (shape: [N, seq_len]).
      - attention_masks: Tensor of attention masks (shape: [N, seq_len]).
      - Ac: An (N, N) lower-triangular adjacency matrix (with self-loops).
      - labels: A dictionary mapping candidate pairs (i, j) with j < i to binary labels.
                The label is 1 if the candidate utterance (turn number) is in the target’s
                "expanded emotion cause evidence"; otherwise 0.
    """
    def __init__(self, json_file, tokenizer, max_length=64):
        with open(json_file, "r", encoding="utf-8") as f:
            self.data = json.load(f)
        # Get conversation IDs as a list for indexing.
        self.conv_ids = list(self.data.keys())
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.conv_ids)

    def __getitem__(self, idx):
        conv_id = self.conv_ids[idx]
        conv_instances = self.data[conv_id]
        # Assume each conversation ID has at least one conversation instance;
        # here, we take the first instance.
        conv = conv_instances[0]
        # Sort turns by "turn" field.
        conv = sorted(conv, key=lambda x: x["turn"])

        utterances_list = []
        attention_masks_list = []
        for turn in conv:
            encoded = self.tokenizer(
                turn["utterance"],
                return_tensors="pt",
                max_length=self.max_length,
                padding="max_length",
                truncation=True
            )
            utterances_list.append(encoded["input_ids"].squeeze(0))
            attention_masks_list.append(encoded["attention_mask"].squeeze(0))
        utterances = torch.stack(utterances_list, dim=0)          # (N, seq_len)
        attention_masks = torch.stack(attention_masks_list, dim=0)  # (N, seq_len)

        N = len(conv)
        # Build Ac: Lower triangular matrix with self-loops.
        Ac = torch.zeros((N, N), dtype=torch.long)
        for i in range(N):
            for j in range(i):
                Ac[i, j] = 1
            Ac[i, i] = 1

        # Build labels dictionary.
        labels = {}
        for i in range(N):
            target = conv[i]
            # Proceed only if "expanded emotion cause evidence" exists and is a list.
            if "expanded emotion cause evidence" in target and isinstance(target["expanded emotion cause evidence"], list):
                # Collect evidence turns that are integers.
                evidence = [e for e in target["expanded emotion cause evidence"] if isinstance(e, int)]
                # For every candidate (j) preceding target (i)
                for j in range(i):
                    candidate = conv[j]
                    candidate_turn = candidate["turn"]
                    label = 1 if candidate_turn in evidence else 0
                    labels[(i, j)] = torch.tensor(label, dtype=torch.long)
        sample = {
            "conv_id": conv_id,
            "utterances": utterances,           # (N, seq_len)
            "attention_masks": attention_masks, # (N, seq_len)
            "Ac": Ac,                           # (N, N)
            "labels": labels                    # dict {(i, j): tensor(0 or 1)}
        }
        return sample

In [3]:
# --------------------------
# Utterance Encoder
# --------------------------
class UtteranceEncoder(nn.Module):
    def __init__(self, roberta_model_name='roberta-base', output_dim=300):
        super(UtteranceEncoder, self).__init__()
        self.roberta = RobertaModel.from_pretrained(roberta_model_name)
        self.linear = nn.Linear(self.roberta.config.hidden_size, output_dim)

    def forward(self, input_ids, attention_mask):
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state
        pooled, _ = torch.max(hidden_states, dim=1)
        utterance_rep = self.linear(pooled)
        return utterance_rep

# --------------------------
# DAGLayer (Graph-Based Aggregator)
# --------------------------
class DAGLayer(nn.Module):
    def __init__(self, d):
        super(DAGLayer, self).__init__()
        self.d = d
        self.attn_linear = nn.Linear(2 * d, 1)
        self.gru = nn.GRUCell(d, d)

    def forward(self, h, Ac):
        N = h.size(0)
        device = h.device
        updated_h = h.clone()
        for i in range(N):
            neighbor_msgs = []
            for j in range(i):
                if Ac[i, j] == 1:
                    concat_vec = torch.cat([h[i], h[j]], dim=-1)
                    attn_score = self.attn_linear(concat_vec)
                    neighbor_msgs.append((j, attn_score))
            if len(neighbor_msgs) > 0:
                if len(neighbor_msgs) == 1:
                    scores = neighbor_msgs[0][1].unsqueeze(0)
                else:
                    scores = torch.stack([score for (_, score) in neighbor_msgs]).squeeze(-1)
                attn_weights = F.softmax(scores, dim=0)
                agg = torch.zeros(self.d, device=device)
                for idx, (j, _) in enumerate(neighbor_msgs):
                    agg += attn_weights[idx] * h[j]
                updated_h[i] = self.gru(agg, h[i])
            else:
                updated_h[i] = h[i]
        return updated_h

# --------------------------
# Graph-Based Model (Without External Knowledge)
# --------------------------
class GraphModel(nn.Module):
    def __init__(self, d=300, num_layers=2, roberta_model_name='roberta-base'):
        super(GraphModel, self).__init__()
        self.d = d
        self.utterance_encoder = UtteranceEncoder(roberta_model_name, d)
        self.num_layers = num_layers
        self.dag_layers = nn.ModuleList([DAGLayer(d) for _ in range(num_layers)])
        # Remove the final Sigmoid so that the model outputs logits.
        self.cause_predictor = nn.Sequential(
            nn.Linear(2 * d, 600),
            nn.ReLU(),
            nn.Linear(600, 300),
            nn.ReLU(),
            nn.Linear(300, 300),
            nn.ReLU(),
            nn.Linear(300, 1)
        )

    def forward(self, utterance_ids, attention_masks, Ac):
        h = self.utterance_encoder(utterance_ids, attention_masks)
        for layer in self.dag_layers:
            h = layer(h, Ac)
        N = h.size(0)
        scores = {}
        for i in range(N):
            for j in range(i):
                pair_rep = torch.cat([h[i], h[j]], dim=-1)
                # Outputs logits
                score = self.cause_predictor(pair_rep)
                scores[(i, j)] = score
        return scores

In [4]:
# --------------------------
# Training Loop with Weighted Loss
# --------------------------
def train_model_graph(model, dataloader, optimizer, num_epochs=2, device="cuda"):
    model.train()
    # Set positive weight based on your dataset distribution (e.g., 4.13)
    pos_weight = torch.tensor(4.13).to(device)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            utterances = batch["utterances"].squeeze(0).to(device)
            attention_masks = batch["attention_masks"].squeeze(0).to(device)
            Ac = batch["Ac"].squeeze(0).to(device)
            labels = batch["labels"]

            optimizer.zero_grad()
            scores = model(utterances, attention_masks, Ac)
            loss = 0.0
            count = 0
            for key, score in scores.items():
                label = labels.get(key, torch.tensor(0, dtype=torch.long)).to(device).float()
                # Ensure both score and label are 1D tensors
                loss += criterion(score.view(-1), label.view(-1))
                count += 1
            if count > 0:
                loss = loss / count
            else:
                loss = torch.tensor(0.0, device=device)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        avg_loss = epoch_loss / len(dataloader)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")


In [13]:
from sklearn.metrics import accuracy_score, f1_score, classification_report

def evaluate_model_graph(model, dataloader, device="cuda"):
    model.eval()
    all_preds = []
    all_labels = []
    conversation_em_scores = []  # To record conversation-level exact match scores

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            # Process each conversation (batch_size assumed to be 1)
            utterances = batch["utterances"].squeeze(0).to(device)         # (N, seq_len)
            attention_masks = batch["attention_masks"].squeeze(0).to(device)  # (N, seq_len)
            Ac = batch["Ac"].squeeze(0).to(device)                            # (N, N)
            labels = batch["labels"]  # dict mapping candidate pair keys to tensor(0) or tensor(1)

            # Get candidate pair scores from the model
            scores = model(utterances, attention_masks, Ac)

            convo_preds = {}
            convo_gold = {}
            for key, score in scores.items():
                # Apply sigmoid to convert logits to probability
                prob = torch.sigmoid(score)
                pred = 1 if prob.item() >= 0.5 else 0
                convo_preds[key] = pred
                # If a key is missing, default label is 0
                gold = labels.get(key, torch.tensor(0, dtype=torch.long)).item()
                convo_gold[key] = gold

                # Add to overall lists
                all_preds.append(pred)
                all_labels.append(gold)

            # Compute conversation-level Exact Match:
            # If all candidate pairs in this conversation are predicted correctly, count as EM=1; else 0.
            if len(convo_gold) > 0 and all(convo_preds.get(k, 0) == v for k, v in convo_gold.items()):
                conversation_em_scores.append(1)
            else:
                conversation_em_scores.append(0)

    accuracy = accuracy_score(all_labels, all_preds)
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    report = classification_report(all_labels, all_preds, digits=4)
    exact_match = sum(conversation_em_scores) / len(conversation_em_scores) if conversation_em_scores else 0.0

    print("Evaluation Accuracy:", accuracy)
    print("Evaluation Macro F1:", macro_f1)
    print("Conversation-Level Exact Match (EM):", exact_match)
    print("Classification Report:\n", report)

    return accuracy, macro_f1, exact_match, report

In [14]:
import time
if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    dataset = ConversationDataset("original_annotation/dailydialog_train.json", tokenizer, max_length=64)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

    model = GraphModel(d=300, num_layers=2, roberta_model_name="roberta-base")
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=3e-5)

    begin  = time.time()
    # Train the model with weighted loss
    train_model_graph(model, dataloader, optimizer, num_epochs=10, device=device)

    end = time.time()
    print(f"Elapsed time for training: {end-begin}")

    # Evaluate the model
    evaluate_model_graph(model, dataloader, device=device)

Epoch 1/10: 100%|██████████| 834/834 [02:27<00:00,  5.66it/s]


Epoch 1/10, Loss: 1.0655


Epoch 2/10: 100%|██████████| 834/834 [02:45<00:00,  5.05it/s]


Epoch 2/10, Loss: 0.9595


Epoch 3/10: 100%|██████████| 834/834 [02:45<00:00,  5.03it/s]


Epoch 3/10, Loss: 0.9393


Epoch 4/10: 100%|██████████| 834/834 [02:46<00:00,  5.00it/s]


Epoch 4/10, Loss: 0.9377


Epoch 5/10: 100%|██████████| 834/834 [02:46<00:00,  5.02it/s]


Epoch 5/10, Loss: 0.9342


Epoch 6/10: 100%|██████████| 834/834 [02:45<00:00,  5.04it/s]


Epoch 6/10, Loss: 0.9380


Epoch 7/10: 100%|██████████| 834/834 [02:46<00:00,  5.02it/s]


Epoch 7/10, Loss: 0.9299


Epoch 8/10: 100%|██████████| 834/834 [02:47<00:00,  4.97it/s]


Epoch 8/10, Loss: 0.9298


Epoch 9/10: 100%|██████████| 834/834 [02:46<00:00,  5.01it/s]


Epoch 9/10, Loss: 0.9289


Epoch 10/10: 100%|██████████| 834/834 [02:46<00:00,  4.99it/s]


Epoch 10/10, Loss: 0.9284
Elapsed time for training: 1643.7697632312775


Evaluating: 100%|██████████| 834/834 [01:05<00:00, 12.75it/s]

Evaluation Accuracy: 0.8287833895008255
Evaluation Macro F1: 0.6319499152860791
Conversation-Level Exact Match (EM): 0.002398081534772182
Classification Report:
               precision    recall  f1-score   support

           0     0.9376    0.8674    0.9011     39761
           1     0.2901    0.4841    0.3628      4452

    accuracy                         0.8288     44213
   macro avg     0.6138    0.6757    0.6319     44213
weighted avg     0.8724    0.8288    0.8469     44213






In [16]:
torch.save(model.state_dict(), "kec_model-graph.pt")

In [None]:
"""tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
dataset = ConversationDataset("original_annotation/dailydialog_train.json", tokenizer, max_length=64)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
evaluate_model_graph(model, dataloader, device)"""

In [17]:
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
dataset = ConversationDataset("original_annotation/dailydialog_valid.json", tokenizer, max_length=64)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
evaluate_model_graph(model, dataloader, device)

Evaluating: 100%|██████████| 47/47 [00:04<00:00, 10.57it/s]

Evaluation Accuracy: 0.8393724318266716
Evaluation Macro F1: 0.624520175463776
Conversation-Level Exact Match (EM): 0.0
Classification Report:
               precision    recall  f1-score   support

           0     0.9596    0.8627    0.9085      2476
           1     0.2461    0.5522    0.3405       201

    accuracy                         0.8394      2677
   macro avg     0.6028    0.7075    0.6245      2677
weighted avg     0.9060    0.8394    0.8659      2677






(0.8393724318266716,
 0.624520175463776,
 0.0,
 '              precision    recall  f1-score   support\n\n           0     0.9596    0.8627    0.9085      2476\n           1     0.2461    0.5522    0.3405       201\n\n    accuracy                         0.8394      2677\n   macro avg     0.6028    0.7075    0.6245      2677\nweighted avg     0.9060    0.8394    0.8659      2677\n')

In [18]:
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
dataset = ConversationDataset("original_annotation/dailydialog_test.json", tokenizer, max_length=64)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
evaluate_model_graph(model, dataloader, device)

Evaluating: 100%|██████████| 225/225 [00:18<00:00, 12.00it/s]

Evaluation Accuracy: 0.8344267183539585
Evaluation Macro F1: 0.6143751198828616
Conversation-Level Exact Match (EM): 0.0
Classification Report:
               precision    recall  f1-score   support

           0     0.9493    0.8659    0.9057     12315
           1     0.2429    0.4823    0.3231      1099

    accuracy                         0.8344     13414
   macro avg     0.5961    0.6741    0.6144     13414
weighted avg     0.8915    0.8344    0.8579     13414






(0.8344267183539585,
 0.6143751198828616,
 0.0,
 '              precision    recall  f1-score   support\n\n           0     0.9493    0.8659    0.9057     12315\n           1     0.2429    0.4823    0.3231      1099\n\n    accuracy                         0.8344     13414\n   macro avg     0.5961    0.6741    0.6144     13414\nweighted avg     0.8915    0.8344    0.8579     13414\n')