In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd drive/MyDrive/Colab \Notebooks

/content/drive/MyDrive/Colab Notebooks


In [3]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaTokenizer, RobertaModel
import pandas as pd
import numpy as np

In [4]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

print(device)

cpu


# Data preparation

In [5]:
DATASET = 'MELD'    # @param ['MELD', 'MaSaC']
MAX_LENGTH = 128    # @param [96, 128, 256] {type: 'raw'}
BATCH_SIZE = 16    # @param [8, 16, 32] {type: 'raw'}

In [6]:
train_df = pd.read_json(f'data/EDiReF_train_data/{DATASET}_train_efr.json')
train_df["triggers"] = train_df["triggers"].apply(lambda lst: [np.nan if x is None else x for x in lst])
train_df = train_df[train_df["triggers"].apply(lambda lst: not any(pd.isna(x) for x in lst))]

flattened_emotions = [sent for conv in train_df['emotions'] for sent in conv]
unique_emotions = set(flattened_emotions)

labels_to_ids = {k: v for v, k in enumerate(unique_emotions)}
ids_to_labels = {v: k for v, k in enumerate(unique_emotions)}

train_conversations = list(train_df['utterances'])
train_emotions = [[labels_to_ids[emotion] for emotion in conv] for conv in list(train_df['emotions'])]
train_triggers = list(train_df['triggers'])

In [7]:
val_df = pd.read_json(f'data/EDiReF_val_data/{DATASET}_val_efr.json')
val_df["triggers"] = val_df["triggers"].apply(lambda lst: [np.nan if x is None else x for x in lst])
val_df = val_df[val_df["triggers"].apply(lambda lst: not any(pd.isna(x) for x in lst))]

val_conversations = list(val_df['utterances'])
val_emotions = [[labels_to_ids[emotion] for emotion in conv] for conv in list(val_df['emotions'])]
val_triggers = list(val_df['triggers'])

In [8]:
conversations = train_conversations + val_conversations
emotions = train_emotions + val_emotions
triggers = train_triggers + val_triggers

In [9]:
from sklearn.model_selection import train_test_split

def train_val_test_split(X, y1, y2, val_size = 0.2, test_size = 0.2, random_state = None):
    X_train_val, X_test, y1_train_val, y1_test, y2_train_val, y2_test = train_test_split(
        X, y1, y2, test_size=test_size, random_state=random_state
    )

    val_relative_size = val_size / (1 - test_size)

    X_train, X_val, y1_train, y1_val, y2_train, y2_val = train_test_split(
        X_train_val, y1_train_val, y2_train_val, test_size=val_relative_size, random_state=random_state
    )

    return (X_train, X_val, X_test, y1_train, y1_val, y1_test, y2_train, y2_val, y2_test)

In [10]:
X_train, X_val, X_test, y1_train, y1_val, y1_test, y2_train, y2_val, y2_test = train_val_test_split(
    conversations, emotions, triggers, test_size=0.15, val_size=0.15, random_state=2024
    )

In [11]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [12]:
def tokenize_conversation(conversations, max_length = 128):
    input_ids = []
    attention_masks = []

    for conversation in conversations:
        dialogue = " [SEP] ".join(conversation)
        encoded = tokenizer(
            dialogue,
            truncation = True,
            padding = 'max_length',
            max_length = max_length,
            return_tensors = "pt"
        )
        input_ids.append(encoded["input_ids"].squeeze(0))
        attention_masks.append(encoded["attention_mask"].squeeze(0))

    return input_ids, attention_masks

In [13]:
def pad_labels(labels, max_length = 128):
    padded_labels = []
    for label_set in labels:
        label_tensor = torch.tensor(label_set, dtype = torch.float)
        # Pad with -1 to ignore padding tokens in the loss function
        padded_tensor = torch.cat(
            [label_tensor, torch.full((max_length - len(label_set),), -1)]
        )
        padded_labels.append(padded_tensor)
    return padded_labels

In [14]:
class ConversationDataset(Dataset):
    def __init__(self, input_ids, attention_masks, emotion_labels, trigger_labels):
        self.input_ids = input_ids
        self.attention_masks = attention_masks
        self.emotion_labels = emotion_labels
        self.trigger_labels = trigger_labels

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return {
            "input_ids": self.input_ids[idx],
            "attention_mask": self.attention_masks[idx],
            "emotion_labels": self.emotion_labels[idx],
            "trigger_labels": self.trigger_labels[idx],
        }

In [15]:
train_input_ids, train_attention_masks = tokenize_conversation(X_train, max_length = MAX_LENGTH)

train_emotion_labels = pad_labels(y1_train, max_length = MAX_LENGTH)
train_trigger_labels = pad_labels(y2_train, max_length = MAX_LENGTH)

train_dataset = ConversationDataset(train_input_ids, train_attention_masks, train_emotion_labels, train_trigger_labels)

In [16]:
val_input_ids, val_attention_masks = tokenize_conversation(X_val, max_length = MAX_LENGTH)

val_emotion_labels = pad_labels(y1_val, max_length = MAX_LENGTH)
val_trigger_labels = pad_labels(y2_val, max_length = MAX_LENGTH)

val_dataset = ConversationDataset(val_input_ids, val_attention_masks, val_emotion_labels, val_trigger_labels)

In [17]:
test_input_ids, test_attention_masks = tokenize_conversation(X_test, max_length = MAX_LENGTH)

test_emotion_labels = pad_labels(y1_test, max_length = MAX_LENGTH)
test_trigger_labels = pad_labels(y2_test, max_length = MAX_LENGTH)

test_dataset = ConversationDataset(test_input_ids, test_attention_masks, test_emotion_labels, test_trigger_labels)

In [18]:
train_loader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = False)
val_loader = DataLoader(val_dataset, batch_size = BATCH_SIZE, shuffle = False)
test_loader = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False)

# Model configuration

In [19]:
GATE_TYPE = 'linear'  # @param ['linear', 'mlp']
EXPERT_TYPE = 'linear' # @param ['linear', 'mlp', 'rnn']
NUM_EXPERTS = 2 # @param {type: 'slider', min: 1, max: 8, step: 1}
TOP_K = 2 # @param {type: 'slider', min: 1, max: 8, step: 1}

In [20]:
assert TOP_K <= NUM_EXPERTS, "Select different values for TOP_K and NUM_EXPERTS!"

In [22]:
class MoEForEmotionAndTriggerClassification(nn.Module):
    def __init__(self, num_experts, k, num_classes, gate_type = 'linear', expert_type = 'linear'):
        super(MoEForEmotionAndTriggerClassification, self).__init__()

        self.roberta = RobertaModel.from_pretrained('roberta-base')
        for param in self.roberta.parameters():
            param.requires_grad = True  # Set to True if you want to fine-tune RoBERTa
        hidden_size = self.roberta.config.hidden_size

        gate_setup = {
            'linear': nn.Linear(hidden_size, num_experts),
            'mlp': nn.Sequential(nn.Linear(hidden_size, 512), nn.ReLU(), nn.Linear(512, num_experts)),
        }

        expert_setup = {
            'linear': nn.Linear(hidden_size, hidden_size),
            'mlp': nn.Sequential(nn.Linear(hidden_size, 512), nn.ReLU(), nn.Linear(512, hidden_size)),
            'rnn': nn.LSTM(hidden_size, hidden_size),
        }

        self.gating_network = gate_setup[GATE_TYPE]
        self.experts = nn.ModuleList([expert_setup[EXPERT_TYPE] for _ in range(num_experts)])

        self.emotion_classifier = nn.Linear(hidden_size, num_classes)
        self.trigger_classifier = nn.Linear(hidden_size, 1)

        self.k = k
        self.dropout = nn.Dropout(p = 0.1)

    def forward(self, input_ids, attention_mask):
        roberta_outputs = self.roberta(input_ids = input_ids, attention_mask = attention_mask)
        embeddings = roberta_outputs.last_hidden_state  # (batch_size, seq_len, hidden_size)
        pooled_embeddings = embeddings.mean(dim = 1)    # (batch_size, hidden_size)
        pooled_embeddings = self.dropout(pooled_embeddings)

        # expert weights
        expert_weights = self.gating_network(pooled_embeddings) # (batch_size, num_experts)
        expert_weights = torch.softmax(expert_weights, dim = -1)

        # aggregate expert outputs
        combined_output = self._compute_expert_output(embeddings, expert_weights)
        combined_output = self.dropout(combined_output)

        emotion_logits = self.emotion_classifier(combined_output)   # (batch_size, seq_len, num_classes)
        trigger_logits = self.trigger_classifier(combined_output).squeeze(-1)   # (batch_size, seq_len)

        return emotion_logits, trigger_logits

    def _compute_expert_output(self, embeddings, expert_weights):
        batch_size, num_experts = expert_weights.size()
        combined_output = torch.zeros_like(embeddings)

        # top-k experts only are activated
        topk_weights, topk_indices = torch.topk(expert_weights, self.k, dim = -1)

        for i in range(self.k):
            expert_idx = topk_indices[:, i]
            weight = topk_weights[:, i].unsqueeze(-1).unsqueeze(-1)

            expert_outputs = []
            for j in range(expert_idx.size(0)):
                expert = self.experts[expert_idx[j]]

                if isinstance(expert, nn.LSTM):
                    embedding_input = embeddings[j].unsqueeze(0)
                    output, _ = expert(embedding_input)
                    expert_outputs.append(output.squeeze(0))

                elif isinstance(expert, nn.Linear) or isinstance(expert, nn.Sequential):
                    output = expert(embeddings[j])
                    expert_outputs.append(output)

            expert_outputs = torch.stack(expert_outputs)
            combined_output += weight * expert_outputs

        return combined_output

# Training parameters

In [23]:
LEARNING_RATE = 0.00002  # @param {type: 'slider', min: 1E-5, max: 5E-5, step: 1E-5}
NUM_EPOCHS = 10  # @param {type: 'slider', min: 5, max: 25, step: 5}

In [24]:
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss, BCEWithLogitsLoss

moe = MoEForEmotionAndTriggerClassification(num_experts = NUM_EXPERTS, k = TOP_K, num_classes = len(labels_to_ids), gate_type = GATE_TYPE, expert_type = EXPERT_TYPE)
optimizer = AdamW(moe.parameters(), lr = LEARNING_RATE)

emotion_loss_fn = CrossEntropyLoss()
trigger_loss_fn = BCEWithLogitsLoss()

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [25]:
moe.to(device)

MoEForEmotionAndTriggerClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
    

In [26]:
def remove_padding(logits, labels, task):
    mask = labels != -1

    logits_flat = logits.view(-1, logits.size(-1)) if task == 'emotion' else logits.view(-1)
    labels_flat = labels.view(-1)

    logits = logits_flat[mask.view(-1)]
    labels = labels_flat[mask.view(-1)]

    return logits, labels

In [27]:
def evaluate(model, val_loader):
    model.eval()
    val_loss, nb_steps = 0.0, 0
    total_emotion_preds, correct_emotion_preds = 0, 0
    total_trigger_preds, correct_trigger_preds = 0, 0

    with torch.no_grad():
        for idx, batch in enumerate(val_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            emotion_labels = batch['emotion_labels'].to(device)
            trigger_labels = batch['trigger_labels'].to(device)

            emotion_logits, trigger_logits = model(input_ids, attention_mask)

            # removing padding
            emotion_logits, emotion_labels = remove_padding(emotion_logits, emotion_labels, 'emotion')
            trigger_logits, trigger_labels = remove_padding(trigger_logits, trigger_labels, 'trigger')

            # calculating loss
            emotion_loss = emotion_loss_fn(emotion_logits, emotion_labels.long())
            trigger_loss = trigger_loss_fn(trigger_logits, trigger_labels)

            loss = emotion_loss + trigger_loss
            val_loss += loss.item()

            # calculating accuracy
            emotion_preds = torch.argmax(emotion_logits, dim=-1)
            trigger_preds = (torch.sigmoid(trigger_logits).squeeze(-1) > 0.5).long()

            correct_emotion_preds += torch.sum(emotion_preds == emotion_labels).item()
            correct_trigger_preds += torch.sum(trigger_preds == trigger_labels).item()

            total_emotion_preds += emotion_labels.numel()
            total_trigger_preds += trigger_labels.numel()

            nb_steps += 1

            if idx % 100 == 0:
                loss_step = val_loss / nb_steps
                print(f'      Validation loss per 100 training steps: {loss_step}')

        avg_val_loss = val_loss / len(val_loader)
        emotion_accuracy = correct_emotion_preds / total_emotion_preds
        trigger_accuracy = correct_trigger_preds / total_trigger_preds
        avg_val_accuracy = (emotion_accuracy + trigger_accuracy)/2

    return avg_val_loss, avg_val_accuracy

In [28]:
def train_and_validate(model, train_loader, val_loader, num_epochs = 3):
    for epoch in range(num_epochs):
        print(f"Epoch [{epoch + 1}/{num_epochs}]")
        model.train()
        train_loss, nb_steps = 0.0, 0
        total_emotion_preds, correct_emotion_preds = 0, 0
        total_trigger_preds, correct_trigger_preds = 0, 0

        for idx, batch in enumerate(train_loader):
            optimizer.zero_grad()

            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            emotion_labels = batch['emotion_labels'].to(device)
            trigger_labels = batch['trigger_labels'].to(device)

            emotion_logits, trigger_logits = model(input_ids, attention_mask)

            # removing padding
            emotion_logits, emotion_labels = remove_padding(emotion_logits, emotion_labels, 'emotion')
            trigger_logits, trigger_labels = remove_padding(trigger_logits, trigger_labels, 'trigger')

            # calculating loss
            emotion_loss = emotion_loss_fn(emotion_logits, emotion_labels.long())
            trigger_loss = trigger_loss_fn(trigger_logits, trigger_labels)

            loss = emotion_loss + trigger_loss
            train_loss += loss.item()

            loss.backward()
            optimizer.step()

            # calculating accuracy
            emotion_preds = torch.argmax(emotion_logits, dim=-1)
            trigger_preds = (torch.sigmoid(trigger_logits).squeeze(-1) > 0.5).long()

            correct_emotion_preds += torch.sum(emotion_preds == emotion_labels).item()
            correct_trigger_preds += torch.sum(trigger_preds == trigger_labels).item()

            total_emotion_preds += emotion_labels.numel()
            total_trigger_preds += trigger_labels.numel()
            nb_steps += 1

            if idx % 100 == 0:
                loss_step = train_loss / nb_steps
                print(f'      Training loss per 100 training steps: {loss_step}')

        avg_train_loss = train_loss / len(train_loader)
        emotion_accuracy = correct_emotion_preds / total_emotion_preds
        trigger_accuracy = correct_trigger_preds / total_trigger_preds
        avg_train_accuracy = (emotion_accuracy + trigger_accuracy)/2

        val_loss, val_accuracy = evaluate(model, val_loader)

        print(f"   Training Loss: {avg_train_loss:.3f}, Training Accuracy: {avg_train_accuracy:.3f}")
        print(f"   Validation Loss: {val_loss:.3f}, Validation Accuracy: {val_accuracy:.3f}\n")

In [None]:
train_and_validate(moe, train_loader, val_loader, num_epochs = NUM_EPOCHS)

Epoch [1/10]
      Training loss per 100 training steps: 2.613065481185913
      Training loss per 100 training steps: 2.0700942041850325
      Validation loss per 100 training steps: 1.8622701168060303
   Training Loss: 1.990, Training Accuracy: 0.639
   Validation Loss: 1.833, Validation Accuracy: 0.656

Epoch [2/10]
      Training loss per 100 training steps: 1.7236175537109375
      Training loss per 100 training steps: 1.8152532483091448
      Validation loss per 100 training steps: 1.5877764225006104
   Training Loss: 1.736, Training Accuracy: 0.672
   Validation Loss: 1.592, Validation Accuracy: 0.694

Epoch [3/10]
      Training loss per 100 training steps: 1.4812678098678589
      Training loss per 100 training steps: 1.540615979987796
      Validation loss per 100 training steps: 1.3325444459915161
   Training Loss: 1.455, Training Accuracy: 0.722
   Validation Loss: 1.357, Validation Accuracy: 0.745

Epoch [4/10]
      Training loss per 100 training steps: 1.1693326234817505

In [None]:
torch.save(moe.state_dict(), f'trained_models/{DATASET}/moe_model_{GATE_TYPE}_gate_{NUM_EXPERTS}_{EXPERT_TYPE}_experts_{TOP_K}_active_{LEARNING_RATE}_lr_{NUM_EPOCHS}_epochs.pth')

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def get_metrics(model, data_loader, dev):
    model.eval()

    emotion_accuracy = 0.0
    emotion_precision = 0.0
    emotion_recall = 0.0
    emotion_f1 = 0.0

    trigger_accuracy = 0.0
    trigger_precision = 0.0
    trigger_recall = 0.0
    trigger_f1 = 0.0

    num_samples, nb_steps = 0, 0

    for batch in data_loader:
        input_ids = batch['input_ids'].to(dev)
        attention_mask = batch['attention_mask'].to(dev)
        emotion_labels = batch['emotion_labels'].to(dev)
        trigger_labels = batch['trigger_labels'].to(dev)

        with torch.no_grad():
            # Forward pass
            emotion_logits, trigger_logits = model(input_ids, attention_mask)

            # Compute predictions for emotions
            emotion_logits, emotion_labels = remove_padding(emotion_logits, emotion_labels, 'emotion')

            emotion_preds = torch.argmax(emotion_logits, dim = -1)

            emotion_preds_flat = emotion_preds.cpu().numpy()
            emotion_labels_flat = emotion_labels.cpu().numpy()

            # Compute predictions for triggers
            trigger_logits, trigger_labels = remove_padding(trigger_logits, trigger_labels, 'trigger')

            trigger_preds = (torch.sigmoid(trigger_logits).squeeze(-1) > 0.5).long()

            trigger_preds_flat = trigger_preds.cpu().numpy()
            trigger_labels_flat = trigger_labels.cpu().numpy()

            # Calculate metrics for emotion classification
            accuracy = accuracy_score(emotion_labels_flat, emotion_preds_flat)

            precision, recall, f1, _ = precision_recall_fscore_support(
                emotion_labels_flat, emotion_preds_flat, average='weighted', zero_division = 0
            )

            emotion_accuracy += accuracy
            emotion_precision += precision
            emotion_recall += recall
            emotion_f1 += f1

            # Calculate metrics for trigger classification
            accuracy = accuracy_score(trigger_labels_flat, trigger_preds_flat)

            precision, recall, f1, _ = precision_recall_fscore_support(
                trigger_labels_flat, trigger_preds_flat, average='weighted', zero_division = 0
            )

            trigger_accuracy += accuracy
            trigger_precision += precision
            trigger_recall += recall
            trigger_f1 += f1

            nb_steps += 1

    # Calculate average metrics
    avg_emotion_accuracy = emotion_accuracy / nb_steps
    avg_emotion_precision = emotion_precision / nb_steps
    avg_emotion_recall = emotion_recall / nb_steps
    avg_emotion_f1 = emotion_f1 / nb_steps

    avg_trigger_accuracy = trigger_accuracy / nb_steps
    avg_trigger_precision = trigger_precision / nb_steps
    avg_trigger_recall = trigger_recall / nb_steps
    avg_trigger_f1 = trigger_f1 / nb_steps

    return (avg_emotion_accuracy, avg_emotion_precision, avg_emotion_recall, avg_emotion_f1,
            avg_trigger_accuracy, avg_trigger_precision, avg_trigger_recall, avg_trigger_f1)

In [None]:
avg_emotion_accuracy, avg_emotion_precision, avg_emotion_recall, avg_emotion_f1, avg_trigger_accuracy, avg_trigger_precision, avg_trigger_recall, avg_trigger_f1 = get_metrics(moe, test_loader, device)

# Output results
print("Emotion classification:")
print(f"   Accuracy: {avg_emotion_accuracy:.3f}")
print(f"   Precision: {avg_emotion_precision:.3f}")
print(f"   Recall: {avg_emotion_recall:.3f}")
print(f"   F1-score: {avg_emotion_f1:.3f}")

print("\n Trigger classification:")
print(f"   Accuracy: {avg_trigger_accuracy:.3f}")
print(f"   Precision: {avg_trigger_precision:.3f}")
print(f"   Recall: {avg_trigger_recall:.3f}")
print(f"   F1-score: {avg_trigger_f1:.3f}")

# Load and test trained model

In [None]:
moe_loaded = MoEForEmotionAndTriggerClassification(num_experts = NUM_EXPERTS, k = TOP_K, num_classes = len(labels_to_ids), gate_type = GATE_TYPE, expert_type = EXPERT_TYPE)
moe_loaded.load_state_dict(torch.load(f'trained_models/{DATASET}/moe_model_{GATE_TYPE}_gate_{NUM_EXPERTS}_{EXPERT_TYPE}_experts_{TOP_K}_active_{LEARNING_RATE}_lr_{NUM_EPOCHS}_epochs.pth', map_location=torch.device('cpu')))

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  moe_loaded.load_state_dict(torch.load(f'trained_models/moe_model_{GATE_TYPE}_gate_{NUM_EXPERTS}_{EXPERT_TYPE}_experts_{TOP_K}_active_{LEARNING_RATE}_lr_{NUM_EPOCHS}_epochs.pth', map_location=torch.device("cpu")))


<All keys matched successfully>

In [None]:
avg_emotion_accuracy, avg_emotion_precision, avg_emotion_recall, avg_emotion_f1, avg_trigger_accuracy, avg_trigger_precision, avg_trigger_recall, avg_trigger_f1 = get_metrics(moe_loaded, test_loader, 'cpu')

# Output results
print("Emotion classification:")
print(f"   Accuracy: {avg_emotion_accuracy:.3f}")
print(f"   Precision: {avg_emotion_precision:.3f}")
print(f"   Recall: {avg_emotion_recall:.3f}")
print(f"   F1-score: {avg_emotion_f1:.3f}")

print("\n Trigger classification:")
print(f"   Accuracy: {avg_trigger_accuracy:.3f}")
print(f"   Precision: {avg_trigger_precision:.3f}")
print(f"   Recall: {avg_trigger_recall:.3f}")
print(f"   F1-score: {avg_trigger_f1:.3f}")