In [1]:
import sys
import os
sys.path.append(os.path.join(os.path.abspath(os.getcwd()), ".."))
import evaluate
import pandas as pd
import wandb
import argparse

import torch
import torch.nn as nn
import torch.optim as optim

from transformers import AutoTokenizer, AutoModelForTokenClassification, DataCollatorWithPadding, TrainingArguments, Trainer, AutoModel
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts

from certainty import EventSentence, EventType, load_events, id2label, label2id, load_file, CACHE_DIR, GNNCertaintyPredictionModel, seed_everything, RANDOM_SEED


accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")

2025-03-10 11:41:15.507464: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-10 11:41:15.645629: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Using the latest cached version of the module from /home/peder/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Wed Mar  6 15:19:33 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/peder/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c2

In [2]:
train = load_file('en_train.json')

In [72]:
train_df = pd.DataFrame(train).drop_duplicates('text').drop_duplicates('events')

In [76]:
train = train_df.to_dict("records")

In [77]:
def get_token_indices(span, offsets):
    start_char, end_char = map(int, span.split(":"))
    start_idx, end_idx = None, None

    for i, (start, end) in enumerate(offsets):
        if start <= start_char < end:  # Find first token in span
            start_idx = i
        if start < end_char <= end:  # Find last token in span
            end_idx = i
            break

    return start_idx, end_idx

In [78]:
train[:2]

[{'sent_id': 'bc/CNN_IP_20030329.1600.02/001',
  'text': 'It was in northern Iraq today that an eight artillery round hit the site occupied by Kurdish fighters near Chamchamal',
  'events': [{'event_type': 'Attack',
    'event_polarity': 'Positive',
    'event_genericity': 'Specific',
    'event_modality': 'Asserted',
    'trigger': [['hit'], ['60:63']],
    'arguments': [[['northern Iraq'], ['10:23'], 'Place'],
     [['today'], ['24:29'], 'Time-Within'],
     [['an eight artillery round'], ['35:59'], 'Instrument'],
     [['the site occupied by Kurdish fighters near Chamchamal'],
      ['64:117'],
      'Target']]}]},
 {'sent_id': 'bc/CNN_IP_20030329.1600.02/002',
  'text': 'A day ago it was controlled by Iraqi troops and packed with these land mines but now we can drive far enough to see the outskirts of Kirkuk',
  'events': [{'event_type': 'Transport',
    'event_polarity': 'Positive',
    'event_genericity': 'Specific',
    'event_modality': 'Asserted',
    'trigger': [['drive'], ['

In [79]:
model_name = 'distilbert/distilbert-base-uncased'

tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=CACHE_DIR, local_files_only=True, trust_remote_code=True)



In [80]:
id2label = {0: "O", 1: "B-trigger", 2: "I-trigger"}
label2id = {label: idx for idx, label in id2label.items()}

In [81]:
train_encoded = []

for sample in train:
    toks = tokenizer(sample['text'], truncation=True, return_tensors="pt", add_special_tokens=False, return_offsets_mapping=True, padding="max_length")
    offset_mapping=toks.pop("offset_mapping")
    
    ner = ['O' for _ in range(0, len(toks['input_ids'][0]))]
    
    for i in range(0, len(sample['events'])):
        trigger_span = sample['events'][i]['trigger'][1][0]
        trigger_start, trigger_end = get_token_indices(trigger_span, offset_mapping[0])
        try:
            ner[trigger_start] = "B-trigger"
        except:
            print(trigger_start)
            print(example["events"])
            print
        for j in range(trigger_start+1, trigger_end+1):
            ner[j] = "I-trigger"

    # sample = {"text": sample["text"], "toks": toks, "ner_tags": ner}
    toks['labels'] = torch.tensor([label2id[label] for label in ner])
    toks["input_ids"] = toks["input_ids"].squeeze(0)  # Convert from (seq_length,) → (1, seq_length)
    toks["attention_mask"] = toks["attention_mask"].squeeze(0)  # Convert from (seq_length,) → (1, seq_length)
    train_encoded.append(toks)

In [82]:
train_encoded[0]

{'input_ids': tensor([ 2009,  2001,  1999,  2642,  5712,  2651,  2008,  2019,  2809,  4893,
         2461,  2718,  1996,  2609,  4548,  2011, 15553,  7299,  2379, 15775,
        12458,  3511,  2389,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0, 

In [83]:
seqeval = evaluate.load("seqeval")

Using the latest cached version of the module from /home/peder/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--seqeval/541ae017dc683f85116597d48f621abc7b21b88dc42ec937c71af5415f0af63c (last modified on Thu Mar  7 10:47:46 2024) since it couldn't be found locally at evaluate-metric--seqeval, or remotely on the Hugging Face Hub.


In [90]:
import numpy as np

label_list = ["O", "B-trigger", "I-trigger"]

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

In [91]:
from datasets import Dataset
train_dataset = Dataset.from_list(train_encoded)

In [92]:

model = AutoModelForTokenClassification.from_pretrained(
    model_name, cache_dir=CACHE_DIR, local_files_only=True, num_labels=3, trust_remote_code=True, id2label=id2label, label2id=label2id
)

Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [93]:
OUTPUT_DIR = "../models/blabla"

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="no",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
    compute_metrics=compute_metrics,
)OUTPUT_DIR = "../models/blabla"

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="no",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
    compute_metrics=compute_metrics,
)

In [94]:
trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.011312,0.0,0.0,0.0,0.997103
2,No log,0.007045,0.607581,0.394978,0.478737,0.997691


  _warn_prf(average, modifier, msg_start, len(result))


TrainOutput(global_step=406, training_loss=0.024475010745043824, metrics={'train_runtime': 680.5212, 'train_samples_per_second': 9.54, 'train_steps_per_second': 0.597, 'total_flos': 848215261900800.0, 'train_loss': 0.024475010745043824, 'epoch': 2.0})

Thread SenderThread:
Traceback (most recent call last):
  File "/home/peder/.pyenv/versions/3.10.13/lib/python3.10/site-packages/wandb/sdk/internal/internal_util.py", line 48, in run
    self._run()
  File "/home/peder/.pyenv/versions/3.10.13/lib/python3.10/site-packages/wandb/sdk/internal/internal_util.py", line 99, in _run
    self._process(record)
  File "/home/peder/.pyenv/versions/3.10.13/lib/python3.10/site-packages/wandb/sdk/internal/internal.py", line 327, in _process
    self._sm.send(record)
  File "/home/peder/.pyenv/versions/3.10.13/lib/python3.10/site-packages/wandb/sdk/internal/sender.py", line 385, in send
    send_handler(record)
  File "/home/peder/.pyenv/versions/3.10.13/lib/python3.10/site-packages/wandb/sdk/internal/sender.py", line 407, in send_request
    send_handler(record)
  File "/home/peder/.pyenv/versions/3.10.13/lib/python3.10/site-packages/wandb/sdk/internal/sender.py", line 1147, in send_request_summary_record
    self._update_summary_record(record.reques