In [1]:
import csv
import json
import random
import re
from collections import Counter, defaultdict

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import seaborn as sns

import torch

from datasets import (
    Dataset,
    DatasetDict,
    concatenate_datasets,
    load_dataset,
    load_from_disk,
)
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    DataCollatorForTokenClassification,
    Trainer,
    TrainingArguments,
    set_seed,
)

import evaluate

import nltk
from nltk.corpus import wordnet as wn
import pickle

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay




In [48]:
# Checkpoint name used for the tokenizer here and for the model built in the seed loop.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# NER tag -> class id; ids follow the position of each tag in the tuple (O=0 ... I-MISC=8).
conll_label_to_id = {
    tag: idx
    for idx, tag in enumerate(
        ("O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-MISC", "I-MISC")
    )
}
# Inverse lookup (class id -> tag) used when decoding predictions.
id2label = dict(zip(conll_label_to_id.values(), conll_label_to_id.keys()))

# NOTE(review): select(range(1)) keeps only the first example of each dataset --
# this looks like a debugging limit; confirm before launching a full run.
conll_encoded = load_from_disk("./splits/conll_encoded").select(range(1))
ontonotes_encoded = load_from_disk("./splits/ontonotes_encoded").select(range(1))

data_collator = DataCollatorForTokenClassification(tokenizer)

metric = evaluate.load("seqeval")


def compute_metrics(p):
    """Score token-level predictions with the module-level `metric`.

    `p` is unpacked into ``(predictions, labels)``. Predictions are reduced to
    class ids with an argmax over the last axis. Every position whose label is
    -100 is dropped from both sequences (presumably padding / special-token
    positions -- confirm against the encoding step), and the remaining ids are
    mapped to tag strings through `id2label`.

    Returns whatever ``metric.compute`` returns for the decoded sequences.
    """
    scores, gold_ids = p
    pred_ids = scores.argmax(axis=-1)
    paired_rows = list(zip(pred_ids, gold_ids))

    # Predicted tags, kept only where the reference label is not -100.
    tagged_preds = [
        [id2label[pred] for pred, gold in zip(pred_row, gold_row) if gold != -100]
        for pred_row, gold_row in paired_rows
    ]
    # Reference tags with the same -100 positions removed.
    tagged_gold = [
        [id2label[gold] for gold in gold_row if gold != -100]
        for _, gold_row in paired_rows
    ]

    return metric.compute(predictions=tagged_preds, references=tagged_gold)

In [49]:
# Seeds to fine-tune with; the loop below performs one full run per entry.
seeds = [42]
# seeds = [42, 106, 812, 2025, 9999]

# One {'predictions': ..., 'seed': ...} dict per completed seed.
results = []

# Uncomment to resume from previously pickled results instead of starting empty.
# with open('./results/seed_var_results.pkl', 'rb') as f:
#     results = pickle.load(f)

for seed in seeds:
    # Seed before building the model so the freshly initialised classifier head
    # is created under this seed.
    set_seed(seed)

    conll_model = AutoModelForTokenClassification.from_pretrained(
        model_name,
        num_labels=len(conll_label_to_id)
    )

    # No evaluation and no checkpointing happen during training, so
    # `load_best_model_at_end` (previously set to True) could never select a
    # checkpoint; it is omitted to keep the configuration honest.
    conll_args = TrainingArguments(
        output_dir=f"./results/conll_seed_{seed}",
        seed=seed,
        eval_strategy="no",
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        gradient_accumulation_steps=2,
        num_train_epochs=3,
        weight_decay=0.01,
        report_to="none",
        fp16=True,
        logging_steps=1000,
        save_strategy="no",
    )

    conll_trainer = Trainer(
        model=conll_model,
        args=conll_args,
        train_dataset=conll_encoded,
        eval_dataset=ontonotes_encoded,
        # `tokenizer=` is deprecated on Trainer; `processing_class` is its replacement.
        processing_class=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    conll_trainer.train()
    conll_trainer.save_model(f"./saved_model/conll_seed_{seed}")

    # Predict on the OntoNotes split and record the output together with the seed.
    pred = conll_trainer.predict(ontonotes_encoded)
    eval_result = {'predictions': pred, 'seed': seed}
    results.append(eval_result)

    # Rewrite the pickle after every seed so finished runs survive a later failure.
    with open('./results/seed_var_results.pkl', 'wb') as f:
        pickle.dump(results, f)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  conll_trainer = Trainer(


Step,Training Loss


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Re-run prediction on ontonotes_encoded using the `conll_trainer` left bound by the
# last iteration of the seed loop; the returned value is shown as the cell output.
conll_trainer.predict(ontonotes_encoded)