In [None]:
pip install transformers

In [None]:
pip install datasets

In [None]:
import numpy as np
import random as rand
from datasets import load_dataset

In [None]:
# Hugging Face Hub identifiers of the three NER corpora evaluated in this notebook
CONLL_DATASET_ID = "conll2003"
ENDAVA_DATASET_ID = "BeardedJohn/ubb-endava-assistant-ner-only-misc"
CONLL_ENDAVA_DATASET_ID = "BeardedJohn/ubb-endava-conll-assistant-ner-only-misc"

# fetch each corpus from its Hub repository
datasetConll = load_dataset(CONLL_DATASET_ID)
datasetEndava = load_dataset(ENDAVA_DATASET_ID)
datasetConllEndava = load_dataset(CONLL_ENDAVA_DATASET_ID)


In [None]:
# Build one label-name list per dataset. The evaluation cells index these lists
# with a class id to recover the tag string for predictions and references.

def _test_ner_feature(dataset_dict):
    """Return the ``ner_tags`` feature of the dataset's ``test`` split."""
    return dataset_dict["test"].features["ner_tags"]


label_names_conll = _test_ner_feature(datasetConll).feature.names
label_names_endava = _test_ner_feature(datasetEndava).feature.names

# `ner_feature` is left bound to the combined dataset's feature
ner_feature = _test_ner_feature(datasetConllEndava)
label_names_conll_endava = ner_feature.feature.names


In [None]:
# Load the tokenizer that belongs to the "bert-base-cased" checkpoint.
# `tokenizer` is used by tokenize_and_align_labels and handed to the data collator.

from transformers import AutoTokenizer

model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [None]:
# The tokenizer splits words into sub-word tokens, so the word-level labels have
# to be expanded to one label per token. Every token inherits the label of the
# word it was split from.

def align_labels_with_tokens(labels, word_ids):
    """Expand word-level labels to token-level labels.

    Args:
        labels: one integer label per original word.
        word_ids: for every token, the index of the word it came from, or
            ``None`` for a token that belongs to no word (special token).

    Returns:
        A list with one label per entry of ``word_ids``: ``-100`` for ``None``
        entries, the word's own label for the first token of a word, and for
        the following tokens of the same word the word's label with odd ids
        bumped by one (B-XXX -> I-XXX in this label scheme).
    """
    aligned = []
    previous_word = None
    for word_id in word_ids:
        if word_id is None:
            # Special token: ignored label, and it breaks any running word
            aligned.append(-100)
            previous_word = None
            continue

        tag = labels[word_id]
        continues_word = word_id == previous_word
        if continues_word and tag % 2 == 1:
            # Later piece of the same word: a B-XXX tag becomes I-XXX
            tag += 1
        aligned.append(tag)
        previous_word = word_id

    return aligned

In [None]:
# Tokenize a batch of pre-split sentences and attach token-level labels to it.

def tokenize_and_align_labels(examples):
    """Tokenize ``examples["tokens"]`` and add an aligned ``"labels"`` entry.

    ``examples`` is a batch with a ``"tokens"`` column (sentences already split
    into words) and a ``"ner_tags"`` column (one label per word). The returned
    tokenizer output carries an extra ``"labels"`` key holding, for every
    sentence, the labels produced by ``align_labels_with_tokens``.
    """
    encoded = tokenizer(
        examples["tokens"], truncation=True, is_split_into_words=True
    )
    # word_ids(row) maps each token of sentence `row` back to its source word
    encoded["labels"] = [
        align_labels_with_tokens(word_labels, encoded.word_ids(row))
        for row, word_labels in enumerate(examples["ner_tags"])
    ]
    return encoded

In [None]:
def _tokenize_all_splits(dataset_dict):
    """Run tokenize_and_align_labels over the dataset in batches.

    The original columns (names taken from the "train" split) are removed, so
    only the tokenizer outputs and the aligned labels remain.
    """
    original_columns = dataset_dict["train"].column_names
    return dataset_dict.map(
        tokenize_and_align_labels,
        batched=True,
        remove_columns=original_columns,
    )


tokenizedDatasetsConll = _tokenize_all_splits(datasetConll)
tokenizedDatasetsEndava = _tokenize_all_splits(datasetEndava)
tokenizedDatasetsConllEndava = _tokenize_all_splits(datasetConllEndava)


In [None]:
# Data collator for dynamic padding, configured to return TensorFlow tensors.
# It is passed as `collate_fn` when the tf.data test sets are built.

from transformers import DataCollatorForTokenClassification

data_collator = DataCollatorForTokenClassification(
    tokenizer=tokenizer, return_tensors="tf"
)

In [None]:
# Turn the tokenized test splits into batched tf.data datasets for evaluation.

TEST_COLUMNS = ["attention_mask", "input_ids", "labels", "token_type_ids"]


def _to_tf_test_dataset(tokenized_splits, batch_size):
    """Build an unshuffled tf.data dataset from the ``test`` split."""
    return tokenized_splits["test"].to_tf_dataset(
        columns=list(TEST_COLUMNS),
        collate_fn=data_collator,
        shuffle=False,
        batch_size=batch_size,
    )


tfTestDatasetConll = _to_tf_test_dataset(tokenizedDatasetsConll, 32)
tfTestDatasetEndava = _to_tf_test_dataset(tokenizedDatasetsEndava, 64)
tfTestDatasetConllEndava = _to_tf_test_dataset(tokenizedDatasetsConllEndava, 64)


In [None]:
# Load the three fine-tuned token-classification models from the Hub.

from transformers import AutoConfig, TFAutoModelForTokenClassification

# Hub repositories of the fine-tuned checkpoints
model_checkpoint_conll = "BeardedJohn/bert-finetuned-ner-ubb-conll"
model_checkpoint_endava = "BeardedJohn/bert-finetuned-ner-ubb-endava-only-misc"
model_checkpoint_conll_endava = "BeardedJohn/bert-finetuned-ner-ubb-conll-endava-only-misc"

# one TensorFlow model per checkpoint
modelConll = TFAutoModelForTokenClassification.from_pretrained(
    model_checkpoint_conll
)
modelEndava = TFAutoModelForTokenClassification.from_pretrained(
    model_checkpoint_endava
)
modelConllEndava = TFAutoModelForTokenClassification.from_pretrained(
    model_checkpoint_conll_endava
)

In [None]:
pip install seqeval

In [None]:
# Load the seqeval metric object; every evaluation cell calls `metric.compute` on it.
# NOTE(review): `load_metric` is reportedly deprecated in newer `datasets` releases in
# favour of the separate `evaluate` package — confirm against the installed version.

from datasets import load_metric

metric = load_metric("seqeval")

In [None]:
# test the conll model on its test set
#
# Tags are collected per sentence (one inner list per example): seqeval scores
# entity spans inside each sequence, so flattening the whole test set into one
# sequence would let a span continue across a sentence boundary.

all_predictions = []
all_labels = []
for batch in tfTestDatasetConll:
    # the input is already one batch, so run it directly rather than starting
    # a full predict() loop on every iteration
    logits = modelConll.predict_on_batch(batch)["logits"]
    labels = np.asarray(batch["labels"])  # NumPy once per batch for masking/indexing
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        keep = label != -100  # -100 marks positions excluded from scoring
        all_predictions.append([label_names_conll[idx] for idx in prediction[keep]])
        all_labels.append([label_names_conll[idx] for idx in label[keep]])
metric.compute(predictions=all_predictions, references=all_labels)

In [None]:
# test the endava model on its test set
#
# Tags are collected per sentence (one inner list per example): seqeval scores
# entity spans inside each sequence, so flattening the whole test set into one
# sequence would let a span continue across a sentence boundary.

all_predictions = []
all_labels = []
for batch in tfTestDatasetEndava:
    # the input is already one batch, so run it directly rather than starting
    # a full predict() loop on every iteration
    logits = modelEndava.predict_on_batch(batch)["logits"]
    labels = np.asarray(batch["labels"])  # NumPy once per batch for masking/indexing
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        keep = label != -100  # -100 marks positions excluded from scoring
        all_predictions.append([label_names_endava[idx] for idx in prediction[keep]])
        all_labels.append([label_names_endava[idx] for idx in label[keep]])
metric.compute(predictions=all_predictions, references=all_labels)

In [None]:
# test the endava-conll model on its test set
#
# Tags are collected per sentence (one inner list per example): seqeval scores
# entity spans inside each sequence, so flattening the whole test set into one
# sequence would let a span continue across a sentence boundary.

all_predictions = []
all_labels = []
for batch in tfTestDatasetConllEndava:
    # the input is already one batch, so run it directly rather than starting
    # a full predict() loop on every iteration
    logits = modelConllEndava.predict_on_batch(batch)["logits"]
    labels = np.asarray(batch["labels"])  # NumPy once per batch for masking/indexing
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        keep = label != -100  # -100 marks positions excluded from scoring
        all_predictions.append(
            [label_names_conll_endava[idx] for idx in prediction[keep]]
        )
        all_labels.append([label_names_conll_endava[idx] for idx in label[keep]])
metric.compute(predictions=all_predictions, references=all_labels)

In [None]:
# test endava model on conll test set
#
# Predicted ids are decoded with the endava label names (the model's label
# space) and reference ids with the conll label names (the test set's).
# Tags are collected per sentence: seqeval scores entity spans inside each
# sequence, so one flattened sequence would let spans cross sentence boundaries.

all_predictions = []
all_labels = []
for batch in tfTestDatasetConll:
    # the input is already one batch, so run it directly rather than starting
    # a full predict() loop on every iteration
    logits = modelEndava.predict_on_batch(batch)["logits"]
    labels = np.asarray(batch["labels"])  # NumPy once per batch for masking/indexing
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        keep = label != -100  # -100 marks positions excluded from scoring
        all_predictions.append([label_names_endava[idx] for idx in prediction[keep]])
        all_labels.append([label_names_conll[idx] for idx in label[keep]])
metric.compute(predictions=all_predictions, references=all_labels)

In [None]:
# test conll-endava model on conll test set
#
# Predicted ids are decoded with the conll-endava label names (the model's
# label space) and reference ids with the conll label names (the test set's).
# Tags are collected per sentence: seqeval scores entity spans inside each
# sequence, so one flattened sequence would let spans cross sentence boundaries.

all_predictions = []
all_labels = []
for batch in tfTestDatasetConll:
    # the input is already one batch, so run it directly rather than starting
    # a full predict() loop on every iteration
    logits = modelConllEndava.predict_on_batch(batch)["logits"]
    labels = np.asarray(batch["labels"])  # NumPy once per batch for masking/indexing
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        keep = label != -100  # -100 marks positions excluded from scoring
        all_predictions.append(
            [label_names_conll_endava[idx] for idx in prediction[keep]]
        )
        all_labels.append([label_names_conll[idx] for idx in label[keep]])
metric.compute(predictions=all_predictions, references=all_labels)

In [None]:
# test conll-endava model on endava test set
#
# Predicted ids are decoded with the conll-endava label names (the model's
# label space) and reference ids with the endava label names (the test set's).
# Tags are collected per sentence: seqeval scores entity spans inside each
# sequence, so one flattened sequence would let spans cross sentence boundaries.

all_predictions = []
all_labels = []
for batch in tfTestDatasetEndava:
    # the input is already one batch, so run it directly rather than starting
    # a full predict() loop on every iteration
    logits = modelConllEndava.predict_on_batch(batch)["logits"]
    labels = np.asarray(batch["labels"])  # NumPy once per batch for masking/indexing
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        keep = label != -100  # -100 marks positions excluded from scoring
        all_predictions.append(
            [label_names_conll_endava[idx] for idx in prediction[keep]]
        )
        all_labels.append([label_names_endava[idx] for idx in label[keep]])
metric.compute(predictions=all_predictions, references=all_labels)

In [None]:
# test conll model on endava test set
#
# Predicted ids are decoded with the conll label names (the model's label
# space) and reference ids with the endava label names (the test set's).
# Tags are collected per sentence: seqeval scores entity spans inside each
# sequence, so one flattened sequence would let spans cross sentence boundaries.

all_predictions = []
all_labels = []
for batch in tfTestDatasetEndava:
    logits = modelConll.predict_on_batch(batch)["logits"]
    labels = np.asarray(batch["labels"])  # NumPy once per batch for masking/indexing
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        keep = label != -100  # -100 marks positions excluded from scoring
        all_predictions.append([label_names_conll[idx] for idx in prediction[keep]])
        all_labels.append([label_names_endava[idx] for idx in label[keep]])
metric.compute(predictions=all_predictions, references=all_labels)

In [None]:
# test conll model on conll-endava test set
#
# Predicted ids are decoded with the conll label names (the model's label
# space) and reference ids with the conll-endava label names (the test set's).
# Tags are collected per sentence: seqeval scores entity spans inside each
# sequence, so one flattened sequence would let spans cross sentence boundaries.

all_predictions = []
all_labels = []
for batch in tfTestDatasetConllEndava:
    # the input is already one batch, so run it directly rather than starting
    # a full predict() loop on every iteration
    logits = modelConll.predict_on_batch(batch)["logits"]
    labels = np.asarray(batch["labels"])  # NumPy once per batch for masking/indexing
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        keep = label != -100  # -100 marks positions excluded from scoring
        all_predictions.append([label_names_conll[idx] for idx in prediction[keep]])
        all_labels.append([label_names_conll_endava[idx] for idx in label[keep]])
metric.compute(predictions=all_predictions, references=all_labels)

In [None]:
# test endava model on conll-endava test set
#
# Predicted ids are decoded with the endava label names (the model's label
# space) and reference ids with the conll-endava label names (the test set's).
# Tags are collected per sentence: seqeval scores entity spans inside each
# sequence, so one flattened sequence would let spans cross sentence boundaries.

all_predictions = []
all_labels = []
for batch in tfTestDatasetConllEndava:
    # the input is already one batch, so run it directly rather than starting
    # a full predict() loop on every iteration
    logits = modelEndava.predict_on_batch(batch)["logits"]
    labels = np.asarray(batch["labels"])  # NumPy once per batch for masking/indexing
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        keep = label != -100  # -100 marks positions excluded from scoring
        all_predictions.append([label_names_endava[idx] for idx in prediction[keep]])
        all_labels.append([label_names_conll_endava[idx] for idx in label[keep]])
metric.compute(predictions=all_predictions, references=all_labels)