In [1]:
%pip install accelerate -U
%pip install transformers
%pip install datasets
%pip install seqeval

Collecting accelerate
  Downloading accelerate-0.30.1-py3-none-any.whl.metadata (18 kB)
Downloading accelerate-0.30.1-py3-none-any.whl (302 kB)
   ---------------------------------------- 0.0/302.6 kB ? eta -:--:--
   - -------------------------------------- 10.2/302.6 kB ? eta -:--:--
   --------- ----------------------------- 71.7/302.6 kB 975.2 kB/s eta 0:00:01
   ---------------------------------------  297.0/302.6 kB 2.6 MB/s eta 0:00:01
   ---------------------------------------- 302.6/302.6 kB 2.3 MB/s eta 0:00:00
Installing collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.29.3
    Uninstalling accelerate-0.29.3:
      Successfully uninstalled accelerate-0.29.3
Successfully installed accelerate-0.30.1
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to

In [2]:
from transformers import AutoTokenizer
from transformers import AutoModelForTokenClassification, Trainer
from datasets import Dataset
import numpy as np
import pandas as pd
import pickle
from huggingface_hub import login
import os
from datasets import load_metric

## Testing

**IMPORTANT**
To run the testing you need a trained model that has already been saved, as well as a pickle of the `label_indices` created during training.

Specify the path of the testing set and the path for saving the file with predictions

In [21]:
# The only thing you have to change is the language of the model you're working with
model_language = "english"

# Folder (relative to the notebook) that holds the .iob2 test files.
DATA_DIR = "data"

# Test sets to evaluate; uncomment an entry to include it.
# The paths are built with os.path.join so they resolve on every OS. The previous
# hard-coded backslash separators only worked on Windows (where os.path.join
# still yields exactly the same strings).
test_data_files = [
    # os.path.join(DATA_DIR, "zh_pud-ud-test.iob2"),
    # os.path.join(DATA_DIR, "da_ddt-ud-test.iob2"),
    # os.path.join(DATA_DIR, "en_ewt-ud-test.iob2"),
    # os.path.join(DATA_DIR, "de_pud-ud-test.iob2"),
    # os.path.join(DATA_DIR, "pt_pud-ud-test.iob2"),
    # os.path.join(DATA_DIR, "ru_pud-ud-test.iob2"),
    # os.path.join(DATA_DIR, "sk_snk-ud-test.iob2"),
    # os.path.join(DATA_DIR, "sv_pud-ud-test.iob2"),
    os.path.join(DATA_DIR, "sr_test.iob2"),
    os.path.join(DATA_DIR, "hr_test.iob2"),
]

# where should we pull the model from
hub_folder = f"annamariagnat/NEW_trained_{model_language}"

# specify a path to your label_indices
labels = f"01_label_indices/NEW_labels_{model_language}.pkl"

# Read by tokenize_and_align_labels: when True, every sub-token of a word gets
# that word's label; when False, only the first sub-token is labelled and the
# remaining ones are masked with -100.
label_all_tokens = True

In [9]:
# repeated the functions needed to load in and format the test dataset

#reading in data as a dataframe
def read_iob2_file(path):
    """Read an IOB2 file into a DataFrame with one row per sentence.

    Non-empty lines are split on tabs; the second field is taken as the word
    and the third field as its tag. Lines starting with '#' are skipped and a
    blank line ends the current sentence.

    Args:
        path: Path of the UTF-8 encoded IOB2 file.

    Returns:
        pandas.DataFrame with the columns 'id' (the row index), 'words'
        (list of words) and 'tags' (list of tags).

    Raises:
        IndexError: if a token line has fewer than three tab-separated fields.
    """
    data = []
    current_words = []
    current_tags = []

    # Use a context manager so the handle is closed even if parsing fails;
    # iterating over a bare open(...) left the file open.
    with open(path, encoding='utf-8') as iob2_file:
        for line in iob2_file:
            line = line.strip()

            if not line:
                # Blank line: flush the sentence collected so far (if any).
                if current_words:
                    data.append((current_words, current_tags))
                current_words = []
                current_tags = []
                continue

            if line[0] == '#':
                continue

            tok = line.split('\t')
            current_words.append(tok[1])
            current_tags.append(tok[2])

    # The file may not end with a blank line, so flush the last sentence too.
    if current_tags:
        data.append((current_words, current_tags))

    df = pd.DataFrame(data, columns=['words', 'tags'])
    df['id'] = df.index
    df = df[['id', 'words', 'tags']]

    return df

# tokenizing the labels
def tokenize_and_align_labels(dataset, word_column, tag_column, tokenizer):
    """Tokenize pre-split sentences and spread word-level labels over sub-tokens.

    Args:
        dataset: Table whose `word_column` holds one list of words per row and
            whose `tag_column` holds the matching list of label ids.
        word_column: Name of the column with the words.
        tag_column: Name of the column with the label ids.
        tokenizer: Callable tokenizer whose result offers `word_ids(batch_index=...)`,
            item assignment and a `.data` attribute.

    Returns:
        The `.data` of the tokenizer output, extended with a "labels" entry
        holding one aligned label list per sentence.

    Positions without a source word receive -100. Continuation sub-tokens of a
    word repeat the word's label when the module-level `label_all_tokens` flag
    is truthy and receive -100 otherwise.
    """
    encoding = tokenizer(
        dataset[word_column].tolist(),
        truncation=True,
        is_split_into_words=True,
        padding=True,
    )

    aligned = []
    for row, word_labels in enumerate(dataset[tag_column]):
        row_labels = []
        last_word = None
        for word in encoding.word_ids(batch_index=row):
            if word is None:
                # Position that does not map back to any word.
                token_label = -100
            elif word == last_word and not label_all_tokens:
                # Continuation piece of the previous word, masked out.
                token_label = -100
            else:
                token_label = word_labels[word]
            row_labels.append(token_label)
            last_word = word
        aligned.append(row_labels)

    encoding["labels"] = aligned
    return encoding.data


class Vocab:
    """Two-way mapping between words (here: tag strings) and integer indices.

    `word2idx` maps a word to its index and `idx2word` lists the words in
    index order. The `pad_unk` symbol is never stored.
    """

    def __init__(self, pad_unk='<PAD>'):
        self.pad_unk = pad_unk
        self.word2idx = {}
        self.idx2word = []

    def getIdx(self, word, add=False):
        """Return the index of `word`, or None when it has no index.

        None and the `pad_unk` symbol always yield None. An unknown word is
        registered under the next free index when `add` is true; otherwise
        None is returned.
        """
        if word is None or word == self.pad_unk:
            return None

        if word in self.word2idx:
            return self.word2idx[word]

        if not add:
            return None

        # New word: the next free index equals the current vocabulary size.
        new_idx = len(self.idx2word)
        self.idx2word.append(word)
        self.word2idx[word] = new_idx
        return new_idx

    def getWord(self, idx):
        """Return the word stored at position `idx`."""
        return self.idx2word[idx]

In [6]:
# Authenticate with the Hugging Face Hub.
# If the HF_TOKEN environment variable is set, it is used so the notebook can run
# unattended (Restart & Run All). Otherwise `token` is None and login() prompts
# for the token interactively, exactly as before.
login(token=os.environ.get("HF_TOKEN"))

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [22]:
# Sanity check: display the Hub repo id that the model and tokenizer are loaded from.
hub_folder

'annamariagnat/NEW_trained_english'

In [23]:
# Pull the fine-tuned token-classification model and its tokenizer from the Hub.
loaded_model = AutoModelForTokenClassification.from_pretrained(hub_folder)
# A Trainer with default arguments is used here only for its predict() loop.
trainer = Trainer(model = loaded_model)
tokenizer = AutoTokenizer.from_pretrained(hub_folder)

# DataLoaderConfiguration comes from accelerate (installed in the first cell) but
# was never imported, so this cell raised NameError on a fresh kernel.
from accelerate.utils import DataLoaderConfiguration

# NOTE(review): this configuration is not passed to the Trainer anywhere in this
# notebook - confirm whether it is still needed.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [24]:
# Restore the label vocabulary written during training (presumably a Vocab
# instance, given the attributes used below - verify against the training notebook).
# NOTE: pickle.load can execute arbitrary code; only open label files you created yourself.
with open(labels, 'rb') as label_file:
    label_indices = pickle.load(label_file)

# Index -> tag string lookup, used to decode predicted label ids.
label_list = label_indices.idx2word

In [25]:
# seqeval computes entity-level precision / recall / F1 from tag sequences.
# trust_remote_code=True is the argument the `datasets` notice asks for when
# loading this metric script.
metric = load_metric("seqeval", trust_remote_code=True)

for test_path in test_data_files:
    test_data = read_iob2_file(test_path)

    # Map every tag string to the integer id stored in the training vocabulary.
    # A tag that is missing from that vocabulary raises KeyError here.
    test_data['tag_idx'] = test_data['tags'].apply(
        lambda tags: [label_indices.word2idx[tag] for tag in tags]
    )

    # tokenize the data
    tokenized_test_data = tokenize_and_align_labels(test_data, "words", "tag_idx", tokenizer)

    # Wrap the tokenized columns in a Dataset so the Trainer can index them.
    # (Previously the dataset was built twice, only to drop an unused 'id' column.)
    test_dataset = Dataset.from_dict({
        'input_ids': tokenized_test_data['input_ids'],
        'attention_mask': tokenized_test_data['attention_mask'],
        'labels': tokenized_test_data['labels'],
    })

    # evaluating using test data
    # The gold ids are unpacked into `gold_ids` rather than `labels`, because
    # `labels` is the config variable holding the pickle path and must survive
    # so that earlier cells can be re-run.
    logits, gold_ids, _ = trainer.predict(test_dataset)
    predicted_ids = np.argmax(logits, axis=2)

    # Drop every position whose gold label is -100 (masked in
    # tokenize_and_align_labels) and turn the remaining ids back into tag strings.
    true_predictions = [
        [label_list[pred] for pred, gold in zip(pred_row, gold_row) if gold != -100]
        for pred_row, gold_row in zip(predicted_ids, gold_ids)
    ]
    true_labels = [
        [label_list[gold] for gold in gold_row if gold != -100]
        for gold_row in gold_ids
    ]

    results = metric.compute(predictions=true_predictions, references=true_labels)
    print(f"Results for {test_path}:")
    print(results)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


  0%|          | 0/65 [00:00<?, ?it/s]

Results for data\sr_test.iob2:
{'LOC': {'precision': 0.7184713375796178, 'recall': 0.8257686676427526, 'f1': 0.7683923705722071, 'number': 683}, 'ORG': {'precision': 0.7175792507204611, 'recall': 0.43079584775086505, 'f1': 0.5383783783783784, 'number': 578}, 'PER': {'precision': 0.773308957952468, 'recall': 0.831041257367387, 'f1': 0.8011363636363636, 'number': 509}, 'overall_precision': 0.7361524717093508, 'overall_recall': 0.6983050847457627, 'overall_f1': 0.7167294868077703, 'overall_accuracy': 0.9548351757793049}


  0%|          | 0/142 [00:00<?, ?it/s]

Results for data\hr_test.iob2:
{'LOC': {'precision': 0.6758064516129032, 'recall': 0.7515695067264574, 'f1': 0.7116772823779194, 'number': 1115}, 'ORG': {'precision': 0.6655791190864601, 'recall': 0.3930635838150289, 'f1': 0.49424591156874625, 'number': 1038}, 'PER': {'precision': 0.7264150943396226, 'recall': 0.8269689737470167, 'f1': 0.7734375, 'number': 838}, 'overall_precision': 0.6907730673316709, 'overall_recall': 0.6482781678368439, 'overall_f1': 0.6688513280441531, 'overall_accuracy': 0.9569513383123811}
