In [None]:
!pip install datasets evaluate transformers[sentencepiece] seqeval
!pip install accelerate

Collecting datasets
  Downloading datasets-2.16.1-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os

import evaluate
import numpy as np
from datasets import Dataset, DatasetDict
from transformers import BertTokenizerFast, AutoTokenizer, DataCollatorForTokenClassification, \
    AutoModelForTokenClassification, TrainingArguments, Trainer

In [None]:
# Hugging Face Hub checkpoint id. Despite the name, this notebook uses it twice:
# to load the tokeniser and as the base checkpoint passed to
# AutoModelForTokenClassification.from_pretrained in the training loop.
TOKENISER_CHECKPOINT: str = "dbmdz/bert-base-historic-multilingual-cased"

In [None]:
def print_aligned(
        list1: list,
        list2: list
):
    """Print two sequences of strings as two rows with their items column-aligned.

    Items are paired positionally with ``zip``, so the longer list is cut to the
    length of the shorter one. Each pair occupies a column as wide as its longer
    member plus one separating space, which means both printed rows end with a
    trailing space.

    :param list1: strings printed on the first row
    :param list2: strings printed on the second row
    """
    row1_cells = []
    row2_cells = []
    for item1, item2 in zip(list1, list2):
        # column width = the wider of the two items, plus a one-space gutter
        width = max(len(item1), len(item2)) + 1
        row1_cells.append(item1 + " " * (width - len(item1)))
        row2_cells.append(item2 + " " * (width - len(item2)))
    print("".join(row1_cells))
    print("".join(row2_cells))

In [None]:
# Decide where the data lives: on Google Colab, mount Drive and use the project
# folder there; everywhere else fall back to a local ``data`` directory.
try:
    from google.colab import drive
except ImportError:
    # Only a missing google.colab package means "not on Colab". Any other failure
    # (e.g. a Drive mount that errors out) must not be mistaken for this case.
    print("You do not work on Colab")
    DATA_DIR = os.path.join('data')
else:
    print(
        "You work on Colab. Gentle as we are, we will mount Drive for you. "
        "It'd help if you allowed this in the popup that opens."
    )
    # Errors raised here now propagate instead of silently selecting the local
    # fallback directory.
    drive.mount('/content/drive')
    # Relative path: it only points into the mount above while the working
    # directory is /content (as in the saved output of this cell).
    DATA_DIR = os.path.join('drive', 'MyDrive', 'KEDiff', 'data')
print(f"{DATA_DIR=}", '-->', os.path.abspath(DATA_DIR))

You work on Colab. Gentle as we are, we will mount Drive for you. It'd help if you allowed this in the popup that opens.
Mounted at /content/drive
DATA_DIR='drive/MyDrive/KEDiff/data' --> /content/drive/MyDrive/KEDiff/data


In [None]:
# Load the pre-built Hugging Face dataset stored under DATA_DIR/BILOUs_hf and
# show its summary and its feature schema.
BILOUs_hug = Dataset.load_from_disk(dataset_path=os.path.join(DATA_DIR, 'BILOUs_hf'))
print("Dataset:", BILOUs_hug, sep='\n')
print("Features:", BILOUs_hug.features, sep='\n')

Dataset:
Dataset({
    features: ['Text', 'EVENT-BILOUs', 'EVENT-IOBs', 'LOC-BILOUs', 'LOC-IOBs', 'MISC-BILOUs', 'MISC-IOBs', 'ORG-BILOUs', 'ORG-IOBs', 'PER-BILOUs', 'PER-IOBs', 'TIME-BILOUs', 'TIME-IOBs'],
    num_rows: 13928
})
Features:
{'Text': Value(dtype='string', id=None), 'EVENT-BILOUs': Sequence(feature=ClassLabel(names=['O', 'B-EVENT', 'I-EVENT', 'L-EVENT', 'U-EVENT', 'B-LOC', 'I-LOC', 'L-LOC', 'U-LOC', 'B-MISC', 'I-MISC', 'L-MISC', 'U-MISC', 'B-ORG', 'I-ORG', 'L-ORG', 'U-ORG', 'B-PER', 'I-PER', 'L-PER', 'U-PER', 'B-TIME', 'I-TIME', 'L-TIME', 'U-TIME'], id=None), length=-1, id=None), 'EVENT-IOBs': Sequence(feature=ClassLabel(names=['O', 'B-EVENT', 'I-EVENT', 'L-EVENT', 'U-EVENT', 'B-LOC', 'I-LOC', 'L-LOC', 'U-LOC', 'B-MISC', 'I-MISC', 'L-MISC', 'U-MISC', 'B-ORG', 'I-ORG', 'L-ORG', 'U-ORG', 'B-PER', 'I-PER', 'L-PER', 'U-PER', 'B-TIME', 'I-TIME', 'L-TIME', 'U-TIME'], id=None), length=-1, id=None), 'LOC-BILOUs': Sequence(feature=ClassLabel(names=['O', 'B-EVENT', 'I-EVENT', 'L-EV

In [None]:
# Two-stage split with a fixed seed: first hold out 20 % of the rows, then cut
# that hold-out in half, giving train / test / validation portions.
# NOTE(review): BILOUs_hug is rebound from a Dataset to a DatasetDict here, so this
# cell is presumably not re-runnable without first re-running the loading cell.
holdout_split = BILOUs_hug.train_test_split(test_size=0.2, seed=42)
eval_split = holdout_split['test'].train_test_split(test_size=0.5, seed=42)

# collect the three portions in a single DatasetDict
BILOUs_hug = DatasetDict(
    {
        'train': holdout_split['train'],
        'test': eval_split['test'],
        'validation': eval_split['train'],
    }
)
del holdout_split, eval_split
print(BILOUs_hug)

DatasetDict({
    train: Dataset({
        features: ['Text', 'EVENT-BILOUs', 'EVENT-IOBs', 'LOC-BILOUs', 'LOC-IOBs', 'MISC-BILOUs', 'MISC-IOBs', 'ORG-BILOUs', 'ORG-IOBs', 'PER-BILOUs', 'PER-IOBs', 'TIME-BILOUs', 'TIME-IOBs'],
        num_rows: 11142
    })
    test: Dataset({
        features: ['Text', 'EVENT-BILOUs', 'EVENT-IOBs', 'LOC-BILOUs', 'LOC-IOBs', 'MISC-BILOUs', 'MISC-IOBs', 'ORG-BILOUs', 'ORG-IOBs', 'PER-BILOUs', 'PER-IOBs', 'TIME-BILOUs', 'TIME-IOBs'],
        num_rows: 1393
    })
    validation: Dataset({
        features: ['Text', 'EVENT-BILOUs', 'EVENT-IOBs', 'LOC-BILOUs', 'LOC-IOBs', 'MISC-BILOUs', 'MISC-IOBs', 'ORG-BILOUs', 'ORG-IOBs', 'PER-BILOUs', 'PER-IOBs', 'TIME-BILOUs', 'TIME-IOBs'],
        num_rows: 1393
    })
})


In [None]:
# Load the tokeniser that belongs to the checkpoint and report whether it is a
# "fast" tokeniser implementation.
tokeniser: BertTokenizerFast = AutoTokenizer.from_pretrained(TOKENISER_CHECKPOINT)
print(f"Is '{TOKENISER_CHECKPOINT}' a fast tokeniser?", tokeniser.is_fast)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/83.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/561 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/212k [00:00<?, ?B/s]

Is 'dbmdz/bert-base-historic-multilingual-cased' a fast tokeniser? True


In [None]:
def batch_embed_labels(batch):
    """Overwrite the first and last label of every sequence with -100.

    Per the original author's note, the first and last positions correspond to the
    [CLS] and [SEP] tokens required by BERT models. -100 is the value that
    ``compute_metrics`` in this notebook filters out as the ignored index.

    The twelve label columns (six entity types, each in a BILOUs and an IOBs
    variant) are rewritten in place on ``batch``.

    :param batch: mapping from column name to a list of per-example label lists
    :return: the same ``batch`` object with its label columns replaced
    """
    label_columns = (
        'EVENT-BILOUs', 'LOC-BILOUs', 'MISC-BILOUs', 'ORG-BILOUs', 'PER-BILOUs', 'TIME-BILOUs',
        'EVENT-IOBs', 'LOC-IOBs', 'MISC-IOBs', 'ORG-IOBs', 'PER-IOBs', 'TIME-IOBs',
    )
    for column in label_columns:
        masked = []
        for labels in batch[column]:
            # keep the inner labels, replace both end positions with the mask value
            masked.append([-100] + list(labels[1:-1]) + [-100])
        batch[column] = masked
    return batch
# Run batch_embed_labels over every split in batches; the result replaces BILOUs_hug.
BILOUs_hug = BILOUs_hug.map(batch_embed_labels, batched=True)

In [None]:
def batch_tokenise(batch):
    """Tokenise the ``Text`` column of a batch with the notebook-level ``tokeniser``.

    Truncation is enabled for over-long inputs.
    NOTE(review): the label columns are not truncated anywhere in this notebook;
    confirm that no text exceeds the model's maximum length, otherwise inputs and
    labels would presumably differ in length.

    :param batch: mapping that contains a ``'Text'`` entry with the raw strings
    :return: whatever the tokeniser returns for these texts
    """
    return tokeniser(batch['Text'], truncation=True)
# Tokenise every split in batches; the tokeniser outputs are added as new columns
# next to the existing ones (see the printed feature lists).
BILOUs_hug_tokenised = BILOUs_hug.map(batch_tokenise, batched=True)
print(BILOUs_hug_tokenised)

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['Text', 'EVENT-BILOUs', 'EVENT-IOBs', 'LOC-BILOUs', 'LOC-IOBs', 'MISC-BILOUs', 'MISC-IOBs', 'ORG-BILOUs', 'ORG-IOBs', 'PER-BILOUs', 'PER-IOBs', 'TIME-BILOUs', 'TIME-IOBs', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 11142
    })
    test: Dataset({
        features: ['Text', 'EVENT-BILOUs', 'EVENT-IOBs', 'LOC-BILOUs', 'LOC-IOBs', 'MISC-BILOUs', 'MISC-IOBs', 'ORG-BILOUs', 'ORG-IOBs', 'PER-BILOUs', 'PER-IOBs', 'TIME-BILOUs', 'TIME-IOBs', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1393
    })
    validation: Dataset({
        features: ['Text', 'EVENT-BILOUs', 'EVENT-IOBs', 'LOC-BILOUs', 'LOC-IOBs', 'MISC-BILOUs', 'MISC-IOBs', 'ORG-BILOUs', 'ORG-IOBs', 'PER-BILOUs', 'PER-IOBs', 'TIME-BILOUs', 'TIME-IOBs', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1393
    })
})


In [None]:
def batch_set_labels_column(batch, column_name):
    """Expose one existing column of ``batch`` under the key ``"labels"``.

    :param batch: mapping from column name to column values; modified in place
    :param column_name: name of the column whose values become the labels
    :return: the same ``batch`` object, now with a ``"labels"`` entry
    """
    selected = batch[column_name]
    batch["labels"] = selected
    return batch

In [None]:
# Columns of the un-tokenised dataset; exactly these are passed as remove_columns
# when a single label column is selected further down.
BILOUs_hug["train"].column_names

['Text',
 'EVENT-BILOUs',
 'EVENT-IOBs',
 'LOC-BILOUs',
 'LOC-IOBs',
 'MISC-BILOUs',
 'MISC-IOBs',
 'ORG-BILOUs',
 'ORG-IOBs',
 'PER-BILOUs',
 'PER-IOBs',
 'TIME-BILOUs',
 'TIME-IOBs']

In [None]:
# Example for a single label type (PER): copy its IOB column to `labels` and drop
# all original columns, leaving only the tokeniser outputs plus `labels`.
# Adapt 'column_name' for each label type.
BILOUs_hug_tokenised_PER = BILOUs_hug_tokenised.map(
    batch_set_labels_column,
    fn_kwargs={'column_name': 'PER-IOBs'},
    batched=True,
    remove_columns=BILOUs_hug["train"].column_names)

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

In [None]:
# Show one prepared training example (index 1) without leaving a temporary behind.
print(BILOUs_hug_tokenised_PER["train"][1])

{'input_ids': [2, 14331, 16, 7098, 3616, 9042, 10976, 405, 11928, 1080, 18, 7241, 430, 18, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': [-100, 0, 0, 0, 17, 18, 18, 18, 18, 18, 18, 0, 0, 0, -100]}


In [None]:
# Collator that pads a list of examples into one batch; reused by the Trainer later.
data_collator = DataCollatorForTokenClassification(tokenizer=tokeniser, padding=True)

# Collate the first two training examples and show the padded result ...
batch = data_collator([BILOUs_hug_tokenised_PER["train"][idx] for idx in (0, 1)])
print(batch)
print(batch['labels'])

# ... next to the un-padded label lists of the same two examples.
for idx in (0, 1):
    print(BILOUs_hug_tokenised_PER["train"][idx]["labels"])
del idx


You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'input_ids': tensor([[    2,   964, 26753,   377,  1172,  3288, 10883,    18,    13,     3,
             0,     0,     0,     0,     0],
        [    2, 14331,    16,  7098,  3616,  9042, 10976,   405, 11928,  1080,
            18,  7241,   430,    18,     3]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]), 'labels': tensor([[-100,    0,    0,    0,    0,    0,    0,    0,    0, -100, -100, -100,
         -100, -100, -100],
        [-100,    0,    0,    0,   17,   18,   18,   18,   18,   18,   18,    0,
            0,    0, -100]])}
tensor([[-100,    0,    0,    0,    0,    0,    0,    0,    0, -100, -100, -100,
         -100, -100, -100],
        [-100,    0,    0,    0,   17,   18,   18,   18,   18,   18,   18,    0,
            0,    0, -100]])
[-100, 0, 0, 0, 0, 0, 0, 0,

In [None]:
# Tag names indexed by class id, read from the PER-IOBs column.
# NOTE(review): later cells use this list for every label type; the saved features
# output suggests all label columns share one vocabulary, but confirm.
label_names = BILOUs_hug["train"].features["PER-IOBs"].feature.names

# Build a tiny evaluation batch from the first two training rows.
batch = {'references': [], 'predictions': []}
for row in (0, 1):
    # skip the first and last position (masked with -100) and map ids to tag names
    labels = [label_names[label_id] for label_id in BILOUs_hug["train"][row]["PER-IOBs"][1:-1]]
    # fake predictions
    predictions = list(labels)
    predictions[2] = "B-PER"
    predictions[3] = "I-PER"

    print_aligned(labels, predictions)

    batch['references'].append(labels)
    batch['predictions'].append(predictions)
del row, labels, predictions

# calculate metrics
# (batch, metric, metric_name and metric_result are deliberately left defined)
for metric_name in ("seqeval", "poseval"):
    print(f"Now evaluating using {metric_name=}")
    metric = evaluate.load(metric_name)
    metric_result = metric.compute(predictions=batch['predictions'], references=batch['references'])
    print(metric_result)


O O O     O     O O O O 
O O B-PER I-PER O O O O 
O O O     B-PER I-PER I-PER I-PER I-PER I-PER I-PER O O O 
O O B-PER I-PER I-PER I-PER I-PER I-PER I-PER I-PER O O O 
Now evaluating using metric_name='seqeval'


Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

{'PER': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1}, 'overall_precision': 0.0, 'overall_recall': 0.0, 'overall_f1': 0.0, 'overall_accuracy': 0.8095238095238095}
Now evaluating using metric_name='poseval'


Downloading builder script:   0%|          | 0.00/4.46k [00:00<?, ?B/s]

{'B-PER': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}, 'I-PER': {'precision': 0.75, 'recall': 1.0, 'f1-score': 0.8571428571428571, 'support': 6}, 'O': {'precision': 1.0, 'recall': 0.7857142857142857, 'f1-score': 0.88, 'support': 14}, 'accuracy': 0.8095238095238095, 'macro avg': {'precision': 0.5833333333333334, 'recall': 0.5952380952380952, 'f1-score': 0.579047619047619, 'support': 21}, 'weighted avg': {'precision': 0.8809523809523809, 'recall': 0.8095238095238095, 'f1-score': 0.8315646258503401, 'support': 21}}


In [None]:
# Metric object read as a global by compute_metrics during training.
metric = evaluate.load('poseval')


In [None]:
def compute_metrics(eval_preds):
    """Turn raw evaluation output into the summary metrics reported per epoch.

    Reads the notebook-level globals ``metric`` (for scoring) and ``label_names``
    (to map class ids to tag names).

    :param eval_preds: a pair ``(logits, labels)``; the predicted class is the
        argmax of ``logits`` over its last axis
    :return: dict with macro and weighted precision / recall / f1 / support,
        plus overall accuracy
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    ignored = -100  # positions carrying this label are excluded from scoring

    # Reference tags: every labelled position, converted from id to tag name.
    reference_tags = [
        [label_names[tag_id] for tag_id in label_row if tag_id != ignored]
        for label_row in labels
    ]
    # Predicted tags: same positions, selected by the *label* mask, not the prediction.
    predicted_tags = []
    for predicted_row, label_row in zip(predictions, labels):
        predicted_tags.append([
            label_names[predicted_id]
            for predicted_id, tag_id in zip(predicted_row, label_row)
            if tag_id != ignored
        ])

    metric_result = metric.compute(predictions=predicted_tags, references=reference_tags)
    macro = metric_result["macro avg"]
    weighted = metric_result["weighted avg"]
    return {
        "macro precision": macro["precision"],
        "macro recall": macro["recall"],
        "macro f1": macro["f1-score"],
        "macro support": macro["support"],

        "weighted precision": weighted["precision"],
        "weighted recall": weighted["recall"],
        "weighted f1": weighted["f1-score"],
        "weighted support": weighted["support"],

        "accuracy": metric_result["accuracy"],
    }



In [None]:
# Two-way mapping between class ids and tag names, handed to the model config
# when the models are created.
id2label = dict(enumerate(label_names))
label2id = {name: idx for idx, name in id2label.items()}



In [None]:
# Fine-tune one token-classification model per entity type, each starting from the
# same base checkpoint and each trained on that type's IOB label column.
# NOTE(review): the saved logs show validation loss rising after the first epochs
# for most types, while the model written at the end is presumably the state after
# the last epoch; consider `load_best_model_at_end` if the best epoch is wanted.
for label_type in ['EVENT', 'LOC', 'MISC', 'ORG', 'PER', 'TIME']:
    trained_model_name = f"oalz-1788-q1-ner-{label_type}"
    # Epoch checkpoints and the final model are written to the same directory.
    output_dir = os.path.join(DATA_DIR, trained_model_name)

    print(f"Now training '{trained_model_name}'")

    print("Compose the relevant tokenised and annotated dataset")
    # Expose this type's IOB column as `labels` and drop all original columns,
    # leaving only the tokeniser outputs plus `labels`.
    label_ds = BILOUs_hug_tokenised.map(
        batch_set_labels_column,
        fn_kwargs={'column_name': f'{label_type}-IOBs'},
        batched=True,
        remove_columns=BILOUs_hug["train"].column_names)

    # A fresh model per label type; the classification head is newly initialised
    # (see the warning in the saved output).
    model = AutoModelForTokenClassification.from_pretrained(
        TOKENISER_CHECKPOINT,
        id2label=id2label,
        label2id=label2id,
    )

    args = TrainingArguments(
        output_dir=output_dir,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-5,
        num_train_epochs=5,
        weight_decay=0.01
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=label_ds["train"],
        eval_dataset=label_ds["validation"],
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        tokenizer=tokeniser,
    )
    trainer.train()
    trainer.save_model(output_dir)


Now training 'oalz-1788-q1-ner-EVENT'
Compose the relevant tokenised and annotated dataset


Map:   0%|          | 0/11142 [00:00<?, ? examples/s]

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at dbmdz/bert-base-historic-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro precision,Macro recall,Macro f1,Macro support,Weighted precision,Weighted recall,Weighted f1,Weighted support,Accuracy
1,0.0258,0.021957,0.665233,0.343066,0.351528,37440,0.99497,0.9957,0.993655,37440,0.9957
2,0.0183,0.026581,0.544056,0.350338,0.364151,37440,0.993745,0.9957,0.993807,37440,0.9957
3,0.0107,0.025134,0.83785,0.473104,0.551782,37440,0.995996,0.996474,0.995455,37440,0.996474
4,0.0041,0.027222,0.720497,0.563774,0.618355,37440,0.995397,0.996181,0.995554,37440,0.996181
5,0.0014,0.029559,0.683685,0.551771,0.601081,37440,0.995009,0.99586,0.995285,37440,0.99586


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Now training 'oalz-1788-q1-ner-LOC'
Compose the relevant tokenised and annotated dataset


Map:   0%|          | 0/11142 [00:00<?, ? examples/s]

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at dbmdz/bert-base-historic-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro precision,Macro recall,Macro f1,Macro support,Weighted precision,Weighted recall,Weighted f1,Weighted support,Accuracy
1,0.0444,0.033602,0.829535,0.803648,0.814846,37440,0.990721,0.990999,0.990804,37440,0.990999
2,0.0229,0.0337,0.828745,0.835554,0.831673,37440,0.991492,0.99148,0.991474,37440,0.99148
3,0.0155,0.038988,0.862314,0.803043,0.830234,37440,0.991649,0.992067,0.991778,37440,0.992067
4,0.0067,0.045115,0.797205,0.828778,0.812301,37440,0.990587,0.990278,0.990417,37440,0.990278
5,0.0041,0.051421,0.82282,0.809988,0.816253,37440,0.990883,0.991026,0.990949,37440,0.991026


Now training 'oalz-1788-q1-ner-MISC'
Compose the relevant tokenised and annotated dataset


Map:   0%|          | 0/11142 [00:00<?, ? examples/s]

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at dbmdz/bert-base-historic-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro precision,Macro recall,Macro f1,Macro support,Weighted precision,Weighted recall,Weighted f1,Weighted support,Accuracy
1,0.1453,0.126152,0.98201,0.353961,0.36733,37440,0.966652,0.966052,0.950695,37440,0.966052
2,0.1098,0.123994,0.739221,0.503677,0.571298,37440,0.961573,0.968697,0.962329,37440,0.968697
3,0.0701,0.146716,0.690761,0.534221,0.58938,37440,0.962141,0.968216,0.96394,37440,0.968216
4,0.0405,0.160724,0.624809,0.585596,0.602709,37440,0.960501,0.961752,0.961075,37440,0.961752
5,0.0282,0.173072,0.675179,0.557776,0.603593,37440,0.962448,0.967735,0.964279,37440,0.967735


Now training 'oalz-1788-q1-ner-ORG'
Compose the relevant tokenised and annotated dataset


Map:   0%|          | 0/11142 [00:00<?, ? examples/s]

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at dbmdz/bert-base-historic-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro precision,Macro recall,Macro f1,Macro support,Weighted precision,Weighted recall,Weighted f1,Weighted support,Accuracy
1,0.0735,0.062769,0.744259,0.709738,0.726212,37440,0.9795,0.980288,0.979864,37440,0.980288
2,0.0439,0.082751,0.786944,0.641933,0.698793,37440,0.97867,0.981143,0.979108,37440,0.981143
3,0.0296,0.072782,0.777484,0.714781,0.74264,37440,0.981107,0.982399,0.981553,37440,0.982399
4,0.015,0.101954,0.811116,0.669784,0.726152,37440,0.981054,0.982933,0.981244,37440,0.982933
5,0.009,0.099921,0.775483,0.69935,0.733129,37440,0.980562,0.982078,0.981092,37440,0.982078


Now training 'oalz-1788-q1-ner-PER'
Compose the relevant tokenised and annotated dataset


Some weights of BertForTokenClassification were not initialized from the model checkpoint at dbmdz/bert-base-historic-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro precision,Macro recall,Macro f1,Macro support,Weighted precision,Weighted recall,Weighted f1,Weighted support,Accuracy
1,0.1023,0.062091,0.892505,0.825322,0.856436,37440,0.979684,0.980502,0.979927,37440,0.980502
2,0.0575,0.059186,0.914037,0.849048,0.87907,37440,0.982711,0.983253,0.982831,37440,0.983253
3,0.0389,0.065493,0.874987,0.876179,0.875517,37440,0.982287,0.982212,0.982244,37440,0.982212
4,0.0226,0.077691,0.874867,0.879443,0.877076,37440,0.98246,0.982318,0.982382,37440,0.982318
5,0.0153,0.086981,0.881904,0.877156,0.879198,37440,0.982794,0.982719,0.982734,37440,0.982719


Now training 'oalz-1788-q1-ner-TIME'
Compose the relevant tokenised and annotated dataset


Map:   0%|          | 0/11142 [00:00<?, ? examples/s]

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

Map:   0%|          | 0/1393 [00:00<?, ? examples/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at dbmdz/bert-base-historic-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro precision,Macro recall,Macro f1,Macro support,Weighted precision,Weighted recall,Weighted f1,Weighted support,Accuracy
1,0.0273,0.01612,0.866866,0.724958,0.783099,37440,0.994114,0.994631,0.994098,37440,0.994631
2,0.0181,0.016485,0.874139,0.771485,0.817265,37440,0.995036,0.995379,0.995128,37440,0.995379
3,0.0105,0.020711,0.817115,0.814231,0.815101,37440,0.994991,0.994925,0.99495,37440,0.994925
4,0.0052,0.023391,0.853102,0.807937,0.829458,37440,0.995291,0.995486,0.995369,37440,0.995486
5,0.0022,0.023426,0.838887,0.814104,0.825925,37440,0.995299,0.995379,0.995334,37440,0.995379


In [None]:
# No-op placeholder cell.
pass
