In [1]:

import os
import pickle
from collections import Counter

import numpy as np
import torch
from datasets import (
    load_from_disk,
)
from evaluate import load as load_metric
from sklearn.metrics import classification_report
from transformers import (
    BertForTokenClassification,
    BertTokenizerFast,
    DataCollatorForTokenClassification,
    Trainer,
    TrainingArguments,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint identifier; reused below for the tokenizer, the model, the training
# output directory, and the saved-model / results file names.
model_name = "bert-base-uncased"

In [3]:
# Load the dataset splits from local disk (presumably written by an earlier
# preparation step that is not part of this notebook -- confirm).
conll_main = load_from_disk("./splits/conll_main")

ontonotes_main = load_from_disk("./splits/ontonotes_main")

# NOTE(review): despite the "_encoded" suffix, these two splits are only tokenized
# later, when `tokenize_and_align_labels` is mapped over them.
ontonotes_nw_encoded = load_from_disk("./splits/ontonotes_newswire")
ontonotes_rest_encoded = load_from_disk("./splits/ontonotes_rest")

# Select device (GPU if available)

In [4]:
# Pick CUDA when available, otherwise CPU, and report the choice.
# NOTE(review): `device` is not referenced by any later cell shown here; this cell
# currently serves only as a sanity check of the runtime.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


# Tokenisation & Alignment

In [5]:
# Integer tag id -> BIO tag string for the OntoNotes tag set.
# NOTE(review): not referenced by any later cell shown here; presumably used when
# the splits on disk were built -- confirm before removing.
ontonotes_id_to_label = {
    0: "O", 1: "B-CARDINAL", 2: "B-DATE", 3: "I-DATE", 4: "B-PERSON", 5: "I-PERSON",
    6: "B-NORP", 7: "B-GPE", 8: "I-GPE", 9: "B-LAW", 10: "I-LAW", 11: "B-ORG", 12: "I-ORG",
    13: "B-PERCENT", 14: "I-PERCENT", 15: "B-ORDINAL", 16: "B-MONEY", 17: "I-MONEY",
    18: "B-WORK_OF_ART", 19: "I-WORK_OF_ART", 20: "B-FAC", 21: "B-TIME", 22: "I-CARDINAL",
    23: "B-LOC", 24: "B-QUANTITY", 25: "I-QUANTITY", 26: "I-NORP", 27: "I-LOC",
    28: "B-PRODUCT", 29: "I-TIME", 30: "B-EVENT", 31: "I-EVENT", 32: "I-FAC",
    33: "B-LANGUAGE", 34: "I-PRODUCT", 35: "I-ORDINAL", 36: "I-LANGUAGE"
}

# CoNLL BIO tag string -> integer id, and its inverse. `id2label` is what
# `evaluate_predictions` uses to turn predicted / gold ids back into tag strings.
conll_label_to_id = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3,
                     'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8}
id2label = {v: k for k, v in conll_label_to_id.items()}

# OntoNotes entity type -> CoNLL entity type. `None` marks types with no CoNLL
# counterpart (presumably these are relabelled as "O" -- confirm).
# NOTE(review): not referenced by any later cell shown here.
ontonotes_to_conll_entity = {
    "PERSON": "PER", "ORG": "ORG", "GPE": "LOC", "LOC": "LOC",
    "NORP": "MISC", "FAC": "MISC", "EVENT": "MISC", "WORK_OF_ART": "MISC",
    "LAW": "MISC", "PRODUCT": "MISC", "LANGUAGE": "MISC",
    "DATE": None, "TIME": None, "PERCENT": None, "MONEY": None,
    "QUANTITY": None, "ORDINAL": None, "CARDINAL": None
}

In [6]:
def process_data(data_list):
    """Collapse sub-token tags to one tag per word by majority vote.

    Each item of ``data_list`` must provide:

    * ``'word_ids'``: one entry per token position; ``None`` entries are
      dropped before alignment.
    * ``'predictions'`` and ``'gold'``: tag sequences aligned, in order, with
      the non-``None`` entries of ``'word_ids'``.

    Consecutive positions that share a word id are treated as one word. The
    word receives the most frequent tag among its positions; on a tie the tag
    that occurs first within the word wins. Predictions and gold tags are
    voted on independently.

    No other key (e.g. ``'tokens'``) is required.

    Args:
        data_list: iterable of mapping-like examples as described above.

    Returns:
        ``(predictions, gold)``: two lists holding, for every example, the
        list of word-level predicted tags and word-level gold tags.

    Raises:
        KeyError: if an example lacks one of the three required keys.
        IndexError: if ``'predictions'`` or ``'gold'`` is shorter than the
            number of non-``None`` word ids.
    """

    def majority(votes):
        # Counter.most_common orders equal counts by first occurrence, which is
        # what makes the tie-break deterministic.
        return Counter(votes).most_common(1)[0][0]

    def process_single(data):
        # Drop the positions that do not belong to any word so that index i
        # lines up with predictions[i] / gold[i].
        word_ids = [w for w in data['word_ids'] if w is not None]
        predictions = data['predictions']
        gold = data['gold']

        word_predictions = []
        word_gold = []

        current_word_id = None
        pred_votes = []
        gold_votes = []

        for idx, word_id in enumerate(word_ids):
            if word_id != current_word_id:
                # A new word starts here: emit the finished one (if any).
                if pred_votes:
                    word_predictions.append(majority(pred_votes))
                    word_gold.append(majority(gold_votes))
                current_word_id = word_id
                pred_votes = []
                gold_votes = []
            pred_votes.append(predictions[idx])
            gold_votes.append(gold[idx])

        # Emit the trailing word.
        if pred_votes:
            word_predictions.append(majority(pred_votes))
            word_gold.append(majority(gold_votes))

        return word_predictions, word_gold

    processed_predictions_list = []
    processed_gold_list = []

    for data in data_list:
        word_predictions, word_gold = process_single(data)
        processed_predictions_list.append(word_predictions)
        processed_gold_list.append(word_gold)

    return processed_predictions_list, processed_gold_list


def evaluate_predictions(p, test_data):
    """Print a word-level classification report for a prediction run.

    Args:
        p: a 3-item sequence ``(predictions, labels, _)``. ``predictions``
            holds per-position class scores (arg-maxed over the last axis) and
            ``labels`` the gold label ids, with ``-100`` marking positions to
            ignore. The third item is not used.
        test_data: the dataset the predictions were produced from, in the same
            example order. It must expose a ``'word_ids'`` column and support
            ``.map(fn, with_indices=True)``.

    Returns:
        ``(flat_pred, flat_gold)``: the word-level predicted and gold tag
        strings, flattened over all examples.

    Side effects:
        Prints ``sklearn.metrics.classification_report`` for the flattened
        tags. The caller's ``test_data`` object is not modified; the mapped
        copy is local to this function.
    """
    predictions, labels, _ = p

    pred_indices = [np.argmax(seq_scores, axis=-1) for seq_scores in predictions]
    label_indices = labels

    # Keep only positions with a real label; both tag lists are filtered by the
    # same mask, so they stay aligned with each other.
    pred_tags = [[id2label[pred_id] for pred_id, gold_id in zip(p_seq, l_seq) if gold_id != -100]
                 for p_seq, l_seq in zip(pred_indices, label_indices)]
    gold_tags = [[id2label[gold_id] for gold_id in l_seq if gold_id != -100]
                 for l_seq in label_indices]

    def add_preds(example, idx):
        # Defensive cap: never attach more tags than the example has positions.
        length = len(example['word_ids'])
        example['predictions'] = pred_tags[idx][:length]
        example['gold'] = gold_tags[idx][:length]
        return example

    test_data = test_data.map(add_preds, with_indices=True)

    # Collapse sub-token tags to one tag per word before scoring.
    pred, gold = process_data(test_data)

    flat_pred = [label for seq in pred for label in seq]
    flat_gold = [label for seq in gold for label in seq]

    print(classification_report(flat_gold, flat_pred, zero_division=0))

    return (flat_pred, flat_gold)

# Model

In [7]:
# Tokenizer for the chosen checkpoint. `tokenize_and_align_labels` relies on the
# `.word_ids()` method of its output to map token positions back to words.
tokenizer = BertTokenizerFast.from_pretrained(model_name)

# Tag names in id order (index == id). This mirrors `conll_label_to_id` defined
# earlier; keep the two in sync.
label_list = ['O', 'B-PER', 'I-PER', 'B-ORG',
              'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']


def tokenize_and_align_labels(examples):
    """Tokenize a batch of pre-split sentences and spread word labels over tokens.

    Intended for ``Dataset.map(..., batched=True)``.

    Args:
        examples: a batch with ``"tokens"`` (one list of words per sentence)
            and ``"labels"`` (one list of word-level label ids per sentence).

    Returns:
        The tokenizer output for the batch, extended with:

        * ``"labels"``: one label id per token position. Every token of a word
          receives that word's label; positions that belong to no word (where
          the word id is ``None``) receive ``-100``.
        * ``"word_ids"``: the word index of every token position (``None`` for
          positions that belong to no word), kept so predictions can later be
          aggregated back to word level.
    """
    # NOTE(review): padding=True pads every sentence to the longest one in the
    # batch handed to this function, so the padding is stored in the dataset.
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        is_split_into_words=True,
        padding=True,
        return_special_tokens_mask=True,
        return_offsets_mapping=True,
    )
    all_word_ids = []
    all_labels = []
    for i, labels in enumerate(examples["labels"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        all_word_ids.append(word_ids)
        # -100 is the "ignore" marker that the evaluation code filters on.
        all_labels.append(
            [-100 if word_idx is None else labels[word_idx] for word_idx in word_ids]
        )

    tokenized_inputs["labels"] = all_labels
    tokenized_inputs["word_ids"] = all_word_ids
    return tokenized_inputs

In [8]:
# Tokenize every split and attach token-level labels plus word ids.
# NOTE: `conll_main` and `ontonotes_main` are rebound to their tokenized versions,
# so re-running this cell without reloading the splits tokenizes them again.
conll_main = conll_main.map(tokenize_and_align_labels, batched=True)
ontonotes_main = ontonotes_main.map(tokenize_and_align_labels, batched=True)
onto_news = ontonotes_nw_encoded.map(tokenize_and_align_labels, batched=True)
onto_rest = ontonotes_rest_encoded.map(tokenize_and_align_labels, batched=True)

# Deciding params

In [9]:
# Experiment setup: train on the CoNLL split, test on the OntoNotes newswire split.
# `test_data` is also handed to the Trainer as its eval_dataset below.
train_data = conll_main
test_data = onto_news

In [10]:
# Tag embedded in the saved-model directory and results file names.
# NOTE(review): `train_data` is `conll_main` but the tag says 'news'; confirm the
# tag is meant to describe the training genre rather than the test split.
train_data_name = 'news'

In [11]:
# Pretrained encoder with a fresh token-classification head, one output per tag in
# `label_list`. The id <-> tag mappings are stored in the model config so that the
# model written by `trainer.save_model` reports real tag names instead of generic
# LABEL_n placeholders.
mod = BertForTokenClassification.from_pretrained(
    model_name,
    num_labels=len(label_list),
    id2label=dict(enumerate(label_list)),
    label2id={label: idx for idx, label in enumerate(label_list)},
)

# Metric used by `compute_metrics`.
metric = load_metric("seqeval")


def compute_metrics(p):
    """Score a batch of predictions with the module-level ``metric``.

    Args:
        p: a pair ``(predictions, labels)``. ``predictions`` holds class scores
            that are arg-maxed over axis 2; ``labels`` holds gold label ids
            with ``-100`` at positions to skip.

    Returns:
        Whatever ``metric.compute`` returns for the tag-string sequences built
        from the non-skipped positions.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    true_predictions = []
    true_labels = []
    for pred_row, gold_row in zip(predicted_ids, gold_ids):
        kept_predictions = []
        kept_labels = []
        for pred_id, gold_id in zip(pred_row, gold_row):
            # Positions labelled -100 carry no gold tag and are left out of both lists.
            if gold_id == -100:
                continue
            kept_predictions.append(label_list[pred_id])
            kept_labels.append(label_list[gold_id])
        true_predictions.append(kept_predictions)
        true_labels.append(kept_labels)

    return metric.compute(predictions=true_predictions, references=true_labels)


# Batch collator for token-classification examples.
data_collator = DataCollatorForTokenClassification(tokenizer)

# No evaluation and no checkpointing happen during training (eval_strategy and
# save_strategy are both "no"), so there is never a "best" checkpoint to reload.
# load_best_model_at_end is therefore switched off instead of being left on as a
# setting that cannot take effect.
training_args = TrainingArguments(
    output_dir=f"./output/{model_name}",
    eval_strategy="no",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=False,
    report_to="none",
    fp16=True,
    save_strategy="no",
)

# `processing_class` replaces the deprecated `tokenizer` argument (the warning that
# pointed at this call in the previous run).
trainer = Trainer(
    model=mod,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  trainer = Trainer(


In [12]:
# Fine-tune `mod` on `train_data` with the arguments configured above.
trainer.train()

Step,Training Loss
500,0.2236
1000,0.0802
1500,0.0504
2000,0.0453
2500,0.0299
3000,0.0275


TrainOutput(global_step=3114, training_loss=0.07433305386939566, metrics={'train_runtime': 394.5068, 'train_samples_per_second': 126.196, 'train_steps_per_second': 7.893, 'total_flos': 3729913475126796.0, 'train_loss': 0.07433305386939566, 'epoch': 3.0})

In [13]:
# Persist the fine-tuned model under ./saved_model/<model_name>_<train_data_name>.
trainer.save_model(f"./saved_model/{model_name}_{train_data_name}")

In [14]:
# Run the fine-tuned model over the test split. The result is pickled in the next
# cell and unpacked as a 3-item sequence by `evaluate_predictions`.
predictions = trainer.predict(test_data)

In [15]:
# Store the raw prediction output so the evaluation can be redone without
# re-running inference. The target directory is created first: open() does not
# create missing parent directories and would fail on a fresh checkout.
results_path = f'./results/{model_name}_{train_data_name}_results.pkl'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
with open(results_path, 'wb') as f:
    pickle.dump(predictions, f)

In [16]:
# Word-level report on the test split; x = flattened predicted tags, y = flattened gold tags.
x, y = evaluate_predictions(predictions, test_data)

Map:   0%|          | 0/42753 [00:00<?, ? examples/s]

Map:   2%|▏         | 667/42753 [00:00<00:06, 6582.85 examples/s]

Map:   3%|▎         | 1348/42753 [00:00<00:08, 5146.49 examples/s]

Map:   5%|▍         | 2000/42753 [00:00<00:08, 4738.80 examples/s]

Map:   6%|▋         | 2709/42753 [00:00<00:07, 5483.48 examples/s]

Map:   8%|▊         | 3401/42753 [00:00<00:07, 5247.08 examples/s]

Map:   9%|▉         | 4000/42753 [00:00<00:07, 5157.06 examples/s]

Map:  11%|█         | 4800/42753 [00:00<00:06, 5940.13 examples/s]

Map:  13%|█▎        | 5418/42753 [00:00<00:06, 5642.14 examples/s]

Map:  14%|█▍        | 6000/42753 [00:01<00:06, 5404.13 examples/s]

Map:  16%|█▌        | 6857/42753 [00:01<00:05, 6262.91 examples/s]

Map:  18%|█▊        | 7755/42753 [00:01<00:05, 5955.60 examples/s]

Map:  20%|█▉        | 8452/42753 [00:01<00:06, 5704.01 examples/s]

Map:  22%|██▏       | 9433/42753 [00:01<00:05, 5807.14 examples/s]

Map:  24%|██▍       | 10335/42753 [00:01<00:05, 5716.07 examples/s]

Map:  26%|██▌       | 11000/42753 [00:01<00:05, 5311.63 examples/s]

Map:  27%|██▋       | 11754/42753 [00:02<00:05, 5810.67 examples/s]

Map:  29%|██▉       | 12395/42753 [00:02<00:05, 5511.47 examples/s]

Map:  30%|███       | 13000/42753 [00:02<00:05, 5225.55 examples/s]

Map:  32%|███▏      | 13830/42753 [00:02<00:04, 5971.92 examples/s]

Map:  34%|███▍      | 14649/42753 [00:02<00:05, 5554.31 examples/s]

Map:  36%|███▌      | 15429/42753 [00:02<00:05, 5339.45 examples/s]

Map:  37%|███▋      | 16000/42753 [00:02<00:05, 5248.64 examples/s]

Map:  39%|███▉      | 16782/42753 [00:02<00:04, 5867.75 examples/s]

Map:  41%|████      | 17412/42753 [00:03<00:04, 5598.29 examples/s]

Map:  42%|████▏     | 18000/42753 [00:03<00:04, 5343.83 examples/s]

Map:  44%|████▍     | 18785/42753 [00:03<00:04, 5984.38 examples/s]

Map:  45%|████▌     | 19428/42753 [00:03<00:04, 5678.23 examples/s]

Map:  48%|████▊     | 20418/42753 [00:03<00:03, 5754.90 examples/s]

Map:  50%|█████     | 21415/42753 [00:03<00:03, 5810.77 examples/s]

Map:  52%|█████▏    | 22374/42753 [00:03<00:03, 5786.71 examples/s]

Map:  54%|█████▍    | 23000/42753 [00:04<00:03, 5532.63 examples/s]

Map:  56%|█████▌    | 23823/42753 [00:04<00:03, 6146.60 examples/s]

Map:  58%|█████▊    | 24844/42753 [00:04<00:02, 6082.12 examples/s]

Map:  60%|██████    | 25831/42753 [00:04<00:02, 5989.55 examples/s]

Map:  63%|██████▎   | 26807/42753 [00:04<00:02, 5911.55 examples/s]

Map:  65%|██████▍   | 27775/42753 [00:04<00:02, 5813.79 examples/s]

Map:  67%|██████▋   | 28731/42753 [00:05<00:02, 5646.11 examples/s]

Map:  69%|██████▉   | 29430/42753 [00:05<00:02, 5493.90 examples/s]

Map:  70%|███████   | 30000/42753 [00:05<00:02, 5339.24 examples/s]

Map:  71%|███████▏  | 30558/42753 [00:05<00:03, 3802.40 examples/s]

Map:  73%|███████▎  | 31000/42753 [00:05<00:03, 3897.84 examples/s]

Map:  74%|███████▎  | 31465/42753 [00:05<00:02, 4053.50 examples/s]

Map:  75%|███████▍  | 31934/42753 [00:05<00:02, 4196.90 examples/s]

Map:  76%|███████▌  | 32543/42753 [00:06<00:02, 3877.09 examples/s]

Map:  77%|███████▋  | 33000/42753 [00:06<00:02, 3653.74 examples/s]

Map:  78%|███████▊  | 33538/42753 [00:06<00:02, 4044.98 examples/s]

Map:  80%|███████▉  | 34000/42753 [00:06<00:02, 3745.35 examples/s]

Map:  81%|████████  | 34672/42753 [00:06<00:01, 4451.67 examples/s]

Map:  83%|████████▎ | 35357/42753 [00:06<00:01, 4459.71 examples/s]

Map:  84%|████████▍ | 36000/42753 [00:06<00:01, 4486.88 examples/s]

Map:  86%|████████▌ | 36591/42753 [00:07<00:01, 4307.68 examples/s]

Map:  87%|████████▋ | 37392/42753 [00:07<00:01, 3975.98 examples/s]

Map:  89%|████████▉ | 38000/42753 [00:07<00:01, 4185.28 examples/s]

Map:  90%|█████████ | 38662/42753 [00:07<00:00, 4711.61 examples/s]

Map:  92%|█████████▏| 39327/42753 [00:07<00:00, 4558.77 examples/s]

Map:  94%|█████████▎| 39989/42753 [00:07<00:00, 5029.84 examples/s]

Map:  95%|█████████▍| 40581/42753 [00:07<00:00, 4594.34 examples/s]

Map:  97%|█████████▋| 41437/42753 [00:08<00:00, 4643.99 examples/s]

Map:  98%|█████████▊| 42000/42753 [00:08<00:00, 4724.30 examples/s]

Map: 100%|█████████▉| 42628/42753 [00:08<00:00, 5086.35 examples/s]

Map: 100%|██████████| 42753/42753 [00:09<00:00, 4423.21 examples/s]




              precision    recall  f1-score   support

       B-LOC       0.82      0.90      0.86     16435
      B-MISC       0.50      0.68      0.58      7455
       B-ORG       0.74      0.73      0.73     22446
       B-PER       0.89      0.88      0.88     12641
       I-LOC       0.68      0.60      0.64      5154
      I-MISC       0.52      0.27      0.36      5579
       I-ORG       0.89      0.72      0.80     30834
       I-PER       0.84      0.92      0.88      7761
           O       0.99      0.99      0.99    965876

    accuracy                           0.97   1074181
   macro avg       0.76      0.74      0.75   1074181
weighted avg       0.97      0.97      0.97   1074181



In [17]:
# Same evaluation on the remaining OntoNotes data (the `onto_rest` split).
# NOTE: this rebinds `predictions`, `x` and `y`; unlike the newswire run above, these
# predictions are not written to disk.
predictions = trainer.predict(onto_rest)
x, y = evaluate_predictions(predictions, onto_rest)

Map:   0%|          | 0/100516 [00:00<?, ? examples/s]

Map:   1%|          | 760/100516 [00:00<00:13, 7493.37 examples/s]

Map:   2%|▏         | 1967/100516 [00:00<00:14, 6894.92 examples/s]

Map:   3%|▎         | 2911/100516 [00:00<00:14, 6614.09 examples/s]

Map:   4%|▍         | 3826/100516 [00:00<00:15, 6312.23 examples/s]

Map:   5%|▍         | 4803/100516 [00:00<00:15, 6109.74 examples/s]

Map:   6%|▌         | 5798/100516 [00:00<00:15, 5977.73 examples/s]

Map:   6%|▋         | 6406/100516 [00:01<00:26, 3601.10 examples/s]

Map:   7%|▋         | 7000/100516 [00:01<00:24, 3867.53 examples/s]

Map:   8%|▊         | 7840/100516 [00:01<00:19, 4707.88 examples/s]

Map:   8%|▊         | 8441/100516 [00:01<00:19, 4822.60 examples/s]

Map:   9%|▉         | 9408/100516 [00:01<00:17, 5218.48 examples/s]

Map:  10%|▉         | 10000/100516 [00:01<00:17, 5162.27 examples/s]

Map:  11%|█         | 10851/100516 [00:02<00:15, 5939.40 examples/s]

Map:  12%|█▏        | 11892/100516 [00:02<00:14, 5982.45 examples/s]

Map:  13%|█▎        | 12811/100516 [00:02<00:14, 5877.02 examples/s]

Map:  13%|█▎        | 13450/100516 [00:02<00:15, 5710.45 examples/s]

Map:  14%|█▍        | 14431/100516 [00:02<00:14, 5868.41 examples/s]

Map:  15%|█▌        | 15455/100516 [00:02<00:14, 5999.26 examples/s]

Map:  16%|█▋        | 16437/100516 [00:02<00:13, 6100.28 examples/s]

Map:  17%|█▋        | 17386/100516 [00:03<00:13, 6057.13 examples/s]

Map:  18%|█▊        | 18000/100516 [00:03<00:14, 5715.15 examples/s]

Map:  19%|█▊        | 18772/100516 [00:03<00:13, 6167.48 examples/s]

Map:  19%|█▉        | 19431/100516 [00:03<00:13, 5809.23 examples/s]

Map:  20%|██        | 20335/100516 [00:03<00:14, 5669.07 examples/s]

Map:  21%|██        | 21000/100516 [00:03<00:15, 5233.67 examples/s]

Map:  22%|██▏       | 21770/100516 [00:03<00:13, 5790.55 examples/s]

Map:  22%|██▏       | 22396/100516 [00:04<00:14, 5446.62 examples/s]

Map:  23%|██▎       | 23000/100516 [00:04<00:14, 5238.10 examples/s]

Map:  24%|██▎       | 23692/100516 [00:04<00:13, 5648.67 examples/s]

Map:  24%|██▍       | 24447/100516 [00:04<00:14, 5397.77 examples/s]

Map:  25%|██▌       | 25357/100516 [00:04<00:13, 5492.38 examples/s]

Map:  26%|██▌       | 26000/100516 [00:04<00:14, 5166.93 examples/s]

Map:  26%|██▋       | 26593/100516 [00:04<00:13, 5342.69 examples/s]

Map:  27%|██▋       | 27379/100516 [00:05<00:14, 4984.54 examples/s]

Map:  28%|██▊       | 28000/100516 [00:05<00:14, 4876.78 examples/s]

Map:  29%|██▊       | 28731/100516 [00:05<00:13, 5440.77 examples/s]

Map:  29%|██▉       | 29418/100516 [00:05<00:13, 5282.03 examples/s]

Map:  30%|██▉       | 30000/100516 [00:05<00:13, 5230.01 examples/s]

Map:  31%|███       | 31000/100516 [00:05<00:11, 5888.96 examples/s]

Map:  32%|███▏      | 31788/100516 [00:05<00:10, 6380.49 examples/s]

Map:  32%|███▏      | 32475/100516 [00:06<00:24, 2749.02 examples/s]

Map:  33%|███▎      | 33048/100516 [00:06<00:21, 3155.19 examples/s]

Map:  34%|███▍      | 33943/100516 [00:06<00:16, 4099.55 examples/s]

Map:  35%|███▍      | 35000/100516 [00:06<00:14, 4558.60 examples/s]

Map:  36%|███▌      | 36000/100516 [00:06<00:12, 5293.87 examples/s]

Map:  37%|███▋      | 36964/100516 [00:06<00:10, 6174.64 examples/s]

Map:  38%|███▊      | 38000/100516 [00:07<00:10, 5912.11 examples/s]

Map:  39%|███▉      | 39000/100516 [00:07<00:15, 4000.29 examples/s]

Map:  40%|███▉      | 39929/100516 [00:07<00:12, 4797.47 examples/s]

Map:  41%|████      | 40877/100516 [00:07<00:11, 5148.64 examples/s]

Map:  42%|████▏     | 41834/100516 [00:08<00:10, 5361.34 examples/s]

Map:  42%|████▏     | 42521/100516 [00:08<00:10, 5451.55 examples/s]

Map:  43%|████▎     | 43458/100516 [00:08<00:09, 5830.06 examples/s]

Map:  44%|████▍     | 44451/100516 [00:08<00:09, 6012.79 examples/s]

Map:  45%|████▌     | 45514/100516 [00:08<00:08, 6285.37 examples/s]

Map:  46%|████▋     | 46498/100516 [00:08<00:08, 6567.94 examples/s]

Map:  47%|████▋     | 47505/100516 [00:08<00:07, 6770.23 examples/s]

Map:  48%|████▊     | 48323/100516 [00:08<00:07, 6527.26 examples/s]

Map:  49%|████▊     | 49000/100516 [00:09<00:08, 5823.63 examples/s]

Map:  50%|████▉     | 49954/100516 [00:09<00:07, 6662.24 examples/s]

Map:  51%|█████     | 51000/100516 [00:09<00:07, 6199.53 examples/s]

Map:  52%|█████▏    | 51928/100516 [00:09<00:07, 6880.37 examples/s]

Map:  53%|█████▎    | 52965/100516 [00:09<00:06, 6888.07 examples/s]

Map:  54%|█████▎    | 53942/100516 [00:09<00:06, 6768.81 examples/s]

Map:  55%|█████▍    | 55000/100516 [00:10<00:07, 6429.48 examples/s]

Map:  56%|█████▌    | 55870/100516 [00:10<00:06, 6920.74 examples/s]

Map:  57%|█████▋    | 57000/100516 [00:10<00:06, 6545.80 examples/s]

Map:  58%|█████▊    | 57982/100516 [00:10<00:05, 7256.76 examples/s]

Map:  59%|█████▊    | 58965/100516 [00:10<00:05, 7031.00 examples/s]

Map:  60%|█████▉    | 60000/100516 [00:10<00:06, 6607.46 examples/s]

Map:  61%|██████    | 61000/100516 [00:10<00:05, 6947.08 examples/s]

Map:  62%|██████▏   | 61993/100516 [00:10<00:05, 7626.11 examples/s]

Map:  63%|██████▎   | 63000/100516 [00:11<00:05, 6982.35 examples/s]

Map:  64%|██████▎   | 63871/100516 [00:11<00:04, 7378.84 examples/s]

Map:  65%|██████▍   | 64956/100516 [00:11<00:05, 6495.20 examples/s]

Map:  66%|██████▌   | 65867/100516 [00:11<00:06, 5629.38 examples/s]

Map:  66%|██████▌   | 66573/100516 [00:11<00:06, 5033.39 examples/s]

Map:  67%|██████▋   | 67242/100516 [00:12<00:07, 4575.36 examples/s]

Map:  67%|██████▋   | 67730/100516 [00:12<00:07, 4633.43 examples/s]

Map:  68%|██████▊   | 68409/100516 [00:12<00:07, 4400.80 examples/s]

Map:  69%|██████▊   | 69000/100516 [00:12<00:07, 4477.56 examples/s]

Map:  69%|██████▉   | 69730/100516 [00:12<00:06, 5100.96 examples/s]

Map:  70%|███████   | 70510/100516 [00:12<00:06, 4747.13 examples/s]

Map:  71%|███████   | 71317/100516 [00:12<00:06, 4400.00 examples/s]

Map:  72%|███████▏  | 71964/100516 [00:13<00:05, 4815.65 examples/s]

Map:  72%|███████▏  | 72708/100516 [00:13<00:05, 4711.07 examples/s]

Map:  73%|███████▎  | 73437/100516 [00:13<00:08, 3186.50 examples/s]

Map:  74%|███████▎  | 74000/100516 [00:13<00:07, 3481.31 examples/s]

Map:  74%|███████▍  | 74859/100516 [00:13<00:05, 4403.01 examples/s]

Map:  75%|███████▌  | 75431/100516 [00:13<00:05, 4524.17 examples/s]

Map:  76%|███████▌  | 76000/100516 [00:14<00:05, 4628.89 examples/s]

Map:  76%|███████▋  | 76799/100516 [00:14<00:04, 5413.98 examples/s]

Map:  77%|███████▋  | 77800/100516 [00:14<00:04, 5509.02 examples/s]

Map:  78%|███████▊  | 78655/100516 [00:14<00:04, 5237.71 examples/s]

Map:  79%|███████▉  | 79332/100516 [00:14<00:04, 4894.90 examples/s]

Map:  80%|███████▉  | 79989/100516 [00:14<00:03, 5252.35 examples/s]

Map:  80%|████████  | 80603/100516 [00:14<00:04, 4752.70 examples/s]

Map:  81%|████████  | 81429/100516 [00:15<00:04, 4727.02 examples/s]

Map:  82%|████████▏ | 82000/100516 [00:15<00:03, 4855.89 examples/s]

Map:  83%|████████▎ | 82954/100516 [00:15<00:02, 5943.72 examples/s]

Map:  83%|████████▎ | 83899/100516 [00:15<00:02, 6060.34 examples/s]

Map:  84%|████████▍ | 84580/100516 [00:15<00:02, 5424.41 examples/s]

Map:  85%|████████▌ | 85445/100516 [00:15<00:02, 5156.17 examples/s]

Map:  86%|████████▌ | 86000/100516 [00:15<00:02, 5223.06 examples/s]

Map:  86%|████████▋ | 86840/100516 [00:16<00:02, 5966.56 examples/s]

Map:  87%|████████▋ | 87485/100516 [00:16<00:02, 5878.46 examples/s]

Map:  88%|████████▊ | 88327/100516 [00:16<00:02, 5784.52 examples/s]

Map:  89%|████████▊ | 89000/100516 [00:16<00:02, 5125.91 examples/s]

Map:  89%|████████▉ | 89708/100516 [00:16<00:01, 5574.08 examples/s]

Map:  90%|████████▉ | 90380/100516 [00:16<00:01, 5283.89 examples/s]

Map:  91%|█████████ | 91000/100516 [00:16<00:01, 5062.58 examples/s]

Map:  91%|█████████▏| 91865/100516 [00:16<00:01, 5914.01 examples/s]

Map:  92%|█████████▏| 92780/100516 [00:17<00:01, 5822.72 examples/s]

Map:  93%|█████████▎| 93421/100516 [00:17<00:01, 5593.52 examples/s]

Map:  94%|█████████▎| 94000/100516 [00:17<00:01, 5411.81 examples/s]

Map:  94%|█████████▍| 94815/100516 [00:17<00:00, 6095.18 examples/s]

Map:  95%|█████████▌| 95503/100516 [00:17<00:00, 5920.60 examples/s]

Map:  96%|█████████▌| 96564/100516 [00:17<00:00, 6316.33 examples/s]

Map:  97%|█████████▋| 97462/100516 [00:17<00:00, 6415.82 examples/s]

Map:  98%|█████████▊| 98355/100516 [00:17<00:00, 6258.93 examples/s]

Map:  98%|█████████▊| 99000/100516 [00:18<00:00, 5670.19 examples/s]

Map:  99%|█████████▉| 99758/100516 [00:18<00:00, 6116.40 examples/s]

Map: 100%|█████████▉| 100397/100516 [00:18<00:00, 5693.67 examples/s]

Map: 100%|██████████| 100516/100516 [00:20<00:00, 5002.94 examples/s]




              precision    recall  f1-score   support

       B-LOC       0.58      0.88      0.69     14389
      B-MISC       0.50      0.70      0.58     10792
       B-ORG       0.39      0.55      0.46      7517
       B-PER       0.43      0.90      0.58     14691
       I-LOC       0.53      0.59      0.56      4350
      I-MISC       0.48      0.22      0.31      6920
       I-ORG       0.68      0.51      0.58      9826
       I-PER       0.84      0.85      0.84     11300
           O       0.99      0.98      0.98   1568935

    accuracy                           0.96   1648720
   macro avg       0.60      0.69      0.62   1648720
weighted avg       0.97      0.96      0.97   1648720

