In [1]:

from collections import Counter

import numpy as np


import torch

from datasets import (

    load_from_disk,
)
from transformers import (
    DataCollatorForTokenClassification,
    Trainer,
    TrainingArguments,
)


import pickle

from sklearn.metrics import classification_report
from transformers import BertTokenizerFast, BertForTokenClassification

from evaluate import load as load_metric




In [2]:
model_name = "bert-base-cased"

In [3]:
conll_main = load_from_disk("./splits/conll_main")

ontonotes_main = load_from_disk("./splits/ontonotes_main")

ontonotes_nw_encoded = load_from_disk("./splits/ontonotes_newswire")
ontonotes_rest_encoded = load_from_disk("./splits/ontonotes_rest")

In [22]:
from datasets import Dataset

# Reverse map: int → string label
id2label = {0: 'O', 1: 'B-PER', 2: 'I-PER', 3: 'B-ORG', 4: 'I-ORG',
            5: 'B-LOC', 6: 'I-LOC', 7: 'B-MISC', 8: 'I-MISC'}

# Define a function to map the list of integer labels to strings


def map_labels_to_names(example):
    example['label_names'] = [id2label[label] for label in example['labels']]
    return example


# Apply to entire dataset
conll_main = conll_main.map(map_labels_to_names)

Map:   0%|          | 0/16595 [00:00<?, ? examples/s]

In [25]:
def map_ontonotes_labels(example):
    example['label_names'] = [ontonotes_id_to_label[i]
                              for i in example['labels']]
    return example


# Apply to dataset
ontonotes_main = ontonotes_main.map(map_ontonotes_labels)

Map:   0%|          | 0/61371 [00:00<?, ? examples/s]

In [44]:
# Replace 'your_file.pkl' with your actual path
with open('results/results/bert-base-cased_conll_results.pkl', 'rb') as f:
    onto_pred = pickle.load(f)

In [61]:
flat_pred, flat_gold = evaluate_predictions(onto_pred, ontonotes_main)

Map:   0%|          | 0/61371 [00:00<?, ? examples/s]

IndexError: list index out of range

In [55]:
from itertools import chain

# Flatten all token sequences into a single list
flat_tokens = list(chain.from_iterable(ontonotes_main['tokens']))
len(flat_tokens)

1110194

In [51]:
len(predictions[0])

4149

In [19]:
from collections import Counter
import pandas as pd


def compute_ner_distribution(dataset):
    # Flatten and filter out 'O'
    all_tags = [tag for example in dataset['ner_conll2003'] for tag in example]
    filtered_tags = [tag for tag in all_tags if tag != 'O']

    # Count tag frequencies
    tag_counts = Counter(filtered_tags)
    total = sum(tag_counts.values())

    # Create (tag, count, percentage) triples
    stats = sorted([(tag, count, count / total * 100)
                   for tag, count in tag_counts.items()])

    # Create DataFrame
    df = pd.DataFrame(stats, columns=["Tag", "Count", "Percentage"])
    return df


# Example usage
df = compute_ner_distribution(ontonotes_nw_encoded)
df2 = compute_ner_distribution(ontonotes_rest_encoded)
print('news')
print(df)
print('rest')
print(df2)

news
      Tag  Count  Percentage
0   B-LOC  16435   15.174738
1  B-MISC   7455    6.883339
2   B-ORG  22446   20.724805
3   B-PER  12641   11.671668
4   I-LOC   5154    4.758783
5  I-MISC   5579    5.151193
6   I-ORG  30834   28.469600
7   I-PER   7761    7.165874
rest
      Tag  Count  Percentage
0   B-LOC  14389   18.034718
1  B-MISC  10792   13.526352
2   B-ORG   7517    9.421570
3   B-PER  14691   18.413236
4   I-LOC   4350    5.452153
5  I-MISC   6920    8.673310
6   I-ORG   9826   12.315598
7   I-PER  11300   14.163063


# Load GPU

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


# Tokenisation & Alignment

In [24]:
ontonotes_id_to_label = {
    0: "O", 1: "B-CARDINAL", 2: "B-DATE", 3: "I-DATE", 4: "B-PERSON", 5: "I-PERSON",
    6: "B-NORP", 7: "B-GPE", 8: "I-GPE", 9: "B-LAW", 10: "I-LAW", 11: "B-ORG", 12: "I-ORG",
    13: "B-PERCENT", 14: "I-PERCENT", 15: "B-ORDINAL", 16: "B-MONEY", 17: "I-MONEY",
    18: "B-WORK_OF_ART", 19: "I-WORK_OF_ART", 20: "B-FAC", 21: "B-TIME", 22: "I-CARDINAL",
    23: "B-LOC", 24: "B-QUANTITY", 25: "I-QUANTITY", 26: "I-NORP", 27: "I-LOC",
    28: "B-PRODUCT", 29: "I-TIME", 30: "B-EVENT", 31: "I-EVENT", 32: "I-FAC",
    33: "B-LANGUAGE", 34: "I-PRODUCT", 35: "I-ORDINAL", 36: "I-LANGUAGE"
}

conll_label_to_id = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3,
                     'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8}
id2label = {v: k for k, v in conll_label_to_id.items()}

ontonotes_to_conll_entity = {
    "PERSON": "PER", "ORG": "ORG", "GPE": "LOC", "LOC": "LOC",
    "NORP": "MISC", "FAC": "MISC", "EVENT": "MISC", "WORK_OF_ART": "MISC",
    "LAW": "MISC", "PRODUCT": "MISC", "LANGUAGE": "MISC",
    "DATE": None, "TIME": None, "PERCENT": None, "MONEY": None,
    "QUANTITY": None, "ORDINAL": None, "CARDINAL": None
}

In [56]:
def process_data(data_list):

    def process_single(data):
        word_ids = data['word_ids']
        predictions = data['predictions']
        gold = data['gold']
        tokenized_tokens = data['tokens']

        word_ids = [a for a in word_ids if a is not None]

        processed_predictions = []
        processed_gold = []

        current_word_id = None
        current_predictions = []
        current_gold = []

        for idx, word_id in enumerate(word_ids):
            if word_id != current_word_id:
                if current_predictions:
                    processed_predictions.append(
                        Counter(current_predictions).most_common(1)[0][0])
                    processed_gold.append(
                        Counter(current_gold).most_common(1)[0][0])

                current_word_id = word_id
                current_predictions = [predictions[idx]]
                current_gold = [gold[idx]]
            else:
                current_predictions.append(predictions[idx])
                current_gold.append(gold[idx])

        if current_predictions:
            processed_predictions.append(
                Counter(current_predictions).most_common(1)[0][0])
            processed_gold.append(
                Counter(current_gold).most_common(1)[0][0])

        return processed_predictions, processed_gold

    processed_predictions_list = []
    processed_gold_list = []

    for data in data_list:
        processed_predictions, processed_gold = process_single(data)
        processed_predictions_list.append(processed_predictions)
        processed_gold_list.append(processed_gold)

    return processed_predictions_list, processed_gold_list


def evaluate_predictions(p, test_data):
    predictions, labels, _ = p

    pred_indices = [np.argmax(p, axis=-1) for p in predictions]
    label_indices = labels

    pred_tags = [[id2label[p] for p, l in zip(p_seq, l_seq) if l != -100]
                 for p_seq, l_seq in zip(pred_indices, label_indices)]
    gold_tags = [[id2label[l] for l in l_seq if l != -100]
                 for l_seq in label_indices]

    def add_preds(example, idx):
        length = len(example['word_ids'])
        example['predictions'] = pred_tags[idx][:length]
        example['gold'] = gold_tags[idx][:length]
        return example

    test_data = test_data.map(add_preds, with_indices=True)

    length = len(test_data['predictions'][0])

    pred, gold = process_data(test_data)

    flat_pred = [label for seq in pred for label in seq]
    flat_gold = [label for seq in gold for label in seq]

    print(classification_report(flat_gold, flat_pred, zero_division=0))

    return (flat_pred, flat_gold)

# Model

In [59]:
tokenizer = BertTokenizerFast.from_pretrained(model_name)

label_list = ['O', 'B-PER', 'I-PER', 'B-ORG',
              'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']


def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        is_split_into_words=True,
        padding=True,
        return_special_tokens_mask=True,
        return_offsets_mapping=True,
    )
    all_word_ids = []
    all_labels = []
    for i, labels in enumerate(examples["labels"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        all_word_ids.append(word_ids)
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:
                label_ids.append(-100)
            else:
                label_ids.append(labels[word_idx])
            previous_word_idx = word_idx
        all_labels.append(label_ids)

    tokenized_inputs["labels"] = all_labels
    tokenized_inputs["word_ids"] = all_word_ids
    return tokenized_inputs

In [60]:
conll_main = conll_main.map(tokenize_and_align_labels, batched=True)
ontonotes_main = ontonotes_main.map(tokenize_and_align_labels, batched=True)
onto_news = ontonotes_nw_encoded.map(tokenize_and_align_labels, batched=True)
onto_rest = ontonotes_rest_encoded.map(tokenize_and_align_labels, batched=True)

Map:   0%|          | 0/16595 [00:00<?, ? examples/s]

Map:   0%|          | 0/61371 [00:00<?, ? examples/s]

Map:   0%|          | 0/42753 [00:00<?, ? examples/s]

# Deciding params

In [9]:
train_data = conll_main
test_data = onto_news

In [10]:
train_data_name = 'news'

In [11]:
mod = BertForTokenClassification.from_pretrained(
    model_name, num_labels=len(label_list))

metric = load_metric("seqeval")


def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    return metric.compute(predictions=true_predictions, references=true_labels)


data_collator = DataCollatorForTokenClassification(tokenizer)

training_args = TrainingArguments(
    output_dir=f"./output/{model_name}",
    eval_strategy="no",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    report_to="none",
    fp16=True,
    save_strategy="no",
)

trainer = Trainer(
    model=mod,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  trainer = Trainer(


In [12]:
trainer.train()

Step,Training Loss
500,0.2483
1000,0.0893
1500,0.055
2000,0.0505
2500,0.0333
3000,0.0285


TrainOutput(global_step=3114, training_loss=0.08204985621960101, metrics={'train_runtime': 371.7057, 'train_samples_per_second': 133.937, 'train_steps_per_second': 8.378, 'total_flos': 3916666226042982.0, 'train_loss': 0.08204985621960101, 'epoch': 3.0})

In [13]:
trainer.save_model(f"./saved_model/{model_name}_{train_data_name}")

In [14]:
predictions = trainer.predict(test_data)

In [15]:
with open(f'./results/{model_name}_{train_data_name}_results.pkl', 'wb') as f:
    pickle.dump(predictions, f)

In [16]:
x, y = evaluate_predictions(predictions, test_data)

Map:   0%|          | 0/42753 [00:00<?, ? examples/s]

Map:   2%|▏         | 664/42753 [00:00<00:06, 6559.56 examples/s]

Map:   3%|▎         | 1342/42753 [00:00<00:08, 5096.73 examples/s]

Map:   5%|▍         | 2000/42753 [00:00<00:08, 4715.84 examples/s]

Map:   6%|▋         | 2723/42753 [00:00<00:07, 5505.22 examples/s]

Map:   8%|▊         | 3404/42753 [00:00<00:07, 5266.50 examples/s]

Map:   9%|▉         | 4000/42753 [00:00<00:07, 5129.90 examples/s]

Map:  11%|█         | 4800/42753 [00:00<00:06, 5919.96 examples/s]

Map:  13%|█▎        | 5418/42753 [00:00<00:06, 5576.56 examples/s]

Map:  14%|█▍        | 6000/42753 [00:01<00:06, 5355.86 examples/s]

Map:  16%|█▌        | 6866/42753 [00:01<00:05, 6246.79 examples/s]

Map:  18%|█▊        | 7795/42753 [00:01<00:05, 6022.60 examples/s]

Map:  20%|█▉        | 8444/42753 [00:01<00:05, 5743.14 examples/s]

Map:  22%|██▏       | 9404/42753 [00:01<00:05, 5828.32 examples/s]

Map:  23%|██▎       | 10000/42753 [00:01<00:05, 5603.14 examples/s]

Map:  25%|██▍       | 10652/42753 [00:01<00:05, 5825.65 examples/s]

Map:  27%|██▋       | 11366/42753 [00:02<00:05, 5370.73 examples/s]

Map:  28%|██▊       | 12000/42753 [00:02<00:06, 5090.16 examples/s]

Map:  30%|██▉       | 12771/42753 [00:02<00:05, 5717.93 examples/s]

Map:  31%|███▏      | 13408/42753 [00:02<00:05, 5450.70 examples/s]

Map:  33%|███▎      | 14000/42753 [00:02<00:05, 5247.45 examples/s]

Map:  34%|███▍      | 14642/42753 [00:02<00:05, 5543.26 examples/s]

Map:  36%|███▌      | 15433/42753 [00:02<00:05, 5324.71 examples/s]

Map:  37%|███▋      | 16000/42753 [00:02<00:05, 5222.19 examples/s]

Map:  39%|███▉      | 16802/42753 [00:03<00:04, 5931.32 examples/s]

Map:  42%|████▏     | 17807/42753 [00:03<00:04, 5837.52 examples/s]

Map:  44%|████▍     | 18792/42753 [00:03<00:04, 5754.43 examples/s]

Map:  45%|████▌     | 19414/42753 [00:03<00:04, 5518.82 examples/s]

Map:  47%|████▋     | 20000/42753 [00:03<00:04, 5332.40 examples/s]

Map:  49%|████▊     | 20810/42753 [00:03<00:03, 5995.28 examples/s]

Map:  50%|█████     | 21438/42753 [00:03<00:03, 5701.18 examples/s]

Map:  52%|█████▏    | 22402/42753 [00:04<00:03, 5788.11 examples/s]

Map:  54%|█████▍    | 23000/42753 [00:04<00:03, 5483.95 examples/s]

Map:  56%|█████▌    | 23816/42753 [00:04<00:03, 6134.36 examples/s]

Map:  58%|█████▊    | 24837/42753 [00:04<00:02, 6029.79 examples/s]

Map:  60%|██████    | 25855/42753 [00:04<00:02, 6012.09 examples/s]

Map:  63%|██████▎   | 26829/42753 [00:04<00:02, 5955.25 examples/s]

Map:  65%|██████▌   | 27790/42753 [00:04<00:02, 5813.28 examples/s]

Map:  67%|██████▋   | 28739/42753 [00:05<00:02, 5647.77 examples/s]

Map:  69%|██████▉   | 29434/42753 [00:05<00:02, 5477.54 examples/s]

Map:  70%|███████   | 30000/42753 [00:05<00:02, 5335.41 examples/s]

Map:  72%|███████▏  | 30800/42753 [00:05<00:02, 5941.11 examples/s]

Map:  74%|███████▎  | 31491/42753 [00:05<00:03, 3598.45 examples/s]

Map:  75%|███████▍  | 32000/42753 [00:06<00:03, 3414.89 examples/s]

Map:  76%|███████▌  | 32548/42753 [00:06<00:02, 3778.55 examples/s]

Map:  78%|███████▊  | 33269/42753 [00:06<00:02, 3733.47 examples/s]

Map:  79%|███████▉  | 33797/42753 [00:06<00:02, 4032.00 examples/s]

Map:  80%|████████  | 34336/42753 [00:06<00:02, 3915.26 examples/s]

Map:  82%|████████▏ | 35000/42753 [00:06<00:01, 4063.72 examples/s]

Map:  84%|████████▎ | 35714/42753 [00:06<00:01, 4739.49 examples/s]

Map:  85%|████████▌ | 36412/42753 [00:07<00:01, 4312.91 examples/s]

Map:  87%|████████▋ | 37000/42753 [00:07<00:01, 3722.05 examples/s]

Map:  88%|████████▊ | 37761/42753 [00:07<00:01, 4497.66 examples/s]

Map:  90%|████████▉ | 38332/42753 [00:07<00:00, 4441.11 examples/s]

Map:  91%|█████████ | 38997/42753 [00:07<00:00, 4942.55 examples/s]

Map:  93%|█████████▎| 39645/42753 [00:07<00:00, 4684.78 examples/s]

Map:  94%|█████████▍| 40289/42753 [00:07<00:00, 4462.63 examples/s]

Map:  96%|█████████▌| 40861/42753 [00:07<00:00, 4747.30 examples/s]

Map:  97%|█████████▋| 41423/42753 [00:08<00:00, 4547.59 examples/s]

Map:  98%|█████████▊| 42000/42753 [00:08<00:00, 4625.95 examples/s]

Map: 100%|█████████▉| 42632/42753 [00:08<00:00, 5045.78 examples/s]

Map: 100%|██████████| 42753/42753 [00:09<00:00, 4401.07 examples/s]




              precision    recall  f1-score   support

       B-LOC       0.83      0.91      0.87     16435
      B-MISC       0.52      0.73      0.60      7455
       B-ORG       0.72      0.76      0.74     22446
       B-PER       0.91      0.94      0.92     12641
       I-LOC       0.71      0.68      0.69      5154
      I-MISC       0.55      0.43      0.48      5579
       I-ORG       0.92      0.77      0.84     30834
       I-PER       0.91      0.92      0.92      7761
           O       0.99      0.99      0.99    965876

    accuracy                           0.97   1074181
   macro avg       0.78      0.79      0.78   1074181
weighted avg       0.97      0.97      0.97   1074181



In [17]:
predictions = trainer.predict(onto_rest)
x, y = evaluate_predictions(predictions, onto_rest)

Map:   0%|          | 0/100516 [00:00<?, ? examples/s]

Map:   1%|          | 766/100516 [00:00<00:13, 7546.51 examples/s]

Map:   2%|▏         | 1953/100516 [00:00<00:15, 6300.17 examples/s]

Map:   3%|▎         | 2862/100516 [00:00<00:15, 6165.79 examples/s]

Map:   4%|▍         | 3803/100516 [00:00<00:16, 5936.14 examples/s]

Map:   5%|▍         | 4702/100516 [00:00<00:17, 5470.05 examples/s]

Map:   5%|▌         | 5393/100516 [00:00<00:18, 5084.74 examples/s]

Map:   6%|▌         | 6000/100516 [00:01<00:20, 4641.92 examples/s]

Map:   7%|▋         | 6845/100516 [00:01<00:28, 3313.09 examples/s]

Map:   7%|▋         | 7418/100516 [00:01<00:26, 3519.99 examples/s]

Map:   8%|▊         | 8000/100516 [00:01<00:24, 3802.32 examples/s]

Map:   9%|▉         | 8869/100516 [00:01<00:19, 4765.92 examples/s]

Map:  10%|▉         | 9805/100516 [00:02<00:18, 4914.48 examples/s]

Map:  10%|█         | 10372/100516 [00:02<00:19, 4713.08 examples/s]

Map:  11%|█         | 11000/100516 [00:02<00:19, 4682.31 examples/s]

Map:  12%|█▏        | 11849/100516 [00:02<00:16, 5534.78 examples/s]

Map:  13%|█▎        | 12832/100516 [00:02<00:16, 5455.96 examples/s]

Map:  13%|█▎        | 13416/100516 [00:02<00:17, 5026.05 examples/s]

Map:  14%|█▍        | 14000/100516 [00:02<00:17, 5042.12 examples/s]

Map:  15%|█▍        | 14843/100516 [00:02<00:14, 5842.25 examples/s]

Map:  16%|█▌        | 15923/100516 [00:03<00:15, 5587.99 examples/s]

Map:  17%|█▋        | 16871/100516 [00:03<00:14, 5643.58 examples/s]

Map:  18%|█▊        | 17778/100516 [00:03<00:14, 5616.31 examples/s]

Map:  18%|█▊        | 18382/100516 [00:03<00:15, 5359.93 examples/s]

Map:  19%|█▉        | 19000/100516 [00:03<00:15, 5121.37 examples/s]

Map:  20%|█▉        | 19862/100516 [00:03<00:13, 5914.92 examples/s]

Map:  21%|██        | 20680/100516 [00:04<00:14, 5533.51 examples/s]

Map:  21%|██▏       | 21378/100516 [00:04<00:15, 5201.14 examples/s]

Map:  22%|██▏       | 22000/100516 [00:04<00:15, 4999.55 examples/s]

Map:  23%|██▎       | 22648/100516 [00:04<00:14, 5336.54 examples/s]

Map:  23%|██▎       | 23321/100516 [00:04<00:16, 4755.43 examples/s]

Map:  24%|██▍       | 23991/100516 [00:04<00:14, 5196.35 examples/s]

Map:  25%|██▍       | 24861/100516 [00:04<00:14, 5251.36 examples/s]

Map:  26%|██▌       | 25704/100516 [00:05<00:14, 5136.12 examples/s]

Map:  26%|██▌       | 26293/100516 [00:05<00:16, 4563.14 examples/s]

Map:  27%|██▋       | 26880/100516 [00:05<00:15, 4842.89 examples/s]

Map:  28%|██▊       | 27647/100516 [00:05<00:15, 4569.30 examples/s]

Map:  28%|██▊       | 28348/100516 [00:06<00:30, 2364.75 examples/s]

Map:  29%|██▉       | 29000/100516 [00:06<00:27, 2643.95 examples/s]

Map:  30%|██▉       | 29857/100516 [00:06<00:20, 3481.08 examples/s]

Map:  30%|███       | 30533/100516 [00:06<00:17, 3924.24 examples/s]

Map:  31%|███       | 31390/100516 [00:06<00:15, 4511.06 examples/s]

Map:  32%|███▏      | 32000/100516 [00:06<00:14, 4569.72 examples/s]

Map:  33%|███▎      | 32960/100516 [00:06<00:11, 5650.20 examples/s]

Map:  34%|███▎      | 33869/100516 [00:07<00:11, 5749.12 examples/s]

Map:  35%|███▍      | 35000/100516 [00:07<00:11, 5672.79 examples/s]

Map:  36%|███▌      | 36000/100516 [00:07<00:10, 6157.47 examples/s]

Map:  37%|███▋      | 36956/100516 [00:07<00:09, 6901.54 examples/s]

Map:  38%|███▊      | 37951/100516 [00:07<00:09, 6793.58 examples/s]

Map:  39%|███▉      | 39000/100516 [00:07<00:10, 5977.30 examples/s]

Map:  40%|███▉      | 40000/100516 [00:08<00:15, 4016.40 examples/s]

Map:  41%|████      | 40861/100516 [00:08<00:12, 4695.24 examples/s]

Map:  42%|████▏     | 41836/100516 [00:08<00:11, 4972.46 examples/s]

Map:  42%|████▏     | 42494/100516 [00:08<00:11, 5079.74 examples/s]

Map:  43%|████▎     | 43484/100516 [00:08<00:10, 5494.29 examples/s]

Map:  44%|████▍     | 44430/100516 [00:08<00:09, 5679.36 examples/s]

Map:  45%|████▌     | 45523/100516 [00:09<00:09, 5893.48 examples/s]

Map:  46%|████▋     | 46498/100516 [00:09<00:08, 6169.22 examples/s]

Map:  47%|████▋     | 47502/100516 [00:09<00:08, 6251.09 examples/s]

Map:  48%|████▊     | 48320/100516 [00:09<00:08, 6136.85 examples/s]

Map:  49%|████▊     | 49000/100516 [00:09<00:09, 5587.57 examples/s]

Map:  50%|████▉     | 49920/100516 [00:09<00:07, 6371.74 examples/s]

Map:  51%|█████     | 50983/100516 [00:10<00:07, 6522.21 examples/s]

Map:  52%|█████▏    | 51939/100516 [00:10<00:07, 6300.34 examples/s]

Map:  53%|█████▎    | 52934/100516 [00:10<00:07, 6295.10 examples/s]

Map:  54%|█████▎    | 53903/100516 [00:10<00:07, 6293.40 examples/s]

Map:  55%|█████▍    | 55000/100516 [00:10<00:07, 6050.83 examples/s]

Map:  56%|█████▌    | 55877/100516 [00:10<00:06, 6610.10 examples/s]

Map:  57%|█████▋    | 57000/100516 [00:10<00:06, 6242.95 examples/s]

Map:  58%|█████▊    | 57987/100516 [00:11<00:06, 6999.82 examples/s]

Map:  59%|█████▊    | 58935/100516 [00:11<00:06, 6787.81 examples/s]

Map:  60%|█████▉    | 60000/100516 [00:11<00:06, 6405.18 examples/s]

Map:  61%|██████    | 61000/100516 [00:11<00:05, 6740.28 examples/s]

Map:  62%|██████▏   | 61980/100516 [00:11<00:05, 7420.44 examples/s]

Map:  63%|██████▎   | 63000/100516 [00:11<00:05, 6781.58 examples/s]

Map:  64%|██████▎   | 63865/100516 [00:11<00:05, 7197.49 examples/s]

Map:  65%|██████▍   | 64931/100516 [00:12<00:05, 6212.82 examples/s]

Map:  66%|██████▌   | 65877/100516 [00:12<00:06, 5453.57 examples/s]

Map:  66%|██████▌   | 66575/100516 [00:12<00:06, 4939.48 examples/s]

Map:  67%|██████▋   | 67248/100516 [00:12<00:07, 4486.45 examples/s]

Map:  67%|██████▋   | 67740/100516 [00:12<00:07, 4567.43 examples/s]

Map:  68%|██████▊   | 68396/100516 [00:13<00:07, 4306.02 examples/s]

Map:  69%|██████▊   | 69000/100516 [00:13<00:07, 4395.46 examples/s]

Map:  69%|██████▉   | 69758/100516 [00:13<00:06, 5087.62 examples/s]

Map:  70%|███████   | 70415/100516 [00:13<00:09, 3157.88 examples/s]

Map:  71%|███████   | 70936/100516 [00:13<00:08, 3494.34 examples/s]

Map:  71%|███████   | 71594/100516 [00:13<00:08, 3545.77 examples/s]

Map:  72%|███████▏  | 72343/100516 [00:14<00:07, 3626.18 examples/s]

Map:  73%|███████▎  | 73000/100516 [00:14<00:07, 3815.02 examples/s]

Map:  73%|███████▎  | 73841/100516 [00:14<00:05, 4715.26 examples/s]

Map:  74%|███████▍  | 74413/100516 [00:14<00:05, 4644.17 examples/s]

Map:  75%|███████▍  | 75000/100516 [00:14<00:05, 4681.79 examples/s]

Map:  75%|███████▌  | 75819/100516 [00:14<00:04, 5508.79 examples/s]

Map:  76%|███████▋  | 76804/100516 [00:14<00:04, 5470.07 examples/s]

Map:  77%|███████▋  | 77757/100516 [00:15<00:04, 5338.94 examples/s]

Map:  78%|███████▊  | 78319/100516 [00:15<00:04, 4958.47 examples/s]

Map:  79%|███████▊  | 78964/100516 [00:15<00:04, 5283.49 examples/s]

Map:  79%|███████▉  | 79646/100516 [00:15<00:04, 4825.89 examples/s]

Map:  80%|███████▉  | 80295/100516 [00:15<00:04, 4515.24 examples/s]

Map:  80%|████████  | 80890/100516 [00:15<00:04, 4827.21 examples/s]

Map:  81%|████████  | 81442/100516 [00:15<00:04, 4621.43 examples/s]

Map:  82%|████████▏ | 82000/100516 [00:16<00:03, 4733.36 examples/s]

Map:  83%|████████▎ | 82947/100516 [00:16<00:02, 5916.57 examples/s]

Map:  83%|████████▎ | 83872/100516 [00:16<00:02, 5955.95 examples/s]

Map:  84%|████████▍ | 84557/100516 [00:16<00:03, 5305.36 examples/s]

Map:  85%|████████▌ | 85472/100516 [00:16<00:02, 5090.42 examples/s]

Map:  86%|████████▌ | 86400/100516 [00:16<00:02, 5401.33 examples/s]

Map:  87%|████████▋ | 87000/100516 [00:16<00:02, 5304.65 examples/s]

Map:  87%|████████▋ | 87945/100516 [00:17<00:02, 6252.41 examples/s]

Map:  88%|████████▊ | 88866/100516 [00:17<00:02, 5431.90 examples/s]

Map:  89%|████████▉ | 89724/100516 [00:17<00:02, 5144.75 examples/s]

Map:  90%|████████▉ | 90368/100516 [00:17<00:02, 4963.56 examples/s]

Map:  91%|█████████ | 91000/100516 [00:17<00:01, 4836.20 examples/s]

Map:  91%|█████████▏| 91866/100516 [00:17<00:01, 5670.41 examples/s]

Map:  92%|█████████▏| 92799/100516 [00:17<00:01, 5660.36 examples/s]

Map:  93%|█████████▎| 93400/100516 [00:18<00:01, 5411.05 examples/s]

Map:  94%|█████████▎| 94000/100516 [00:18<00:01, 5245.47 examples/s]

Map:  94%|█████████▍| 94805/100516 [00:18<00:00, 5918.89 examples/s]

Map:  95%|█████████▌| 95501/100516 [00:18<00:00, 5766.78 examples/s]

Map:  96%|█████████▌| 96581/100516 [00:18<00:00, 6247.59 examples/s]

Map:  97%|█████████▋| 97456/100516 [00:18<00:00, 6306.33 examples/s]

Map:  98%|█████████▊| 98354/100516 [00:18<00:00, 6199.39 examples/s]

Map:  98%|█████████▊| 99000/100516 [00:19<00:00, 5587.67 examples/s]

Map:  99%|█████████▉| 99773/100516 [00:19<00:00, 6079.85 examples/s]

Map: 100%|██████████| 100516/100516 [00:20<00:00, 1224.34 examples/s]

Map: 100%|██████████| 100516/100516 [00:21<00:00, 4760.87 examples/s]




              precision    recall  f1-score   support

       B-LOC       0.58      0.88      0.70     14389
      B-MISC       0.48      0.72      0.58     10792
       B-ORG       0.42      0.61      0.50      7517
       B-PER       0.45      0.90      0.60     14691
       I-LOC       0.54      0.66      0.59      4350
      I-MISC       0.44      0.39      0.41      6920
       I-ORG       0.70      0.62      0.66      9826
       I-PER       0.85      0.85      0.85     11300
           O       0.99      0.98      0.98   1568935

    accuracy                           0.96   1648720
   macro avg       0.61      0.73      0.65   1648720
weighted avg       0.97      0.96      0.97   1648720

