In [1]:
from datasets import load_dataset

raw_datasets = load_dataset("conll2003",trust_remote_code=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
raw_datasets

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3453
    })
})

In [3]:
raw_datasets["train"][0]["tokens"], raw_datasets["train"][0]["ner_tags"], raw_datasets["train"][0]["pos_tags"], raw_datasets["train"][0]["chunk_tags"]

(['EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'lamb', '.'],
 [3, 0, 7, 0, 0, 0, 7, 0, 0],
 [22, 42, 16, 21, 35, 37, 16, 21, 7],
 [11, 21, 11, 12, 21, 22, 11, 12, 0])

In [4]:
ner_feature = raw_datasets["train"].features["ner_tags"]
ner_feature

Sequence(feature=ClassLabel(names=['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC'], id=None), length=-1, id=None)

In [5]:
label_names = ner_feature.feature.names
label_names

['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']

In [6]:
words = raw_datasets["train"][0]["tokens"]
labels = raw_datasets["train"][0]["ner_tags"]
line1 = ""
line2 = ""
for word, label in zip(words, labels):
    full_label = label_names[label]
    max_length = max(len(word), len(full_label))
    line1 += word + " " * (max_length - len(word) + 1)
    line2 += full_label + " " * (max_length - len(full_label) + 1)

print(line1)
print(line2)

EU    rejects German call to boycott British lamb . 
B-ORG O       B-MISC O    O  O       B-MISC  O    O 


In [7]:
words = raw_datasets["train"][4]["tokens"]
labels = raw_datasets["train"][4]["ner_tags"]
line1 = ""
line2 = ""
for word, label in zip(words, labels):
    full_label = label_names[label]
    max_length = max(len(word), len(full_label))
    line1 += word + " " * (max_length - len(word) + 1)
    line2 += full_label + " " * (max_length - len(full_label) + 1)

print(line1)
print(line2)

Germany 's representative to the European Union 's veterinary committee Werner Zwingmann said on Wednesday consumers should buy sheepmeat from countries other than Britain until the scientific advice was clearer . 
B-LOC   O  O              O  O   B-ORG    I-ORG O  O          O         B-PER  I-PER     O    O  O         O         O      O   O         O    O         O     O    B-LOC   O     O   O          O      O   O       O 


In [8]:
from transformers import AutoTokenizer

model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [9]:
tokenizer.is_fast

True

In [10]:
inputs = tokenizer(raw_datasets["train"][0]["tokens"], is_split_into_words=True)
inputs.tokens()

['[CLS]',
 'EU',
 'rejects',
 'German',
 'call',
 'to',
 'boycott',
 'British',
 'la',
 '##mb',
 '.',
 '[SEP]']

In [11]:
inputs.word_ids()

[None, 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, None]

In [12]:
def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    current_word = None
    for word_id in word_ids:
        if word_id != current_word:
            # Start of a new word!
            current_word = word_id
            label = -100 if word_id is None else labels[word_id]
            new_labels.append(label)
        elif word_id is None:
            # Special token
            new_labels.append(-100)
        else:
            # Same word as previous token
            label = labels[word_id]
            # If the label is B-XXX we change it to I-XXX
            if label % 2 == 1:
                label += 1
            new_labels.append(label)

    return new_labels

In [13]:
labels = raw_datasets["train"][0]["ner_tags"]
word_ids = inputs.word_ids()
print(labels)
print(align_labels_with_tokens(labels, word_ids))

[3, 0, 7, 0, 0, 0, 7, 0, 0]
[-100, 3, 0, 7, 0, 0, 0, 7, 0, 0, 0, -100]


In [14]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"], truncation=True, is_split_into_words=True
    )
    all_labels = examples["ner_tags"]
    new_labels = []
    for i, labels in enumerate(all_labels):
        word_ids = tokenized_inputs.word_ids(i)
        new_labels.append(align_labels_with_tokens(labels, word_ids))

    tokenized_inputs["labels"] = new_labels
    return tokenized_inputs

In [15]:
tokenized_datasets = raw_datasets.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=raw_datasets["train"].column_names,
)

In [16]:
from transformers import DataCollatorForTokenClassification

data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [17]:
batch = data_collator([tokenized_datasets["train"][i] for i in range(2)])
batch["labels"]

tensor([[-100,    3,    0,    7,    0,    0,    0,    7,    0,    0,    0, -100],
        [-100,    1,    2, -100, -100, -100, -100, -100, -100, -100, -100, -100]])

In [18]:
for i in range(2):
    print(tokenized_datasets["train"][i]["labels"])

[-100, 3, 0, 7, 0, 0, 0, 7, 0, 0, 0, -100]
[-100, 1, 2, -100]


In [19]:
!pip install seqeval



In [20]:
import evaluate

metric = evaluate.load("seqeval")

In [21]:
labels = raw_datasets["train"][0]["ner_tags"]
labels = [label_names[i] for i in labels]
labels

['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O']

In [22]:
predictions = labels.copy()
predictions[2] = "O"
metric.compute(predictions=[predictions], references=[labels])

{'MISC': {'precision': 1.0,
  'recall': 0.5,
  'f1': 0.6666666666666666,
  'number': 2},
 'ORG': {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 1},
 'overall_precision': 1.0,
 'overall_recall': 0.6666666666666666,
 'overall_f1': 0.8,
 'overall_accuracy': 0.8888888888888888}

In [23]:
import numpy as np


def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    # Remove ignored index (special tokens) and convert to labels
    true_labels = [[label_names[l] for l in label if l != -100] for label in labels]
    true_predictions = [
        [label_names[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)

    # Extract overall metrics
    overall_metrics = {
        "precision": all_metrics["overall_precision"],
        "recall": all_metrics["overall_recall"],
        "f1": all_metrics["overall_f1"],
        "accuracy": all_metrics["overall_accuracy"],
    }

    # Extract per-label metrics
    per_label_metrics = {
        label: {
            "precision": metrics["precision"],
            "recall": metrics["recall"],
            "f1": metrics["f1"],
            "number": metrics["number"],
        }
        for label, metrics in all_metrics.items() if not label.startswith("overall")
    }

    return {
        "overall": overall_metrics,
        "per_label": per_label_metrics,
    }

In [24]:
id2label = {i: label for i, label in enumerate(label_names)}
label2id = {v: k for k, v in id2label.items()}

id2label, label2id 

({0: 'O',
  1: 'B-PER',
  2: 'I-PER',
  3: 'B-ORG',
  4: 'I-ORG',
  5: 'B-LOC',
  6: 'I-LOC',
  7: 'B-MISC',
  8: 'I-MISC'},
 {'O': 0,
  'B-PER': 1,
  'I-PER': 2,
  'B-ORG': 3,
  'I-ORG': 4,
  'B-LOC': 5,
  'I-LOC': 6,
  'B-MISC': 7,
  'I-MISC': 8})

In [25]:
from transformers import AutoModelForTokenClassification

model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [26]:
model.config.num_labels

9

In [27]:
from transformers import TrainingArguments
from transformers import Trainer

args = TrainingArguments(
    "bert-finetuned-ner-checkpoint",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=20,
    weight_decay=0.01,

)


trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)




In [28]:
trainer.train()

  attn_output = torch.nn.functional.scaled_dot_product_attention(
  1%|▏         | 505/35120 [00:21<24:22, 23.67it/s]

{'loss': 0.2659, 'grad_norm': 3.4286012649536133, 'learning_rate': 1.9715261958997724e-05, 'epoch': 0.28}


  3%|▎         | 1002/35120 [01:12<59:11,  9.61it/s] 

{'loss': 0.1081, 'grad_norm': 0.7319045066833496, 'learning_rate': 1.9430523917995446e-05, 'epoch': 0.57}


  4%|▍         | 1502/35120 [02:04<56:36,  9.90it/s]  

{'loss': 0.0777, 'grad_norm': 3.7819042205810547, 'learning_rate': 1.9145785876993168e-05, 'epoch': 0.85}


  5%|▍         | 1755/35120 [02:30<57:27,  9.68it/s]  Trainer is attempting to log a value of "{'precision': 0.8912693298969072, 'recall': 0.931167956916863, 'f1': 0.9107818930041152, 'accuracy': 0.9814269735680226}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9239187076602398, 'recall': 0.965160587915079, 'f1': 0.9440894568690097, 'number': 1837}, 'MISC': {'precision': 0.7727272727272727, 'recall': 0.8297180043383948, 'f1': 0.8002092050209204, 'number': 922}, 'ORG': {'precision': 0.8624823695345557, 'recall': 0.9120059656972409, 'f1': 0.8865530989488946, 'number': 1341}, 'PER': {'precision': 0.9420520999468368, 'recall': 0.9619978284473398, 'f1': 0.9519204942250873, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so 

{'eval_loss': 0.07203580439090729, 'eval_overall': {'precision': 0.8912693298969072, 'recall': 0.931167956916863, 'f1': 0.9107818930041152, 'accuracy': 0.9814269735680226}, 'eval_per_label': {'LOC': {'precision': 0.9239187076602398, 'recall': 0.965160587915079, 'f1': 0.9440894568690097, 'number': 1837}, 'MISC': {'precision': 0.7727272727272727, 'recall': 0.8297180043383948, 'f1': 0.8002092050209204, 'number': 922}, 'ORG': {'precision': 0.8624823695345557, 'recall': 0.9120059656972409, 'f1': 0.8865530989488946, 'number': 1341}, 'PER': {'precision': 0.9420520999468368, 'recall': 0.9619978284473398, 'f1': 0.9519204942250873, 'number': 1842}}, 'eval_runtime': 7.4533, 'eval_samples_per_second': 436.049, 'eval_steps_per_second': 54.607, 'epoch': 1.0}


  6%|▌         | 2003/35120 [03:04<44:11, 12.49it/s]   

{'loss': 0.0631, 'grad_norm': 0.11871737241744995, 'learning_rate': 1.886104783599089e-05, 'epoch': 1.14}


  7%|▋         | 2502/35120 [03:53<55:29,  9.80it/s]  

{'loss': 0.0464, 'grad_norm': 0.2318667322397232, 'learning_rate': 1.8576309794988612e-05, 'epoch': 1.42}


  9%|▊         | 3002/35120 [04:45<55:40,  9.61it/s]  

{'loss': 0.0442, 'grad_norm': 0.35079020261764526, 'learning_rate': 1.8291571753986334e-05, 'epoch': 1.71}


 10%|▉         | 3502/35120 [05:37<54:56,  9.59it/s]  

{'loss': 0.0411, 'grad_norm': 0.06829982250928879, 'learning_rate': 1.8006833712984056e-05, 'epoch': 1.99}


 10%|█         | 3512/35120 [05:38<50:22, 10.46it/s]Trainer is attempting to log a value of "{'precision': 0.9304723885562209, 'recall': 0.9414338606529788, 'f1': 0.9359210306173666, 'accuracy': 0.9842968152116324}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9511015583019882, 'recall': 0.9635274904735982, 'f1': 0.9572742022714981, 'number': 1837}, 'MISC': {'precision': 0.8725910064239829, 'recall': 0.8839479392624728, 'f1': 0.8782327586206896, 'number': 922}, 'ORG': {'precision': 0.9050822122571002, 'recall': 0.9030574198359433, 'f1': 0.9040686823441583, 'number': 1341}, 'PER': {'precision': 0.9568919638105375, 'recall': 0.9761129207383279, 'f1': 0.9664068798710024, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so 

{'eval_loss': 0.07787470519542694, 'eval_overall': {'precision': 0.9304723885562209, 'recall': 0.9414338606529788, 'f1': 0.9359210306173666, 'accuracy': 0.9842968152116324}, 'eval_per_label': {'LOC': {'precision': 0.9511015583019882, 'recall': 0.9635274904735982, 'f1': 0.9572742022714981, 'number': 1837}, 'MISC': {'precision': 0.8725910064239829, 'recall': 0.8839479392624728, 'f1': 0.8782327586206896, 'number': 922}, 'ORG': {'precision': 0.9050822122571002, 'recall': 0.9030574198359433, 'f1': 0.9040686823441583, 'number': 1341}, 'PER': {'precision': 0.9568919638105375, 'recall': 0.9761129207383279, 'f1': 0.9664068798710024, 'number': 1842}}, 'eval_runtime': 7.5011, 'eval_samples_per_second': 433.267, 'eval_steps_per_second': 54.258, 'epoch': 2.0}


 11%|█▏        | 4002/35120 [06:34<54:05,  9.59it/s]   

{'loss': 0.0258, 'grad_norm': 4.330985069274902, 'learning_rate': 1.7722095671981778e-05, 'epoch': 2.28}


 13%|█▎        | 4501/35120 [07:26<49:48, 10.25it/s]

{'loss': 0.0284, 'grad_norm': 0.06637139618396759, 'learning_rate': 1.74373576309795e-05, 'epoch': 2.56}


 14%|█▍        | 5002/35120 [08:18<52:51,  9.50it/s]  

{'loss': 0.0269, 'grad_norm': 11.263030052185059, 'learning_rate': 1.7152619589977222e-05, 'epoch': 2.85}


 15%|█▍        | 5267/35120 [08:45<42:01, 11.84it/s]Trainer is attempting to log a value of "{'precision': 0.9318106587222774, 'recall': 0.9474924267923258, 'f1': 0.9395861148197597, 'accuracy': 0.9857685288750221}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9626218851570965, 'recall': 0.9673380511703865, 'f1': 0.9649742058104807, 'number': 1837}, 'MISC': {'precision': 0.8943355119825708, 'recall': 0.8904555314533622, 'f1': 0.8923913043478261, 'number': 922}, 'ORG': {'precision': 0.8772791023842917, 'recall': 0.9328859060402684, 'f1': 0.9042284062161186, 'number': 1341}, 'PER': {'precision': 0.9616630669546437, 'recall': 0.9668838219326819, 'f1': 0.9642663779101246, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so 

{'eval_loss': 0.07202505320310593, 'eval_overall': {'precision': 0.9318106587222774, 'recall': 0.9474924267923258, 'f1': 0.9395861148197597, 'accuracy': 0.9857685288750221}, 'eval_per_label': {'LOC': {'precision': 0.9626218851570965, 'recall': 0.9673380511703865, 'f1': 0.9649742058104807, 'number': 1837}, 'MISC': {'precision': 0.8943355119825708, 'recall': 0.8904555314533622, 'f1': 0.8923913043478261, 'number': 922}, 'ORG': {'precision': 0.8772791023842917, 'recall': 0.9328859060402684, 'f1': 0.9042284062161186, 'number': 1341}, 'PER': {'precision': 0.9616630669546437, 'recall': 0.9668838219326819, 'f1': 0.9642663779101246, 'number': 1842}}, 'eval_runtime': 6.5043, 'eval_samples_per_second': 499.669, 'eval_steps_per_second': 62.574, 'epoch': 3.0}


 16%|█▌        | 5502/35120 [09:17<52:35,  9.39it/s]   

{'loss': 0.0226, 'grad_norm': 0.7242124676704407, 'learning_rate': 1.6867881548974945e-05, 'epoch': 3.13}


 17%|█▋        | 6001/35120 [10:08<49:51,  9.74it/s]

{'loss': 0.0165, 'grad_norm': 0.006902824155986309, 'learning_rate': 1.6583143507972667e-05, 'epoch': 3.42}


 19%|█▊        | 6502/35120 [11:01<49:41,  9.60it/s]

{'loss': 0.0176, 'grad_norm': 0.031127002090215683, 'learning_rate': 1.629840546697039e-05, 'epoch': 3.7}


 20%|█▉        | 7002/35120 [11:50<48:47,  9.60it/s]

{'loss': 0.0196, 'grad_norm': 0.0038718737196177244, 'learning_rate': 1.601366742596811e-05, 'epoch': 3.99}


 20%|█▉        | 7023/35120 [11:52<46:26, 10.08it/s]Trainer is attempting to log a value of "{'precision': 0.930635838150289, 'recall': 0.9483338943116796, 'f1': 0.9394015170459282, 'accuracy': 0.9850621063165951}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9533011272141707, 'recall': 0.9667936853565596, 'f1': 0.96, 'number': 1837}, 'MISC': {'precision': 0.8623188405797102, 'recall': 0.903470715835141, 'f1': 0.8824152542372882, 'number': 922}, 'ORG': {'precision': 0.8995664739884393, 'recall': 0.9284116331096197, 'f1': 0.9137614678899083, 'number': 1341}, 'PER': {'precision': 0.9668838219326819, 'recall': 0.9668838219326819, 'f1': 0.9668838219326819, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this 

{'eval_loss': 0.08569859713315964, 'eval_overall': {'precision': 0.930635838150289, 'recall': 0.9483338943116796, 'f1': 0.9394015170459282, 'accuracy': 0.9850621063165951}, 'eval_per_label': {'LOC': {'precision': 0.9533011272141707, 'recall': 0.9667936853565596, 'f1': 0.96, 'number': 1837}, 'MISC': {'precision': 0.8623188405797102, 'recall': 0.903470715835141, 'f1': 0.8824152542372882, 'number': 922}, 'ORG': {'precision': 0.8995664739884393, 'recall': 0.9284116331096197, 'f1': 0.9137614678899083, 'number': 1341}, 'PER': {'precision': 0.9668838219326819, 'recall': 0.9668838219326819, 'f1': 0.9668838219326819, 'number': 1842}}, 'eval_runtime': 7.5249, 'eval_samples_per_second': 431.899, 'eval_steps_per_second': 54.087, 'epoch': 4.0}


 21%|██▏       | 7502/35120 [12:50<49:42,  9.26it/s]   

{'loss': 0.0138, 'grad_norm': 0.17987537384033203, 'learning_rate': 1.5728929384965833e-05, 'epoch': 4.27}


 23%|██▎       | 8001/35120 [13:42<46:54,  9.64it/s]  

{'loss': 0.0134, 'grad_norm': 1.4385559558868408, 'learning_rate': 1.5444191343963555e-05, 'epoch': 4.56}


 24%|██▍       | 8500/35120 [14:31<21:44, 20.40it/s]

{'loss': 0.0139, 'grad_norm': 0.02356315404176712, 'learning_rate': 1.5159453302961277e-05, 'epoch': 4.84}


 25%|██▍       | 8779/35120 [15:00<45:42,  9.60it/s]Trainer is attempting to log a value of "{'precision': 0.9310686015831134, 'recall': 0.9501851228542578, 'f1': 0.9405297351324339, 'accuracy': 0.9851062577264967}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9627228525121556, 'recall': 0.9700598802395209, 'f1': 0.9663774403470715, 'number': 1837}, 'MISC': {'precision': 0.8386454183266933, 'recall': 0.913232104121475, 'f1': 0.8743509865005192, 'number': 922}, 'ORG': {'precision': 0.9070110701107011, 'recall': 0.9164802386278896, 'f1': 0.9117210682492581, 'number': 1341}, 'PER': {'precision': 0.9670981661272924, 'recall': 0.9733984799131379, 'f1': 0.9702380952380952, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so w

{'eval_loss': 0.08417785912752151, 'eval_overall': {'precision': 0.9310686015831134, 'recall': 0.9501851228542578, 'f1': 0.9405297351324339, 'accuracy': 0.9851062577264967}, 'eval_per_label': {'LOC': {'precision': 0.9627228525121556, 'recall': 0.9700598802395209, 'f1': 0.9663774403470715, 'number': 1837}, 'MISC': {'precision': 0.8386454183266933, 'recall': 0.913232104121475, 'f1': 0.8743509865005192, 'number': 922}, 'ORG': {'precision': 0.9070110701107011, 'recall': 0.9164802386278896, 'f1': 0.9117210682492581, 'number': 1341}, 'PER': {'precision': 0.9670981661272924, 'recall': 0.9733984799131379, 'f1': 0.9702380952380952, 'number': 1842}}, 'eval_runtime': 7.5038, 'eval_samples_per_second': 433.114, 'eval_steps_per_second': 54.239, 'epoch': 5.0}


 26%|██▌       | 9001/35120 [15:32<46:42,  9.32it/s]   

{'loss': 0.0092, 'grad_norm': 0.7477717399597168, 'learning_rate': 1.4874715261958999e-05, 'epoch': 5.13}


 27%|██▋       | 9502/35120 [16:24<43:01,  9.92it/s]

{'loss': 0.006, 'grad_norm': 0.04322722181677818, 'learning_rate': 1.4589977220956721e-05, 'epoch': 5.41}


 28%|██▊       | 10001/35120 [17:15<36:13, 11.56it/s]

{'loss': 0.0073, 'grad_norm': 0.051729559898376465, 'learning_rate': 1.4305239179954442e-05, 'epoch': 5.69}


 30%|██▉       | 10502/35120 [18:05<42:47,  9.59it/s]

{'loss': 0.0079, 'grad_norm': 0.012189100496470928, 'learning_rate': 1.4020501138952165e-05, 'epoch': 5.98}


 30%|███       | 10536/35120 [18:08<40:11, 10.19it/s]Trainer is attempting to log a value of "{'precision': 0.9291845493562232, 'recall': 0.9473241332884551, 'f1': 0.9381666666666667, 'accuracy': 0.9844734208512392}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9546424759871932, 'recall': 0.9738704409363091, 'f1': 0.9641606036108865, 'number': 1837}, 'MISC': {'precision': 0.858739837398374, 'recall': 0.9164859002169198, 'f1': 0.8866736621196223, 'number': 922}, 'ORG': {'precision': 0.8921852387843705, 'recall': 0.9194630872483222, 'f1': 0.9056188027910393, 'number': 1341}, 'PER': {'precision': 0.9691969196919692, 'recall': 0.9565689467969598, 'f1': 0.9628415300546448, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so 

{'eval_loss': 0.09725591540336609, 'eval_overall': {'precision': 0.9291845493562232, 'recall': 0.9473241332884551, 'f1': 0.9381666666666667, 'accuracy': 0.9844734208512392}, 'eval_per_label': {'LOC': {'precision': 0.9546424759871932, 'recall': 0.9738704409363091, 'f1': 0.9641606036108865, 'number': 1837}, 'MISC': {'precision': 0.858739837398374, 'recall': 0.9164859002169198, 'f1': 0.8866736621196223, 'number': 922}, 'ORG': {'precision': 0.8921852387843705, 'recall': 0.9194630872483222, 'f1': 0.9056188027910393, 'number': 1341}, 'PER': {'precision': 0.9691969196919692, 'recall': 0.9565689467969598, 'f1': 0.9628415300546448, 'number': 1842}}, 'eval_runtime': 7.4822, 'eval_samples_per_second': 434.362, 'eval_steps_per_second': 54.395, 'epoch': 6.0}


 31%|███▏      | 11002/35120 [19:05<42:10,  9.53it/s]   

{'loss': 0.008, 'grad_norm': 0.0029869996942579746, 'learning_rate': 1.3735763097949887e-05, 'epoch': 6.26}


 33%|███▎      | 11501/35120 [19:56<41:08,  9.57it/s]

{'loss': 0.0066, 'grad_norm': 0.018263721838593483, 'learning_rate': 1.3451025056947608e-05, 'epoch': 6.55}


 34%|███▍      | 12002/35120 [20:45<40:05,  9.61it/s]

{'loss': 0.0057, 'grad_norm': 0.020874306559562683, 'learning_rate': 1.3166287015945332e-05, 'epoch': 6.83}


 35%|███▍      | 12291/35120 [21:16<36:54, 10.31it/s]Trainer is attempting to log a value of "{'precision': 0.9343584656084656, 'recall': 0.9510265903736116, 'f1': 0.9426188490408675, 'accuracy': 0.9859745687878966}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9689373297002725, 'recall': 0.9678824169842134, 'f1': 0.968409586056645, 'number': 1837}, 'MISC': {'precision': 0.8805031446540881, 'recall': 0.911062906724512, 'f1': 0.8955223880597015, 'number': 922}, 'ORG': {'precision': 0.8851063829787233, 'recall': 0.930648769574944, 'f1': 0.9073064340239914, 'number': 1341}, 'PER': {'precision': 0.965386695511087, 'recall': 0.9690553745928339, 'f1': 0.967217556217827, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we d

{'eval_loss': 0.097112737596035, 'eval_overall': {'precision': 0.9343584656084656, 'recall': 0.9510265903736116, 'f1': 0.9426188490408675, 'accuracy': 0.9859745687878966}, 'eval_per_label': {'LOC': {'precision': 0.9689373297002725, 'recall': 0.9678824169842134, 'f1': 0.968409586056645, 'number': 1837}, 'MISC': {'precision': 0.8805031446540881, 'recall': 0.911062906724512, 'f1': 0.8955223880597015, 'number': 922}, 'ORG': {'precision': 0.8851063829787233, 'recall': 0.930648769574944, 'f1': 0.9073064340239914, 'number': 1341}, 'PER': {'precision': 0.965386695511087, 'recall': 0.9690553745928339, 'f1': 0.967217556217827, 'number': 1842}}, 'eval_runtime': 7.4789, 'eval_samples_per_second': 434.555, 'eval_steps_per_second': 54.42, 'epoch': 7.0}


 36%|███▌      | 12502/35120 [21:46<39:15,  9.60it/s]  

{'loss': 0.0031, 'grad_norm': 0.0007487820694223046, 'learning_rate': 1.2881548974943054e-05, 'epoch': 7.12}


 37%|███▋      | 13002/35120 [22:38<38:38,  9.54it/s]

{'loss': 0.0037, 'grad_norm': 15.724783897399902, 'learning_rate': 1.2596810933940776e-05, 'epoch': 7.4}


 38%|███▊      | 13502/35120 [23:27<37:59,  9.48it/s]

{'loss': 0.0061, 'grad_norm': 0.614613950252533, 'learning_rate': 1.2312072892938498e-05, 'epoch': 7.69}


 40%|███▉      | 14002/35120 [24:19<35:41,  9.86it/s]

{'loss': 0.0036, 'grad_norm': 0.0012391641503199935, 'learning_rate': 1.2027334851936218e-05, 'epoch': 7.97}


 40%|███▉      | 14047/35120 [24:24<37:02,  9.48it/s]Trainer is attempting to log a value of "{'precision': 0.9285831285831286, 'recall': 0.9540558734432851, 'f1': 0.9411471735701834, 'accuracy': 0.9852387119562018}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9664864864864865, 'recall': 0.9733260751224823, 'f1': 0.9698942229454841, 'number': 1837}, 'MISC': {'precision': 0.8497983870967742, 'recall': 0.9143167028199566, 'f1': 0.8808777429467085, 'number': 922}, 'ORG': {'precision': 0.9019607843137255, 'recall': 0.9261744966442953, 'f1': 0.9139072847682119, 'number': 1341}, 'PER': {'precision': 0.9522799575821845, 'recall': 0.9750271444082519, 'f1': 0.9635193133047211, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so

{'eval_loss': 0.10294844955205917, 'eval_overall': {'precision': 0.9285831285831286, 'recall': 0.9540558734432851, 'f1': 0.9411471735701834, 'accuracy': 0.9852387119562018}, 'eval_per_label': {'LOC': {'precision': 0.9664864864864865, 'recall': 0.9733260751224823, 'f1': 0.9698942229454841, 'number': 1837}, 'MISC': {'precision': 0.8497983870967742, 'recall': 0.9143167028199566, 'f1': 0.8808777429467085, 'number': 922}, 'ORG': {'precision': 0.9019607843137255, 'recall': 0.9261744966442953, 'f1': 0.9139072847682119, 'number': 1341}, 'PER': {'precision': 0.9522799575821845, 'recall': 0.9750271444082519, 'f1': 0.9635193133047211, 'number': 1842}}, 'eval_runtime': 7.6218, 'eval_samples_per_second': 426.41, 'eval_steps_per_second': 53.4, 'epoch': 8.0}


 41%|████▏     | 14501/35120 [25:20<34:25,  9.98it/s]   

{'loss': 0.0042, 'grad_norm': 0.15165293216705322, 'learning_rate': 1.1742596810933942e-05, 'epoch': 8.26}


 43%|████▎     | 15002/35120 [26:09<36:45,  9.12it/s]

{'loss': 0.0038, 'grad_norm': 0.01020512543618679, 'learning_rate': 1.1457858769931664e-05, 'epoch': 8.54}


 44%|████▍     | 15502/35120 [27:01<34:01,  9.61it/s]

{'loss': 0.0028, 'grad_norm': 0.00046127362293191254, 'learning_rate': 1.1173120728929384e-05, 'epoch': 8.83}


 45%|████▍     | 15803/35120 [27:32<32:19,  9.96it/s]Trainer is attempting to log a value of "{'precision': 0.9368072787427626, 'recall': 0.9530461124200605, 'f1': 0.944856928339034, 'accuracy': 0.9860187201977983}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9632034632034632, 'recall': 0.9689711486118672, 'f1': 0.966078697421981, 'number': 1837}, 'MISC': {'precision': 0.8518145161290323, 'recall': 0.9164859002169198, 'f1': 0.8829676071055382, 'number': 922}, 'ORG': {'precision': 0.9313207547169812, 'recall': 0.9202087994034303, 'f1': 0.9257314328582145, 'number': 1341}, 'PER': {'precision': 0.9595744680851064, 'recall': 0.9793702497285559, 'f1': 0.969371305749597, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we

{'eval_loss': 0.11473368108272552, 'eval_overall': {'precision': 0.9368072787427626, 'recall': 0.9530461124200605, 'f1': 0.944856928339034, 'accuracy': 0.9860187201977983}, 'eval_per_label': {'LOC': {'precision': 0.9632034632034632, 'recall': 0.9689711486118672, 'f1': 0.966078697421981, 'number': 1837}, 'MISC': {'precision': 0.8518145161290323, 'recall': 0.9164859002169198, 'f1': 0.8829676071055382, 'number': 922}, 'ORG': {'precision': 0.9313207547169812, 'recall': 0.9202087994034303, 'f1': 0.9257314328582145, 'number': 1341}, 'PER': {'precision': 0.9595744680851064, 'recall': 0.9793702497285559, 'f1': 0.969371305749597, 'number': 1842}}, 'eval_runtime': 7.5249, 'eval_samples_per_second': 431.9, 'eval_steps_per_second': 54.087, 'epoch': 9.0}


 46%|████▌     | 16002/35120 [28:01<32:11,  9.90it/s]  

{'loss': 0.0049, 'grad_norm': 0.17741906642913818, 'learning_rate': 1.0888382687927108e-05, 'epoch': 9.11}


 47%|████▋     | 16502/35120 [28:50<31:51,  9.74it/s]

{'loss': 0.003, 'grad_norm': 0.0018998866435140371, 'learning_rate': 1.060364464692483e-05, 'epoch': 9.4}


 48%|████▊     | 17002/35120 [29:42<32:09,  9.39it/s]

{'loss': 0.0042, 'grad_norm': 0.0021700740326195955, 'learning_rate': 1.0318906605922552e-05, 'epoch': 9.68}


 50%|████▉     | 17501/35120 [30:34<30:55,  9.50it/s]

{'loss': 0.0026, 'grad_norm': 0.014039441011846066, 'learning_rate': 1.0034168564920275e-05, 'epoch': 9.97}


 50%|████▉     | 17559/35120 [30:40<32:25,  9.03it/s]Trainer is attempting to log a value of "{'precision': 0.9358889623265036, 'recall': 0.9532144059239314, 'f1': 0.9444722361180591, 'accuracy': 0.9852681462294696}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9669197396963124, 'recall': 0.9706042460533478, 'f1': 0.9687584895408857, 'number': 1837}, 'MISC': {'precision': 0.8826638477801269, 'recall': 0.9056399132321041, 'f1': 0.8940042826552462, 'number': 922}, 'ORG': {'precision': 0.8961318051575932, 'recall': 0.9328859060402684, 'f1': 0.9141395688710267, 'number': 1341}, 'PER': {'precision': 0.9619506966773848, 'recall': 0.9744842562432139, 'f1': 0.9681769147788565, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so

{'eval_loss': 0.1184961125254631, 'eval_overall': {'precision': 0.9358889623265036, 'recall': 0.9532144059239314, 'f1': 0.9444722361180591, 'accuracy': 0.9852681462294696}, 'eval_per_label': {'LOC': {'precision': 0.9669197396963124, 'recall': 0.9706042460533478, 'f1': 0.9687584895408857, 'number': 1837}, 'MISC': {'precision': 0.8826638477801269, 'recall': 0.9056399132321041, 'f1': 0.8940042826552462, 'number': 922}, 'ORG': {'precision': 0.8961318051575932, 'recall': 0.9328859060402684, 'f1': 0.9141395688710267, 'number': 1341}, 'PER': {'precision': 0.9619506966773848, 'recall': 0.9744842562432139, 'f1': 0.9681769147788565, 'number': 1842}}, 'eval_runtime': 7.5803, 'eval_samples_per_second': 428.744, 'eval_steps_per_second': 53.692, 'epoch': 10.0}


 51%|█████▏    | 18001/35120 [31:32<29:42,  9.61it/s]  

{'loss': 0.0011, 'grad_norm': 0.00040622701635584235, 'learning_rate': 9.749430523917997e-06, 'epoch': 10.25}


 53%|█████▎    | 18501/35120 [32:24<28:07,  9.85it/s]

{'loss': 0.001, 'grad_norm': 0.0004215097869746387, 'learning_rate': 9.464692482915719e-06, 'epoch': 10.54}


 54%|█████▍    | 19001/35120 [33:16<27:50,  9.65it/s]

{'loss': 0.002, 'grad_norm': 0.0020023963879793882, 'learning_rate': 9.17995444191344e-06, 'epoch': 10.82}


 55%|█████▍    | 19315/35120 [33:48<27:53,  9.45it/s]Trainer is attempting to log a value of "{'precision': 0.9384717168375786, 'recall': 0.9548973409626389, 'f1': 0.9466132799466134, 'accuracy': 0.9857685288750221}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9658906334596643, 'recall': 0.9711486118671747, 'f1': 0.9685124864277959, 'number': 1837}, 'MISC': {'precision': 0.8954108858057631, 'recall': 0.9099783080260304, 'f1': 0.9026358257127487, 'number': 922}, 'ORG': {'precision': 0.9075812274368231, 'recall': 0.9373601789709173, 'f1': 0.922230374174615, 'number': 1341}, 'PER': {'precision': 0.9557805007991476, 'recall': 0.9739413680781759, 'f1': 0.9647754772788384, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so 

{'eval_loss': 0.11947210878133774, 'eval_overall': {'precision': 0.9384717168375786, 'recall': 0.9548973409626389, 'f1': 0.9466132799466134, 'accuracy': 0.9857685288750221}, 'eval_per_label': {'LOC': {'precision': 0.9658906334596643, 'recall': 0.9711486118671747, 'f1': 0.9685124864277959, 'number': 1837}, 'MISC': {'precision': 0.8954108858057631, 'recall': 0.9099783080260304, 'f1': 0.9026358257127487, 'number': 922}, 'ORG': {'precision': 0.9075812274368231, 'recall': 0.9373601789709173, 'f1': 0.922230374174615, 'number': 1341}, 'PER': {'precision': 0.9557805007991476, 'recall': 0.9739413680781759, 'f1': 0.9647754772788384, 'number': 1842}}, 'eval_runtime': 7.5556, 'eval_samples_per_second': 430.142, 'eval_steps_per_second': 53.867, 'epoch': 11.0}


 56%|█████▌    | 19503/35120 [34:14<14:56, 17.43it/s]  

{'loss': 0.0028, 'grad_norm': 0.030722877010703087, 'learning_rate': 8.895216400911163e-06, 'epoch': 11.1}


 57%|█████▋    | 20002/35120 [35:05<26:11,  9.62it/s]

{'loss': 0.0033, 'grad_norm': 0.004747629631310701, 'learning_rate': 8.610478359908885e-06, 'epoch': 11.39}


 58%|█████▊    | 20502/35120 [35:58<25:58,  9.38it/s]

{'loss': 0.0018, 'grad_norm': 0.0014466517604887486, 'learning_rate': 8.325740318906607e-06, 'epoch': 11.67}


 60%|█████▉    | 21002/35120 [36:50<24:00,  9.80it/s]

{'loss': 0.0004, 'grad_norm': 0.005812987219542265, 'learning_rate': 8.041002277904329e-06, 'epoch': 11.96}


 60%|█████▉    | 21071/35120 [36:57<24:25,  9.59it/s]Trainer is attempting to log a value of "{'precision': 0.9431799302209669, 'recall': 0.9554022214742511, 'f1': 0.9492517348047823, 'accuracy': 0.9866074056631542}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.971195652173913, 'recall': 0.9727817093086554, 'f1': 0.9719880337231438, 'number': 1837}, 'MISC': {'precision': 0.9096844396082698, 'recall': 0.9067245119305857, 'f1': 0.9082020640956002, 'number': 922}, 'ORG': {'precision': 0.9121522693997072, 'recall': 0.9291573452647278, 'f1': 0.9205762837089029, 'number': 1341}, 'PER': {'precision': 0.9545934530095037, 'recall': 0.9815418023887079, 'f1': 0.9678800856531049, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so 

{'eval_loss': 0.12146918475627899, 'eval_overall': {'precision': 0.9431799302209669, 'recall': 0.9554022214742511, 'f1': 0.9492517348047823, 'accuracy': 0.9866074056631542}, 'eval_per_label': {'LOC': {'precision': 0.971195652173913, 'recall': 0.9727817093086554, 'f1': 0.9719880337231438, 'number': 1837}, 'MISC': {'precision': 0.9096844396082698, 'recall': 0.9067245119305857, 'f1': 0.9082020640956002, 'number': 922}, 'ORG': {'precision': 0.9121522693997072, 'recall': 0.9291573452647278, 'f1': 0.9205762837089029, 'number': 1341}, 'PER': {'precision': 0.9545934530095037, 'recall': 0.9815418023887079, 'f1': 0.9678800856531049, 'number': 1842}}, 'eval_runtime': 7.7732, 'eval_samples_per_second': 418.106, 'eval_steps_per_second': 52.36, 'epoch': 12.0}


 61%|██████    | 21502/35120 [37:50<25:01,  9.07it/s]  

{'loss': 0.0006, 'grad_norm': 0.0022160038352012634, 'learning_rate': 7.75626423690205e-06, 'epoch': 12.24}


 63%|██████▎   | 22002/35120 [38:42<21:51, 10.00it/s]

{'loss': 0.0008, 'grad_norm': 0.00019432637782301754, 'learning_rate': 7.471526195899773e-06, 'epoch': 12.53}


 64%|██████▍   | 22502/35120 [39:34<21:59,  9.57it/s]

{'loss': 0.0011, 'grad_norm': 1.6397913694381714, 'learning_rate': 7.186788154897495e-06, 'epoch': 12.81}


 65%|██████▌   | 22828/35120 [40:05<19:34, 10.47it/s]Trainer is attempting to log a value of "{'precision': 0.9386808087504143, 'recall': 0.9532144059239314, 'f1': 0.9458917835671342, 'accuracy': 0.9860923058809677}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9675148890092041, 'recall': 0.9727817093086554, 'f1': 0.9701411509229099, 'number': 1837}, 'MISC': {'precision': 0.8782791185729276, 'recall': 0.9078091106290672, 'f1': 0.8927999999999999, 'number': 922}, 'ORG': {'precision': 0.9241741741741741, 'recall': 0.9179716629381058, 'f1': 0.9210624766180322, 'number': 1341}, 'PER': {'precision': 0.9511041009463722, 'recall': 0.9820846905537459, 'f1': 0.9663461538461537, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so

{'eval_loss': 0.12505799531936646, 'eval_overall': {'precision': 0.9386808087504143, 'recall': 0.9532144059239314, 'f1': 0.9458917835671342, 'accuracy': 0.9860923058809677}, 'eval_per_label': {'LOC': {'precision': 0.9675148890092041, 'recall': 0.9727817093086554, 'f1': 0.9701411509229099, 'number': 1837}, 'MISC': {'precision': 0.8782791185729276, 'recall': 0.9078091106290672, 'f1': 0.8927999999999999, 'number': 922}, 'ORG': {'precision': 0.9241741741741741, 'recall': 0.9179716629381058, 'f1': 0.9210624766180322, 'number': 1341}, 'PER': {'precision': 0.9511041009463722, 'recall': 0.9820846905537459, 'f1': 0.9663461538461537, 'number': 1842}}, 'eval_runtime': 7.5826, 'eval_samples_per_second': 428.613, 'eval_steps_per_second': 53.676, 'epoch': 13.0}


 65%|██████▌   | 23002/35120 [40:31<21:18,  9.48it/s]  

{'loss': 0.0015, 'grad_norm': 0.00018138765881303698, 'learning_rate': 6.9020501138952166e-06, 'epoch': 13.1}


 67%|██████▋   | 23502/35120 [41:23<19:56,  9.71it/s]

{'loss': 0.0007, 'grad_norm': 0.00034025911008939147, 'learning_rate': 6.617312072892939e-06, 'epoch': 13.38}


 68%|██████▊   | 24001/35120 [42:15<20:08,  9.20it/s]

{'loss': 0.0006, 'grad_norm': 0.00041584973223507404, 'learning_rate': 6.3325740318906616e-06, 'epoch': 13.67}


 70%|██████▉   | 24501/35120 [43:04<19:18,  9.16it/s]

{'loss': 0.0008, 'grad_norm': 0.0006968477973714471, 'learning_rate': 6.047835990888384e-06, 'epoch': 13.95}


 70%|██████▉   | 24583/35120 [43:13<17:48,  9.86it/s]Trainer is attempting to log a value of "{'precision': 0.9430043203722167, 'recall': 0.9550656344665096, 'f1': 0.9489966555183946, 'accuracy': 0.986342497203744}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9736986301369863, 'recall': 0.9673380511703865, 'f1': 0.9705079191698526, 'number': 1837}, 'MISC': {'precision': 0.8892438764643238, 'recall': 0.9056399132321041, 'f1': 0.8973670069854918, 'number': 922}, 'ORG': {'precision': 0.9200293470286134, 'recall': 0.9351230425055929, 'f1': 0.9275147928994084, 'number': 1341}, 'PER': {'precision': 0.9566367001586462, 'recall': 0.9820846905537459, 'f1': 0.9691936780069649, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so 

{'eval_loss': 0.12853111326694489, 'eval_overall': {'precision': 0.9430043203722167, 'recall': 0.9550656344665096, 'f1': 0.9489966555183946, 'accuracy': 0.986342497203744}, 'eval_per_label': {'LOC': {'precision': 0.9736986301369863, 'recall': 0.9673380511703865, 'f1': 0.9705079191698526, 'number': 1837}, 'MISC': {'precision': 0.8892438764643238, 'recall': 0.9056399132321041, 'f1': 0.8973670069854918, 'number': 922}, 'ORG': {'precision': 0.9200293470286134, 'recall': 0.9351230425055929, 'f1': 0.9275147928994084, 'number': 1341}, 'PER': {'precision': 0.9566367001586462, 'recall': 0.9820846905537459, 'f1': 0.9691936780069649, 'number': 1842}}, 'eval_runtime': 7.5688, 'eval_samples_per_second': 429.396, 'eval_steps_per_second': 53.774, 'epoch': 14.0}


 71%|███████   | 25002/35120 [44:04<17:21,  9.71it/s]  

{'loss': 0.0006, 'grad_norm': 0.00020477738871704787, 'learning_rate': 5.763097949886105e-06, 'epoch': 14.24}


 73%|███████▎  | 25502/35120 [44:57<16:38,  9.63it/s]

{'loss': 0.0007, 'grad_norm': 0.00032146400189958513, 'learning_rate': 5.478359908883827e-06, 'epoch': 14.52}


 74%|███████▍  | 26002/35120 [45:46<15:04, 10.08it/s]

{'loss': 0.0014, 'grad_norm': 0.000293695367872715, 'learning_rate': 5.19362186788155e-06, 'epoch': 14.81}


 75%|███████▍  | 26339/35120 [46:22<15:07,  9.67it/s]Trainer is attempting to log a value of "{'precision': 0.938937613767996, 'recall': 0.9548973409626389, 'f1': 0.9468502294534834, 'accuracy': 0.9861364572908695}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9607948442534908, 'recall': 0.9738704409363091, 'f1': 0.9672884563395511, 'number': 1837}, 'MISC': {'precision': 0.8742203742203742, 'recall': 0.9121475054229935, 'f1': 0.8927813163481952, 'number': 922}, 'ORG': {'precision': 0.9322289156626506, 'recall': 0.9231916480238628, 'f1': 0.9276882727613338, 'number': 1341}, 'PER': {'precision': 0.9550502379693284, 'recall': 0.9804560260586319, 'f1': 0.9675863916421109, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so 

{'eval_loss': 0.1253676861524582, 'eval_overall': {'precision': 0.938937613767996, 'recall': 0.9548973409626389, 'f1': 0.9468502294534834, 'accuracy': 0.9861364572908695}, 'eval_per_label': {'LOC': {'precision': 0.9607948442534908, 'recall': 0.9738704409363091, 'f1': 0.9672884563395511, 'number': 1837}, 'MISC': {'precision': 0.8742203742203742, 'recall': 0.9121475054229935, 'f1': 0.8927813163481952, 'number': 922}, 'ORG': {'precision': 0.9322289156626506, 'recall': 0.9231916480238628, 'f1': 0.9276882727613338, 'number': 1341}, 'PER': {'precision': 0.9550502379693284, 'recall': 0.9804560260586319, 'f1': 0.9675863916421109, 'number': 1842}}, 'eval_runtime': 8.6163, 'eval_samples_per_second': 377.191, 'eval_steps_per_second': 47.236, 'epoch': 15.0}


 75%|███████▌  | 26502/35120 [46:48<14:57,  9.60it/s]  

{'loss': 0.0008, 'grad_norm': 0.00019819244334939867, 'learning_rate': 4.908883826879272e-06, 'epoch': 15.09}


 77%|███████▋  | 27001/35120 [47:41<14:13,  9.51it/s]

{'loss': 0.0008, 'grad_norm': 0.00021079520229250193, 'learning_rate': 4.624145785876993e-06, 'epoch': 15.38}


 78%|███████▊  | 27502/35120 [48:30<13:29,  9.41it/s]

{'loss': 0.0003, 'grad_norm': 0.00012124140630476177, 'learning_rate': 4.339407744874715e-06, 'epoch': 15.66}


 80%|███████▉  | 28002/35120 [49:22<12:07,  9.78it/s]

{'loss': 0.0008, 'grad_norm': 0.000182148942258209, 'learning_rate': 4.054669703872437e-06, 'epoch': 15.95}


 80%|███████▉  | 28095/35120 [49:32<12:10,  9.62it/s]Trainer is attempting to log a value of "{'precision': 0.9447862963578912, 'recall': 0.9560753954897341, 'f1': 0.9503973232956922, 'accuracy': 0.9866957084829575}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9680043383947939, 'recall': 0.9716929776810016, 'f1': 0.969845150774246, 'number': 1837}, 'MISC': {'precision': 0.8954108858057631, 'recall': 0.9099783080260304, 'f1': 0.9026358257127487, 'number': 922}, 'ORG': {'precision': 0.9301634472511144, 'recall': 0.9336316181953765, 'f1': 0.9318943059173799, 'number': 1341}, 'PER': {'precision': 0.957051961823966, 'recall': 0.9799131378935939, 'f1': 0.9683476394849786, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so w

{'eval_loss': 0.12416187673807144, 'eval_overall': {'precision': 0.9447862963578912, 'recall': 0.9560753954897341, 'f1': 0.9503973232956922, 'accuracy': 0.9866957084829575}, 'eval_per_label': {'LOC': {'precision': 0.9680043383947939, 'recall': 0.9716929776810016, 'f1': 0.969845150774246, 'number': 1837}, 'MISC': {'precision': 0.8954108858057631, 'recall': 0.9099783080260304, 'f1': 0.9026358257127487, 'number': 922}, 'ORG': {'precision': 0.9301634472511144, 'recall': 0.9336316181953765, 'f1': 0.9318943059173799, 'number': 1341}, 'PER': {'precision': 0.957051961823966, 'recall': 0.9799131378935939, 'f1': 0.9683476394849786, 'number': 1842}}, 'eval_runtime': 8.6628, 'eval_samples_per_second': 375.168, 'eval_steps_per_second': 46.983, 'epoch': 16.0}


 81%|████████  | 28502/35120 [50:24<11:26,  9.64it/s]  

{'loss': 0.001, 'grad_norm': 0.019741835072636604, 'learning_rate': 3.76993166287016e-06, 'epoch': 16.23}


 83%|████████▎ | 29002/35120 [51:13<10:35,  9.63it/s]

{'loss': 0.0003, 'grad_norm': 0.00012148160021752119, 'learning_rate': 3.4851936218678815e-06, 'epoch': 16.51}


 84%|████████▍ | 29502/35120 [52:06<10:18,  9.09it/s]

{'loss': 0.0005, 'grad_norm': 0.00011341737263137475, 'learning_rate': 3.200455580865604e-06, 'epoch': 16.8}


 85%|████████▍ | 29851/35120 [52:42<09:06,  9.64it/s]Trainer is attempting to log a value of "{'precision': 0.9438631456568676, 'recall': 0.9564119824974756, 'f1': 0.9500961297333445, 'accuracy': 0.9868428798492965}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.971195652173913, 'recall': 0.9727817093086554, 'f1': 0.9719880337231438, 'number': 1837}, 'MISC': {'precision': 0.8761609907120743, 'recall': 0.920824295010846, 'f1': 0.8979375991538868, 'number': 922}, 'ORG': {'precision': 0.9307004470938898, 'recall': 0.9313944817300522, 'f1': 0.9310473350726798, 'number': 1341}, 'PER': {'precision': 0.9614973262032086, 'recall': 0.9761129207383279, 'f1': 0.9687500000000001, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so w

{'eval_loss': 0.12274094671010971, 'eval_overall': {'precision': 0.9438631456568676, 'recall': 0.9564119824974756, 'f1': 0.9500961297333445, 'accuracy': 0.9868428798492965}, 'eval_per_label': {'LOC': {'precision': 0.971195652173913, 'recall': 0.9727817093086554, 'f1': 0.9719880337231438, 'number': 1837}, 'MISC': {'precision': 0.8761609907120743, 'recall': 0.920824295010846, 'f1': 0.8979375991538868, 'number': 922}, 'ORG': {'precision': 0.9307004470938898, 'recall': 0.9313944817300522, 'f1': 0.9310473350726798, 'number': 1341}, 'PER': {'precision': 0.9614973262032086, 'recall': 0.9761129207383279, 'f1': 0.9687500000000001, 'number': 1842}}, 'eval_runtime': 8.9038, 'eval_samples_per_second': 365.012, 'eval_steps_per_second': 45.711, 'epoch': 17.0}


 85%|████████▌ | 30002/35120 [53:08<09:18,  9.16it/s]  

{'loss': 0.0007, 'grad_norm': 0.00015434053784701973, 'learning_rate': 2.9157175398633257e-06, 'epoch': 17.08}


 87%|████████▋ | 30503/35120 [53:59<05:49, 13.20it/s]

{'loss': 0.0, 'grad_norm': 0.00021848917822353542, 'learning_rate': 2.6309794988610482e-06, 'epoch': 17.37}


 88%|████████▊ | 31001/35120 [54:50<07:45,  8.84it/s]

{'loss': 0.0002, 'grad_norm': 0.0001007779865176417, 'learning_rate': 2.34624145785877e-06, 'epoch': 17.65}


 90%|████████▉ | 31502/35120 [55:42<06:13,  9.69it/s]

{'loss': 0.0002, 'grad_norm': 0.00010259783448418602, 'learning_rate': 2.061503416856492e-06, 'epoch': 17.94}


 90%|████████▉ | 31607/35120 [55:53<06:12,  9.44it/s]Trainer is attempting to log a value of "{'precision': 0.9440199335548173, 'recall': 0.9564119824974756, 'f1': 0.9501755559271025, 'accuracy': 0.9870636368988049}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9701249321021184, 'recall': 0.9722373434948285, 'f1': 0.9711799891245242, 'number': 1837}, 'MISC': {'precision': 0.8810020876826722, 'recall': 0.9154013015184381, 'f1': 0.8978723404255319, 'number': 922}, 'ORG': {'precision': 0.926829268292683, 'recall': 0.9351230425055929, 'f1': 0.9309576837416481, 'number': 1341}, 'PER': {'precision': 0.9630620985010707, 'recall': 0.9766558089033659, 'f1': 0.969811320754717, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so w

{'eval_loss': 0.12293359637260437, 'eval_overall': {'precision': 0.9440199335548173, 'recall': 0.9564119824974756, 'f1': 0.9501755559271025, 'accuracy': 0.9870636368988049}, 'eval_per_label': {'LOC': {'precision': 0.9701249321021184, 'recall': 0.9722373434948285, 'f1': 0.9711799891245242, 'number': 1837}, 'MISC': {'precision': 0.8810020876826722, 'recall': 0.9154013015184381, 'f1': 0.8978723404255319, 'number': 922}, 'ORG': {'precision': 0.926829268292683, 'recall': 0.9351230425055929, 'f1': 0.9309576837416481, 'number': 1341}, 'PER': {'precision': 0.9630620985010707, 'recall': 0.9766558089033659, 'f1': 0.969811320754717, 'number': 1842}}, 'eval_runtime': 8.7313, 'eval_samples_per_second': 372.226, 'eval_steps_per_second': 46.614, 'epoch': 18.0}


 91%|█████████ | 32001/35120 [56:44<04:17, 12.10it/s]  

{'loss': 0.0001, 'grad_norm': 0.0010326108895242214, 'learning_rate': 1.7767653758542143e-06, 'epoch': 18.22}


 93%|█████████▎| 32502/35120 [57:16<02:29, 17.53it/s]

{'loss': 0.0007, 'grad_norm': 0.00016551795124541968, 'learning_rate': 1.4920273348519363e-06, 'epoch': 18.51}


 94%|█████████▍| 33003/35120 [57:45<02:04, 16.99it/s]

{'loss': 0.0004, 'grad_norm': 0.00024008983746170998, 'learning_rate': 1.2072892938496584e-06, 'epoch': 18.79}


 95%|█████████▌| 33364/35120 [58:06<01:39, 17.63it/s]Trainer is attempting to log a value of "{'precision': 0.944813829787234, 'recall': 0.9565802760013463, 'f1': 0.9506606455929084, 'accuracy': 0.9872255254017779}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.971677559912854, 'recall': 0.9711486118671747, 'f1': 0.9714130138851075, 'number': 1837}, 'MISC': {'precision': 0.8712667353244078, 'recall': 0.9175704989154013, 'f1': 0.8938193343898574, 'number': 922}, 'ORG': {'precision': 0.9383458646616541, 'recall': 0.930648769574944, 'f1': 0.9344814676151254, 'number': 1341}, 'PER': {'precision': 0.961149547631719, 'recall': 0.9804560260586319, 'f1': 0.9707067992475142, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we 

{'eval_loss': 0.12236841022968292, 'eval_overall': {'precision': 0.944813829787234, 'recall': 0.9565802760013463, 'f1': 0.9506606455929084, 'accuracy': 0.9872255254017779}, 'eval_per_label': {'LOC': {'precision': 0.971677559912854, 'recall': 0.9711486118671747, 'f1': 0.9714130138851075, 'number': 1837}, 'MISC': {'precision': 0.8712667353244078, 'recall': 0.9175704989154013, 'f1': 0.8938193343898574, 'number': 922}, 'ORG': {'precision': 0.9383458646616541, 'recall': 0.930648769574944, 'f1': 0.9344814676151254, 'number': 1341}, 'PER': {'precision': 0.961149547631719, 'recall': 0.9804560260586319, 'f1': 0.9707067992475142, 'number': 1842}}, 'eval_runtime': 6.5081, 'eval_samples_per_second': 499.381, 'eval_steps_per_second': 62.538, 'epoch': 19.0}


 95%|█████████▌| 33503/35120 [58:22<01:30, 17.84it/s]

{'loss': 0.0001, 'grad_norm': 0.00016071839490905404, 'learning_rate': 9.225512528473805e-07, 'epoch': 19.08}


 97%|█████████▋| 34002/35120 [58:51<01:08, 16.34it/s]

{'loss': 0.0001, 'grad_norm': 9.665234392741695e-05, 'learning_rate': 6.378132118451026e-07, 'epoch': 19.36}


 98%|█████████▊| 34503/35120 [59:19<00:35, 17.54it/s]

{'loss': 0.0001, 'grad_norm': 0.004949676804244518, 'learning_rate': 3.530751708428246e-07, 'epoch': 19.65}


100%|█████████▉| 35003/35120 [59:48<00:06, 18.11it/s]

{'loss': 0.0005, 'grad_norm': 0.00014002641546539962, 'learning_rate': 6.83371298405467e-08, 'epoch': 19.93}


100%|██████████| 35120/35120 [59:54<00:00, 17.48it/s]Trainer is attempting to log a value of "{'precision': 0.9443891102257637, 'recall': 0.9574217435207001, 'f1': 0.9508607721878657, 'accuracy': 0.9872549596750456}" of type <class 'dict'> for key "eval/overall" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'LOC': {'precision': 0.9727371864776445, 'recall': 0.9711486118671747, 'f1': 0.9719422500681013, 'number': 1837}, 'MISC': {'precision': 0.8746113989637305, 'recall': 0.9154013015184381, 'f1': 0.8945416004239534, 'number': 922}, 'ORG': {'precision': 0.9336810730253353, 'recall': 0.9343773303504848, 'f1': 0.9340290719344019, 'number': 1341}, 'PER': {'precision': 0.960169941582581, 'recall': 0.9815418023887079, 'f1': 0.970738255033557, 'number': 1842}}" of type <class 'dict'> for key "eval/per_label" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so w

{'eval_loss': 0.12258101999759674, 'eval_overall': {'precision': 0.9443891102257637, 'recall': 0.9574217435207001, 'f1': 0.9508607721878657, 'accuracy': 0.9872549596750456}, 'eval_per_label': {'LOC': {'precision': 0.9727371864776445, 'recall': 0.9711486118671747, 'f1': 0.9719422500681013, 'number': 1837}, 'MISC': {'precision': 0.8746113989637305, 'recall': 0.9154013015184381, 'f1': 0.8945416004239534, 'number': 922}, 'ORG': {'precision': 0.9336810730253353, 'recall': 0.9343773303504848, 'f1': 0.9340290719344019, 'number': 1341}, 'PER': {'precision': 0.960169941582581, 'recall': 0.9815418023887079, 'f1': 0.970738255033557, 'number': 1842}}, 'eval_runtime': 6.7405, 'eval_samples_per_second': 482.16, 'eval_steps_per_second': 60.381, 'epoch': 20.0}


100%|██████████| 35120/35120 [1:00:04<00:00,  9.74it/s]

{'train_runtime': 3604.2086, 'train_samples_per_second': 77.914, 'train_steps_per_second': 9.744, 'train_loss': 0.013757273165407275, 'epoch': 20.0}





TrainOutput(global_step=35120, training_loss=0.013757273165407275, metrics={'train_runtime': 3604.2086, 'train_samples_per_second': 77.914, 'train_steps_per_second': 9.744, 'total_flos': 6145780901050062.0, 'train_loss': 0.013757273165407275, 'epoch': 20.0})

In [29]:
# Saving the Model and Tokenizer

import json
model_output_dir = "bert-finetuned-ner"

trainer.save_model(model_output_dir)

tokenizer.save_pretrained(model_output_dir)

('bert-finetuned-ner\\tokenizer_config.json',
 'bert-finetuned-ner\\special_tokens_map.json',
 'bert-finetuned-ner\\vocab.txt',
 'bert-finetuned-ner\\added_tokens.json',
 'bert-finetuned-ner\\tokenizer.json')

In [30]:
import os

# Process log_history for structured saving
log_history = trainer.state.log_history

# Extract overall and per-label metrics from evaluation logs
metrics = []
for log in log_history:
    if "eval_loss" in log:  # Check if it's an evaluation log
        metrics_entry = {
            "epoch": log.get("epoch"),
            "step": log.get("step"),
            "eval_loss": log.get("eval_loss"),
            "overall": log.get("eval_overall", {}),
            "per_label": log.get("eval_per_label", {}),
            "runtime": {
                "eval_runtime": log.get("eval_runtime"),
                "samples_per_second": log.get("eval_samples_per_second"),
                "steps_per_second": log.get("eval_steps_per_second"),
            },
        }
        metrics.append(metrics_entry)

# Save overall and per-label metrics to metrics.json
metrics_output_file = os.path.join(model_output_dir, "metrics.json")
with open(metrics_output_file, "w") as f:
    json.dump(metrics, f, indent=4)

# Save the entire log_history to log_history.json
log_history_output_file = os.path.join(model_output_dir, "log_history.json")
with open(log_history_output_file, "w") as f:
    json.dump(log_history, f, indent=4)

print(f"Metrics saved to {metrics_output_file}")
print(f"Log history saved to {log_history_output_file}")

Metrics saved to bert-finetuned-ner\metrics.json
Log history saved to bert-finetuned-ner\log_history.json


In [32]:
import torch
from transformers import TrainingArguments

# Load the training arguments from the bin file
training_args = torch.load(f"{model_output_dir}/training_args.bin")

# Convert to dictionary to view contents
training_args_dict = training_args.to_dict()

# Print the training arguments
for key, value in training_args_dict.items():
    print(f"{key}: {value}")



output_dir: bert-finetuned-ner-checkpoint
overwrite_output_dir: False
do_train: False
do_eval: True
do_predict: False
eval_strategy: epoch
prediction_loss_only: False
per_device_train_batch_size: 8
per_device_eval_batch_size: 8
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: 1
eval_accumulation_steps: None
eval_delay: 0
torch_empty_cache_steps: None
learning_rate: 2e-05
weight_decay: 0.01
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1e-08
max_grad_norm: 1.0
num_train_epochs: 20
max_steps: -1
lr_scheduler_type: linear
lr_scheduler_kwargs: {}
warmup_ratio: 0.0
warmup_steps: 0
log_level: passive
log_on_each_node: True
logging_dir: bert-finetuned-ner-checkpoint\runs\Dec01_00-17-24_Prism
logging_strategy: steps
logging_first_step: False
logging_steps: 500
logging_nan_inf_filter: True
save_strategy: epoch
save_steps: 500
save_total_limit: None
save_safetensors: True
save_on_each_node: False
save_only_model: False
restore_callback_states_from_check

In [34]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model=model_output_dir)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [35]:
result = pipe('My name is Clara and I live in Berkeley, California.')

In [36]:
result

[{'entity': 'B-PER',
  'score': 0.99972135,
  'index': 4,
  'word': 'Clara',
  'start': 11,
  'end': 16},
 {'entity': 'B-LOC',
  'score': 0.99998116,
  'index': 9,
  'word': 'Berkeley',
  'start': 31,
  'end': 39},
 {'entity': 'B-LOC',
  'score': 0.9999862,
  'index': 11,
  'word': 'California',
  'start': 41,
  'end': 51}]

In [37]:
result = pipe("Bill Gates was the CEO of Microsoft until 2000, when he stepped down and was succeeded by Steve Ballmer12. Gates remained chairman of the board of directors and became chief software architect2. Microsoft's current CEO is Satya Nadella, who took over in 20141.")
result

[{'entity': 'B-PER',
  'score': 0.99998915,
  'index': 1,
  'word': 'Bill',
  'start': 0,
  'end': 4},
 {'entity': 'I-PER',
  'score': 0.9999895,
  'index': 2,
  'word': 'Gates',
  'start': 5,
  'end': 10},
 {'entity': 'B-ORG',
  'score': 0.9999784,
  'index': 7,
  'word': 'Microsoft',
  'start': 26,
  'end': 35},
 {'entity': 'B-PER',
  'score': 0.9999889,
  'index': 19,
  'word': 'Steve',
  'start': 90,
  'end': 95},
 {'entity': 'I-PER',
  'score': 0.9999908,
  'index': 20,
  'word': 'Ball',
  'start': 96,
  'end': 100},
 {'entity': 'I-PER',
  'score': 0.9999912,
  'index': 21,
  'word': '##mer',
  'start': 100,
  'end': 103},
 {'entity': 'I-PER',
  'score': 0.99998987,
  'index': 22,
  'word': '##12',
  'start': 103,
  'end': 105},
 {'entity': 'B-PER',
  'score': 0.9999858,
  'index': 24,
  'word': 'Gates',
  'start': 107,
  'end': 112},
 {'entity': 'B-ORG',
  'score': 0.9999821,
  'index': 39,
  'word': 'Microsoft',
  'start': 195,
  'end': 204},
 {'entity': 'B-PER',
  'score': 0.99

In [38]:
import os
import json
import numpy as np
from transformers import AutoModelForTokenClassification, AutoTokenizer, DataCollatorForTokenClassification
from torch.utils.data import DataLoader
import torch
import evaluate
from tqdm.auto import tqdm


test_metrics_file = os.path.join(model_output_dir, "test_metrics.json")

# Load the model and tokenizer
model = AutoModelForTokenClassification.from_pretrained(model_output_dir)
tokenizer = AutoTokenizer.from_pretrained(model_output_dir)

data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
test_dataloader = DataLoader(
    tokenized_datasets["test"],  # Replace with your test dataset
    collate_fn=data_collator,
    batch_size=8
)

# Load the seqeval metric
metric = evaluate.load("seqeval")

# Move model to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# Testing loop
model.eval()
progress_bar = tqdm(test_dataloader)
all_logits = []
all_labels = []

max_seq_len = 0

# First pass: Determine the maximum sequence length
for batch in test_dataloader:
    max_seq_len = max(max_seq_len, batch["input_ids"].size(1))

# Second pass: Collect padded logits and labels
for batch in progress_bar:
    with torch.no_grad():
        # Move batch to device
        batch = {key: val.to(device) for key, val in batch.items()}
        
        # Forward pass
        outputs = model(**batch)
        
        # Pad logits and labels to the maximum sequence length
        logits_padded = np.pad(
            outputs.logits.cpu().numpy(),
            ((0, 0), (0, max_seq_len - outputs.logits.size(1)), (0, 0)),
            mode="constant",
        )
        labels_padded = np.pad(
            batch["labels"].cpu().numpy(),
            ((0, 0), (0, max_seq_len - batch["labels"].size(1))),
            mode="constant",
            constant_values=-100,
        )
        all_logits.append(logits_padded)
        all_labels.append(labels_padded)

# Concatenate all logits and labels
all_logits = np.concatenate(all_logits, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

# Compute test metrics
test_metrics = compute_metrics((all_logits, all_labels))

# Define helper function to handle JSON serialization
def convert_to_serializable(obj):
    if isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    else:
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


# Save test metrics to a JSON file
with open(test_metrics_file, "w") as f:
    json.dump(test_metrics, f, indent=4, default=convert_to_serializable)

# Display test metrics
print("Test metrics saved to", test_metrics_file)
print(json.dumps(test_metrics, indent=4, default=convert_to_serializable))



100%|██████████| 432/432 [00:03<00:00, 124.74it/s]


Test metrics saved to bert-finetuned-ner\test_metrics.json
{
    "overall": {
        "precision": 0.899030135088327,
        "recall": 0.9190864022662889,
        "f1": 0.9089476448958151,
        "accuracy": 0.9727391626353193
    },
    "per_label": {
        "LOC": {
            "precision": 0.9286570743405276,
            "recall": 0.9286570743405276,
            "f1": 0.9286570743405276,
            "number": 1668
        },
        "MISC": {
            "precision": 0.7555266579973993,
            "recall": 0.8276353276353277,
            "f1": 0.7899388171312032,
            "number": 702
        },
        "ORG": {
            "precision": 0.8832159624413145,
            "recall": 0.9060806742925949,
            "f1": 0.8945022288261516,
            "number": 1661
        },
        "PER": {
            "precision": 0.9528475199020208,
            "recall": 0.9622758194186766,
            "f1": 0.9575384615384616,
            "number": 1617
        }
    }
}


In [10]:
from transformers import pipeline

# Replace with your specific model checkpoint
model_checkpoint = "bert-finetuned-ner"
token_classifier = pipeline(
    "token-classification", 
    model=model_checkpoint, 
    aggregation_strategy="simple"
)

# Example usage
result = token_classifier("My name is Sylvain and I work at Hugging Face in Brooklyn.")
print(result)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


[{'entity_group': 'PER', 'score': 0.9999881, 'word': 'Sylvain', 'start': 11, 'end': 18}, {'entity_group': 'ORG', 'score': 0.99961376, 'word': 'Hugging Face', 'start': 33, 'end': 45}, {'entity_group': 'LOC', 'score': 0.99989843, 'word': 'Brooklyn', 'start': 49, 'end': 57}]
