### Read data, Preprocess and split

In [2]:
import pandas as pd
from datasets import Dataset, DatasetDict

# Load dataset
df = pd.read_csv("data/ner_dataset.csv", encoding="latin1").ffill()

# Group by sentence
agg_func = lambda s: {"tokens": s["Word"].tolist(), "ner_tags": s["Tag"].tolist()}
grouped = df.groupby("Sentence #").apply(agg_func).tolist()

dataset = Dataset.from_list(grouped)

# Train/val/test split
dataset = dataset.train_test_split(test_size=0.2, seed=42)
temp = dataset['train'].train_test_split(test_size=0.1, seed=42)
dataset = DatasetDict({
    "train": temp["train"],
    "validation": temp["test"],
    "test": dataset["test"]
})

# Get unique tags
unique_tags = sorted({tag for doc in dataset["train"]["ner_tags"] for tag in doc})
tag2id = {t: i for i, t in enumerate(unique_tags)}
id2tag = {i: t for t, i in tag2id.items()}

print("NER Tags:", unique_tags)



  grouped = df.groupby("Sentence #").apply(agg_func).tolist()


NER Tags: ['B-art', 'B-eve', 'B-geo', 'B-gpe', 'B-nat', 'B-org', 'B-per', 'B-tim', 'I-art', 'I-eve', 'I-geo', 'I-gpe', 'I-nat', 'I-org', 'I-per', 'I-tim', 'O']


In [3]:

from transformers import AutoTokenizer

model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        is_split_into_words=True,
        padding="max_length",
        max_length=128
    )
    
    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        label_ids = []
        previous_word = None
        for word_id in word_ids:
            if word_id is None:
                label_ids.append(-100)  # ignore
            elif word_id != previous_word:
                label_ids.append(tag2id[label[word_id]])
            else:
                label_ids.append(tag2id[label[word_id]] if label[word_id].startswith("I-") else -100)
            previous_word = word_id
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=True)


  _torch_pytree._register_pytree_node(


Map:   0%|          | 0/34530 [00:00<?, ? examples/s]

Map:   0%|          | 0/3837 [00:00<?, ? examples/s]

Map:   0%|          | 0/9592 [00:00<?, ? examples/s]

In [8]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 34530
    })
    validation: Dataset({
        features: ['tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 3837
    })
    test: Dataset({
        features: ['tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 9592
    })
})

### Build and train

In [4]:
from transformers import AutoModelForTokenClassification

model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(unique_tags),
    id2label=id2tag,
    label2id=tag2id
)


  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
from transformers import TrainingArguments, Trainer
import numpy as np
import evaluate

metric = evaluate.load("seqeval")

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_labels = [[id2tag[l] for l in label if l != -100] for label in labels]
    true_predictions = [[id2tag[p] for (p, l) in zip(pred, label) if l != -100]
                        for pred, label in zip(predictions, labels)]

    return metric.compute(predictions=true_predictions, references=true_labels)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    logging_dir="./logs",
    # logging_steps=50,
    fp16=True  # mixed precision for faster training on GPU
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()


  0%|          | 0/21590 [00:00<?, ?it/s]

{'loss': 0.1794, 'learning_rate': 4.884205650764243e-05, 'epoch': 0.23}
{'loss': 0.1394, 'learning_rate': 4.7684113015284854e-05, 'epoch': 0.46}
{'loss': 0.1274, 'learning_rate': 4.652616952292728e-05, 'epoch': 0.69}
{'loss': 0.1176, 'learning_rate': 4.536822603056971e-05, 'epoch': 0.93}


  0%|          | 0/240 [00:00<?, ?it/s]

Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 33}" of type <class 'dict'> for key "eval/art" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.5714285714285714, 'recall': 0.13793103448275862, 'f1': 0.2222222222222222, 'number': 29}" of type <class 'dict'> for key "eval/eve" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8467289719626169, 'recall': 0.8692037096258395, 'f1': 0.8578191573299668, 'number': 3127}" of type <class 'dict'> for key "eval/geo" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.9335347432024169, 'recall': 0.9641185647425897, 'f1': 0.9485801995395242, 'number': 1282}" of 

{'eval_loss': 0.1136072650551796, 'eval_art': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 33}, 'eval_eve': {'precision': 0.5714285714285714, 'recall': 0.13793103448275862, 'f1': 0.2222222222222222, 'number': 29}, 'eval_geo': {'precision': 0.8467289719626169, 'recall': 0.8692037096258395, 'f1': 0.8578191573299668, 'number': 3127}, 'eval_gpe': {'precision': 0.9335347432024169, 'recall': 0.9641185647425897, 'f1': 0.9485801995395242, 'number': 1282}, 'eval_nat': {'precision': 0.5, 'recall': 0.4, 'f1': 0.4444444444444445, 'number': 15}, 'eval_org': {'precision': 0.6157049375371803, 'recall': 0.6201318154583583, 'f1': 0.617910447761194, 'number': 1669}, 'eval_per': {'precision': 0.7351313969571232, 'recall': 0.773090909090909, 'f1': 0.7536334633108827, 'number': 1375}, 'eval_tim': {'precision': 0.8857142857142857, 'recall': 0.8269230769230769, 'f1': 0.8553095925569457, 'number': 1612}, 'eval_overall_precision': 0.8048541575968655, 'eval_overall_recall': 0.8089039597462262, 'eval_o

  0%|          | 0/240 [00:00<?, ?it/s]

Trainer is attempting to log a value of "{'precision': 0.4, 'recall': 0.06060606060606061, 'f1': 0.10526315789473685, 'number': 33}" of type <class 'dict'> for key "eval/art" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.4, 'recall': 0.20689655172413793, 'f1': 0.2727272727272727, 'number': 29}" of type <class 'dict'> for key "eval/eve" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8516009852216748, 'recall': 0.8845538855132715, 'f1': 0.867764705882353, 'number': 3127}" of type <class 'dict'> for key "eval/geo" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.9631949882537196, 'recall': 0.9594383775351014, 'f1': 0.9613130128956624, 'num

{'eval_loss': 0.10650458931922913, 'eval_art': {'precision': 0.4, 'recall': 0.06060606060606061, 'f1': 0.10526315789473685, 'number': 33}, 'eval_eve': {'precision': 0.4, 'recall': 0.20689655172413793, 'f1': 0.2727272727272727, 'number': 29}, 'eval_geo': {'precision': 0.8516009852216748, 'recall': 0.8845538855132715, 'f1': 0.867764705882353, 'number': 3127}, 'eval_gpe': {'precision': 0.9631949882537196, 'recall': 0.9594383775351014, 'f1': 0.9613130128956624, 'number': 1282}, 'eval_nat': {'precision': 0.6666666666666666, 'recall': 0.4, 'f1': 0.5, 'number': 15}, 'eval_org': {'precision': 0.664043583535109, 'recall': 0.6572798082684242, 'f1': 0.6606443842216201, 'number': 1669}, 'eval_per': {'precision': 0.756114605171209, 'recall': 0.7869090909090909, 'f1': 0.7712045616535994, 'number': 1375}, 'eval_tim': {'precision': 0.8621755253399258, 'recall': 0.8653846153846154, 'f1': 0.8637770897832817, 'number': 1612}, 'eval_overall_precision': 0.8194489465153971, 'eval_overall_recall': 0.82957777

  0%|          | 0/240 [00:00<?, ?it/s]

Trainer is attempting to log a value of "{'precision': 0.5, 'recall': 0.09090909090909091, 'f1': 0.15384615384615385, 'number': 33}" of type <class 'dict'> for key "eval/art" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.21052631578947367, 'recall': 0.13793103448275862, 'f1': 0.16666666666666666, 'number': 29}" of type <class 'dict'> for key "eval/eve" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8320588235294117, 'recall': 0.9047009913655261, 'f1': 0.8668607323425769, 'number': 3127}" of type <class 'dict'> for key "eval/geo" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.977327935222672, 'recall': 0.9414976599063962, 'f1': 0.95907

{'eval_loss': 0.12094799429178238, 'eval_art': {'precision': 0.5, 'recall': 0.09090909090909091, 'f1': 0.15384615384615385, 'number': 33}, 'eval_eve': {'precision': 0.21052631578947367, 'recall': 0.13793103448275862, 'f1': 0.16666666666666666, 'number': 29}, 'eval_geo': {'precision': 0.8320588235294117, 'recall': 0.9047009913655261, 'f1': 0.8668607323425769, 'number': 3127}, 'eval_gpe': {'precision': 0.977327935222672, 'recall': 0.9414976599063962, 'f1': 0.9590782677791021, 'number': 1282}, 'eval_nat': {'precision': 0.6666666666666666, 'recall': 0.4, 'f1': 0.5, 'number': 15}, 'eval_org': {'precision': 0.6406160867084997, 'recall': 0.6728579988016776, 'f1': 0.6563413208649911, 'number': 1669}, 'eval_per': {'precision': 0.786096256684492, 'recall': 0.7483636363636363, 'f1': 0.7667660208643815, 'number': 1375}, 'eval_tim': {'precision': 0.8616822429906542, 'recall': 0.857940446650124, 'f1': 0.8598072738576312, 'number': 1612}, 'eval_overall_precision': 0.8123393316195373, 'eval_overall_re

  0%|          | 0/240 [00:00<?, ?it/s]

Trainer is attempting to log a value of "{'precision': 0.4166666666666667, 'recall': 0.15151515151515152, 'f1': 0.2222222222222222, 'number': 33}" of type <class 'dict'> for key "eval/art" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.28, 'recall': 0.2413793103448276, 'f1': 0.25925925925925924, 'number': 29}" of type <class 'dict'> for key "eval/eve" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8627943485086342, 'recall': 0.8787975695554845, 'f1': 0.870722433460076, 'number': 3127}" of type <class 'dict'> for key "eval/geo" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.9550038789759504, 'recall': 0.9602184087363494, 'f1': 0.9576040

{'eval_loss': 0.13224658370018005, 'eval_art': {'precision': 0.4166666666666667, 'recall': 0.15151515151515152, 'f1': 0.2222222222222222, 'number': 33}, 'eval_eve': {'precision': 0.28, 'recall': 0.2413793103448276, 'f1': 0.25925925925925924, 'number': 29}, 'eval_geo': {'precision': 0.8627943485086342, 'recall': 0.8787975695554845, 'f1': 0.870722433460076, 'number': 3127}, 'eval_gpe': {'precision': 0.9550038789759504, 'recall': 0.9602184087363494, 'f1': 0.957604045118631, 'number': 1282}, 'eval_nat': {'precision': 0.35294117647058826, 'recall': 0.4, 'f1': 0.37500000000000006, 'number': 15}, 'eval_org': {'precision': 0.6637579988365329, 'recall': 0.6836428999400839, 'f1': 0.6735537190082646, 'number': 1669}, 'eval_per': {'precision': 0.7375838926174496, 'recall': 0.7992727272727272, 'f1': 0.7671902268760908, 'number': 1375}, 'eval_tim': {'precision': 0.8611793611793612, 'recall': 0.869727047146402, 'f1': 0.8654320987654321, 'number': 1612}, 'eval_overall_precision': 0.8156967431927389, '

  0%|          | 0/240 [00:00<?, ?it/s]

Trainer is attempting to log a value of "{'precision': 0.2857142857142857, 'recall': 0.12121212121212122, 'f1': 0.1702127659574468, 'number': 33}" of type <class 'dict'> for key "eval/art" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.30434782608695654, 'recall': 0.2413793103448276, 'f1': 0.2692307692307692, 'number': 29}" of type <class 'dict'> for key "eval/eve" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8505359877488514, 'recall': 0.8880716341541414, 'f1': 0.8688986232790988, 'number': 3127}" of type <class 'dict'> for key "eval/geo" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.9631083202511774, 'recall': 0.9570982839313572, 

{'eval_loss': 0.1561158448457718, 'eval_art': {'precision': 0.2857142857142857, 'recall': 0.12121212121212122, 'f1': 0.1702127659574468, 'number': 33}, 'eval_eve': {'precision': 0.30434782608695654, 'recall': 0.2413793103448276, 'f1': 0.2692307692307692, 'number': 29}, 'eval_geo': {'precision': 0.8505359877488514, 'recall': 0.8880716341541414, 'f1': 0.8688986232790988, 'number': 3127}, 'eval_gpe': {'precision': 0.9631083202511774, 'recall': 0.9570982839313572, 'f1': 0.9600938967136151, 'number': 1282}, 'eval_nat': {'precision': 0.5, 'recall': 0.4, 'f1': 0.4444444444444445, 'number': 15}, 'eval_org': {'precision': 0.6582132564841499, 'recall': 0.6842420611144397, 'f1': 0.6709753231492361, 'number': 1669}, 'eval_per': {'precision': 0.7460097154753643, 'recall': 0.7818181818181819, 'f1': 0.7634943181818182, 'number': 1375}, 'eval_tim': {'precision': 0.8461070559610706, 'recall': 0.8629032258064516, 'f1': 0.8544226044226044, 'number': 1612}, 'eval_overall_precision': 0.810905612244898, 'ev

  0%|          | 0/240 [00:00<?, ?it/s]

Trainer is attempting to log a value of "{'precision': 0.2857142857142857, 'recall': 0.12121212121212122, 'f1': 0.1702127659574468, 'number': 33}" of type <class 'dict'> for key "eval/art" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.3684210526315789, 'recall': 0.2413793103448276, 'f1': 0.2916666666666667, 'number': 29}" of type <class 'dict'> for key "eval/eve" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8408685306365259, 'recall': 0.9040614007035497, 'f1': 0.871320696563415, 'number': 3127}" of type <class 'dict'> for key "eval/geo" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.9479724560061209, 'recall': 0.9664586583463338, 'f

{'eval_loss': 0.1776302456855774, 'eval_art': {'precision': 0.2857142857142857, 'recall': 0.12121212121212122, 'f1': 0.1702127659574468, 'number': 33}, 'eval_eve': {'precision': 0.3684210526315789, 'recall': 0.2413793103448276, 'f1': 0.2916666666666667, 'number': 29}, 'eval_geo': {'precision': 0.8408685306365259, 'recall': 0.9040614007035497, 'f1': 0.871320696563415, 'number': 3127}, 'eval_gpe': {'precision': 0.9479724560061209, 'recall': 0.9664586583463338, 'f1': 0.9571263035921206, 'number': 1282}, 'eval_nat': {'precision': 0.5454545454545454, 'recall': 0.4, 'f1': 0.4615384615384615, 'number': 15}, 'eval_org': {'precision': 0.6949585194639438, 'recall': 0.652486518873577, 'f1': 0.6730531520395551, 'number': 1669}, 'eval_per': {'precision': 0.7524271844660194, 'recall': 0.7890909090909091, 'f1': 0.7703230386936458, 'number': 1375}, 'eval_tim': {'precision': 0.8699690402476781, 'recall': 0.8715880893300249, 'f1': 0.8707778122094825, 'number': 1612}, 'eval_overall_precision': 0.82060619

  0%|          | 0/240 [00:00<?, ?it/s]

Trainer is attempting to log a value of "{'precision': 0.4, 'recall': 0.12121212121212122, 'f1': 0.186046511627907, 'number': 33}" of type <class 'dict'> for key "eval/art" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.30434782608695654, 'recall': 0.2413793103448276, 'f1': 0.2692307692307692, 'number': 29}" of type <class 'dict'> for key "eval/eve" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8405494177366378, 'recall': 0.9002238567316917, 'f1': 0.8693638048177889, 'number': 3127}" of type <class 'dict'> for key "eval/geo" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.9660883280757098, 'recall': 0.9555382215288611, 'f1': 0.96078431

{'eval_loss': 0.1858556568622589, 'eval_art': {'precision': 0.4, 'recall': 0.12121212121212122, 'f1': 0.186046511627907, 'number': 33}, 'eval_eve': {'precision': 0.30434782608695654, 'recall': 0.2413793103448276, 'f1': 0.2692307692307692, 'number': 29}, 'eval_geo': {'precision': 0.8405494177366378, 'recall': 0.9002238567316917, 'f1': 0.8693638048177889, 'number': 3127}, 'eval_gpe': {'precision': 0.9660883280757098, 'recall': 0.9555382215288611, 'f1': 0.9607843137254901, 'number': 1282}, 'eval_nat': {'precision': 0.5, 'recall': 0.4, 'f1': 0.4444444444444445, 'number': 15}, 'eval_org': {'precision': 0.6785058175137784, 'recall': 0.6638705811863391, 'f1': 0.6711084191399151, 'number': 1669}, 'eval_per': {'precision': 0.7519217330538085, 'recall': 0.7825454545454545, 'f1': 0.7669280114041339, 'number': 1375}, 'eval_tim': {'precision': 0.8651477832512315, 'recall': 0.8715880893300249, 'f1': 0.8683559950556241, 'number': 1612}, 'eval_overall_precision': 0.8177540106951872, 'eval_overall_reca

  0%|          | 0/240 [00:00<?, ?it/s]

Trainer is attempting to log a value of "{'precision': 0.3076923076923077, 'recall': 0.12121212121212122, 'f1': 0.17391304347826086, 'number': 33}" of type <class 'dict'> for key "eval/art" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.35, 'recall': 0.2413793103448276, 'f1': 0.2857142857142857, 'number': 29}" of type <class 'dict'> for key "eval/eve" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8532110091743119, 'recall': 0.8922289734569875, 'f1': 0.8722838830701891, 'number': 3127}" of type <class 'dict'> for key "eval/geo" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.9461123941493457, 'recall': 0.9586583463338534, 'f1': 0.952344

{'eval_loss': 0.20449940860271454, 'eval_art': {'precision': 0.3076923076923077, 'recall': 0.12121212121212122, 'f1': 0.17391304347826086, 'number': 33}, 'eval_eve': {'precision': 0.35, 'recall': 0.2413793103448276, 'f1': 0.2857142857142857, 'number': 29}, 'eval_geo': {'precision': 0.8532110091743119, 'recall': 0.8922289734569875, 'f1': 0.8722838830701891, 'number': 3127}, 'eval_gpe': {'precision': 0.9461123941493457, 'recall': 0.9586583463338534, 'f1': 0.9523440526927547, 'number': 1282}, 'eval_nat': {'precision': 0.5333333333333333, 'recall': 0.5333333333333333, 'f1': 0.5333333333333333, 'number': 15}, 'eval_org': {'precision': 0.6619469026548672, 'recall': 0.6722588376273217, 'f1': 0.6670630202140309, 'number': 1669}, 'eval_per': {'precision': 0.7565463552724699, 'recall': 0.7774545454545455, 'f1': 0.7668579626972741, 'number': 1375}, 'eval_tim': {'precision': 0.8694841516469857, 'recall': 0.8678660049627791, 'f1': 0.8686743247438682, 'number': 1612}, 'eval_overall_precision': 0.817

Checkpoint destination directory ./results\checkpoint-17272 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.0059, 'learning_rate': 9.485873089393238e-06, 'epoch': 8.11}
{'loss': 0.005, 'learning_rate': 8.327929597035665e-06, 'epoch': 8.34}
{'loss': 0.0049, 'learning_rate': 7.169986104678092e-06, 'epoch': 8.57}
{'loss': 0.0046, 'learning_rate': 6.012042612320519e-06, 'epoch': 8.8}


  0%|          | 0/240 [00:00<?, ?it/s]

Trainer is attempting to log a value of "{'precision': 0.2857142857142857, 'recall': 0.12121212121212122, 'f1': 0.1702127659574468, 'number': 33}" of type <class 'dict'> for key "eval/art" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.2727272727272727, 'recall': 0.20689655172413793, 'f1': 0.23529411764705882, 'number': 29}" of type <class 'dict'> for key "eval/eve" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8543127095397745, 'recall': 0.8963863127598337, 'f1': 0.8748439450686641, 'number': 3127}" of type <class 'dict'> for key "eval/geo" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.947652040030793, 'recall': 0.9602184087363494, 

{'eval_loss': 0.21622690558433533, 'eval_art': {'precision': 0.2857142857142857, 'recall': 0.12121212121212122, 'f1': 0.1702127659574468, 'number': 33}, 'eval_eve': {'precision': 0.2727272727272727, 'recall': 0.20689655172413793, 'f1': 0.23529411764705882, 'number': 29}, 'eval_geo': {'precision': 0.8543127095397745, 'recall': 0.8963863127598337, 'f1': 0.8748439450686641, 'number': 3127}, 'eval_gpe': {'precision': 0.947652040030793, 'recall': 0.9602184087363494, 'f1': 0.9538938395970554, 'number': 1282}, 'eval_nat': {'precision': 0.5, 'recall': 0.4666666666666667, 'f1': 0.4827586206896552, 'number': 15}, 'eval_org': {'precision': 0.6743480897513645, 'recall': 0.6662672258837627, 'f1': 0.6702833031946956, 'number': 1669}, 'eval_per': {'precision': 0.7489597780859917, 'recall': 0.7854545454545454, 'f1': 0.7667731629392971, 'number': 1375}, 'eval_tim': {'precision': 0.8627935723114957, 'recall': 0.8660049627791563, 'f1': 0.8643962848297212, 'number': 1612}, 'eval_overall_precision': 0.8179

In [7]:
# results = trainer.evaluate(tokenized_datasets["test"])
# print(results)


### Evaluate on test data

In [5]:
from transformers import DistilBertTokenizerFast, DistilBertForTokenClassification

# Load saved model
model_path = "results/checkpoint-19431"
tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)
model = DistilBertForTokenClassification.from_pretrained(model_path)


In [6]:
from transformers import Trainer
import evaluate
import numpy as np

seqeval = evaluate.load("seqeval")

label_list = model.config.id2label

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_labels = [
        [label_list[l] for l in label if l != -100]
        for label in labels
    ]
    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

results = trainer.evaluate(tokenized_datasets["test"])
print(results)


You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


  0%|          | 0/1199 [00:00<?, ?it/s]

{'eval_loss': 0.21233141422271729, 'eval_precision': 0.8171621862204024, 'eval_recall': 0.8329930583911801, 'eval_f1': 0.8250016850524613, 'eval_accuracy': 0.9673390519695527, 'eval_runtime': 38.4888, 'eval_samples_per_second': 249.215, 'eval_steps_per_second': 31.152}


### Results - after 9 iterations

In [9]:
# {
#     'eval_loss': 0.21233141422271729, 
#     'eval_precision': 0.8171621862204024, 
#     'eval_recall': 0.8329930583911801, 
#     'eval_f1': 0.8250016850524613, 
#     'eval_accuracy': 0.9673390519695527, 
#     'eval_runtime': 38.4888, 
#     'eval_samples_per_second': 249.215, 
#     'eval_steps_per_second': 31.152
# }


### Prediction script for api

In [None]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

sentence = ["Michael", "Jackson", "lives", "in", "New", "Delhi"]
sentence = ['They','say','not','all','of','the','rockets','exploded','upon','impact','.']
sentence = ['The', 'former', 'prime', 'minister', 'was', 'replaced', 'October', '19', 'by', 'hardliner', 'Lieutenant', 'General', 'Soe', 'Win', '.']
sentence = ['President', 'Karzai', 'thanked', 'his', 'allies', 'for', 'their', 'help', 'in', 'battling', 'terrorism', '.']

inputs = tokenizer(
    sentence,
    is_split_into_words=True,
    return_tensors="pt",
    truncation=True,
    padding=True
).to(device)   # <<< move tensors to GPU

with torch.no_grad():
    outputs = model(**inputs).logits

predictions = outputs.argmax(-1).squeeze().tolist()
predicted_labels = [model.config.id2label[p] for p in predictions[:len(sentence)]]

print(list(zip(sentence, predicted_labels)))


[('President', 'O'), ('Karzai', 'B-per'), ('thanked', 'I-per'), ('his', 'I-per'), ('allies', 'I-per'), ('for', 'O'), ('their', 'O'), ('help', 'O'), ('in', 'O'), ('battling', 'O'), ('terrorism', 'O'), ('.', 'O')]
