# 🏷️ Token Classification with Transformers (NER, POS, Chunking)

Token classification assigns a label to **each token** in a sequence (e.g., NER, POS tagging, chunking).  
We'll fine-tune BERT for NER using the CoNLL-2003 dataset.  
Let's walk through from data loading to model training and inference!


Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

In [None]:
!pip install datasets evaluate transformers[sentencepiece]
!pip install accelerate
# To run the training on TPU, you will need to uncomment the following line:
# !pip install cloud-tpu-client==0.10 torch==1.9.0 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl
!apt install git-lfs

In [None]:
pip install datasets==3.6.0 --force-reinstall


You will need to set up git: fill in your email and name in the following cell.

In [None]:
!git config --global user.email ""
!git config --global user.name ""

You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials.

In [None]:
from huggingface_hub import notebook_login

# Prompt for Hugging Face Hub credentials inside the notebook.
notebook_login()

## 1️⃣ Load and Inspect the CoNLL-2003 Dataset

News stories pre-tokenized into words, with multiple label columns.


In [None]:
from datasets import load_dataset

# Load the "conll2003" dataset and print the returned object to inspect it.
raw_datasets=load_dataset("conll2003")
print(raw_datasets)

## 2️⃣ Visualize Token and Label Mappings

Get tokens and integer NER labels for sample sentences, map them to class names.


In [None]:
# Look at the first training example: its words and their integer NER tags.
first_example = raw_datasets["train"][0]
print("Tokens:", first_example["tokens"])
print("NER tag IDs:", first_example["ner_tags"])

# The "ner_tags" feature carries the class names that the integer ids index into.
ner_feature = raw_datasets["train"].features["ner_tags"]
label_names = ner_feature.feature.names
print("NER Classes:", label_names)

In [None]:
# Print the first training sentence with each NER class name lined up under its word.
first_sentence = raw_datasets["train"][0]
words = first_sentence["tokens"]
labels = first_sentence["ner_tags"]

word_cells = []
label_cells = []
for word, tag_id in zip(words, labels):
    tag_name = label_names[tag_id]
    # Both cells share the width of the longer text plus one separating space.
    cell_width = max(len(word), len(tag_name)) + 1
    word_cells.append(word.ljust(cell_width))
    label_cells.append(tag_name.ljust(cell_width))

output_word = "".join(word_cells)
output_label = "".join(label_cells)
print(output_word)
print(output_label)

## "Your turn": Show POS and Chunking labels for the same sentences

Change `label_field` below to `"pos_tags"` or `"chunk_tags"` to explore other annotation layers.


In [None]:
def print_word_labels(idx, label_field="ner_tags"):
    """Print one training sentence with its label names aligned under the words.

    Reads the module-level ``raw_datasets``. Two lines are printed (words, then
    label names), followed by an empty line.

    Args:
        idx: Index of the example in the "train" split.
        label_field: Name of the label column to show, e.g. "ner_tags",
            "pos_tags" or "chunk_tags".
    """
    train_split = raw_datasets["train"]
    example = train_split[idx]
    names = train_split.features[label_field].feature.names

    word_cells = []
    label_cells = []
    for word, tag_id in zip(example["tokens"], example[label_field]):
        tag_name = names[tag_id]
        # Both cells share the width of the longer text plus one separating space.
        cell_width = max(len(word), len(tag_name)) + 1
        word_cells.append(word.ljust(cell_width))
        label_cells.append(tag_name.ljust(cell_width))

    print("".join(word_cells))
    print("".join(label_cells))
    print()

# Your turn: print POS and chunk tags for the examples at index 0 and index 4
# (the first and fifth training sentences).
for example_idx in (0, 4):
    for field in ("pos_tags", "chunk_tags"):
        print_word_labels(example_idx, field)


## 3️⃣ Tokenizer: Fast Subword Mapping and Label Alignment

Use a BERT tokenizer (fast) and align token-level labels to subwords.


In [None]:
from transformers import AutoTokenizer

model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
print("Is fast tokenizer?", tokenizer.is_fast)

# The input is already a list of words, hence is_split_into_words=True.
first_sentence_words = raw_datasets["train"][0]["tokens"]
inputs = tokenizer(first_sentence_words, is_split_into_words=True)
print("Tokenized:", inputs.tokens())
# word_ids(): for every token, the index of the word it came from (None = special token)
print("Word IDs:", inputs.word_ids())

In [None]:
# Align word-level labels to tokens (NER example)
def align_labels_with_tokens(labels, word_ids):
    """Expand word-level label ids to one label id per token.

    Args:
        labels: Label ids, one per word.
        word_ids: For each token, the index of its word, or None for a
            special token.

    Returns:
        A list with one entry per token: -100 for special tokens, the word's
        label for the first token of a word, and for the following tokens of
        the same word the word's label with odd ids ('B-XXX') bumped by one
        to the matching 'I-XXX' id.
    """
    aligned = []
    previous_word = None
    for word_id in word_ids:
        if word_id is None:
            # Special token: marked with -100.
            aligned.append(-100)
        elif word_id != previous_word:
            # First token of a new word keeps the word's label.
            aligned.append(labels[word_id])
        else:
            # Continuation token: a 'B-XXX' class (odd index) becomes 'I-XXX'.
            tag = labels[word_id]
            aligned.append(tag + 1 if tag % 2 == 1 else tag)
        previous_word = word_id
    return aligned

# Try the alignment on the first training example, reusing the tokenizer
# output `inputs` from the previous cell.
labels = raw_datasets["train"][0]["ner_tags"]
word_ids = inputs.word_ids()
aligned_labels = align_labels_with_tokens(labels, word_ids)
print("Original labels:", labels)
print("Aligned token labels:", aligned_labels)



### "Your turn": Assign only one label per word, and -100 for all other subtokens.


In [None]:
def align_labels_single_per_word(labels, word_ids):
    """Label only the first token of each word; every other token gets -100.

    Args:
        labels: Label ids, one per word.
        word_ids: For each token, the index of its word, or None for a
            special token.

    Returns:
        A list with one entry per token: the word's label on the first token
        of each word, and -100 on special tokens and on the remaining tokens
        of a word.
    """
    new_labels = []
    previous_word = None
    for word_id in word_ids:
        if word_id is None or word_id == previous_word:
            # Special token, or a later sub-token of the word already labelled.
            new_labels.append(-100)
        else:
            new_labels.append(labels[word_id])
        previous_word = word_id
    return new_labels

# Uses `labels` and `word_ids` of the first training example from the earlier alignment cell.
print("Single-label per word:", align_labels_single_per_word(labels, word_ids))


## 4️⃣ Preprocessing: Mapping across dataset splits

Apply tokenization and alignment to all samples using batched map.


In [None]:
def tokenize_and_align_labels(examples):
    """Tokenize a batch of pre-split sentences and attach token-aligned NER labels.

    Uses the module-level ``tokenizer`` and ``align_labels_with_tokens``.

    Args:
        examples: A batch with a "tokens" column (word lists) and a
            "ner_tags" column (word-level label ids), one entry per sentence.

    Returns:
        The tokenizer output for the batch with an added "labels" entry
        holding one aligned label list per sentence.
    """
    encodings = tokenizer(
        examples["tokens"],
        truncation=True,
        is_split_into_words=True,
    )

    aligned = []
    for row, word_tags in enumerate(examples["ner_tags"]):
        # word_ids(row) gives the token-to-word mapping of the row-th sentence.
        aligned.append(align_labels_with_tokens(word_tags, encodings.word_ids(row)))

    encodings["labels"] = aligned
    return encodings

# Apply the preprocessing in batches. The original columns are removed, so
# only what `tokenize_and_align_labels` returns is kept.
tokenized_datasets=raw_datasets.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=raw_datasets["train"].column_names
)

## 5️⃣ Data Collator for Token Classification

Pad both inputs and labels (label positions added as padding are filled with -100, the value used throughout this notebook for positions to ignore), producing tensors for training.


In [None]:
from transformers import DataCollatorForTokenClassification

data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Collate the first two training examples to see how their labels are padded.
first_two = [tokenized_datasets["train"][idx] for idx in (0, 1)]
batch = data_collator(first_two)
print(batch["labels"])

## 6️⃣ Metrics: F1, Precision, Recall, Accuracy using SeqEval

Evaluate token classification predictions with seqeval.


In [None]:
! pip install seqeval

In [None]:
!pip install evaluate


In [None]:

import evaluate
metric = evaluate.load("seqeval")

# Reference: class names of the first training sentence.
labels = raw_datasets["train"][0]["ner_tags"]
true_str_labels = [label_names[tag_id] for tag_id in labels]

# Fake prediction: identical to the reference except position 2, set to "O".
fake_pred = list(true_str_labels)
fake_pred[2] = "O"

print(metric.compute(predictions=[fake_pred], references=[true_str_labels]))


### Metric Function for Trainer

Aggregates metrics from all predictions/labels.


In [None]:
import numpy as np

def compute_metrics(eval_preds):
    """Turn logits and label ids into overall seqeval scores.

    Uses the module-level ``metric`` and ``label_names``. Positions whose
    label id is -100 are left out of both predictions and references.

    Args:
        eval_preds: A pair ``(logits, labels)``; the predicted class of each
            position is the argmax of the logits over the last axis.
            Assumes logits and labels describe the same sequences and
            positions -- TODO confirm against the caller.

    Returns:
        A dict with the keys "precision", "recall", "f1" and "accuracy",
        taken from the metric's "overall_*" entries.
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    true_labels = []
    true_predictions = []
    for predicted_row, label_row in zip(predictions, labels):
        row_labels = []
        row_predictions = []
        for predicted_id, label_id in zip(predicted_row, label_row):
            if label_id == -100:
                continue
            row_labels.append(label_names[label_id])
            row_predictions.append(label_names[predicted_id])
        true_labels.append(row_labels)
        true_predictions.append(row_predictions)

    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        name: all_metrics[f"overall_{name}"]
        for name in ("precision", "recall", "f1", "accuracy")
    }

In [None]:
# Rebind `labels` from the integer tag ids of the first training example to
# their class-name strings.
labels = raw_datasets["train"][0]["ner_tags"]
labels = [label_names[i] for i in labels]
labels  # bare expression: shown as the cell output

## 7️⃣ Model Setup

Define the BERT token classification model with label mappings.


In [None]:
from transformers import AutoModelForTokenClassification

# Two-way mapping between class ids and class names, handed to the model.
id2label = dict(enumerate(label_names))
label2id = {name: idx for idx, name in id2label.items()}

model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint, id2label=id2label, label2id=label2id
)
print("Model label count:", model.config.num_labels)


## 8️⃣ Training Arguments and Trainer

Log in, set up Trainer, then fine-tune!


In [None]:
from huggingface_hub import notebook_login

# push_to_hub=True below uploads to the Hub, which requires being logged in.
notebook_login()

from transformers import TrainingArguments, Trainer

args = TrainingArguments(
    "bert-finetuned-ner",
    # `evaluation_strategy` was renamed to `eval_strategy`; current
    # transformers releases reject the old keyword with a TypeError.
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class=tokenizer,
)
trainer.train()
trainer.push_to_hub(commit_message="Training complete")


## 9️⃣ Inference with the Fine-Tuned Model

Test your fine-tuned pipeline as shown in the course!


In [None]:
from transformers import pipeline

# NOTE(review): this rebinds the notebook-wide `model_checkpoint` (set to
# "bert-base-cased" in the tokenizer cell) to an already fine-tuned Hub
# checkpoint; any later cell that reads `model_checkpoint` sees this value.
model_checkpoint = "huggingface-course/bert-finetuned-ner"
token_classifier = pipeline(
    "token-classification", model=model_checkpoint, aggregation_strategy="simple"
)

# Run the pipeline on one sentence and print what it returns.
result = token_classifier("My name is Sylvain and I work at Hugging Face in Brooklyn.")
print(result)


In [None]:
from torch.utils.data import DataLoader

# Both loaders build batches of 8 with `data_collator`; only the training
# loader shuffles.
train_dataloader = DataLoader(
    tokenized_datasets["train"],
    shuffle=True,
    collate_fn=data_collator,
    batch_size=8,
)
eval_dataloader = DataLoader(
    tokenized_datasets["validation"], collate_fn=data_collator, batch_size=8
)

In [None]:
# Re-create the model from the pretrained base checkpoint for the custom
# training loop. `model_checkpoint` is not used here because the inference
# cell above rebinds it to an already fine-tuned Hub model, which would make
# this loop continue from fine-tuned weights instead of the base model.
base_checkpoint = "bert-base-cased"
model = AutoModelForTokenClassification.from_pretrained(
    base_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
from torch.optim import AdamW

# AdamW over all model parameters with a learning rate of 2e-5.
optimizer = AdamW(model.parameters(), lr=2e-5)

In [None]:
from accelerate import Accelerator

# Hand the model, optimizer and both dataloaders to Accelerate. The returned
# objects replace the originals and are the ones the training loop uses.
accelerator = Accelerator()
model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
    model, optimizer, train_dataloader, eval_dataloader
)

In [None]:
from transformers import get_scheduler

num_train_epochs = 3
# The training loop steps the scheduler once per training batch, so the total
# number of steps is epochs * batches per epoch. The length is read from the
# dataloader returned by `accelerator.prepare`.
num_update_steps_per_epoch = len(train_dataloader)
num_training_steps = num_train_epochs * num_update_steps_per_epoch

# "linear" schedule with no warmup steps, spanning the whole run.
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

In [None]:
# NOTE(review): `Repository` is reported as deprecated in newer
# huggingface_hub releases -- confirm the installed version still provides it.
from huggingface_hub import Repository, get_full_repo_name

model_name = "bert-finetuned-ner-accelerate"
# Full Hub repository id for `model_name` (presumably namespaced under the
# logged-in user -- confirm).
repo_name = get_full_repo_name(model_name)
repo_name  # bare expression: shown as the cell output

In [None]:
# Local folder tied to the Hub repository `repo_name` (cloned from it). The
# training loop saves into this folder and pushes from it.
output_dir = "bert-finetuned-ner-accelerate"
repo = Repository(output_dir, clone_from=repo_name)

In [None]:
def postprocess(predictions, labels):
    """Convert predicted and reference label ids into lists of class names.

    Uses the module-level ``label_names``. Positions whose reference id is
    -100 are dropped from both outputs.

    Args:
        predictions: Tensor of predicted class ids.
        labels: Tensor of reference label ids.
            Assumes both tensors have the same shape -- TODO confirm.

    Returns:
        A pair ``(true_labels, true_predictions)`` -- references first,
        predictions second -- each a list with one list of class-name
        strings per row.
    """
    predicted_rows = predictions.detach().cpu().clone().numpy()
    label_rows = labels.detach().cpu().clone().numpy()

    true_labels = []
    true_predictions = []
    for predicted_row, label_row in zip(predicted_rows, label_rows):
        row_labels = []
        row_predictions = []
        for predicted_id, label_id in zip(predicted_row, label_row):
            # -100 marks an ignored position (special tokens); skip it.
            if label_id == -100:
                continue
            row_labels.append(label_names[label_id])
            row_predictions.append(label_names[predicted_id])
        true_labels.append(row_labels)
        true_predictions.append(row_predictions)

    return true_labels, true_predictions

In [None]:
from tqdm.auto import tqdm
import torch

# One progress-bar tick per optimizer step over the whole run.
progress_bar = tqdm(range(num_training_steps))

for epoch in range(num_train_epochs):
    # Training
    model.train()
    for batch in train_dataloader:
        outputs = model(**batch)
        loss = outputs.loss
        accelerator.backward(loss)

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)

    # Evaluation
    model.eval()
    for batch in eval_dataloader:
        with torch.no_grad():
            outputs = model(**batch)

        predictions = outputs.logits.argmax(dim=-1)
        labels = batch["labels"]

        # Necessary to pad predictions and labels for being gathered
        predictions = accelerator.pad_across_processes(predictions, dim=1, pad_index=-100)
        labels = accelerator.pad_across_processes(labels, dim=1, pad_index=-100)

        predictions_gathered = accelerator.gather(predictions)
        labels_gathered = accelerator.gather(labels)

        # postprocess() returns (true_labels, true_predictions), in that order.
        # Unpacking in the opposite order would pass references as predictions
        # to the metric and exchange precision with recall.
        true_labels, true_predictions = postprocess(predictions_gathered, labels_gathered)
        metric.add_batch(predictions=true_predictions, references=true_labels)

    results = metric.compute()
    print(
        f"epoch {epoch}:",
        {
            key: results[f"overall_{key}"]
            for key in ["precision", "recall", "f1", "accuracy"]
        },
    )

    # Save and upload
    accelerator.wait_for_everyone()
    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save)
    # Only the main process writes the tokenizer and pushes to the Hub.
    if accelerator.is_main_process:
        tokenizer.save_pretrained(output_dir)
        repo.push_to_hub(
            commit_message=f"Training in progress epoch {epoch}", blocking=False
        )

In [None]:
# Save the unwrapped model to `output_dir` once every process has reached
# this point.
# NOTE(review): the training loop already performs the same save at the end
# of every epoch, so this appears to repeat the last one -- confirm it is needed.
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save)

In [None]:
from transformers import pipeline

# Replace this with your own checkpoint
model_checkpoint = "huggingface-course/bert-finetuned-ner"
# aggregation_strategy="simple" presumably groups sub-tokens into whole
# entities in the output -- confirm against the pipeline documentation.
token_classifier = pipeline(
    "token-classification", model=model_checkpoint, aggregation_strategy="simple"
)
# Bare call: the result is shown as the cell output.
token_classifier("My name is Sylvain and I work at Hugging Face in Brooklyn.")