In [1]:
import datasets
import numpy as np
import pandas as pd
from evaluate import load


In [2]:
# Load the "conll2003" dataset; the summary printed in the next cell shows its
# train / validation / test splits and their feature columns.
data = datasets.load_dataset("conll2003")

In [3]:
# Show the available splits with their feature columns and row counts.
print(data)

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3453
    })
})


In [4]:
def label_map(data):
    """Build lookup callables between NER tag ids and NER tag names.

    The tag names are read from the 'ner_tags' feature of the 'train' split
    (``data['train'].features['ner_tags'].feature.names``); the position of a
    name in that sequence is its id.

    Args:
        data: Mapping of split name to a dataset exposing ``.features``.

    Returns:
        A pair ``(id_to_label, label_to_id)`` of one-argument callables.
        ``id_to_label(i)`` returns the name stored for id ``i`` and
        ``label_to_id(name)`` returns the id stored for ``name``.

    Raises:
        KeyError: from either callable when the id / name is unknown.
    """
    names = data['train'].features['ner_tags'].feature.names

    # Forward table: label id -> label name.
    name_by_id = dict(enumerate(names))

    # Reverse table: label name -> label id (a repeated name keeps its last id).
    id_by_name = {}
    for idx, name in enumerate(names):
        id_by_name[name] = idx

    def to_label(x):
        return name_by_id[x]

    def to_id(label):
        return id_by_name[label]

    return to_label, to_id

# Build the id <-> name lookup callables once. `id2label` is read as a global by
# recognize_named_entities below; `label2id` is not used in the cells shown here.
id2label, label2id = label_map(data)

In [5]:
def recognize_named_entities(dataset, train_split='train', eval_split='test', id_to_label=None):
    """Tag a split with a memorised ``(token, previous tag) -> tag`` lookup table.

    Training: every token of ``dataset[train_split]`` whose tag id is non-zero
    is stored under the key ``(token, name of the gold tag before it)``. The
    first token of a sentence uses tag id 0 as its "previous" tag. Tokens with
    tag id 0 are never stored, and when the same key occurs several times the
    last occurrence wins (no frequency voting).

    Prediction: each sentence of ``dataset[eval_split]`` is tagged left to
    right, starting from 'O'. A token found in the table (together with the
    previously *predicted* tag) gets the stored tag; any other token gets 'O'.

    This assumes tag id 0 is the outside tag and that its name is 'O'.

    Args:
        dataset: Mapping of split name to something ``pandas.DataFrame`` accepts
            and that yields 'tokens' and 'ner_tags' columns holding one
            sequence per sentence.
        train_split: Name of the split the lookup table is built from.
        eval_split: Name of the split that is tagged and returned.
        id_to_label: Optional callable mapping a tag id to its tag name. When
            omitted, the notebook-level ``id2label`` is used.

    Returns:
        ``(predictions, references)``: two lists with one list of tag names per
        sentence of the evaluated split.
    """
    # Default to the notebook-level mapper so existing one-argument calls are unchanged.
    to_label = id2label if id_to_label is None else id_to_label
    outside_id, outside_label = 0, 'O'

    # --- Build the lookup table from the training split -------------------
    train_df = pd.DataFrame(dataset[train_split])
    named_entity_dict = {}
    for tokens, tags in zip(train_df['tokens'], train_df['ner_tags']):
        # Force a real list: `[0] + ndarray` would add element-wise instead of
        # prepending, which silently mis-aligns the previous-tag context.
        tags = list(tags)
        previous_tags = [outside_id] + tags[:-1]
        for tok, prev, tag in zip(tokens, previous_tags, tags):
            if tag != outside_id:
                # Plain assignment: a later occurrence overwrites an earlier one.
                named_entity_dict[(tok, to_label(prev))] = to_label(tag)

    # --- Greedy left-to-right tagging of the evaluation split --------------
    test_df = pd.DataFrame(dataset[eval_split])
    recognized_entities = []
    for sentence in test_df['tokens']:
        last_tag = outside_label
        pred_sentence = []
        for word in sentence:
            # Unknown (word, previous tag) pairs fall back to the outside tag.
            last_tag = named_entity_dict.get((word, last_tag), outside_label)
            pred_sentence.append(last_tag)
        recognized_entities.append(pred_sentence)

    references = [[to_label(tag) for tag in sentence] for sentence in test_df['ner_tags']]
    return recognized_entities, references

In [6]:
# Run the lookup baseline: predicted and gold tag names for the test split,
# each as a list of per-sentence lists.
predicts, labels = recognize_named_entities(data)
# Score with the "seqeval" metric obtained through `evaluate.load`.
metric = load("seqeval")
# Last expression of the cell, so the resulting score dict is shown as the cell output.
metric.compute(predictions=predicts, references=labels)



{'LOC': {'precision': np.float64(0.6159029649595688),
  'recall': np.float64(0.5479616306954437),
  'f1': np.float64(0.5799492385786802),
  'number': np.int64(1668)},
 'MISC': {'precision': np.float64(0.6376582278481012),
  'recall': np.float64(0.5740740740740741),
  'f1': np.float64(0.6041979010494752),
  'number': np.int64(702)},
 'ORG': {'precision': np.float64(0.18875780707841777),
  'recall': np.float64(0.32751354605659244),
  'f1': np.float64(0.23948932423508693),
  'number': np.int64(1661)},
 'PER': {'precision': np.float64(0.3693107932379714),
  'recall': np.float64(0.1756338899196042),
  'f1': np.float64(0.23805532271584243),
  'number': np.int64(1617)},
 'overall_precision': np.float64(0.3719438182764002),
 'overall_recall': np.float64(0.37978045325779036),
 'overall_f1': np.float64(0.37582128777923784),
 'overall_accuracy': 0.8463012813610423}