In [1]:
import sys
sys.path.append("../src")
from mistral_client import run_mistral
from ner_post_processing import parse_entities_promptner, get_token_labels

import pandas as pd
from tqdm import tqdm
from datasets import load_dataset
from collections import defaultdict

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hugging Face dataset repository and the configuration used throughout this notebook.
DATASET_REPO = "DFKI-SLT/cross_ner"
DATASET_CONFIG = "politics"

dataset = load_dataset(DATASET_REPO, DATASET_CONFIG)

In [3]:
# Tag names come from the dataset's own `ner_tags` feature, so the integer ids
# used below match the ids stored in the dataset.
class_labels = dataset["validation"].features["ner_tags"].feature.names

# id -> label and the inverse label -> id lookup.
index2label = dict(enumerate(class_labels))
label2index = {label: index for index, label in index2label.items()}

label2index

{'O': 0,
 'B-academicjournal': 1,
 'I-academicjournal': 2,
 'B-album': 3,
 'I-album': 4,
 'B-algorithm': 5,
 'I-algorithm': 6,
 'B-astronomicalobject': 7,
 'I-astronomicalobject': 8,
 'B-award': 9,
 'I-award': 10,
 'B-band': 11,
 'I-band': 12,
 'B-book': 13,
 'I-book': 14,
 'B-chemicalcompound': 15,
 'I-chemicalcompound': 16,
 'B-chemicalelement': 17,
 'I-chemicalelement': 18,
 'B-conference': 19,
 'I-conference': 20,
 'B-country': 21,
 'I-country': 22,
 'B-discipline': 23,
 'I-discipline': 24,
 'B-election': 25,
 'I-election': 26,
 'B-enzyme': 27,
 'I-enzyme': 28,
 'B-event': 29,
 'I-event': 30,
 'B-field': 31,
 'I-field': 32,
 'B-literarygenre': 33,
 'I-literarygenre': 34,
 'B-location': 35,
 'I-location': 36,
 'B-magazine': 37,
 'I-magazine': 38,
 'B-metrics': 39,
 'I-metrics': 40,
 'B-misc': 41,
 'I-misc': 42,
 'B-musicalartist': 43,
 'I-musicalartist': 44,
 'B-musicalinstrument': 45,
 'I-musicalinstrument': 46,
 'B-musicgenre': 47,
 'I-musicgenre': 48,
 'B-organisation': 49,
 'I-o

In [4]:
# Display the DatasetDict to inspect the available splits, their features and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'ner_tags'],
        num_rows: 200
    })
    validation: Dataset({
        features: ['id', 'tokens', 'ner_tags'],
        num_rows: 541
    })
    test: Dataset({
        features: ['id', 'tokens', 'ner_tags'],
        num_rows: 651
    })
})

In [5]:
#  You are an expert linguist. Your task 
 
def prompt(text):
    """Build the few-shot entity-extraction prompt for one paragraph.

    The prompt consists of an entity definition, two worked examples with
    numbered ``candidate | True/False | reason (label)`` answers, and the
    question followed by the paragraph to annotate.

    Args:
        text: The paragraph to annotate; it is interpolated verbatim after
            ``Paragraph: ``.

    Returns:
        The complete prompt string (it starts and ends with a newline).
    """
    # NOTE: the wording below is part of the experiment; it is kept exactly as
    # it was when the recorded results were produced.
    return f"""
Dfn: An entity is a person (person), organisation (organisation), politician (politician), political party (politicalparty), event (event), election (election), 
country (country), location (location) or other political entity (misc). Dates, times, abstract concepts, adjectives, and verbs are not entities.

Example 1: Sitting as a Liberal Party of Canada Member of Parliament (MP) for Niagara Falls, she joined the Canadian Cabinet after the Liberals defeated the 
Progressive Conservative Party of Canada government of John Diefenbaker in the 1963 Canadian federal election.

Answer:
1. Liberal Party of Canada | True | as it is a political party (politicalparty)
2. Parliament | True | as it is an organisation (organisation)
3. Niagara Falls | True | as it is a location (location)
4. Canadian Cabinet | True | as it is a political entity (misc)
5. Liberals | True | as it is a political group by not the party name (misc)
6. Progressive Conservative Party of Canada | True | as it is a political party (politicalparty)
7. government | False | as it is not actually an entity in this sentence
8. John Diefenbaker | True | as it is a politician (politician)
9. 1963 Canadian federal election | True | as it is an election (election)

Example 2: The MRE took part to the consolidation of The Olive Tree as a joint electoral list both for the
2004 European Parliament election and the 2006 Italian general election, along with the Democrats of the Left
and Democracy is Freedom - The Daisy.

Answer:
1. MRE | True | as it is a political party (politicalparty)
2. consolidation | False | as it is an action
3. The Olive Tree | True | as it is a group or organisation (organisation)
4. 2004 European Parliament election | True | as it is an election (election)
5. 2006 Italian general election | True | as it is an election (election)
6. Democrats of the Left | True | as it is a political party (politicalparty)
7. Democracy is Freedom - The Daisy | True | as it is an political party (politicalparty)

Q. Given the paragraph below, identify a list of possible entities and for each entry explain why it either is or is not an entity.

Paragraph: {text}
"""


In [6]:
import evaluate

# Load the "seqeval" metric through the `evaluate` library.
metric = evaluate.load("seqeval")


def score_ner(prediction_batch, gold_batch):
    """Score predicted tag-id sequences against gold tag-id sequences.

    Every integer tag id in both batches is translated to its string label
    through the notebook-level ``index2label`` mapping before the two batches
    are handed to the loaded metric.

    Args:
        prediction_batch: Iterable of sequences of predicted tag ids.
        gold_batch: Iterable of sequences of reference tag ids.

    Returns:
        The result of ``metric.compute`` for the converted batches.
    """

    def to_labels(batch):
        # One list of string labels per sequence in the batch.
        return [[index2label[tag_id] for tag_id in sequence] for sequence in batch]

    return metric.compute(
        predictions=to_labels(prediction_batch),
        references=to_labels(gold_batch),
    )

In [7]:
# Run the prompt over every example of the test split and collect, per example,
# the prompt, the raw model output and the token-level tags derived from it.
scored = defaultdict(list)
# Ids of examples that raised and were therefore left out of `scored`.
failed_ids = []

for idx, example in enumerate(tqdm(dataset["test"])):
    # Checkpoint the rows collected so far on every 100th iteration so a crash
    # or interruption does not lose the whole run.
    if (idx + 1) % 100 == 0:
        df_scored = pd.DataFrame(scored)
        df_scored.to_csv("../data/scored/test.mistral_7b.csv", index=False)

    try:
        text = " ".join(example["tokens"])
        prompt_input = prompt(text)
        output = run_mistral(prompt_input, model="open-mistral-7b")
        ner_tags = get_token_labels(text, parse_entities_promptner(output), label2index)
    except Exception as e:
        # Best effort: keep the run going, but make the dropped example
        # traceable instead of printing only a bare error message.
        failed_ids.append(example["id"])
        print(f"[example {example['id']}] {type(e).__name__}: {e}")
    else:
        # Only record the example once every step succeeded, so all columns
        # always have the same length.
        scored["id"].append(example["id"])
        scored["tokens"].append(example["tokens"])
        scored["prompt"].append(prompt_input)
        scored["output"].append(output)
        scored["ner_tags"].append(ner_tags)

if failed_ids:
    print(f"{len(failed_ids)} example(s) skipped: {failed_ids}")

df_scored = pd.DataFrame(scored)
df_scored.to_csv("../data/scored/test.mistral_7b.csv", index=False)

100%|██████████| 651/651 [32:46<00:00,  3.02s/it]


In [8]:
# The inference loop for the test split skips examples that raise, so
# `df_scored` can hold fewer rows than the split. Look the gold tags up by
# example id instead of relying on positional order, otherwise a single skipped
# example would misalign every following prediction/reference pair.
# Assumes example ids are unique within the split - TODO confirm.
# NOTE(review): the recorded output shows 0.0 precision/recall for
# 'organisation'; worth checking how that label is produced and parsed.
gold_tags_by_id = {example["id"]: example["ner_tags"] for example in dataset["test"]}

score_ner(
    df_scored["ner_tags"].to_list(),
    [gold_tags_by_id[example_id] for example_id in df_scored["id"]],
)

  _warn_prf(average, modifier, msg_start, len(result))


{'country': {'precision': 0.6782608695652174,
  'recall': 0.37320574162679426,
  'f1': 0.4814814814814814,
  'number': 418},
 'election': {'precision': 0.8169491525423729,
  'recall': 0.5552995391705069,
  'f1': 0.6611796982167353,
  'number': 434},
 'event': {'precision': 0.6296296296296297,
  'recall': 0.4358974358974359,
  'f1': 0.5151515151515151,
  'number': 195},
 'location': {'precision': 0.7574257425742574,
  'recall': 0.5108514190317195,
  'f1': 0.6101694915254237,
  'number': 599},
 'misc': {'precision': 0.07124681933842239,
  'recall': 0.10852713178294573,
  'f1': 0.08602150537634408,
  'number': 258},
 'organisation': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 513},
 'person': {'precision': 0.5452380952380952,
  'recall': 0.6468926553672316,
  'f1': 0.5917312661498707,
  'number': 354},
 'politicalparty': {'precision': 0.7504215851602024,
  'recall': 0.4669464847848898,
  'f1': 0.5756791720569211,
  'number': 953},
 'politician': {'precision': 0.7534883720930232

In [9]:
# Run the prompt over every example of the validation split and collect the
# prompt, raw model output and derived token-level tags per example.
# NOTE(review): unlike the test-split loop, no `model=` is passed to
# `run_mistral` here, so its default model is used - confirm this is intended.
scored = defaultdict(list)
validation_csv = "../data/scored/validation.mistral_7b.csv"

for position, example in enumerate(tqdm(dataset["validation"]), start=1):
    # Checkpoint the rows gathered so far on every 100th iteration.
    if position % 100 == 0:
        df_scored = pd.DataFrame(scored)
        df_scored.to_csv(validation_csv, index=False)

    text = " ".join(example["tokens"])
    prompt_input = prompt(text)
    output = run_mistral(prompt_input)
    row = {
        "id": example["id"],
        "tokens": example["tokens"],
        "prompt": prompt_input,
        "output": output,
        "ner_tags": get_token_labels(text, parse_entities_promptner(output), label2index),
    }
    # Append the finished row column by column.
    for column, value in row.items():
        scored[column].append(value)

df_scored = pd.DataFrame(scored)
df_scored.to_csv(validation_csv, index=False)

100%|██████████| 541/541 [51:51<00:00,  5.75s/it]  


In [10]:
# Score the validation predictions against the gold tags of the same split.
# The validation loop appends one row per example in dataset order, so the two
# sequences line up positionally.
validation_predictions = df_scored["ner_tags"].to_list()
validation_gold = dataset["validation"]["ner_tags"]
score_ner(validation_predictions, validation_gold)

{'country': {'precision': 0.6416184971098265,
  'recall': 0.6065573770491803,
  'f1': 0.6235955056179776,
  'number': 183},
 'election': {'precision': 0.783303730017762,
  'recall': 0.8352272727272727,
  'f1': 0.8084326306141154,
  'number': 528},
 'event': {'precision': 0.4648648648648649,
  'recall': 0.46236559139784944,
  'f1': 0.46361185983827496,
  'number': 186},
 'location': {'precision': 0.4962121212121212,
  'recall': 0.6208530805687204,
  'f1': 0.5515789473684211,
  'number': 211},
 'misc': {'precision': 0.1050228310502283,
  'recall': 0.11917098445595854,
  'f1': 0.1116504854368932,
  'number': 193},
 'organisation': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 431},
 'person': {'precision': 0.6333333333333333,
  'recall': 0.7972027972027972,
  'f1': 0.7058823529411764,
  'number': 286},
 'politicalparty': {'precision': 0.6825028968713789,
  'recall': 0.5593542260208927,
  'f1': 0.6148225469728602,
  'number': 1053},
 'politician': {'precision': 0.6957831325301205,