In [14]:
import json
import os
from collections import defaultdict

# Entity Linking

In [15]:
# Directory holding the model output files. Set the IE_EVAL_DATA_DIR
# environment variable to point somewhere else; without it the original
# location is used.
data_dir = os.environ.get("IE_EVAL_DATA_DIR", "/data/home/keminglu/workspace/devcloud")
# Entity-linking output file (read line by line as JSON further down).
file_name = "7b_ie_output_con_b1.txt"
file_path = os.path.join(data_dir, file_name)

In [16]:
# Load the output file as JSON Lines (one JSON record per line).
#  - explicit UTF-8 so decoding does not depend on the machine's locale
#  - iterate the handle directly instead of materialising readlines()
#  - skip blank lines, which json.loads would reject
with open(file_path, encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

In [18]:
# Entity-linking accuracy per dataset. A record is correct when its gold title
# (first element of record['true']) equals the predicted title or one of the
# predicted aliases of the first entity in the parsed model output.
verbose = False  # set to True to print every (gold, predictions) mismatch
results = defaultdict(int)    # dataset -> number of correct records
total_cnt = defaultdict(int)  # dataset -> number of evaluated records
for record in data:
    dataset = record['dataset']
    true = record['true'][0]
    # Count the record before parsing, so an unusable model output lowers the
    # accuracy instead of silently dropping out of the denominator.
    total_cnt[dataset] += 1
    try:
        # Prompt and output are concatenated before parsing -- presumably the
        # prompt ends with the opening of the JSON document (TODO confirm).
        output = json.loads(record['prompt'] + ' ' + record['output'])['entities'][0]
        aliases = output.get('aliases')
        if not isinstance(aliases, list):
            aliases = []
        pred = [output['title']] + aliases
    except (json.JSONDecodeError, KeyError, IndexError, TypeError, AttributeError):
        # Invalid JSON, or valid JSON without a usable first entity: a miss.
        continue
    if true in pred:
        results[dataset] += 1
    elif verbose:
        print(true, pred)

In [19]:
# Per-dataset count of records whose gold title matched a prediction.
results

defaultdict(<function __main__.<lambda>()>,
            {'ace2004-test-kilt.jsonl': 208,
             'aida-test-kilt.jsonl': 3406,
             'clueweb-test-kilt.jsonl': 7352,
             'msnbc-test-kilt.jsonl': 482,
             'aquaint-test-kilt.jsonl': 581})

In [20]:
# Accuracy per dataset: correct records divided by counted records.
accuracy = {
    dataset_name: results[dataset_name] / counted
    for dataset_name, counted in total_cnt.items()
}
accuracy

{'ace2004-test-kilt.jsonl': 0.8093385214007782,
 'aida-test-kilt.jsonl': 0.7594202898550725,
 'clueweb-test-kilt.jsonl': 0.659194835470277,
 'msnbc-test-kilt.jsonl': 0.7347560975609756,
 'aquaint-test-kilt.jsonl': 0.7991746905089409}

# NER

In [10]:
# Directory holding the model output files. Set the IE_EVAL_DATA_DIR
# environment variable to point somewhere else; without it the original
# location is used.
data_dir = os.environ.get("IE_EVAL_DATA_DIR", "/data/home/keminglu/workspace/devcloud")
# NER output file (read line by line as JSON further down).
file_name = "7b_ner_output.txt"
file_path = os.path.join(data_dir, file_name)

In [11]:
# Load the output file as JSON Lines (one JSON record per line).
#  - explicit UTF-8 so decoding does not depend on the machine's locale
#  - iterate the handle directly instead of materialising readlines()
#  - skip blank lines, which json.loads would reject
with open(file_path, encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

In [12]:
# Gather, for every (dataset, split) pair, the lower-cased gold entity types
# that occur in its records.
all_types = defaultdict(set)
for record in data:
    type_names = [pair[1].lower() for pair in map(tuple, record['true']) if len(pair) == 2] \
        if all(len(tuple(pair)) == 2 for pair in record['true']) \
        else [t.lower() for _, t in record['true']]
    if type_names:
        all_types[(record['dataset'], record['split'])].update(type_names)

In [13]:
# NER counts per (dataset, split):
#   pos_num  - distinct gold (mention, type) pairs
#   pred_num - distinct predicted pairs whose type is in that split's gold types
#   pos_true - pairs present in both sets
results = defaultdict(lambda: {"pos_true": 0, "pos_num": 0, "pred_num": 0})
for record in data:
    dataset = (record['dataset'], record['split'])
    true = {(name.lower(), t.lower()) for name, t in record['true']}
    # Gold pairs are counted before parsing, so an unusable model output still
    # counts against recall.
    results[dataset]["pos_num"] += len(true)
    try:
        output = json.loads(record['output'])['entities']
    except (json.JSONDecodeError, KeyError, TypeError):
        # Invalid JSON, or valid JSON without an 'entities' field.
        continue
    if not isinstance(output, list):
        continue

    preds = set()
    for each in output:
        # Model output is free-form: skip entries that are not objects or that
        # lack a usable mention / type instead of crashing the whole run.
        if not isinstance(each, dict):
            continue
        mention = each.get('mention')
        types = each.get('type')
        if isinstance(types, str):
            # A bare string would otherwise be iterated character by character.
            types = [types]
        if not isinstance(mention, str) or not isinstance(types, list):
            continue
        for t in types:
            # Only types that occur in the gold data of this split are scored.
            if isinstance(t, str) and t.lower() in all_types[dataset]:
                preds.add((mention.lower(), t.lower()))

    results[dataset]["pos_true"] += len(true & preds)
    results[dataset]["pred_num"] += len(preds)

def get_f1(res):
    """Compute precision, recall and F1 from a dict of counts.

    Args:
        res: mapping with numeric counts under the keys 'pos_true',
            'pred_num' and 'pos_num'.

    Returns:
        Tuple ``(precision, recall, f1)`` where precision is
        pos_true / pred_num and recall is pos_true / pos_num. A ratio whose
        denominator is zero is reported as 0.0 instead of raising
        ZeroDivisionError.
    """
    precision = res['pos_true'] / res['pred_num'] if res['pred_num'] else 0.0
    recall = res['pos_true'] / res['pos_num'] if res['pos_num'] else 0.0
    # F1 is the harmonic mean of the two; define it as 0 when both are 0.
    if precision + recall == 0:
        return precision, recall, 0.0
    f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1

# (precision, recall, f1) for every (dataset, split) bucket.
{bucket: get_f1(counts) for bucket, counts in results.items()}

{('conllpp', 'none'): (0.18995056544998984,
  0.5016991593632624,
  0.27556734453286175),
 ('crossner_politics', 'none'): (0.2820707070707071,
  0.27303837692495725,
  0.2774810582536332),
 ('crossner_politics', 'gold'): (0.31191885038038886,
  0.27059398680029334,
  0.2897905759162303),
 ('crossner_science', 'none'): (0.1788386177413609,
  0.16628022524014574,
  0.17233093031239272),
 ('crossner_science', 'gold'): (0.20333951762523192,
  0.181517058628685,
  0.19180959047952398),
 ('conllpp', 'gold'): (0.1994267287710498,
  0.4977642639957074,
  0.2847641461168525),
 ('crossner_ai', 'none'): (0.12055974165769645,
  0.18972332015810275,
  0.1474330846862659),
 ('crossner_ai', 'gold'): (0.11835003855050116,
  0.17334839073969507,
  0.1406643757159221),
 ('crossner_literature', 'none'): (0.20318352059925093,
  0.30264993026499304,
  0.2431372549019608),
 ('crossner_literature', 'gold'): (0.21764280549530007,
  0.2798698279869828,
  0.24486475493186902),
 ('crossner_music', 'none'): (0.25