In [1]:
import json, os
from glob import glob
import pandas as pd

In [2]:
def evaluate_ner(true_entities, predicted_entities):
	"""Score predicted entities against gold entities using exact matching.

	An entity is counted as correct only when its (start, end, type) triple
	is identical on both sides. The comparison is set-based, so duplicate
	triples inside either input are collapsed before counting.

	Args:
		true_entities: iterable of dicts carrying a ``'pos'`` sequence (items
			0 and 1 are used as start and end) and a ``'type'`` value.
		predicted_entities: iterable of dicts carrying ``'start'``, ``'end'``
			and ``'label'`` values. Extra keys are ignored.

	Returns:
		dict with the keys ``"Precision"``, ``"Recall"`` and ``"F1-score"``.
		Each metric is 0 whenever its denominator would be zero.
	"""
	gold = set()
	for ent in true_entities:
		gold.add((ent['pos'][0], ent['pos'][1], ent['type']))

	guessed = set()
	for ent in predicted_entities:
		guessed.add((ent['start'], ent['end'], ent['label']))

	hits = len(gold.intersection(guessed))      # predicted and in the gold set
	spurious = len(guessed.difference(gold))    # predicted but not in the gold set
	missed = len(gold.difference(guessed))      # in the gold set but never predicted

	n_predicted = hits + spurious
	n_gold = hits + missed

	if n_predicted > 0:
		precision = hits / n_predicted
	else:
		precision = 0

	if n_gold > 0:
		recall = hits / n_gold
	else:
		recall = 0

	if (precision + recall) > 0:
		f1_score = 2 * (precision * recall) / (precision + recall)
	else:
		f1_score = 0

	return {"Precision": precision, "Recall": recall, "F1-score": f1_score}


In [3]:
# Evaluate every prediction file against the gold test split of the dataset
# with the same name, sweeping the score threshold applied to predictions.
# sorted() makes the processing order (and the key order of `all_scores`)
# reproducible; plain glob order depends on the filesystem.
pred_files = sorted(glob("data/NEREvalsLast/*.json"))

# Score cut-offs 0.1, 0.2, ..., 0.9. Loop-invariant, so built once.
thrs = [
	i / 10 for i in range(1, 10)
]

# Maps dataset name -> {threshold -> {metric name -> mean over examples}}.
all_scores = {}

for pred_file in pred_files:
	# Dataset name is the file name without its extension. splitext strips
	# only the trailing extension, whereas str.replace would also remove any
	# ".json" that happens to occur inside the name.
	name = os.path.splitext(os.path.basename(pred_file))[0]
	exp_file = f"data/IE_INSTRUCTIONS/NER/{name}/test.json"

	# Context managers close the handles promptly instead of leaking one per
	# file; JSON is read as UTF-8 rather than the platform default encoding.
	with open(pred_file, encoding="utf-8") as f:
		predictions = json.load(f)
	with open(exp_file, encoding="utf-8") as f:
		expected = json.load(f)

	# `expected` items are indexed by "entities"; each `predictions` item is
	# iterated as a list of entity dicts that carry a "score".
	# NOTE(review): zip() stops at the shorter input, so a length mismatch
	# between gold and predicted examples is silently truncated - confirm the
	# two files are always aligned.
	scores = {
		thr: [
			evaluate_ner(
				gold["entities"],
				[ent for ent in preds if ent["score"] > thr],
			)
			for gold, preds in zip(expected, predictions)
		]
		for thr in thrs
	}

	# Average the per-example metrics for each threshold: columns are the
	# thresholds, rows are Precision / Recall / F1-score.
	final_scores = pd.DataFrame({
		thr: pd.DataFrame(per_example).mean() for thr, per_example in scores.items()
	})
	all_scores[name] = final_scores.to_dict()

In [4]:
# Number of datasets that were scored (one entry per prediction file name).
len(all_scores)

23

In [5]:
# Spot-check one dataset: mean Precision / Recall / F1-score at each score threshold.
all_scores["ACE 2004"]

{0.1: {'Precision': 0.2936410196262413,
  'Recall': 0.1866251769576893,
  'F1-score': 0.2171485046446325},
 0.2: {'Precision': 0.26718664477285164,
  'Recall': 0.1447400608422776,
  'F1-score': 0.17696128057216726},
 0.3: {'Precision': 0.25467100633356793,
  'Recall': 0.12121984815950333,
  'F1-score': 0.1536939761062441},
 0.4: {'Precision': 0.2456486042692939,
  'Recall': 0.10166593464007258,
  'F1-score': 0.13467792454173214},
 0.5: {'Precision': 0.21747302369223548,
  'Recall': 0.08364616021635726,
  'F1-score': 0.11201425450012814},
 0.6: {'Precision': 0.20155993431855504,
  'Recall': 0.07153817378571073,
  'F1-score': 0.09787139048546879},
 0.7: {'Precision': 0.14244663382594416,
  'Recall': 0.04388048322777387,
  'F1-score': 0.06233502961043796},
 0.8: {'Precision': 0.05295566502463054,
  'Recall': 0.01683477988896708,
  'F1-score': 0.023827944086564773},
 0.9: {'Precision': 0.0, 'Recall': 0.0, 'F1-score': 0.0}}

In [6]:
# Persist the per-dataset, per-threshold metrics as JSON.
# json.dump serialises the float threshold keys as strings (e.g. "0.1").
results_path = "data/Results/alexmodel-last.json"
# Create the output directory first so a fresh checkout does not fail with
# FileNotFoundError at the very last step, after all the scoring work.
os.makedirs(os.path.dirname(results_path), exist_ok=True)
with open(results_path, "w") as f:
    json.dump(all_scores, f)