In [11]:
import json
import re
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

In [12]:
#!pip install nltk

In [13]:
# Fetch the 'punkt_tab' NLTK data package; the call reports "already up-to-date"
# when the package is present locally.
# NOTE(review): presumably this is the data word_tokenize relies on — confirm
# against the installed NLTK version.
import nltk
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /upb/users/b/balram/profiles/unix/cs/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [14]:
def calculate_precision_recall_f1(gold, pred):
    """Compute precision, recall and F1 between two sets of items.

    Args:
        gold: Set of reference (ground-truth) items.
        pred: Set of predicted items.

    Returns:
        A ``(precision, recall, f1)`` tuple. ``(0, 0, 0)`` is returned when
        either set is empty, since no item can be matched in that case.
    """
    # Guard both denominators: an empty `pred` breaks precision and an empty
    # `gold` breaks recall (the latter used to raise ZeroDivisionError).
    if len(pred) == 0 or len(gold) == 0:
        return 0, 0, 0

    num_correct = len(gold.intersection(pred))
    p = num_correct / len(pred)
    r = num_correct / len(gold)
    # F1 is the harmonic mean; it is defined as 0 when nothing matched.
    f1 = 2 * ((p * r) / (p + r)) if (p + r) > 0 else 0
    return p, r, f1

In [15]:
def get_subject_object_hallucinations(ps, test_sentence, triples):
    """Measure how often triple subjects/objects are absent from the sentence.

    The sentence is tokenized, each token is stemmed with ``ps`` and the stems
    are glued together; underscores and whitespace are stripped and the result
    is lower-cased. Every subject and object goes through
    ``clean_entity_string`` and is then looked up as a substring of that
    normalized sentence.

    Args:
        ps: Stemmer object exposing ``stem(word)``.
        test_sentence: Source sentence the triples were extracted from.
        triples: Sequence of ``[subject, relation, object]`` items.

    Returns:
        ``(subject_hallucination, object_hallucination)``: the fraction of
        triples whose subject (respectively object) is not found in the
        sentence. ``(0, 0)`` when ``triples`` is empty.
    """
    total = len(triples)
    if total == 0:
        return 0, 0

    sentence_stems = [ps.stem(token) for token in word_tokenize(test_sentence)]
    haystack = re.sub(r"(_|\s+)", '', "".join(sentence_stems)).lower()

    missing_subjects = 0
    missing_objects = 0
    for triple in triples:
        subject_key = clean_entity_string(ps, triple[0])
        object_key = clean_entity_string(ps, triple[2])

        # A key that is not a substring of the sentence counts as hallucinated.
        if subject_key not in haystack:
            missing_subjects += 1
        if object_key not in haystack:
            missing_objects += 1

    return missing_subjects / total, missing_objects / total

In [16]:
def get_ontology_conformance(ontology_rels, triples):
    """Compute the share of triples whose relation belongs to the ontology.

    Args:
        ontology_rels: Container of allowed relation names.
        triples: Sequence of ``[subject, relation, object]`` items.

    Returns:
        ``(ontology_conformance, relation_hallucination)`` where the second
        value is ``1 - ontology_conformance``. An empty ``triples`` yields
        ``(1, 0)``.
    """
    total = len(triples)
    if not total:
        return 1, 0

    conformant = sum(1 for triple in triples if triple[1] in ontology_rels)
    conformance = conformant / total
    return conformance, 1 - conformance

In [17]:
def normalize_triple(sub_label, rel_label, obj_label):
    """Build a single comparison key for a triple.

    Underscores and whitespace runs are removed from each label, the label is
    lower-cased, and the three results are concatenated in
    subject/relation/object order with no separator.

    Args:
        sub_label: Subject label.
        rel_label: Relation label.
        obj_label: Object label.

    Returns:
        The concatenated, normalized string.
    """
    normalized_parts = (
        re.sub(r"(_|\s+)", '', label).lower()
        for label in (sub_label, rel_label, obj_label)
    )
    return "".join(normalized_parts)

In [18]:
def clean_entity_string(ps, entity):
    """Reduce an entity label to a stemmed, separator-free, lower-case key.

    Args:
        ps: Stemmer object exposing ``stem(word)``.
        entity: Entity label to normalize.

    Returns:
        The stems of the tokenized label joined without separators, with
        underscores and whitespace removed and the result lower-cased.
    """
    tokens = word_tokenize(entity)
    joined_stems = "".join(ps.stem(token) for token in tokens)
    return re.sub(r"(_|\s+)", '', joined_stems).lower()

In [19]:
def evaluate_and_save_results(ground_truth_data, model_data, output_file):
    """Score model triples against ground truth and write one JSON line per entry.

    Entries are paired positionally (``zip``), so both inputs are expected to
    be in the same order; surplus entries on either side are ignored.

    For each pair the function computes precision/recall/F1 on normalized
    triples (using only system triples whose relation occurs in the ground
    truth), ontology conformance / relation hallucination against the
    ground-truth relations, and subject/object hallucination against the
    ground-truth sentence. All metrics are stored as two-decimal strings.

    Args:
        ground_truth_data: Iterable of dicts with ``id``, ``sent`` and
            ``triples`` (each triple a dict with ``sub``, ``rel``, ``obj``).
        model_data: Iterable of dicts with ``triples`` in the same format.
        output_file: Path of the JSONL file to (over)write.
    """
    stemmer = PorterStemmer()

    def as_lists(entry):
        # Convert the dict-style triples of one entry into [sub, rel, obj] lists.
        return [[item['sub'], item['rel'], item['obj']] for item in entry['triples']]

    records = []
    for gold_entry, predicted_entry in zip(ground_truth_data, model_data):
        gt_triples = as_lists(gold_entry)
        system_triples = as_lists(predicted_entry)

        # Ground-truth relation names with spaces turned into underscores;
        # system triples are kept only if their relation is in this set.
        gt_relations = {relation.replace(" ", "_") for _, relation, _ in gt_triples}
        filtered_system_triples = [
            triple for triple in system_triples if triple[1] in gt_relations
        ]

        gold_keys = {normalize_triple(*triple) for triple in gt_triples}
        predicted_keys = {normalize_triple(*triple) for triple in filtered_system_triples}

        precision, recall, f1 = calculate_precision_recall_f1(gold_keys, predicted_keys)
        ont_conformance, rel_hallucination = get_ontology_conformance(
            gt_relations, system_triples
        )
        subj_hallucination, obj_hallucination = get_subject_object_hallucinations(
            stemmer, gold_entry['sent'], system_triples
        )

        metrics = {
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "onto_conf": ont_conformance,
            "rel_halluc": rel_hallucination,
            "sub_halluc": subj_hallucination,
            "obj_halluc": obj_hallucination,
        }

        # Key order is id, metrics, triples, sentence.
        record = {"id": gold_entry['id']}
        record.update((name, f"{value:.2f}") for name, value in metrics.items())
        record["llm_triples"] = system_triples
        record["filtered_llm_triples"] = filtered_system_triples
        record["gt_triples"] = gt_triples
        record["sent"] = gold_entry['sent']
        records.append(record)

    with open(output_file, "w") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)

In [20]:
def read_ground_truth_jsonl(file_path):
    """Load ground-truth entries from a JSON Lines file.

    The file is decoded as UTF-8 regardless of the platform locale, and
    blank lines (for example a trailing empty line) are skipped instead of
    being handed to the JSON parser.

    Args:
        file_path: Path to the JSONL file.

    Returns:
        List of parsed entries, in file order.

    Raises:
        ValueError: If an entry lacks one of ``id``, ``sent`` or ``triples``
            (``json.JSONDecodeError`` for malformed lines is a subclass).
    """
    required_keys = ('id', 'sent', 'triples')
    with open(file_path, "r", encoding="utf-8") as file:
        data = [json.loads(line) for line in file if line.strip()]

    # Validate each entry to ensure it has 'id', 'sent', and 'triples'
    for entry in data:
        if not all(key in entry for key in required_keys):
            raise ValueError(f"Entry missing required keys: {entry}")
    return data

In [21]:
def read_model_jsonl(file_path):
    """Load model-response entries from a JSON Lines file.

    The file is decoded as UTF-8 regardless of the platform locale, and
    blank lines (for example a trailing empty line) are skipped instead of
    being handed to the JSON parser.

    Args:
        file_path: Path to the JSONL file.

    Returns:
        List of parsed entries, in file order.

    Raises:
        ValueError: If an entry lacks ``id`` or ``triples``
            (``json.JSONDecodeError`` for malformed lines is a subclass).
    """
    required_keys = ('id', 'triples')
    with open(file_path, "r", encoding="utf-8") as file:
        data = [json.loads(line) for line in file if line.strip()]

    # Validate each entry to ensure it has 'id' and 'triples'
    for entry in data:
        if not all(key in entry for key in required_keys):
            raise ValueError(f"Entry missing required keys: {entry}")
    return data

In [30]:
# Inputs: ground-truth triples and the model's responses (both JSONL).
ground_truth_filepath = 'Wikidata/Ground_Truth/ont_9_nature_ground_truth.jsonl'
model_response_filepath = 'Wikidata/Response/ont_9_nature_llm_response.jsonl'

# Output: per-entry evaluation statistics (JSONL).
output_filepath = 'Wikidata/Evaluation_Statistics/ont_9_nature_llm_stats.jsonl'

In [31]:
# Load both JSONL inputs into lists of dicts; each reader raises ValueError
# if an entry is missing one of its required keys.
ground_truth_data = read_ground_truth_jsonl(ground_truth_filepath)
model_data = read_model_jsonl(model_response_filepath)

In [32]:
# Score every (ground truth, model response) pair and write the per-entry
# statistics to output_filepath, one JSON object per line.
evaluate_and_save_results(ground_truth_data, model_data, output_filepath)