In [129]:
import json
import re
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

import nltk
# Fetch the 'punkt' tokenizer data every time this cell runs.
# NOTE(review): presumably required by word_tokenize (imported above); newer
# NLTK releases may expect the 'punkt_tab' resource instead — confirm against
# the installed NLTK version.
nltk.download('punkt')

def calculate_precision_recall_f1(gold, pred):
    """Compute set-based precision, recall and F1 of ``pred`` against ``gold``.

    Both sets are printed to stdout before the metrics are computed.

    Args:
        gold: Set of gold-standard items.
        pred: Set of predicted items.

    Returns:
        Tuple ``(precision, recall, f1)``. All three are 0 when ``pred`` is
        empty, and recall (hence F1) is 0 when ``gold`` is empty.
    """
    print("gold",gold)
    print("pred",pred)
    if len(pred) == 0:
        return 0, 0, 0
    num_correct = len(gold.intersection(pred))
    p = num_correct / len(pred)
    # Guard the divisor: an empty gold set used to raise ZeroDivisionError here.
    r = num_correct / len(gold) if len(gold) > 0 else 0
    f1 = 2 * ((p * r) / (p + r)) if (p + r) > 0 else 0
    return p, r, f1

def get_subject_object_hallucinations(ps, test_sentence, triples):
    """Measure how often triple subjects/objects are absent from the sentence.

    The sentence is tokenized, each token is stemmed with ``ps``, the stems are
    concatenated, underscores and whitespace are stripped and the result is
    lower-cased. Each triple's subject (index 0) and object (index 2) go
    through ``clean_entity_string`` and are then looked up as substrings of
    that normalized sentence.

    Args:
        ps: Stemmer object exposing ``stem(word)``.
        test_sentence: Sentence the triples were extracted from.
        triples: Sequence of indexable triples (subject, relation, object).

    Returns:
        Tuple ``(subject_rate, object_rate)``: the fraction of triples whose
        subject / object does not occur in the sentence. ``(0, 0)`` when
        ``triples`` is empty.
    """
    total = len(triples)
    if total == 0:
        return 0, 0

    joined_stems = "".join(ps.stem(token) for token in word_tokenize(test_sentence))
    sentence_key = re.sub(r"(_|\s+)", '', joined_stems).lower()

    missing_subjects = 0
    missing_objects = 0
    for triple in triples:
        subject_key = clean_entity_string(ps, triple[0])
        object_key = clean_entity_string(ps, triple[2])

        # A bool adds as 0/1, so each counter grows only when the key is absent.
        missing_subjects += subject_key not in sentence_key
        missing_objects += object_key not in sentence_key

    return missing_subjects / total, missing_objects / total

def get_ontology_conformance(ontology_rels, triples):
    """Compute the share of triples whose relation belongs to the ontology.

    Args:
        ontology_rels: Container of allowed relation labels.
        triples: Sequence of indexable triples; index 1 is the relation.

    Returns:
        Tuple ``(conformance, relation_hallucination)`` where the second value
        is ``1 - conformance``. ``(1, 0)`` when ``triples`` is empty.
    """
    total = len(triples)
    if total == 0:
        return 1, 0
    conformant = sum(1 for triple in triples if triple[1] in ontology_rels)
    ratio = conformant / total
    return ratio, 1 - ratio

def normalize_triple(sub_label, rel_label, obj_label):
    """Collapse a triple into one comparison key.

    Underscores and whitespace runs are removed from each label, each label is
    lower-cased, and the three results are concatenated without a separator.

    Args:
        sub_label: Subject label string.
        rel_label: Relation label string.
        obj_label: Object label string.

    Returns:
        The concatenated, normalized string.
    """
    labels = (sub_label, rel_label, obj_label)
    return "".join(re.sub(r"(_|\s+)", '', label).lower() for label in labels)

def clean_entity_string(ps, entity):
    """Normalize an entity label for substring comparison.

    The label is tokenized with ``word_tokenize``, each token is stemmed with
    ``ps.stem``, and the stems are concatenated without a separator. Underscores
    and whitespace runs are then removed and the result is lower-cased.

    Args:
        ps: Stemmer object exposing ``stem(word)``.
        entity: Entity label string.

    Returns:
        The normalized, stemmed string.
    """
    stems = [ps.stem(token) for token in word_tokenize(entity)]
    return re.sub(r"(_|\s+)", '', "".join(stems)).lower()

def _canonical_relation(rel):
    """Return ``rel`` with spaces replaced by underscores; non-strings pass through."""
    return rel.replace(" ", "_") if isinstance(rel, str) else rel


def evaluate_and_save_results(ground_truth_data, model_data, output_file):
    """Score model triples against ground truth and write per-entry stats as JSONL.

    For every ground-truth entry that has triples, this computes precision,
    recall and F1 on normalized triples (restricted to system triples whose
    relation occurs in the ground truth), ontology conformance / relation
    hallucination, and subject / object hallucination against the sentence.

    Args:
        ground_truth_data: List of dicts with keys ``id``, ``sent`` and
            ``triples`` (each triple a dict with ``sub``, ``rel``, ``obj``).
        model_data: List of dicts with key ``triples`` in the same format.
        output_file: Path of the JSONL file to write (overwritten).

    Raises:
        KeyError: If a triple lacks ``sub``, ``rel`` or ``obj``, or a scored
            ground-truth entry lacks ``id`` or ``sent``.
    """
    ps = PorterStemmer()
    results = []

    # NOTE(review): entries are paired by position, not by 'id'; zip() also
    # stops silently at the shorter input. Confirm both files are aligned.
    for gt_entry, model_entry in zip(ground_truth_data, model_data):
        # Entries without gold triples cannot be scored, so they are skipped.
        if not gt_entry.get('triples'):
            continue

        gt_triples = [[tr['sub'], tr['rel'], tr['obj']] for tr in gt_entry['triples']]
        # A missing or null 'triples' value means the model extracted nothing.
        system_triples = [
            [tr['sub'], tr['rel'], tr['obj']]
            for tr in (model_entry.get('triples') or [])
        ]

        gt_relations = {_canonical_relation(tr[1]) for tr in gt_triples}
        # Canonicalise the system relation the same way as the gold relations;
        # previously a relation written with spaces could never match, even
        # when the gold relation was spelled identically.
        filtered_system_triples = [
            tr for tr in system_triples if _canonical_relation(tr[1]) in gt_relations
        ]
        canonical_system_triples = [
            [tr[0], _canonical_relation(tr[1]), tr[2]] for tr in system_triples
        ]

        normalized_gt_triples = {normalize_triple(tr[0], tr[1], tr[2]) for tr in gt_triples}
        normalized_system_triples = {normalize_triple(tr[0], tr[1], tr[2]) for tr in filtered_system_triples}

        precision, recall, f1 = calculate_precision_recall_f1(normalized_gt_triples, normalized_system_triples)
        ont_conformance, rel_hallucination = get_ontology_conformance(gt_relations, canonical_system_triples)
        subj_hallucination, obj_hallucination = get_subject_object_hallucinations(ps, gt_entry['sent'], system_triples)

        result = {
            "id": gt_entry['id'],
            "precision": f"{precision:.2f}",
            "recall": f"{recall:.2f}",
            "f1": f"{f1:.2f}",
            "onto_conf": f"{ont_conformance:.2f}",
            "rel_halluc": f"{rel_hallucination:.2f}",
            "sub_halluc": f"{subj_hallucination:.2f}",
            "obj_halluc": f"{obj_hallucination:.2f}",
            # The reported triples keep the model's original spelling.
            "llm_triples": system_triples,
            "filtered_llm_triples": filtered_system_triples,
            "gt_triples": gt_triples,
            "sent": gt_entry['sent']
        }

        results.append(result)

    with open(output_file, "w", encoding="utf-8") as f:
        for result in results:
            f.write(json.dumps(result) + "\n")

def read_ground_truth_jsonl(file_path):
    """Load ground-truth entries from a JSON Lines file.

    Blank lines are ignored and the file is always decoded as UTF-8.

    Args:
        file_path: Path to the JSONL file.

    Returns:
        List of parsed entries, one per non-blank line.

    Raises:
        ValueError: If an entry lacks any of ``id``, ``sent`` or ``triples``
            (``json.JSONDecodeError``, a ``ValueError`` subclass, is raised
            for malformed lines).
    """
    with open(file_path, "r", encoding="utf-8") as file:
        # Skip blank lines (e.g. a trailing empty line) instead of failing to parse them.
        data = [json.loads(line) for line in file if line.strip()]
    for entry in data:
        if not all(key in entry for key in ['id', 'sent', 'triples']):
            raise ValueError(f"Entry missing required keys: {entry}")
    return data

def read_model_jsonl(file_path):
    """Load model-response entries from a JSON Lines file.

    Blank lines are ignored and the file is always decoded as UTF-8.

    Args:
        file_path: Path to the JSONL file.

    Returns:
        List of parsed entries, one per non-blank line.

    Raises:
        ValueError: If an entry lacks ``id`` or ``triples``
            (``json.JSONDecodeError``, a ``ValueError`` subclass, is raised
            for malformed lines).
    """
    with open(file_path, "r", encoding="utf-8") as file:
        # Skip blank lines (e.g. a trailing empty line) instead of failing to parse them.
        data = [json.loads(line) for line in file if line.strip()]
    for entry in data:
        if not all(key in entry for key in ['id', 'triples']):
            raise ValueError(f"Entry missing required keys: {entry}")
    return data

[nltk_data] Downloading package punkt to
[nltk_data]     /upb/users/b/balram/profiles/unix/cs/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [130]:
# File paths for the Wikidata evaluation run.
# The dataset root and ontology name are declared once so that switching to
# another ontology or dataset is a one-line change.
# NOTE(review): DATA_ROOT is an absolute, user-specific path — consider making
# it configurable.
DATA_ROOT = "/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/wikidata"
ONTOLOGY_NAME = "ont_8_politics"

output_filepath = f"{DATA_ROOT}/evaluation_statistics/without_missing_GT/{ONTOLOGY_NAME}_llm_stats_improved.jsonl"
ground_truth_filepath = f"{DATA_ROOT}/ground_truth/{ONTOLOGY_NAME}_ground_truth.jsonl"
model_response_filepath = f"{DATA_ROOT}/response/Llama3/cot_response_without_quant_batch/{ONTOLOGY_NAME}_llm_response_improved.jsonl"


In [131]:
# Read and evaluate: load the ground-truth and model-response JSONL files,
# then score them and write the per-entry statistics to output_filepath.
# The path variables are expected to be set by an earlier cell.
ground_truth_data = read_ground_truth_jsonl(ground_truth_filepath)
model_data = read_model_jsonl(model_response_filepath)
evaluate_and_save_results(ground_truth_data, model_data, output_filepath)

gold {'egyptheadofstateabdelfattahel-sisi'}
pred {'egyptheadofstateabdelfattahel-sisi'}
gold {'thegambiaheadofstateadamabarrow', 'thegambiaheadofgovernmentadamabarrow'}
pred {'thegambiaheadofgovernmentadamabarrow'}
gold {'uzbekistanheadofgovernmentislamkarimov', 'uzbekistanheadofstateshavkatmirziyoyev'}
pred set()
gold {'cameroonheadofstatepaulbiya'}
pred set()
gold {'zimbabweheadofgovernmentemmersonmnangagwa', 'zimbabweheadofstateemmersonmnangagwa'}
pred {'zimbabweheadofgovernmentemmersonmnangagwa'}
gold {'fijiheadofgovernmentfrankbainimarama'}
pred set()
gold {'turkmenistanheadofstategurbangulyberdimuhamedow', 'turkmenistanheadofgovernmentgurbangulyberdimuhamedow'}
pred set()
gold {'zambiaheadofstateguyscott', 'zambiaheadofgovernmentedgarlungu', 'zambiaheadofstateedgarlungu'}
pred {'zambiaheadofgovernmentedgarlungu', 'zambiaheadofstateedgarlungu'}
gold {'dmitrymedvedevpositionheldpresidentofrussia'}
pred {'dmitrymedvedevpositionheldpresidentofrussia'}
gold {'ephraimkatzirpositionheld

In [None]:
# File paths for the DBpedia-WebNLG evaluation run.
# The dataset root and ontology name are declared once so that switching to
# another ontology or dataset is a one-line change.
# NOTE(review): DATA_ROOT is an absolute, user-specific path — consider making
# it configurable.
DATA_ROOT = "/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/dbpedia_webnig"
ONTOLOGY_NAME = "ont_4_building"

output_filepath = f"{DATA_ROOT}/evaluation_statistics/without_gt_Quant_batch_stats/{ONTOLOGY_NAME}_llm_stats_improved.jsonl"
ground_truth_filepath = f"{DATA_ROOT}/ground_truth/{ONTOLOGY_NAME}_ground_truth.jsonl"
model_response_filepath = f"{DATA_ROOT}/response/Llama3/cot_response_without_quant_batch/{ONTOLOGY_NAME}_llm_response_improved.jsonl"