In [1]:
import os
import uuid
import re
import nltk
from nltk.tokenize import sent_tokenize
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from rouge import Rouge
nltk.download('punkt_tab')
import fitz
from collections import defaultdict
import requests
from neo4j import GraphDatabase
import torch
import torch.nn.functional as F
from torch_geometric.utils import from_networkx
from torch_geometric.nn import GATConv
import time
import json
import jsonlines
import networkx as nx
import pickle
import igraph as ig
import leidenalg
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import numpy as np
import hnswlib
import string
from tqdm import tqdm
from transformers import GPT2TokenizerFast

pdf_path="docs\medbook1\medical_book.pdf"

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Aneel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [3]:
class Chunker:
    def __init__(self, store_file="data/chunked_output.jsonl"):
        self.store_file = store_file

    def pdf_to_text(self, path):
        text = ""

        if os.path.isfile(path) and path.lower().endswith(".pdf"):
            with fitz.open(path) as doc:
                for page in doc:
                    text += page.get_text()
        elif os.path.isdir(path):
            pdf_files = [f for f in os.listdir(path) if f.lower().endswith(".pdf")]
            pdf_files.sort()
            for fname in pdf_files:
                full_path = os.path.join(path, fname)
                with fitz.open(full_path) as doc:
                    for page in doc:
                        text += page.get_text()
        else:
            raise ValueError("Input must be a .pdf file or a directory containing .pdf files.")

        return text

    def segment_into_chunks(self, text, chunk_size=400, overlap=40):
        sentences = sent_tokenize(text)
        chunks = []
        current_chunk = []
        current_length = 0
        
        for sentence in sentences:
            words = sentence.split()
            sentence_length = len(words)
            
            if current_length + sentence_length > chunk_size and current_chunk:
                id = uuid.uuid4().hex
                chunks.append({"id" : id, "text" : " ".join(current_chunk)})
                
                overlap_words = []
                overlap_length = 0
                while current_chunk and overlap_length < overlap:
                    s = current_chunk.pop()
                    s_words = s.split()
                    overlap_words.extend(s_words)
                    overlap_length += len(s_words)
                
                overlap_words = overlap_words[::-1]
                current_chunk = [" ".join(overlap_words)]
                current_length = overlap_length
            
            current_chunk.append(sentence)
            current_length += sentence_length
        
        if current_chunk:
            id = uuid.uuid4().hex
            chunks.append({"id" : id, "text" : " ".join(current_chunk)})
            
        with jsonlines.open(self.store_file, "w") as f:
            for chunk in chunks:
                f.write(chunk)
        
        return chunks


In [4]:
class GroqHandler:
    def __init__(self):
        self.headers = {"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}

    def _call_groq(self, messages, is_json=True):
        payload = {
            "model": "meta-llama/llama-4-scout-17b-16e-instruct",
            "messages": messages,
            "temperature": 0.2
        }
        if is_json:
            payload["response_format"] = {
                "type": "json_object"
            }
        try:
            response = requests.post(
                url=GROQ_API_URL,
                json=payload,
                headers=self.headers
            )
            print(response.json())
            #response.raise_for_status()

            if response.status_code==200:
                res=response.json()
                content = res["choices"][0]["message"]["content"]
                if content:
                    processed_content=content
                    #print(content)
                    return processed_content

            if response.status_code==400:
                    error_data = response.json()
                    if (
                        "error" in error_data
                        and "failed_generation" in error_data["error"]
                    ):
                        print("Handling failed generation from 400 error.")
                        failed_gen = error_data["error"]["failed_generation"]
                        processed_failed_gen = failed_gen
                        #print(processed_failed_gen)
                        return processed_failed_gen
                    elif (
                        "error" in error_data
                        and "code" in error_data["error"]
                        and error_data["error"]["code"] == "rate_limit_exceeded"
                    ):
                        print("Rate limit exceeded. Retrying after 10s...")
                        return error_data["error"]["code"]
                    else:
                        print("400 error encountered but no 'failed_generation' found.")
                        return None

        except (requests.exceptions.RequestException, json.JSONDecodeError, KeyError, IndexError) as e:
            return None

    def parse_entities(self, entities_string):
        pattern = r'"([a-zA-Z][^"]*[a-zA-Z])"'
        return re.findall(pattern, entities_string)

    def extract_entities(self, text_chunk):
        messages = [{
                "role": "system",
                "content": f"""You are a Named Entity Extractor Agent. Below is a text chunk, your job is to extract ALL single and multi-word named entities and output a SINGLE-LEVEL json_object containing those extracted entities. DO NOT have anything else in output, just the json object. Maximum entities to extract is 12 and this should NOT be extended under any circumstances. Response formats:
                    <Response>
                        {
                            "Entity1",
                            "Entity2",
                            "Entity3",
                            "Entity4"
                        }
                    </Response>
                """
            },{
                "role": "user",
                "content": f"Text: {text_chunk}"
            }]
        attempt=1
        while attempt <= 3:
            result = self._call_groq(messages)
            attempt += 1
            if result != None and result != "rate_limit_exceeded":
                break
            if result == None and result == "rate_limit_exceeded":
                print("Rate limit exceeded. Retrying after 10s...")
                time.sleep(10)
                continue
            print(f"Trial number {attempt} failed.")
            time.sleep(2)
        print(f"Result--> {result}")

        if result:
            entities_list = self.parse_entities(result)
            print(f"Parsed entities: {entities_list}")

            return entities_list
        return None

    def extract_relations(self, text_chunk, entity_list):
        messages = [{
                "role": "system",
                "content": f"You are a relationship extraction agent. Referencing the given text chunk, generate only contextually relevant Source-Relationship-Destination triplets STRICTLY in the format: {{'Source': 'S', 'Relationship': 'R', 'Destination': 'D'}} where Source, Relationship, Destination are key-value pairs, S and D are STRICTLY part of the given entity list and NOT anywhere else, and R is relevant text that connects S and D, referencing ONLY the given text chunk. Conditions: 1. MAXIMUM SIZE LIMIT of json output is 12 and SHOULD NOT EXCEED, 2. Output should only consist of a JSON object."
            },{
                "role": "user",
                "content": f"""
                    <Text_Chunk>{text_chunk}</Text_Chunk>
                    <Entity_List>{entity_list}</Entity_List>
                """
            }]
        try:
            result = self._call_groq(messages)
            if result is None:
                print(f"No triplets generated.")
                return
            
            print(result)
            input_string = str(result)

            kv_pattern = re.compile(r"""['\"]?Source['\"]?:\s*['\"']?([^,'\"}]+)['\"']?\s*,\s*['\"]?Relationship['\"]?:\s*['\"']?([^,'\"}]+)['\"']?\s*,\s*['\"]?Destination['\"]?:\s*['\"']?([^,'\"}]+)['\"']?""")
            set_pattern = re.compile(r"""[{(]['\"]([^'\"]+)['\"]\s*,\s*['\"]([^'\"]+)['\"][})]""")

            # KV format
            triplets_kv = re.findall(kv_pattern, input_string)
            if triplets_kv:
                print("Extracted triplets in key-value format:")
                triplets = []
                for triplet in triplets_kv:
                    triplets.append({"Source": triplet[0], "Relationship": triplet[1], "Destination": triplet[2]})
                    print({"Source": triplet[0], "Relationship": triplet[1], "Destination": triplet[2]})
                return triplets

            # Set format
            triplets_set = re.findall(set_pattern, input_string)
            if triplets_set:
                print("Extracted triplets in set format:")
                triplets = []
                for triplet in triplets_set:
                    triplets.append({"Source": triplet[0], "Relationship": triplet[1], "Destination": triplet[2]})
                    print({"Source": triplet[0], "Relationship": triplet[1], "Destination": triplet[2]})
                return triplets_set

        except Exception as e:
            print("Error processing triplets:", e)
    
    def generate_summary(self, text_chunk):
        messages = [{
                "role": "system",
                "content": f"Paraphrase the given text_chunk into a concise, independent event‐level summary capturing the core semantic unit. Maximum word limit is 50 and should not be crossed. Output ONLY the summary and no other text."
            },{
                "role": "user",
                "content": f"<Text_Chunk>{text_chunk}</Text_Chunk>"
            }]
        return self._call_groq(messages, is_json=False)
    
    def generate_attributes(self, entity_name, context):
        messages = [{
                "role": "system",
                "content": f"You are an attribute based summarisation agent."
            },{
                "role": "user",
                "content": f"""
                    Using the following context paragraphs that involve {entity_name}, 
                    produce a concise attribute summary describing {entity_name}'s salient properties 
                    (e.g., roles, attributes, dates, relationships).

                    Context:
                    {context}

                    Summary (30 to 60 tokens):
                """
            }]
        return self._call_groq(messages, is_json=False)
    
    def generate_summary(self, text_chunk):
        messages = [{
                "role": "system",
                "content": f"Paraphrase the given text_chunk into a concise, independent event‐level summary capturing the core semantic unit. Maximum word limit is 50 and should not be crossed. Output ONLY the summary and no other text."
            },{
                "role": "user",
                "content": f"<Text_Chunk>{text_chunk}</Text_Chunk>"
            }]
        return self._call_groq(messages, is_json=False)
    
    def generate_highlevel_summary(self, context_list):
        context = "\n".join(context_list)
        messages = [{
            "role": "system",
            "content": "Using the following context paragraphs from a topical cluster, produce a high-level summary (100–150 tokens) describing the core theme, key insights, and salient points. Output ONLY the summary."
        }, {
            "role": "user",
            "content": f"<Community_Context>{context}</Community_Context>"
        }]
        return self._call_groq(messages, is_json=False)

    def generate_title(self, highlevel_summary):
        messages = [{
            "role": "system",
            "content": "Extract a concise 5–8 word keyword-style title from the following summary. Output ONLY the title."
        }, {
            "role": "user",
            "content": f"<Summary>{highlevel_summary}</Summary>"
        }]
        return self._call_groq(messages, is_json=False)

    def generate_reasoning_chain(self, prompt):
        messages = [{
            "role": "system",
            "content": "You are an advanced reasoning agent who thinks each step deeply, and returns a step-by-step reasoning chain capturing the question and the context provided."
        }, {
            "role": "user",
            "content": f"{prompt}"
        }]
        return self._call_groq(messages, is_json=False)
    
    def generate_answer(self, prompt):
        messages = [{
            "role": "system",
            "content": "You are a helpful AI agent. Your job is to answer the question asked by user correctly based on context provided, and NOT anywhere else. If you do not have context ot do not know the answer, say that you do not know the answer."
        }, {
            "role": "user",
            "content": f"{prompt}"
        }]
        return self._call_groq(messages, is_json=False)

In [5]:
class GraphBuilder:
    def __init__(self, groq_handler: GroqHandler, jsonl_path, neo4j_uri, neo4j_user, neo4j_password, neo4j_database):
        self.groq_handler = groq_handler
        self.jsonl_path = jsonl_path
        self.neo4j_driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password), database=neo4j_database)
        self.G = nx.MultiDiGraph()
        self.incomplete_graph_elements = []

    def normalize(self, name):
        return name.lower().replace(" ", "_").replace("-", "_")

    def build_graph(self, line_start=0, line_end=None):
        with jsonlines.open(self.jsonl_path, mode='r') as reader:
            for i, chunk in enumerate(reader):
                if i < line_start:
                    continue
                if line_end is not None and i >= line_end:
                    break

                doc_id = chunk.get("id") or chunk.get("doc_id")
                if not doc_id:
                    continue

                missing_fields = []

                text = chunk.get("text", "")
                summary = chunk.get("summary", "")
                entities = chunk.get("entities", [])
                relations = chunk.get("relations", [])

                if not text:
                    missing_fields.append("text")
                if not summary:
                    missing_fields.append("summary")
                if not entities:
                    missing_fields.append("entities")
                if not relations:
                    missing_fields.append("relations")

                if missing_fields:
                    self.incomplete_graph_elements.append({
                        "doc_id": doc_id,
                        "missing_fields": missing_fields
                    })
                    print(f"⚠️ Skipping doc_id={doc_id} due to missing fields: {missing_fields}")
                    continue

                # Begin adding graph nodes and edges
                Ti_id = f"T_{doc_id}"
                Si_id = f"S_{doc_id}"

                
                if Ti_id not in self.G:
                    self.G.add_node(Ti_id, type="T", text=text)
                if Si_id not in self.G:
                    self.G.add_node(Si_id, type="S", text=summary)
                self.G.add_edge(Ti_id, Si_id, label="derived_from")

                for ent in entities:
                    ent_id = f"N_{self.normalize(ent)}"
                    if ent_id not in self.G:
                        self.G.add_node(ent_id, type="N", text=ent)
                    self.G.add_edge(Si_id, ent_id, label="mentions")

                for rel in relations:
                    subj_id = f"N_{self.normalize(rel['Source'])}"
                    obj_id = f"N_{self.normalize(rel['Destination'])}"
                    rel_id = f"R_{uuid.uuid4().hex[:8]}"

                    if rel_id not in self.G:
                        self.G.add_node(rel_id, type="R", text=rel['Relationship'])
                    self.G.add_edge(rel_id, subj_id, label="subj")
                    self.G.add_edge(rel_id, obj_id, label="obj")
        
        print(f"Edges--> {self.G.edges()}, Nodes--> {self.G.nodes()}")

    def to_simple_graph(self, G: nx.MultiDiGraph):
        simple = nx.Graph()
        simple.add_nodes_from(G.nodes(data=True))
        print(f"Edges--> {G.edges()}, Nodes--> {G.nodes()}")
        for u, v in G.edges():
            simple.add_edge(u, v)
        return simple
    
    def detect_and_save_communities(self, output_path="data/community_map.json"):
        print("📊 [Step 1] Running Leiden community detection on S/N/R node subgraph...")

        sub_nodes = [n for n, d in self.G.nodes(data=True) if d.get("type") in {"S", "N", "R"}]
        subgraph = self.G.subgraph(sub_nodes)
        id_map = {node: idx for idx, node in enumerate(subgraph.nodes())}
        rev_map = {v: k for k, v in id_map.items()}
        edges = [(id_map[u], id_map[v]) for u, v in subgraph.edges()]

        ig_graph = ig.Graph(n=len(id_map), edges=edges)
        partition = leidenalg.find_partition(ig_graph, leidenalg.ModularityVertexPartition)

        print(f"🔍 Found {len(partition)} communities.")

        community_map = {}
        for community_id, members in enumerate(partition):
            for idx in members:
                node_id = rev_map[idx]
                community_map[node_id] = community_id
                self.G.nodes[node_id]["community"] = community_id  # update networkx for consistency

        context_map = defaultdict(lambda: {"summary_texts": [], "relation_texts": []})

        for node_id, comm_id in community_map.items():
            node = self.G.nodes.get(node_id)
            if not node:
                continue

            node_type = node.get("type")

            if node_type == "S":
                text = node.get("text", "")
                if text.strip():
                    context_map[str(comm_id)]["summary_texts"].append(text)

            elif node_type == "R":
                predicate = node.get("text", "")
                subject_id = None
                object_id = None

                for _, target, data in self.G.out_edges(node_id, data=True):
                    if data.get("label") == "subj":
                        subject_id = target
                    elif data.get("label") == "obj":
                        object_id = target

                if subject_id and object_id:
                    subject_text = self.G.nodes.get(subject_id, {}).get("text", "")
                    object_text = self.G.nodes.get(object_id, {}).get("text", "")
                    if subject_text and object_text:
                        triple = f"{subject_text} {predicate} {object_text}"
                        context_map[str(comm_id)]["relation_texts"].append(triple)

                with open(output_path, "w", encoding="utf-8") as f:
                    json.dump(context_map, f, indent=2, ensure_ascii=False)

                print(f"✅ Saved enriched context for {len(context_map)} communities to {output_path}")

    def add_HO_nodes_from_community_map(self, neo4j=False, community_map_path="data/community_map.json"):
        if not os.path.exists(community_map_path):
            print("❌ Missing community_map.json")
            return

        with open(community_map_path, "r", encoding="utf-8") as f:
            community_data = json.load(f)

        for comm_id, data in community_data.items():
            hl_summary = data.get("HL_summary")
            title = data.get("title")

            if not hl_summary or not title:
                print(f"⚠️ Skipping community {comm_id}: missing HL_summary or title.")
                continue

            H_id = f"H_{comm_id}"
            O_id = f"O_{comm_id}"

            if H_id not in self.G:
                self.G.add_node(H_id, type="H", text=hl_summary)
            if O_id not in self.G:
                self.G.add_node(O_id, type="O", text=title)
            self.G.add_edge(O_id, H_id, label="overview_of")

            for node_id, d in self.G.nodes(data=True):
                if d.get("type") == "S" and str(self.G.nodes[node_id].get("community")) == str(comm_id):
                    self.G.add_edge(H_id, node_id, label="summarizes")

            if neo4j:
                with self.neo4j_driver.session() as session:
                    session.run("MERGE (n:Node {id: $id}) SET n.type = 'H', n.text = $text", id=H_id, text=hl_summary)
                    session.run("MERGE (n:Node {id: $id}) SET n.type = 'O', n.text = $text", id=O_id, text=title)

                    session.run("""
                        MATCH (a:Node {id: $src}), (b:Node {id: $dst})
                        MERGE (a)-[:REL {label: 'overview_of'}]->(b)
                    """, src=O_id, dst=H_id)

                    for node_id, d in self.G.nodes(data=True):
                        if d.get("type") == "S" and str(self.G.nodes[node_id].get("community")) == str(comm_id):
                            session.run("""
                                MATCH (a:Node {id: $src}), (b:Node {id: $dst})
                                MERGE (a)-[:REL {label: 'summarizes'}]->(b)
                            """, src=H_id, dst=node_id)

            print(f"✅ Added H & O for community {comm_id}")

    def finalize_graph_G3_from_community_map(self, neo4j=False, community_map_path="data/community_map.json"):
        embed_nodes = [n for n, d in self.G.nodes(data=True) if d.get("type") in {"S", "H"}]
        texts = [self.G.nodes[n]["text"] for n in embed_nodes]

        print(f"🔍 Embedding {len(embed_nodes)} nodes...")
        model = SentenceTransformer("all-MiniLM-L6-v2")
        embeddings = model.encode(texts)

        k = int(np.ceil(np.sqrt(len(embed_nodes))))
        print(f"📈 Clustering into {k} clusters with KMeans...")
        kmeans = KMeans(n_clusters=k, random_state=42).fit(embeddings)
        labels = kmeans.labels_

        print("🔗 Adding semantically_related edges...")
        for i in range(len(embed_nodes)):
            for j in range(i + 1, len(embed_nodes)):
                node_i = embed_nodes[i]
                node_j = embed_nodes[j]
                if labels[i] == labels[j]:
                    c_i = self.G.nodes[node_i].get("community")
                    c_j = self.G.nodes[node_j].get("community")
                    if c_i == c_j and c_i is not None:
                        self.G.add_edge(node_i, node_j, label="semantically_related")
                        if neo4j:
                            with self.neo4j_driver.session() as session:
                                session.run("""
                                    MATCH (a:Node {id: $src}), (b:Node {id: $dst})
                                    MERGE (a)-[:REL {label: 'semantically_related'}]->(b)
                                """, src=node_i, dst=node_j)

        print("✅ Semantic edges added. G3 construction complete.")
    
    def build_G4_text_attachment(self, neo4j=False, chunk_file="data/chunked_output.jsonl"):
        print("📎 Reattaching original text chunks as T nodes...")

        with jsonlines.open(chunk_file, mode='r') as reader:
            chunks = list(reader)

        for chunk in chunks:
            tid = chunk.get("id")
            text = chunk.get("text")
            entities = chunk.get("entities", [])
            sid = f"S_{tid}"

            if not text or not tid or sid not in self.G:
                continue

            tid = f"T_{tid}"
            if tid not in self.G:
                self.G.add_node(tid, type="T", text=text)
            self.G.add_edge(tid, sid, label="derived_from")

            for entity in entities:
                nid = f"N_{entity['normalized']}" if isinstance(entity, dict) and 'normalized' in entity else f"N_{entity}"
                if nid in self.G:
                    self.G.add_edge(tid, nid, label="mentions_entity")

            if neo4j:
                with self.neo4j_driver.session() as session:
                    session.run("MERGE (n:Node {id: $id}) SET n.type = 'T', n.text = $text", id=tid, text=text)
                    session.run("""
                        MATCH (a:Node {id: $src}), (b:Node {id: $dst})
                        MERGE (a)-[:REL {label: 'derived_from'}]->(b)
                    """, src=tid, dst=sid)

                    for entity in entities:
                        nid = f"N_{entity['normalized']}" if isinstance(entity, dict) and 'normalized' in entity else f"N_{entity}"
                        if nid in self.G:
                            session.run("""
                                MATCH (a:Node {id: $src}), (b:Node {id: $dst})
                                MERGE (a)-[:REL {label: 'mentions_entity'}]->(b)
                            """, src=tid, dst=nid)

        print("✅ G4 built: all T nodes attached and connected.")

    def build_G5_semantic_hnsw_index(self, neo4j=False, dim=384, ef=200, M=16, index_path="data/hnsw_index.bin"):
        print("🧠 Starting G5 semantic index construction...")

        embed_nodes = [n for n, d in self.G.nodes(data=True) if d.get("type") in {"T", "S", "H"}]
        texts = [self.G.nodes[n]["text"] for n in embed_nodes]

        print(f"📚 Embedding {len(embed_nodes)} nodes...")
        model = SentenceTransformer("all-MiniLM-L12-v2")
        embeddings = model.encode(texts)

        p = hnswlib.Index(space='cosine', dim=dim)
        p.init_index(max_elements=len(embeddings), ef_construction=ef, M=M)
        p.add_items(embeddings, list(range(len(embeddings))))
        p.set_ef(ef)

        print("🔗 Performing HNSW neighbor search and edge enrichment...")

        labels, distances = p.knn_query(embeddings, k=10)

        for i, neighbors in enumerate(labels):
            src = embed_nodes[i]
            c1 = self.G.nodes[src].get("community")
            for j in neighbors:
                dst = embed_nodes[j]
                if src == dst:
                    continue
                c2 = self.G.nodes[dst].get("community")
                if c1 != c2:
                    continue

                if self.G.has_edge(src, dst):
                    if isinstance(self.G, (nx.MultiGraph, nx.MultiDiGraph)):
                        for key in self.G[src][dst]:
                            self.G[src][dst][key]['weight'] = self.G[src][dst][key].get('weight', 1) + 1
                    else:
                        self.G[src][dst]['weight'] = self.G[src][dst].get('weight', 1) + 1
                else:
                    self.G.add_edge(src, dst, label="semantic_hnsw", weight=1)
                    if neo4j:
                        with self.neo4j_driver.session() as session:
                            session.run("""
                                MATCH (a:Node {id: $src}), (b:Node {id: $dst})
                                MERGE (a)-[:REL {label: 'semantic_hnsw', weight: 1}]->(b)
                            """, src=src, dst=dst)

        p.save_index(index_path)
        print(f"💾 HNSW index saved to: {index_path}")
        print("✅ G5 completed: HNSW semantic links embedded.")

    def get_entry_points(self, query, hnsw_index_path="data/hnsw_index.bin", top_k=30):
        print("🎯 Identifying entry points for query...")

        query_tokens = set(query.lower().split())
        matched_nodes = set()

        for node_id, data in self.G.nodes(data=True):
            if data.get("type") not in {"N", "O"}:
                continue
            label = data.get("text", "").lower()
            label_tokens = set(label.split())
            jaccard = len(query_tokens & label_tokens) / len(query_tokens | label_tokens | {"."})
            if jaccard > 0.3:
                matched_nodes.add(node_id)

        print(f"🔎 Found {len(matched_nodes)} exact-match nodes from N/O.")

        model = SentenceTransformer("all-MiniLM-L12-v2")
        q_emb = model.encode([query])[0]

        embed_nodes = [n for n, d in self.G.nodes(data=True) if d.get("type") in {"T", "S", "H"}]
        embed_id_to_node = {i: node_id for i, node_id in enumerate(embed_nodes)}

        dim = len(q_emb)
        index = hnswlib.Index(space="cosine", dim=dim)
        index.load_index(hnsw_index_path)
        index.set_ef(200)

        labels, distances = index.knn_query(q_emb, k=top_k)
        try:
            sim_nodes = {embed_id_to_node[i] for i in labels[0]}
        except KeyError as e:
            print(f"❌ Missing index ID in embed_id_to_node: {e}")
            print("Available keys:", list(embed_id_to_node.keys())[:10])
            raise

        print(f"📐 Retrieved {len(sim_nodes)} semantic neighbors from HNSW.")

        entry_nodes = matched_nodes.union(sim_nodes)
        if not entry_nodes:
            print("❌ No entry points found.")
            return set(), {}

        p_i = {node: 1 / len(entry_nodes) for node in entry_nodes}

        print(f"✅ Entry set V_entry contains {len(entry_nodes)} nodes.")
        return entry_nodes, p_i
    
    def run_shallow_ppr(self, entry_nodes, personalization_weights, m=5, alpha=0.5):
        print("🔄 Starting Shallow Personalized PageRank (t=2)...")

        node_list = list(self.G.nodes())
        node_index = {node: i for i, node in enumerate(node_list)}
        index_node = {i: node for node, i in node_index.items()}
        N = len(node_list)

        A = np.zeros((N, N))
        for u, v, data in self.G.edges(data=True):
            i, j = node_index[u], node_index[v]
            weight = data.get("weight", 1)
            A[j, i] += weight

        col_sums = A.sum(axis=0)
        P = A / (col_sums + 1e-8)

        pi_0 = np.zeros(N)
        for node, value in personalization_weights.items():
            if node in node_index:
                pi_0[node_index[node]] = value

        pi_1 = alpha * pi_0 + (1 - alpha) * P @ pi_0
        pi_2 = alpha * pi_0 + (1 - alpha) * P @ pi_1

        scores_by_type = defaultdict(list)
        for i, score in enumerate(pi_2):
            node_id = index_node[i]
            node_type = self.G.nodes[node_id].get("type")
            if node_type in {"T", "S", "H", "R"}:
                scores_by_type[node_type].append((node_id, score))

        V_cross = set()
        for node_type, scored in scores_by_type.items():
            top_nodes = sorted(scored, key=lambda x: -x[1])[:m]
            for node_id, _ in top_nodes:
                V_cross.add(node_id)

        print(f"📌 Selected {len(V_cross)} V_cross nodes (top {m} per type)")

        V_raw = set(entry_nodes) | V_cross
        V_raw = {v for v in V_raw if self.G.nodes[v].get("type") in {"T", "S", "H", "R"}}

        G_raw = self.G.subgraph(V_raw).copy()
        print(f"📎 Induced G_raw with {len(G_raw.nodes)} nodes and {len(G_raw.edges)} edges.")

        return G_raw, V_raw

    def persist_to_neo4j(self):
        with self.neo4j_driver.session() as session:
            for node_id, data in self.G.nodes(data=True):
                session.run(
                    """
                    MERGE (n:Node {id: $id})
                    SET n.type = $type, n.text = $text
                    """,
                    id=node_id, type=data.get("type"), text=data.get("text", "")
                )

            for src, dst, edge_data in self.G.edges(data=True):
                session.run(
                    """
                    MATCH (a:Node {id: $src}), (b:Node {id: $dst})
                    MERGE (a)-[r:REL {label: $label}]->(b)
                    """,
                    src=src, dst=dst, label=edge_data.get("label", "")
                )

    def save_incomplete_entries(self, path="data/incomplete_graph_elements.json"):
        if self.incomplete_graph_elements:
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, "w") as f:
                json.dump(self.incomplete_graph_elements, f, indent=4)
            print(f"🛑 {len(self.incomplete_graph_elements)} incomplete chunks saved to {path}")
        else:
            print("✅ No incomplete graph elements to save.")

    def graph_to_json(self, G: nx.Graph) -> dict:
        return {
            "nodes": [
                {"id": node, **data}
                for node, data in G.nodes(data=True)
            ],
            "edges": [
                {"source": u, "target": v, **data}
                for u, v, data in G.edges(data=True)
            ]
        }

    def json_to_graph(self, graph_json: dict) -> nx.DiGraph:
        G = nx.DiGraph()
        for node in graph_json["nodes"]:
            node_id = node["id"]
            attrs = {k: v for k, v in node.items() if k != "id"}
            G.add_node(node_id, **attrs)

        for edge in graph_json["edges"]:
            u = edge["source"]
            v = edge["target"]
            attrs = {k: v for k, v in edge.items() if k not in {"source", "target"}}
            G.add_edge(u, v, **attrs)

        return G

    def save_pickle(self, filename="graph.pkl"):
        with open(filename, "wb") as f:
            pickle.dump(self.G, f)

    def load_graph_pickle(self, path="data/G.pkl"):
        import pickle
        with open(path, "rb") as f:
            self.G = pickle.load(f)
    
    def reasoning_chain(self, query, G_raw):
        context_texts = []
        for node_id in G_raw.nodes:
            node_type = self.G.nodes[node_id]["type"]
            if node_type in {"T", "S", "H", "R"}:
                context_texts.append(self.G.nodes[node_id].get("text", ""))
        context="\n".join(context_texts)

        llm_prompt = f"""
            Given the question: {query}, and the following context from the candidate subgraph nodes:
            {context},
            produce a step-by-step reasoning chain that connects the question to the correct answer, referencing intermediate entities/nodes.
        """
                
        return self.groq_handler.generate_reasoning_chain(llm_prompt)
    
    def align_nodes_via_gnn(self, query, G_raw, reasoning_chain, model, optimizer, device, top_n=5):
        type_map = {'T':0, 'S':1, 'H':2, 'R':3, 'N':4, 'O':5}
        model.eval()

        node_texts = []
        node_types = []

        for node_id in G_raw.nodes:
            text = self.G.nodes[node_id].get("text", "")
            typ = self.G.nodes[node_id].get("type")
            node_texts.append(text)
            type_vec = [0] * 7
            type_vec[type_map[typ]] = 1
            node_types.append(type_vec)

        encoder = SentenceTransformer("all-MiniLM-L12-v2")
        text_embeds = encoder.encode(node_texts)
        x_input = torch.tensor([list(t) + type_vec for t, type_vec in zip(text_embeds, node_types)], dtype=torch.float32)

        required_edge_keys = {"label"}
        G_cleaned = nx.Graph()
        required_node_keys = {"type", "text"}
        for n, d in G_raw.nodes(data=True):
            G_cleaned.add_node(n)
            for key in required_node_keys:
                G_cleaned.nodes[n][key] = d.get(key, "" if key == "text" else "Unknown")

        for u, v, edata in G_raw.edges(data=True):
            norm_edata = {}
            for key in required_edge_keys:
                norm_edata[key] = edata.get(key, "")
            G_cleaned.add_edge(u, v, **norm_edata)
        pyg_data = from_networkx(G_cleaned)
        pyg_data.x = x_input
        edge_index = pyg_data.edge_index
        batch = torch.zeros(x_input.shape[0], dtype=torch.long)

        # 2. Query Embedding
        q_emb = encoder.encode([query])
        q_tensor = torch.tensor(q_emb[0], dtype=torch.float32)

        # 3. Extract ground-truth reasoning hits
        reasoning_text = reasoning_chain.lower()
        hit_counts = {}
        for i, text in enumerate(node_texts):
            count = len(re.findall(re.escape(text.lower()), reasoning_text))
            hit_counts[i] = count

        p_reason = torch.tensor(
            [(hit_counts.get(i, 0) + 1e-6) for i in range(len(node_texts))],
            dtype=torch.float32
        )
        p_reason = p_reason / p_reason.sum()

        # 4. Forward and loss
        node_embeds, p_pred = model(x_input, edge_index, batch, q_tensor)

        kl_loss = F.kl_div(p_pred.log(), p_reason, reduction='batchmean')

        optimizer.zero_grad()
        kl_loss.backward()
        optimizer.step()

        # 5. Prune via Top-K p_pred
        topk = torch.topk(p_pred, top_n)
        keep_nodes = set(topk.indices.tolist())
        for i in topk.indices.tolist():
            for j in G_raw.neighbors(list(G_raw.nodes)[i]):
                keep_nodes.add(list(G_raw.nodes).index(j))

        G_aligned = G_raw.subgraph([list(G_raw.nodes)[i] for i in keep_nodes]).copy()
        print(f"🧠 G_aligned contains {len(G_aligned)} nodes.")

        return G_aligned, node_embeds, p_pred

    def compute_graph_embedding(self, G_aligned, reasoning_chain, node_embeds, model_dim=256):
        node_emb_matrix = node_embeds.detach()
        r_g = node_emb_matrix.mean(dim=0)

        encoder = SentenceTransformer("all-MiniLM-L12-v2")
        r_s = encoder.encode([reasoning_chain])[0]
        r_s = torch.tensor(r_s, dtype=torch.float32)

        mlp_g = torch.nn.Sequential(
            torch.nn.Linear(256, 12288),
            torch.nn.ReLU(),
            torch.nn.Linear(12288, 12288)
        )

        mlp_s = torch.nn.Sequential(
            torch.nn.Linear(384, 12288),
            torch.nn.ReLU(),
            torch.nn.Linear(12288, 12288)
        )

        r_hat_g = mlp_g(r_g)
        r_hat_s = mlp_s(r_s)

        cos = torch.nn.CosineSimilarity(dim=0)
        sim_score = cos(r_hat_g, r_hat_s).item()
        print(f"🔗 Alignment score: {sim_score:.4f}")

        return r_hat_g, r_hat_s, sim_score


    def close(self):
        self.neo4j_driver.close()


In [6]:
class GraphAugmentedGenerator:
    def __init__(self, groq_handler: GroqHandler, graph_token="<GRAPH>", max_tokens=256):
        self.groq_handler = groq_handler
        self.graph_token = graph_token
        self.max_tokens = max_tokens

    def textualize_graph(self, G_aligned):
        lines = []
        for node in G_aligned.nodes():
            typ = G_aligned.nodes[node].get("type", "")
            txt = G_aligned.nodes[node].get("text", "").strip().replace("\n", " ")
            lines.append(f"[{typ}: {node}] {txt}")

        for u, v, data in G_aligned.edges(data=True):
            label = data.get("label", "")
            lines.append(f"[EDGE: {u} → {v}] {label}")

        return "\n".join(lines)

    def build_prompt(self, query, G_aligned):
        tg = self.textualize_graph(G_aligned)
        print(f"Context: {tg}")
        prompt = f"""Below is a user query and a pruned subgraph context. Use both to generate an accurate, comprehensive answer.

### Query:
{query}

### Subgraph (aligned):
{tg}

Answer:"""

        return prompt

    def generate_answer(self, query, G_aligned):
        prompt = self.build_prompt(query, G_aligned)
        print("📨 Sending prompt to Groq LLM...")
        answer = self.groq_handler.generate_answer(prompt)
        return answer

    def save_graph_embedding(self, r_hat_g, path="data/graph_embeddings.jsonl", metadata=None):
        item = {"embedding": r_hat_g.detach().cpu().tolist()}
        if metadata:
            item.update(metadata)

        with jsonlines.open(path, mode='a') as writer:
            writer.write(item)

        print(f"💾 Saved graph embedding to {path}")


In [7]:
class EntityHandler:
    def __init__(self, groq_handler: GroqHandler, graph_builder: GraphBuilder, generator: GraphAugmentedGenerator, batch_size=8, wait_time=60,
                 progress_file="data/progress.json",
                 failed_file="data/permanent_failed.json",
                 store_file="data/chunked_output.jsonl",
                 community_file="data/community_map.json",
                 progress_file_name="ProgressCheckpoint",
                 eval_dump_file="data/eval_dump.json",
                 max_retries=3):
        self.groq_handler = groq_handler
        self.graph_builder = graph_builder
        self.generator = generator
        self.batch_size = batch_size
        self.wait_time = wait_time
        self.progress_file = progress_file
        self.failed_file = failed_file
        self.store_file = store_file
        self.community_file = community_file
        self.progress_file_name = progress_file_name
        self.eval_dump_file = eval_dump_file
        self.max_retries = max_retries
        self.processed_count = 0
        self.Entity_Unprocessed_elements = []

    def process_documents(self, type: str, start_index=0):
        self.load_progress(type)

        if type in {"reasoning_chain", "answer"}:
            data_file = self.eval_dump_file
            try:
                with open(data_file, "r", encoding="utf-8") as f:
                    json_data = json.load(f)

                if not json_data:
                    print("No data found in the eval_dump JSON.")
                    return

                doc_ids = list(json_data.keys())
                total_docs = len(doc_ids)

                for i in range(start_index, total_docs, self.batch_size):
                    batch_doc_ids = doc_ids[i : i + self.batch_size]
                    batch_processed = False

                    for doc_id in batch_doc_ids:
                        content = json_data[doc_id]
                        try:
                            query = content.get("query")
                            if not query:
                                print(f"⚠️ No query found in {doc_id}")
                                self.Entity_Unprocessed_elements.append({"doc_id": doc_id, "type": type})
                                continue

                            if type == "reasoning_chain":
                                G_raw = self.graph_builder.json_to_graph(content.get("G_raw", {}))
                                if not G_raw:
                                    print(f"⚠️ No G_raw graph found in {doc_id}")
                                    self.Entity_Unprocessed_elements.append({"doc_id": doc_id, "type": type})
                                    continue
                                result = self.graph_builder.reasoning_chain(query, G_raw)
                                key = "reasoning_chain"

                            elif type == "answer":
                                G_aligned = self.graph_builder.json_to_graph(content.get("G_aligned", {}))
                                if not G_aligned:
                                    print(f"⚠️ No G_aligned graph found in {doc_id}")
                                    self.Entity_Unprocessed_elements.append({"doc_id": doc_id, "type": type})
                                    continue
                                result = self.generator.generate_answer(query, G_aligned)
                                key = "answer"

                            if not result:
                                print(f"⚠️ No result for {type} in {doc_id}")
                                self.Entity_Unprocessed_elements.append({"doc_id": doc_id, "type": type})
                                continue

                            content[key] = result
                            self.processed_count += 1
                            batch_processed = True
                            print(f"✅ Processed {type} for {doc_id} ({self.processed_count}/{total_docs})")

                        except Exception as e:
                            print(f"❌ Error processing {type} for {doc_id}: {e}")
                            self.Entity_Unprocessed_elements.append({"doc_id": doc_id, "type": type})

                    if batch_processed:
                        with open(data_file, "w", encoding="utf-8") as f:
                            json.dump(json_data, f, indent=2, ensure_ascii=False)
                        print(f"✅ Batch {(i // self.batch_size) + 1} complete. Waiting {self.wait_time} seconds.")
                        time.sleep(self.wait_time)

            except Exception as e:
                print(f"⛔ Process interrupted: {e}")
                print(f"Documents processed before interruption: {self.processed_count}")
            finally:
                self.save_progress(type)
    
        if type in {"HL_summary", "title"}:
            data_file = self.community_file
            try:
                with open(data_file, "r", encoding="utf-8") as f:
                    json_data = json.load(f)

                if not json_data:
                    print("No data found in the JSON file.")
                    return

                doc_ids = list(json_data.keys())
                total_docs = len(doc_ids)
                
                for i in range(start_index, total_docs, self.batch_size):
                    batch_doc_ids = doc_ids[i : i + self.batch_size]
                    batch_processed = False

                    for doc_id in batch_doc_ids:
                        content = json_data[doc_id]
                        try:
                            if type == "HL_summary":
                                context_lines = content.get("summary_texts", []) + content.get("relation_texts", [])
                                if not context_lines:
                                    print(f"⚠️ No context for HL_summary in {doc_id}")
                                    self.Entity_Unprocessed_elements.append({"doc_id": doc_id, "type": type})
                                    continue
                                result = self.groq_handler.generate_highlevel_summary(context_lines)
                                key = "HL_summary"

                            elif type == "title":
                                hl_summary = content.get("HL_summary")
                                if not hl_summary:
                                    print(f"⚠️ No HL_summary for title generation in {doc_id}")
                                    self.Entity_Unprocessed_elements.append({"doc_id": doc_id, "type": type})
                                    continue
                                result = self.groq_handler.generate_title(hl_summary)
                                key = "title"

                            if not result:
                                print(f"⚠️ No result for {type} in {doc_id}")
                                self.Entity_Unprocessed_elements.append({"doc_id": doc_id, "type": type})
                                continue

                            content[key] = result
                            self.processed_count += 1
                            batch_processed = True
                            print(f"✅ Processed {type} for community {doc_id} ({self.processed_count}/{total_docs})")

                        except Exception as e:
                            print(f"❌ Error processing {type} for {doc_id}: {e}")
                            self.Entity_Unprocessed_elements.append({"doc_id": doc_id, "type": type})

                    if batch_processed:
                        with open(data_file, "w", encoding="utf-8") as f:
                            json.dump(json_data, f, indent=2, ensure_ascii=False)
                        print(f"✅ Batch {(i // self.batch_size) + 1} complete. Waiting {self.wait_time} seconds.")
                        time.sleep(self.wait_time)

            except Exception as e:
                print(f"⛔ Process interrupted: {e}")
                print(f"Documents processed before interruption: {self.processed_count}")
            finally:
                self.save_progress(type)
            
        if type in {"entity", "summary", "relationship"}:
            try:
                with jsonlines.open(self.store_file, mode='r') as reader:
                    jsonl_data = list(reader)

                if not jsonl_data:
                    print("No data found in the JSONL file.")
                    return

                for i in range(start_index, len(jsonl_data), self.batch_size):
                    batch_chunks = jsonl_data[i : i + self.batch_size]

                    for chunk in batch_chunks:
                        try:
                            doc_id = chunk.get("id") or chunk.get("doc_id")
                            text_chunk = chunk.get("text")

                            if not text_chunk or not doc_id:
                                print(f"Skipping due to missing text or doc_id: {chunk}")
                                continue

                            if type == "entity":
                                result = self.groq_handler.extract_entities(text_chunk)
                                key = "entities"

                            elif type == "summary":
                                result = self.groq_handler.generate_summary(text_chunk)
                                key = "summary"

                            elif type == "relationship":
                                entity_list = chunk.get("entities")
                                if not entity_list:
                                    print(f"⚠️ No entities in chunk for relationship extraction (doc_id={doc_id}). Skipping.")
                                    self.Entity_Unprocessed_elements.append({
                                        "doc_id": doc_id,
                                        "text_chunk": text_chunk,
                                        "type": type
                                    })
                                    continue
                                result = self.groq_handler.extract_relations(text_chunk, entity_list)
                                key = "relations"

                            else:
                                raise ValueError(f"Unsupported type: {type}")

                            if not result:
                                self.Entity_Unprocessed_elements.append({
                                    "doc_id": doc_id,
                                    "text_chunk": text_chunk,
                                    "type": type
                                })
                                print(f"⚠️ No result for document {doc_id}. Added to unprocessed.")
                                continue

                            chunk[key] = result
                            self.processed_count += 1
                            print(f"✅ Processed {type} for document {doc_id} ({self.processed_count} total).")

                            with jsonlines.open(self.store_file, mode='w') as writer:
                                writer.write_all(jsonl_data)

                        except Exception as e:
                            print(f"❌ Error processing document {chunk.get('doc_id')}: {e}")
                            continue

                    self.save_progress(type)
                    print(f"✅ Batch {(i // self.batch_size) + 1} complete. Waiting {self.wait_time} seconds.")
                    time.sleep(self.wait_time)

            except Exception as e:
                print(f"⛔ Process interrupted: {e}")
                print(f"Documents processed before interruption: {self.processed_count}")
            finally:
                self.save_progress(type)

    def retry_stored_unprocessed_documents(self, type, max_retries=3):
        self.load_progress(type)

        if type in {"reasoning_chain", "answer"}:
            data_file = self.eval_dump_file
            checkpoint_files = [f"data/{self.progress_file_name}-{type}-{i}.json" for i in range(1, max_retries + 1)]

            try:
                with open(data_file, "r", encoding="utf-8") as f:
                    json_data = json.load(f)

                if not json_data:
                    print("No data found in eval_dump.")
                    return

                for retry_idx, checkpoint_file in enumerate(checkpoint_files):
                    if not self.Entity_Unprocessed_elements:
                        print(f"✅ All documents processed successfully by retry {retry_idx}.")
                        return

                    print(f"\n🔄 Retry {retry_idx + 1}: Processing {len(self.Entity_Unprocessed_elements)} unprocessed elements...")
                    retried_elements = []

                    for item in self.Entity_Unprocessed_elements:
                        doc_id = item.get("doc_id")
                        if not doc_id:
                            continue

                        content = json_data.get(doc_id)
                        if not content:
                            print(f"⚠️ Document ID {doc_id} not found in eval_dump.")
                            continue

                        try:
                            query = content.get("query")
                            if not query:
                                print(f"⚠️ No query in {doc_id}. Skipping.")
                                retried_elements.append(item)
                                continue

                            if type == "reasoning_chain":
                                G_raw = self.graph_builder.json_to_graph(content.get("G_raw", {}))
                                if not G_raw:
                                    print(f"⚠️ No G_raw for {doc_id}")
                                    retried_elements.append(item)
                                    continue
                                result = self.graph_builder.reasoning_chain(query, G_raw)
                                key = "reasoning_chain"

                            elif type == "answer":
                                G_aligned = self.graph_builder.json_to_graph(content.get("G_aligned", {}))
                                if not G_aligned:
                                    print(f"⚠️ No G_aligned for {doc_id}")
                                    retried_elements.append(item)
                                    continue
                                result = self.generator.generate_answer(query, G_aligned)
                                key = "answer"

                            if not result:
                                print(f"⚠️ No result for {type} in {doc_id}")
                                retried_elements.append(item)
                                continue

                            content[key] = result
                            self.processed_count += 1
                            print(f"✅ Retry {retry_idx + 1}: Successfully processed {type} for {doc_id}")

                            # Save after each success
                            with open(data_file, "w", encoding="utf-8") as f:
                                json.dump(json_data, f, indent=2, ensure_ascii=False)

                        except Exception as e:
                            print(f"❌ Error retrying {doc_id}: {e}")
                            retried_elements.append(item)

                    # Save retry checkpoint
                    with open(checkpoint_file, "w") as f:
                        json.dump({"Entity_Unprocessed_elements": retried_elements}, f, indent=4)
                    print(f"📁 Saved {len(retried_elements)} remaining to {checkpoint_file}")

                    self.Entity_Unprocessed_elements = retried_elements
                    self.save_progress(type)

            except Exception as e:
                print(f"⛔ Process interrupted during retry: {e}")
                print(f"Documents processed before interruption: {self.processed_count}")
            finally:
                self.save_progress(type)

        if type in {"HL_summary", "title"}:
            data_file = self.community_file
            checkpoint_files = [f"data/{self.progress_file_name}-{type}-{i}.json" for i in range(1, max_retries + 1)]

            try:
                with open(data_file, "r", encoding="utf-8") as f:
                    json_data = json.load(f)

                if not json_data:
                    print("No data found in the JSON file.")
                    return

                for retry_idx, checkpoint_file in enumerate(checkpoint_files):
                    if not self.Entity_Unprocessed_elements:
                        print(f"✅ All documents processed successfully by retry {retry_idx}.")
                        return

                    print(f"\n🔄 Retry {retry_idx + 1}: Processing {len(self.Entity_Unprocessed_elements)} unprocessed elements...")
                    retried_elements = []

                    for item in self.Entity_Unprocessed_elements:
                        doc_id = item["doc_id"]
                        if not doc_id:
                            continue

                        content = json_data.get(doc_id)
                        if not content:
                            print(f"⚠️ Document ID {doc_id} not found in JSON.")
                            continue

                        try:
                            if type == "HL_summary":
                                context_lines = content.get("summary_texts", []) + content.get("relation_texts", [])
                                if not context_lines:
                                    print(f"⚠️ No context for HL_summary in {doc_id}")
                                    retried_elements.append(item)
                                    continue
                                result = self.groq_handler.generate_highlevel_summary(context_lines)
                                key = "HL_summary"

                            elif type == "title":
                                hl_summary = content.get("HL_summary")
                                if not hl_summary:
                                    print(f"⚠️ No HL_summary for title generation in {doc_id}")
                                    retried_elements.append(item)
                                    continue
                                result = self.groq_handler.generate_title(hl_summary)
                                key = "title"

                            if not result:
                                print(f"⚠️ No result for {type} in {doc_id}")
                                retried_elements.append(item)
                                continue

                            content[key] = result
                            self.processed_count += 1
                            print(f"✅ Retry {retry_idx + 1}: Successfully processed {type} for {doc_id}.")

                            with open(data_file, "w", encoding="utf-8") as f:
                                json.dump(json_data, f, indent=2, ensure_ascii=False)

                        except Exception as e:
                            print(f"❌ Error retrying {doc_id}: {e}")
                            retried_elements.append(item)

                    with open(checkpoint_file, "w") as f:
                        json.dump({"Entity_Unprocessed_elements": retried_elements}, f, indent=4)
                    print(f"📁 Saved {len(retried_elements)} remaining to {checkpoint_file}")

                    self.Entity_Unprocessed_elements = retried_elements
                    self.save_progress(type)

            except Exception as e:
                print(f"⛔ Process interrupted: {e}")
                print(f"Documents processed before interruption: {self.processed_count}")
            finally:
                self.save_progress(type)

        elif type in {"entity", "summary", "relationship"}:
            store_file = self.store_file
            checkpoint_files = [f"data/ProgressCheckpoint-{type}-{i}.json" for i in range(1, max_retries + 1)]

            with jsonlines.open(store_file, mode='r') as reader:
                jsonl_data = list(reader)

            for retry_idx, checkpoint_file in enumerate(checkpoint_files):
                if not self.Entity_Unprocessed_elements:
                    print(f"✅ All documents processed successfully by retry {retry_idx}.")
                    return

                print(f"\n🔄 Retry {retry_idx + 1}: Processing {len(self.Entity_Unprocessed_elements)} unprocessed elements...")
                retried_elements = []

                for item in self.Entity_Unprocessed_elements:
                    doc_id = item.get("doc_id")
                    if not doc_id:
                        continue

                    chunk = next((c for c in jsonl_data if c.get("id") == doc_id or c.get("doc_id") == doc_id), None)

                    if not chunk:
                        print(f"⚠️ Document ID {doc_id} not found in JSONL.")
                        continue

                    try:
                        text_chunk = chunk.get("text")
                        if not text_chunk:
                            print(f"⚠️ No text for document {doc_id}.")
                            retried_elements.append(item)
                            continue

                        if type == "entity":
                            result = self.groq_handler.extract_entities(text_chunk)
                            key = "entities"
                        elif type == "summary":
                            result = self.groq_handler.generate_summary(text_chunk)
                            key = "summary"
                        elif type == "relationship":
                            entity_list = chunk.get("entities")
                            if not entity_list:
                                print(f"⚠️ No entities in JSONL for {doc_id}. Skipping.")
                                retried_elements.append(item)
                                continue
                            result = self.groq_handler.extract_relations(text_chunk, entity_list)
                            key = "relations"
                        else:
                            raise ValueError(f"Unsupported type: {type}")

                        if not result:
                            retried_elements.append(item)
                            print(f"⚠️ Retry {retry_idx + 1}: Document {doc_id} still failed.")
                            continue

                        chunk[key] = result
                        self.processed_count += 1
                        print(f"✅ Retry {retry_idx + 1}: Successfully processed {doc_id}.")

                        with jsonlines.open(store_file, mode='w') as writer:
                            writer.write_all(jsonl_data)

                    except Exception as e:
                        print(f"❌ Error retrying {doc_id}: {e}")
                        retried_elements.append(item)

                with open(checkpoint_file, "w") as f:
                    json.dump({"Entity_Unprocessed_elements": retried_elements}, f, indent=4)
                print(f"📁 Saved {len(retried_elements)} remaining to {checkpoint_file}")

                self.Entity_Unprocessed_elements = retried_elements
                self.save_progress(type)

        print("✅ Retry process completed.")

    def save_progress(self, type):
        try:
            if not hasattr(self, "progress_data"):
                self.progress_data = {}

            if os.path.exists(self.progress_file):
                with open(self.progress_file, "r") as f:
                    self.progress_data = json.load(f)

            self.progress_data[type] = {
                "processed_count": self.processed_count,
                "Entity_Unprocessed_elements": self.Entity_Unprocessed_elements
            }

            with open(self.progress_file, "w") as f:
                json.dump(self.progress_data, f, indent=4)

            print(f"💾 Progress for `{type}` saved. {self.processed_count} processed, {len(self.Entity_Unprocessed_elements)} unprocessed.")

        except Exception as e:
            print(f"❌ Failed to save progress for `{type}`: {e}")

    def load_progress(self, type):
        if os.path.exists(self.progress_file):
            try:
                with open(self.progress_file, "r") as f:
                    data = json.load(f)

                self.progress_data = data
                type_data = data.get(type, {})
                self.processed_count = type_data.get("processed_count", 0)
                self.Entity_Unprocessed_elements = type_data.get("Entity_Unprocessed_elements", [])

                print(f"🔁 Resumed `{type}` progress: {self.processed_count} processed, {len(self.Entity_Unprocessed_elements)} unprocessed.")

            except Exception as e:
                print(f"❌ Failed to load progress for `{type}`: {e}")
        else:
            self.progress_data = {}
            self.processed_count = 0
            self.Entity_Unprocessed_elements = []

    def delete_progress(self):
        if os.path.exists(self.progress_file):
            try:
                with open(self.progress_file, "w") as f:
                    json.dump({}, f)
            except Exception as e:
                print(f"❌ Failed to flush json: {e}")

In [8]:
class NodeAlignerGNN(torch.nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.gnn1 = GATConv(in_dim, hidden_dim, heads=2, concat=True)
        self.gnn2 = GATConv(hidden_dim * 2, out_dim, heads=1)
        self.query_mlp = torch.nn.Sequential(
            torch.nn.Linear(out_dim + 384, out_dim),  # Match to query emb dim
            torch.nn.ReLU(),
            torch.nn.Linear(out_dim, 1)
        )

    def forward(self, x, edge_index, batch, query_emb):
        x = self.gnn1(x, edge_index)
        x = self.gnn2(x, edge_index)
        q = query_emb.unsqueeze(0).repeat(x.size(0), 1)
        scores = self.query_mlp(torch.cat([x, q], dim=1)).squeeze()
        return x, F.softmax(scores, dim=0)


In [8]:
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

class NodeRAGEvaluator:
    def __init__(self, chunker: Chunker, builder: GraphBuilder, generator: GraphAugmentedGenerator, entity_handler: EntityHandler, tokenizer: GPT2TokenizerFast):
        self.chunker = chunker
        self.builder = builder
        self.generator = generator
        self.entity_handler = entity_handler
        self.tokenizer = tokenizer
    
    def load_musique(self, path, max_samples=None):
        examples = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                item = json.loads(line)
                if not item.get("answerable", True):
                    continue

                id = item["id"]
                question = item["question"]
                answer_main = item["answer"]
                aliases = item.get("answer_aliases", [])
                answers = list({a.strip() for a in [answer_main] + aliases if a.strip()})

                paragraphs = item.get("paragraphs", [])
                full_context = " ".join(p["paragraph_text"].strip() for p in paragraphs if p.get("paragraph_text"))

                examples.append({
                    "id": id,
                    "question": question,
                    "answers": answers,
                    "context": full_context
                })

                if max_samples and len(examples) >= max_samples:
                    break

        return examples

    def normalize_answer(self, s):
        def remove_articles(text):
            return re.sub(r'\b(a|an|the)\b', ' ', text)

        def white_space_fix(text):
            return ' '.join(text.split())

        def remove_punc(text):
            return ''.join(ch for ch in text if ch not in string.punctuation)

        def lower(text):
            return text.lower()

        return white_space_fix(remove_articles(remove_punc(lower(s))))

    def compute_f1(self, prediction, ground_truth):
        pred_tokens = self.normalize_answer(prediction).split()
        gt_tokens = self.normalize_answer(ground_truth).split()
        common = set(pred_tokens) & set(gt_tokens)
        if not common:
            return 0, 0, 0
        prec = len(common) / len(pred_tokens)
        rec = len(common) / len(gt_tokens)
        f1 = 2 * prec * rec / (prec + rec)
        return f1, prec, rec

    def compute_exact(self, prediction, ground_truth):
        return int(self.normalize_answer(prediction) == self.normalize_answer(ground_truth))

    def compute_substring_match(self, prediction, ground_truth):
        return int(self.normalize_answer(ground_truth) in self.normalize_answer(prediction))

    def get_metric_score(self, prediction, references):
        if not references:
            return {}

        prediction = prediction.strip()
        if references and isinstance(references[0], list):
            references = references[0]
        references = [r.strip() for r in references if isinstance(r, str) and r.strip()]
        best_f1, best_prec, best_rec = 0, 0, 0
        best_em = 0
        best_acc = 0
        best_bleu1, best_bleu4 = 0, 0
        best_bleu1_smooth, best_bleu4_smooth = 0, 0
        best_meteor = 0
        best_rouge_l = 0

        smoothing = SmoothingFunction()
        rouge = Rouge()

        for ref in references:
            f1, prec, rec = self.compute_f1(prediction, ref)
            best_f1 = max(best_f1, f1)
            best_prec = max(best_prec, prec)
            best_rec = max(best_rec, rec)

            em = self.compute_exact(prediction, ref)
            best_em = max(best_em, em)

            acc = self.compute_substring_match(prediction, ref)
            best_acc = max(best_acc, acc)

            try:
                bleu1 = sentence_bleu([ref.split()], prediction.split(), weights=(1, 0, 0, 0))
                bleu4 = sentence_bleu([ref.split()], prediction.split(), weights=(0.25, 0.25, 0.25, 0.25))
                bleu1_smooth = sentence_bleu([ref.split()], prediction.split(), weights=(1, 0, 0, 0), smoothing_function=smoothing.method1)
                bleu4_smooth = sentence_bleu([ref.split()], prediction.split(), weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=smoothing.method1)
            except:
                bleu1 = bleu4 = bleu1_smooth = bleu4_smooth = 0

            best_bleu1 = max(best_bleu1, bleu1)
            best_bleu4 = max(best_bleu4, bleu4)
            best_bleu1_smooth = max(best_bleu1_smooth, bleu1_smooth)
            best_bleu4_smooth = max(best_bleu4_smooth, bleu4_smooth)

            try:
                best_meteor = max(best_meteor, meteor_score([ref], prediction))
            except:
                pass

            try:
                rouge_scores = rouge.get_scores(prediction, ref)
                best_rouge_l = max(best_rouge_l, rouge_scores[0]["rouge-l"]["f"])
            except:
                pass

        return {
            "f1": best_f1,
            "precision": best_prec,
            "recall": best_rec,
            "exact_match": best_em,
            "accuracy": best_acc,
            "bleu_1": best_bleu1,
            "bleu_4": best_bleu4,
            "bleu_1_smooth": best_bleu1_smooth,
            "bleu_4_smooth": best_bleu4_smooth,
            "meteor": best_meteor,
            "rouge_l": best_rouge_l
        }

    def build_from_paragraphs(self, eval: bool = True, dataset_name: str = None, dataset_path: str = None, pdf_path: str = None, max_samples=100):
        suffix = "-eval" if eval else ""

        if eval:
            dataset_name = dataset_name.lower()
            if dataset_name == "musique":
                examples = self.load_musique(dataset_path, max_samples)
            else:
                raise NotImplementedError(f"Dataset '{dataset_name}' not yet supported.")

            for example in examples:
                self.chunker.segment_into_chunks(text=example["context"])
        else:
            text = self.chunker.pdf_to_text(path=pdf_path)
            chunks = self.chunker.segment_into_chunks(text=text)

        self.entity_handler.process_documents(type="entity")
        self.entity_handler.retry_stored_unprocessed_documents(type="entity")
        self.entity_handler.process_documents(type="relationship")
        self.entity_handler.retry_stored_unprocessed_documents(type="relationship")
        self.entity_handler.process_documents(type="summary")
        self.entity_handler.retry_stored_unprocessed_documents(type="summary")

        self.builder.build_graph()
        self.builder.detect_and_save_communities(output_path=f"data/community_map{suffix}.json")
        self.builder.save_incomplete_entries(f"data/incomplete_graph_elements{suffix}.json")
        self.builder.save_pickle(f"graph{suffix}.pkl")

        self.entity_handler.process_documents(type="HL_summary")
        self.entity_handler.retry_stored_unprocessed_documents(type="HL_summary")
        self.entity_handler.process_documents(type="title")
        self.entity_handler.retry_stored_unprocessed_documents(type="title")

        self.builder.add_HO_nodes_from_community_map(community_map_path=f"data/community_map{suffix}.json")
        self.builder.finalize_graph_G3_from_community_map(community_map_path=f"data/community_map{suffix}.json")
        self.builder.build_G4_text_attachment(chunk_file=f"data/chunked_output{suffix}.jsonl")
        self.builder.build_G5_semantic_hnsw_index(index_path=f"data/graph{suffix}-index.bin")
        self.builder.save_pickle(f"graph{suffix}.pkl")

        print(f"✅ build_from_paragraphs complete (G5 ready, eval={eval}).")

    def get_single_answer(self, query: str, hnsw_index_path="data\graph-index.bin", graph_path="graph.pkl"):
        self.builder.load_graph_pickle(graph_path)

        entry_nodes, p_weights = self.builder.get_entry_points(query=query, hnsw_index_path=hnsw_index_path, top_k=10)
        G_raw, _ = self.builder.run_shallow_ppr(entry_nodes, p_weights)
        rc = self.builder.reasoning_chain(query, G_raw)
        print(rc)

        model = NodeAlignerGNN(in_dim=391, hidden_dim=128, out_dim=256)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        G_aligned, node_embeds, _ = self.builder.align_nodes_via_gnn(query, G_raw, rc, model, optimizer, device="cpu")

        #r_g, r_s, sim = self.builder.compute_graph_embedding(G_aligned, rc, node_embeds)
        pred_answer = self.generator.generate_answer(query, G_aligned)

        print(pred_answer)

    def evaluate(self, dataset_name: str, dataset_path, hnsw_index_path, max_samples=100,
                dump_path="data/eval_dump.json", type="entry_points"):
        
        dataset_name = dataset_name.lower()
        
        if type == "entry_points":
            if dataset_name == "musique":
                examples = self.load_musique(dataset_path, max_samples)
            else:
                raise NotImplementedError(f"Dataset '{dataset_name}' not supported.")
        else:
            examples = None

        self.builder.load_graph_pickle("graph-eval.pkl")

        if os.path.exists(dump_path):
            with open(dump_path, "r", encoding="utf-8") as f:
                uid_to_item = json.load(f)
        else:
            uid_to_item = {}

        model = NodeAlignerGNN(in_dim=391, hidden_dim=128, out_dim=256)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

        if type == "entry_points":
            for item in tqdm(examples, desc=f"🔍 Evaluating {dataset_name} ({type})"):
                query = item["question"]
                gold_answer = item["answers"]
                uid = item.get("id")
                aliases = item.get("aliases", [])
                aliases.append(gold_answer)

                if uid not in uid_to_item:
                    uid_to_item[uid] = {
                        "id": uid,
                        "query": query,
                        "answers": aliases
                    }

                try:
                    entry_nodes, p_weights = self.builder.get_entry_points(query, hnsw_index_path, top_k=10)
                    G_raw, _ = self.builder.run_shallow_ppr(entry_nodes, p_weights)
                    uid_to_item[uid]["G_raw"] = self.builder.graph_to_json(G_raw)
                except Exception as e:
                    print(f"❌ Entry point or PPR failed: {uid} → {e}")
                    continue

            with open(dump_path, "w", encoding="utf-8") as f:
                json.dump(uid_to_item, f, indent=2)
            return None

        for uid, sample in tqdm(uid_to_item.items(), desc=f"🔧 Running step: {type}"):
            query = sample["query"]

            if type == "align":
                try:
                    G_raw = self.builder.json_to_graph(sample["G_raw"])
                    rc = sample.get("reasoning_chain", "")
                    G_aligned, node_embeds, _ = self.builder.align_nodes_via_gnn(
                        query, G_raw, rc, model, optimizer, device="cpu"
                    )
                    sample["G_aligned"] = self.builder.graph_to_json(G_aligned)
                except Exception as e:
                    print(f"⚠️ Alignment failed: {uid} → {e}")
                    continue

            elif type == "prompt":
                try:
                    G_aligned = self.builder.json_to_graph(sample["G_aligned"])
                    sample["prompt"] = self.generator.build_prompt(query, G_aligned)
                except Exception as e:
                    print(f"⚠️ Prompt generation failed: {uid} → {e}")
                    continue

        if type == "reasoning_chain":
            self.entity_handler.process_documents("reasoning_chain")

        if type == "answer":
            self.entity_handler.process_documents("answer")

        if type in {"align", "prompt"}:
            with open(dump_path, "w", encoding="utf-8") as f:
                json.dump(uid_to_item, f, indent=2)

        if type == "eval":
            predictions = []
            for uid, sample in uid_to_item.items():
                pred_answer = sample.get("answer", "")
                prompt = sample.get("prompt", "")
                gold_answer = sample["answers"]
                query = sample["query"]

                total_tokens = len(self.tokenizer.encode(prompt)) + len(self.tokenizer.encode(pred_answer))
                score = self.get_metric_score(pred_answer, gold_answer)

                score.update({
                    "tokens": total_tokens,
                    "question": query,
                    "prediction": pred_answer,
                    "gold": gold_answer
                })

                predictions.append(score)

            print("✅ Final evaluation complete.")
            return self.aggregate(predictions)

        return None

    def aggregate(self, prediction_logs):
        summary = {}
        if not prediction_logs:
            return {"error": "No predictions"}

        keys = [k for k in prediction_logs[0].keys() if isinstance(prediction_logs[0][k], (float, int)) and k != "tokens"]
        for k in keys:
            summary[k] = sum(p[k] for p in prediction_logs) / len(prediction_logs)

        summary["avg_tokens"] = sum(p["tokens"] for p in prediction_logs) / len(prediction_logs)
        return {
            "summary": summary,
            "predictions": prediction_logs
        }

    def save_results(self, result_dict, path="results/noderag_eval.json"):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "w") as f:
            json.dump(result_dict, f, indent=2)
        print(f"📁 Saved results to {path}")

    def save_sample(self, json_data, path="data/eval-dump.json"):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "w") as f:
            json.dump(json_data, f, indent=2)

In [11]:
chunker = Chunker(store_file="data/chunked_output.jsonl")
groq_handler = GroqHandler()
builder = GraphBuilder(
    groq_handler=groq_handler,
    jsonl_path="data/chunked_output.jsonl",
    neo4j_uri=NEO4J_URI,
    neo4j_user=NEO4J_USER,
    neo4j_password=NEO4J_PASSWORD,
    neo4j_database=NEO4J_DATABASE
)
generator = GraphAugmentedGenerator(groq_handler=groq_handler)
entity_handler = EntityHandler(
    groq_handler=groq_handler,
    graph_builder=builder,
    generator=generator,
    progress_file="data/progress.json",
    failed_file="data/permanent_failed.json",
    store_file="data/chunked_output.jsonl",
    community_file="data/community_map.json",
    progress_file_name="ProgressCheckpoint"
)
evaluator = NodeRAGEvaluator(
    chunker=chunker,
    entity_handler=entity_handler,
    builder=builder,
    generator=generator,
    tokenizer=tokenizer
)

In [14]:
evaluator.get_single_answer(
    query="What are precautions for different types of cancer therapy?",
    hnsw_index_path="data\hnsw_index.bin",
    graph_path="G1_batch.pkl"
)

🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.
📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 21 nodes and 84 edges.
{'id': 'chatcmpl-23308b1c-8157-48c8-91ba-23f2cba51812', 'object': 'chat.completion', 'created': 1750520580, 'model': 'meta-llama/llama-4-scout-17b-16e-instruct', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': "Here's a step-by-step reasoning chain that connects the question to the correct answer:\n\n**Step 1: Understand the Question**\nThe question asks about precautions for different types of cancer therapy.\n\n**Step 2: Identify Relevant Information**\nThe context provided includes information on various cancer treatments, including chemotherapy, surgery, radiation therapy, immunotherapy, hormone therapy, and bone marrow transplantation.\n\n**Step 3: Analyze Chemothe

In [12]:
chunker = Chunker(store_file="data/chunked_output-eval.jsonl")
groq_handler = GroqHandler()
builder = GraphBuilder(
    groq_handler=groq_handler,
    jsonl_path="data/chunked_output-eval.jsonl",
    neo4j_uri=NEO4J_URI,
    neo4j_user=NEO4J_USER,
    neo4j_password=NEO4J_PASSWORD,
    neo4j_database=NEO4J_DATABASE
)
generator = GraphAugmentedGenerator(groq_handler=groq_handler)
entity_handler = EntityHandler(
    groq_handler=groq_handler,
    graph_builder=builder,
    generator=generator,
    progress_file="data/progress-eval.json",
    failed_file="data/permanent_failed-eval.json",
    store_file="data/chunked_output-eval.jsonl",
    community_file="data/community_map-eval.json",
    progress_file_name="ProgressCheckpoint-eval",
    eval_dump_file="data/eval_dump.json"
)
evaluator = NodeRAGEvaluator(
    chunker=chunker,
    entity_handler=entity_handler,
    builder=builder,
    generator=generator,
    tokenizer=tokenizer
)

In [13]:
evaluator.build_from_paragraphs(
    dataset_name="musique",
    dataset_path="docs\musique_ans_v1.0_dev.jsonl",
    max_samples=50
)

{'id': 'chatcmpl-d1a7aa22-b54f-433f-ab78-31e02f33d660', 'object': 'chat.completion', 'created': 1750693261, 'model': 'meta-llama/llama-4-scout-17b-16e-instruct', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '{\n  "entities": [\n    "Robbie Amell",\n    "Kate Melton",\n    "Hayley Kiyoko",\n    "Nick Palatas",\n    "Scooby-Doo",\n    "Frank Welker",\n    "Lorne Michaels",\n    "Peter Aykroyd",\n    "Jim Downey",\n    "Brian Doyle-Murray",\n    "Don Novello",\n    "Tom Schiller",\n    "Alan Zweibel",\n    "Paul Shaffer",\n    "Harry Shearer",\n    "Timothy Wallace Credeur II",\n    "Jesse Taylor",\n    "C.B. Dollaway",\n    "Brittany Petros",\n    "Kris Jenner",\n    "Bruce Jenner",\n    "Caitlyn Jenner",\n    "Kourtney",\n    "Scott Disick",\n    "Khloé",\n    "Lamar Odom",\n    "Kanye West",\n    "Kim",\n    "Brody",\n    "Brandon",\n    "Leah",\n    "Rob Kardashian",\n    "Kendall",\n    "Kylie Jenner"\n  ]\n}'}, 'logprobs': None, 'finish_reason': 'stop'}], 'us

In [14]:
results = evaluator.evaluate(
    type="entry_points",
    dataset_name="musique",
    dataset_path="docs\musique_ans_v1.0_dev.jsonl",
    hnsw_index_path="data\graph-eval-index.bin",
    max_samples=50,
    dump_path="data/eval_dump.json"
)

🔍 Evaluating musique (entry_points):   0%|          | 0/50 [00:00<?, ?it/s]

🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):   2%|▏         | 1/50 [00:02<02:13,  2.73s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):   4%|▍         | 2/50 [00:05<02:07,  2.66s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):   6%|▌         | 3/50 [00:08<02:05,  2.66s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):   8%|▊         | 4/50 [00:10<02:01,  2.63s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 67 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  10%|█         | 5/50 [00:13<01:59,  2.65s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  12%|█▏        | 6/50 [00:16<02:00,  2.75s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  14%|█▍        | 7/50 [00:18<01:55,  2.69s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  16%|█▌        | 8/50 [00:21<01:52,  2.67s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 70 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  18%|█▊        | 9/50 [00:23<01:48,  2.64s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  20%|██        | 10/50 [00:26<01:45,  2.63s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  22%|██▏       | 11/50 [00:29<01:42,  2.63s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  24%|██▍       | 12/50 [00:31<01:39,  2.62s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  26%|██▌       | 13/50 [00:34<01:37,  2.64s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  28%|██▊       | 14/50 [00:37<01:34,  2.64s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  30%|███       | 15/50 [00:39<01:32,  2.65s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  32%|███▏      | 16/50 [00:43<01:35,  2.81s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 67 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  34%|███▍      | 17/50 [00:45<01:30,  2.74s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  36%|███▌      | 18/50 [00:48<01:27,  2.72s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  38%|███▊      | 19/50 [00:51<01:25,  2.77s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  40%|████      | 20/50 [00:53<01:21,  2.72s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 70 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  42%|████▏     | 21/50 [00:56<01:20,  2.77s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  44%|████▍     | 22/50 [00:59<01:16,  2.72s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  46%|████▌     | 23/50 [01:01<01:13,  2.72s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  48%|████▊     | 24/50 [01:04<01:11,  2.74s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  50%|█████     | 25/50 [01:07<01:11,  2.85s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  52%|█████▏    | 26/50 [01:10<01:06,  2.77s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 65 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  54%|█████▍    | 27/50 [01:13<01:02,  2.72s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 70 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  56%|█████▌    | 28/50 [01:15<00:59,  2.69s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  58%|█████▊    | 29/50 [01:18<00:58,  2.77s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 70 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  60%|██████    | 30/50 [01:21<00:56,  2.81s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 70 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  62%|██████▏   | 31/50 [01:24<00:52,  2.77s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  64%|██████▍   | 32/50 [01:26<00:48,  2.71s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  66%|██████▌   | 33/50 [01:29<00:45,  2.67s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  68%|██████▊   | 34/50 [01:31<00:42,  2.66s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  70%|███████   | 35/50 [01:34<00:39,  2.65s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 21 nodes and 82 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  72%|███████▏  | 36/50 [01:37<00:39,  2.82s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  74%|███████▍  | 37/50 [01:40<00:36,  2.82s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  76%|███████▌  | 38/50 [01:43<00:33,  2.76s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  78%|███████▊  | 39/50 [01:45<00:30,  2.73s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  80%|████████  | 40/50 [01:48<00:26,  2.70s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  82%|████████▏ | 41/50 [01:51<00:25,  2.87s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  84%|████████▍ | 42/50 [01:54<00:22,  2.79s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  86%|████████▌ | 43/50 [01:57<00:20,  2.86s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  88%|████████▊ | 44/50 [02:00<00:16,  2.80s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  90%|█████████ | 45/50 [02:02<00:13,  2.74s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  92%|█████████▏| 46/50 [02:05<00:11,  2.83s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  94%|█████████▍| 47/50 [02:08<00:08,  2.78s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  96%|█████████▌| 48/50 [02:10<00:05,  2.72s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points):  98%|█████████▊| 49/50 [02:13<00:02,  2.70s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 70 edges.
🎯 Identifying entry points for query...
🔎 Found 0 exact-match nodes from N/O.


🔍 Evaluating musique (entry_points): 100%|██████████| 50/50 [02:16<00:00,  2.72s/it]

📐 Retrieved 10 semantic neighbors from HNSW.
✅ Entry set V_entry contains 10 nodes.
🔄 Starting Shallow Personalized PageRank (t=2)...
📌 Selected 20 V_cross nodes (top 5 per type)
📎 Induced G_raw with 20 nodes and 72 edges.





In [15]:
results = evaluator.evaluate(
    type="reasoning_chain",
    dataset_name="musique",
    dataset_path="docs\musique_ans_v1.0_dev.jsonl",
    hnsw_index_path="data\graph-eval-index.bin",
    max_samples=50,
    dump_path="data/eval_dump.json"
)

🔧 Running step: reasoning_chain: 100%|██████████| 50/50 [00:00<?, ?it/s]




🔁 Resumed `reasoning_chain` progress: 0 processed, 0 unprocessed.
{'id': 'chatcmpl-503180a6-62ee-4b38-bd72-fa7660478cc9', 'object': 'chat.completion', 'created': 1750694138, 'model': 'meta-llama/llama-4-scout-17b-16e-instruct', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'To find the spouse of the Green performer, let\'s analyze the given context step by step:\n\n1. **Understanding the Context**: The provided context seems to be disjointed and covers various topics, including TV shows, movies, and reality TV. However, none of the paragraphs directly mention a "Green performer."\n\n2. **Identifying Potentially Relevant Information**: Given that there\'s no direct mention of a "Green performer," we need to look for any information that could be tangentially related. The context includes information about various TV shows and their cast members.\n\n3. **Analyzing TV Shows and Cast Members**: Several TV shows are mentioned, such as "The Young and the Restless," "Ba

In [16]:
results = evaluator.evaluate(
    type="align",
    dataset_name="musique",
    dataset_path="docs\musique_ans_v1.0_dev.jsonl",
    hnsw_index_path="data\graph-eval-index.bin",
    max_samples=50,
    dump_path="data/eval_dump.json"
)
results = evaluator.evaluate(
    type="prompt",
    dataset_name="musique",
    dataset_path="docs\musique_ans_v1.0_dev.jsonl",
    hnsw_index_path="data\graph-eval-index.bin",
    max_samples=50,
    dump_path="data/eval_dump.json"
)

🔧 Running step: align:   0%|          | 0/50 [00:00<?, ?it/s]

🔧 Running step: align:   2%|▏         | 1/50 [00:04<03:35,  4.39s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:   4%|▍         | 2/50 [00:07<03:06,  3.89s/it]

🧠 G_aligned contains 12 nodes.


🔧 Running step: align:   6%|▌         | 3/50 [00:11<02:54,  3.70s/it]

🧠 G_aligned contains 11 nodes.


🔧 Running step: align:   8%|▊         | 4/50 [00:14<02:44,  3.57s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  10%|█         | 5/50 [00:18<02:38,  3.53s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  12%|█▏        | 6/50 [00:21<02:31,  3.44s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  14%|█▍        | 7/50 [00:25<02:30,  3.50s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  16%|█▌        | 8/50 [00:28<02:27,  3.50s/it]

🧠 G_aligned contains 11 nodes.


🔧 Running step: align:  18%|█▊        | 9/50 [00:32<02:24,  3.52s/it]

🧠 G_aligned contains 11 nodes.


🔧 Running step: align:  20%|██        | 10/50 [00:35<02:17,  3.45s/it]

🧠 G_aligned contains 11 nodes.


🔧 Running step: align:  22%|██▏       | 11/50 [00:38<02:13,  3.43s/it]

🧠 G_aligned contains 12 nodes.


🔧 Running step: align:  24%|██▍       | 12/50 [00:42<02:12,  3.49s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  26%|██▌       | 13/50 [00:45<02:07,  3.43s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  28%|██▊       | 14/50 [00:49<02:04,  3.45s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  30%|███       | 15/50 [00:52<01:59,  3.40s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  32%|███▏      | 16/50 [00:55<01:53,  3.35s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  34%|███▍      | 17/50 [00:59<01:50,  3.34s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  36%|███▌      | 18/50 [01:02<01:51,  3.49s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  38%|███▊      | 19/50 [01:06<01:47,  3.47s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  40%|████      | 20/50 [01:09<01:42,  3.41s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  42%|████▏     | 21/50 [01:13<01:39,  3.42s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  44%|████▍     | 22/50 [01:16<01:35,  3.41s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  46%|████▌     | 23/50 [01:19<01:31,  3.41s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  48%|████▊     | 24/50 [01:23<01:28,  3.41s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  50%|█████     | 25/50 [01:26<01:24,  3.39s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  52%|█████▏    | 26/50 [01:30<01:21,  3.38s/it]

🧠 G_aligned contains 10 nodes.


🔧 Running step: align:  54%|█████▍    | 27/50 [01:33<01:17,  3.36s/it]

🧠 G_aligned contains 10 nodes.


🔧 Running step: align:  56%|█████▌    | 28/50 [01:36<01:14,  3.39s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  58%|█████▊    | 29/50 [01:40<01:10,  3.37s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  60%|██████    | 30/50 [01:43<01:09,  3.46s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  62%|██████▏   | 31/50 [01:47<01:04,  3.41s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  64%|██████▍   | 32/50 [01:50<01:01,  3.40s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  66%|██████▌   | 33/50 [01:54<00:59,  3.49s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  68%|██████▊   | 34/50 [01:58<00:57,  3.59s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  70%|███████   | 35/50 [02:01<00:53,  3.56s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  72%|███████▏  | 36/50 [02:04<00:48,  3.49s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  74%|███████▍  | 37/50 [02:08<00:45,  3.47s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  76%|███████▌  | 38/50 [02:11<00:41,  3.43s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  78%|███████▊  | 39/50 [02:15<00:38,  3.53s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  80%|████████  | 40/50 [02:19<00:36,  3.62s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  82%|████████▏ | 41/50 [02:22<00:31,  3.53s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  84%|████████▍ | 42/50 [02:25<00:27,  3.49s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  86%|████████▌ | 43/50 [02:29<00:23,  3.42s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  88%|████████▊ | 44/50 [02:32<00:20,  3.42s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  90%|█████████ | 45/50 [02:35<00:16,  3.39s/it]

🧠 G_aligned contains 5 nodes.


🔧 Running step: align:  92%|█████████▏| 46/50 [02:39<00:14,  3.57s/it]

🧠 G_aligned contains 11 nodes.


🔧 Running step: align:  94%|█████████▍| 47/50 [02:43<00:10,  3.59s/it]

🧠 G_aligned contains 11 nodes.


🔧 Running step: align:  96%|█████████▌| 48/50 [02:46<00:07,  3.52s/it]

🧠 G_aligned contains 11 nodes.


🔧 Running step: align:  98%|█████████▊| 49/50 [02:50<00:03,  3.45s/it]

🧠 G_aligned contains 11 nodes.


🔧 Running step: align: 100%|██████████| 50/50 [02:53<00:00,  3.47s/it]


🧠 G_aligned contains 11 nodes.


🔧 Running step: prompt: 100%|██████████| 50/50 [00:00<00:00, 11998.81it/s]

Context: [R: R_2f397a38] provides voice of
[R: R_de4ed8c4] plays
[R: R_b3d1c239] plays
[R: R_9f8a0402] plays
[R: R_2b7aa25a] plays
Context: [T: T_24a9cfef1770481995ae58896003617e] lovers. additional as concubines support and afford to enough wealthy were officials high and nobles although normal, also were marriages Monogamous remarry. and spouses their divorce to able were women and men both law, not custom, by dictated conditions certain Under The Young and the Restless is an American television soap opera, created by William J. Bell and Lee Phillip Bell for CBS. It first aired on March 26, 1973. The longest - running current cast member is Doug Davidson, who has portrayed private investigator Paul Williams since May 23, 1978. Jeanne Cooper, who portrayed the soap opera's matriarch Katherine Chancellor, holds the record for the series' longest - running cast member, airing from November 1973 until her death in May 2013. Melody Thomas Scott and Eric Braeden, who portray Nikki and Vict




In [17]:
results = evaluator.evaluate(
    type="answer",
    dataset_name="musique",
    dataset_path="docs\musique_ans_v1.0_dev.jsonl",
    hnsw_index_path="data\graph-eval-index.bin",
    max_samples=50,
    dump_path="data/eval_dump.json"
)

🔧 Running step: answer: 100%|██████████| 50/50 [00:00<00:00, 49860.96it/s]




🔁 Resumed `answer` progress: 0 processed, 0 unprocessed.
Context: [R: R_2f397a38] provides voice of
[R: R_de4ed8c4] plays
[R: R_b3d1c239] plays
[R: R_9f8a0402] plays
[R: R_2b7aa25a] plays
📨 Sending prompt to Groq LLM...
{'id': 'chatcmpl-5b796118-6763-40a4-82ac-942b628118fe', 'object': 'chat.completion', 'created': 1750694830, 'model': 'meta-llama/llama-4-scout-17b-16e-instruct', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'To accurately answer the query, I need to identify who the Green performer is and then find their spouse. However, the provided subgraph context does not directly mention a "Green performer" or any specific individual\'s spouse. \n\nGiven the structure of the subgraph, it seems to list various performers (identified by their IDs like R_de4ed8c4, R_b3d1c239, etc.) and their roles or the characters they play. Without additional context or a clear reference to a "Green performer," it\'s challenging to provide a direct answer.\n\nIf we assume tha

In [18]:
results = evaluator.evaluate(
    type="eval",
    dataset_name="musique",
    dataset_path="docs\musique_ans_v1.0_dev.jsonl",
    hnsw_index_path="data\graph-eval-index.bin",
    max_samples=50,
    dump_path="data/eval_dump.json"
)
print(results.get("summary"))
evaluator.save_results(results)

🔧 Running step: eval: 100%|██████████| 50/50 [00:00<00:00, 39983.83it/s]
Token indices sequence length is longer than the specified maximum sequence length for this model (4968 > 1024). Running this sequence through the model will result in indexing errors
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


✅ Final evaluation complete.
{'f1': 0.014258769291579739, 'precision': 0.007485265165878705, 'recall': 0.2075238095238095, 'exact_match': 0.0, 'accuracy': 0.08, 'bleu_1': 0.003751830362599912, 'bleu_4': 1.5703064323664198e-156, 'bleu_1_smooth': 0.003751830362599912, 'bleu_4_smooth': 0.0006394501048120502, 'meteor': 0.0, 'rouge_l': 0.010887218512115875, 'avg_tokens': 1448.28}
📁 Saved results to results/noderag_eval.json
