In [1]:
### Make the project root importable from this notebook

import sys
import os

# The notebook is expected to live in 'examples/', so the parent of the
# current working directory is taken to be the project's root directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Put the root at the front of the import search path unless it is already
# listed there, and report which of the two cases applied.
if project_root in sys.path:
    print(f"Project root is already in Python path: {project_root}")
else:
    sys.path.insert(0, project_root)
    print(f"Added project root to Python path: {project_root}")

# Optional sanity check: list the first five search-path entries.
print("\nVerifying sys.path:")
for i, path in enumerate(sys.path[:5]):
    print(f"{i}: {path}")

Added project root to Python path: /home/nick/projects/Llama-Index-GliREL-GraphRAG

Verifying sys.path:
0: /home/nick/projects/Llama-Index-GliREL-GraphRAG
1: /usr/lib/python312.zip
2: /usr/lib/python3.12
3: /usr/lib/python3.12/lib-dynload
4: 


In [2]:
import asyncio
import os
import logging
import nest_asyncio
import argparse
import json
from typing import Dict, List


from transformers import AutoModel, AutoTokenizer
from tqdm import tqdm

# Apply nest_asyncio for Jupyter environments (done once here, before any
# cell uses asyncio-based calls).
nest_asyncio.apply()
# Root logger: INFO level, minimal "LEVEL:message" line format.
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)

In [3]:

from llama_index.core import Document
from src.GlirelPathExtractor import GlirelPathExtractor 
from src.RecursivePathExtractor import RecursiveLLMPathExtractor
from llama_index.core import SimpleDirectoryReader, PropertyGraphIndex,Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import StorageContext, load_index_from_storage

In [4]:
# Load the JSON data from the file into a Python object.
# The encoding is given explicitly so that decoding does not depend on the
# platform's locale default (JSON files are conventionally UTF-8).
with open('../.data/novel.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

In [5]:
def group_questions_by_source(question_list):
    """Bucket question records by their ``"source"`` value.

    Args:
        question_list: Iterable of records that support ``.get("source")``
            (e.g. dicts).

    Returns:
        A dict mapping each distinct source value to the list of records
        carrying it, in input order. Records without a ``"source"`` key
        are collected under the key ``None``.
    """
    by_source = {}
    for item in question_list:
        # setdefault creates the bucket on first sight of a source value.
        by_source.setdefault(item.get("source"), []).append(item)
    return by_source

In [6]:
# Chat model used for answering queries, accessed through Ollama.
# temperature=0.0 is set explicitly; request_timeout is presumably in
# seconds — TODO confirm against the Ollama integration docs.
# NOTE(review): context_window=8128 is an unusual value (8192 is the common
# power of two) — confirm it is intentional.
llm = Ollama(
    model= "gemma3:12b",
    request_timeout=120.0,
    context_window=8128,
    temperature=0.0
)

# Register the LLM and the chunking parameters on the global llama_index Settings.
Settings.llm = llm
Settings.chunk_size=512
Settings.chunk_overlap=64

# Embedding model, also accessed through Ollama; registered globally below.
embed_model = OllamaEmbedding(
    model_name="snowflake-arctic-embed2:latest",
    ollama_additional_kwargs={"mirostat": 0},
)
Settings.embed_model = embed_model

In [7]:
# Prompt template with two placeholders, {history} and {context_data}.
# NOTE(review): this constant is not referenced by any other cell visible in
# this notebook — confirm whether it is still needed.
SYSTEM_PROMPT = """
---Role---
You are a helpful assistant responding to user queries.

---Goal---
Generate direct and concise answers based strictly on the provided Knowledge Base.
Respond in plain text without explanations or formatting.
Maintain conversation continuity and use the same language as the query.
If the answer is unknown, respond with "I don't know".

---Conversation History---
{history}

---Knowledge Base---
{context_data}
"""

In [None]:
# Initialize RAG: load the previously persisted index from local storage.
# NOTE(review): the output recorded under this cell shows files being read from
# ./.persistent_storage/gli/Novel-10146, not .../llm/Novel-10146 as coded here,
# so that output looks stale — re-run the cell to confirm which store is used.
index = load_index_from_storage(
    StorageContext.from_defaults(persist_dir="./.persistent_storage/llm/Novel-10146"))

Loading llama_index.core.storage.kvstore.simple_kvstore from ./.persistent_storage/gli/Novel-10146/docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./.persistent_storage/gli/Novel-10146/index_store.json.


INFO:Loading all indices.


In [None]:
# Scratch list of corpus identifiers; "Novel-10146" matches the persist_dir
# loaded earlier. The list is only displayed, not assigned to a name.
["Novel-5956","Novel-2544","Novel-8559","Novel-10146",]

In [64]:
# Create a query engine from the loaded index, answering with the Ollama LLM
# configured earlier in the notebook.
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="compact",
    similarity_top_k=8,
    embedding_mode="hybrid",
    include_text=True,
)

In [65]:
# Run one sample question through the query engine (top-level await in a notebook cell).
response = await query_engine.aquery("In the narrative surrounding Ellen, what is the relationship between her father and the bishop, as indicated by their roles in the story?")
# Print just the answer text.
print(response.response)
# Bare last expression: the notebook displays the full Response repr,
# which includes every source node and is therefore very long.
response

INFO:HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
INFO:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Bishop Willoughby was a well-known character of the early times. A short conversation with him would readily make patent the fact that he wasn't really a bishop. The printer-reporter mistakenly said "Bishop Willoughby administered the rite of confirmation" when he should have said Bishop Whipple.


Response(response='Bishop Willoughby was a well-known character of the early times. A short conversation with him would readily make patent the fact that he wasn\'t really a bishop. The printer-reporter mistakenly said "Bishop Willoughby administered the rite of confirmation" when he should have said Bishop Whipple.', source_nodes=[NodeWithScore(node=TextNode(id_='5a674c9b-1160-48f7-bbe9-3899f82b3f5a', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='Novel-10146', node_type='4', metadata={}, hash='9ef9061939b8ffac1d8a9134eb0d7900d028a856f8170090f4e406fea3fcc7db'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='f79b9a8d-99e5-4eef-a94e-ace126f910aa', node_type='1', metadata={}, hash='8a242eea6eee6f205a3bd6fe4b8bdf552b4a1056c12c2f91c01be3c097a98a28'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='78267074-b83b-4375-bc91-74a5c6a87e6c', node_type='1', meta

In [9]:
from llama_index.core.indices.property_graph import (
    PGRetriever,
    VectorContextRetriever,
    LLMSynonymRetriever,
)

In [10]:
# Two explicitly constructed retrievers over the index's property graph store.
# NOTE(review): `sub_retrievers` is not passed to anything in the visible cells
# (neither to `index.as_retriever` below nor to the imported `PGRetriever`) —
# confirm whether it was meant to be used.
sub_retrievers = [
    VectorContextRetriever(index.property_graph_store),
    LLMSynonymRetriever(index.property_graph_store),
]

# Retriever built by the index itself with path_depth=5; this is the one
# handed to RetrieverQueryEngine later in the notebook.
retriever = index.as_retriever(path_depth=5)



In [21]:
# Re-create the index-backed query engine (same arguments as the earlier
# cell that defines `query_engine`).
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="compact",
    similarity_top_k=8,
    embedding_mode="hybrid",
    include_text=True,
)

In [None]:
# Debug check: display the concrete class of the index-built query engine.
type(query_engine)

In [None]:
from llama_index.core.query_engine import RetrieverQueryEngine

In [None]:
# Replace the query engine with one built directly from the custom retriever.
query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever,
    llm=llm,
)

In [None]:
# Debug check: display the concrete class of the retriever-based query engine.
type(query_engine)

In [None]:
# nest_asyncio.apply() is already called in the setup cell, so it stays disabled here.
#nest_asyncio.apply()
# Same sample question as before, now against the retriever-based query engine.
# NOTE(review): the output recorded under this cell contains a KeyError raised
# for a graph relation key — investigate before relying on this result.
response = await query_engine.aquery("In the narrative surrounding Ellen, what is the relationship between her father and the bishop, as indicated by their roles in the story?")
# Print just the answer text, then display the full Response object.
print(response.response)
response

INFO:HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"


KeyError: 'Van Bibber_READ_of the fight between \\ Dutchy\\  Mack and the \\ Black Diamond\\ '

INFO:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


In [None]:
# Debug check: display the type of the response's metadata attribute.
type(response.metadata)

In [None]:
# Print the formatted listing of the response's sources. 10000 is passed as the
# single positional argument — presumably a per-source length limit; TODO confirm.
print(response.get_formatted_sources(10000))
