In [1]:
# Fix import errors: make the project root importable so the local `src` package resolves.

import sys
import os

# Resolve the parent of the current working directory. When the kernel runs
# from the notebook's folder ('examples/'), that parent is the project root.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Put the root at the front of sys.path unless it is already listed there,
# and report which of the two cases applied.
if project_root in sys.path:
    print(f"Project root is already in Python path: {project_root}")
else:
    sys.path.insert(0, project_root)
    print(f"Added project root to Python path: {project_root}")

# Optional sanity check: show the first five entries of sys.path.
print("\nVerifying sys.path:")
for i, path in enumerate(sys.path[:5]):
    print(f"{i}: {path}")

Added project root to Python path: /home/nick/projects/Llama-Index-GliREL-GraphRAG

Verifying sys.path:
0: /home/nick/projects/Llama-Index-GliREL-GraphRAG
1: /usr/lib/python312.zip
2: /usr/lib/python3.12
3: /usr/lib/python3.12/lib-dynload
4: 


In [2]:
import asyncio
import os
import logging
import nest_asyncio
import argparse
import json
from typing import Dict, List


from transformers import AutoModel, AutoTokenizer
from tqdm import tqdm

# Runtime setup for Jupyter: apply the nest_asyncio patch first, then
# configure root logging at INFO level with a "LEVEL:message" line format.
LOG_FORMAT = "%(levelname)s:%(message)s"

nest_asyncio.apply()
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

In [3]:

from llama_index.core import Document
from src.GlirelPathExtractor import GlirelPathExtractor 
from src.RecursivePathExtractor import RecursiveLLMPathExtractor
from llama_index.core import SimpleDirectoryReader, PropertyGraphIndex,Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import StorageContext, load_index_from_storage

In [4]:
# Load '../.data/novel.json' into `data`.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's locale default (which is not UTF-8 on every system).
with open('../.data/novel.json', 'r', encoding='utf-8') as file:
    # Parse the JSON document into a Python object.
    data = json.load(file)

In [None]:
def group_questions_by_source(question_list):
    """Bucket question records by their ``"source"`` value.

    Args:
        question_list: Iterable of mapping-like records exposing ``.get``.

    Returns:
        A dict mapping each distinct ``"source"`` value to the list of
        records carrying it, in encounter order. Records without a
        ``"source"`` key are collected under ``None``.
    """
    by_source = {}

    for entry in question_list:
        # setdefault creates the bucket the first time a source is seen.
        by_source.setdefault(entry.get("source"), []).append(entry)

    return by_source

In [7]:
# --- Model construction -----------------------------------------------------
# Chat model served by Ollama; temperature 0.0 is requested for generation.
llm = Ollama(
    model="gemma3:12b",
    request_timeout=120.0,
    context_window=8128,
    temperature=0.0,
)

# Embedding model, also served by Ollama.
embed_model = OllamaEmbedding(
    model_name="snowflake-arctic-embed2:latest",
    ollama_additional_kwargs={"mirostat": 0},
)

# --- Global LlamaIndex settings ---------------------------------------------
# Register both models and the chunking parameters on the shared Settings
# object so later cells can rely on them without passing them explicitly.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 512
Settings.chunk_overlap = 64

In [5]:
# System prompt template for question answering.
# It contains two placeholders, {history} and {context_data}.
# NOTE(review): presumably filled in with str.format before being sent to the
# LLM, but no visible cell uses this constant -- confirm where it is consumed.
SYSTEM_PROMPT = """
---Role---
You are a helpful assistant responding to user queries.

---Goal---
Generate direct and concise answers based strictly on the provided Knowledge Base.
Respond in plain text without explanations or formatting.
Maintain conversation continuity and use the same language as the query.
If the answer is unknown, respond with "I don't know".

---Conversation History---
{history}

---Knowledge Base---
{context_data}
"""

In [61]:
# Initialize RAG: rebuild the index from the directory it was persisted to.
storage_context = StorageContext.from_defaults(
    persist_dir="./.persistent_storage/hybrid/Novel-2544"
)
index = load_index_from_storage(storage_context)

Loading llama_index.core.storage.kvstore.simple_kvstore from ./.persistent_storage/hybrid/Novel-2544/docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./.persistent_storage/hybrid/Novel-2544/index_store.json.


INFO:Loading all indices.


In [62]:
from llama_index.core.indices.property_graph import (
    PGRetriever,
    VectorContextRetriever,
    LLMSynonymRetriever,
)

In [63]:
# Graph traversal depth shared by both sub-retrievers.
GRAPH_PATH_DEPTH = 5

# Explicit sub-retrievers over the index's property graph store.
# Previously this list was built but never handed to the index, so it was dead
# code and `index.as_retriever(path_depth=5)` built its own retrievers instead.
# The depth is now set on each sub-retriever and the list is passed in.
sub_retrievers = [
    VectorContextRetriever(
        index.property_graph_store,
        # NOTE(review): per the LlamaIndex docs this is only needed when the
        # graph store does not support vector queries itself -- confirm for
        # the persisted store used here.
        vector_store=index.vector_store,
        path_depth=GRAPH_PATH_DEPTH,
    ),
    LLMSynonymRetriever(
        index.property_graph_store,
        path_depth=GRAPH_PATH_DEPTH,
    ),
]

# Combine the sub-retrievers into the retriever used by the query engine.
retriever = index.as_retriever(sub_retrievers=sub_retrievers)



In [65]:
# Build a query engine straight from the index.
# `include_text` is a boolean flag; the original passed the string "True",
# which only worked because any non-empty string is truthy.
# NOTE(review): `embedding_mode` and `similarity_top_k` are forwarded as extra
# keyword arguments -- confirm the property-graph retrievers honor them.
query_engine = index.as_query_engine(
    llm=llm,
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)

In [64]:
from llama_index.core.query_engine import RetrieverQueryEngine

In [58]:
# Wrap `retriever` in a query engine that answers with `llm`.
# This assignment rebinds the name `query_engine`.
query_engine = RetrieverQueryEngine.from_args(llm=llm, retriever=retriever)

In [69]:
# Ask a single question, print the answer text, and leave the retrieved
# source nodes as the cell's displayed value.
question = "During the night-time stoppage of the coach, what item did Yuba Bill remove from the vehicle to provide light for the passengers and crew as they dealt with the fallen tree in the road?"
response = query_engine.query(question)
print(response.response)
response.source_nodes

INFO:HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"


KeyError: 'readers of \\ The Red Dog Clarion\\ _OPINED_rich stage express companies were quite able to take care of their own property'