In [None]:
### Fix import errors for the project's local `src` package

import sys
import os

# Resolve the directory one level above the current working directory
# (the notebook is expected to be run from 'examples/', so this is the project root).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Put the project root at the front of sys.path unless it is already listed.
already_listed = project_root in sys.path
if already_listed:
    print(f"Project root is already in Python path: {project_root}")
else:
    sys.path.insert(0, project_root)
    print(f"Added project root to Python path: {project_root}")

# Show the first five sys.path entries as a quick sanity check.
print("\nVerifying sys.path:")
for position, entry in enumerate(sys.path[:5]):
    print(f"{position}: {entry}")

In [None]:
import asyncio
import os
import logging
import nest_asyncio
import argparse
import json
from typing import Dict, List


from transformers import AutoModel, AutoTokenizer
from tqdm import tqdm

# Root logger: INFO level, records rendered as "<LEVEL>:<message>".
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(message)s")

# Patch asyncio through nest_asyncio so async calls can be used inside Jupyter.
nest_asyncio.apply()

In [None]:

from llama_index.core import Document
from src.GlirelPathExtractor import GlirelPathExtractor 
from src.RecursivePathExtractor import RecursiveLLMPathExtractor
from llama_index.core import SimpleDirectoryReader, PropertyGraphIndex,Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import StorageContext, load_index_from_storage

In [None]:
# Read '../.data/novel.json' and parse it into a Python object bound to `data`.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform's
# default locale encoding (which is what open() falls back to when none is given).
with open('../.data/novel.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

In [None]:
def group_questions_by_source(question_list):
    """Bucket question records by the value stored under their "source" key.

    Args:
        question_list: Iterable of mapping-like records supporting ``.get``.

    Returns:
        A dict mapping each distinct ``"source"`` value to the list of records
        carrying it. Records without a ``"source"`` key are collected under
        ``None``. Keys appear in first-seen order, and records keep their
        input order within each list.
    """
    buckets = {}

    for entry in question_list:
        # setdefault creates the bucket on first sight of a source value.
        buckets.setdefault(entry.get("source"), []).append(entry)

    return buckets

In [None]:
# Chat/completion model accessed through the llama_index Ollama wrapper.
llm = Ollama(model="gemma3:12b", request_timeout=120.0, context_window=8128, temperature=0.0)

# Embedding model accessed through the llama_index Ollama embedding wrapper.
embed_model = OllamaEmbedding(
    model_name="snowflake-arctic-embed2:latest",
    ollama_additional_kwargs={"mirostat": 0},
)

# Register the models and the chunking parameters on llama_index's `Settings` object.
Settings.llm = llm
Settings.chunk_size = 512
Settings.chunk_overlap = 64
Settings.embed_model = embed_model

In [None]:
# System prompt template for answering queries from a knowledge base.
# Contains two placeholders, `{history}` and `{context_data}`; presumably they
# are filled in with str.format by the consumer — no use of this constant is
# visible in this part of the notebook, so confirm against the caller.
SYSTEM_PROMPT = """
---Role---
You are a helpful assistant responding to user queries.

---Goal---
Generate direct and concise answers based strictly on the provided Knowledge Base.
Respond in plain text without explanations or formatting.
Maintain conversation continuity and use the same language as the query.
If the answer is unknown, respond with "I don't know".

---Conversation History---
{history}

---Knowledge Base---
{context_data}
"""

In [None]:
# Initialize RAG: reload the previously persisted index from local storage.
storage_context = StorageContext.from_defaults(
    persist_dir="./.persistent_storage/llm/Novel-30752"
)
index = load_index_from_storage(storage_context)

In [None]:
from llama_index.core.indices.property_graph import (
    PGRetriever,
    VectorContextRetriever,
    LLMSynonymRetriever,
)

In [None]:
# Two retrievers constructed over the index's property graph store.
# NOTE(review): `sub_retrievers` is not passed to `index.as_retriever(...)` below
# and is not referenced by any other visible cell — confirm whether it was meant
# to be supplied to the retriever or is leftover from an experiment.
sub_retrievers = [
    VectorContextRetriever(index.property_graph_store),
    LLMSynonymRetriever(index.property_graph_store),
]

# Retriever obtained from the index itself, requested with path_depth=5.
retriever = index.as_retriever(path_depth=5)



In [None]:
# Query engine built directly from the index.
# `include_text` is passed as a real boolean: the former string "True" only
# worked because any non-empty string is truthy (so "False" would have enabled
# it as well).
# NOTE(review): `query_engine` is rebound a few cells below by
# RetrieverQueryEngine.from_args(...), so this engine is only in effect if that
# later cell is skipped.
query_engine = index.as_query_engine(
    llm=llm,
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
    path_depth=4,
)

In [None]:
from llama_index.core.query_engine import RetrieverQueryEngine

In [None]:
# Rebind `query_engine` to an engine that wraps `retriever` and answers with `llm`.
query_engine = RetrieverQueryEngine.from_args(retriever=retriever, llm=llm)

In [None]:
#nest_asyncio.apply()
response = await query_engine.aquery("In the context of the ancient Maya civilization as discussed in the text, which archaeological site is specifically noted for containing inscriptions created by the Mayas?")
print(response.response)
response