In [2]:
from llama_index.core.node_parser import SentenceWindowNodeParser

# Sentence-window parser with a window size of 3. The two metadata keys name
# where the window and the original text are stored on each node; the "window"
# key is the one the MetadataReplacementPostProcessor below is pointed at.
node_parser = SentenceWindowNodeParser.from_defaults(
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    window_size=3,
)

In [3]:
import os
from copy import deepcopy

import dotenv
from llama_index.core import Document, QueryBundle, StorageContext, VectorStoreIndex, load_index_from_storage
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.schema import NodeWithScore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core import SimpleDirectoryReader
from llama_index.core import Settings
from llama_index.core.postprocessor import SentenceTransformerRerank

# Load .env before any client object is constructed
# (presumably this supplies the OpenAI API key — confirm).
dotenv.load_dotenv()

# Global llama_index defaults shared by the rest of the notebook.
Settings.llm = OpenAI(temperature=0.1, model="gpt-4o-mini")
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.node_parser = node_parser

# Read the source file, then merge everything the reader returned into a
# single Document, with the pieces separated by a blank line.
documents = SimpleDirectoryReader(input_files=["./my-document.md"]).load_data()
document = Document(text="\n\n".join(part.text for part in documents))

# NOTE(review): despite its name this is an OpenAI LLM object, not an embedding
# model, and nothing in the visible cells reads it. Kept in case code outside
# this view references the name; consider renaming or deleting it.
embedding_model = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

# Build the index only when no persisted copy exists, otherwise load it.
# The existence check has to come *before* any build/persist step: persisting
# first creates the directory, which makes the check always false and forces a
# full re-embedding of the document on every run.
# Delete ./sentence_index to force a rebuild after the source document changes.
if not os.path.exists("./sentence_index"):
    sentence_index = VectorStoreIndex.from_documents(
        [document],
        llm=Settings.llm,
        embed_model=Settings.embed_model,
    )
    sentence_index.storage_context.persist(persist_dir="./sentence_index")
else:
    sentence_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./sentence_index"),
        llm=Settings.llm,
        embed_model=Settings.embed_model,
    )


# Postprocessor keyed on the same "window" metadata key the node parser
# was configured to write.
postproc = MetadataReplacementPostProcessor(target_metadata_key="window")

# Parse the already-merged document directly so both postprocessors can be
# exercised outside of a query engine.
nodes = node_parser.get_nodes_from_documents([document])
scored_nodes = [NodeWithScore(node=node, score=1.0) for node in nodes]

# Snapshot of the parsed nodes taken before the replacement step runs.
# NOTE(review): the postprocessor appears to rewrite the nodes it is given in
# place, which is why the copy is made first — confirm against the library.
nodes_old = [deepcopy(node) for node in nodes]

replaced_nodes = postproc.postprocess_nodes(scored_nodes)

rerank = SentenceTransformerRerank(top_n=2, model="BAAI/bge-reranker-base")
query = QueryBundle("What task processing system is used here?")

# Feed the reranker the output of the replacement step explicitly, rather than
# relying on `scored_nodes` having been changed as a side effect of that call.
reranked_nodes = rerank.postprocess_nodes(replaced_nodes, query_bundle=query)

print([(x.text, x.score) for x in reranked_nodes])


'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /BAAI/bge-small-en-v1.5/resolve/main/modules.json (Caused by NameResolutionError("HTTPSConnection(host=\'huggingface.co\', port=443): Failed to resolve \'huggingface.co\' ([Errno 8] nodename nor servname provided, or not known)"))'), '(Request ID: 131b673f-42ba-41b7-9fc7-90a580011ffc)')' thrown while requesting HEAD https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/./modules.json
Retrying in 1s [Retry 1/5].
'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /BAAI/bge-small-en-v1.5/resolve/main/modules.json (Caused by NameResolutionError("HTTPSConnection(host=\'huggingface.co\', port=443): Failed to resolve \'huggingface.co\' ([Errno 8] nodename nor servname provided, or not known)"))'), '(Request ID: 6d9d9993-4e55-4b8f-903c-821da1047664)')' thrown while requesting HEAD https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[('5. run `python manage.py migrate`\n6. then `python manage.py runserver`\nfor testing, create a superuser too.\n\n # Celery-redis setup:\n1.  Celery is installed, install redis as well.  <br>\n2. for linux users: \n- run redis server with `redis-server` command.\n 3. for windows users:\n- Download redis (https://github.com/microsoftarchive/redis/releases/tag/win-3.0.504)\n- Run redis-server.exe and redis-cli.exe.\n\n And finally, while django server is running, run this command on another terminal <br>\n`celery -A config worker -l INFO`\n\n# Additional Resources:\n+ Create braintree sandbox account from: https://sandbox.braintreegateway.com/login\n\n# Usage/testing\nUse provided [fixtures](https://docs.djangoproject.com/en/3.1/howto/initial-data/) for each applications.  <br>\n*Provided Fixtures:*\n- teachers/teacher_fixtures.json\n*How to load fixtures?', np.float32(0.0017049544)), ("<br>\n2. for linux users: \n- run redis server with `redis-server` command.\n 3. for windows users:\

In [4]:
from llama_index.core.response.notebook_utils import display_response


# Query engine over the sentence index: fetch the 6 most similar nodes, then
# run them through the window-replacement and rerank postprocessors, in that order.
sentence_window_engine = sentence_index.as_query_engine(
    node_postprocessors=[postproc, rerank],
    similarity_top_k=6,
)

# Alternative question to try: "What task processing system is used here?"
window_response = sentence_window_engine.query("What storage system is used here?")

display_response(window_response)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-12-11 10:57:29,091 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


**`Final Response:`** The storage system used here is Redis, which is set up for Celery to handle tasks.

# Evaluation: Common Setup

In [5]:
from trulens.core import Tru
from utils import get_prebuilt_trulens_recorder


# Reset the TruLens database before recording
# (presumably this clears records from earlier runs — confirm).
Tru().reset_database()


# One evaluation question per line. Blank or whitespace-only lines are skipped
# so they are not later sent to the query engine as empty questions.
with open('./md_evals/generated_questions.text', 'r') as file:
    eval_questions = [line.strip() for line in file if line.strip()]

def run_evals(eval_questions, tru_recorder, query_engine):
    """Send every question to ``query_engine`` inside the recorder context.

    Args:
        eval_questions: Iterable of question strings.
        tru_recorder: Context manager entered once per question, around the
            query call.
        query_engine: Object exposing ``query(question)``.

    Returns:
        None. The query responses are not kept.
    """
    for prompt in eval_questions:
        # The context is re-entered for each question rather than wrapping the loop.
        with tru_recorder:
            query_engine.query(prompt)

def build_sentence_window_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index",
):
    """Build a sentence-window vector index, or load it if ``save_dir`` exists.

    Side effects: overwrites the global ``Settings.llm``,
    ``Settings.embed_model`` and ``Settings.node_parser``.

    Args:
        documents: Documents passed to ``VectorStoreIndex.from_documents`` when
            the index has to be built.
        llm: LLM object assigned to ``Settings.llm``.
        embed_model: Hugging Face model name handed to ``HuggingFaceEmbedding``.
            A leading ``"local:"`` is dropped first, so both
            ``"local:BAAI/bge-small-en-v1.5"`` and ``"BAAI/bge-small-en-v1.5"``
            select the same model.
        sentence_window_size: ``window_size`` for the sentence-window parser.
        save_dir: Directory the index is persisted to and loaded from.

    Returns:
        The freshly built or loaded index.

    Note:
        When ``save_dir`` already exists it is loaded as is; ``documents`` and
        ``sentence_window_size`` are not consulted in that case. Use a distinct
        ``save_dir`` per configuration, or delete the directory to rebuild.
    """
    # create the sentence window node parser w/ default settings
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )

    # The "local:" prefix belongs to llama_index's string shorthand for
    # embedding models; HuggingFaceEmbedding expects the bare model id.
    local_prefix = "local:"
    if isinstance(embed_model, str) and embed_model.startswith(local_prefix):
        embed_model = embed_model[len(local_prefix):]

    Settings.llm = llm
    Settings.embed_model = HuggingFaceEmbedding(model_name=embed_model)
    Settings.node_parser = node_parser

    if not os.path.exists(save_dir):
        sentence_index = VectorStoreIndex.from_documents(
            documents,
            llm=Settings.llm,
            embed_model=Settings.embed_model,
        )
        sentence_index.storage_context.persist(persist_dir=save_dir)
    else:
        sentence_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            llm=Settings.llm,
            embed_model=Settings.embed_model,
        )

    return sentence_index


def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k=6,
    rerank_top_n=2,
    rerank_model="BAAI/bge-reranker-base",
):
    """Create a query engine that applies window replacement, then reranking.

    Args:
        sentence_index: Index whose ``as_query_engine`` method builds the engine.
        similarity_top_k: Forwarded to ``as_query_engine``.
        rerank_top_n: ``top_n`` for the ``SentenceTransformerRerank`` postprocessor.
        rerank_model: Model name for the reranker. Defaults to the value that
            was previously hard-coded, so existing calls behave the same.

    Returns:
        The query engine returned by ``sentence_index.as_query_engine``.
    """
    # define postprocessors; the list order below is the order they are passed in
    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
    rerank = SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model)

    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=[postproc, rerank],
    )

  import pkg_resources
  Tru().reset_database()
2025-12-11 10:57:29,746 - INFO - Context impl SQLiteImpl.
2025-12-11 10:57:29,747 - INFO - Will assume non-transactional DDL.
2025-12-11 10:57:29,787 - INFO - Context impl SQLiteImpl.
2025-12-11 10:57:29,788 - INFO - Will assume non-transactional DDL.


✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.calls[-1].rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.calls[-1].rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
🦑 Initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


In [6]:
# Configuration 1: sentence window of size 1, persisted in its own directory.
sentence_index_1 = build_sentence_window_index(
    [document],
    llm=OpenAI(temperature=0.1, model="gpt-4o-mini"),
    embed_model="BAAI/bge-small-en-v1.5",
    save_dir="sentence_index_1",
    sentence_window_size=1,
)
sentence_window_engine_1 = get_sentence_window_query_engine(sentence_index_1)

# Run every evaluation question through this engine under its own app id.
tru_recorder_1 = get_prebuilt_trulens_recorder(
    sentence_window_engine_1, app_id='sentence window engine 1'
)
run_evals(eval_questions, tru_recorder_1, sentence_window_engine_1)

Tru().run_dashboard()

instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.embeddings.multi_modal_base.MultiModalEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.base.embeddings.base.BaseEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.TransformComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.BaseComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'pydantic.main.BaseModel'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base



Starting dashboard ...


  Tru().run_dashboard()

  Tru().run_dashboard()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://localhost:49554 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>



# **EVALUATION 2**

In [7]:
# Configuration 2: sentence window of size 3, persisted in its own directory.
sentence_index_2 = build_sentence_window_index(
    [document],
    llm=OpenAI(temperature=0.1, model="gpt-4o-mini"),
    embed_model="BAAI/bge-small-en-v1.5",
    save_dir="sentence_index_2",
    sentence_window_size=3,
)
sentence_window_engine_2 = get_sentence_window_query_engine(sentence_index_2)

# Run every evaluation question through this engine under its own app id.
tru_recorder_2 = get_prebuilt_trulens_recorder(
    sentence_window_engine_2, app_id='sentence window engine 2'
)
run_evals(eval_questions, tru_recorder_2, sentence_window_engine_2)

Tru().run_dashboard()

instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.embeddings.multi_modal_base.MultiModalEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.base.embeddings.base.BaseEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.TransformComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.BaseComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'pydantic.main.BaseModel'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base

  Tru().run_dashboard()


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>