# Sentence Window Retrieval

![Sentence Window Retrieval overview](../data/llamaindex_SentenceWindowRetrieval_overview.png)

![Sentence Window Retrieval example](../data/llamaindex_SentenceWindowRetrieval_example.png)

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import sys
# Prepend the parent directory to the import path; this has to happen before
# `import common.utils` below (presumably the `common` package lives one
# level up -- confirm against the repo layout).
sys.path.insert(0, '..')

import common.utils
import os
import openai
# Fetch the key through the project helper instead of hardcoding it here.
openai.api_key = common.utils.get_openai_api_key()

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [3]:
from llama_index import SimpleDirectoryReader

# Read the single source text file into a list of llama_index documents.
documents = SimpleDirectoryReader(input_files=["../data/Henry.txt"]).load_data()

In [4]:
# Quick look at what the reader returned: container type, number of
# documents, then the type and printed preview of the first document.
print(f"{type(documents)} \n")
print(f"{len(documents)} \n")
print(type(documents[0]))
print(documents[0])

<class 'list'> 

1 

<class 'llama_index.schema.Document'>
Doc ID: 9fef2470-e29a-4fad-a569-927c8c07a978
Text: History   Henry, with his striking features and undeniable
charm, has captivated the hearts of many in Hong Kong, earning him the
title of the most handsome boy in the city. His chiseled jawline,
expressive eyes, and perfectly styled hair make heads turn wherever he
goes. Beyond his physical appearance, Henry possesses an innate grace
and confid...


In [5]:
from llama_index import Document

# Collapse all loaded documents into one Document, separating their texts
# with a blank line.
document = Document(text="\n\n".join(doc.text for doc in documents))

## Sentence-window retrieval setup

In [6]:
from llama_index.node_parser import SentenceWindowNodeParser

# Create the sentence-window node parser with an explicit window_size of 2.
# Each node's surrounding context is stored under the "window" metadata key
# (see the printed windows further down); the "original_text" key presumably
# keeps the sentence itself -- confirm against the parser documentation.
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=2,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

In [7]:
# Toy three-sentence input (note the trailing spaces) to see how the parser
# splits text into nodes.
text = "hello. how are you? I am fine!  "

nodes = node_parser.get_nodes_from_documents([Document(text=text)])

In [8]:
# Text carried by each node: one entry per sentence in the output below.
print([x.text for x in nodes])

['hello. ', 'how are you? ', 'I am fine!  ']


In [9]:
# The "window" metadata of the middle node also contains its neighbours.
print(nodes[1].metadata["window"])

hello.  how are you?  I am fine!  


In [10]:
# A longer toy input (ten short sentences) so the window no longer covers
# the whole text. Note that `text` and `nodes` are rebound here.
text = "hello. foo bar. cat. dog. mouse. boy. girl. fly. apple. water."

nodes = node_parser.get_nodes_from_documents([Document(text=text)])

In [11]:
# Again one node per sentence.
print([x.text for x in nodes])

['hello. ', 'foo bar. ', 'cat. ', 'dog. ', 'mouse. ', 'boy. ', 'girl. ', 'fly. ', 'apple. ', 'water.']


In [12]:
# Window stored for the 'dog. ' node (index 3). In the output below it spans
# the two preceding sentences, the sentence itself, and one following sentence.
print(nodes[3].metadata["window"])

foo bar.  cat.  dog.  mouse. 


### Building the index

In [13]:
from llama_index.llms import OpenAI

# LLM handed to the service context below; a low temperature (0.1) is used,
# presumably to keep answers stable between runs.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

In [14]:
from llama_index import ServiceContext

# Bundle the LLM, the embedding model and the sentence-window parser so the
# index is built with all three. The "local:" prefix presumably selects a
# locally loaded Hugging Face embedding model -- confirm in the library docs.
sentence_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    node_parser=node_parser,
)

In [15]:
from llama_index import VectorStoreIndex

# Build the vector index over the merged document using the sentence-window
# service context defined above.
sentence_index = VectorStoreIndex.from_documents(
    [document], service_context=sentence_context
)

In [16]:
# Write the index to ./sentence_index so it can be reloaded instead of rebuilt.
sentence_index.storage_context.persist(persist_dir="./sentence_index")


In [17]:
# Optional cell: reuse a previously persisted index when ./sentence_index
# exists on disk; otherwise build the index from the document and persist it.

import os
from llama_index import VectorStoreIndex, StorageContext, load_index_from_storage

if os.path.exists("./sentence_index"):
    sentence_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./sentence_index"),
        service_context=sentence_context,
    )
else:
    sentence_index = VectorStoreIndex.from_documents(
        [document], service_context=sentence_context
    )
    sentence_index.storage_context.persist(persist_dir="./sentence_index")

### Building the postprocessor

In [18]:
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor

# Post-processor keyed on the "window" metadata entry written by the parser.
postproc = MetadataReplacementPostProcessor(target_metadata_key="window")

In [19]:
from copy import deepcopy
from llama_index.schema import NodeWithScore

# Wrap each toy node with a dummy score so it can go through a post-processor.
scored_nodes = [NodeWithScore(node=node, score=1.0) for node in nodes]
# Keep independent copies of the nodes for a before/after comparison
# (presumably because the post-processor rewrites node text in place).
nodes_old = [deepcopy(node) for node in nodes]

In [20]:
# Text of the deep-copied node at index 3, i.e. the single sentence.
nodes_old[3].text

'dog. '

In [21]:
# Run the metadata-replacement post-processor over the scored toy nodes.
replaced_nodes = postproc.postprocess_nodes(scored_nodes)

In [22]:
# After post-processing, node 3 reads as its stored "window" text (see output).
print(replaced_nodes[3].text)

foo bar.  cat.  dog.  mouse. 


### Adding a reranker

In [23]:
from llama_index.indices.postprocessor import SentenceTransformerRerank

# Reranker backed by BAAI/bge-reranker-base
# (https://huggingface.co/BAAI/bge-reranker-base); top_n is set to 2.
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-base",
    top_n=2,
)

In [24]:
from llama_index import QueryBundle
from llama_index.schema import NodeWithScore, TextNode

# Toy reranking input: the off-topic sentence deliberately starts with the
# higher score. Note that this rebinds `scored_nodes` from the earlier demo.
query = QueryBundle("I want a dog.")

scored_nodes = [
    NodeWithScore(node=TextNode(text=sentence), score=initial_score)
    for sentence, initial_score in [("This is a cat", 0.6), ("This is a dog", 0.4)]
]

In [25]:
# Score the toy nodes against the query with the reranker.
reranked_nodes = rerank.postprocess_nodes(scored_nodes, query_bundle=query)

In [26]:
# (text, score) pairs after reranking; in the output below the dog sentence
# now comes first with a much higher score.
print([(x.text, x.score) for x in reranked_nodes])

[('This is a dog', 0.9182736), ('This is a cat', 0.0014040766)]


### Running the query engine

In [27]:
# Query engine over the sentence index: fetch 6 candidates by similarity,
# then pass them through the window post-processor followed by the reranker.
sentence_window_engine = sentence_index.as_query_engine(
    similarity_top_k=6,
    node_postprocessors=[postproc, rerank],
)

In [28]:
# Ask a question about the indexed document through the sentence-window engine.
window_response = sentence_window_engine.query(
    "Who is the beautiful person in Hong Kong?"
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [29]:
from llama_index.response.notebook_utils import display_response

# Render the response object in the notebook.
display_response(window_response)

**`Final Response:`** Henri is the beautiful person in Hong Kong.

**Let's check the text in each of these retrieved nodes.**

In [30]:
# Text of the first source node attached to the response.
window_response.source_nodes[0].get_text()

"Whether it's his impeccable fashion choices or his impeccable manners, Henry exudes an aura of elegance and sophistication.  It's no wonder that he has become an icon of attractiveness in Hong Kong, leaving a lasting impression on everyone fortunate enough to encounter him.\n\n Henri, with her radiant presence and captivating allure, is hailed as the most beautiful girl in Hong Kong.  Her exquisite features, delicate complexion, and graceful poise make her an absolute vision of elegance. "

In [31]:
# Text of the second source node attached to the response.
window_response.source_nodes[1].get_text()

"Her exquisite features, delicate complexion, and graceful poise make her an absolute vision of elegance.  Henri's eyes, sparkling with intelligence and kindness, draw people in, while her enchanting smile illuminates any room she enters.  Beyond her physical attributes, Henri possesses an inner beauty that radiates from within.  Her compassionate nature and genuine care for others create an aura of warmth and approachability. "

## Putting it all together

In [32]:
import os
from llama_index import ServiceContext, VectorStoreIndex, StorageContext
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index import load_index_from_storage


def build_sentence_window_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index",
):
    """Return a sentence-window vector index, loading it from disk when cached.

    Args:
        documents: Documents to index; only used when ``save_dir`` does not
            exist yet.
        llm: LLM passed to the service context.
        embed_model: Embedding model passed to the service context.
        sentence_window_size: ``window_size`` given to the sentence-window
            node parser.
        save_dir: Directory the index is persisted to / loaded from.

    Returns:
        The index built from ``documents`` (and persisted to ``save_dir``),
        or the index loaded from ``save_dir`` if that path already exists.

    Note:
        When ``save_dir`` exists the stored index is loaded as-is:
        ``documents`` is ignored and no re-parsing with
        ``sentence_window_size`` takes place. Delete the directory or pass a
        different ``save_dir`` to force a rebuild.
    """
    service_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        # Sentence-window parser; only the window size is configurable here.
        node_parser=SentenceWindowNodeParser.from_defaults(
            window_size=sentence_window_size,
            window_metadata_key="window",
            original_text_metadata_key="original_text",
        ),
    )

    # Cached path: reuse whatever was persisted earlier.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=service_context,
        )

    # Fresh path: build from the documents, then persist for later runs.
    index = VectorStoreIndex.from_documents(
        documents, service_context=service_context
    )
    index.storage_context.persist(persist_dir=save_dir)
    return index


def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k=6,
    rerank_top_n=2,
    rerank_model="BAAI/bge-reranker-base",
):
    """Build a query engine that expands retrieved sentences and reranks them.

    Args:
        sentence_index: Index to query; its ``as_query_engine`` method is used.
        similarity_top_k: Number of nodes retrieved by similarity before
            post-processing.
        rerank_top_n: ``top_n`` passed to the reranker.
        rerank_model: Model name passed to ``SentenceTransformerRerank``.
            Defaults to the previously hard-coded "BAAI/bge-reranker-base",
            so existing callers are unaffected.

    Returns:
        The query engine returned by ``sentence_index.as_query_engine``.
    """
    # Post-processors run in list order: first the "window" metadata
    # replacement, then the reranker.
    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
    rerank = SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model)

    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=[postproc, rerank],
    )

In [33]:
from llama_index.llms import OpenAI

# NOTE(review): "./sentence_index" was already persisted earlier in this
# notebook (built with window_size=2), so the helper takes its load branch
# here; the helper's default sentence_window_size=3 does not trigger a
# rebuild. Use a different save_dir to index with the new window size.
index = build_sentence_window_index(
    [document],
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    save_dir="./sentence_index",
)


In [34]:
# Wrap the index in a query engine; similarity_top_k=6 equals the helper's default.
query_engine = get_sentence_window_query_engine(index, similarity_top_k=6)
