**Load the libraries**

In [1]:
from pyprojroot import here
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index import (load_index_from_storage,
                         set_global_service_context,
                         ServiceContext,
                         StorageContext,
                         SimpleDirectoryReader,
                         VectorStoreIndex,
                         Document)
from llama_index.indices.postprocessor import (SentenceTransformerRerank,
                                               MetadataReplacementPostProcessor)
import os
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its variables into the process environment;
# the result is bound to `_` so the cell does not display it.
_ = load_dotenv(find_dotenv())

**Load the GPT model and the embedding model from AzureOpenAI**

In [2]:
# Connection settings shared by both Azure OpenAI clients, read from the environment.
_azure_connection = {
    "api_key": os.getenv("OPENAI_API_KEY"),
    "azure_endpoint": os.getenv("OPENAI_API_BASE"),
    "api_version": os.getenv("OPENAI_API_VERSION"),
}

# Chat/completion model.
# NOTE(review): both `engine` and `deployment_name` are supplied here; they
# presumably identify the same Azure deployment -- confirm which one the client
# actually honours before relying on the `gpt_deployment_name` variable.
llm = AzureOpenAI(
    model="gpt-35-turbo-16k",
    engine="gpt-35-turbo-16k",
    deployment_name=os.getenv("gpt_deployment_name"),
    **_azure_connection,
)

# Embedding model used to vectorise the indexed text and the queries.
embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name=os.getenv("embed_deployment_name"),
    **_azure_connection,
)
# NOTE: Uncomment if you want to use an open source embedding model
# embed_model = "local:BAAI/bge-small-en-v1.5"

**Set the service context**

In [3]:
# Bundle the LLM and the embedding model into one service context ...
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# ... and register it as the global default for later llama_index calls that
# are not given an explicit service context.
set_global_service_context(service_context)

**Load the documents**

In [4]:
# Collect every regular file directly under data/docs.
# os.listdir returns names in arbitrary order and also lists sub-directories,
# so the names are sorted (stable document order across runs and machines) and
# non-file entries are skipped (a directory is not a loadable input file).
docs_dir = here("data/docs")
input_files = [
    here(f"data/docs/{name}")
    for name in sorted(os.listdir(docs_dir))
    if os.path.isfile(os.path.join(docs_dir, name))
]
documents = SimpleDirectoryReader(input_files=input_files).load_data()

**Process documents and prepare the index:**

Processing steps for `Sentence Retrieval RAG`:
1. Combine all the text into one long piece of text.
2. Add a couple of sentences from before and after each selected chunk to give it surrounding context.

Functions:
- build_sentence_window_index
- get_sentence_window_query_engine

In [5]:
# Tunable settings for the sentence-window pipeline
save_dir = here("data/indexes/sentence_index")  # directory the index is persisted to / loaded from
window_size = 3  # window size handed to the sentence-window node parser
similarity_top_k = 6  # intended number of nodes retrieved per query
rerank_top_n = 2  # intended number of nodes kept after reranking
rerank_model = "BAAI/bge-reranker-base"  # model name given to SentenceTransformerRerank

In [6]:
# Merge the text of every loaded document into a single Document,
# separating the individual texts with a blank line.
combined_text = "\n\n".join(doc.text for doc in documents)
document = Document(text=combined_text)

In [7]:
def build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index",
    window_size=3,
):
    """
    Build a sentence-window vector index for a document, or load it from disk.

    A SentenceWindowNodeParser is configured with `window_size`, storing the
    surrounding window under the "window" metadata key and the original
    sentence under "original_text". The parser, `llm` and `embed_model` are
    combined into a ServiceContext.

    If `save_dir` does not exist, a new VectorStoreIndex is built from
    `document` and persisted to `save_dir`. If `save_dir` already exists, the
    index is loaded from it instead; in that case `document` is not re-indexed,
    so delete the directory to rebuild after changing the data or
    `window_size`.

    Args:
        document (Document): The llama_index Document to index.
        llm: The language model placed in the service context.
        embed_model: The embedding model (object or identifier string) placed
            in the service context. Defaults to "local:BAAI/bge-small-en-v1.5".
        save_dir (str or path-like, optional): Directory the index is
            persisted to or loaded from. Defaults to "sentence_index".
        window_size (int, optional): Window size passed to the sentence-window
            node parser. Defaults to 3.

    Returns:
        VectorStoreIndex: The newly built or loaded sentence-window index.

    Raises:
        Exceptions raised by the underlying llama_index storage or indexing
        calls propagate unchanged.
    """
    # Parser configured from the explicit `window_size` argument rather than a
    # notebook-level global, so the function is self-contained.
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )

    if not os.path.exists(save_dir):
        # First run: build the index and cache it on disk.
        sentence_index = VectorStoreIndex.from_documents(
            [document], service_context=sentence_context
        )
        sentence_index.storage_context.persist(persist_dir=save_dir)
    else:
        # Later runs: reuse the persisted index.
        sentence_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context,
        )

    return sentence_index


sentence_index = build_sentence_window_index(
    document,
    llm,
    embed_model=embed_model,
    save_dir=save_dir,  # the index is persisted here automatically on first build
    window_size=window_size,
)

**You can save the index separately if you wish**

In [8]:
# Write the index's storage context to disk under save_dir.
sentence_index.storage_context.persist(persist_dir=save_dir)

**Load the index separately**

In [12]:
# Point a storage context at the persisted index directory ...
storage_context = StorageContext.from_defaults(persist_dir=save_dir)

# ... and restore the index from it.
sentence_index = load_index_from_storage(storage_context=storage_context)

**Build the query engine**

In [13]:
def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k: int = 6,
    rerank_top_n: int = 2,
    rerank_model: str = "BAAI/bge-reranker-base",
):
    """
    Create a query engine over a sentence-window index with two postprocessors.

    The engine retrieves `similarity_top_k` nodes, then applies, in order:
    a MetadataReplacementPostProcessor targeting the "window" metadata key,
    and a SentenceTransformerRerank configured with `rerank_model` and
    `top_n=rerank_top_n`.

    Args:
        sentence_index (VectorStoreIndex): The sentence-window index to query.
        similarity_top_k (int, optional): Number of nodes retrieved by the
            initial similarity search. Defaults to 6.
        rerank_top_n (int, optional): `top_n` given to the reranker.
            Defaults to 2.
        rerank_model (str, optional): Model name given to
            SentenceTransformerRerank. Defaults to "BAAI/bge-reranker-base".

    Returns:
        The query engine returned by `sentence_index.as_query_engine`.

    Raises:
        This function performs no argument validation of its own; exceptions
        raised by the underlying llama_index calls propagate unchanged.
    """
    # Postprocessors run in list order: metadata replacement first, then rerank.
    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
    rerank = SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model)

    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=[postproc, rerank],
    )


# Pass the notebook's configured values explicitly so that editing the
# arguments cell actually changes the engine.
sentence_window_engine = get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k=similarity_top_k,
    rerank_top_n=rerank_top_n,
    rerank_model=rerank_model,
)

**Test with a query**

In [1]:
# Run a sample question through the sentence-window engine and show the answer.
window_response = sentence_window_engine.query("Explain is the architecture of vision transformer model")
print(window_response)

In [14]:
# Sample questions for evaluating the query engine.
eval_questions = [
    "who is fred?",
    "what is the relationship between vision transformer and transformer?",
    "explain the architecture of the transformer",
]