# Basic RAG

In [6]:
import os
from dotenv import load_dotenv
# Load key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key used by later
# cells comes from — confirm against your .env.
load_dotenv()

True

In [7]:
from llama_index.core import SimpleDirectoryReader

# Read the source PDF into a list of LlamaIndex Document objects.
pdf_reader = SimpleDirectoryReader(input_files=["Recommendation System Overview.pdf"])
documents = pdf_reader.load_data()

In [8]:
# Sanity-check what the reader returned: container type, item count,
# then the type and summary of the first Document.
for summary in (type(documents), len(documents)):
    print(f"{summary} \n")

first_doc = documents[0]
print(type(first_doc))
print(first_doc)

<class 'list'> 

69 

<class 'llama_index.core.schema.Document'>
Doc ID: 5f19857b-c71a-4a4e-b764-178c1d82979c
Text: Recommendation System Overview The recommendation system is
responsible for generating an optimal work plan for a barge project.
It takes project information (tasks, schedules, workers) and produces
a Recommended Plan – a schedule of tasks with assigned workers and
timing that aims to meet project goals efficiently. The system also
computes supp...


In [9]:
from llama_index.core import Document

# Collapse the list of loaded Documents into a single Document, joining the
# individual texts with a blank line between them.
merged_text = "\n\n".join(doc.text for doc in documents)
document = Document(text=merged_text)

In [10]:
from llama_index.core import VectorStoreIndex, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Global LlamaIndex defaults for the cells that follow: an OpenAI model for
# answer generation and a HuggingFace model for embeddings.
Settings.llm = OpenAI(model="gpt-4.1-mini", temperature=0)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

In [None]:
# Build a vector index over the merged document, then wrap it in a query
# engine using the default retrieval options.
index = VectorStoreIndex.from_documents([document])
query_engine = index.as_query_engine()

In [None]:
# Run one question through the basic pipeline and show the answer text.
question = "What main components of system"
response = query_engine.query(question)
print(response)

# Sentence Window

In [None]:
from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
    Document,
    Settings,
)
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.postprocessor import SentenceTransformerRerank

import os


In [None]:
# NOTE(review): this replaces the global LLM configured in the Basic RAG
# section (different model name and temperature); the embedding model is
# re-created with the same model name.
Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0.1)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

In [None]:
def build_sentence_window_index(
    document: Document,
    save_dir: str = "sentence_index"
):
    """
    Build, or load from disk, a VectorStoreIndex that uses a sentence window
    node parser.

    When ``save_dir`` does not exist, the document is parsed with a
    SentenceWindowNodeParser, indexed, and persisted to ``save_dir``.
    When it exists, the index is loaded from it and ``document`` is not used.

    Note:
        A directory persisted by an earlier run is loaded as-is. If it was
        built without the sentence-window parser being applied, delete
        ``save_dir`` so the index is rebuilt.

    Args:
        document (Document): The document to index.
        save_dir (str): The directory to save/load the index from.

    Returns:
        VectorStoreIndex: The created or loaded index.
    """
    if os.path.exists(save_dir):
        print(f"Loading existing index from {save_dir}")
        # Rehydrate the index from the persisted storage directory.
        storage_context = StorageContext.from_defaults(persist_dir=save_dir)
        return load_index_from_storage(storage_context)

    # Parser that stores the surrounding sentences under the "window" metadata
    # key and the sentence itself under "original_text".
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=3,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )

    print(f"Building new index and saving to {save_dir}")
    # The parser must be supplied through `transformations`; `from_documents`
    # has no `node_parser` parameter, so passing it by that name is ignored
    # and the default splitter is used instead (no "window" metadata).
    # The LLM and embed_model are taken from the global Settings.
    sentence_index = VectorStoreIndex.from_documents(
        [document],
        transformations=[node_parser],
    )
    # Persist the index to disk so later runs take the load path above.
    sentence_index.storage_context.persist(persist_dir=save_dir)
    return sentence_index
def get_sentence_window_query_engine(
    sentence_index: VectorStoreIndex,
    similarity_top_k: int = 6,
    rerank_top_n: int = 3,
):
    """
    Wrap a sentence window index in a query engine with two postprocessors.

    Args:
        sentence_index (VectorStoreIndex): The index to query.
        similarity_top_k (int): How many candidates the retriever returns.
        rerank_top_n (int): How many candidates are kept after reranking.

    Returns:
        The query engine.
    """
    node_postprocessors = [
        # Swap each node's text for the value stored under the "window" metadata key.
        MetadataReplacementPostProcessor(target_metadata_key="window"),
        # Re-score the candidates with a local reranker model and keep the best ones.
        SentenceTransformerRerank(
            top_n=rerank_top_n, model="BAAI/bge-reranker-base"
        ),
    ]

    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=node_postprocessors,
    )



In [None]:
# Build the sentence-window index, or load it if the "my_sentence_index"
# directory already exists. Note that `index` and `query_engine` rebind the
# names used in the Basic RAG section.
index = build_sentence_window_index(document, save_dir="my_sentence_index")

# Create the query engine (default similarity_top_k / rerank_top_n)
query_engine = get_sentence_window_query_engine(index)

In [None]:
# Two-part question used to exercise the sentence-window pipeline.
response = query_engine.query(
    "What is the primary purpose of the QuantitiesRecommendationService, "
    "and how does its output directly influence the final schedule "
    "generated by the main RecommendationService?"
)

In [None]:
# List the nodes the answer was built from. The score shown is the relevance
# score assigned by SentenceTransformerRerank (not an embedding similarity);
# higher is better.
print("\n--- Source Nodes ---")
for source_node in response.source_nodes:
    print(f"Relevance Score: {source_node.score:.4f}")
    print(f"Text: {source_node.text}\n")

# Auto Merging

In [None]:
import os
from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
    Settings,
)
from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Integration-specific imports
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Document
from llama_index.core import SimpleDirectoryReader

# Reload the PDF, create the LLM for this section, and merge the loaded
# Documents into one Document.
pdf_reader = SimpleDirectoryReader(input_files=["Recommendation System Overview.pdf"])
documents = pdf_reader.load_data()
llm = OpenAI(model="gpt-4.1-mini", temperature=0.1)
document = Document(text="\n\n".join(doc.text for doc in documents))


In [None]:
def build_automerging_index(
    documents,
    llm,
    embed_model_name="BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """
    Build an auto-merging index, or load it from ``save_dir`` if it exists.

    Side effect: overwrites the global ``Settings.llm`` and
    ``Settings.embed_model``.

    Args:
        documents (list or Document): LlamaIndex Document objects, or a single
            Document (which is wrapped in a list).
        llm (LLM): The language model to install in the global Settings.
        embed_model_name (str): HuggingFace embedding model name.
        save_dir (str): Directory the index is persisted to / loaded from.
        chunk_sizes (list): Chunk sizes for hierarchical parsing; falls back
            to [2048, 512, 128] when not given.

    Returns:
        VectorStoreIndex: The created or loaded auto-merging index.
    """
    # Global configuration used by the index and by later query engines.
    Settings.llm = llm
    Settings.embed_model = HuggingFaceEmbedding(model_name=embed_model_name)

    if not chunk_sizes:
        chunk_sizes = [2048, 512, 128]
    hierarchical_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)

    # Accept a bare Document as well as a list of them.
    if isinstance(documents, Document):
        documents = [documents]

    if os.path.exists(save_dir):
        # A persisted index is present: load it instead of rebuilding.
        persisted_context = StorageContext.from_defaults(persist_dir=save_dir)
        return load_index_from_storage(persisted_context)

    # Fresh build: every node of the hierarchy goes into the docstore, but
    # only the leaf nodes are handed to the vector index.
    all_nodes = hierarchical_parser.get_nodes_from_documents(documents)
    leaves = get_leaf_nodes(all_nodes)

    fresh_context = StorageContext.from_defaults()
    fresh_context.docstore.add_documents(all_nodes)

    built_index = VectorStoreIndex(leaves, storage_context=fresh_context)
    built_index.storage_context.persist(persist_dir=save_dir)
    return built_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=2,
):
    """
    Assemble a query engine that retrieves through an AutoMergingRetriever
    and reranks the results.

    Args:
        automerging_index (VectorStoreIndex): The auto-merging index.
        similarity_top_k (int): How many nodes the base retriever fetches.
        rerank_top_n (int): How many nodes are kept after reranking.

    Returns:
        RetrieverQueryEngine: The configured query engine.
    """
    leaf_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)

    # The merging retriever needs the index's storage context in addition to
    # the base retriever; verbose=True makes it log what it does.
    merging_retriever = AutoMergingRetriever(
        leaf_retriever, automerging_index.storage_context, verbose=True
    )

    postprocessors = [
        SentenceTransformerRerank(
            top_n=rerank_top_n, model="BAAI/bge-reranker-base"
        )
    ]

    return RetrieverQueryEngine(
        retriever=merging_retriever, node_postprocessors=postprocessors
    )


In [None]:
# Build the auto-merging index, or load it if the "merging_index" directory
# already exists. A single Document is passed here; build_automerging_index
# wraps it in a list. The call also overwrites the global Settings.llm and
# Settings.embed_model.
automerging_index = build_automerging_index(
    documents=document,
    llm=llm,
    embed_model_name="BAAI/bge-small-en-v1.5",
    save_dir="merging_index"
)

In [None]:
# Query engine over the auto-merging index, using the function's default
# similarity_top_k and rerank_top_n.
automerging_query_engine = get_automerging_query_engine(automerging_index)

In [None]:
# Ask a comparative question through the auto-merging pipeline and print the
# answer text.
auto_merging_question = (
    "How does the QualityOfRecommendations setting impact the behavior of the "
    "scheduling algorithm? Describe at least two distinct ways a 'high quality' "
    "setting can alter the final recommended plan compared to a 'low quality' "
    "setting."
)
auto_merging_response = automerging_query_engine.query(auto_merging_question)
print(auto_merging_response)

# LlamaIndex and MongoDB
## Sentence Window

In [6]:
import os
import pymongo
from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    Document,
    Settings,
)
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core import VectorStoreIndex, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Global LlamaIndex defaults for the MongoDB-backed section; this replaces
# whatever Settings.llm / Settings.embed_model earlier sections installed.
Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
# Connection string for a MongoDB deployment on localhost, port 53888.
mongo_uri="mongodb://localhost:53888/?directConnection=true"


In [7]:
def build_sentence_window_index(
    document: Document,
    mongo_uri: str = "mongodb://localhost:53888/?directConnection=true",
    db_name: str = "my_rag_db",
    collection_name: str = "sentence_vectors",
    index_name: str = "vector_search_index"
):
    """
    Builds a VectorStoreIndex with a sentence window node parser,
    using MongoDB as the vector store.

    If the target collection is empty, the document is parsed into
    sentence-window nodes and ingested. Otherwise the index is created on top
    of the existing collection contents and ``document`` is not used.

    Args:
        document (Document): The document to index.
        mongo_uri (str): The MongoDB connection string from your local Atlas deployment.
        db_name (str): The name of the database.
        collection_name (str): The name of the collection for vectors.
        index_name (str): The name for the vector search index in MongoDB.

    Returns:
        VectorStoreIndex: The created or loaded index.
    """
    # 1. Connect to MongoDB
    print(f"Connecting to MongoDB at {mongo_uri}...")
    mongo_client = pymongo.MongoClient(mongo_uri)
    collection = mongo_client[db_name][collection_name]

    # 2. Create the vector store object
    vector_store = MongoDBAtlasVectorSearch(
        mongodb_client=mongo_client,
        db_name=db_name,
        collection_name=collection_name,
        vector_index_name=index_name
    )

    # 3. Create the vector search index if it doesn't exist.
    # This step is best-effort: a failure is reported but does not abort the
    # function, so index construction below is still attempted.
    try:
        existing_indexes = [
            search_index['name'] for search_index in collection.list_search_indexes()
        ]
        if index_name not in existing_indexes:
            print(f"Vector search index '{index_name}' not found. Creating a new one...")
            # Derive the embedding dimension from the configured model by
            # embedding a throwaway string.
            embed_dim = len(Settings.embed_model.get_text_embedding("test"))
            vector_store.create_vector_search_index(
                dimensions=embed_dim,
                path="embedding", # The field where vectors are stored
                similarity="cosine"
            )
            print("Vector search index created successfully.")
        else:
            print(f"Vector search index '{index_name}' already exists.")
    except Exception as e:
        print(f"An error occurred while checking or creating the search index: {e}")
        print("Please ensure you are connected to a MongoDB Atlas instance or a local Atlas deployment (started with 'atlas dev deployments start').")

    # 4. Decide between ingesting and reusing by counting the documents
    # already stored in the collection.
    if collection.count_documents({}) == 0:
        print(f"No documents found. Building new index in MongoDB collection '{collection_name}'...")
        node_parser = SentenceWindowNodeParser.from_defaults(
            window_size=3,
            window_metadata_key="window",
            original_text_metadata_key="original_text",
        )
        nodes = node_parser.get_nodes_from_documents([document])
        sentence_index = VectorStoreIndex(
            nodes,
            storage_context=StorageContext.from_defaults(vector_store=vector_store),
        )
        print(f"Building new index in MongoDB collection '{collection_name}' is finished.")
    else:
        print(f"Loading existing index from MongoDB collection '{collection_name}'...")
        sentence_index = VectorStoreIndex.from_vector_store(
            vector_store=vector_store,
        )
        # Report the load path as a load; previously the "Building new index"
        # message was printed here as well.
        print(f"Loading existing index from MongoDB collection '{collection_name}' is finished.")

    return sentence_index


def get_sentence_window_query_engine(
    sentence_index: VectorStoreIndex,
    similarity_top_k: int = 6,
    rerank_top_n: int = 3,
):
    """
    Wrap a sentence window index in a query engine with two postprocessors.
    Nothing here refers to the storage backend; it only uses the index object.

    Args:
        sentence_index (VectorStoreIndex): The index to query.
        similarity_top_k (int): How many candidates the retriever returns.
        rerank_top_n (int): How many candidates are kept after reranking.

    Returns:
        The query engine.
    """
    # Postprocessors run in list order: window replacement first, then rerank.
    window_replacer = MetadataReplacementPostProcessor(target_metadata_key="window")
    reranker = SentenceTransformerRerank(
        top_n=rerank_top_n, model="BAAI/bge-reranker-base"
    )

    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=[window_replacer, reranker],
    )


In [8]:
# Build or load the MongoDB-backed sentence-window index. The connection
# string comes from the `mongo_uri` variable defined in this section's
# configuration cell rather than a repeated literal.
sentence_index = build_sentence_window_index(
    document=document,
    mongo_uri=mongo_uri,
    db_name="rag_db",
    collection_name="rec_sys_overviews",
    index_name="rec_sys_overviews",
)

Connecting to MongoDB at mongodb://localhost:53888/?directConnection=true...
Vector search index 'rec_sys_overviews' already exists.
Loading existing index from MongoDB collection 'rec_sys_overviews'...
Building new index in MongoDB collection 'rec_sys_overviews' is finished.


In [9]:
# Query engine over the MongoDB-backed index, using the default
# similarity_top_k and rerank_top_n.
query_engine = get_sentence_window_query_engine(sentence_index)

In [11]:
# Same two-part question as in the on-disk sentence-window section, now
# answered from the MongoDB-backed index.
response = query_engine.query(
    "What is the primary purpose of the QuantitiesRecommendationService, "
    "and how does its output directly influence the final schedule "
    "generated by the main RecommendationService?"
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [12]:
# List the nodes the answer was built from. The score shown is the relevance
# score assigned by SentenceTransformerRerank (not an embedding similarity);
# higher is better.
print("\n--- Source Nodes ---")
for source_node in response.source_nodes:
    print(f"Relevance Score: {source_node.score:.4f}")
    print(f"Text: {source_node.text}\n")


--- Source Nodes ---
Relevance Score: 0.9961
Text: This method likely doesn’t appear in output or user documentation except to note that
recommendations are saved, which is usually an implementation detail but useful to mention
that the system does keep a record.
 With these methods documented, the RecommendationService’s behavior from start to finish becomes
clear:  it  loads  data,  processes  it,  schedules  tasks  by  finding  earliest  start  times  and  selecting  the
appropriate workers, iterating until the full plan is built, and finally stores the outcome and returns it for
the user .
 The careful selection and scheduling logic ensures the recommended plan is realistic, efficient, and
leverages  the  available  workforce  effectively  while  respecting  project  constraints  and  any  pre-set
requirements.
 QuantitiesRecommendationService
Role: The QuantitiesRecommendationService analyzes historical and current project data to suggest
how much work should be planned for each 

In [13]:
# Show the answer text of the last query as the cell's output value.
response.response

'The primary purpose of the QuantitiesRecommendationService is to analyze historical and current project data to determine the appropriate planned quantities for various task types in future or ongoing projects. It utilizes statistical methods to provide data-driven estimates of how much work should be planned, such as volumes, lengths, or counts of work units.\n\nThe output from the QuantitiesRecommendationService directly influences the final schedule generated by the RecommendationService by updating the "Recommended Quantities" data. This data is then utilized by the RecommendationService when scheduling tasks, ensuring that the recommendations are based on accurate and relevant information, which contributes to a realistic and efficient project plan.'