# In [None]:
"""
core/rag.py - The RAG Pipeline.

This module contains the `RAGPipeline` class, which orchestrates the retrieval
of relevant documents for a given query. It uses OpenAI embeddings and a FAISS
vector store to perform efficient similarity searches.
"""
import logging
from typing import List

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document

# Module-level logging: configure the root logger (INFO level, timestamped
# records that include the logger name and severity), then create the named
# logger used throughout this module.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class RAGPipeline:
    """
    Implements the retrieval side of a Retrieval-Augmented Generation (RAG) pipeline.

    The supplied knowledge-base texts are embedded with OpenAI embeddings and
    indexed in a FAISS vector store. Queries are answered by similarity search
    against that store, and the hits can be joined into a single context
    string to hand to an LLM.

    Attributes:
        embeddings: The `OpenAIEmbeddings` instance used to build the index.
        db: The FAISS vector store built from the supplied documents.
    """

    def __init__(self, docs: List[str]):
        """
        Initializes the RAG pipeline with a list of documents.

        Args:
            docs: A list of strings representing the knowledge base documents.
                Must contain at least one entry.

        Raises:
            ValueError: If `docs` is empty.
        """
        # Fail fast, before any embedding client is created: a vector index
        # cannot be built from zero documents, and letting the call proceed
        # would only fail later inside the library with a less helpful error.
        if not docs:
            raise ValueError("docs must contain at least one document.")

        logger.info("Initializing RAG pipeline with FAISS vector store...")
        self.embeddings = OpenAIEmbeddings()
        documents = [Document(page_content=d) for d in docs]
        self.db = FAISS.from_documents(documents, self.embeddings)
        # Lazy %-style arguments: the message is only formatted if it is emitted.
        logger.info("FAISS vector store created with %d documents.", len(documents))

    def retrieve(self, query: str, k: int = 2) -> List[Document]:
        """
        Retrieves the top-k most similar documents to the query.

        Args:
            query: The user's question.
            k: The maximum number of documents to retrieve. Must be >= 1.

        Returns:
            A list of LangChain Document objects, as returned by the vector
            store's similarity search.

        Raises:
            ValueError: If `k` is less than 1.
        """
        # Validate here so callers get a clear error instead of a failure
        # raised from inside the vector store.
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}.")

        logger.debug("Retrieving top-%s documents for query: '%s'", k, query)
        return self.db.similarity_search(query, k=k)

    def build_context(self, query: str, k: int = 2) -> str:
        """
        Retrieves and formats the top-k documents into a single context string.

        Args:
            query: The user's question.
            k: The maximum number of documents to retrieve. Must be >= 1.

        Returns:
            The `page_content` of each retrieved document, joined by a blank
            line ("\\n\\n").

        Raises:
            ValueError: If `k` is less than 1 (raised by `retrieve`).
        """
        docs = self.retrieve(query, k)
        context = "\n\n".join(d.page_content for d in docs)
        logger.debug("Context built successfully.")
        return context