# Documentation for backend/inference.py

#### Description:

### Path setup for document retrieval

- Environment setup: Loads necessary environment variables for configuration
  
- File path management: Uses os for handling file paths

- Document retrieval: Implements LangChain for document retrieval operations

- Combining documents: Creates a chain for synthesizing document content

- MistralAI integration: Utilizes ChatMistralAI for chatbot functionalities

- Persistence directory: Defines path for storing FAISS index files.

In [9]:
import os  # Imports the `os` module for interacting with the operating system, crucial for managing file paths in the SWEBOK chatbot.

from langchain.chains.retrieval import create_retrieval_chain  # Imports the function for creating a retrieval chain, essential for accessing SWEBOK documents.

from langchain.chains.combine_documents import create_stuff_documents_chain  # Imports document combination functionality, aiding in synthesizing SWEBOK-related information.

from langchain_mistralai import ChatMistralAI  # Imports the `ChatMistralAI` class, allowing the chatbot to interact with SWEBOK data effectively.

from dotenv import load_dotenv  # Imports `load_dotenv` to load environment variables, enabling flexible configuration for the SWEBOK chatbot.

# Function to load environment variables and initialize paths
def initialize_environment():
    """Load the .env configuration and derive the corpus and FAISS index paths.

    Returns:
        tuple[str, str]: ``(document_path, persist_directory)``, where
        ``document_path`` is the value of the ``CORPUS_SOURCE`` environment
        variable and ``persist_directory`` is its ``faiss_indexes``
        subdirectory.

    Raises:
        ValueError: If ``CORPUS_SOURCE`` is unset or empty.
    """
    load_dotenv(override=True)

    document_path = os.getenv("CORPUS_SOURCE")
    if not document_path:
        # Fail with a clear message here; otherwise os.path.join(None, ...)
        # below would abort with an unrelated-looking TypeError.
        raise ValueError("CORPUS_SOURCE not found in environment variables.")

    persist_directory = os.path.join(document_path, "faiss_indexes")

    print("Environment initialized with document path and persist directory.")

    return document_path, persist_directory

# Run the environment setup at load time and keep both returned paths
# (corpus directory, FAISS index directory) as module-level names.
document_path, persist_directory = initialize_environment()

Environment initialized with document path and persist directory.


### Environment Setup and Document Retrieval

- Imports modules for SWEBOK document retrieval and processing.

- Loads environment variables for SWEBOK chatbot configuration.

- Loads SWEBOK documents and manages FAISS vector storage.

- Creates hybrid retriever for SWEBOK knowledge querying.

- Retrieves Mistral API key for chatbot integration.

- Handles errors if paths or API keys are missing.

In [23]:
import os 
from langchain.chains.retrieval import create_retrieval_chain 
from langchain.chains.combine_documents import create_stuff_documents_chain 
from langchain_mistralai import ChatMistralAI  
from document_loading import (  # Imports custom functions to load documents and embeddings.
    load_documents_from_directory, 
    load_or_create_faiss_vector_store,
    get_hybrid_retriever
)
from prompts import prompt  # Imports predefined prompts specific to SWEBOK questions.
from citations import get_answer_with_source  # Retrieves SWEBOK answers with source references, supporting reliable responses.
from dotenv import load_dotenv  # Loads environment variables, facilitating external configuration for SWEBOK chatbot setup.

# Load settings from the .env file before any os.getenv() lookup below.
# override=True lets values from .env replace variables that are already set
# in the process environment.
load_dotenv(override=True)

def load_embeddings(top_k: int = 15):
    """Load the corpus, open (or build) its FAISS store, and return a retriever.

    Args:
        top_k (int): Number of top relevant documents the retriever should
            return; forwarded unchanged to ``get_hybrid_retriever``.
            Defaults to 15, the value that was previously hard-coded.

    Returns:
        The retriever object produced by ``get_hybrid_retriever``.

    Raises:
        ValueError: If ``top_k`` is smaller than 1, if ``CORPUS_SOURCE`` is
            unset or empty, or if no documents were loaded from that path.
    """
    if top_k < 1:
        raise ValueError("top_k must be a positive integer.")

    document_path = os.getenv("CORPUS_SOURCE")
    if not document_path:
        raise ValueError("CORPUS_SOURCE not found in environment variables.")

    # The FAISS index files live in a fixed subdirectory of the corpus directory.
    persist_directory = os.path.join(document_path, "faiss_indexes")

    documents = load_documents_from_directory(document_path)
    print(f"Loaded {len(documents)} documents from {document_path}.")

    # Checked after the count is printed so the log shows "Loaded 0 documents".
    if not documents:
        raise ValueError("No documents loaded. Please check the document path.")

    faiss_store = load_or_create_faiss_vector_store(documents, persist_directory)
    retriever = get_hybrid_retriever(documents, faiss_store, top_k)

    print("Embeddings and retriever loaded.")
    return retriever

def get_api_key():
    """Return the Mistral API key configured in the environment.

    Returns:
        str: Value of the ``MISTRAL_API_KEY`` environment variable.

    Raises:
        ValueError: If the variable is unset or empty.
    """
    key = os.environ.get("MISTRAL_API_KEY")
    if key:
        return key
    raise ValueError("MISTRAL_API_KEY not found in environment variables.")

# Example usage
if __name__ == "__main__":
    try:
        retriever = load_embeddings()
        api_key = get_api_key()
        # Confirm the key was found without echoing it: cell output and logs
        # are routinely saved and shared, which would leak the secret.
        print("Successfully retrieved API Key: [REDACTED]")
    except ValueError as e:
        # Both helpers signal missing configuration with ValueError.
        print(f"Error: {e}")

Loading documents from /app/data/swebok...
Loaded 470 documents from /app/data/swebok.
Loading existing FAISS vector store from /app/data/swebok/faiss_indexes/collection...

Embeddings and retriever loaded.
Successfully retrieved API Key: [REDACTED]


### Mistral AI Setup

- Function Usage: Loads and configures the Mistral AI model

- API Key Retrieval: Obtains the Mistral API key securely

- Model Configuration: Sets parameters like temperature and max tokens

- Model Loading Attempt: Loads specified model and confirms successful loading

- Error Handling: Catches and displays loading-related exceptions

In [24]:
import os  
from langchain_mistralai import ChatMistralAI  
def load_llm_api(model_name, *, temperature=0.2, max_tokens=256, top_p=0.4):
    """Load and configure the Mistral AI LLM.

    Args:
        model_name (str): The name of the model to load.
        temperature (float): Sampling temperature passed to ``ChatMistralAI``.
            Defaults to 0.2.
        max_tokens (int): Upper bound on generated tokens passed to
            ``ChatMistralAI``. Defaults to 256.
        top_p (float): Nucleus-sampling threshold passed to ``ChatMistralAI``.
            Defaults to 0.4.

    Returns:
        ChatMistralAI: Configured LLM instance.

    Raises:
        ValueError: If ``MISTRAL_API_KEY`` is unset or empty.
    """
    api_key = os.getenv("MISTRAL_API_KEY")
    if not api_key:
        raise ValueError("MISTRAL_API_KEY not found in environment variables.")

    # The defaults reproduce the settings that were previously hard-coded, so
    # existing single-argument callers get exactly the same configuration.
    return ChatMistralAI(
        model=model_name,
        mistral_api_key=api_key,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
    )

# Model variant requested from the Mistral API; also reported back to callers.
MODEL_NAME = "open-mistral-7b"

# Instantiate the LLM once at load time and echo its effective settings.
try:
    llm = load_llm_api(MODEL_NAME)
    print("Successfully loaded the Mistral LLM.")
    print(
        f"Model Name: {llm.model}",
        f"Temperature: {llm.temperature}",
        f"Max Tokens: {llm.max_tokens}",
        f"Top P: {llm.top_p}",
        sep="\n",
    )
except ValueError as e:
    # load_llm_api raises ValueError when the API key is missing.
    print(f"Error: {e}")

Successfully loaded the Mistral LLM.
Model Name: open-mistral-7b
Temperature: 0.2
Max Tokens: 256
Top P: 0.4


### RAG chain implementation

- Function usage: Generates answers using Retrieval-Augmented Generation (RAG) chain

- Question logging: Prints the question being processed for debugging

- Chain creation: Sets up question-answer and retrieval chains

- Response initialization: Initializes a dictionary for storing answers and context

- Response streaming: Streams answers and context from the RAG chain

- Output structure: Returns complete answer and model name in dictionary

In [33]:
# Defines a function that generates a response in dictionary format.
def chat_completion_as_dict(question):
    """Answer a question with the RAG chain and return only the answer text.

    A stuff-documents chain is built from the module-level ``llm`` and
    ``prompt``, wrapped in a retrieval chain over the module-level
    ``retriever``, and streamed for the given question.

    Args:
        question (str): The user question to be answered.

    Returns:
        dict: ``{"complete_answer": <answer text>, "model": MODEL_NAME}``.
    """
    print(f"Running prompt: {question}")

    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # Only the "answer" pieces of the streamed chunks are kept: the response
    # deliberately carries no sources or context, so other keys are skipped.
    answer_parts = [
        chunk["answer"]
        for chunk in rag_chain.stream({"input": question})
        if "answer" in chunk
    ]

    return {
        "complete_answer": "".join(answer_parts),
        "model": MODEL_NAME,
    }

# Example usage
if __name__ == "__main__":
    # Smoke test: send one sample question through the RAG chain.
    question = "What are the benefits of Retrieval-Augmented Generation?"
    response = chat_completion_as_dict(question)

    # Two values joined by a newline: the answer first, then the model name.
    print(
        f"Response: {response['complete_answer']}",
        f"Model: {response['model']}",
        sep="\n",
    )

Running prompt: What are the benefits of Retrieval-Augmented Generation?
Response: Retrieval-Augmented Generation (RAG) is a technique that combines human expertise with machine learning to generate responses to natural language queries. The benefits of RAG include:

1. Improved efficiency: RAG can generate responses more quickly than a human alone, as it can process and analyze large amounts of data in a fraction of the time.
2. Increased accuracy: By leveraging machine learning algorithms, RAG can reduce the likelihood of errors and improve the overall accuracy of responses.
3. Enhanced consistency: RAG can ensure that responses are consistent across different queries, as it can learn from previous interactions and use that knowledge to generate future responses.
4. Scalability: RAG can handle a large volume of queries simultaneously, making it an ideal solution for applications with high traffic or complex query structures.
5. Cost savings: By automating the response generation proc