## Implementing Memory for the RAG System -- needs more testing

In [None]:
from typing import Any, Dict, List, Optional, Tuple

# Global conversation history: (query, response) pairs in the order they
# occurred. inference_with_memory() falls back to this list when the caller
# does not supply its own `memory`, and appends each new turn to it.
conversation_history: List[Tuple[str, str]] = []

def inference_with_memory(
    query: str,
    index: faiss.Index,
    id_to_docs: Dict[int, Dict[str, Any]],
    memory: Optional[List[Tuple[str, str]]] = None,
    top_k: int = 5
) -> str:
    """
    Answer a query using retrieved context plus earlier turns of the conversation.

    Earlier (query, response) pairs are rendered as text and placed in front
    of the prompt built for the current query, so the LLM sees them as history.

    Args:
        query (str): The current user query.
        index (faiss.Index): FAISS index for document retrieval.
        id_to_docs (Dict): Mapping from FAISS index to documents.
        memory (Optional[List[Tuple[str, str]]]): Previous (query, response)
            pairs, oldest first. This list is mutated: the current turn is
            appended to it. When None (the default), the module-level
            ``conversation_history`` is used.
        top_k (int): Number of top relevant docs to retrieve. Currently NOT
            forwarded to ``search_query`` (see the note in the body).

    Returns:
        str: The response returned by ``call_mistral_hf`` for the full prompt.
    """
    # Resolve the default at call time rather than in the signature: a default
    # of `conversation_history` is bound once, when the function is defined,
    # so rebinding the global (e.g. `conversation_history = []` to reset the
    # chat) would leave this function writing to the old, orphaned list.
    # Use `is None`, not truthiness, so an explicitly passed empty list is kept.
    if memory is None:
        memory = conversation_history

    # Step 1: Retrieve context
    # NOTE(review): `top_k` is accepted but never used. search_query's
    # signature is not visible from here, so it is not passed on -- confirm
    # the parameter name and forward it.
    context_list = search_query(index, id_to_docs, query)
    context_str = "\n\n".join(context_list)

    # Step 2: Build history as part of prompt
    # NOTE(review): memory is never trimmed, so this section (and the prompt)
    # grows with every turn.
    history_prompt = "".join(
        f"Previous Question: {past_query}\nPrevious Answer: {past_response}\n\n"
        for past_query, past_response in memory
    )

    # Step 3: Build the full prompt (history first, then the current question)
    full_prompt = history_prompt + build_prompt(query, context_str)

    # Step 4: Call the LLM
    response = call_mistral_hf(full_prompt)

    # Step 5: Save this interaction in memory (only reached if the call succeeded)
    memory.append((query, response))

    return response


In [None]:
# Turn 1: opening question about the company
first_question = "Tell me about Mejia and Sons?"
response1 = inference_with_memory(first_question, index, id_to_docs)
print(response1)

# Turn 2: follow-up that only says "the firm", asked without passing `memory`
# so it uses the same default history that turn 1 was appended to
follow_up_question = "What else do you know about the firm?"
response2 = inference_with_memory(follow_up_question, index, id_to_docs)
print(response2)