In [None]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain.agents import create_agent

In [None]:
# Build the retrieval stack (embeddings -> FAISS index -> retriever) and load the local LLM.

# Keyword arguments forwarded to HuggingFaceEmbeddings: run on CPU and
# request normalized embeddings.
model_kwargs = {
    "device": "cpu",
}
encode_kwargs = {
    "normalize_embeddings": True,
}

embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en",
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# NOTE(review): allow_dangerous_deserialization=True permits loading the
# pickle-backed index files; only point this at an index directory you trust.
vectorstore = FAISS.load_local(
    "faiss_ifrs_index/",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Retriever view over the index using MMR search with explicit search settings.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={
        "k": 6,
        "fetch_k": 20,
        "lambda_mult": 0.4,
    },
)

# Local GGUF model served through llama.cpp.
llm = LlamaCpp(
    model_path="LLM/Qwen3-1.7B-Q8_0.gguf",
    n_ctx=4096,
    n_threads=6,
    temperature=0.2,
    max_tokens=512,
)

@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Build the system prompt for a model call from retrieved context.

    Reads the text of the most recent message in the agent state, fetches
    matching documents through the module-level ``retriever`` and returns a
    system prompt asking the model to quote the most relevant passage.

    Args:
        request: The pending model request; only ``request.state["messages"]``
            is read.

    Returns:
        The system prompt string containing the instructions, the query and
        the retrieved document texts.
    """
    last_query = request.state["messages"][-1].text

    # Go through the configured retriever so its search settings (MMR, k,
    # fetch_k, lambda_mult) actually apply; calling
    # vectorstore.similarity_search() directly bypasses them.
    retrieved_docs = retriever.invoke(last_query)

    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

    # The documents are embedded exactly once, in a labelled section after the
    # instructions. Interpolating them twice doubles the prompt size, which
    # matters with the limited context window of the local model.
    system_message = (
        "Extract the most relevant passage from the retrieved documents below "
        f"that answers the query: {last_query}\n"
        "Return the Chapter and subpart from which you extract this text and "
        "display the exact text from the retrieved documents.\n\n"
        f"Retrieved documents:\n{docs_content}"
    )

    return system_message


# Agent with no tools; the system prompt is produced per call by the
# prompt_with_context middleware.
# NOTE(review): create_agent is documented to take a chat model (or a model
# identifier string), while `llm` is a LlamaCpp completion LLM — confirm this
# combination behaves as intended.
agent = create_agent(
    model=llm,
    tools=[],
    middleware=[prompt_with_context],
)

In [None]:
# Ask one question: send it as a single user message and print the content
# of the last message in the returned state.
query = "What is the definition of credit-adjusted effective interest rate?"
result = agent.invoke(
    {
        "messages": [
            {"role": "user", "content": query},
        ],
    }
)
print(result["messages"][-1].content)