### Iterative Retrieval with Self Reflection

Iterative retrieval is a dynamic strategy where an AI agent doesn't settle for the first batch of retrieved documents. Instead, it evaluates the adequacy of the initial context and, if necessary, it:
* refines the query
* retrieves again
* repeats the process until it's confident enough to answer the original question

In iterative RAG:
* the agent reflects on the retrieved content and the answer it produced.
* if it's unsure, it can refine its search (like a human researcher would).

In [2]:
#importing libraries
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

from langchain.chat_models import init_chat_model

  from .autonotebook import tqdm as notebook_tqdm
USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# Documentation pages that make up the knowledge base.
urls = [
    "https://docs.langchain.com/oss/python/langchain/agents",
    "https://docs.langchain.com/oss/python/langchain/tools",
    "https://docs.langchain.com/oss/python/langchain/streaming",
]

# Load the pages, then split them into overlapping chunks.
loader = WebBaseLoader(web_paths=urls)
documents = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
chunks = splitter.split_documents(documents)

# Embed the chunks into a FAISS index and expose it as a retriever.
embeddings = HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)
retriever = vector_store.as_retriever()

In [4]:
#base model

# Chat model used by the answer-generation node ("provider:model" identifier).
model = init_chat_model("groq:llama-3.1-8b-instant")
model  # bare expression so the notebook cell displays the model's repr

ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 8192, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x000001C40479DC90>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001C4046EFA10>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [5]:
# defining state schema
from pydantic import BaseModel
from typing import List
from langchain_core.documents import Document

class IterativeRAGState(BaseModel):
    """State object passed between the nodes of the iterative RAG workflow."""

    # Question to answer; the only field without a default.
    question: str

    # Reworded query; empty by default (presumably filled by a refinement
    # step that is not visible here -- confirm).
    refined_question: str = ""

    # Documents returned by the retriever; empty by default.
    retrieved_docs: List[Document] = []

    # Generated answer text; empty by default.
    answer: str = ""

    # Verification flag, False by default (presumably set by the
    # answer-verifying node -- confirm).
    verified: bool = False

    # Attempt counter, starts at 0 (presumably bounds the retry loop -- confirm).
    attempts: int = 0

In [None]:
# defining nodes
from typing_extensions import TypedDict
from langchain.agents import create_agent
#retrieving docs node

def retrieve_docs(state: IterativeRAGState) -> IterativeRAGState:
    """Look up documents for ``state.question`` and record them on the state.

    Args:
        state: Current workflow state; only ``question`` is read.

    Returns:
        The result of ``state.model_copy`` with ``retrieved_docs`` replaced
        by what the module-level ``retriever`` returned for the question.
    """
    hits = retriever.invoke(state.question)
    changes = {"retrieved_docs": hits}
    return state.model_copy(update=changes)

#generating answer node

class Context(TypedDict):
    """Runtime context schema for the answer-generation agent."""

    # Text of the retrieved documents, joined into one string.
    context: str


def generate_answer(state: IterativeRAGState) -> IterativeRAGState:
    """Generate an answer to ``state.question`` from the retrieved documents.

    The page contents of ``state.retrieved_docs`` are joined with blank lines
    and sent to the model together with the question.

    Args:
        state: Current workflow state; ``question`` and ``retrieved_docs``
            are read.

    Returns:
        The result of ``state.model_copy`` with ``answer`` set to the content
        of the last message produced by the agent.
    """
    combined_doc = "\n\n".join(doc.page_content for doc in state.retrieved_docs)
    prompt = "generate answer to the given question only using the context information provided"
    gen_answer_agent = create_agent(
        model=model,
        system_prompt=prompt,
        context_schema=Context,
    )

    # Nothing in this agent reads the runtime context, so passing the
    # documents only via ``context=`` would leave the model without them.
    # Put the retrieved text in the user message so the system prompt's
    # "only using the context" instruction has something to refer to.
    user_message = f"Context:\n{combined_doc}\n\nQuestion: {state.question}"

    gen_answer_response = gen_answer_agent.invoke(
        {"messages": [{"role": "user", "content": user_message}]},
        # Shape the runtime context to match ``Context`` instead of a bare string.
        context={"context": combined_doc},
    )

    # Read the answer from the invoke result; the agent object itself is not
    # subscriptable, so indexing it raised a TypeError.
    final_message = gen_answer_response["messages"][-1]
    return state.model_copy(update={"answer": final_message.content})

#answer verifying node
from langchain.agents