In [None]:
import os
import sys
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq # https://console.groq.com/
from crewai_tools import SerperDevTool  # https://serper.dev/
from crewai import Agent, Task, Crew, LLM

# Load environment variables from the local .env file (if present).
load_dotenv()

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# The CrewAI agent LLM may use a separate Groq key; when GROQ_API_KEY2 is not
# set, reuse the primary key so the agent LLM is never configured with None.
GROQ_API_KEY2 = os.getenv("GROQ_API_KEY2") or GROQ_API_KEY
SERPER_API_KEY = os.getenv("SERPER_API_KEY")

# Fail fast when a required API key is missing.
if not GROQ_API_KEY or not SERPER_API_KEY:
    raise ValueError("Please set GROQ_API_KEY and SERPER_API_KEY in your .env file.")

  from .autonotebook import tqdm as notebook_tqdm


- os - in order to access env variables and system config
- dotenv - for loading API_keys

- FAISS - Stores and searches embeddings so the most relevant text chunks can be retrieved
- PyPDFLoader - Reads PDFs and converts them into text
- RecursiveCharacterTextSplitter - Splits long documents into smaller chunks that still hold meaning

- HuggingFaceEmbeddings - Converts text chunks into numerical embeddings (vectors)
- ChatGroq - Connects LangChain to Groq-hosted LLMs for faster inference.

- SerperDevTool - Searches the internet.


- Agent - helps to define an AI role with goals,tools and behaviour
- Task - Describe a specific job an agent must complete
- Crew - Orchestrate multiple agents to work together on tasks


In [2]:
# Primary LLM: makes the routing decision and writes the final answer.
# temperature=0 is used so its responses stay as repeatable as possible.
llm = ChatGroq(
    groq_api_key=GROQ_API_KEY,
    model="llama-3.3-70b-versatile",
    temperature=0,
    max_tokens=500,
    max_retries=2,
)

# Secondary LLM handed to the CrewAI web-search agent.
# It runs with a higher temperature and a smaller output budget than `llm`.
crew_llm_model = LLM(
    api_key=GROQ_API_KEY2,
    model="groq/llama-3.3-70b-versatile",
    temperature=0.7,
    max_tokens=200,
    is_litellm=True,
)

We use the main LLM for routing decisions, final answers, and grounding on the retrieved context - so we keep temperature = 0

so that it is consistent, repeatable and predictable.

We use crew_llm_model for search-query formulation, summarization, exploration, and deciding how to approach a task - so we increase the temperature

so that it is flexible, mildly creative, and tolerant of ambiguity.

is_litellm : lets the crew use the LiteLLM library to connect to other LLMs instead of the default ones such as OpenAI

In [3]:
#The decision maker - check if we can answer from local knowledge or  need to search on the web

def check_local_knowledge(query,context):
    """
    Router: decide whether the retrieved local context can answer the query.

    The main LLM is asked to reply "Yes" or "No" for the given question/text
    pair, and that reply is converted into a boolean.

    Args:
        query: The user's question.
        context: Text retrieved from the local documents.

    Returns:
        True if the LLM's verdict is "yes", False otherwise. Also False when
        `context` is empty or the reply contains neither "yes" nor "no".
    """
    # Nothing was retrieved, so there is nothing to judge; skip the LLM call.
    if not context or not context.strip():
        return False

    prompt = ''' Role: Question-Answering Assistant

    Task: Determine whether the system can answer the user's question based on the provided text.

    Instructions:
    - Analyze the text and identify if it contains the necessary information to answer the user's question.
    - Provide a clear and concise response indicating whether the system can answer the question or not.
    - Your response should include only a single word: "Yes" or "No". Noting else.

    Output Format:
    - Answer: Yes/No

    Examples:
    Input:
        Text: There is 14 districts in Kerala.
        User Queestion: How many districts are there in Kerala ?
    Expected Output:
        Answer: Yes
    
    Input:
        Text: Peacock is the national bird of India.
        User Question: What is the national bird of China?
    Expected Output:
        Answer: No
    
    Now analyze this:
    Input:
        User Question: {query}
        Text: {text}
    Output:'''

    formatted_prompt = prompt.format(text=context, query=query)
    response = llm.invoke(formatted_prompt)

    # Take the first whole-word "yes"/"no" as the verdict. A plain substring
    # test would also accept words such as "eyes", or a reply that starts
    # with "No" but mentions "yes" later on.
    answer = response.content.strip().lower()
    for token in answer.replace(":", " ").split():
        word = token.strip(".,;!?\"'*()[]")
        if word in ("yes", "no"):
            return word == "yes"
    return False

We will use this function in order to understand whether we could fetch the information from the documents we provided itself or we should do Internet search.

The query = user question and the context = document data is passed into the function.

Prompt = We describe the LLM's role and task, along with examples that pin down the structure of the output

format = we inject the query and context into the prompt

invoke = we use LLM for output answer - which should be Yes or No

The function returns True or False based on that response. (LLMs are mainly trained to give Yes or No answers rather than True or False.)

In [4]:
# 4. Web Search Crew Setup - search only, no scraping
def setup_web_search_crew(n_results=3):
    """
    Configure and return a single-agent CrewAI crew for web searching.

    Args:
        n_results: Number of search results requested from the Serper tool.
            Defaults to 3, the value this notebook used originally.

    Returns:
        A Crew with one search agent and one search/summarise task. The task
        text contains a '{topic}' placeholder that is filled from the inputs
        passed to `crew.kickoff`.
    """
    search_tool = SerperDevTool(n_results=n_results)  # keep the result count small by default

    web_search_agent = Agent(
        role="Expert Web Search Agent",
        goal="Search for information about the topic and summarize findings from search results",
        backstory="An expert at finding and summarizing information from web search results.",
        tools=[search_tool],
        verbose=True,
        llm=crew_llm_model
    )

    # Adjacent string literals are concatenated, so each piece ends with a
    # space where another sentence follows.
    search_task = Task(
        description=(
            "Search for information about '{topic}'. "
            "Summarize the key facts and concepts from the search results concisely."
        ),
        expected_output=(
            "A concise summary of the most relevant information about '{topic}' "
            "based on the search results."
        ),
        agent=web_search_agent,
    )

    crew = Crew(
        agents=[web_search_agent],
        tasks=[search_task],
        verbose=True,
    )

    return crew

def get_web_content(query):
    """Run a freshly built web-search crew for `query` and return its raw output."""
    print(f"Searching the web for: {query}")
    # The query is supplied as the crew input named "topic".
    crew_output = setup_web_search_crew().kickoff(inputs={"topic": query})
    return crew_output.raw

- They are called Retrieval primitives.

**Web Search Agent**

- role - specifies the character of our agent
- goal - The success criteria the agent optimizes for while reasoning
- backstory - Giving extra behavioural context
- tools - the external function that the agent is allowed to call
- verbose - (True) logs (prints) the agent's internal reasoning steps, tool calls, and task progress to the console
- llm - the llm instance for output generation.

**Searching**

- description - The instructions an agent follows to perform the task.
- expected_output - tells how the output should be
- agent - the agent responsible for this task

NB: the description and expected_output instructions could each be written on a single line; we split them across lines for readability (take care not to drop the space at the end of each piece).

**Crew that coordinates**

crew - controls the coordination of agents and tasks
- agents - the set of agents available to execute tasks
- tasks - The ordered list of tasks the crew will run.
- verbose - prints the execution flow, agent decisions, and tool usage.


In [5]:
# Creating vector database
# Stores local PDF knowledge.

def setup_vector_db(
    pdf_path,
    chunk_size=1000,
    chunk_overlap=50,
    embedding_model="sentence-transformers/all-mpnet-base-v2",
):
    """
    Build a FAISS vector database from a PDF.

    Steps:
    1. Load the PDF and extract its text
    2. Split the text into manageable chunks
    3. Create embeddings for each chunk
    4. Store them in a FAISS vector database for fast retrieval

    Args:
        pdf_path: Location of the PDF, passed unchanged to PyPDFLoader.
        chunk_size: Maximum number of characters per chunk (default 1000).
        chunk_overlap: Characters shared between neighbouring chunks so that
            context is not cut off at chunk borders (default 50).
        embedding_model: Name of the HuggingFace embedding model to use.

    Returns:
        The FAISS vector store built from the PDF chunks.

    Raises:
        ValueError: If no text chunks could be produced from the PDF.
    """

    print(f"Loading PDF from: {pdf_path}")

    # Load and extract the PDF content
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    chunks = text_splitter.split_documents(documents)
    print(f"Split into {len(chunks)} chunks")

    # Stop here with a clear message instead of letting the index build fail
    # on an empty chunk list (also avoids loading the embedding model).
    if not chunks:
        raise ValueError(
            f"No text could be extracted from '{pdf_path}'; cannot build the vector database."
        )

    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

    vector_db = FAISS.from_documents(chunks,embeddings)
    print("Vector database created successfully")

    return vector_db

def get_local_content(vector_db, query):
    """
    Fetch the text most relevant to `query` from the vector database.

    Runs a similarity search for the 5 closest chunks and returns their
    page contents joined into one string, separated by single spaces.
    """
    top_matches = vector_db.similarity_search(query, k=5)
    return " ".join(match.page_content for match in top_matches)



- PDF loaded for extraction
- Using RecursiveCharacterTextSplitter from langchain_text_splitters we split the content into chunks
- Each chunk is converted into a vector (numbers); similar meanings have vectors that are closer together - this makes retrieval semantic rather than keyword-based
- For fast similarity search -> store the embeddings in FAISS
- similarity_search finds the chunks that best suit the query (semantic matching)

There are many embedding systems like BERT,OpenAI embeddings,etc.

In [6]:
def generate_final_answer(context,query):
    """
    Generate the final answer with the main LLM and the retrieved context.

    Builds a chat prompt from two system messages (the instructions and the
    context) followed by the user's question, then invokes the LLM.

    Args:
        context: Retrieved text (local documents or web search summary).
        query: The user's question.

    Returns:
        The content of the LLM's response.
    """

    messages = [
        (
            "system",
            # Adjacent literals are concatenated: the trailing space keeps the
            # two sentences from running together ("accurately.If ...").
            "You are a helpful asssistant. Use the provided context to answer the user's question accurately. "
            "If the context doesn't contain enough information, say so clearly."

        ),

        ("system", f"Context: {context}"),
        ("human",query),
    ]

    response = llm.invoke(messages)
    return response.content

Once we have the necessary context (from either the local documents or the web), we can generate the final answer by passing the context and the query to an LLM.

In [7]:
# Main query processing Pipelin.

def process_query(query, vector_db):

    """
    Process a user query through the agentic RAG pipeline.

    Flow:
    1. Retrieve the most relevant local chunks for the query.
    2. Routing: ask the LLM whether those chunks can answer the query.
    3. If yes: use the chunks already retrieved as context.
    4. If no: run the web-search crew and use its summary as context.
    5. Generate the final answer from the chosen context.

    Args:
        query: The user's question.
        vector_db: Vector store to search for local context.

    Returns:
        A tuple (answer, source) where source is "LOCAL DOCUMENTS" or
        "WEB SEARCH".
    """

    print(f"Processing query -: {query}")

    # Fetch the chunks for *this* query and let the router judge them.
    print("! Checking local documents...")
    local_context = get_local_content(vector_db, query)

    can_answer_locally = check_local_knowledge(query, local_context)
    print(f"\nCan answer from local knowledge ? -: {can_answer_locally} ")

    if can_answer_locally:
        print("! Retrieving from local documents..")
        # Reuse the chunks fetched above; running the same similarity search
        # a second time would only repeat the work.
        context = local_context
        source = "LOCAL DOCUMENTS"
    else:
        print("! Searching the web ..")
        context = get_web_content(query)
        source = "WEB SEARCH"

    print(f"\nRetrieved context from {source}.")
    print(f"Context length: {len(context)} characters\n")

    print("-> Generating Final answer...!\n")
    answer = generate_final_answer(context,query)

    return answer, source


- Routing : Decide between local or web retrieval
- Retrieval : Get relevant context from chosen source
- Generation : Create answer using context and query

In [8]:
# Main function
from pathlib import Path
def main(pdf_path=None, queries=None):
    """
    Run the Agentic RAG system end to end.

    Builds the vector database from the PDF, then answers each query and
    prints the answer together with the source it came from.

    Args:
        pdf_path: PDF to index. When None, the notebook's original sample
            path is used.
        queries: Iterable of questions to ask. When None, the two demo
            questions are used (one meant to trigger a web search, one meant
            to be answered from the document).
    """
    if pdf_path is None:
        # Original sample location; pass `pdf_path` to run on another machine.
        pdf_path = r"C:\Users\devan\Desktop\Agentic_RAG\Basic-Biology-an-introduction.pdf"

    if queries is None:
        queries = [
            "What is Agentic RAG?", #To trigger web search
            "What are the key principles discussed in the document?", #To use the document data
        ]

    print("Initializing Agentic RAG system..\n")

    # Initialize vector database
    print("Step 1: Setting up vector database..")
    vector_db = setup_vector_db(pdf_path)

    for query in queries:
        answer, source = process_query(query,vector_db)

        print(f"{'='*60}")
        print(f"FINAL ANSWER (source : {source}): ")
        print(f"{'='*60}")
        print(f"\n{answer}\n")
        print(f"{'='*60}\n\n")


# Run the demo only when this code executes as the top-level script/notebook,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Initializing Agentic RAG system..

Step 1: Setting up vector database..
Loading PDF from: C:\Users\devan\Desktop\Agentic_RAG\Basic-Biology-an-introduction.pdf
Split into 127 chunks
Vector database created successfully
Processing query -: What is Agentic RAG?
! Checking local documents...

Can answer from local knowledge ? -: False 
! Searching the web ..
Searching the web for: What is Agentic RAG?


[32mTool search_the_internet_with_serper executed with result: {'searchParameters': {'q': 'Agentic RAG definition', 'type': 'search', 'num': 3, 'engine': 'google'}, 'organic': [{'title': 'What is Agentic RAG? | IBM', 'link': 'https://www.ibm.com/think/topics/agen...[0m



Retrieved context from WEB SEARCH.
Context length: 561 characters

-> Generating Final answer...!



FINAL ANSWER (source : WEB SEARCH): 

Agentic RAG refers to the use of AI agents to facilitate Retrieval Augmented Generation (RAG). It involves adding AI agents to the RAG pipeline to increase the efficiency and effectiveness of the retrieval process, allowing for more accurate and relevant information to be retrieved and used in the generation process. In essence, Agentic RAG introduces AI agents as intelligent intermediaries between user queries and data sources, actively retrieving and using relevant information from a knowledge base.



Processing query -: What are the key principles discussed in the document?
! Checking local documents...

Can answer from local knowledge ? -: True 
! Retrieving from local documents..

Retrieved context from LOCAL DOCUMENTS.
Context length: 3523 characters

-> Generating Final answer...!

FINAL ANSWER (source : LOCAL DOCUMENTS): 

The document discusses 5 key principles that define how life is organized on Earth. These principles are:

1. Cells ar