# Agentic Advanced RAG for Question Generation

This notebook converts the advanced RAG pipeline into an agentic system. The core idea is to wrap the entire RAG process (retrieval, reranking, and generation) into a single tool that a LangChain agent can decide to use. This allows for more complex and multi-step reasoning in the future.

In [None]:
#!pip install python-dotenv sentence-transformers langchain langchain-google-genai chromadb langchainhub

In [None]:
import os
from dotenv import load_dotenv
import re
import json
from sentence_transformers import CrossEncoder
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.agents import Tool, AgentExecutor, create_react_agent
from langchain import hub

# Pull variables from a local .env file into the process environment before
# any setting below is read.
load_dotenv()

# --- Configuration ---
# API key for the Gemini chat model; None when GOOGLE_API_KEY is not set.
googleapikey = os.getenv("GOOGLE_API_KEY")
# Location of the persisted Chroma vector store, relative to the working directory.
vector_db_path = "../vector_db"  # Adjusted path
# Chat model name handed to ChatGoogleGenerativeAI.
model = "gemini-1.5-flash"
# Sampling temperature handed to ChatGoogleGenerativeAI.
temp = 0.3

### Step 1: Define the RAG Tool

In [None]:
def get_embedding_function():
    """Build the embedding wrapper used to query the vector store.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for the
        ``Qwen/Qwen3-Embedding-0.6B`` model, encoding in batches of 4 with
        ``normalize_embeddings`` enabled.
    """
    encode_options = {"batch_size": 4, "normalize_embeddings": True}
    embeddings = HuggingFaceEmbeddings(
        model_name="Qwen/Qwen3-Embedding-0.6B",
        encode_kwargs=encode_options,
    )
    return embeddings

# Cross-encoder reranker, instantiated once at module level. query_rag_tool
# calls reranker.predict on (query, chunk) pairs to re-score retrieved chunks.
reranker = CrossEncoder("BAAI/bge-reranker-v2-m3", max_length=512)

def parse_questions(text: str, total_questions: int, path: str = None):
    """Placeholder for the question parser; currently does nothing.

    The real implementation is meant to be copied in from the original
    notebook. Until then every call ignores its arguments and returns
    ``None``.

    Args:
        text: Raw LLM response to parse (unused by this stub).
        total_questions: Number of questions expected (unused by this stub).
        path: Optional output file path (unused by this stub).
    """
    # This function remains the same as in your original notebook
    return None

def _parse_rag_tool_input(input_str: str):
    """Split the pipe-delimited tool input into typed fields.

    Whitespace around each field is dropped so that stray spaces or newlines
    added by the agent do not leak into the query or the output path.

    Returns:
        ``(query_text, no_of_questions, output_path, subject, chapter)`` with
        the two numeric fields converted to ``int``.

    Raises:
        ValueError: if there are not exactly five fields, a numeric field is
            not an integer, the query is empty, or fewer than one question
            is requested.
    """
    fields = [field.strip() for field in input_str.split('|')]
    # Unpacking raises ValueError when the field count is not exactly five.
    query_text, no_of_questions_str, output_path, subject, chapter_str = fields
    no_of_questions = int(no_of_questions_str)
    chapter = int(chapter_str)
    if not query_text:
        raise ValueError("query_text must not be empty")
    if no_of_questions < 1:
        raise ValueError("no_of_questions must be at least 1")
    return query_text, no_of_questions, output_path, subject, chapter


def query_rag_tool(input_str: str):
    """Run retrieval, reranking and question generation for one request.

    The function retrieves the 5 most similar chunks from the Chroma store at
    ``vector_db_path``, re-scores them with the module-level ``reranker``,
    joins them (best first) into a source-annotated context, and sends the
    prompt to the configured chat model. The raw response is printed; it is
    not parsed or written to disk because the ``parse_questions`` call is
    still disabled.

    Args:
        input_str: ``query_text|no_of_questions|output_path|subject|chapter``.
            ``no_of_questions`` and ``chapter`` must be integers.

    Returns:
        A status string for the agent: an "Invalid input format" message when
        ``input_str`` cannot be parsed, a "No documents were retrieved"
        message when the vector store returns nothing, otherwise a success
        message.
    """
    try:
        query_text, no_of_questions, output_path, subject, chapter = _parse_rag_tool_input(input_str)
    except ValueError:
        return "Invalid input format. Expected: query_text|no_of_questions|output_path|subject|chapter"

    embedding_function = get_embedding_function()
    db = Chroma(persist_directory=vector_db_path, embedding_function=embedding_function)
    results = db.similarity_search_with_score(query_text, k=5)
    if not results:
        # Nothing to rerank or to ground the prompt on: stop before calling
        # the reranker and the LLM with an empty context.
        return f"No documents were retrieved for '{query_text}'; no questions were generated."

    # The vector-store similarity score is discarded; ordering comes from the
    # cross-encoder score only.
    docs = [doc for doc, _ in results]
    scores = reranker.predict([(query_text, doc.page_content) for doc in docs])
    reranked = sorted(zip(docs, scores), key=lambda pair: pair[1], reverse=True)
    annotated_chunks = [
        f"[Source: {doc.metadata.get('source', 'unknown')}]\n{doc.page_content}"
        for doc, _ in reranked
    ]
    context_text = "\n\n---\n\n".join(annotated_chunks)

    # --- THIS IS THE PLACEHOLDER ---
    # NOTE(review): the placeholder contains no {context}/{question}/
    # {no_of_questions} fields, so presumably none of the inputs below reach
    # the model until it is replaced with the real template.
    prompt_text = "REPLACE_WITH_YOUR_PROMPT_TEMPLATE_STRING"
    prompt_template = ChatPromptTemplate.from_template(prompt_text)
    # ------------------------------

    inputs = {"context": context_text, "question": query_text, "no_of_questions": no_of_questions}
    llm = ChatGoogleGenerativeAI(model=model, temperature=temp, google_api_key=googleapikey)
    chain = LLMChain(llm=llm, prompt=prompt_template, output_parser=StrOutputParser())
    response = chain.run(inputs)

    # Destination for the parsed questions; only used once parsing is re-enabled.
    final_output_path = f"{output_path}{subject}_Chapter{chapter}_{query_text}.json"
    # You will need to re-implement the parse_questions function or copy it from your original notebook
    # parse_questions(response, no_of_questions, final_output_path)
    print(f"Response from LLM: {response}")
    return f"Successfully generated {no_of_questions} questions. JSON parsing needs to be re-enabled."

# Expose the whole RAG pipeline to the agent as a single tool. The description
# is what the agent reads to learn the expected pipe-delimited input format.
rag_tool = Tool(
    func=query_rag_tool,
    name="AdvancedRAGQuestionGenerator",
    description=(
        "Generates academic questions. Input must be a string with format: "
        "query_text|no_of_questions|output_path|subject|chapter"
    ),
)

# Tool list handed to the agent and its executor.
tools = [rag_tool]

### Step 2: Create and Run the Agent

In [None]:
# ReAct prompt template pulled from the LangChain hub.
prompt = hub.pull("hwchase17/react")
llm = ChatGoogleGenerativeAI(model=model, temperature=temp, google_api_key=googleapikey)
agent = create_react_agent(llm, tools, prompt)
# handle_parsing_errors=True sends a malformed ReAct step back to the model as
# an observation so it can retry, instead of aborting the run with an
# output-parsing exception.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)

# Example request: query_text|no_of_questions|output_path|subject|chapter
tool_input = "Nature of software|2|C:/Users/dhili/Desktop/SRIP/week3/Agentic_RAG/|SoftwareEngineering|1"
agent_executor.invoke({"input": f"Use the AdvancedRAGQuestionGenerator tool with the following input: {tool_input}"})