In [None]:
# 🧠 Agentic RAG with Gemini + FAISS + LangChain
# Includes logging, recursive splitting, rephrasing, and reasoning

# Install necessary packages
!pip install langchain faiss-cpu langchain-google-genai google-generativeai

# --- Setup Gemini API ---
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
import os

# Never store a real key in the notebook: take it from the environment and,
# only if it is missing, prompt for it without echoing the input.
# NOTE(review): any key that was previously committed in this cell should be
# treated as exposed and rotated.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")

# Kept as a module-level name for any later cell that still reads it.
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]

# Chat model used for answering, rephrasing and fallback reasoning;
# temperature 0.0 is passed to the model constructor.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=0.0)
# Embedding model used when building the vector store.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")


In [None]:
!pip install -U langchain-community

In [None]:
# --- Load and Split Documents using RecursiveCharacterTextSplitter ---
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load the source text file as LangChain documents.
loader = TextLoader("/content/RAG.txt")  # Replace with your file path
raw_docs = loader.load()

# Split the loaded documents into overlapping chunks (chunk_size=800,
# chunk_overlap=100); these chunks are what gets embedded and indexed.
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
docs = splitter.split_documents(raw_docs)

# Quick sanity check on how many chunks the splitter produced.
print(f"Loaded {len(docs)} document chunks.")


In [None]:
# --- Create FAISS Vector Store ---
from langchain.vectorstores import FAISS

# Build a FAISS index from the document chunks using the `embedding` model.
vectorstore = FAISS.from_documents(docs, embedding)
# Retriever view over the index (created with the library's default settings);
# this is the object the RAG function queries.
retriever = vectorstore.as_retriever()


In [6]:
# --- Logging Setup ---
import logging

# Configure root logging at INFO so the info/warning messages emitted by the
# RAG function are shown in the cell output.
logging.basicConfig(level=logging.INFO)
# Named logger used by the retrieval logic.
logger = logging.getLogger("AgenticRAG")


In [7]:
# --- Define Smart RAG Logic Tool ---
from langchain.tools import Tool

def _llm_text(result):
    """Return the text of an LLM response: its ``content`` if present, else the value itself."""
    return result.content if hasattr(result, "content") else result


def _joined_context(documents) -> str:
    """Concatenate the ``page_content`` of retrieved documents, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in documents)


def _answer_from_context(context: str, query: str) -> str:
    """Ask the LLM to answer ``query`` using ``context`` and return the response text."""
    return _llm_text(
        llm.invoke(f"Answer using the context:\n\n{context}\n\nQuestion: {query}")
    )


def smart_agentic_rag(query: str, relevance_keyword: str = "crispr") -> str:
    """Answer ``query`` from retrieved context, falling back to rephrasing and then pure LLM reasoning.

    Flow:
      1. Retrieve documents for ``query``; if their combined text contains
         ``relevance_keyword`` (case-insensitive), answer from that context.
      2. Otherwise rephrase the query with ``rephrase_query_tool``, retrieve
         again, and apply the same relevance check.
      3. If that also fails, delegate to ``reason_with_llm``.

    Args:
        query: The user's question. The original (not the rephrased) query is
            always the one put into the answering prompt.
        relevance_keyword: Substring that must appear in the retrieved context
            for it to count as relevant. Defaults to ``"crispr"``, the value
            that was previously hardcoded (presumably the topic of the indexed
            file - confirm against the corpus).

    Returns:
        The answer text. Context-grounded answers are unwrapped to the
        response's ``content`` so the declared ``str`` return type holds, in
        line with ``rephrase_query_tool`` and ``reason_with_llm``.
    """
    logger.info(f"🔍 Received query: {query}")
    keyword = relevance_keyword.lower()

    # Initial retrieval
    results = retriever.get_relevant_documents(query)
    if results:
        context = _joined_context(results)
        logger.info(f"📚 Retrieved {len(results)} docs for initial query.")

        if keyword in context.lower():  # Naive relevance check
            logger.info("✅ Context appears relevant. Generating answer.")
            return _answer_from_context(context, query)
        logger.warning("⚠️ Context found but seems not very relevant. Rephrasing query.")
    else:
        logger.warning("❌ No context found. Rephrasing query.")

    # Second attempt: retrieve with a rephrased query
    rephrased = rephrase_query_tool(query)
    logger.info(f"🔄 Rephrased query: {rephrased}")

    new_results = retriever.get_relevant_documents(rephrased)
    if new_results:
        context = _joined_context(new_results)
        logger.info(f"📚 Retrieved {len(new_results)} docs after rephrasing.")

        if keyword in context.lower():
            logger.info("✅ Rephrased context looks good. Generating answer.")
            return _answer_from_context(context, query)
        logger.warning("⚠️ Still not relevant. Using LLM reasoning.")
    else:
        logger.warning("❌ Still no documents. Using LLM reasoning.")

    # Final fallback
    return reason_with_llm(query)


# --- Define Rephrase & Reasoning Tools ---
def rephrase_query_tool(query: str) -> str:
    """Ask the LLM to reword ``query`` so that retrieval works better.

    Returns the response's ``content`` attribute when the response has one,
    otherwise the response object as returned by the LLM.
    """
    response = llm.invoke(
        f"Rephrase this query to improve retrieval effectiveness:\n\n{query}"
    )
    if not hasattr(response, "content"):
        return response
    return response.content

def reason_with_llm(query: str) -> str:
    """Answer ``query`` directly with the LLM, without any retrieved context.

    Returns the response's ``content`` attribute when the response has one,
    otherwise the response object as returned by the LLM.
    """
    response = llm.invoke(
        f"No documents are available. Please answer the question using reasoning:\n\n{query}"
    )
    if not hasattr(response, "content"):
        return response
    return response.content


In [20]:
import os

# Never store the Tavily key in the notebook: take it from the environment and,
# only if it is missing, prompt for it without echoing the input.
# NOTE(review): any key that was previously committed in this cell should be
# treated as exposed and rotated.
if not os.environ.get("TAVILY_API_KEY"):
    from getpass import getpass

    os.environ["TAVILY_API_KEY"] = getpass("Enter your Tavily API key: ")

from langchain.tools.tavily_search import TavilySearchResults

# Tavily search tool instance; it is added to the agent's tool list in the
# agent setup cell.
tavily_tool = TavilySearchResults()


  tavily_tool = TavilySearchResults()


In [21]:
# --- Setup LangChain Agent ---
from langchain.agents import initialize_agent, AgentType

from langchain.tools import Tool

# Wrap the retrieval pipeline as a tool. The agent sees only `name` and
# `description` when deciding which tool to call, and passes the tool input
# string to `func`.
rag_tool = Tool(
    name="SmartRetriever",
    func=smart_agentic_rag,
    description="Retrieves relevant information or reasons with LLM if retrieval fails"
)

# Stand-alone query rephrasing (also used internally by smart_agentic_rag).
rephrase_tool = Tool(
    name="RephraseQuery",
    func=rephrase_query_tool,
    description="Rephrases the user's query to help improve retrieval"
)

# Direct LLM answer with no retrieved context.
reasoning_tool = Tool(
    name="LLMReasoning",
    func=reason_with_llm,
    description="Use LLM to answer directly without documents"
)

# Full tool set offered to the agent, including the Tavily search tool.
tools = [rag_tool, rephrase_tool, reasoning_tool,tavily_tool]

# Zero-shot ReAct agent driven by the same `llm`; verbose=True prints the
# agent's intermediate steps to the cell output.
agent_executor = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)


In [23]:
# --- Run a Sample Query ---
# Example question sent to the agent.
query = "GENE EDITING ?"
# Run the agent on the query; with verbose=True its steps are printed as it goes.
response = agent_executor.run(query)
# Show the agent's final answer.
print("📤 Final Answer:\n", response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The query "GENE EDITING ?" is too broad.  I need to rephrase it to get more specific and useful results.

Action: RephraseQuery
Action Input: GENE EDITING ?[0m
Observation: [33;1m[1;3mHere are several ways to rephrase the query "GENE EDITING ?", depending on what you're looking for:

**More specific options:**

* **"CRISPR-Cas9 gene editing"**:  This targets a specific gene editing technology.
* **"Gene editing techniques"**: This is broader, but still more specific than just "gene editing".
* **"Gene editing applications in medicine"**: This focuses on a particular application.
* **"Ethical implications of gene editing"**: This targets a specific aspect of the topic.
* **"Gene editing research"**: This focuses on the scientific research aspect.
* **"Genome editing"**: This is a synonym that might yield different results.


**Options for broader searches:**

* **"Gene therapy"**: This is a related field that often