<a href="https://colab.research.google.com/github/Adyypower/LLMs-Model/blob/main/Rag_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install the required packages for our project
!pip install -qU \
    langchain \
    langchain-openai \
    langchain-google-genai \
    langchain-chroma \
    langgraph \
    langchain-community \
    pypdf \
    python-dotenv

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [7]:
!pip install -q langchain-huggingface sentence-transformers

In [8]:
import os
from google.colab import userdata
from typing import TypedDict, Annotated, Sequence
from operator import add as add_messages

# Import LangChain and related modules
from langgraph.graph import StateGraph, END
from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage, ToolMessage
from langchain_google_genai import ChatGoogleGenerativeAI # <-- Import Google's model
from langchain_openai import OpenAIEmbeddings # <-- We still need this for embeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.tools import tool

# Set the API keys from Colab's secrets
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')
# Keep the OpenAI key if you still plan to use their embedding model
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

In [21]:
# Initialize the Chat LLM with Gemini 1.5 Flash
# We set temperature=0 to make the model's output more deterministic
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

print("Using Hugging Face model for embeddings.")

Using Hugging Face model for embeddings.


In [5]:
from google.colab import files

# --- Upload your PDF file ---
print("Please upload your PDF file.")
uploaded = files.upload()

# Get the filename of the uploaded file
if not uploaded:
    raise ValueError("No file was uploaded. Please run the cell again and upload your PDF.")
pdf_path = list(uploaded.keys())[0]
print(f"\nSuccessfully uploaded '{pdf_path}'")


# --- Load and Chunk the PDF ---
pdf_loader = PyPDFLoader(pdf_path)

try:
    pages = pdf_loader.load()
    print(f"PDF has been loaded and has {len(pages)} pages.")
except Exception as e:
    print(f"Error loading PDF: {e}")
    raise

# Initialize the text splitter for chunking
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

# Split the document pages into chunks
pages_split = text_splitter.split_documents(pages)
print(f"Document split into {len(pages_split)} chunks.")

Please upload your PDF file.


Saving attention_paper.pdf to attention_paper (1).pdf

Successfully uploaded 'attention_paper (1).pdf'
PDF has been loaded and has 15 pages.
Document split into 52 chunks.


In [10]:
# Define the directory to persist the vector database and a collection name
persist_directory = "db"
collection_name = "stock_market"

try:
    # Create the Chroma vector store from the document chunks
    vectorstore = Chroma.from_documents(
        documents=pages_split,
        embedding=embeddings,
        persist_directory=persist_directory,
        collection_name=collection_name
    )
    print("Successfully created ChromaDB vector store!")

except Exception as e:
    print(f"Error setting up ChromaDB: {str(e)}")
    raise

# Create the retriever from the vector store
# It will return the top 5 most similar document chunks (k=5)
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5}
)

Successfully created ChromaDB vector store!


In [22]:
@tool
def retriever_tool(query: str) -> str:
    """
    This tool searches and returns the information from the research paper Attention all you need document.
    """

    docs = retriever.invoke(query)

    if not docs:
        return "I found no relevant information in the paper attention all you need document."

    results = []
    for i, doc in enumerate(docs):
        results.append(f"Document {i+1}:\n{doc.page_content}")

    return "\n\n".join(results)


tools = [retriever_tool]

llm = llm.bind_tools(tools)

In [23]:
class AgentState(TypedDict):
  messages:Annotated[Sequence[BaseMessage], add_messages]


In [24]:
def should_continue(State:AgentState):
  """check if the last message contains tool calls"""
  result = State['messages'][-1]
  return hasattr(result, 'tool_calls') and len(result.tool_calls)>0

In [25]:
system_prompt = """
You are an intelligent AI assistant who answers questions about the research paper attention all you need based on the PDF document loaded into your knowledge base.
Use the retriever tool available to answer questions about the stock market performance data. You can make multiple calls if needed.
If you need to look up some information before asking a follow up question, you are allowed to do that!
Please always cite the specific parts of the documents you use in your answers.
"""

In [26]:
tools_dict = {our_tool.name: our_tool for our_tool in tools} # Creating a dictionary of our tools

In [27]:
def call_llm(state: AgentState) -> AgentState:
    """Function to call the LLM with the current state."""
    messages = list(state['messages'])
    messages = [SystemMessage(content=system_prompt)] + messages
    message = llm.invoke(messages)
    return {'messages': [message]}

In [28]:
def take_action(state: AgentState) -> AgentState:
    """Execute tool calls from the LLM's response."""

    tool_calls = state['messages'][-1].tool_calls
    results = []
    for t in tool_calls:
        print(f"Calling Tool: {t['name']} with query: {t['args'].get('query', 'No query provided')}")

        if not t['name'] in tools_dict: # Checks if a valid tool is present
            print(f"\nTool: {t['name']} does not exist.")
            result = "Incorrect Tool Name, Please Retry and Select tool from List of Available tools."

        else:
            result = tools_dict[t['name']].invoke(t['args'].get('query', ''))
            print(f"Result length: {len(str(result))}")


        # Appends the Tool Message
        results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))

    print("Tools Execution Complete. Back to the model!")
    return {'messages': results}

In [29]:
graph = StateGraph(AgentState)
graph.add_node("llm", call_llm)
graph.add_node("retriever_agent", take_action)

graph.add_conditional_edges(
    "llm",
    should_continue,
    {True: "retriever_agent", False: END}
)
graph.add_edge("retriever_agent", "llm")
graph.set_entry_point("llm")

rag_agent = graph.compile()

In [32]:
def running_agent():
  log_file_path = "conversation_log.txt"
  while True:
    user_input = input("\n what is your question about paper: ")
    if user_input.lower() in ["exit","quit"]:
      break
    messages = [HumanMessage(content = user_input)]

    result = rag_agent.invoke({"messages":messages})

    print("\n===== ANSWER=====")

    print(result['messages'][-1].content)
    try:
        with open(log_file_path, 'a', encoding='utf-8') as f:
                f.write(f"Q: {user_input}\n")
                f.write(f"A: {result['messages'][-1].content}\n")
                f.write("-" * 20 + "\n") # Adds a separator
    except Exception as e:
            print(f"\nError writing to log file: {e}")

In [33]:
running_agent()


 what is your question about paper: what is this paper about 
Calling Tool: retriever_tool with query: what is this paper about
Result length: 3640
Tools Execution Complete. Back to the model!

===== ANSWER=====
The provided documents do not contain an abstract or introduction that directly states the paper's main topic. However, based on the snippets, it appears to be a research paper related to **Natural Language Processing (NLP)**, specifically focusing on **attention mechanisms** in neural networks.

Here's what the snippets suggest:

*   **Encoder-Decoder Models:** Document 3 describes an encoder-decoder structure where an input sequence is mapped to continuous representations, and then a decoder generates an output sequence one element at a time, consuming previously generated symbols as input. This is a common architecture in sequence-to-sequence tasks like machine translation.
*   **Attention Mechanisms:** Document 4 shows "Attention Visualizations" and discusses "long-distanc