In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
import os
from dotenv import load_dotenv
load_dotenv()
GOOGLE_API_KEY = os.getenv("API_KEY")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
loader = PyPDFLoader("attention.pdf")
docs = loader.load_and_split()
docs

[Document(metadata={'producer': 'PyPDF2', 'creator': 'PyPDF', 'creationdate': '', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'publisher': 'Curran Associates, Inc.', 'language': 'en-US', 'created': '2017', 'eventtype': 'Poster', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requiring significantly less timeto train. Our single model with 165 million parameters, achieves 27.5 BLEU onEnglish-to-German translation, improving over the existing best ensemble result by over 1 BLEU. On 

In [4]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(docs)
texts

[Document(metadata={'producer': 'PyPDF2', 'creator': 'PyPDF', 'creationdate': '', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'publisher': 'Curran Associates, Inc.', 'language': 'en-US', 'created': '2017', 'eventtype': 'Poster', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requiring significantly less timeto train. Our single model with 165 million parameters, achieves 27.5 BLEU onEnglish-to-German translation, improving over the existing best ensemble result by over 1 BLEU. On 

In [5]:
from langchain_community.embeddings import SentenceTransformerEmbeddings
model_name = "intfloat/e5-small" 
embeddings = SentenceTransformerEmbeddings(model_name=model_name)
from langchain_community.vectorstores import Chroma
db = Chroma.from_documents(texts, embedding=embeddings)

In [6]:
retriever = db.as_retriever()
retriever.invoke("what is attention model?")

[Document(metadata={'creationdate': '', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requiring significantly less timeto train. Our single model with 165 million parameters, achieves 27.5 BLEU onEnglish-to-German translation, improving over the existing best ensemble result by over 1 BLEU. On English-to-French translation, we outperform the previoussingle state-of-the-art with model by 0.7 BLEU, achieving a BLEU score of 41.1.', 'author': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Lli

In [8]:
#Tool 3- Arxiv to work with research papers
from langchain_community.tools import ArxivQueryRun
from langchain_community.utilities import ArxivAPIWrapper
arxiv = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv_tool = ArxivQueryRun(api_wrapper = arxiv,name='arxiv_tool')
arxiv_tool

ArxivQueryRun(name='arxiv_tool', api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=200))

In [9]:
#creating tools to be used by the agent one by one
#Tool 1-to retrieve documents from wikipedia
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
wiki = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
wiki_tool = WikipediaQueryRun(api_wrapper=wiki, name="wikipedia_tool", description="Useful for when you need to look up a topic or person on Wikipedia")
wiki_tool

WikipediaQueryRun(name='wikipedia_tool', description='Useful for when you need to look up a topic or person on Wikipedia', api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\DCQUASTER JACK\\Documents\\GenAI-Projects\\P1\\.venv\\Lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=200))

In [10]:
from langchain_core.tools import StructuredTool

def retriever_fn(query: str) -> str:
    """Fetch relevant docs using your retriever and return concise text."""
    docs = db.get_relevant_documents(query)
    if not docs:
        return "No relevant documents found."
    return "\n\n".join([doc.page_content for doc in docs[:3]])

retriever_tool = StructuredTool.from_function(
    func=retriever_fn,
    name="langsmith_search",
    description="Search for relevant information about LangSmith."
)


In [12]:
from langchain.agents import create_agent
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-pro",
    google_api_key=GOOGLE_API_KEY,
    temperature=0.2,
    max_output_tokens=1024,
)
tools= [retriever_tool, arxiv_tool, wiki_tool]
graph = create_agent(
    model=llm,
    tools=tools,
    system_prompt="You are a helpful assistant."
)

In [13]:
inputs = {"messages": [{"role": "user", "content": "what is attention model"}]}
for chunk in graph.stream(inputs, stream_mode="updates"):
    print(chunk)

{'model': {'messages': [AIMessage(content='', additional_kwargs={'function_call': {'name': 'wikipedia_tool', 'arguments': '{"query": "attention model"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-pro', 'safety_ratings': [], 'grounding_metadata': {}, 'model_provider': 'google_genai'}, id='lc_run--b468f513-214a-454b-9795-c35af38814e3-0', tool_calls=[{'name': 'wikipedia_tool', 'args': {'query': 'attention model'}, 'id': '62abc965-77c9-474e-ac31-e90ea9b49e21', 'type': 'tool_call'}], usage_metadata={'input_tokens': 190, 'output_tokens': 436, 'total_tokens': 626, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 420}})]}}
{'tools': {'messages': [ToolMessage(content='Page: Transformer (deep learning architecture)\nSummary: In deep learning, the transformer is a neural network architecture based on the multi-head attention mechanism, in which text is converted to nu', name=

In [14]:
graph.invoke(inputs)

{'messages': [HumanMessage(content='what is attention model', additional_kwargs={}, response_metadata={}, id='d08ba204-d6a5-4782-84bc-50a521d5ab95'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'wikipedia_tool', 'arguments': '{"query": "attention model"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-pro', 'safety_ratings': [], 'grounding_metadata': {}, 'model_provider': 'google_genai'}, id='lc_run--c6aed347-b309-4edd-83e2-a61d40605e6b-0', tool_calls=[{'name': 'wikipedia_tool', 'args': {'query': 'attention model'}, 'id': '6cb531b7-89ea-4978-9aff-03bfe0e9fc2c', 'type': 'tool_call'}], usage_metadata={'input_tokens': 190, 'output_tokens': 370, 'total_tokens': 560, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 354}}),
  ToolMessage(content='Page: Transformer (deep learning architecture)\nSummary: In deep learning, the transformer is a neural ne

In [15]:
inputs = {"messages": [{"role": "user", "content": "Tell me about romeo and juliet"}]}
response = graph.invoke(inputs)

In [16]:
response

{'messages': [HumanMessage(content='Tell me about romeo and juliet', additional_kwargs={}, response_metadata={}, id='91c43ec2-fe23-4c0e-bcc6-b7f499d569e6'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'wikipedia_tool', 'arguments': '{"query": "Romeo and Juliet"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-pro', 'safety_ratings': [], 'grounding_metadata': {}, 'model_provider': 'google_genai'}, id='lc_run--1d5cab67-03eb-41ef-b51a-1c5ea358a593-0', tool_calls=[{'name': 'wikipedia_tool', 'args': {'query': 'Romeo and Juliet'}, 'id': 'ccc5b7da-1905-44cd-90bd-d4638072f8fa', 'type': 'tool_call'}], usage_metadata={'input_tokens': 194, 'output_tokens': 384, 'total_tokens': 578, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 367}}),
  ToolMessage(content='Page: Romeo and Juliet\nSummary: The Tragedy of Romeo and Juliet, often shortened to Romeo and J

In [25]:
response['messages'][-1].text

'"Romeo and Juliet" is a famous tragedy written by William Shakespeare. It\'s about two young lovers from feuding Italian families. Would you like to know more about the plot, characters, or themes of the play?'