# Importing Libraries

In [None]:
from langchain_huggingface import (
    ChatHuggingFace, # for interacting with HuggingFace chat models
    HuggingFaceEndpoint, # for custom HuggingFace inference API endpoints
    HuggingFaceEmbeddings # for generating vector embeddings from text
  )

from langchain_community.document_loaders import (
  PyMuPDFLoader, # For pdfs with images, tables, formulas, text
  PyPDFLoader # for pdfs with texts
  )

from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_community.vectorstores import FAISS  # used to store and search vector embeddings efficiently

from langchain.tools import tool # decorate or create tools that agents can use

from langchain_core.prompts import PromptTemplate

from langsmith import traceable # Decorator to enable LangSmith tracing for functions or chains

from langchain_community.tools.ddg_search import DuckDuckGoSearchRun # Integration as tool


 # Agent executors and initialization utilities
from langchain.agents import (
    create_react_agent,  # Function to create a ReAct-style agent (reasoning + acting)
    AgentExecutor,       # Class to execute an agent with tools and handle reasoning steps
    initialize_agent     # Legacy function to initialize agents with LLMs and tools
)

# Note: The errors appeared after completing the project, Langchain keeps shiftings its modules to another places so its common 

from langchain_core.output_parsers import StrOutputParser # parses agent output as a string

# Runnables for creating custom chains or sequences of actions
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda, RunnableSequence

from langchain import hub # Hub for prebuilt prompts or chains

import fitz # Comes from PyMuPDF — used for reading and extracting text from PDFs
import re
import os

ImportError: cannot import name 'create_react_agent' from 'langchain.agents' (c:\Users\Joel\anaconda3\lib\site-packages\langchain\agents\__init__.py)

# API


In [None]:
LANGCHAIN_TRACING_V2=True # only then tracing will begin 
HUGGINGFACEHUB_API_KEY='hf_NRTxNOBaAlCQkYTrBFlg'
LANGCHAIN_ENDPOINT='https://api.smith.langchain.com' # this is endpoint
LANGCHAIN_API_KEY='lsv2_pt_c1c71f3d_f4d6bfa8b1' # langsmith key 
LANGCHAIN_PROJECT='langsmith-demo' # in langsmoth project will be created and will be used by langsmith to store traces

# Model

In [None]:
api_key = HUGGINGFACEHUB_API_KEY
llm = HuggingFaceEndpoint(
    repo_id="google/gemma-2-2b-it", # Used Google Gemma model
    task="text-generation", # The type of task can be ('text-generation', 'summarization', 'conversational' etc)
    huggingfacehub_api_token=api_key
)

model = ChatHuggingFace(llm=llm)

# RAG

# Section 1a: Indexing - Document Ingestion

Indexing has 4 steps in it:


*   Document Ingestion
*   Text Splitting
*   Embedding Generation
*   Vector Storing






In [5]:
file_path = r"C:\Users\Joel\OneDrive\Documents\Books\Deep Learning.pdf"


In [6]:
try:
    loader = PyMuPDFLoader(file_path) # Its faster and more reliable than PyPDFLoader especially for extracting clean text and handling complex PDF layouts
    docs = loader.load() # Returns a list of Document objects
 # Checking if documents were loaded successfully
    if docs:
# Displaying part of a sample document page to verify loading
        first_doc_content = docs[199].page_content # randomly selected
        print(first_doc_content[:233]) # Print first 233 characters
    else:
        print("No documents were loaded from the PDF.")
# Error Handling
except FileNotFoundError:
    print(f"Error: The file was not found at the path: {file_path}")
except Exception as e:
    print(f"An error occurred: {e}")

This same general principle of using linear behavior to obtain easier optimization
also applies in other contexts besides deep linear networks. Recurrent networks can
learn from sequences and produce a sequence of states and outputs.


# Section 1b: Text Splitting

In [7]:
# Large PDFs (hundreds or thousands of pages) cannot be processed efficiently by the model at once
# So we split the text into smaller, meaningful chunks either by paragraphs or by fixed size tokens
# This process is known as "text splitting" and it helps the retriever fetch only the most relevant sections later

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # Max tokens/characters per chunk
    chunk_overlap=50 ,  # Overlap between chunks for context continuity
    separators =["\n\n", "\n", ".", " "] # Hierarchical split: paragraph, then line, then sentence, then word
)

In [8]:
chunks = splitter.split_documents(docs) # # Splitting the document into overlapping chunks

In [9]:
print(len(chunks))

2130


In [10]:
chunks[777].page_content # # Preview a random chunks content

'until section 10.7, after recurrent networks have been described in more detail.\n8.2.6\nInexact Gradients\nMost optimization algorithms are designed with the assumption that we have\naccess to the exact gradient or Hessian matrix. In practice, we usually have only\na noisy or even biased estimate of these quantities. Nearly every deep learning\nalgorithm relies on sampling-based estimates, at least insofar as using a minibatch\nof training examples to compute the gradient.'

# Section 1c & 1d: Embedding Generation & Vector Storing


In [12]:
# Convert text into numerical form (since models process numbers, not language)
# Unlike simple encodings (One-Hot, Integer), embeddings capture the semantic meaning of words
# Similar words are placed close together in vector space, helping the model understand context

embedding = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2") # Initialize embedding model from Hugging Face

In [13]:

# Stores all text chunks as dense vectors inside a FAISS index
# FAISS (Facebook AI Similarity Search) enables fast and efficient semantic similarity search to a giveb query

vector_store = FAISS.from_documents(chunks, embedding) # Converts each chunk into an embedding vector using embedding and stores into vector stores

In [15]:
vector_store.index_to_docstore_id # Unique ID for eachh chunk

{0: '16b4adb8-8137-4c1f-a9b8-1229c8486f7e',
 1: '4f251eb1-be7b-4200-9c93-39044e3f4f4b',
 2: '79095fee-086a-4ff7-89c0-94626964743d',
 3: 'a92cf1c6-cfb5-40e8-b6d9-1a1e09cdc131',
 4: 'b43e1321-2c50-4fe8-a93c-fff73979cb2d',
 5: 'a0bea142-5d9a-4a89-aa7e-46b007a2d288',
 6: 'b3040132-4eeb-42c9-88ba-4fcf13bbde59',
 7: 'c60d4341-dd4e-4205-8c2a-250d3588dfb4',
 8: 'a6b5d6ab-b21f-4346-b395-ac7e814d89f3',
 9: '625e0a3a-e10f-4719-83ab-a3d90dc4a352',
 10: 'c7aad5d7-5c7a-4a5c-87fc-0ad1b7b2303c',
 11: '1360dd91-54c0-4dda-9d09-fd152a84bd53',
 12: 'e4c3b28a-3f7b-44bf-943c-5abdfbde7798',
 13: '212263e5-af5c-4322-9c0d-a8d7010cdffc',
 14: '3734ee77-7a2e-44e1-a0a0-3fcbb61e1d16',
 15: '5dfed245-fc83-4fdb-827f-e4d628969344',
 16: '476f5005-c4a9-4c05-b76a-ca7f147b2c11',
 17: '300ce999-e872-4888-93f1-e226a369b445',
 18: '809746cc-094b-461f-9bae-b0a81138b686',
 19: 'fc911bf2-776a-408a-8e10-ee833ec89311',
 20: '2ea95ea9-78d2-4720-903a-6196286269f5',
 21: '84ff5b39-9599-411d-a900-0aca0b0fcb1f',
 22: '7183945e-6b66-

In [16]:
vector_store.get_by_ids(['3b833674-c409-49d2-9339-611bb4f9e74a'])[0].page_content # Example ID 999th taken for preview

'image topology. To process one-dimensional sequential data, we turn next to\nanother powerful specialization of the neural networks framework: recurrent neural\nnetworks.'

# Section 2: Retrieval

In [17]:
from langchain.agents import Tool

retriever = vector_store.as_retriever(search_type="mmr", #  Ensures you don’t get three nearly identical chunks — it gives varied but relevant ones.
                                      search_kwargs={"k": 3, #  number of top chunks to fetch
                                      "lambda_mult": 0.5}) # trade-off between relevance (1.0) and diversity (0.0)

# as retriever transforms the FAISS vector store into a retriever object

In [18]:
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000190033ED4B0>, search_type='mmr', search_kwargs={'k': 3, 'lambda_mult': 0.5})

In [19]:
retrieved_docs = retriever.invoke('What is deep learning') # Retrieve the top-matching document chunks from the FAISS vector store
print(retrieved_docs[1].page_content) # Displaying the second retrived chunk

vision range from reproducing human visual abilities, such as recognizing faces,
to creating entirely new categories of visual abilities. As an example of the latter
category, one recent computer vision application is to recognize sound waves
from the vibrations they induce in objects visible in a video (Davis et al., 2014).
Most deep learning research on computer vision has focused not on such exotic
applications that expand the realm of what is possible with imagery but rather on
a small core of AI goals aimed at replicating human abilities. Most deep learning
for computer vision is used for object recognition or detection of some form,
whether this means reporting which object is present in an image, annotating
an image with bounding boxes around each object, transcribing a sequence of


In [20]:
retrieval_tool = Tool(
    name="BookRetriever",
    func=lambda q: qa({"query": q})["result"],
    description="Use this tool to answer questions from the book."
)

# Step 3: Augmentation

In [21]:
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)

In [22]:
question = "What are Convolutional Neural Networks? Are CNN explained in the book?"
retrieved_docs = retriever.invoke(question)

In [23]:
retrieved_docs[1].page_content

'These chapters are the most important for a practitioner—someone who wants\nto begin implementing and using deep learning algorithms to solve real-world\nproblems today.'

In [24]:
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs) # The LLM can’t process lists so we join all the text into one large string.
context_text

'9.5\nVariants of the Basic Convolution Function\nWhen discussing convolution in the context of neural networks, we usually do\nnot refer exactly to the standard discrete convolution operation as it is usually\nunderstood in the mathematical literature. The functions used in practice diﬀer\nslightly. Here we describe these diﬀerences in detail and highlight some useful\nproperties of the functions used in neural networks.\nFirst, when we refer to convolution in the context of neural networks, we usually\nactually mean an operation that consists of many applications of convolution in\nparallel. This is because convolution with a single kernel can extract only one kind\nof feature, albeit at many spatial locations. Usually we want each layer of our\nnetwork to extract many kinds of features, at many locations.\nAdditionally, the input is usually not just a grid of real values. Rather, it\nis a grid of vector-valued observations. For example, a color image has a red,\n\nThese chapters are

In [25]:
final_prompt = prompt.invoke({"context": context_text, "question": question}) # This creates the final text prompt that will be passed to your LLM.

In [26]:
final_prompt

StringPromptValue(text="\n      You are a helpful assistant.\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say you don't know.\n\n      9.5\nVariants of the Basic Convolution Function\nWhen discussing convolution in the context of neural networks, we usually do\nnot refer exactly to the standard discrete convolution operation as it is usually\nunderstood in the mathematical literature. The functions used in practice diﬀer\nslightly. Here we describe these diﬀerences in detail and highlight some useful\nproperties of the functions used in neural networks.\nFirst, when we refer to convolution in the context of neural networks, we usually\nactually mean an operation that consists of many applications of convolution in\nparallel. This is because convolution with a single kernel can extract only one kind\nof feature, albeit at many spatial locations. Usually we want each layer of our\nnetwork to extract many kinds of features, at many l

# Step 4: Generation

In [27]:
answer = model.invoke(final_prompt) # Send prompt to model
print(answer.content)  # Print models response

HfHubHTTPError: 401 Client Error: Unauthorized for url: https://router.huggingface.co/nebius/v1/chat/completions (Request ID: Root=1-68f33285-4bc71055495813eb70e6402f;7d0cced2-3472-499a-9b00-28e1e93f9e0c)

Invalid credentials in Authorization header

In [35]:
# Utility function to format multiple retrieved documents

def format_docs(retrieved_docs):
  context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
  return context_text

In [36]:
parallel_chain = RunnableParallel({ # RunnableParallel to run multiple preprocessing steps at once
    'context': retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

In [37]:
parallel_chain.invoke('what is CNN')

{'context': '9\nConvolutional Networks\nConvolutional networks (LeCun, 1989), also known as convolutional neural\nnetworks, or CNNs, are a specialized kind of neural network for processing data\nthat has a known grid-like topology. Examples include time-series data, which can\nbe thought of as a 1-D grid taking samples at regular time intervals, and image data,\nwhich can be thought of as a 2-D grid of pixels. Convolutional networks have been\ntremendously successful in practical applications. The name “convolutional neural\nnetwork” indicates that the network employs a mathematical operation called\nconvolution. Convolution is a specialized kind of linear operation. Convolutional\nnetworks are simply neural networks that use convolution in place of general matrix\nmultiplication in at least one of their layers.\nIn this chapter, we ﬁrst describe what convolution is. Next, we explain the\nmotivation behind using convolution in a neural network. We then describe an\n\nLarge Scale Visual

In [38]:
parser = StrOutputParser() # Output parser to convert LLM response into a simple string

In [39]:
main_chain = RunnableSequence(parallel_chain,
                              prompt, # fill prompt template with context and question
                              model, # send prompt to LLM
                              parser) # parse LLM output into string

In [40]:
main_chain.invoke('Can you tell me whats deep learning as per the book') # Running the full chain with a question

'Deep learning is a field that uses artificial neural networks with multiple layers, enabling computers to learn complex representations of data and to execute sequential instructions. \n'

In [41]:
#                                                                              ===  End of RAG section  ===

# Up to this point, I have built a Retrieval-Augmented Generation (RAG) pipeline
# it retrieves relevant content from the document and answers user queries.
# Now I am adding tools and a ReAct pipeline to make the system agentic.
# This allows the model to take actions, call tools, reason, think, over multiple steps
# Turning the  RAG into an Agentic RAG capable of more interactive behavior.

In [42]:
# This agent will be designed to intelligently answer user queries by using multiple tools.
# It will :
# 1. Search the web for up-to-date information.
# 2. Retrieve and summarize content from provided document
# 3. Fetch specific sections of documents for detailed answers
# The agent decides which tool to use based on the query, combining results to provide accurate responses.

# Custom Tool



In [43]:
 # The @traceable decorator is used here to enable observability:
    # - Logs inputs and outputs of the tool for debugging
    # - Allows monitoring tool usage in LangChain/LangSmith dashboards
    # - Helps track execution, performance, and errors
    # - Useful for analyzing agentic workflows where multiple tools interact

@traceable(name="get_section_content_trace")
@tool
def get_section_content(section_title: str) -> str:
    """
    Retrieves the content of a specific section from the Deep Learning PDF.
    """
    pdf_path = "/content/Deep Learning.pdf"  # hardcoded
    doc = fitz.open(pdf_path)
    full_text = ""
    for page in doc:
        full_text += page.get_text()

    pattern = re.escape(section_title) + r'(.*?)(?=\n\n|\n[A-Z][a-z]+|\Z)'
    match = re.search(pattern, full_text, re.DOTALL | re.IGNORECASE)

    if match:
        return f"Content from the section '{section_title}':\n\n{match.group(1).strip()}"
    else:
        return f"Could not find the section titled '{section_title}' in the PDF."


In [44]:
@traceable(name='summarize_pdf_page')
@tool
def summarize_pdf_page(page_number: int) -> str:
    """
    Summarizes the content of a specific page from the Deep Learning PDF.
    Use this tool ONLY when the user explicitly asks for a summary of a single page.
    """
    pdf_path = "/content/Deep Learning.pdf"  # hardcoded

    try:
        doc = fitz.open(pdf_path)
        if page_number < 1 or page_number > len(doc):
            return f"Error: Page number {page_number} is out of range. The document has {len(doc)} pages."

        page = doc[page_number - 1]
        page_content = page.get_text()

        prompt_template = "Summarize the following text concisely:\n\n{text}\n\nCONCISE SUMMARY:"
        PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

        summary_chain = load_summarize_chain(llm_for_summary, chain_type="stuff", prompt=PROMPT)
        summary = summary_chain.invoke({"input_documents": [page_content]})

        return f"Summary of page {page_number}:\n\n{summary}"

    except Exception as e:
        return f"An error occurred while summarizing the page: {e}"


In [45]:
!pip install -U ddgs
duckduckgo = DuckDuckGoSearchRun(name="DuckDuckGo Search") # for web search




In [46]:
tools = []

# RAG Retriever tool
retriever_tool = Tool(
    name="RAG Retriever",
    func=lambda q: retriever.invoke(q),  # your retriever function
    description="Use this to answer general questions from the PDF."
)
tools.append(retriever_tool)

# Wrap other function tools properly
tools.append(Tool.from_function(
    func=get_section_content,
    name="GetSectionContent",
    description="Retrieves content of a specific section from the PDF."
))

tools.append(Tool.from_function(
    func=summarize_pdf_page,
    name="SummarizePDFPage",
    description="Summarizes content of a PDF page."
))

# Web search tool
tools.append(Tool(
    name="DuckDuckGo Search",
    func=duckduckgo.run,
    description="Search the web using DuckDuckGo."
))

In [47]:
prompt = hub.pull("hwchase17/react") # Pulling the ReAct Framework for the agent

In [48]:
agent = initialize_agent(
    tools=tools, # collection of all the tools
    llm=model,  # LLM model used
    agent="zero-shot-react-description",  # or ReAct agent type
    verbose=True
)


  agent = initialize_agent(


In [49]:
agent_executor = initialize_agent(
    tools=tools,
    llm=model,
    agent="zero-shot-react-description",
    verbose=True # Since True # Allows detailed logging of the agent’s actions
)

In [50]:
response = agent_executor.invoke({'input':'what is the Deep Learning'}) # Example: invoking the agent directly
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to search the web for 'Deep Learning' definition. 
Action: DuckDuckGo Search
Action Input: 'Deep Learning definition'[0m
Observation: [36;1m[1;3mIn machine learning, deep learning focuses on utilizing multilayered neural networks to perform tasks such as classification, regression, and representation learning. Deep learning is a branch of machine learningthat is made up of a neural network with three or more layers: 1. Input layer: Data enters through the input layer. 2. Hidden layers: Hidden layers process and transport data to other layers. 3. Output layer: The final result or prediction is made in the output layer. Neural networks attempt to model hum... See full list on coursera.org Deep learning is a subset of machine learningthat is made up of a neural network with three or more layers. A neural network attempts to model the human brain's behavior by learning from large data sets. Deep learning drives

In [54]:
from langchain.chains import RetrievalQA # This loop allows a user to ask questions interactively

# Create a RetrievalQA object using a specific chain type

qa = RetrievalQA.from_chain_type(
    llm=model,
    retriever=retriever, # The retrieval tool
    return_source_documents=True # Also return the documents used for answering
)

while True:
    query = input("Ask me something (or type 'exit' to quit): ")

    if query.lower() == "exit":
        break

    result = qa.invoke(query)
    print("\n--- Retrieval QA Answer ---")
    print("Answer:", result["result"])
    print("Source docs:", [doc.metadata for doc in result["source_documents"]])

    try:
        response = agent_executor.invoke({"input": query})
        print("\n--- Agent Executor Answer ---")
        print("Answer:", response["output"])
    except Exception as e:
        print("\n[Agent Executor Error]", e)


Ask me something (or type 'exit' to quit): How many trainable parameters does GPT 4 have

--- Retrieval QA Answer ---
Answer: The text you provided does not contain the answer to how many trainable parameters GPT 4 has. 

Source docs: [{'producer': 'PDF-XChange (PDFTools4.exe v4.0.0201.0000) (Windows XP)', 'creator': 'pdfsam-console (Ver. 2.4.0e)', 'creationdate': '2016-09-07T11:09:29+01:00', 'source': '/content/Deep Learning.pdf', 'file_path': '/content/Deep Learning.pdf', 'total_pages': 777, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2018-02-27T20:51:34+01:00', 'trapped': '', 'modDate': "D:20180227205134+01'00'", 'creationDate': "D:20160907110929+01'00'", 'page': 251}, {'producer': 'PDF-XChange (PDFTools4.exe v4.0.0201.0000) (Windows XP)', 'creator': 'pdfsam-console (Ver. 2.4.0e)', 'creationdate': '2016-09-07T11:09:29+01:00', 'source': '/content/Deep Learning.pdf', 'file_path': '/content/Deep Learning.pdf', 'total_pages': 777, 'format':

In [59]:
"""
Conclusion:
This project implements a Retrieval-Augmented Generation (RAG) agent capable of
accessing information from both the web and documents. By integrating tools
like DuckDuckGo search, document summarizers, section content and retriever-based knowledge access,
the agent can efficiently provide accurate, concise, and context-aware answers.
It demonstrates how combining RAG with modular tools enhances the capability
of AI agents to interact dynamically with diverse sources of information.

"""

print('-'*65,"The End","-"*65)

----------------------------------------------------------------- The End -----------------------------------------------------------------
