In [2]:
# Vector Data impimentation
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
embeddings = OllamaEmbeddings(model="nomic-embed-text")

In [3]:
loader = PyPDFLoader("../test_documents/attention.pdf")

In [6]:
document = loader.load()

In [7]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap=200)
documents =  text_splitter.split_documents(document)
vectors = FAISS.from_documents(documents, embeddings)

In [8]:
retriver = vectors.as_retriever()

In [9]:
from langchain.tools.retriever import create_retriever_tool
retriver_tool = create_retriever_tool(retriver, "Database_Search",
                      """Search for information about the research paper. You should use this tool to get the information about the relevant research 
                      paper.""")

In [11]:
Tools = [retriver_tool]

# Agent Implimentation 

In [47]:
## Agents
from langchain.agents import AgentExecutor, create_react_agent, create_tool_calling_agent
from langchain.tools import Tool
from langchain.llms import Ollama
from langchain import hub
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
load_dotenv()


True

In [15]:
llm = ChatGroq(
    model="openai/gpt-oss-20b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2, 
    # other params...
)
llm_with_tools = llm.bind_tools(Tools)

                    reasoning_format was transferred to model_kwargs.
                    Please confirm that reasoning_format is what you intended.


In [36]:
parse_agent_prompt = ChatPromptTemplate.from_template("""You are the Research Document Parsing Agent. 
First Use the given Database_Search Tool to get research paper context and  You Read the given  research paper  Fully and divide them into Abstract, Method, Math, Experiments, Results.
{input}
{agent_scratchpad}
""")

In [37]:
agent = create_tool_calling_agent(llm_with_tools, Tools, parse_agent_prompt)
agent_executor = AgentExecutor(agent=agent, tools=Tools, verbose=True,max_iterations=10,handle_parsing_errors=True)

In [None]:
result_doc_agent=agent_executor.invoke({'input':"analyze the Attention is all you Need given research paper"})

In [58]:
print(result_doc_agent['output'])

**Research Document Parsing – “Attention Is All You Need” (Vaswani et al., 2017)**  
*Using the `Database_Search` tool to pull the key sections of the paper, then organizing the material into the requested categories.*

---

## 1. Abstract  
*(Retrieved via `Database_Search` query “Attention is all you need abstract”)*  

> The Transformer, a novel sequence‑to‑sequence model, relies entirely on attention mechanisms, dispensing with recurrence and convolutions.  It achieves state‑of‑the‑art results on the WMT 2014 English‑German and English‑French translation tasks while training 3–6× faster than comparable RNN‑based models.  The core of the architecture is a multi‑head self‑attention layer, combined with simple position‑wise feed‑forward networks and a sinusoidal positional encoding that allows the model to learn relative positions without recurrence.

---

## 2. Method (Model Architecture)  
*(Extracted from the “Model Architecture” and “Encoder/Decoder Stacks” sections)*  

| Compone

# Teaching Agent

In [44]:
teaching_agent_prompt = ChatPromptTemplate.from_template("""You are the Teaching Assistant Agent. 
Read the given context carefully and explain the key concepts, Architecture ideas , algorithm intuition using 
simple language, Analogies and step by step reasoning so even a complete beginner could understand.Use the tools only if you want.
Input: {input}
{agent_scratchpad}
<context>
{context}
</context>
""")

In [50]:
wiki_api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
wiki_tool = WikipediaQueryRun(api_wrapper= wiki_api_wrapper)

In [53]:
llm = ChatGroq(
    model="openai/gpt-oss-20b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2, 
    # other params...
)
teach_tools = [wiki_tool]
teach_llm=llm.bind_tools(teach_tools)

                    reasoning_format was transferred to model_kwargs.
                    Please confirm that reasoning_format is what you intended.


In [59]:
teaching_agent = create_tool_calling_agent(teach_llm,teach_tools ,teaching_agent_prompt)
teaching_agent_executor = AgentExecutor(agent=teaching_agent, tools=teach_tools, verbose=True,max_iterations=10,handle_parsing_errors=True)

In [61]:
teach_agent_results = teaching_agent_executor.invoke({"input":"Teach me this research paper context", "context":result_doc_agent['output']})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m## 1.  What the paper is about – a quick “story”

Imagine you want a computer to translate a sentence from English to German.  
The old way was to read the sentence word‑by‑word, keeping a running memory of everything it had seen so far (this is what **RNNs** do).  
The paper “Attention Is All You Need” says: **we don’t need that slow, step‑by‑step memory.**  
Instead, we can let the model look at *all* words at once, decide *which* words matter for each other, and do that in parallel.  
That’s the whole idea of the **Transformer**.

---

## 2.  Core ideas – “Attention” instead of “Recurrent memory”

| Idea | Why it matters | Simple analogy |
|------|----------------|----------------|
| **Scaled dot‑product attention** | Lets the model ask “how much should I pay attention to word B when I’m looking at word A?” | Like a spotlight that can focus on any part of a stage, but the spotlight’s intensity is scaled so it doesn’t get t

In [63]:
print(teach_agent_results['output'])

## 1.  What the paper is about – a quick “story”

Imagine you want a computer to translate a sentence from English to German.  
The old way was to read the sentence word‑by‑word, keeping a running memory of everything it had seen so far (this is what **RNNs** do).  
The paper “Attention Is All You Need” says: **we don’t need that slow, step‑by‑step memory.**  
Instead, we can let the model look at *all* words at once, decide *which* words matter for each other, and do that in parallel.  
That’s the whole idea of the **Transformer**.

---

## 2.  Core ideas – “Attention” instead of “Recurrent memory”

| Idea | Why it matters | Simple analogy |
|------|----------------|----------------|
| **Scaled dot‑product attention** | Lets the model ask “how much should I pay attention to word B when I’m looking at word A?” | Like a spotlight that can focus on any part of a stage, but the spotlight’s intensity is scaled so it doesn’t get too bright. |
| **Multi‑head attention** | The model can look 

# Mathematical Reasoning Agent

In [85]:
Math_agent_prompt = ChatPromptTemplate.from_template("""You are the Mathematical Reasoning Agent. 
Read the given research paper context carefully, extract all the mathematical formulas and explain , analyze equations, Loss functions, 
optimization methods.
Explain what each equation represents, why it needed and assumptions made so even a complete beginner could understand.
present the results in a human-readable format. 
Use the provided tools ONLY if you need.
Input: {input}
{agent_scratchpad}
<context>
{context}
</context>
""")

In [86]:
llm = ChatGroq(
    model="openai/gpt-oss-20b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2, 
    # other params...
)
math_tools = [wiki_tool]
math_llm=llm.bind_tools(math_tools)

                    reasoning_format was transferred to model_kwargs.
                    Please confirm that reasoning_format is what you intended.


In [87]:
math_agent = create_tool_calling_agent(math_llm,math_tools ,Math_agent_prompt)
math_agent_executor = AgentExecutor(agent=math_agent, tools=math_tools, verbose=True,max_iterations=10,handle_parsing_errors=True)

In [88]:
math_agent_results = math_agent_executor.invoke({'input':'explain all the mathematical terms in this research paper', 'context':result_doc_agent['output']})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m## 1.  What the paper actually *does* with math  

The Transformer paper is all about turning a sentence (or any sequence) into a set of numbers, letting those numbers talk to each other through **attention**, and then turning the final numbers back into words.  
Below is a “cook‑book” of every equation that appears in the paper, written in plain English and with the assumptions that the authors made.

---

## 2.  The building blocks – equations and what they mean  

| # | Equation | What it does | Why it’s needed | Key assumptions / notes |
|---|----------|--------------|-----------------|------------------------|
| **1** | **Scaled Dot‑Product Attention**<br>`Attention(Q, K, V) = softmax( Q Kᵀ / √d_k ) · V` | Computes a weighted sum of the “values” **V**. The weights come from the similarity of each “query” **Q** to every “key” **K**. | Allows every position in a sentence to look at every other position and decide how much 

In [89]:
print(math_agent_results['output'])

## 1.  What the paper actually *does* with math  

The Transformer paper is all about turning a sentence (or any sequence) into a set of numbers, letting those numbers talk to each other through **attention**, and then turning the final numbers back into words.  
Below is a “cook‑book” of every equation that appears in the paper, written in plain English and with the assumptions that the authors made.

---

## 2.  The building blocks – equations and what they mean  

| # | Equation | What it does | Why it’s needed | Key assumptions / notes |
|---|----------|--------------|-----------------|------------------------|
| **1** | **Scaled Dot‑Product Attention**<br>`Attention(Q, K, V) = softmax( Q Kᵀ / √d_k ) · V` | Computes a weighted sum of the “values” **V**. The weights come from the similarity of each “query” **Q** to every “key” **K**. | Allows every position in a sentence to look at every other position and decide how much it cares about it. | 1. `d_k` is the dimensionality of the ke

# Experimental Analysis Agent

In [101]:
experimental_analysis_agent_prompt = ChatPromptTemplate.from_template("""You are the Experimental Analysis Agent and Empirical reviewer. 
Read the given context carefully and explain / Reviews Datasets used, Experimental setup, Baselines, Evaluation metrics.
Determines whether experiments support claims, If comparisons are fair. Conpare them with other related research papers.
Use the given tools .
Input: {input}
{agent_scratchpad}
<context>
{context}
</context>
""")

In [102]:
from langchain_community.tools import ArxivQueryRun
from langchain_community.utilities import ArxivAPIWrapper
arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv_tool = ArxivQueryRun(api_wrapper= arxiv_wrapper)

In [103]:
llm = ChatGroq(
    model="openai/gpt-oss-20b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2, 
    # other params...
)
experimental_analysis_agent_tools = [arxiv_tool,wiki_tool]
experimental_analysis_agent_llm=llm.bind_tools(experimental_analysis_agent_tools)

                    reasoning_format was transferred to model_kwargs.
                    Please confirm that reasoning_format is what you intended.


In [104]:
experimental_analysis_agent = create_tool_calling_agent(experimental_analysis_agent_llm,experimental_analysis_agent_tools ,experimental_analysis_agent_prompt)
experimental_analysis_agent_executor = AgentExecutor(agent=experimental_analysis_agent, tools=experimental_analysis_agent_tools, verbose=True,max_iterations=10,handle_parsing_errors=True)

In [105]:
experimental_results = experimental_analysis_agent_executor.invoke({'input':'experiment analysis on the given research paper', 'context':result_doc_agent['output']})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `arxiv` with `{'query': 'Attention Is All You Need arxiv'}`


[0m[36;1m[1;3mPublished: 2013-04-11
Title: A Simulation and Modeling of Access Points with Definition Language
Authors: Tairen Sun
Summary: This submission has been withdrawn by arXiv administrators because it cont[0m[32;1m[1;3m
Invoking: `arxiv` with `{'query': '1706.03762'}`


[0m[36;1m[1;3mPublished: 2023-08-02
Title: Attention Is All You Need
Authors: Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin
Summary: The do[0m[32;1m[1;3m## Experimental Analysis & Empirical Review  
*(Based on the full “Attention Is All You Need” paper and the extracted summary above)*  

| Item | Findings | Comments |
|------|----------|----------|
| **Datasets** | • **WMT 2014 English‑German** (36 M tokens) <br>• **WMT 2014 English‑French** (36 M tokens) | Both are standard, publicly‑availabl

In [106]:
print(experimental_results['output'])

## Experimental Analysis & Empirical Review  
*(Based on the full “Attention Is All You Need” paper and the extracted summary above)*  

| Item | Findings | Comments |
|------|----------|----------|
| **Datasets** | • **WMT 2014 English‑German** (36 M tokens) <br>• **WMT 2014 English‑French** (36 M tokens) | Both are standard, publicly‑available benchmarks for neural MT. The authors use the same training, validation, and test splits as prior work, ensuring a fair comparison. |
| **Experimental Setup** | • 6‑layer encoder & decoder (d_model = 512, d_ff = 2048, h = 8) <br>• Adam optimizer (β₁=0.9, β₂=0.98, ε=10⁻⁸) <br>• Learning‑rate warm‑up (4000 steps) <br>• Dropout = 0.1, label‑smoothing = 0.1 <br>• Batch size 4096 tokens (global) <br>• Training on a single NVIDIA K80 GPU (≈ 4 days) | The hyper‑parameter choices are fully reported and reproducible. The warm‑up schedule and label‑smoothing are novel contributions that help stabilize training. |
| **Baselines** | • **RNN‑based models**: