In [113]:
# Vector Data impimentation
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
embeddings = OllamaEmbeddings(model="nomic-embed-text")

In [114]:
loader = PyPDFLoader("../test_documents/attention.pdf")

In [115]:
document = loader.load()

In [116]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap=200)
documents =  text_splitter.split_documents(document)
vectors = FAISS.from_documents(documents, embeddings)

In [117]:
retriver = vectors.as_retriever()

In [118]:
from langchain.tools.retriever import create_retriever_tool
retriver_tool = create_retriever_tool(retriver, "Database_Search",
                      """Search for information about the research paper. You should use this tool to get the information about the relevant research 
                      paper.""")

In [119]:
Tools = [retriver_tool]

# Agent Implimentation 

In [526]:
## Agents
from langchain.agents import AgentExecutor, create_react_agent, create_tool_calling_agent
from langchain.tools import Tool
from langchain.llms import Ollama
from langchain import hub
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
load_dotenv()


True

In [515]:
llm = ChatGroq(
    model="openai/gpt-oss-20b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2, 
    # other params...
)
parse_llm_with_tools = llm.bind_tools(Tools)

                    reasoning_format was transferred to model_kwargs.
                    Please confirm that reasoning_format is what you intended.


In [516]:
parse_agent_prompt = ChatPromptTemplate.from_template("""You are the Research Document Parsing Agent. 
First Use the given Database_Search Tool to get research paper context and  You Read the given  research paper  Fully and divide them into Abstract, Method, Math, Experiments, Results.
Rules:
- Do NOT ask questions
- Do NOT wait for user input
- Use the document retrieval tool to read the paper
- Be concise but complete
{input}
{agent_scratchpad}
""")

In [517]:
parse_agent = create_tool_calling_agent(parse_llm_with_tools, Tools, parse_agent_prompt)
parse_agent_executor = AgentExecutor(agent=parse_agent, tools=Tools, verbose=True,max_iterations=10,handle_parsing_errors=True)

In [569]:
result_doc_agent=parse_agent_executor.invoke({'input':"analyze the  given research paper"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Database_Search` with `{'query': ''}`


[0m[36;1m[1;3mtensorflow/tensor2tensor.
Acknowledgements We are grateful to Nal Kalchbrenner and Stephan Gouws for their fruitful
comments, corrections and inspiration.
9

Attention Is All You Need
Ashish Vaswani‚àó
Google Brain
avaswani@google.com
Noam Shazeer‚àó
Google Brain
noam@google.com
Niki Parmar‚àó
Google Research
nikip@google.com
Jakob Uszkoreit‚àó
Google Research
usz@google.com
Llion Jones‚àó
Google Research
llion@google.com
Aidan N. Gomez‚àó‚Ä†
University of Toronto
aidan@cs.toronto.edu
≈Åukasz Kaiser ‚àó
Google Brain
lukaszkaiser@google.com
Illia Polosukhin‚àó‚Ä°
illia.polosukhin@gmail.com
Abstract
The dominant sequence transduction models are based on complex recurrent or
convolutional neural networks that include an encoder and a decoder. The best
performing models also connect the encoder and decoder through an attention
mechanism. We propose a new simple 

In [486]:
print(result_doc_agent['output'])

**Attention Is All You Need** ‚Äì Structured Summary  
*(Ashish Vaswani et‚ÄØal., 2017)*  

---

### 1. Abstract  
The paper introduces the **Transformer**, a sequence‚Äëto‚Äësequence model that replaces recurrent and convolutional layers with a stack of **self‚Äëattention** mechanisms. The architecture is fully parallelizable, trains faster, and achieves state‚Äëof‚Äëthe‚Äëart BLEU scores on WMT‚ÄØ2014 English‚ÄëGerman (BLEU‚ÄØ28.4) and English‚ÄëFrench (BLEU‚ÄØ41.8) translation tasks.  

---

### 2. Method (Model Architecture)  

| Component | Description |
|-----------|-------------|
| **Encoder** | 6 identical layers. Each layer has: 1) *multi‚Äëhead self‚Äëattention* (queries, keys, values all from encoder output), 2) *position‚Äëwise feed‚Äëforward network* (two linear layers with ReLU). Residual connections + layer‚Äënorm around each sub‚Äëlayer. |
| **Decoder** | 6 identical layers. Each layer has: 1) *masked multi‚Äëhead self‚Äëattention* (prevent future‚Äëtoken access), 2) *e

# Teaching Agent

In [551]:
teaching_agent_prompt = ChatPromptTemplate.from_template("""You are the Teaching Assistant Agent. 
Read the given context carefully and explain the key concepts, Architecture ideas , algorithm intuition using 
simple language, Analogies and step by step reasoning so even a complete beginner could understand.Use the tools only if you want.
Compress the result into bullet points.
Keep only essential facts, findings, and conclusions.
Input: {input}
{agent_scratchpad}
""")

In [552]:
wiki_api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
wiki_tool = WikipediaQueryRun(api_wrapper= wiki_api_wrapper)

In [553]:
llm = ChatGroq(
    model="openai/gpt-oss-safeguard-20b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2, 
    # other params...
)
teach_tools = [wiki_tool]
teach_llm=llm.bind_tools(teach_tools)

                    reasoning_format was transferred to model_kwargs.
                    Please confirm that reasoning_format is what you intended.


In [554]:
teaching_agent = create_tool_calling_agent(teach_llm,teach_tools ,teaching_agent_prompt)
teaching_agent_executor = AgentExecutor(agent=teaching_agent, tools=teach_tools, verbose=True,max_iterations=10,handle_parsing_errors=True)

In [246]:
teach_agent_results = teaching_agent_executor.invoke({"input":"Teach me this research paper context"+ "context - "+str(result_doc_agent['output'])})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m## üéì ‚ÄúAttention Is All You Need‚Äù ‚Äì A Beginner‚ÄëFriendly Guide

Imagine you‚Äôre translating a sentence from English to German.  
A traditional neural machine‚Äëtranslation system would read the whole sentence word by word, keeping a running ‚Äúmemory‚Äù of what it has seen so far.  
The **Transformer** does something completely different: it looks at *all* words at once, decides *which* words matter for each other, and does this in a way that can be computed in parallel on a GPU.  
That‚Äôs why the paper‚Äôs title is so catchy ‚Äì the whole model is built **only** on a mechanism called *attention*.

Below is a step‚Äëby‚Äëstep walk‚Äëthrough of the key ideas, with everyday analogies and simple math explanations.

---

### 1. The Big Picture: Encoder‚ÄëDecoder with Attention

| Component | What it does | Analogy |
|-----------|--------------|---------|
| **Encoder** | Reads the source sentence and turns it into a set

In [247]:
print(teach_agent_results['output'])

## üéì ‚ÄúAttention Is All You Need‚Äù ‚Äì A Beginner‚ÄëFriendly Guide

Imagine you‚Äôre translating a sentence from English to German.  
A traditional neural machine‚Äëtranslation system would read the whole sentence word by word, keeping a running ‚Äúmemory‚Äù of what it has seen so far.  
The **Transformer** does something completely different: it looks at *all* words at once, decides *which* words matter for each other, and does this in a way that can be computed in parallel on a GPU.  
That‚Äôs why the paper‚Äôs title is so catchy ‚Äì the whole model is built **only** on a mechanism called *attention*.

Below is a step‚Äëby‚Äëstep walk‚Äëthrough of the key ideas, with everyday analogies and simple math explanations.

---

### 1. The Big Picture: Encoder‚ÄëDecoder with Attention

| Component | What it does | Analogy |
|-----------|--------------|---------|
| **Encoder** | Reads the source sentence and turns it into a set of ‚Äúcontext‚Äëaware‚Äù vectors. | A group of people listen

# Mathematical Reasoning Agent

In [555]:
Math_agent_prompt = ChatPromptTemplate.from_template("""You are the Mathematical Reasoning Agent. 
Read the given research paper context carefully, extract all the mathematical formulas and explain , analyze equations, Loss functions, 
optimization methods.
Explain what each equation represents, why it needed and assumptions made so even a complete beginner could understand.
present the results in a human-readable format. 
Compress the result into bullet points.
Keep only essential facts, findings, and conclusions.
Use the provided tools ONLY if you need.
Input: {input}
{agent_scratchpad}

""")

In [556]:
llm = ChatGroq(
    model="openai/gpt-oss-120b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2, 
    # other params...
)
math_tools = [wiki_tool]
math_llm=llm.bind_tools(math_tools)

                    reasoning_format was transferred to model_kwargs.
                    Please confirm that reasoning_format is what you intended.


In [557]:
math_agent = create_tool_calling_agent(math_llm,math_tools ,Math_agent_prompt)
math_agent_executor = AgentExecutor(agent=math_agent, tools=math_tools, verbose=True,max_iterations=10,handle_parsing_errors=True)

In [538]:
math_agent_results = math_agent_executor.invoke({'input':'explain all the mathematical terms in this research paper'+ 'context- '+str(result_doc_agent['output'])})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m## üìö  ‚ÄúAttention‚ÄØIs‚ÄØAll‚ÄØYou‚ÄØNeed‚Äù ‚Äì Mathematics Made Simple  

Below is a **step‚Äëby‚Äëstep walk‚Äëthrough** of every equation that appears in the paper, together with the *why* and the *assumptions* behind each term.  
The goal is to make the math understandable even if you have never seen a neural network before.

---

### 1Ô∏è‚É£  Notation that is used everywhere  

| Symbol | Meaning (in plain English) |
|--------|----------------------------|
| **\(x\)** | A vector (a list of numbers). |
| **\(X\in\mathbb{R}^{n\times d}\)** | A matrix with *n* rows and *d* columns; each row is a *d‚Äëdimensional* vector. |
| **\(d_{\text{model}}\)** | Dimension of the model (the size of every token representation). Typical values: 512 (Base) or 1024 (Big). |
| **\(d_k, d_v\)** | Dimension of *keys* and *values* inside an attention head (usually \(d_k = d_v = d_{\text{model}}/h\)). |
| **\(h\)** | Number of attention hea

In [276]:
print(math_agent_results['output'])

**Attention‚ÄØIs‚ÄØAll‚ÄØYou‚ÄØNeed (Vaswani‚ÄØet‚ÄØal.,‚ÄØ2017)**  
A beginner‚Äëfriendly walk‚Äëthrough of every mathematical expression that appears in the paper, together with the intuition behind it, why it is needed, and the assumptions that underlie it.

---

## 1.  Quick Overview of the Model

| Part | What it does | Main symbols |
|------|--------------|--------------|
| **Input embedding** | Turns each token (word‚Äëpiece, character, etc.) into a dense vector of size `d_model`. | `x‚ÇÅ,‚Ä¶,x‚Çô ‚àà ‚Ñù^{d_model}` |
| **Positional encoding** | Gives the model a notion of ‚Äúorder‚Äù because attention itself is order‚Äëagnostic. | `PE(pos, i)` |
| **Encoder** | 6 identical layers, each containing **(a)** Multi‚ÄëHead Self‚ÄëAttention and **(b)** a Position‚Äëwise Feed‚ÄëForward Network (FFN). | `Enc‚ÇÅ,‚Ä¶,Enc‚ÇÜ` |
| **Decoder** | 6 identical layers, each containing **(a)** Masked Multi‚ÄëHead Self‚ÄëAttention, **(b)** Multi‚ÄëHead Encoder‚ÄëDecoder Attention, **(c)** FFN. | `

# Experimental Analysis Agent

In [558]:
experimental_analysis_agent_prompt = ChatPromptTemplate.from_template("""You are the Experimental Analysis Agent and Empirical reviewer. 
Read the given context carefully and explain / Reviews Datasets used, Experimental setup, Baselines, Evaluation metrics.
Determines whether experiments support claims, If comparisons are fair. Conpare them with other related research papers.
Compress the result into bullet points.
Keep only essential facts, findings, and conclusions.
Use the given tools only if needed. Give me exacly a final answer don't think too much.
Input: {input}
{agent_scratchpad}
""")

In [559]:
from langchain_community.tools import ArxivQueryRun
from langchain_community.utilities import ArxivAPIWrapper
arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv_tool = ArxivQueryRun(api_wrapper= arxiv_wrapper)

In [560]:
llm = ChatGroq(
    model="openai/gpt-oss-120b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2, 
    # other params...
)
experimental_analysis_agent_tools = [arxiv_tool,wiki_tool]
experimental_analysis_agent_llm=llm.bind_tools(experimental_analysis_agent_tools)

                    reasoning_format was transferred to model_kwargs.
                    Please confirm that reasoning_format is what you intended.


In [561]:
experimental_analysis_agent = create_tool_calling_agent(experimental_analysis_agent_llm,experimental_analysis_agent_tools ,experimental_analysis_agent_prompt)
experimental_analysis_agent_executor = AgentExecutor(agent=experimental_analysis_agent, tools=experimental_analysis_agent_tools, verbose=True,max_iterations=5,handle_parsing_errors=True)

In [394]:
experimental_results = experimental_analysis_agent_executor.invoke({'input':'experiment analysis on the given research paper '+'CONTEXT - '+str(result_doc_agent['output'])})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m**Experimental‚ÄëAnalysis & Empirical Review of ‚ÄúAttention Is All You Need‚Äù (Vaswani‚ÄØet‚ÄØal., 2017)**  

---

## 1. Datasets Used  

| Dataset | Language Pair | Size (‚âà‚ÄØM sentences) | Pre‚Äëprocessing | BPE merges | Test set |
|---------|---------------|----------------------|----------------|-----------|----------|
| **WMT‚ÄØ2014** | English‚ÄØ‚Üí‚ÄØGerman (EN‚ÄëDE) | 4.5‚ÄØM | Tokenized, lower‚Äëcased, SentencePiece (or BPE) | 32‚ÄØk | *newstest2014* |
| **WMT‚ÄØ2014** | English‚ÄØ‚Üí‚ÄØFrench (EN‚ÄëFR) | 36‚ÄØM | Same pipeline as EN‚ÄëDE | 32‚ÄØk | *newstest2014* |

*Why these datasets?* They are the standard benchmarks for neural machine translation (NMT) and were used by all prior strong baselines (GNMT, ConvS2S, LSTM‚Äëattention). This makes direct BLEU comparison possible.

---

## 2. Experimental Setup  

| Component | Configuration |
|-----------|----------------|
| **Model variants** | *Transformer‚Äëbase

In [395]:
print(experimental_results['output'])

**Experimental‚ÄëAnalysis & Empirical Review of ‚ÄúAttention Is All You Need‚Äù (Vaswani‚ÄØet‚ÄØal., 2017)**  

---

## 1. Datasets Used  

| Dataset | Language Pair | Size (‚âà‚ÄØM sentences) | Pre‚Äëprocessing | BPE merges | Test set |
|---------|---------------|----------------------|----------------|-----------|----------|
| **WMT‚ÄØ2014** | English‚ÄØ‚Üí‚ÄØGerman (EN‚ÄëDE) | 4.5‚ÄØM | Tokenized, lower‚Äëcased, SentencePiece (or BPE) | 32‚ÄØk | *newstest2014* |
| **WMT‚ÄØ2014** | English‚ÄØ‚Üí‚ÄØFrench (EN‚ÄëFR) | 36‚ÄØM | Same pipeline as EN‚ÄëDE | 32‚ÄØk | *newstest2014* |

*Why these datasets?* They are the standard benchmarks for neural machine translation (NMT) and were used by all prior strong baselines (GNMT, ConvS2S, LSTM‚Äëattention). This makes direct BLEU comparison possible.

---

## 2. Experimental Setup  

| Component | Configuration |
|-----------|----------------|
| **Model variants** | *Transformer‚Äëbase* (6 encoder + 6 decoder layers, d_model‚ÄØ=‚ÄØ512, h‚ÄØ=‚ÄØ8

# Testing Multi Agent Architecture

In [170]:
from langchain.tools import tool

In [None]:
You are the Teaching Assistant Agent. 
Read the given context carefully and explain the key concepts, Architecture ideas , algorithm intuition using 
simple language, Analogies and step by step reasoning so even a complete beginner could understand.Use the tools only if you want.
Input: {input}
{agent_scratchpad}
<context>
{context}
</context>

In [562]:
@tool
async def teach_paper_tool(request: str, context: str) -> str:
    """Teach the given research paper context in a beginner friendly manner.

    Use this tool to explain the key concepts, Architecture ideas , algorithm intuition using 
    simple language, Analogies and step by step reasoning so even a complete beginner could understand.
    Compress the output content into bullet points.
    Keep only essential facts, findings, and conclusions.

    """
    combined_input = f"""
TASK:
{request}

RESEARCH PAPER CONTEXT:
{context}
"""
    result = await teaching_agent_executor.ainvoke({
        "input": combined_input
    })

    return result['output']




In [563]:
@tool
async def teach_math_tool(request: str, context: str) -> str:
    """Read the given research paper context carefully, extract all the mathematical formulas and explain , analyze equations, Loss functions, 
    optimization methods.

    Use this tool to Explain what each equation represents, why it needed and assumptions made so even a complete beginner could understand.
    present the results in a human-readable format.

    """
    combined_input = f"""
    TASK:
    {request}
    
    RESEARCH PAPER CONTEXT:
    {context}
    """
    result = await math_agent_executor.ainvoke({
        "input": combined_input
    })

    return compressed_context['output']

In [564]:
@tool
async def experiment_analysis_tool(request: str, context: str) -> str:
    """
    Use this tool to conduct the Experimental Analysis
    Read the given context carefully and explain / Reviews Datasets used, Experimental setup, Baselines, Evaluation metrics.
    Determines whether experiments support claims, If comparisons are fair. Conpare them with other related research papers.
    Only use a sufficient token amount so supervisor agent can handle it
    

    """
    combined_input = f"""
    TASK:
    {request}
    
    RESEARCH PAPER CONTEXT:
    {context}
    """
    result = await experimental_analysis_agent_executor.ainvoke({
        "input": combined_input
    })

    return compressed_context['output']

# Summarizer Agent

In [548]:
summarizer_agent_prompt = ChatPromptTemplate.from_template("""
    Compress the following content into bullet points.
    Keep only essential facts, findings, and conclusions.
    
    Content:
    {input}
    {agent_scratchpad}
    """) 

summary_llm = ChatGroq(
    model="openai/gpt-oss-safeguard-20b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2,
    # other params... 
)


#supervisor_tools = [teach_paper_tool, teach_math_tool, experiment_analysis_tool]
#supervisor_llm_with_tools =llm_supervisor.bind_tools(supervisor_tools)
summary_agent = create_tool_calling_agent(summary_llm,[wiki_tool], summarizer_agent_prompt)
summary_agent_executor = AgentExecutor(agent=summary_agent, tools=[wiki_tool], verbose=True,max_iterations=2,handle_parsing_errors=True)

                    reasoning_format was transferred to model_kwargs.
                    Please confirm that reasoning_format is what you intended.


In [549]:
summarized_context = summary_agent_executor.invoke({"input":result_doc_agent["output"]})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m**Attention Is All You Need ‚Äì Key Take‚Äëaways**

- **Model Innovation**
  - Introduces the **Transformer**, a sequence‚Äëto‚Äësequence architecture that uses only **self‚Äëattention** (no RNNs or CNNs).
  - Fully parallelizable, enabling faster training and inference.

- **Architecture Highlights**
  - **Encoder**: 6 layers √ó (multi‚Äëhead self‚Äëattention + position‚Äëwise feed‚Äëforward) with residual + layer‚Äënorm.
  - **Decoder**: 6 layers √ó (masked self‚Äëattention + encoder‚Äëdecoder attention + feed‚Äëforward) with residual + layer‚Äënorm.
  - **Multi‚Äëhead Attention**: 8 heads (Base) or 16 heads (Big); each head performs scaled dot‚Äëproduct attention.
  - **Positional Encoding**: Sinusoidal signals added to embeddings; essential for sequence order (drop ~1.5 BLEU if removed).

- **Mathematical Core**
  - Scaled dot‚Äëproduct: `Attention(Q,K,V)=softmax(QK·µÄ/‚àöd_k)V`.
  - Multi‚Äëhead: concatenate heads ‚Üí li

In [542]:
print(summarized_context['output'])

**Key Take‚Äëaways from ‚ÄúAttention Is All You Need‚Äù**

- **Model Innovation**  
  - Introduces the **Transformer**: a sequence‚Äëto‚Äësequence architecture that uses only self‚Äëattention, no recurrence or convolutions.  
  - Fully parallelizable, enabling faster training and inference.

- **Architecture Highlights**  
  - **Encoder**: 6 layers √ó (multi‚Äëhead self‚Äëattention + position‚Äëwise feed‚Äëforward).  
  - **Decoder**: 6 layers √ó (masked self‚Äëattention + encoder‚Äëdecoder attention + feed‚Äëforward).  
  - **Multi‚Äëhead attention**: 8 heads (base) or 16 heads (big); each head performs scaled dot‚Äëproduct attention.  
  - **Positional encoding**: sinusoidal signals added to token embeddings; essential for sequence order.  
  - Residual connections + layer‚Äënorm around every sub‚Äëlayer; final linear + softmax for output.

- **Training Regimen**  
  - Optimizer: Adam (Œ≤‚ÇÅ=0.9, Œ≤‚ÇÇ=0.98, Œµ=1e‚Äë9).  
  - Learning‚Äërate schedule: warm‚Äëup 4000 steps ‚Üí decay ‚

In [565]:
SUPERVISOR_PROMPT = ChatPromptTemplate.from_template(
    """You are the research coordinator agent. 
    Read the given research paper context
    You can Breaks the paper into logical components such as problem, Method, Math (use the teach_math_tool for this),   
    Theory (use the teach_paper_tool for this), Experiments (use the experiment_analysis_tool for this), Results
    Assigns tasks to specialist agents, Controls analysis order, Provide agent inputs only with the relavant contenxt using the given tool.
    Give ONLY relevent CONTEXT TO EACH AGENT / TOOLS WITH INPUT.
    teach_math_tool - to explain math
    teach_paper_tool - to explain the paper content
    Decides when analysis is ‚Äúcomplete‚Äù 
    Break down user requests into appropriate tool calls and coordinate the results. 
    When a request involves multiple actions, use multiple tools in sequence.
    finally you should give a full blown analysis report. Not Just A Summary

    Input: {input}
    {agent_scratchpad}
    <context>
    {context}
    </context>"""
)

In [566]:

llm_supervisor = ChatGroq(
    model="openai/gpt-oss-120b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2,
    # other params...
)
supervisor_tools = [teach_paper_tool, teach_math_tool, experiment_analysis_tool]
supervisor_llm_with_tools =llm_supervisor.bind_tools(supervisor_tools)

                    reasoning_format was transferred to model_kwargs.
                    Please confirm that reasoning_format is what you intended.


In [567]:
supervisor_agent = create_tool_calling_agent(supervisor_llm_with_tools,supervisor_tools, SUPERVISOR_PROMPT)
supervisor_agent_executor = AgentExecutor(agent=supervisor_agent, tools=supervisor_tools, verbose=True,max_iterations=10,handle_parsing_errors=True)

In [570]:
import asyncio

async def run_pipeline():

    result_doc_agent= await parse_agent_executor.ainvoke({'input':"analyze the  given research paper"})


    summary_context= await summary_agent_executor.ainvoke({"input":result_doc_agent["output"]})
    
    result = await supervisor_agent_executor.ainvoke({
        "input": f"""
Analyze and explain the following research paper.
Use provided tools when appropriate.

"""
    ,'context':summary_context['output']})

    return summary_context["output"]


# Run
final_output = await run_pipeline()
print(final_output)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI‚Äôm sorry, but no research paper was provided for analysis.[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m- No research paper was provided for analysis.[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI‚Äôm sorry, but there‚Äôs no research‚Äëpaper content in the request for me to analyze. Please provide the text (or a link) of the paper you‚Äôd like examined, and I‚Äôll break it down, run the appropriate tool calls, and produce a full analysis report.[0m

[1m> Finished chain.[0m
- No research paper was provided for analysis.


In [507]:
print(final_output)

**Attention Is All You Need ‚Äì Key Take‚Äëaways**

- **Core Idea**
  - Introduces the **Transformer**: a seq‚Äëto‚Äëseq model that uses **only attention mechanisms** (no RNNs or convolutions).
  - Claims higher translation quality, greater parallelism, and faster training/inference.

- **Architecture**
  - **Encoder**: 6 identical layers ‚Üí multi‚Äëhead self‚Äëattention + position‚Äëwise feed‚Äëforward network.
  - **Decoder**: 6 identical layers ‚Üí masked multi‚Äëhead self‚Äëattention, encoder‚Äëdecoder attention, feed‚Äëforward network.
  - **Attention**: Scaled dot‚Äëproduct‚ÄØ\( \text{softmax}(QK^\top/\sqrt{d_k})V \).
  - **Multi‚Äëhead**: splits model dimension into \(h\) heads, processes in parallel, then concatenates.
  - **Positional Encoding**: sinusoidal functions inject sequence order information.
  - **Embeddings**: learned token embeddings; final linear + softmax for output distribution.

- **Mathematical Formulas**
  - Scaled dot‚Äëproduct attention (Eq.‚ÄØ1).
  - Mult