# 🔬 Multi-Agent RAG Research Assistant

This notebook demonstrates a multi-agent RAG (Retrieval-Augmented Generation) pipeline using LangGraph, FAISS, Together.ai (LLaMA-4), and optional PDF ingestion.

Users can interactively generate summaries from:
- Web search results (via SerpAPI),
- Uploaded research PDFs, or
- Both combined in a single run

It includes a modular design, agent-based orchestration, and a Gradio UI for live summarization based on user-defined prompts.


## ⚙️ Tech Stack

- **LangGraph** – Agent orchestration and graph state management  
- **FAISS** – Vector indexing and similarity retrieval  
- **MiniLM (sentence-transformers)** – Document embedding  
- **Together.ai** – LLM-powered summarization (LLaMA-4)  
- **SerpAPI** – Google search result ingestion  
- **PyMuPDF** – PDF parsing and text extraction  
- **Gradio** – Interactive user interface


In [1]:
# Basic NLP and retrieval tools
!pip install -q sentence-transformers langchain faiss-cpu serpapi

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m41.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m63.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m54.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# LLaMA model loader (this is the slow one that may need C++ compilation)
!pip install -q llama-cpp-python


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone


In [4]:
!pip install -U langchain-community


Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.51 (from langchain-community)
  Downloading langchain_core-0.3.51-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.23 (from langchain-community)
  Downloading langchain-0.3.23-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [5]:
!pip install llama-cpp-python[server]


Collecting uvicorn>=0.22.0 (from llama-cpp-python[server])
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting fastapi>=0.100.0 (from llama-cpp-python[server])
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting sse-starlette>=1.6.1 (from llama-cpp-python[server])
  Downloading sse_starlette-2.2.1-py3-none-any.whl.metadata (7.8 kB)
Collecting starlette-context<0.4,>=0.3.6 (from llama-cpp-python[server])
  Downloading starlette_context-0.3.6-py3-none-any.whl.metadata (4.3 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi>=0.100.0->llama-cpp-python[server])
  Downloading starlette-0.46.1-py3-none-any.whl.metadata (6.2 kB)
Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sse_starlette-2.2.1-py3-none-any.whl (10 kB)
Downloading starlette_context-0.3.6-py3-none-any.whl (12 kB)
Downloading uvicorn-0.34

In [6]:
!pip install together


Collecting together
  Downloading together-1.5.5-py3-none-any.whl.metadata (14 kB)
Collecting eval-type-backport<0.3.0,>=0.1.3 (from together)
  Downloading eval_type_backport-0.2.2-py3-none-any.whl.metadata (2.2 kB)
Downloading together-1.5.5-py3-none-any.whl (87 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.9/87.9 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading eval_type_backport-0.2.2-py3-none-any.whl (5.8 kB)
Installing collected packages: eval-type-backport, together
Successfully installed eval-type-backport-0.2.2 together-1.5.5


In [7]:
!pip install langgraph


Collecting langgraph
  Downloading langgraph-0.3.25-py3-none-any.whl.metadata (7.7 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.0.10 (from langgraph)
  Downloading langgraph_checkpoint-2.0.24-py3-none-any.whl.metadata (4.6 kB)
Collecting langgraph-prebuilt<0.2,>=0.1.1 (from langgraph)
  Downloading langgraph_prebuilt-0.1.8-py3-none-any.whl.metadata (5.0 kB)
Collecting langgraph-sdk<0.2.0,>=0.1.42 (from langgraph)
  Downloading langgraph_sdk-0.1.61-py3-none-any.whl.metadata (1.8 kB)
Collecting xxhash<4.0.0,>=3.5.0 (from langgraph)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting ormsgpack<2.0.0,>=1.8.0 (from langgraph-checkpoint<3.0.0,>=2.0.10->langgraph)
  Downloading ormsgpack-1.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Downloading langgraph-0.3.25-py3-none-any.whl

In [8]:
!pip install grandalf


Collecting grandalf
  Downloading grandalf-0.8-py3-none-any.whl.metadata (1.7 kB)
Downloading grandalf-0.8-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: grandalf
Successfully installed grandalf-0.8


In [9]:
!pip install gradio
!python app.py

Collecting gradio
  Downloading gradio-5.23.3-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 

In [10]:
!pip install pymupdf

Collecting pymupdf
  Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.25.5


In [11]:
import gradio as gr
import tempfile
import os
import requests
import faiss
import numpy as np
from typing import TypedDict, List, Optional

from langgraph.graph import StateGraph
from langchain_core.runnables import RunnableLambda
from langchain.docstore.document import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.docstore import InMemoryDocstore
from langchain.document_loaders import PyMuPDFLoader
from together import Together

## Agents

## 🧪 Pipeline Flow

1. 🔍 **SearchAgent** – Uses SerpAPI to fetch top Google results
2. 📄 **PDFLoaderAgent** – Loads the uploaded PDF and extracts its text page by page
3. 🧠 **EmbedAgent** – Embeds all documents using MiniLM
4. 🔎 **RetrieveAgent** – Uses FAISS to find top-k relevant chunks
5. 📝 **SummarizeAgent** – Calls Together.ai LLaMA-4 with user prompt

The entire workflow is managed via LangGraph's stateful graph execution.


In [12]:

def search_agent(state):
    """Fetch Google result snippets for the query through SerpAPI.

    Reads ``query`` and ``serpapi_key`` from ``state``. If either is empty the
    state is returned untouched, so a PDF-only run can continue. Otherwise each
    organic result that carries a non-empty snippet becomes a ``Document`` and
    the resulting list replaces ``state["documents"]``.

    Args:
        state: Pipeline state dict; mutated in place.

    Returns:
        The same ``state`` object.

    Raises:
        Whatever ``requests.get`` or ``resp.json()`` raise (connection errors,
        timeouts, non-JSON bodies) propagates to the caller.
    """
    print("🔍 [SearchAgent] Fetching from SerpAPI...")
    query = state.get("query", "")
    api_key = state.get("serpapi_key", "")
    if not query or not api_key:
        print("⚠️ [SearchAgent] Missing query or API key.")
        return state

    params = {"engine": "google", "q": query, "api_key": api_key, "num": 20}
    # Bound the request so a stalled connection cannot hang the UI callback forever.
    resp = requests.get("https://serpapi.com/search", params=params, timeout=30)
    data = resp.json()

    # SerpAPI describes failures (e.g. a rejected key) in a top-level "error"
    # field; surface it instead of silently continuing with zero snippets.
    if isinstance(data, dict) and data.get("error"):
        print(f"⚠️ [SearchAgent] SerpAPI error: {data['error']}")

    organic = data.get("organic_results", []) if isinstance(data, dict) else []
    snippets = [item["snippet"] for item in organic if item.get("snippet")]
    docs = [
        Document(page_content=text, metadata={"source": "serpapi", "rank": rank})
        for rank, text in enumerate(snippets)
    ]

    state["documents"] = docs  # Replace any existing docs
    print(f"🔍 [SearchAgent] Retrieved {len(docs)} snippets.")
    return state

In [13]:
def pdf_loader_agent(state):
    """Append the pages of the uploaded PDF to ``state["documents"]``.

    ``state["documents"]`` is created as an empty list when absent. If
    ``state["pdf_path"]`` is unset or does not exist on disk, a warning is
    printed and the state is returned with no pages added.

    Args:
        state: Pipeline state dict; mutated in place.

    Returns:
        The same ``state`` object.
    """
    print("📄 [PDFLoaderAgent] Loading PDF...")
    source_path = state.get("pdf_path")

    # Downstream agents index state["documents"] directly, so it must exist.
    state.setdefault("documents", [])

    if not source_path or not os.path.exists(source_path):
        print("⚠️ [PDFLoaderAgent] No PDF found.")
        return state

    pages = PyMuPDFLoader(source_path).load()
    state["documents"].extend(pages)
    print(f"📄 [PDFLoaderAgent] Loaded {len(pages)} pages from PDF.")
    return state

In [14]:
def embed_agent(state):
    """Embed every document with MiniLM and build a FAISS IVF vector store.

    Reads ``state["documents"]``, embeds each ``page_content`` with the
    ``all-MiniLM-L6-v2`` model, trains and fills an ``IndexIVFFlat`` with up to
    five clusters, and stores the wrapping LangChain ``FAISS`` object in
    ``state["vectorstore"]``.

    Args:
        state: Pipeline state dict; mutated in place.

    Returns:
        The same ``state`` object.

    Raises:
        ValueError: If there are no documents to embed. Without this check the
            failure would surface later as an opaque indexing error.
    """
    print("🧠 [EmbedAgent] Embedding and indexing...")
    docs = state.get("documents") or []
    if not docs:
        # Fail before loading the embedding model; nothing useful can follow.
        raise ValueError(
            "No documents to embed. Provide a query with a valid SerpAPI key "
            "and/or upload a PDF."
        )

    embed_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectors = embed_model.embed_documents([doc.page_content for doc in docs])
    vectors_np = np.asarray(vectors, dtype="float32")
    d = vectors_np.shape[1]
    nlist = min(5, len(docs))  # never ask for more clusters than vectors
    quantizer = faiss.IndexFlatL2(d)
    index = faiss.IndexIVFFlat(quantizer, d, nlist)
    index.train(vectors_np)
    index.add(vectors_np)
    # An IVF index probes only `nprobe` clusters per query (FAISS default: 1).
    # On a corpus this small that can skip the relevant chunks or yield fewer
    # than top_k hits, so probe every cluster.
    index.nprobe = nlist

    index_to_docstore_id = {i: str(i) for i in range(len(docs))}
    docstore = InMemoryDocstore({str(i): doc for i, doc in enumerate(docs)})
    vectorstore = FAISS(embed_model, index, docstore, index_to_docstore_id)
    state["vectorstore"] = vectorstore
    print("✅ [EmbedAgent] Indexing complete.")
    return state

In [15]:
def retrieve_agent(state):
    """Run a similarity search for the query and keep the best matches.

    Queries ``state["vectorstore"]`` with ``state["query"]`` (default ``""``)
    for ``state["top_k"]`` results (default 3) and stores the hits in
    ``state["retrieved_docs"]``.

    Args:
        state: Pipeline state dict; mutated in place.

    Returns:
        The same ``state`` object.
    """
    print("🔎 [RetrieveAgent] Retrieving relevant documents...")
    store = state["vectorstore"]
    matches = store.similarity_search(
        state.get("query", ""),
        k=state.get("top_k", 3),
    )
    state["retrieved_docs"] = matches
    print(f"🔎 [RetrieveAgent] Retrieved {len(matches)} docs.")
    return state


In [16]:
def summarize_agent(state):
    """Ask the Together.ai chat model to summarize the retrieved passages.

    Builds one user prompt from ``state["query"]`` and the ``page_content`` of
    each entry in ``state["retrieved_docs"]``, sends it with a fixed system
    message, and returns the first choice's text.

    Args:
        state: Pipeline state dict; must hold ``together_api_key``,
            ``retrieved_docs`` and ``query``.

    Returns:
        A new dict: a shallow copy of ``state`` with ``"summary"`` set.
    """
    print("📝 [SummarizeAgent] Generating summary (based on user instructions)...")

    llm = Together(api_key=state["together_api_key"])
    passages = "\n\n".join([doc.page_content for doc in state["retrieved_docs"]])
    question = state["query"]

    # Sections are separated by blank lines; the passages come last.
    prompt = "\n\n".join([
        "You are a helpful AI research assistant.",
        f"The user has asked: \"{question}\"",
        "Below is the retrieved research content. Please summarize it according to the user's request.",
        "### Research Content:",
        passages,
    ])

    chat_messages = [
        {"role": "system", "content": "You are a helpful scientific summarizer."},
        {"role": "user", "content": prompt},
    ]
    completion = llm.chat.completions.create(
        model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        messages=chat_messages,
        max_tokens=4096,  # allow long summaries
        temperature=0.7,
    )

    result = dict(state)
    result["summary"] = completion.choices[0].message.content
    print("✅ [SummarizeAgent] Summary generated.")
    return result


## LangGraph definition

## 🧪 Pipeline Architecture

```text
[User Query / PDF Upload]
        ↓
SearchAgent 🔍 (via SerpAPI)
        ↓
PDFLoaderAgent 📄 (if PDF exists)
        ↓
EmbedAgent 🧠 (MiniLM + FAISS)
        ↓
RetrieveAgent 🔎 (Top-k retrieval)
        ↓
SummarizeAgent ✍️ (Together.ai LLaMA-4)
        ↓
[Summary Output 📝]
```



## 📝 How to Use

1. Enter a query in the Gradio interface (e.g., "AutoML in 2024, 500-word summary")
2. Optionally upload a research PDF file
3. Provide your API keys:
   - `SerpAPI Key` (for Google search)
   - `Together.ai Key` (for LLaMA-4 summarization)
4. Click Submit and wait for the generated response


## 🔐 API Keys Required

- **SerpAPI Key**: [Get from serpapi.com](https://serpapi.com)
- **Together API Key**: [Get from platform.together.xyz](https://platform.together.xyz)

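Keys are entered directly into the Gradio app fields. If you run locally and prefer to keep them in a `.env` file, note that this notebook does not load `.env` automatically — read the values yourself and pass them in.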


In [17]:
class RAGState(TypedDict):
    """Shared state passed between the agents of the RAG graph.

    Used as the ``state_schema`` of the ``StateGraph``. The Gradio callback
    supplies only the first five keys; the remaining ones are filled in by the
    agents as the pipeline advances.
    """

    # User's research question / summarization instruction.
    query: str
    # SerpAPI key; search_agent skips the web search when it is empty.
    serpapi_key: str
    # Together.ai key used by summarize_agent.
    together_api_key: str
    # Number of chunks retrieve_agent asks the vector store for.
    top_k: int
    # Path of the uploaded PDF, or None when no file was provided.
    pdf_path: Optional[str]
    # Search snippets and/or PDF pages collected for embedding.
    documents: List[Document]
    # Vector store built by embed_agent over `documents`.
    vectorstore: FAISS
    # Hits selected by retrieve_agent.
    retrieved_docs: List[Document]
    # Final text produced by summarize_agent.
    summary: str

# The pipeline is strictly linear, so describe it once as an ordered list of
# (node name, agent function) pairs and derive nodes and edges from it.
_PIPELINE_STAGES = [
    ("SearchAgent", search_agent),
    ("PDFLoaderAgent", pdf_loader_agent),
    ("EmbedAgent", embed_agent),
    ("RetrieveAgent", retrieve_agent),
    ("SummarizeAgent", summarize_agent),
]
_STAGE_NAMES = [stage_name for stage_name, _ in _PIPELINE_STAGES]

graph = StateGraph(state_schema=RAGState)
for stage_name, stage_fn in _PIPELINE_STAGES:
    graph.add_node(stage_name, RunnableLambda(stage_fn))

graph.set_entry_point(_STAGE_NAMES[0])
# Chain each stage to the one that follows it.
for upstream, downstream in zip(_STAGE_NAMES, _STAGE_NAMES[1:]):
    graph.add_edge(upstream, downstream)
graph.set_finish_point(_STAGE_NAMES[-1])

# Compiled, invokable form of the graph used by the Gradio callback.
dag = graph.compile()

## Gradio Interface

## 💡 Example Prompts

- "Summarize this PDF in 500 words with Introduction, Methodology, and Results"
- "What are the major NAS techniques in 2024? Give bullet points"
- "List pros and cons of AutoML methods from this paper"


## ✅ Sample Output (Truncated)

- The paper introduces a hybrid NAS method combining reinforcement learning and gradient descent.
- It compares 3 different architectures: X, Y, Z...
- Key metrics: Accuracy = 92.4%, Latency = 0.8ms


## 📊 Results

- Achieved summaries of up to 1000 words respecting structural prompts
- Enabled hybrid PDF + Search summarization
- Fast similarity search over embedded corpus using IVF-indexed FAISS


In [18]:

def run_rag_pipeline(query, serpapi_key, together_api_key, pdf_file):
    """Gradio callback: validate the inputs, run the graph, return display text.

    Args:
        query: Research question or summarization instruction. May be empty
            when a PDF is supplied; a generic summarization instruction is
            then used and the web search is skipped.
        serpapi_key: SerpAPI key. May be empty, in which case no web search
            is performed.
        together_api_key: Together.ai key; required.
        pdf_file: Upload from ``gr.File`` — a path string or an object that
            exposes the path as ``.name`` — or ``None``.

    Returns:
        The generated summary, or a human-readable warning/error message.
        Never raises: failures are printed with a traceback and returned as
        text so the UI always has something to show.
    """
    try:
        print("\n🚀 Starting RAG pipeline...")

        # Pasted keys frequently carry stray whitespace or newlines.
        query = (query or "").strip()
        serpapi_key = (serpapi_key or "").strip()
        together_api_key = (together_api_key or "").strip()

        pdf_path = None
        if pdf_file is not None:
            # Accept both a plain path string and a file-like object with .name.
            pdf_path = getattr(pdf_file, "name", pdf_file)
            print(f"📄 Using uploaded PDF at: {pdf_path}")

        # Fail fast, before spending time on search and embedding.
        if not together_api_key:
            return "⚠️ Together API key is required to generate a summary."
        if not query and not pdf_path:
            return "⚠️ Enter a research query or upload a PDF."

        if not query:
            # PDF-only run: use a meaningful instruction rather than a dummy
            # string, and blank the search key so nothing irrelevant is googled.
            query = "Summarize the key points of the provided document."
            serpapi_key = ""

        state = {
            "query": query,
            "serpapi_key": serpapi_key,
            "together_api_key": together_api_key,
            "top_k": 3,
            "pdf_path": pdf_path,
        }

        output = dag.invoke(state)

        if not output.get("summary"):
            return "⚠️ No summary generated. Check if the PDF or search returned useful content."

        return output["summary"]

    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"❌ Error: {str(e)}"


In [19]:
# Build the UI. Input widgets are listed in the positional order of
# run_rag_pipeline's parameters: query, SerpAPI key, Together key, PDF upload.
query_box = gr.Textbox(label="📌 Research Query", placeholder="e.g., NAS techniques in 2024")
serpapi_key_box = gr.Textbox(label="🔑 SerpAPI Key", type="password")
together_key_box = gr.Textbox(label="🔐 Together API Key", type="password")
pdf_upload = gr.File(label="📄 Upload Research PDF (Optional)", file_types=[".pdf"])
summary_box = gr.Textbox(label="📝 Summary Output", lines=15)

demo = gr.Interface(
    fn=run_rag_pipeline,
    inputs=[query_box, serpapi_key_box, together_key_box, pdf_upload],
    outputs=summary_box,
    title="🔬 Research Assistant Chatbot (LangGraph + LLaMA)",
    description="Combines SerpAPI + PDF + FAISS + Together.ai for autonomous research summaries.",
)

# Launch Gradio app
demo.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://29b781b604c90ad629.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


