# SLM Deep Researcher Example

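This notebook demonstrates streaming research with the slm-deep-researcher API. It expects a LangGraph server to be reachable at the URL given by the `LANGGRAPH_BASE_URL` environment variable (default: `http://127.0.0.1:2024`).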

In [1]:
import json
import os
from dataclasses import dataclass
from typing import Any, Dict, Iterator, Optional

import requests
from rich.console import Console
from rich.markdown import Markdown
from rich.markup import escape
from rich.panel import Panel

In [2]:
# Server address: taken from LANGGRAPH_BASE_URL when that variable is set,
# otherwise the loopback address on port 2024.
BASE_URL = os.getenv('LANGGRAPH_BASE_URL', 'http://127.0.0.1:2024')

# One Rich console shared by every cell that prints output.
console = Console()

In [3]:
@dataclass
class ResearchProgress:
    """One progress update emitted while a research run is streaming.

    Only ``stage`` is always set; the other fields default to ``None`` and are
    filled in depending on the stage (for example ``query`` for
    ``"query_generated"`` / ``"reflection_query"``, ``sources`` for
    ``"sources_found"``, ``summary`` for ``"summary_updated"`` /
    ``"completed"``).
    """

    # Stage label, e.g. "query_generated", "sources_found", "completed".
    stage: str
    # Search query attached to query-related stages.
    query: Optional[str] = None
    # Source bullet lines attached to the "sources_found" stage.
    sources: Optional[list[str]] = None
    # Running or final summary text.
    summary: Optional[str] = None
    # True only for the update that carries the final summary.
    is_final: bool = False

In [4]:
class ResearchClient:
    """Small HTTP client for the slm-deep-researcher graph on a LangGraph server.

    Constructing the client performs two requests: a GET to ``/ok`` to check
    that the server answers, and a POST to ``/assistants`` whose response
    supplies the ``assistant_id`` used by :meth:`research`.
    """

    # Timeouts (seconds) handed to ``requests`` for each kind of call.
    HEALTH_TIMEOUT = 5
    ASSISTANT_TIMEOUT = 10
    STREAM_TIMEOUT = 300

    def __init__(self, base_url: str = BASE_URL):
        """Connect to ``base_url`` and obtain an assistant id.

        Raises:
            ConnectionError: if the ``/ok`` check fails.
            requests.HTTPError: if the assistant request returns an error status.
        """
        # Drop trailing slashes so f'{base_url}/ok' never contains '//'.
        self.base_url = base_url.rstrip('/')
        self.headers = {'Content-Type': 'application/json'}
        self.assistant_id = None
        self._ensure_connection()
        self._create_assistant()

    def _ensure_connection(self):
        """Raise ``ConnectionError`` unless GET ``/ok`` succeeds."""
        try:
            response = requests.get(f'{self.base_url}/ok', timeout=self.HEALTH_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as e:
            # Chain explicitly so the underlying requests error stays visible.
            raise ConnectionError(f"Cannot connect to LangGraph server: {e}") from e

    def _create_assistant(self):
        """POST to ``/assistants`` and store the returned ``assistant_id``."""
        payload = {
            'name': 'slm-deep-researcher',
            'graph_id': 'slm_deep_researcher',
            # NOTE(review): presumably makes the call reuse an existing
            # assistant instead of failing - confirm against the server API.
            'if_exists': 'do_nothing',
        }
        response = requests.post(
            f'{self.base_url}/assistants',
            headers=self.headers,
            json=payload,
            timeout=self.ASSISTANT_TIMEOUT,
        )
        response.raise_for_status()
        self.assistant_id = response.json()['assistant_id']

    def research(self, topic: str) -> Iterator[ResearchProgress]:
        """Stream a research run for ``topic`` and yield progress updates.

        Lines of the streamed response that start with ``data:`` are parsed as
        JSON and passed to :meth:`_parse_event`; only events it recognises are
        yielded.

        Raises:
            requests.HTTPError: if the stream request returns an error status.
            json.JSONDecodeError: if a ``data:`` line is not valid JSON.
        """
        payload = {
            "assistant_id": self.assistant_id,
            "input": {"research_topic": topic},
            "on_completion": "delete",
            "stream_mode": ["events"],
        }
        data_prefix = "data:"

        with requests.post(
            f"{self.base_url}/runs/stream",
            headers=self.headers,  # same headers as the other POST, for consistency
            json=payload,
            stream=True,
            timeout=self.STREAM_TIMEOUT,
        ) as resp:
            resp.raise_for_status()

            for raw in resp.iter_lines():
                # Blank lines are skipped.
                if not raw:
                    continue

                text = raw.decode("utf-8")
                if not text.startswith(data_prefix):
                    continue

                event = json.loads(text[len(data_prefix):])
                progress = self._parse_event(event)
                if progress is not None:
                    yield progress

    def _parse_event(self, event: Dict[str, Any]) -> Optional[ResearchProgress]:
        """Translate one decoded stream event into a ``ResearchProgress``.

        Returns ``None`` for anything that is not an ``on_chain_end`` event
        from one of the known node names, or whose output lacks the expected
        non-empty field.
        """
        # A JSON payload may decode to something other than an object
        # (e.g. ``null`` or a list); such events carry nothing to report.
        if not isinstance(event, dict):
            return None

        kind = event.get("event")
        name = event.get("name")
        if kind != "on_chain_end" or not name:
            return None

        # ``.get(key, {})`` only covers a *missing* key; an explicit null (or
        # any non-dict value) must be handled too or ``.get`` below would fail.
        data = event.get("data")
        output = data.get("output") if isinstance(data, dict) else None
        if not isinstance(output, dict):
            return None

        if name == "generate_query":
            query = output.get("search_query")
            if query:
                return ResearchProgress(stage="query_generated", query=query)

        elif name == "web_research":
            sources = output.get("sources_gathered")
            # Only the most recent entry is reported.
            # assumes each entry is a newline-separated string of "* ..." bullets - TODO confirm
            if sources and sources[-1]:
                source_list = [
                    line.strip() for line in sources[-1].split("\n")
                    if line.startswith("* ")
                ]
                return ResearchProgress(stage="sources_found", sources=source_list)

        elif name == "summarize_sources":
            summary = output.get("running_summary")
            if summary:
                return ResearchProgress(stage="summary_updated", summary=summary)

        elif name == "reflect_on_summary":
            query = output.get("search_query")
            if query:
                return ResearchProgress(stage="reflection_query", query=query)

        elif name == "finalize_summary":
            summary = output.get("running_summary")
            if summary:
                return ResearchProgress(stage="completed", summary=summary, is_final=True)

        return None

In [5]:
def display_progress(
    progress: ResearchProgress,
    loop_count: dict,
    max_sources: int = 3,
    preview_chars: int = 100,
):
    """Print one research progress update to the shared Rich console.

    Args:
        progress: The update to render; the branch taken depends on
            ``progress.stage``. Unknown stages print nothing.
        loop_count: Mutable counter of the form ``{'value': int}``; it is
            incremented in place each time a ``"reflection_query"`` update is
            shown so follow-up queries can be numbered.
        max_sources: How many source lines to list for ``"sources_found"``.
        preview_chars: Maximum length of the summary preview for
            ``"summary_updated"``.

    Text that comes from the research run is passed through ``escape`` before
    being placed inside Rich markup; otherwise bracketed content such as
    ``[link]`` would be interpreted as a markup tag.
    """
    if progress.stage == "query_generated":
        console.print("\n[bold cyan]🔍 Initial Query[/bold cyan]")
        console.print(f"[cyan]   {escape(progress.query)}[/cyan]")

    elif progress.stage == "sources_found":
        sources = progress.sources or []
        console.print(f"[bold green]✓[/bold green] [green]Found {len(sources)} sources[/green]")
        for source in sources[:max_sources]:
            # Remove just the leading bullet marker. ``lstrip("* ")`` strips a
            # *set* of characters and would also eat asterisks that belong to
            # the title itself (e.g. "**Bold title**").
            source_clean = source.removeprefix("* ").lstrip()
            console.print(f"   [dim]{escape(source_clean)}[/dim]")

    elif progress.stage == "summary_updated":
        flat = progress.summary.replace("\n", " ")
        preview = flat[:preview_chars]
        # Only show an ellipsis when text was actually cut off.
        ellipsis = "..." if len(flat) > preview_chars else ""
        console.print("[bold yellow]📝[/bold yellow] [yellow]Summary updated[/yellow]")
        console.print(f"   [dim]{escape(preview)}{ellipsis}[/dim]")

    elif progress.stage == "reflection_query":
        loop_count['value'] += 1
        console.print(f"\n[bold magenta]🔍 Follow-up Query #{loop_count['value']}[/bold magenta]")
        console.print(f"[magenta]   {escape(progress.query)}[/magenta]")

    elif progress.stage == "completed":
        console.print("\n")
        console.rule("[bold green]Research Complete[/bold green]")
        console.print("\n")
        # The final summary is rendered as Markdown, not Rich markup, so it is
        # passed through unescaped.
        md = Markdown(progress.summary)
        console.print(md)

## Run Research

Set your research topic and run the cells below.

In [6]:
# Question passed to the research client in the next cell; edit to taste.
topic = "Investigate the environmental impact of training one large model vs. 10,000 tiny ones."

# Show the topic in a blue-bordered panel sized to its content, then a blank line.
topic_panel = Panel.fit(
    f"[bold white]{topic}[/bold white]",
    title="[bold blue]Research Topic[/bold blue]",
    border_style="blue",
)
console.print(topic_panel)
console.print()

In [7]:
# Creating the client contacts the server at BASE_URL right away.
client = ResearchClient()

# Mutable counter shared with display_progress so follow-up queries get numbered.
loop_count = dict(value=0)

# Render each progress update as soon as the stream yields it.
for progress in client.research(topic):
    display_progress(progress, loop_count)