# imports and setup


In [1]:
import sys
from datetime import datetime

from catppuccin.extras.rich_ctp import mocha
from rich import pretty
from rich.console import Console
from rich.pretty import pprint

# Install rich's pretty-printer so bare expressions at the end of a cell are
# rendered through rich.
pretty.install()

# Put the parent of the current working directory on sys.path so the `src`
# package can be imported (assumes the notebook is run from a sub-directory of
# the repository root -- TODO confirm).
if ".." not in sys.path:
    sys.path.insert(0, "..")

from src.config.settings import get_settings
from src.utils.logger import create_logger, ChatPrinter

console = Console(theme=mocha)
settings = get_settings()
logger = create_logger(path=settings.paths.logs_dir)
printer = ChatPrinter()
# `settings.env` holds API credentials (e.g. NEBIUS_API_KEY, used when the chat
# model is built), so it is excluded from the dump: log output is persisted in
# the notebook file and must not carry secrets.
logger.debug(
    f"settings loaded as \n{settings.model_dump_json(indent=2, exclude={'env'})}"
)

[37m[2m01:20 AM[0m[37m[0m | [37m[2m1228436519.L:21 [0m[37m[0m | [37m[22mDEBUG    [0m | [37m[22msettings loaded as 
{
  "models": {
    "hf": {
      "chat": "meta-llama/Llama-3.2-3B-Instruct:together",
      "reasoning": "zai-org/GLM-4.7-Flash:novita",
      "structured_output": "Qwen/Qwen3-Coder-30B-A3B-Instruct:ovhcloud",
      "embedding_snowflake": "Snowflake/snowflake-arctic-embed-l-v2.0",
      "embedding_specter": "allenai/specter2_base",
      "encoder": "m3rg-iitd/matscibert",
      "reranker": "Qwen/Qwen3-Reranker-0.6B",
      "router": "openai/gpt-oss-20b:together"
    },
    "nebius": {
      "reasoning": "deepseek-ai/DeepSeek-V3.2",
      "tool_user": "Qwen/Qwen3-30B-A3B-Instruct-2507",
      "chat": "meta-llama/Meta-Llama-3.1-8B-Instruct-fast",
      "embedding_baai_bge": "BAAI/bge-multilingual-gemma2",
      "router": "openai/gpt-oss-20b"
    }
  },
  "paths": {
    "base_dir": "/home/rudy/code/lattice/src",
    "data_dir": "/home/rudy/code/lattice/src/dat

In [2]:
from typing import Annotated, Dict, List, Optional, TypedDict, Literal

from langchain_core.messages import (
    AIMessage,
    HumanMessage,
    AIMessageChunk,
    MessageLikeRepresentation,
    SystemMessage,
    ToolMessage,
    filter_messages,
    get_buffer_string,
    convert_to_openai_messages,
)
from langchain_core.tools import tool
from langchain_core.utils.function_calling import convert_to_openai_tool
from langchain_openai import ChatOpenAI
from langchain_nebius import ChatNebius
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, StateGraph, MessagesState
from langgraph.prebuilt import ToolNode
from langgraph.graph.message import add_messages
from langgraph.types import Command, interrupt
from pydantic import BaseModel, Field, create_model
from pydantic.json_schema import SkipJsonSchema

In [3]:
from src.backends.virtual_filesystem import VirtualFilesystem
from src.prompts import (
    planning_mode_systemm_prompt,
    planning_structured_output,
)
from src.schemas import RESEARCH_PLAN_TEMPLATE, ResearchPlan
from src.schemas.prompts import SystemPromptTemplate
from src.tools.filesystem import create_filesystem_tools
from src.tools.utils import tool_with_auto_doc, SkipSchema
from src.utils.stats import accumulate_usage, add_usage_metadata

# Version label for this notebook; not referenced by the other cells shown here.
version = "0.0.1-alpha"

In [4]:
# Build the filesystem tools on top of a fresh VirtualFilesystem instance.
# NOTE(review): `filesystem_tools` is not bound to the model or the ToolNode in
# the cells shown here.
filesystem_tools = create_filesystem_tools(VirtualFilesystem())

# state


In [5]:
class TodoItems(TypedDict):
    """One to-do entry: a text `content` plus a list of string `subitems`."""

    content: str
    subitems: List[str]


# class UsageMetadata(TypedDict):
#     input_tokens: int
#     output_tokens: int
#     total_tokens: int
#     input_token_details: dict
#     output_token_details: dict


class AgentState(TypedDict):
    """State schema given to `StateGraph` and received by the graph's nodes.

    `messages` and `thoughts` are annotated with the `add_messages` reducer;
    the remaining keys are plain fields with no reducer attached.
    """

    # list of messages
    messages: Annotated[List[MessageLikeRepresentation], add_messages]
    # # running statistics
    # # TODO: decide if this is even needed
    # # are we doing more than one LLM call per node?
    # # if not, we don't need this
    # usage_metadata: Annotated[UsageMetadata, add_usage_metadata] = {
    #     "input_tokens": 0,
    #     "output_tokens": 0,
    #     "total_tokens": 0,
    #     "input_token_details": {},
    #     "output_token_details": {},
    # }
    # current mode
    mode: Literal["ask", "planning", "execution"]
    # whatever the agent needs across all the modes
    todo: List[TodoItems]
    research_plan: str | ResearchPlan
    plan_approval_status: Literal["pending", "rejected", "approved"]
    # thoughts
    # NOTE(review): declared as AIMessage items, but `ask_node` returns plain
    # reflection strings for this key -- confirm how `add_messages` coerces them.
    thoughts: Annotated[List[AIMessage], add_messages]

# LLM


## structured output for ask node


In [6]:
# Structured-output schema for the ask node: a reply `message` plus a
# `ready_to_draft_plan` flag.
# Documented with comments rather than a class docstring because a docstring on
# a pydantic model becomes part of its generated JSON schema.
# NOTE(review): this model is not used by the graph cells shown here, and the
# `ready_to_draft_plan_tool` named in the description is not defined in them.
# NOTE(review): "recieve" in the field description is a misspelling of "receive".
class AskNodeResponse(BaseModel):
    message: str = Field(description="The response to the user message.")
    ready_to_draft_plan: bool = Field(
        description="Classify if you have enough information to draft the research plan given the conversation history. You will recieve this after using the ready_to_draft_plan_tool."
    )

In [7]:
# Chat model served through the Nebius endpoint via the OpenAI-compatible client.
# NOTE(review): a later cell rebinds `chat_llm` with the same arguments minus
# `streaming=True`; when the cells run top to bottom, that later binding is the
# one `ask_node` picks up.
chat_llm = ChatOpenAI(
    model=settings.models.nebius.tool_user,
    api_key=settings.env.NEBIUS_API_KEY,
    base_url=settings.env.NEBIUS_API_ENDPOINT,
    max_completion_tokens=8192,
    temperature=0.0,
    top_p=0.7,
    streaming=True,
)

# Graph


In [None]:
# Reflection tool exposed to the model. The function only echoes the reflection
# back in a confirmation string; the `state` argument is accepted but not read.
# NOTE(review): the docstring is presumably turned into the tool description by
# `tool_with_auto_doc`, so it is model-facing text -- edit it with care.
@tool_with_auto_doc
def think_tool(reflection: str, state: SkipSchema[Dict] = None) -> str:
    """Strategic reflection and thinking tool. Use this tool to reflect on the conversation so far and what you should do next. Ask questions such as:
    - What does the user want me to do next?
    - What information do I have so far?
    - Do I have enough information to give a final answer?
    - If i am missing information, what should I ask for?

    Args:
        reflection (str): Your detailed reflection on the conversation so far. Your thought should not be more than 2 sentences.

    Returns:
        str: Confirmation that reflection was recorded for decision making
    """
    return f"Reflection recorded: {reflection}"


In [9]:
# System prompt for the ask node. `ask_node` renders it with `.to_markdown()`
# and sends it as the first message of every model call. The `tools` and
# `workflow` strings are model-facing text.
short_system_prompt = SystemPromptTemplate(
    name="Assistant",
    node_name="Helper",
    description="You are a helpful and thoughtful assistant who thinks before answering",
    tools="""You have access to the following tools:
- `think_tool`: Use this tool to reflect and think strategically. Use your reflections to refine your next course of action. You are allowed to call this tool a maximum of 5 times.
""",
    workflow="""Given a user query, broadly follow the below steps:
1. Judge if the query is complex. 
    a. Look at the conversation history to understand if the current context of the conversation is complex or straightforward.
    b. Straightforward queries are usually like:
        - small talk (eg: "how are you?", "how can you help me?" etc)
        - basic questions (eg: simple arithmetic, questions about universal facts etc)
2. if the query is deemed to be straightforward, then answer the question directly.
3. if the query is deemed complex, 
    a. use the `think_tool` to think and reflect. You should ideally think about:
        - what information you need to answer the user's question
        - if you have those information available
        - if you need more information, how should you collect it
        - once you have all information, how should you respond
        - what follow up's can the user come up with
        - how can you tackle those follow up's
    b. for each thought, make a single call for `think_tool`
""",
)
# Log the rendered prompt so the exact text sent to the model can be inspected.
logger.debug(short_system_prompt.to_markdown())

[37m[2m01:20 AM[0m[37m[0m | [37m[2m2770952278.L:26 [0m[37m[0m | [37m[22mDEBUG    [0m | [37m[22m# Helper: SYSTEM PROMPT

## PROFILE
- NAME: Assistant
- ROLE: Helper
- DESCRIPTION: You are a helpful and thoughtful assistant who thinks before answering
- MODE: ASK
- DATE: February 2026

## TRAITS
Your core traits are:
- helpful
- friendly

## TOOLS
You have access to the following tools:
- `think_tool`: Use this tool to reflect and think strategically. Use your reflections to refine your next course of action. You are allowed to call this tool a maximum of 5 times.

## SKILLS
None - You do not have access to any skills yet.

## DOMAIN KNOWLEDGE
None - You do not have any domain knowledge yet.

## WORKFLOW
Given a user query, broadly follow the below steps:
1. Judge if the query is complex. 
    a. Look at the conversation history to understand if the current context of the conversation is complex or straightforward.
    b. Straightforward queries are usually like:
        -

In [10]:
# NOTE(review): duplicate definition. This rebinds `chat_llm` with the same
# arguments as the earlier cell except that `streaming=True` is omitted.
# `ask_node` looks `chat_llm` up when it is called, so this is the instance it
# uses when the cells run in order. Consider keeping a single definition.
chat_llm = ChatOpenAI(
    model=settings.models.nebius.tool_user,
    api_key=settings.env.NEBIUS_API_KEY,
    base_url=settings.env.NEBIUS_API_ENDPOINT,
    max_completion_tokens=8192,
    temperature=0.0,
    top_p=0.7,
)

In [11]:
def ask_node(state: AgentState) -> AgentState | Dict:
    """Run one model turn for the "ask" node.

    The model sees, in order: the rendered system prompt, the conversation in
    ``state["messages"]`` and, only when earlier thoughts exist, a system
    header followed by those thoughts.

    Args:
        state: Current graph state. ``messages`` is required; ``thoughts`` is
            optional and treated as empty when missing.

    Returns:
        A partial state update with the model response under ``messages`` and
        the reflections taken from this turn's ``think_tool`` calls under
        ``thoughts`` (an empty list when the model made no such call).
    """
    prior_thoughts = state.get("thoughts") or []
    full_context = [
        SystemMessage(content=short_system_prompt.to_markdown()),
        *state["messages"],
    ]
    # Only announce previous thoughts when there are some; otherwise the prompt
    # would end with a header that introduces nothing.
    if prior_thoughts:
        full_context.append(SystemMessage("Following are your previous thoughts"))
        full_context.extend(prior_thoughts)

    bound_llm = chat_llm.bind_tools(
        tools=[
            think_tool,
        ],
        strict=True,
        tool_choice="auto",
        parallel_tool_calls=True,
    )
    response = bound_llm.invoke(full_context)

    # Keep the reflection text of each think_tool call. A call that arrives
    # without a `reflection` argument is skipped instead of raising KeyError.
    # NOTE(review): these are plain strings while the state declares AIMessage
    # items -- confirm the role `add_messages` assigns when it coerces them.
    thoughts = [
        tool_call["args"]["reflection"]
        for tool_call in response.tool_calls
        if tool_call["name"] == "think_tool" and "reflection" in tool_call["args"]
    ]
    return {"messages": [response], "thoughts": thoughts}


# Prebuilt LangGraph node that runs the `think_tool` calls requested by the model.
tool_node = ToolNode([think_tool])


def tool_or_end_router(state: AgentState) -> Literal["tool", "end"]:
    """Choose the next hop after the ask node.

    Looks at the most recent entry in ``state["messages"]``: returns ``"tool"``
    when it carries tool calls, otherwise ``"end"``.
    """
    newest: AIMessage = state["messages"][-1]
    return "tool" if newest.tool_calls else "end"


# Wire the two-node loop: START -> ask -> (tool -> ask)* -> END.
graph = StateGraph(AgentState)
graph.add_node("ask", ask_node)
graph.add_node("tool", tool_node)
graph.add_edge(START, "ask")
# After the tool node runs, control always returns to the ask node.
graph.add_edge("tool", "ask")
# The router's return value ("tool" / "end") is mapped to the next node.
graph.add_conditional_edges("ask", tool_or_end_router, {"tool": "tool", "end": END})
agent = graph.compile()

In [None]:
# Send one user message through the agent and render each node update as it arrives.
msg = "explain the solution to quadratic equations"
printer.user(msg)
all_msgs = []  # every message emitted by the graph nodes, in arrival order
# NOTE(review): nothing in this cell appends to `all_tokens`. The stream runs in
# "updates" mode, which yields whole node updates rather than token chunks.
all_tokens = []
for chunk in agent.stream(
    {"messages": [HumanMessage(msg)]},
    stream_mode="updates",
):
    for step, data in chunk.items():
        # Skip update payloads that are not dicts, since `.get` would fail on
        # them (presumably a node may publish no update -- TODO confirm).
        if not isinstance(data, dict):
            continue
        messages = data.get("messages", [])
        for m in messages:
            all_msgs.append(m)
            if isinstance(m, AIMessage):
                # Tool calls interrupt streaming
                if m.tool_calls:
                    printer._ensure_stream_closed()
                    for t in m.tool_calls:
                        printer.tool(t["name"], status="running", args=t["args"])
                # Sometimes final structured content arrives here
                # (only print if not already streamed token-wise)
                if m.content and not m.tool_calls:
                    # Only print if stream wasn't used
                    if not printer._ai_stream_active:
                        printer.ai(m.content)
                # Usage metadata normally arrives here
                if m.usage_metadata:
                    printer._ensure_stream_closed()
                    printer.token_usage(
                        m.usage_metadata.get("input_tokens", 0),
                        m.usage_metadata.get("output_tokens", 0),
                        latency=0,  # latency is not measured in this cell
                    )
            elif isinstance(m, ToolMessage):
                printer.tool(
                    m.name,
                    status="finished",
                )

In [22]:
# Show the second entry (index 1) of `all_tokens`; raises IndexError when fewer
# than two entries have been collected.
all_tokens[1]


[1;35mAIMessageChunk[0m[1m([0m
    [33mcontent[0m=[32m''[0m,
    [33madditional_kwargs[0m=[1m{[0m[1m}[0m,
    [33mresponse_metadata[0m=[1m{[0m[32m'model_provider'[0m: [32m'openai'[0m[1m}[0m,
    [33mid[0m=[32m'lc_run--019c3ed2-c4e6-7a20-b15a-84a49d15dd21'[0m,
    [33mtool_calls[0m=[1m[[0m
        [1m{[0m
            [32m'name'[0m: [32m'think_tool'[0m,
            [32m'args'[0m: [1m{[0m[1m}[0m,
            [32m'id'[0m: [32m'chatcmpl-tool-b698ae1d11bf4e87a664e880611a99d3'[0m,
            [32m'type'[0m: [32m'tool_call'[0m
        [1m}[0m
    [1m][0m,
    [33minvalid_tool_calls[0m=[1m[[0m[1m][0m,
    [33mtool_call_chunks[0m=[1m[[0m
        [1m{[0m
            [32m'name'[0m: [32m'think_tool'[0m,
            [32m'args'[0m: [3;35mNone[0m,
            [32m'id'[0m: [32m'chatcmpl-tool-b698ae1d11bf4e87a664e880611a99d3'[0m,
            [32m'index'[0m: [1;36m0[0m,
            [32m'type'[0m: [32m'tool_call_chunk'

In [15]:
# Direct call to the chat model with no tools and no graph, as a connectivity check.
chat_llm.invoke([HumanMessage("How are you?")])


[1;35mAIMessage[0m[1m([0m
    [33mcontent[0m=[32m"I[0m[32m'm functioning well, thank you for asking! I'm here and ready to help you with whatever you need. How can I assist you today? ðŸ˜Š"[0m,
    [33madditional_kwargs[0m=[1m{[0m[32m'refusal'[0m: [3;35mNone[0m[1m}[0m,
    [33mresponse_metadata[0m=[1m{[0m
        [32m'token_usage'[0m: [1m{[0m
            [32m'completion_tokens'[0m: [1;36m33[0m,
            [32m'prompt_tokens'[0m: [1;36m12[0m,
            [32m'total_tokens'[0m: [1;36m45[0m,
            [32m'completion_tokens_details'[0m: [3;35mNone[0m,
            [32m'prompt_tokens_details'[0m: [3;35mNone[0m
        [1m}[0m,
        [32m'model_provider'[0m: [32m'openai'[0m,
        [32m'model_name'[0m: [32m'Qwen/Qwen3-30B-A3B-Instruct-2507'[0m,
        [32m'system_fingerprint'[0m: [3;35mNone[0m,
        [32m'id'[0m: [32m'chatcmpl-dfe1227f9d1b4e02a2aa048fe4c5b2f4'[0m,
        [32m'finish_reason'[0m: [32m'stop'[0m,
  