Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/aish/llm/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,17 @@ def event_proxy_callback(event):
"Cancelled by parent session",
)

forwarded_event_types = {
LLMEventType.TOOL_EXECUTION_START,
LLMEventType.TOOL_EXECUTION_END,
LLMEventType.ERROR,
LLMEventType.CANCELLED,
LLMEventType.TOOL_CONFIRMATION_REQUIRED,
LLMEventType.INTERACTION_REQUIRED,
}

# Forward event to parent callback if available
if self.parent_event_callback:
if self.parent_event_callback and event.event_type in forwarded_event_types:
# Add source information to the event data
modified_data = event.data.copy() if event.data else {}
modified_data["source"] = "system_diagnose_agent"
Expand Down
13 changes: 10 additions & 3 deletions src/aish/shell/runtime/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,9 @@ def handle_content_delta(self, event) -> None:
self.current_live.stop()
self.current_live = None

if not self._ttft_recorded and self._thinking_start_time > 0:
is_final = bool(event.data.get("is_final", True))

if is_final and not self._ttft_recorded and self._thinking_start_time > 0:
self._ttft = time.monotonic() - self._thinking_start_time
self._ttft_recorded = True

Expand All @@ -605,9 +607,11 @@ def handle_content_delta(self, event) -> None:

if not self._content_preview_active:
self._content_preview_active = True
self._content_streamed_to_terminal = True
content = f"🤖 {content}"

if is_final:
self._content_streamed_to_terminal = True

self.console.print(Text(content, style="bold grey50"), end="")
self._at_line_start = False
return None
Expand Down Expand Up @@ -1125,8 +1129,11 @@ def _setup_pty(self) -> None:
self._pty_manager.start()
if self._pty_manager.startup_cwd:
self._sync_backend_cwd(self._pty_manager.startup_cwd)
self._backend_session_ready = bool(
getattr(self._pty_manager, "startup_session_ready", False)
or getattr(self._pty_manager, "startup_ready", False)
)
if self._pty_manager.startup_ready:
self._backend_session_ready = True
self._shell_phase = "editing"
else:
time.sleep(0.2)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Expand Down
138 changes: 138 additions & 0 deletions tests/shell/runtime/test_shell_pty_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,47 @@ def _terminal_size(*_args, **_kwargs):
sleep_mock.assert_not_called()


def test_setup_pty_preserves_startup_session_ready_without_prompt(monkeypatch, tmp_path):
frontend_cwd = str(tmp_path / "frontend")
backend_cwd = str(tmp_path / "backend")
sleep_mock = Mock()

class _StartedPTYManager:
def __init__(self, *, rows: int, cols: int, cwd: str, use_output_thread: bool):
assert rows == 24
assert cols == 80
assert cwd == frontend_cwd
assert use_output_thread is False
self.startup_session_ready = True
self.startup_ready = False
self.startup_cwd = backend_cwd

def start(self) -> None:
return None

def _terminal_size(*_args, **_kwargs):
return os.terminal_size((80, 24))

monkeypatch.setattr("aish.shell.runtime.app.PTYManager", _StartedPTYManager)
monkeypatch.setattr("aish.shell.runtime.app.shutil.get_terminal_size", _terminal_size)
monkeypatch.setattr("aish.shell.runtime.app.time.sleep", sleep_mock)
monkeypatch.setattr("aish.shell.runtime.app.get_current_env_info", lambda: "env-info")
monkeypatch.setattr("aish.shell.runtime.app.os.chdir", lambda _cwd: None)

shell = object.__new__(PTYAIShell)
shell._current_cwd = frontend_cwd
shell._backend_session_ready = False
shell._shell_phase = "booting"

PTYAIShell._setup_pty(shell)

assert shell._pty_manager is not None
assert shell._current_cwd == backend_cwd
assert shell._backend_session_ready is True
assert shell._shell_phase == "booting"
sleep_mock.assert_called_once_with(0.2)


def test_output_processor_filters_split_user_command_echo_across_chunks():
processor = OutputProcessor(_FakePTYManager())
processor.prepare_user_command_echo("pwd", 5)
Expand Down Expand Up @@ -1369,6 +1410,103 @@ def test_ttft_timing_records_on_first_content_delta():
assert shell._ttft_recorded is True


def test_non_final_content_delta_does_not_mark_content_streamed():
shell = object.__new__(PTYAIShell)
shell.console = Mock()
shell._stop_animation = Mock()
shell._reset_reasoning_state = Mock()
shell.current_live = None
shell._at_line_start = True
shell._last_streaming_accumulated = ""
shell._content_preview_active = False
shell._content_streamed_to_terminal = False
shell._thinking_start_time = 0.0
shell._ttft = 0.0
shell._ttft_recorded = False

PTYAIShell.handle_content_delta(
shell,
Mock(data={"delta": "Working on it", "is_final": False}),
)

assert shell._content_streamed_to_terminal is False
shell.console.print.assert_called_once()


def test_final_content_delta_marks_content_streamed():
shell = object.__new__(PTYAIShell)
shell.console = Mock()
shell._stop_animation = Mock()
shell._reset_reasoning_state = Mock()
shell.current_live = None
shell._at_line_start = True
shell._last_streaming_accumulated = ""
shell._content_preview_active = False
shell._content_streamed_to_terminal = False
shell._thinking_start_time = 0.0
shell._ttft = 0.0
shell._ttft_recorded = False

PTYAIShell.handle_content_delta(
shell,
Mock(data={"delta": "Final answer", "is_final": True}),
)

assert shell._content_streamed_to_terminal is True
shell.console.print.assert_called_once()


def test_non_final_content_delta_does_not_record_ttft():
shell = object.__new__(PTYAIShell)
shell.console = Mock()
shell._stop_animation = Mock()
shell._reset_reasoning_state = Mock()
shell.current_live = None
shell._at_line_start = True
shell._last_streaming_accumulated = ""
shell._content_preview_active = False
shell._content_streamed_to_terminal = False
shell._thinking_start_time = 1.0
shell._ttft = 0.0
shell._ttft_recorded = False

PTYAIShell.handle_content_delta(
shell,
Mock(data={"delta": "Preview before tool", "is_final": False}),
)

assert shell._ttft == 0.0
assert shell._ttft_recorded is False


def test_op_end_does_not_render_ttft_for_non_final_preview_only():
shell = object.__new__(PTYAIShell)
shell.console = Mock()
shell._stop_animation = Mock()
shell._reset_reasoning_state = Mock()
shell.current_live = None
shell._at_line_start = True
shell._last_streaming_accumulated = ""
shell._content_preview_active = False
shell._content_streamed_to_terminal = False
shell._timing_active = True
shell._thinking_start_time = 1.0
shell._ttft = 0.0
shell._ttft_recorded = False

PTYAIShell.handle_content_delta(
shell,
Mock(data={"delta": "Preview before tool", "is_final": False}),
)
PTYAIShell.handle_op_end(shell, Mock())

timing_calls = [
call for call in shell.console.print.call_args_list
if "思考:" in str(call)
]
assert timing_calls == []


def test_ttft_timing_preserves_state_across_generations(monkeypatch):
"""Test that timing state is not reset between generations in multi-generation scenarios."""
import time
Expand Down
70 changes: 69 additions & 1 deletion tests/tools/test_final_answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
3. End-to-end: run a dummy query via outer LLMSession; verify nested agent executes and returns
"""

import time
from unittest.mock import AsyncMock, patch

import pytest

from aish.config import ConfigModel
from aish.llm.agents import SystemDiagnoseAgent
from aish.llm import LLMSession
from aish.llm import LLMEvent, LLMEventType, LLMSession
from aish.skills import SkillManager
from aish.tools.code_exec import BashTool
from aish.tools.final_answer import FinalAnswer
Expand Down Expand Up @@ -219,6 +220,73 @@ def test_mocked_llm_exception_handling(self, mock_llm_session_class):

assert "Error during diagnosis: LLM API error" in result

@patch("aish.llm.LLMSession")
def test_system_diagnose_agent_filters_nested_content_and_lifecycle_events(
self, mock_llm_session_class
):
config = ConfigModel(
model="openrouter/moonshotai/kimi-k2", api_base=None, api_key="sk-test-key"
)
parent_events = []

def parent_event_callback(event):
parent_events.append(event)

agent = SystemDiagnoseAgent(
config=config,
model_id="openrouter/moonshotai/kimi-k2",
api_base=None,
api_key="sk-test-key",
skill_manager=make_skill_manager(),
parent_event_callback=parent_event_callback,
)

mock_subsession = AsyncMock()
mock_llm_session_class.create_subsession.return_value = mock_subsession

async def fake_process_input(*args, **kwargs):
_ = (args, kwargs)
callback = mock_subsession.event_callback
callback(
LLMEvent(
event_type=LLMEventType.OP_START,
data={"turn_id": "child-turn", "operation": "process_input"},
timestamp=time.time(),
)
)
callback(
LLMEvent(
event_type=LLMEventType.CONTENT_DELTA,
data={"delta": "nested content", "is_final": True},
timestamp=time.time(),
)
)
callback(
LLMEvent(
event_type=LLMEventType.TOOL_EXECUTION_END,
data={"tool_name": "final_answer", "result": "final diagnosis"},
timestamp=time.time(),
)
)
callback(
LLMEvent(
event_type=LLMEventType.OP_END,
data={"turn_id": "child-turn", "result": "final diagnosis"},
timestamp=time.time(),
)
)
return "final diagnosis"

mock_subsession.process_input = AsyncMock(side_effect=fake_process_input)

result = agent(query="Check memory")

assert result == "final diagnosis"
assert [event.event_type for event in parent_events] == [
LLMEventType.TOOL_EXECUTION_END,
]
assert parent_events[0].data.get("source") == "system_diagnose_agent"


class TestSystemDiagnoseAgentEndToEnd:
"""End-to-end integration tests for SystemDiagnoseAgent with outer LLMSession."""
Expand Down
Loading