AI-Shell-Team · F16shen · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026
diff --git a/src/aish/llm/agents.py b/src/aish/llm/agents.py
@@ -202,8 +202,17 @@ def event_proxy_callback(event):
                         "Cancelled by parent session",
                     )
 
+            forwarded_event_types = {
+                LLMEventType.TOOL_EXECUTION_START,
+                LLMEventType.TOOL_EXECUTION_END,
+                LLMEventType.ERROR,
+                LLMEventType.CANCELLED,
+                LLMEventType.TOOL_CONFIRMATION_REQUIRED,
+                LLMEventType.INTERACTION_REQUIRED,
+            }
+
             # Forward event to parent callback if available
-            if self.parent_event_callback:
+            if self.parent_event_callback and event.event_type in forwarded_event_types:
                 # Add source information to the event data
                 modified_data = event.data.copy() if event.data else {}
                 modified_data["source"] = "system_diagnose_agent"

diff --git a/src/aish/shell/runtime/app.py b/src/aish/shell/runtime/app.py
@@ -594,7 +594,9 @@ def handle_content_delta(self, event) -> None:
             self.current_live.stop()
             self.current_live = None
 
-        if not self._ttft_recorded and self._thinking_start_time > 0:
+        is_final = bool(event.data.get("is_final", True))
+
+        if is_final and not self._ttft_recorded and self._thinking_start_time > 0:
             self._ttft = time.monotonic() - self._thinking_start_time
             self._ttft_recorded = True
 
@@ -605,9 +607,11 @@ def handle_content_delta(self, event) -> None:
 
         if not self._content_preview_active:
             self._content_preview_active = True
-            self._content_streamed_to_terminal = True
             content = f"🤖 {content}"
 
+        if is_final:
+            self._content_streamed_to_terminal = True
+
         self.console.print(Text(content, style="bold grey50"), end="")
         self._at_line_start = False
         return None
@@ -1125,8 +1129,11 @@ def _setup_pty(self) -> None:
         self._pty_manager.start()
         if self._pty_manager.startup_cwd:
             self._sync_backend_cwd(self._pty_manager.startup_cwd)
+        self._backend_session_ready = bool(
+            getattr(self._pty_manager, "startup_session_ready", False)
+            or getattr(self._pty_manager, "startup_ready", False)
+        )
         if self._pty_manager.startup_ready:
-            self._backend_session_ready = True
             self._shell_phase = "editing"
         else:
             time.sleep(0.2)

diff --git a/tests/shell/runtime/test_shell_pty_core.py b/tests/shell/runtime/test_shell_pty_core.py
@@ -305,6 +305,47 @@ def _terminal_size(*_args, **_kwargs):
     sleep_mock.assert_not_called()
 
 
+def test_setup_pty_preserves_startup_session_ready_without_prompt(monkeypatch, tmp_path):
+    frontend_cwd = str(tmp_path / "frontend")
+    backend_cwd = str(tmp_path / "backend")
+    sleep_mock = Mock()
+
+    class _StartedPTYManager:
+        def __init__(self, *, rows: int, cols: int, cwd: str, use_output_thread: bool):
+            assert rows == 24
+            assert cols == 80
+            assert cwd == frontend_cwd
+            assert use_output_thread is False
+            self.startup_session_ready = True
+            self.startup_ready = False
+            self.startup_cwd = backend_cwd
+
+        def start(self) -> None:
+            return None
+
+    def _terminal_size(*_args, **_kwargs):
+        return os.terminal_size((80, 24))
+
+    monkeypatch.setattr("aish.shell.runtime.app.PTYManager", _StartedPTYManager)
+    monkeypatch.setattr("aish.shell.runtime.app.shutil.get_terminal_size", _terminal_size)
+    monkeypatch.setattr("aish.shell.runtime.app.time.sleep", sleep_mock)
+    monkeypatch.setattr("aish.shell.runtime.app.get_current_env_info", lambda: "env-info")
+    monkeypatch.setattr("aish.shell.runtime.app.os.chdir", lambda _cwd: None)
+
+    shell = object.__new__(PTYAIShell)
+    shell._current_cwd = frontend_cwd
+    shell._backend_session_ready = False
+    shell._shell_phase = "booting"
+
+    PTYAIShell._setup_pty(shell)
+
+    assert shell._pty_manager is not None
+    assert shell._current_cwd == backend_cwd
+    assert shell._backend_session_ready is True
+    assert shell._shell_phase == "booting"
+    sleep_mock.assert_called_once_with(0.2)
+
+
 def test_output_processor_filters_split_user_command_echo_across_chunks():
     processor = OutputProcessor(_FakePTYManager())
     processor.prepare_user_command_echo("pwd", 5)
@@ -1369,6 +1410,103 @@ def test_ttft_timing_records_on_first_content_delta():
     assert shell._ttft_recorded is True
 
 
+def test_non_final_content_delta_does_not_mark_content_streamed():
+    shell = object.__new__(PTYAIShell)
+    shell.console = Mock()
+    shell._stop_animation = Mock()
+    shell._reset_reasoning_state = Mock()
+    shell.current_live = None
+    shell._at_line_start = True
+    shell._last_streaming_accumulated = ""
+    shell._content_preview_active = False
+    shell._content_streamed_to_terminal = False
+    shell._thinking_start_time = 0.0
+    shell._ttft = 0.0
+    shell._ttft_recorded = False
+
+    PTYAIShell.handle_content_delta(
+        shell,
+        Mock(data={"delta": "Working on it", "is_final": False}),
+    )
+
+    assert shell._content_streamed_to_terminal is False
+    shell.console.print.assert_called_once()
+
+
+def test_final_content_delta_marks_content_streamed():
+    shell = object.__new__(PTYAIShell)
+    shell.console = Mock()
+    shell._stop_animation = Mock()
+    shell._reset_reasoning_state = Mock()
+    shell.current_live = None
+    shell._at_line_start = True
+    shell._last_streaming_accumulated = ""
+    shell._content_preview_active = False
+    shell._content_streamed_to_terminal = False
+    shell._thinking_start_time = 0.0
+    shell._ttft = 0.0
+    shell._ttft_recorded = False
+
+    PTYAIShell.handle_content_delta(
+        shell,
+        Mock(data={"delta": "Final answer", "is_final": True}),
+    )
+
+    assert shell._content_streamed_to_terminal is True
+    shell.console.print.assert_called_once()
+
+
+def test_non_final_content_delta_does_not_record_ttft():
+    shell = object.__new__(PTYAIShell)
+    shell.console = Mock()
+    shell._stop_animation = Mock()
+    shell._reset_reasoning_state = Mock()
+    shell.current_live = None
+    shell._at_line_start = True
+    shell._last_streaming_accumulated = ""
+    shell._content_preview_active = False
+    shell._content_streamed_to_terminal = False
+    shell._thinking_start_time = 1.0
+    shell._ttft = 0.0
+    shell._ttft_recorded = False
+
+    PTYAIShell.handle_content_delta(
+        shell,
+        Mock(data={"delta": "Preview before tool", "is_final": False}),
+    )
+
+    assert shell._ttft == 0.0
+    assert shell._ttft_recorded is False
+
+
+def test_op_end_does_not_render_ttft_for_non_final_preview_only():
+    shell = object.__new__(PTYAIShell)
+    shell.console = Mock()
+    shell._stop_animation = Mock()
+    shell._reset_reasoning_state = Mock()
+    shell.current_live = None
+    shell._at_line_start = True
+    shell._last_streaming_accumulated = ""
+    shell._content_preview_active = False
+    shell._content_streamed_to_terminal = False
+    shell._timing_active = True
+    shell._thinking_start_time = 1.0
+    shell._ttft = 0.0
+    shell._ttft_recorded = False
+
+    PTYAIShell.handle_content_delta(
+        shell,
+        Mock(data={"delta": "Preview before tool", "is_final": False}),
+    )
+    PTYAIShell.handle_op_end(shell, Mock())
+
+    timing_calls = [
+        call for call in shell.console.print.call_args_list
+        if "思考:" in str(call)
+    ]
+    assert timing_calls == []
+
+
 def test_ttft_timing_preserves_state_across_generations(monkeypatch):
     """Test that timing state is not reset between generations in multi-generation scenarios."""
     import time

diff --git a/tests/tools/test_final_answer.py b/tests/tools/test_final_answer.py
@@ -7,13 +7,14 @@
 3. End-to-end: run a dummy query via outer LLMSession; verify nested agent executes and returns
 """
 
+import time
 from unittest.mock import AsyncMock, patch
 
 import pytest
 
 from aish.config import ConfigModel
 from aish.llm.agents import SystemDiagnoseAgent
-from aish.llm import LLMSession
+from aish.llm import LLMEvent, LLMEventType, LLMSession
 from aish.skills import SkillManager
 from aish.tools.code_exec import BashTool
 from aish.tools.final_answer import FinalAnswer
@@ -219,6 +220,73 @@ def test_mocked_llm_exception_handling(self, mock_llm_session_class):
 
         assert "Error during diagnosis: LLM API error" in result
 
+    @patch("aish.llm.LLMSession")
+    def test_system_diagnose_agent_filters_nested_content_and_lifecycle_events(
+        self, mock_llm_session_class
+    ):
+        config = ConfigModel(
+            model="openrouter/moonshotai/kimi-k2", api_base=None, api_key="sk-test-key"
+        )
+        parent_events = []
+
+        def parent_event_callback(event):
+            parent_events.append(event)
+
+        agent = SystemDiagnoseAgent(
+            config=config,
+            model_id="openrouter/moonshotai/kimi-k2",
+            api_base=None,
+            api_key="sk-test-key",
+            skill_manager=make_skill_manager(),
+            parent_event_callback=parent_event_callback,
+        )
+
+        mock_subsession = AsyncMock()
+        mock_llm_session_class.create_subsession.return_value = mock_subsession
+
+        async def fake_process_input(*args, **kwargs):
+            _ = (args, kwargs)
+            callback = mock_subsession.event_callback
+            callback(
+                LLMEvent(
+                    event_type=LLMEventType.OP_START,
+                    data={"turn_id": "child-turn", "operation": "process_input"},
+                    timestamp=time.time(),
+                )
+            )
+            callback(
+                LLMEvent(
+                    event_type=LLMEventType.CONTENT_DELTA,
+                    data={"delta": "nested content", "is_final": True},
+                    timestamp=time.time(),
+                )
+            )
+            callback(
+                LLMEvent(
+                    event_type=LLMEventType.TOOL_EXECUTION_END,
+                    data={"tool_name": "final_answer", "result": "final diagnosis"},
+                    timestamp=time.time(),
+                )
+            )
+            callback(
+                LLMEvent(
+                    event_type=LLMEventType.OP_END,
+                    data={"turn_id": "child-turn", "result": "final diagnosis"},
+                    timestamp=time.time(),
+                )
+            )
+            return "final diagnosis"
+
+        mock_subsession.process_input = AsyncMock(side_effect=fake_process_input)
+
+        result = agent(query="Check memory")
+
+        assert result == "final diagnosis"
+        assert [event.event_type for event in parent_events] == [
+            LLMEventType.TOOL_EXECUTION_END,
+        ]
+        assert parent_events[0].data.get("source") == "system_diagnose_agent"
+
 
 class TestSystemDiagnoseAgentEndToEnd:
     """End-to-end integration tests for SystemDiagnoseAgent with outer LLMSession."""