Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/eva/assistant/agentic/audit_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class ConversationMessage(BaseModel):
tool_call_id: Optional[str] = None
name: Optional[str] = None # For tool messages
turn_id: Optional[int] = None # For associating transcription updates
reasoning: Optional[str] = None # For model reasoning (e.g., from OpenAI o1)

def to_dict(self) -> dict[str, Any]:
"""Convert to a plain dict, excluding None fields and internal tracking fields."""
Expand Down Expand Up @@ -268,6 +269,9 @@ def append_llm_call(self, llm_call: LLMCall, agent_name: Optional[str] = None) -
"timestamp": current_timestamp_ms(),
"message_type": "llm_call",
}
# Add reasoning if present
if llm_call.response and llm_call.response.reasoning:
transcript_entry["value"]["reasoning"] = llm_call.response.reasoning
self.transcript.append(transcript_entry)

def append_tool_call(
Expand Down
3 changes: 3 additions & 0 deletions src/eva/assistant/agentic/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ async def _run_tool_loop(
"latency": llm_stats.get("latency", 0.0),
"parameters": json.dumps(llm_stats.get("parameters", {})),
"tool_calls": json.dumps(response_tool_calls_for_stats) if response_tool_calls_for_stats else "",
"reasoning": f'"{llm_stats.get("reasoning_content", "")}"',
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we're using a formatted string here?

Copy link
Copy Markdown
Collaborator Author

@raghavm243512 raghavm243512 Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah — the content is written into the CSV file with no delimiters around the string, so if the agent's output contained a comma, any tool parsing the CSV would break on that row.

}
self.agent_perf_stats.append(perf_stat)
logger.debug(
Expand All @@ -217,6 +218,7 @@ async def _run_tool_loop(
role=MessageRole.ASSISTANT,
content=response_content,
tool_calls=tool_calls_dicts if tool_calls_dicts else None,
reasoning=llm_stats.get("reasoning"),
)

llm_call = LLMCall(
Expand Down Expand Up @@ -381,6 +383,7 @@ def save_agent_perf_stats(self) -> None:
"parameters",
"tool_calls",
"latency",
"reasoning",
]
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
Expand Down
4 changes: 4 additions & 0 deletions src/eva/assistant/pipeline/alm_vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ async def complete(
message = response.choices[0].message
usage = response.usage

# Extract reasoning if present (OpenAI o1 and compatible models)
reasoning = getattr(message, "reasoning_content", None)

stats = {
"prompt_tokens": usage.prompt_tokens if usage else 0,
"completion_tokens": usage.completion_tokens if usage else 0,
Expand All @@ -206,6 +209,7 @@ async def complete(
"cost": 0.0, # Self-hosted, no API cost
"cost_source": "self_hosted",
"latency": round(elapsed, 3),
"reasoning": reasoning,
}

if hasattr(message, "tool_calls") and message.tool_calls:
Expand Down
4 changes: 4 additions & 0 deletions src/eva/assistant/services/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ async def complete(
response_cost = hidden_params.get("response_cost")
cost_source = "litellm"

# Extract reasoning if present (OpenAI o1 and compatible models)
reasoning = getattr(message, "reasoning_content", None)

stats = {
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
Expand All @@ -88,6 +91,7 @@ async def complete(
"cost": response_cost,
"cost_source": cost_source,
"latency": round(elapsed_time, 3),
"reasoning": reasoning,
}

if hasattr(message, "tool_calls") and message.tool_calls:
Expand Down
21 changes: 21 additions & 0 deletions tests/unit/assistant/test_audit_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,27 @@ def test_append_llm_call_no_response(self):
assert self.log.llm_prompts[0]["response"] == ""
assert self.log.llm_prompts[0]["response_message"] is None

def test_append_llm_call_with_reasoning(self):
    """An LLM call whose response carries reasoning text surfaces it in the transcript entry."""
    reasoning_text = "I thought about this carefully..."
    assistant_msg = ConversationMessage(
        role=MessageRole.ASSISTANT,
        content="Sure!",
        reasoning=reasoning_text,
    )
    call = LLMCall(
        messages=[{"role": "user", "content": "Hi"}],
        response=assistant_msg,
        duration_seconds=1.5,
        start_time="100",
        end_time="200",
        model="o1-preview",
        latency_ms=1500.0,
    )

    self.log.append_llm_call(call, agent_name="TestAgent")

    # Exactly one transcript entry, carrying both the reasoning and the response text.
    assert len(self.log.transcript) == 1
    entry_value = self.log.transcript[0]["value"]
    assert "reasoning" in entry_value
    assert entry_value["reasoning"] == reasoning_text
    assert entry_value["response"] == "Sure!"

def test_append_tool_call_without_response(self):
self.log.append_tool_call("search", {"query": "test"})
assert len(self.log.transcript) == 1
Expand Down
Loading