NVIDIA · deepujain · May 8, 2026 · May 8, 2026 · May 8, 2026 · May 8, 2026
diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
@@ -1492,6 +1492,41 @@ function getNavigationChoice(value = ""): "back" | "exit" | null {
   return null;
 }
 
+/**
+ * Map a reasoning-flag alias (true/1/yes/y or false/0/no/n; trimmed, case-insensitive) to "true"/"false".
+ */
+function normalizeReasoningFlag(value: string | null | undefined): "true" | "false" | null {
+  const normalized = String(value ?? "") // null/undefined collapse to ""
+    .trim()
+    .toLowerCase();
+  if (normalized === "true" || normalized === "1" || normalized === "yes" || normalized === "y") {
+    return "true";
+  }
+  if (normalized === "false" || normalized === "0" || normalized === "no" || normalized === "n") {
+    return "false";
+  }
+  return null; // empty or unrecognized input
+}
+
+/**
+ * Resolve reasoning mode (storedValue first, NEMOCLAW_REASONING only if that is nullish) and mirror it into process.env.
+ */
+async function configureCompatibleEndpointReasoning(
+  storedValue?: string | null,
+): Promise<"true" | "false"> {
+  const configured = normalizeReasoningFlag(storedValue ?? process.env.NEMOCLAW_REASONING);
+  process.env.NEMOCLAW_REASONING = configured ?? "false"; // unrecognized/missing -> "false"
+  return process.env.NEMOCLAW_REASONING as "true" | "false";
+}
+
+/**
+ * Delete process.env.NEMOCLAW_REASONING; returns null so a caller can reset its own reasoning value by assignment.
+ */
+function clearCompatibleEndpointReasoning(): null {
+  delete process.env.NEMOCLAW_REASONING;
+  return null;
+}
+
 function exitOnboardFromPrompt(): never {
   console.log("  Exiting onboarding.");
   process.exit(1);
@@ -2043,7 +2078,7 @@ print(json.dumps({
     "messages": [
         {"role": "user", "content": "Reply with exactly: PONG"}
     ],
-    "max_tokens": 32,
+    "max_tokens": 512,
 }))
 PYPAYLOAD
 
@@ -2081,7 +2116,19 @@ content = (
     .get("content")
 )
 if not isinstance(content, str) or not content.strip():
-    print("inference.local response did not contain choices[0].message.content: %s" % json.dumps(data)[:1000], file=sys.stderr)
+    content = (
+        data.get("choices", [{}])[0]
+        .get("message", {})
+        .get("reasoning_content")
+    )
+if not isinstance(content, str) or not content.strip():
+    content = (
+        data.get("choices", [{}])[0]
+        .get("message", {})
+        .get("reasoning")
+    )
+if not isinstance(content, str) or not content.strip():
+    print("inference.local response did not contain message content or reasoning text: %s" % json.dumps(data)[:1000], file=sys.stderr)
     sys.exit(1)
 
 print("INFERENCE_SMOKE_OK " + content.strip()[:200])
@@ -2945,10 +2992,12 @@ async function validateCustomOpenAiLikeSelection(
   helpUrl: string | null = null,
 ): Promise<EndpointValidationResult> {
   const apiKey = getCredential(credentialEnv);
+  const reasoningEnabled = process.env.NEMOCLAW_REASONING === "true";
   const probe = probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, {
-    requireResponsesToolCalling: true,
-    skipResponsesProbe: shouldForceCompletionsApi(process.env.NEMOCLAW_PREFERRED_API),
-    probeStreaming: true,
+    requireResponsesToolCalling: !reasoningEnabled,
+    skipResponsesProbe:
+      reasoningEnabled || shouldForceCompletionsApi(process.env.NEMOCLAW_PREFERRED_API),
+    probeStreaming: !reasoningEnabled,
   });
   if (probe.ok) {
     if (probe.note) {
@@ -6074,6 +6123,7 @@ async function setupNim(
   endpointUrl: string | null;
   credentialEnv: string | null;
   preferredInferenceApi: string | null;
+  compatibleEndpointReasoning: string | null;
   nimContainer: string | null;
 }> {
   step(3, 8, "Configuring inference (NIM)");
@@ -6084,6 +6134,7 @@ async function setupNim(
   let endpointUrl: string | null = REMOTE_PROVIDER_CONFIG.build.endpointUrl;
   let credentialEnv: string | null = REMOTE_PROVIDER_CONFIG.build.credentialEnv;
   let preferredInferenceApi: string | null = null;
+  let compatibleEndpointReasoning: string | null = null;
 
   // Detect local inference options. Bound curl with --connect-timeout/--max-time
   // so a half-open port or stalled listener cannot hang the onboard at step 3
@@ -6599,6 +6650,11 @@ async function setupNim(
             }
 
             if (selected.key === "custom") {
+              const reasoning = await configureCompatibleEndpointReasoning();
+              compatibleEndpointReasoning = reasoning;
+              if (reasoning === "true") {
+                console.log("  Reasoning mode enabled for this compatible endpoint model.");
+              }
               const validation = await validateCustomOpenAiLikeSelection(
                 remoteConfig.label,
                 endpointUrl || OPENAI_ENDPOINT_URL,
@@ -6644,6 +6700,7 @@ async function setupNim(
                 continue selectionLoop;
               }
             } else if (selected.key === "anthropicCompatible") {
+              compatibleEndpointReasoning = clearCompatibleEndpointReasoning();
               const validation = await validateCustomAnthropicSelection(
                 remoteConfig.label,
                 endpointUrl || ANTHROPIC_ENDPOINT_URL,
@@ -7211,7 +7268,18 @@ async function setupNim(
     }
   }
 
-  return { model, provider, endpointUrl, credentialEnv, preferredInferenceApi, nimContainer };
+  if (provider !== "compatible-endpoint") {
+    compatibleEndpointReasoning = clearCompatibleEndpointReasoning();
+  }
+  return {
+    model,
+    provider,
+    endpointUrl,
+    credentialEnv,
+    preferredInferenceApi,
+    compatibleEndpointReasoning,
+    nimContainer,
+  };
 }
 
 // ── Step 4: Inference provider ───────────────────────────────────
@@ -9357,6 +9425,7 @@ function toSessionUpdates(
     endpointUrl?: string | null;
     credentialEnv?: string | null;
     preferredInferenceApi?: string | null;
+    compatibleEndpointReasoning?: string | null;
     nimContainer?: string | null;
     webSearchConfig?: WebSearchConfig | null;
     policyPresets?: string[] | null;
@@ -9376,6 +9445,9 @@ function toSessionUpdates(
   if (updates.preferredInferenceApi !== undefined) {
     normalized.preferredInferenceApi = toOptionalString(updates.preferredInferenceApi);
   }
+  if (updates.compatibleEndpointReasoning !== undefined) {
+    normalized.compatibleEndpointReasoning = updates.compatibleEndpointReasoning ?? null;
+  }
   if (updates.nimContainer !== undefined)
     normalized.nimContainer = toOptionalString(updates.nimContainer);
   if (updates.webSearchConfig !== undefined) normalized.webSearchConfig = updates.webSearchConfig;
@@ -9911,6 +9983,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
     let endpointUrl = session?.endpointUrl || null;
     let credentialEnv = session?.credentialEnv || null;
     let preferredInferenceApi = session?.preferredInferenceApi || null;
+    let compatibleEndpointReasoning = session?.compatibleEndpointReasoning || null;
     let nimContainer = session?.nimContainer || null;
     let webSearchConfig = session?.webSearchConfig || null;
     let forceProviderSelection = false;
@@ -9924,6 +9997,10 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
       if (resumeProviderSelection) {
         skippedStepMessage("provider_selection", `${provider} / ${model}`);
         hydrateCredentialEnv(credentialEnv);
+        compatibleEndpointReasoning =
+          provider === "compatible-endpoint"
+            ? await configureCompatibleEndpointReasoning(compatibleEndpointReasoning)
+            : clearCompatibleEndpointReasoning();
       } else {
         // #2753: do not persist sandboxName to onboard-session.json before
         // the sandbox actually exists in the gateway (Step 6 markStepComplete
@@ -9937,6 +10014,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
         endpointUrl = selection.endpointUrl;
         credentialEnv = selection.credentialEnv;
         preferredInferenceApi = selection.preferredInferenceApi;
+        compatibleEndpointReasoning = selection.compatibleEndpointReasoning;
         nimContainer = selection.nimContainer;
         onboardSession.markStepComplete(
           "provider_selection",
@@ -9946,6 +10024,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
             endpointUrl,
             credentialEnv,
             preferredInferenceApi,
+            compatibleEndpointReasoning,
             nimContainer,
           }),
         );
@@ -10425,6 +10504,8 @@ module.exports = {
   printSandboxCreateRecoveryHints,
   promptYesNoOrDefault,
   providerExistsInGateway,
+  normalizeReasoningFlag,
+  configureCompatibleEndpointReasoning,
   parsePolicyPresetEnv,
   parseSandboxStatus,
   pruneStaleSandboxEntry,

diff --git a/src/lib/state/onboard-session.ts b/src/lib/state/onboard-session.ts
@@ -76,6 +76,7 @@ export interface Session {
   endpointUrl: string | null;
   credentialEnv: string | null;
   preferredInferenceApi: string | null;
+  compatibleEndpointReasoning: string | null;
   nimContainer: string | null;
   routerPid: number | null;
   routerCredentialHash: string | null;
@@ -127,6 +128,7 @@ export interface SessionUpdates {
   endpointUrl?: string;
   credentialEnv?: string;
   preferredInferenceApi?: string;
+  compatibleEndpointReasoning?: string | null;
   nimContainer?: string;
   routerPid?: number;
   routerCredentialHash?: string;
@@ -154,6 +156,7 @@ export interface DebugSessionSummary {
   endpointUrl: string | null;
   credentialEnv: string | null;
   preferredInferenceApi: string | null;
+  compatibleEndpointReasoning: string | null;
   nimContainer: string | null;
   policyPresets: string[] | null;
   gpuPassthrough: boolean;
@@ -309,6 +312,7 @@ export function createSession(overrides: Partial<Session> = {}): Session {
     endpointUrl: overrides.endpointUrl ?? null,
     credentialEnv: overrides.credentialEnv ?? null,
     preferredInferenceApi: overrides.preferredInferenceApi ?? null,
+    compatibleEndpointReasoning: overrides.compatibleEndpointReasoning ?? null,
     nimContainer: overrides.nimContainer ?? null,
     routerPid: readPositiveInteger(overrides.routerPid),
     routerCredentialHash: overrides.routerCredentialHash ?? null,
@@ -348,6 +352,7 @@ export function normalizeSession(data: Session | SessionJsonValue | undefined):
     endpointUrl: typeof data.endpointUrl === "string" ? redactUrl(data.endpointUrl) : null,
     credentialEnv: readString(data.credentialEnv),
     preferredInferenceApi: readString(data.preferredInferenceApi),
+    compatibleEndpointReasoning: readString(data.compatibleEndpointReasoning),
     nimContainer: readString(data.nimContainer),
     routerPid: readPositiveInteger(data.routerPid),
     routerCredentialHash: readString(data.routerCredentialHash),
@@ -710,6 +715,11 @@ export function filterSafeUpdates(updates: SessionUpdates): Partial<Session> {
   if (typeof updates.credentialEnv === "string") safe.credentialEnv = updates.credentialEnv;
   if (typeof updates.preferredInferenceApi === "string")
     safe.preferredInferenceApi = updates.preferredInferenceApi;
+  if (typeof updates.compatibleEndpointReasoning === "string") {
+    safe.compatibleEndpointReasoning = updates.compatibleEndpointReasoning;
+  } else if (updates.compatibleEndpointReasoning === null) {
+    safe.compatibleEndpointReasoning = null;
+  }
   if (typeof updates.nimContainer === "string") safe.nimContainer = updates.nimContainer;
   if (typeof updates.routerPid === "number" && Number.isInteger(updates.routerPid) && updates.routerPid > 0) {
     safe.routerPid = updates.routerPid;
@@ -854,6 +864,7 @@ export function summarizeForDebug(
     endpointUrl: redactUrl(session.endpointUrl),
     credentialEnv: session.credentialEnv,
     preferredInferenceApi: session.preferredInferenceApi,
+    compatibleEndpointReasoning: session.compatibleEndpointReasoning,
     nimContainer: session.nimContainer,
     policyPresets: session.policyPresets,
     gpuPassthrough: session.gpuPassthrough,

diff --git a/test/onboard-selection.test.ts b/test/onboard-selection.test.ts
@@ -2060,6 +2060,113 @@ const { setupNim } = require(${onboardPath});
     );
   });
 
+  it("honors NEMOCLAW_REASONING for custom OpenAI-compatible endpoint models", () => {
+    const repoRoot = path.join(import.meta.dirname, "..");
+    const tmpDir = fs.mkdtempSync(
+      path.join(os.tmpdir(), "nemoclaw-onboard-custom-openai-reasoning-"),
+    );
+    const fakeBin = path.join(tmpDir, "bin");
+    const scriptPath = path.join(tmpDir, "custom-openai-reasoning-check.js");
+    const curlArgsLog = path.join(tmpDir, "custom-openai-reasoning-curl-args.log");
+    const onboardPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "onboard.js"));
+    const credentialsPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "credentials", "store.js"));
+    const runnerPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "runner.js"));
+
+    fs.mkdirSync(fakeBin, { recursive: true });
+    fs.writeFileSync(
+      path.join(fakeBin, "curl"), // stub curl: 200 + reasoning_content only for /chat/completions, else 400
+      `#!/usr/bin/env bash
+args_log=${JSON.stringify(curlArgsLog)}
+printf '%s\\n' "$*" >> "$args_log"
+body='{"error":{"message":"bad request"}}'
+status="400"
+outfile=""
+url=""
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    -o) outfile="$2"; shift 2 ;;
+    *) url="$1"; shift ;;
+  esac
+done
+if echo "$url" | grep -q '/chat/completions$'; then
+  body='{"id":"chatcmpl-123","choices":[{"message":{"content":"","reasoning_content":"OK"}}]}'
+  status="200"
+fi
+printf '%s' "$body" > "$outfile"
+printf '%s' "$status"
+`,
+      { mode: 0o755 },
+    );
+
+    const script = String.raw`
+const credentials = require(${credentialsPath});
+const runner = require(${runnerPath});
+
+const answers = ["3", "https://proxy.example.com/v1", "reasoning-model"];
+const messages = [];
+
+credentials.prompt = async (message) => {
+  messages.push(message);
+  return answers.shift() || "";
+};
+runner.runCapture = () => "";
+
+const { setupNim } = require(${onboardPath});
+
+(async () => {
+  process.env.COMPATIBLE_API_KEY = "proxy-key";
+  process.env.NEMOCLAW_REASONING = "yes";
+  const originalLog = console.log;
+  const originalError = console.error;
+  const lines = [];
+  console.log = (...args) => lines.push(args.join(" "));
+  console.error = (...args) => lines.push(args.join(" "));
+  try {
+    const result = await setupNim(null);
+    originalLog(JSON.stringify({
+      result,
+      messages,
+      lines,
+      reasoning: process.env.NEMOCLAW_REASONING,
+    }));
+  } finally {
+    console.log = originalLog;
+    console.error = originalError;
+  }
+})().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
+`;
+    fs.writeFileSync(scriptPath, script);
+
+    const result = spawnSync(process.execPath, [scriptPath], {
+      cwd: repoRoot,
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        HOME: tmpDir,
+        PATH: `${fakeBin}:${process.env.PATH || ""}`, // stub curl is found before any real curl
+      },
+    });
+
+    assert.equal(result.status, 0, result.stderr);
+    const payload = JSON.parse(result.stdout.trim());
+    assert.equal(payload.result.provider, "compatible-endpoint");
+    assert.equal(payload.result.model, "reasoning-model");
+    assert.equal(payload.result.preferredInferenceApi, "openai-completions");
+    assert.equal(payload.reasoning, "true"); // the "yes" alias was normalized and written back to the env
+    const curlInvocations = fs.readFileSync(curlArgsLog, "utf-8");
+    assert.match(curlInvocations, /chat\/completions/);
+    assert.doesNotMatch(curlInvocations, /\/responses/); // no /responses request was issued
+    assert.doesNotMatch(curlInvocations, /(^|\s)-N(\s|$)/); // no bare -N flag; presumably the streaming probe — confirm
+    assert.ok( // the env var alone decides: no reasoning prompt was shown
+      payload.messages.every(
+        (message: string) => !/Enable reasoning mode for this model/.test(message),
+      ),
+    );
+  });
+
   it("forces chat completions for custom OpenAI-compatible endpoints even when /responses returns valid tool calls (#1932)", () => {
     const repoRoot = path.join(import.meta.dirname, "..");
     const tmpDir = fs.mkdtempSync(