Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,19 @@ curl http://127.0.0.1:8000/v1/chat/completions \

# Supported models
- `gpt-5`
- `gpt-5.1`
- `gpt-5-codex`
- `gpt-5.1-codex`
- `gpt-5.1-codex-max`
- `gpt-5.1-codex-mini`
- `codex-mini`

# Customisation / Configuration

### Thinking effort

- `--reasoning-effort` (choice of minimal,low,medium,high)<br>
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`.
- `--reasoning-effort` (choice of minimal,low,medium,high,xhigh)<br>
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`. The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`; neither offers a `minimal` variant.

### Thinking summaries

Expand Down
6 changes: 3 additions & 3 deletions chatmock/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ def main() -> None:
)
p_serve.add_argument(
"--reasoning-effort",
choices=["minimal", "low", "medium", "high"],
choices=["minimal", "low", "medium", "high", "xhigh"],
default=os.getenv("CHATGPT_LOCAL_REASONING_EFFORT", "medium").lower(),
help="Reasoning effort level for Responses API (default: medium)",
)
Expand All @@ -335,8 +335,8 @@ def main() -> None:
action="store_true",
default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"),
help=(
"Expose gpt-5 reasoning effort variants (minimal|low|medium|high) as separate models from /v1/models. "
"This allows choosing effort via model selection in compatible UIs."
"Expose gpt-5 reasoning effort variants (minimal|low|medium|high|xhigh where supported) "
"as separate models from /v1/models. This allows choosing effort via model selection in compatible UIs."
),
)
p_serve.add_argument(
Expand Down
29 changes: 25 additions & 4 deletions chatmock/reasoning.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,34 @@
from __future__ import annotations

from typing import Any, Dict
from typing import Any, Dict, Set


# Every effort level any model may accept; used as the permissive fallback.
DEFAULT_REASONING_EFFORTS: Set[str] = {"minimal", "low", "medium", "high", "xhigh"}


def allowed_efforts_for_model(model: str | None) -> Set[str]:
    """Return the set of reasoning-effort levels supported by ``model``.

    The name is lowercased, surrounding whitespace is dropped, and anything
    after a ``:`` (an effort tag such as ``gpt-5.1:high``) is ignored before
    matching. Unknown or empty names fall back to the full default set.
    """
    name = (model or "").strip().lower()
    if not name:
        return DEFAULT_REASONING_EFFORTS
    # Keep only the model family, discarding any ":<effort>" suffix.
    family = name.partition(":")[0]
    if family.startswith("gpt-5.1-codex-max"):
        # codex-max is the only family that accepts "xhigh" but not "minimal".
        return {"low", "medium", "high", "xhigh"}
    if family.startswith("gpt-5.1"):
        # The rest of the gpt-5.1 family drops "minimal" and lacks "xhigh".
        return {"low", "medium", "high"}
    return DEFAULT_REASONING_EFFORTS


def build_reasoning_param(
base_effort: str = "medium", base_summary: str = "auto", overrides: Dict[str, Any] | None = None
base_effort: str = "medium",
base_summary: str = "auto",
overrides: Dict[str, Any] | None = None,
*,
allowed_efforts: Set[str] | None = None,
) -> Dict[str, Any]:
effort = (base_effort or "").strip().lower()
summary = (base_summary or "").strip().lower()

valid_efforts = {"minimal", "low", "medium", "high"}
valid_efforts = allowed_efforts or DEFAULT_REASONING_EFFORTS
valid_summaries = {"auto", "concise", "detailed", "none"}

if isinstance(overrides, dict):
Expand Down Expand Up @@ -80,7 +99,7 @@ def extract_reasoning_from_model_name(model: str | None) -> Dict[str, Any] | Non
s = model.strip().lower()
if not s:
return None
efforts = {"minimal", "low", "medium", "high"}
efforts = {"minimal", "low", "medium", "high", "xhigh"}

if ":" in s:
maybe = s.rsplit(":", 1)[-1].strip()
Expand All @@ -96,5 +115,7 @@ def extract_reasoning_from_model_name(model: str | None) -> Dict[str, Any] | Non
return {"effort": "medium"}
if s.endswith(sep + "high"):
return {"effort": "high"}
if s.endswith(sep + "xhigh"):
return {"effort": "xhigh"}

return None
37 changes: 31 additions & 6 deletions chatmock/routes_ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
from .limits import record_rate_limits_from_response
from .http import build_cors_headers
from .reasoning import build_reasoning_param, extract_reasoning_from_model_name
from .reasoning import (
allowed_efforts_for_model,
build_reasoning_param,
extract_reasoning_from_model_name,
)
from .transform import convert_ollama_messages, normalize_ollama_tools
from .upstream import normalize_model_name, start_upstream_request
from .utils import convert_chat_messages_to_responses_input, convert_tools_chat_to_responses
Expand Down Expand Up @@ -67,7 +71,7 @@ def ollama_version() -> Response:

def _instructions_for_model(model: str) -> str:
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
if model == "gpt-5-codex" or model == "gpt-5.1-codex":
if model.startswith("gpt-5-codex") or model.startswith("gpt-5.1-codex"):
codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
if isinstance(codex, str) and codex.strip():
return codex
Expand All @@ -89,7 +93,15 @@ def ollama_tags() -> Response:
if bool(current_app.config.get("VERBOSE")):
print("IN GET /api/tags")
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
model_ids = ["gpt-5", "gpt-5.1", "gpt-5-codex", "gpt-5.1-codex", "gpt-5.1-codex-mini", "codex-mini"]
model_ids = [
"gpt-5",
"gpt-5.1",
"gpt-5-codex",
"gpt-5.1-codex",
"gpt-5.1-codex-max",
"gpt-5.1-codex-mini",
"codex-mini",
]
if expose_variants:
model_ids.extend(
[
Expand All @@ -100,13 +112,16 @@ def ollama_tags() -> Response:
"gpt-5.1-high",
"gpt-5.1-medium",
"gpt-5.1-low",
"gpt-5.1-minimal",
"gpt-5-codex-high",
"gpt-5-codex-medium",
"gpt-5-codex-low",
"gpt-5.1-codex-high",
"gpt-5.1-codex-medium",
"gpt-5.1-codex-low",
"gpt-5.1-codex-max-xhigh",
"gpt-5.1-codex-max-high",
"gpt-5.1-codex-max-medium",
"gpt-5.1-codex-max-low",
]
)
models = []
Expand Down Expand Up @@ -275,7 +290,12 @@ def ollama_chat() -> Response:
tools=tools_responses,
tool_choice=tool_choice,
parallel_tool_calls=parallel_tool_calls,
reasoning_param=build_reasoning_param(reasoning_effort, reasoning_summary, model_reasoning),
reasoning_param=build_reasoning_param(
reasoning_effort,
reasoning_summary,
model_reasoning,
allowed_efforts=allowed_efforts_for_model(model),
),
)
if error_resp is not None:
if verbose:
Expand Down Expand Up @@ -310,7 +330,12 @@ def ollama_chat() -> Response:
tools=base_tools_only,
tool_choice=safe_choice,
parallel_tool_calls=parallel_tool_calls,
reasoning_param=build_reasoning_param(reasoning_effort, reasoning_summary, model_reasoning),
reasoning_param=build_reasoning_param(
reasoning_effort,
reasoning_summary,
model_reasoning,
allowed_efforts=allowed_efforts_for_model(model),
),
)
record_rate_limits_from_response(upstream2)
if err2 is None and upstream2 is not None and upstream2.status_code < 400:
Expand Down
26 changes: 21 additions & 5 deletions chatmock/routes_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
from .limits import record_rate_limits_from_response
from .http import build_cors_headers
from .reasoning import apply_reasoning_to_message, build_reasoning_param, extract_reasoning_from_model_name
from .reasoning import (
allowed_efforts_for_model,
apply_reasoning_to_message,
build_reasoning_param,
extract_reasoning_from_model_name,
)
from .upstream import normalize_model_name, start_upstream_request
from .utils import (
convert_chat_messages_to_responses_input,
Expand Down Expand Up @@ -54,7 +59,7 @@ def _gen():

def _instructions_for_model(model: str) -> str:
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
if model == "gpt-5-codex" or model == "gpt-5.1-codex":
if model.startswith("gpt-5-codex") or model.startswith("gpt-5.1-codex"):
codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
if isinstance(codex, str) and codex.strip():
return codex
Expand Down Expand Up @@ -166,7 +171,12 @@ def chat_completions() -> Response:

model_reasoning = extract_reasoning_from_model_name(requested_model)
reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
reasoning_param = build_reasoning_param(reasoning_effort, reasoning_summary, reasoning_overrides)
reasoning_param = build_reasoning_param(
reasoning_effort,
reasoning_summary,
reasoning_overrides,
allowed_efforts=allowed_efforts_for_model(model),
)

upstream, error_resp = start_upstream_request(
model,
Expand Down Expand Up @@ -396,7 +406,12 @@ def completions() -> Response:

model_reasoning = extract_reasoning_from_model_name(requested_model)
reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
reasoning_param = build_reasoning_param(reasoning_effort, reasoning_summary, reasoning_overrides)
reasoning_param = build_reasoning_param(
reasoning_effort,
reasoning_summary,
reasoning_overrides,
allowed_efforts=allowed_efforts_for_model(model),
)
upstream, error_resp = start_upstream_request(
model,
input_items,
Expand Down Expand Up @@ -518,9 +533,10 @@ def list_models() -> Response:
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
model_groups = [
("gpt-5", ["high", "medium", "low", "minimal"]),
("gpt-5.1", ["high", "medium", "low", "minimal"]),
("gpt-5.1", ["high", "medium", "low"]),
("gpt-5-codex", ["high", "medium", "low"]),
("gpt-5.1-codex", ["high", "medium", "low"]),
("gpt-5.1-codex-max", ["xhigh", "high", "medium", "low"]),
("gpt-5.1-codex-mini", []),
("codex-mini", []),
]
Expand Down
3 changes: 2 additions & 1 deletion chatmock/upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def normalize_model_name(name: str | None, debug_model: str | None = None) -> st
base = name.split(":", 1)[0].strip()
for sep in ("-", "_"):
lowered = base.lower()
for effort in ("minimal", "low", "medium", "high"):
for effort in ("minimal", "low", "medium", "high", "xhigh"):
suffix = f"{sep}{effort}"
if lowered.endswith(suffix):
base = base[: -len(suffix)]
Expand All @@ -46,6 +46,7 @@ def normalize_model_name(name: str | None, debug_model: str | None = None) -> st
"gpt-5-codex": "gpt-5-codex",
"gpt-5-codex-latest": "gpt-5-codex",
"gpt-5.1-codex": "gpt-5.1-codex",
"gpt-5.1-codex-max": "gpt-5.1-codex-max",
"codex": "codex-mini-latest",
"codex-mini": "codex-mini-latest",
"codex-mini-latest": "codex-mini-latest",
Expand Down