Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions chatmock/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

def create_app(
verbose: bool = False,
verbose_obfuscation: bool = False,
reasoning_effort: str = "medium",
reasoning_summary: str = "auto",
reasoning_compat: str = "think-tags",
Expand All @@ -21,6 +22,7 @@ def create_app(

app.config.update(
VERBOSE=bool(verbose),
VERBOSE_OBFUSCATION=bool(verbose_obfuscation),
REASONING_EFFORT=reasoning_effort,
REASONING_SUMMARY=reasoning_summary,
REASONING_COMPAT=reasoning_compat,
Expand Down
8 changes: 8 additions & 0 deletions chatmock/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ def cmd_serve(
host: str,
port: int,
verbose: bool,
verbose_obfuscation: bool,
reasoning_effort: str,
reasoning_summary: str,
reasoning_compat: str,
Expand All @@ -272,6 +273,7 @@ def cmd_serve(
) -> int:
app = create_app(
verbose=verbose,
verbose_obfuscation=verbose_obfuscation,
reasoning_effort=reasoning_effort,
reasoning_summary=reasoning_summary,
reasoning_compat=reasoning_compat,
Expand All @@ -296,6 +298,11 @@ def main() -> None:
p_serve.add_argument("--host", default="127.0.0.1")
p_serve.add_argument("--port", type=int, default=8000)
p_serve.add_argument("--verbose", action="store_true", help="Enable verbose logging")
p_serve.add_argument(
"--verbose-obfuscation",
action="store_true",
help="Also dump raw SSE/obfuscation events (in addition to --verbose request/response logs).",
)
p_serve.add_argument(
"--debug-model",
dest="debug_model",
Expand Down Expand Up @@ -355,6 +362,7 @@ def main() -> None:
host=args.host,
port=args.port,
verbose=args.verbose,
verbose_obfuscation=args.verbose_obfuscation,
reasoning_effort=args.reasoning_effort,
reasoning_summary=args.reasoning_summary,
reasoning_compat=args.reasoning_compat,
Expand Down
1 change: 1 addition & 0 deletions chatmock/prompt.md
1 change: 1 addition & 0 deletions chatmock/prompt_gpt5_codex.md
129 changes: 106 additions & 23 deletions chatmock/routes_ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,52 @@
ollama_bp = Blueprint("ollama", __name__)


def _log_json(prefix: str, payload: Any) -> None:
    """Print *prefix* followed by *payload* rendered as indented JSON.

    This is a best-effort debug helper: it must never raise into the
    request handler that calls it.

    Args:
        prefix: Label printed on its own line before the payload.
        payload: Object to render. It is serialized with ``json.dumps``
            (two-space indent, non-ASCII characters kept as-is).

    If serializing or printing the JSON form fails for any reason, the
    payload is printed through normal string formatting instead; if that
    also fails, the message is dropped silently.
    """
    try:
        print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
    except Exception:
        # Deliberately broad: a payload that is not JSON-serializable, or a
        # stdout that rejects the text, must not break the response path.
        try:
            print(f"{prefix}\n{payload}")
        except Exception:
            # Nothing more can be done for a log line; drop it.
            pass


def _wrap_stream_logging(label: str, iterator, enabled: bool):
    """Optionally wrap a chunk iterator so every chunk is echoed to stdout.

    Args:
        label: Line printed before each chunk's text.
        iterator: Iterable of response chunks (``bytes``, ``bytearray`` or
            anything ``str()`` can render).
        enabled: When false, *iterator* is returned unchanged.

    Returns:
        *iterator* itself when logging is disabled; otherwise a generator
        that yields exactly the same chunks, printing each one first.

    The wrapping generator forwards ``close()`` to *iterator* (when it has
    one) once iteration ends or the wrapper is closed early, so cleanup in
    the wrapped stream is not left to garbage collection.
    """
    if not enabled:
        return iterator

    def _gen():
        try:
            for chunk in iterator:
                try:
                    if isinstance(chunk, (bytes, bytearray)):
                        text = chunk.decode("utf-8", errors="replace")
                    else:
                        text = str(chunk)
                    print(f"{label}\n{text}")
                except Exception:
                    # Logging is best-effort; never interrupt the stream.
                    pass
                yield chunk
        finally:
            # Runs on normal exhaustion and on GeneratorExit (early close).
            close = getattr(iterator, "close", None)
            if callable(close):
                close()

    return _gen()


@ollama_bp.route("/api/version", methods=["GET"])
def ollama_version() -> Response:
    """Handle ``GET /api/version`` by returning ``{"version": <str>}``.

    The version comes from the ``OLLAMA_VERSION`` app config entry; a value
    that is missing, not a string, or blank falls back to ``"0.12.10"``.
    Headers from ``build_cors_headers()`` are applied with ``setdefault`` so
    headers already present on the response are not overwritten. When the
    ``VERBOSE`` config flag is truthy, the request and the response payload
    are echoed to stdout.
    """
    default_version = "0.12.10"
    # Read the flag once so the IN and OUT log lines always agree.
    verbose = bool(current_app.config.get("VERBOSE"))
    if verbose:
        print("IN GET /api/version")
    version = current_app.config.get("OLLAMA_VERSION", default_version)
    if not isinstance(version, str) or not version.strip():
        version = default_version
    payload = {"version": version}
    resp = make_response(jsonify(payload), 200)
    for k, v in build_cors_headers().items():
        resp.headers.setdefault(k, v)
    if verbose:
        _log_json("OUT GET /api/version", payload)
    return resp


def _instructions_for_model(model: str) -> str:
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
if model == "gpt-5-codex":
Expand Down Expand Up @@ -75,28 +121,34 @@ def ollama_tags() -> Response:
},
}
)
resp = make_response(jsonify({"models": models}), 200)
payload = {"models": models}
resp = make_response(jsonify(payload), 200)
for k, v in build_cors_headers().items():
resp.headers.setdefault(k, v)
if bool(current_app.config.get("VERBOSE")):
_log_json("OUT GET /api/tags", payload)
return resp


@ollama_bp.route("/api/show", methods=["POST"])
def ollama_show() -> Response:
verbose = bool(current_app.config.get("VERBOSE"))
raw_body = request.get_data(cache=True, as_text=True) or ""
if verbose:
try:
print("IN POST /api/show\n" + raw_body)
except Exception:
pass
try:
if verbose:
body_preview = (request.get_data(cache=True, as_text=True) or "")[:2000]
print("IN POST /api/show\n" + body_preview)
payload = json.loads(raw_body) if raw_body else (request.get_json(silent=True) or {})
except Exception:
pass
try:
payload = request.get_json(silent=True) or {}
except Exception:
payload = {}
model = payload.get("model")
if not isinstance(model, str) or not model.strip():
return jsonify({"error": "Model not found"}), 400
err = {"error": "Model not found"}
if verbose:
_log_json("OUT POST /api/show", err)
return jsonify(err), 400
v1_show_response = {
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /models/blobs/sha256:placeholder\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 100000\nPARAMETER stop \"</s>\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"",
"parameters": "num_keep 24\nstop \"<|start_header_id|>\"\nstop \"<|end_header_id|>\"\nstop \"<|eot_id|>\"",
Expand All @@ -116,6 +168,8 @@ def ollama_show() -> Response:
},
"capabilities": ["completion", "vision", "tools", "thinking"],
}
if verbose:
_log_json("OUT POST /api/show", v1_show_response)
resp = make_response(jsonify(v1_show_response), 200)
for k, v in build_cors_headers().items():
resp.headers.setdefault(k, v)
Expand All @@ -132,10 +186,13 @@ def ollama_chat() -> Response:
try:
raw = request.get_data(cache=True, as_text=True) or ""
if verbose:
print("IN POST /api/chat\n" + (raw[:2000] if isinstance(raw, str) else ""))
print("IN POST /api/chat\n" + (raw if isinstance(raw, str) else ""))
payload = json.loads(raw) if raw else {}
except Exception:
return jsonify({"error": "Invalid JSON body"}), 400
err = {"error": "Invalid JSON body"}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), 400

model = payload.get("model")
raw_messages = payload.get("messages")
Expand Down Expand Up @@ -166,7 +223,10 @@ def ollama_chat() -> Response:
if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
continue
if _t.get("type") not in ("web_search", "web_search_preview"):
return jsonify({"error": "Only web_search/web_search_preview are supported in responses_tools"}), 400
err = {"error": "Only web_search/web_search_preview are supported in responses_tools"}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), 400
extra_tools.append(_t)
if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
rtc = payload.get("responses_tool_choice")
Expand All @@ -180,7 +240,10 @@ def ollama_chat() -> Response:
except Exception:
size = 0
if size > MAX_TOOLS_BYTES:
return jsonify({"error": "responses_tools too large"}), 400
err = {"error": "responses_tools too large"}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), 400
had_responses_tools = True
tools_responses = (tools_responses or []) + extra_tools

Expand All @@ -189,7 +252,10 @@ def ollama_chat() -> Response:
tool_choice = rtc

if not isinstance(model, str) or not isinstance(messages, list) or not messages:
return jsonify({"error": "Invalid request format"}), 400
err = {"error": "Invalid request format"}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), 400

input_items = convert_chat_messages_to_responses_input(messages)

Expand All @@ -205,6 +271,17 @@ def ollama_chat() -> Response:
reasoning_param=build_reasoning_param(reasoning_effort, reasoning_summary, model_reasoning),
)
if error_resp is not None:
if verbose:
try:
body = error_resp.get_data(as_text=True)
if body:
try:
parsed = json.loads(body)
except Exception:
parsed = body
_log_json("OUT POST /api/chat", parsed)
except Exception:
pass
return error_resp

record_rate_limits_from_response(upstream)
Expand Down Expand Up @@ -232,17 +309,17 @@ def ollama_chat() -> Response:
if err2 is None and upstream2 is not None and upstream2.status_code < 400:
upstream = upstream2
else:
return (
jsonify({"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}}),
(upstream2.status_code if upstream2 is not None else upstream.status_code),
)
err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code)
else:
if verbose:
print("/api/chat upstream error status=", upstream.status_code, " body:", json.dumps(err_body)[:2000])
return (
jsonify({"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")}),
upstream.status_code,
)
err = {"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), upstream.status_code

created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
model_out = model if isinstance(model, str) and model.strip() else normalized_model
Expand Down Expand Up @@ -408,8 +485,12 @@ def _gen():
}
done_obj.update(_OLLAMA_FAKE_EVAL)
yield json.dumps(done_obj) + "\n"
if verbose:
print("OUT POST /api/chat (streaming response)")
stream_iter = stream_with_context(_gen())
stream_iter = _wrap_stream_logging("STREAM OUT /api/chat", stream_iter, verbose)
resp = current_app.response_class(
stream_with_context(_gen()),
stream_iter,
status=200,
mimetype="application/x-ndjson",
)
Expand Down Expand Up @@ -481,6 +562,8 @@ def _gen():
"done_reason": "stop",
}
out_json.update(_OLLAMA_FAKE_EVAL)
if verbose:
_log_json("OUT POST /api/chat", out_json)
resp = make_response(jsonify(out_json), 200)
for k, v in build_cors_headers().items():
resp.headers.setdefault(k, v)
Expand Down
Loading