From 307ad130ace6ffd55e6e312e29b001ed522969bd Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 14 May 2026 18:31:03 +0800 Subject: [PATCH 1/4] Add provider model request profiles --- .../src/five08/worker/crm/skills_extractor.py | 54 ++--- packages/shared/src/five08/job_match.py | 73 +++--- packages/shared/src/five08/llm.py | 211 ++++++++++++++++++ .../shared/src/five08/llm_model_profiles.json | 140 ++++++++++++ .../shared/src/five08/resume_extractor.py | 85 ++++--- .../src/five08/resume_skills_extractor.py | 54 ++--- tests/unit/test_job_match.py | 8 + tests/unit/test_llm.py | 68 ++++++ tests/unit/test_resume_extractor.py | 5 +- 9 files changed, 581 insertions(+), 117 deletions(-) create mode 100644 packages/shared/src/five08/llm.py create mode 100644 packages/shared/src/five08/llm_model_profiles.json create mode 100644 tests/unit/test_llm.py diff --git a/apps/worker/src/five08/worker/crm/skills_extractor.py b/apps/worker/src/five08/worker/crm/skills_extractor.py index 6585f865..8de1c080 100644 --- a/apps/worker/src/five08/worker/crm/skills_extractor.py +++ b/apps/worker/src/five08/worker/crm/skills_extractor.py @@ -5,6 +5,7 @@ import re from typing import Any +from five08.llm import ProviderModel from five08.skills import ( DISALLOWED_RESUME_SKILLS, normalize_skill, @@ -62,14 +63,16 @@ class SkillsExtractor: """Extract skills with LLM when configured, fallback heuristics otherwise.""" def __init__(self) -> None: - self.model = settings.resolved_resume_ai_model + self.provider_model = ProviderModel.openai_compatible( + model=settings.resolved_resume_ai_model, + api_key=settings.openai_api_key, + base_url=settings.openai_base_url, + ) + self.model = self.provider_model.model self.client: Any = None if settings.openai_api_key and OpenAIClient is not None: - self.client = OpenAIClient( - api_key=settings.openai_api_key, - base_url=settings.openai_base_url, - ) + self.client = OpenAIClient(**self.provider_model.client_kwargs()) def extract_skills(self, resume_text: str) -> ExtractedSkills: """Extract skills from resume text.""" @@ -79,26 +82,27 @@ def extract_skills(self, resume_text: str) -> ExtractedSkills: prompt = self._create_prompt(resume_text) try: response = self.client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "system", - "content": ( - "You extract professional skills from resumes for a CRM. " - "Focus on white-collar skills for product development orgs: " - "engineering, product, data, design, growth, and marketing. " - "Return JSON only, no prose. " - "Normalize skills to concise canonical names, lowercase. " - "Provide a strength from 1-5 when known, where 5 is strongest. " - "If uncertain, you may omit it or leave it blank. " - "Bias 3 for simple mentions, 4-5 for recent/current project usage, " - "and 1-2 for weak, outdated, or minimal exposure." - ), - }, - {"role": "user", "content": prompt}, - ], - temperature=0.1, - max_tokens=1200, + **self.provider_model.chat_completion_kwargs( + messages=[ + { + "role": "system", + "content": ( + "You extract professional skills from resumes for a CRM. " + "Focus on white-collar skills for product development orgs: " + "engineering, product, data, design, growth, and marketing. " + "Return JSON only, no prose. " + "Normalize skills to concise canonical names, lowercase. " + "Provide a strength from 1-5 when known, where 5 is strongest. " + "If uncertain, you may omit it or leave it blank. " + "Bias 3 for simple mentions, 4-5 for recent/current project usage, " + "and 1-2 for weak, outdated, or minimal exposure." + ), + }, + {"role": "user", "content": prompt}, + ], + temperature=0.1, + max_tokens=1200, + ) ) content = response.choices[0].message.content if not content: diff --git a/packages/shared/src/five08/job_match.py b/packages/shared/src/five08/job_match.py index 0e19bca6..ec6522c3 100644 --- a/packages/shared/src/five08/job_match.py +++ b/packages/shared/src/five08/job_match.py @@ -9,6 +9,7 @@ from typing import Any from five08.discord_webhook import DiscordWebhookLogger +from five08.llm import ProviderModel from five08.skills import normalize_skill, normalize_skill_list logger = logging.getLogger(__name__) @@ -692,25 +693,31 @@ def rerank_shortlisted_candidates( except ImportError as exc: raise RuntimeError("openai package is not installed") from exc - client = _OpenAI(api_key=api_key, base_url=base_url or None) + provider_model = ProviderModel.openai_compatible( + model=model, + api_key=api_key, + base_url=base_url or None, + ) + client = _OpenAI(**provider_model.client_kwargs()) prompt = _build_rerank_prompt(posting_text, requirements, candidates) try: response = client.chat.completions.create( - model=model, - temperature=0.1, - response_format={"type": "json_object"}, - messages=[ - { - "role": "system", - "content": ( - "You are a senior recruiting coordinator. Rerank a shortlist " - "using only the provided evidence. Return only valid JSON." - ), - }, - {"role": "user", "content": prompt}, - ], - max_tokens=2500, + **provider_model.chat_completion_kwargs( + temperature=0.1, + response_format={"type": "json_object"}, + messages=[ + { + "role": "system", + "content": ( + "You are a senior recruiting coordinator. Rerank a shortlist " + "using only the provided evidence. Return only valid JSON." + ), + }, + {"role": "user", "content": prompt}, + ], + max_tokens=2500, + ) ) except Exception as exc: logger.error("OpenAI candidate rerank call failed: %s", exc) @@ -794,27 +801,33 @@ def extract_job_requirements( except ImportError as exc: raise RuntimeError("openai package is not installed") from exc - client = _OpenAI(api_key=api_key, base_url=base_url or None) + provider_model = ProviderModel.openai_compatible( + model=model, + api_key=api_key, + base_url=base_url or None, + ) + client = _OpenAI(**provider_model.client_kwargs()) hints = _regex_hints(posting_text) prompt = _build_prompt(posting_text, hints) try: response = client.chat.completions.create( - model=model, - temperature=0.1, - response_format={"type": "json_object"}, - messages=[ - { - "role": "system", - "content": ( - "You are a recruiting assistant. Extract structured hiring requirements " - "from job postings. Return only valid JSON." - ), - }, - {"role": "user", "content": prompt}, - ], - max_tokens=2048, + **provider_model.chat_completion_kwargs( + temperature=0.1, + response_format={"type": "json_object"}, + messages=[ + { + "role": "system", + "content": ( + "You are a recruiting assistant. Extract structured hiring requirements " + "from job postings. Return only valid JSON." + ), + }, + {"role": "user", "content": prompt}, + ], + max_tokens=2048, + ) ) except Exception as exc: logger.error("OpenAI job extraction call failed: %s", exc) diff --git a/packages/shared/src/five08/llm.py b/packages/shared/src/five08/llm.py new file mode 100644 index 00000000..f80554b8 --- /dev/null +++ b/packages/shared/src/five08/llm.py @@ -0,0 +1,211 @@ +"""Provider/model helpers for OpenAI-compatible chat completions.""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass +from functools import lru_cache +from importlib import resources +from typing import Any, Mapping, TypedDict +from urllib.parse import urlparse + +logger = logging.getLogger(__name__) + +_PROFILE_RESOURCE = "llm_model_profiles.json" +_OPENAI_PROVIDER_PREFIXES = ("openai/",) + + +class OpenAIClientKwargs(TypedDict, total=False): + """Typed OpenAI-compatible client constructor kwargs.""" + + api_key: str + base_url: str + + +@dataclass(frozen=True) +class ModelProfile: + """Runtime parameter support for a model.""" + + name: str + provider: str = "openai-compatible" + model: str | None = None + supports_temperature: bool = True + supports_response_format: bool = True + supports_reasoning_effort: bool = False + supports_verbosity: bool = False + + @property + def provider_model_name(self) -> str: + """Model name to send to the provider when the profile overrides it.""" + return self.model or self.name + + +@dataclass(frozen=True) +class ProviderModel: + """Resolved provider/model plus request-option filtering.""" + + model: str + api_key: str | None = None + base_url: str | None = None + profile: ModelProfile | None = None + + @classmethod + def openai_compatible( + cls, + *, + model: str, + api_key: str | None = None, + base_url: str | None = None, + ) -> "ProviderModel": + """Build an OpenAI-compatible provider model from app settings.""" + resolved_model = _resolve_openrouter_model(model, base_url) + profile = get_model_profile(resolved_model) + provider_model_name = ( + resolved_model + if profile is None or _has_provider_prefix(resolved_model) + else profile.provider_model_name + ) + return cls( + model=provider_model_name, + api_key=api_key, + base_url=base_url, + profile=profile, + ) + + def client_kwargs(self) -> OpenAIClientKwargs: + """Keyword args for constructing an OpenAI-compatible client.""" + kwargs: OpenAIClientKwargs = {} + if self.api_key: + kwargs["api_key"] = self.api_key + if self.base_url: + kwargs["base_url"] = self.base_url + return kwargs + + def chat_completion_kwargs( + self, + *, + messages: list[dict[str, str]], + temperature: float | None = None, + max_tokens: int | None = None, + response_format: Any | None = None, + reasoning_effort: str | None = None, + verbosity: str | None = None, + **extra: Any, + ) -> dict[str, Any]: + """Build strict-provider-safe kwargs for chat.completions calls.""" + kwargs: dict[str, Any] = { + "model": self.model, + "messages": messages, + } + if max_tokens is not None: + kwargs["max_tokens"] = max_tokens + if response_format is not None and self.supports("response_format"): + kwargs["response_format"] = response_format + if temperature is not None and self.supports("temperature"): + kwargs["temperature"] = temperature + if reasoning_effort is not None and self.supports("reasoning_effort"): + kwargs["reasoning_effort"] = reasoning_effort + if verbosity is not None and self.supports("verbosity"): + kwargs["verbosity"] = verbosity + kwargs.update(extra) + return kwargs + + def supports(self, option: str) -> bool: + """Return whether the configured model supports a request option.""" + profile = self.profile or _fallback_profile(self.model) + if option == "temperature": + return profile.supports_temperature + if option == "response_format": + return profile.supports_response_format + if option == "reasoning_effort": + return profile.supports_reasoning_effort + if option == "verbosity": + return profile.supports_verbosity + return True + + +def get_model_profile(model: str) -> ModelProfile | None: + """Look up a model profile, handling provider-prefixed OpenAI names.""" + profiles = _load_model_profiles() + normalized = _profile_lookup_key(model) + payload = profiles.get(normalized) + if payload is None: + return _fallback_profile(model) + return _profile_from_payload(normalized, payload) + + +@lru_cache(maxsize=1) +def _load_model_profiles() -> dict[str, Mapping[str, Any]]: + try: + raw = resources.files("five08").joinpath(_PROFILE_RESOURCE).read_text() + data = json.loads(raw) + except Exception as exc: # pragma: no cover - static package data should exist + logger.warning("Failed to load LLM model profiles: %s", exc) + return {} + models = data.get("models") + if not isinstance(models, dict): + return {} + return { + str(key): value for key, value in models.items() if isinstance(value, Mapping) + } + + +def _profile_from_payload(name: str, payload: Mapping[str, Any]) -> ModelProfile: + request_options = payload.get("request_options") + if not isinstance(request_options, Mapping): + request_options = {} + return ModelProfile( + name=str(payload.get("name") or name), + provider=str(payload.get("provider") or "openai-compatible"), + model=str(payload["model"]) if payload.get("model") else None, + supports_temperature=bool(request_options.get("temperature", True)), + supports_response_format=bool(request_options.get("response_format", True)), + supports_reasoning_effort=bool(request_options.get("reasoning_effort", False)), + supports_verbosity=bool(request_options.get("verbosity", False)), + ) + + +def _fallback_profile(model: str) -> ModelProfile: + """Conservative defaults for models missing from the compiled profile.""" + lookup_key = _profile_lookup_key(model) + reasoning_without_temperature = ( + lookup_key.startswith(("o1", "o3", "o4")) + or lookup_key.startswith("gpt-5") + and lookup_key != "gpt-5-chat-latest" + ) + return ModelProfile( + name=lookup_key, + model=model, + supports_temperature=not reasoning_without_temperature, + supports_reasoning_effort=reasoning_without_temperature, + supports_verbosity=reasoning_without_temperature, + ) + + +def _profile_lookup_key(model: str) -> str: + value = (model or "").strip() + for prefix in _OPENAI_PROVIDER_PREFIXES: + if value.startswith(prefix): + return value.removeprefix(prefix) + return value + + +def _has_provider_prefix(model: str) -> bool: + return "/" in model + + +def _resolve_openrouter_model(model: str, base_url: str | None) -> str: + candidate = model.strip() or "gpt-5-mini" + if _has_provider_prefix(candidate): + return candidate + + base = (base_url or "").strip() + if not base: + return candidate + + parsed = urlparse(base) + host = (parsed.netloc or parsed.path).split("/")[0].split(":")[0].lower() + if host.endswith("openrouter.ai"): + return f"openai/{candidate}" + return candidate diff --git a/packages/shared/src/five08/llm_model_profiles.json b/packages/shared/src/five08/llm_model_profiles.json new file mode 100644 index 00000000..17577c15 --- /dev/null +++ b/packages/shared/src/five08/llm_model_profiles.json @@ -0,0 +1,140 @@ +{ + "models": { + "gpt-5.5": { + "name": "gpt-5.5", + "provider": "openai-compatible", + "model": "gpt-5.5", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5.4": { + "name": "gpt-5.4", + "provider": "openai-compatible", + "model": "gpt-5.4", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5.4-mini": { + "name": "gpt-5.4-mini", + "provider": "openai-compatible", + "model": "gpt-5.4-mini", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5.2": { + "name": "gpt-5.2", + "provider": "openai-compatible", + "model": "gpt-5.2", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5.1": { + "name": "gpt-5.1", + "provider": "openai-compatible", + "model": "gpt-5.1", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5": { + "name": "gpt-5", + "provider": "openai-compatible", + "model": "gpt-5", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5-mini": { + "name": "gpt-5-mini", + "provider": "openai-compatible", + "model": "gpt-5-mini", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5-nano": { + "name": "gpt-5-nano", + "provider": "openai-compatible", + "model": "gpt-5-nano", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5-chat-latest": { + "name": "gpt-5-chat-latest", + "provider": "openai-compatible", + "model": "gpt-5-chat-latest", + "request_options": { + "temperature": true, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-4.1": { + "name": "gpt-4.1", + "provider": "openai-compatible", + "model": "gpt-4.1", + "request_options": { + "temperature": true, + "response_format": true + } + }, + "gpt-4.1-mini": { + "name": "gpt-4.1-mini", + "provider": "openai-compatible", + "model": "gpt-4.1-mini", + "request_options": { + "temperature": true, + "response_format": true + } + }, + "gpt-4o-mini": { + "name": "gpt-4o-mini", + "provider": "openai-compatible", + "model": "gpt-4o-mini", + "request_options": { + "temperature": true, + "response_format": true + } + }, + "o4-mini": { + "name": "o4-mini", + "provider": "openai-compatible", + "model": "o4-mini", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true + } + } + } +} diff --git a/packages/shared/src/five08/resume_extractor.py b/packages/shared/src/five08/resume_extractor.py index c7574134..6fd1d3b4 100644 --- a/packages/shared/src/five08/resume_extractor.py +++ b/packages/shared/src/five08/resume_extractor.py @@ -22,6 +22,7 @@ normalize_timezone_offset as shared_normalize_timezone_offset, normalize_website_url as shared_normalize_website_url, ) +from five08.llm import ProviderModel from five08.skills import ( DISALLOWED_RESUME_SKILLS, normalize_skill_payload, @@ -1857,6 +1858,7 @@ def __init__( self.model = model.strip() if model else "gpt-5-mini" if not self.model: self.model = "gpt-5-mini" + self.base_url = base_url self.max_tokens = max(1, max_tokens) self.snippet_chars = max(1000, snippet_chars) self.client: Any = None @@ -1918,15 +1920,22 @@ def _build_completion_kwargs( messages: list[dict[str, str]], temperature: float, max_tokens: int, + response_format: Any | None = None, ) -> dict[str, Any]: - return { - "model": self.model, - "messages": messages, - "temperature": temperature, - "max_tokens": max_tokens, - "reasoning_effort": "minimal", - "verbosity": "low", - } + return self._provider_model().chat_completion_kwargs( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + response_format=response_format, + reasoning_effort="minimal", + verbosity="low", + ) + + def _provider_model(self) -> ProviderModel: + return ProviderModel.openai_compatible( + model=self.model, + base_url=self.base_url, + ) def _next_length_retry_max_tokens(self, current_max_tokens: int) -> int: return max(self.max_tokens * 2, current_max_tokens * 2) @@ -1980,22 +1989,25 @@ def extract( retry_reason=retry_reason, ) attempt_temperature = 0.1 if attempt_index == 0 else 0.0 - request_kwargs = self._build_completion_kwargs( - messages=messages, - temperature=attempt_temperature, - max_tokens=attempt_max_tokens, - ) try: if use_structured_output: response = self.client.beta.chat.completions.parse( - response_format=ResumeLLMExtractionResponse, - **request_kwargs, + **self._build_completion_kwargs( + messages=messages, + temperature=attempt_temperature, + max_tokens=attempt_max_tokens, + response_format=ResumeLLMExtractionResponse, + ), ) else: response = self.client.chat.completions.create( - response_format={"type": "json_object"}, - **request_kwargs, + **self._build_completion_kwargs( + messages=messages, + temperature=attempt_temperature, + max_tokens=attempt_max_tokens, + response_format={"type": "json_object"}, + ), ) except (ValidationError, json.JSONDecodeError, ValueError): if attempt_index == 0: @@ -2672,25 +2684,26 @@ def _split_name_with_llm(self, full_name: str) -> tuple[str, str] | None: return None response = self.client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "system", - "content": ( - "Split person names into firstName and lastName for CRM fields. " - "Return JSON only with no extra keys." - ), - }, - { - "role": "user", - "content": ( - f"Name: {full_name}. " - 'If this is a single name, set lastName to "Unknown".' - ), - }, - ], - temperature=0.0, - max_tokens=80, + **self._provider_model().chat_completion_kwargs( + messages=[ + { + "role": "system", + "content": ( + "Split person names into firstName and lastName for CRM fields. " + "Return JSON only with no extra keys." + ), + }, + { + "role": "user", + "content": ( + f"Name: {full_name}. " + 'If this is a single name, set lastName to "Unknown".' + ), + }, + ], + temperature=0.0, + max_tokens=80, + ) ) raw_content = response.choices[0].message.content if not raw_content: diff --git a/packages/shared/src/five08/resume_skills_extractor.py b/packages/shared/src/five08/resume_skills_extractor.py index b546623c..e8bdd7fd 100644 --- a/packages/shared/src/five08/resume_skills_extractor.py +++ b/packages/shared/src/five08/resume_skills_extractor.py @@ -5,6 +5,7 @@ import re from typing import Any +from five08.llm import ProviderModel from five08.skills import ( DISALLOWED_RESUME_SKILLS, normalize_skill, @@ -67,14 +68,16 @@ def __init__( openai_api_key: str | None, openai_base_url: str | None, ) -> None: - self.model = model + self.provider_model = ProviderModel.openai_compatible( + model=model, + api_key=openai_api_key, + base_url=openai_base_url, + ) + self.model = self.provider_model.model self.client: Any = None if openai_api_key and OpenAIClient is not None: - self.client = OpenAIClient( - api_key=openai_api_key, - base_url=openai_base_url, - ) + self.client = OpenAIClient(**self.provider_model.client_kwargs()) def extract_skills(self, resume_text: str) -> ExtractedSkills: """Extract skills from resume text.""" @@ -84,26 +87,27 @@ def extract_skills(self, resume_text: str) -> ExtractedSkills: prompt = self._create_prompt(resume_text) try: response = self.client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "system", - "content": ( - "You extract professional skills from resumes for a CRM. " - "Focus on white-collar skills for product development orgs: " - "engineering, product, data, design, growth, and marketing. " - "Return JSON only, no prose. " - "Normalize skills to concise canonical names, lowercase. " - "Provide a strength from 1-5 when known, where 5 is strongest. " - "If uncertain, you may omit it or leave it blank. " - "Bias 3 for simple mentions, 4-5 for recent/current project usage, " - "and 1-2 for weak, outdated, or minimal exposure." - ), - }, - {"role": "user", "content": prompt}, - ], - temperature=0.1, - max_tokens=1200, + **self.provider_model.chat_completion_kwargs( + messages=[ + { + "role": "system", + "content": ( + "You extract professional skills from resumes for a CRM. " + "Focus on white-collar skills for product development orgs: " + "engineering, product, data, design, growth, and marketing. " + "Return JSON only, no prose. " + "Normalize skills to concise canonical names, lowercase. " + "Provide a strength from 1-5 when known, where 5 is strongest. " + "If uncertain, you may omit it or leave it blank. " + "Bias 3 for simple mentions, 4-5 for recent/current project usage, " + "and 1-2 for weak, outdated, or minimal exposure." + ), + }, + {"role": "user", "content": prompt}, + ], + temperature=0.1, + max_tokens=1200, + ) ) content = response.choices[0].message.content if not content: diff --git a/tests/unit/test_job_match.py b/tests/unit/test_job_match.py index 4548932c..0c76f2b9 100644 --- a/tests/unit/test_job_match.py +++ b/tests/unit/test_job_match.py @@ -175,6 +175,10 @@ def test_extract_normalizes_required_skills() -> None: assert result.required_skills == ["python", "react", "django"] assert result.seniority == "senior" assert result.title == "Backend Engineer" + create_kwargs = mock_client.chat.completions.create.call_args.kwargs + assert create_kwargs["model"] == "gpt-5-mini" + assert create_kwargs["response_format"] == {"type": "json_object"} + assert "temperature" not in create_kwargs def test_extract_backfills_hard_and_soft_from_legacy_required_skills() -> None: @@ -398,6 +402,10 @@ def test_rerank_shortlisted_candidates_parses_structured_response() -> None: missing_requirements=["live webflow projects"], ) ] + create_kwargs = mock_client.chat.completions.create.call_args.kwargs + assert create_kwargs["model"] == "gpt-5-mini" + assert create_kwargs["response_format"] == {"type": "json_object"} + assert "temperature" not in create_kwargs def test_rerank_shortlisted_candidates_parse_error_does_not_log_raw_content() -> None: diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py new file mode 100644 index 00000000..aacdfc3e --- /dev/null +++ b/tests/unit/test_llm.py @@ -0,0 +1,68 @@ +"""Unit tests for provider/model request profiles.""" + +from five08.llm import ProviderModel, get_model_profile + + +def test_gpt_5_mini_profile_omits_temperature() -> None: + provider_model = ProviderModel.openai_compatible(model="gpt-5-mini") + + kwargs = provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + temperature=0.1, + response_format={"type": "json_object"}, + max_tokens=100, + reasoning_effort="minimal", + verbosity="low", + ) + + assert kwargs["model"] == "gpt-5-mini" + assert "temperature" not in kwargs + assert kwargs["response_format"] == {"type": "json_object"} + assert kwargs["reasoning_effort"] == "minimal" + assert kwargs["verbosity"] == "low" + + +def test_gpt_4_1_mini_profile_keeps_temperature_but_omits_reasoning_options() -> None: + provider_model = ProviderModel.openai_compatible(model="gpt-4.1-mini") + + kwargs = provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + temperature=0.1, + response_format={"type": "json_object"}, + max_tokens=100, + reasoning_effort="minimal", + verbosity="low", + ) + + assert kwargs["temperature"] == 0.1 + assert kwargs["response_format"] == {"type": "json_object"} + assert "reasoning_effort" not in kwargs + assert "verbosity" not in kwargs + + +def test_openrouter_plain_openai_model_gets_provider_prefix_for_request() -> None: + provider_model = ProviderModel.openai_compatible( + model="gpt-5-mini", + base_url="https://openrouter.ai/api/v1", + ) + + kwargs = provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + temperature=0.1, + ) + + assert provider_model.model == "openai/gpt-5-mini" + assert "temperature" not in kwargs + + +def test_unknown_non_reasoning_model_preserves_temperature() -> None: + profile = get_model_profile("fake-model") + provider_model = ProviderModel.openai_compatible(model="fake-model") + + kwargs = provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + temperature=0.0, + ) + + assert profile is not None + assert kwargs["temperature"] == 0.0 diff --git a/tests/unit/test_resume_extractor.py b/tests/unit/test_resume_extractor.py index 7311cbbf..80447430 100644 --- a/tests/unit/test_resume_extractor.py +++ b/tests/unit/test_resume_extractor.py @@ -950,6 +950,7 @@ def test_extract_preserves_raw_llm_output_on_fallback() -> None: (), {"chat": type("Chat", (), {"completions": fake_completions})()}, )() + extractor.model = "fake-model" with patch.object(extractor, "_split_name_with_llm", return_value=None): result = extractor.extract("Jane Doe\nSoftware Engineer\nBerlin, Germany") @@ -1314,7 +1315,6 @@ def __init__(self, content: str | None) -> None: (), {"chat": type("Chat", (), {"completions": fake_completions})()}, )() - extractor.model = "fake-model" with patch.object(extractor, "_split_name_with_llm", return_value=("Jane", "Doe")): result = extractor.extract("Jane Doe\nSoftware Engineer") @@ -1559,6 +1559,7 @@ def test_extract_retries_once_on_structured_validation_failure() -> None: )() }, )() + extractor.model = "fake-model" result = extractor.extract("Jane Doe\nSoftware Engineer") @@ -1637,6 +1638,7 @@ def test_extract_accepts_nullable_website_url_candidates() -> None: )() }, )() + extractor.model = "fake-model" result = extractor.extract("Jane Doe\nSoftware Engineer") @@ -1707,6 +1709,7 @@ def test_extract_falls_back_to_json_object_when_structured_api_errors() -> None: )(), }, )() + extractor.model = "fake-model" result = extractor.extract("Jane Doe\nSoftware Engineer") From e1d9bc584c1617198049827f890a6f274f1a857a Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 14 May 2026 21:32:02 +0800 Subject: [PATCH 2/4] Address provider model review feedback --- packages/shared/src/five08/llm.py | 6 ++---- tests/unit/test_llm.py | 14 +++++++++++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/packages/shared/src/five08/llm.py b/packages/shared/src/five08/llm.py index f80554b8..997296e0 100644 --- a/packages/shared/src/five08/llm.py +++ b/packages/shared/src/five08/llm.py @@ -63,7 +63,7 @@ def openai_compatible( profile = get_model_profile(resolved_model) provider_model_name = ( resolved_model - if profile is None or _has_provider_prefix(resolved_model) + if _has_provider_prefix(resolved_model) else profile.provider_model_name ) return cls( @@ -91,7 +91,6 @@ def chat_completion_kwargs( response_format: Any | None = None, reasoning_effort: str | None = None, verbosity: str | None = None, - **extra: Any, ) -> dict[str, Any]: """Build strict-provider-safe kwargs for chat.completions calls.""" kwargs: dict[str, Any] = { @@ -108,7 +107,6 @@ def chat_completion_kwargs( kwargs["reasoning_effort"] = reasoning_effort if verbosity is not None and self.supports("verbosity"): kwargs["verbosity"] = verbosity - kwargs.update(extra) return kwargs def supports(self, option: str) -> bool: @@ -125,7 +123,7 @@ def supports(self, option: str) -> bool: return True -def get_model_profile(model: str) -> ModelProfile | None: +def get_model_profile(model: str) -> ModelProfile: """Look up a model profile, handling provider-prefixed OpenAI names.""" profiles = _load_model_profiles() normalized = _profile_lookup_key(model) diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index aacdfc3e..c202972f 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -1,5 +1,7 @@ """Unit tests for provider/model request profiles.""" +import pytest + from five08.llm import ProviderModel, get_model_profile @@ -64,5 +66,15 @@ def test_unknown_non_reasoning_model_preserves_temperature() -> None: temperature=0.0, ) - assert profile is not None + assert profile.name == "fake-model" assert kwargs["temperature"] == 0.0 + + +def test_chat_completion_kwargs_rejects_unprofiled_extra_options() -> None: + provider_model = ProviderModel.openai_compatible(model="gpt-5-mini") + + with pytest.raises(TypeError): + provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + seed=1, # type: ignore[call-arg] + ) From 0d7b4920632b1f3419014596cead06877b97585e Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 14 May 2026 21:48:08 +0800 Subject: [PATCH 3/4] Address model profile review comments --- packages/shared/pyproject.toml | 1 + packages/shared/src/five08/llm.py | 24 +++++++++--- .../shared/src/five08/resume_extractor.py | 1 + tests/unit/test_llm.py | 18 +++++++++ tests/unit/test_resume_extractor.py | 37 +++++++++++++++++++ 5 files changed, 76 insertions(+), 5 deletions(-) diff --git a/packages/shared/pyproject.toml b/packages/shared/pyproject.toml index 1d216df4..be119204 100644 --- a/packages/shared/pyproject.toml +++ b/packages/shared/pyproject.toml @@ -32,3 +32,4 @@ packages = ["src/five08"] [tool.hatch.build.targets.wheel.force-include] "src/five08/data/model-profiles.json" = "five08/data/model-profiles.json" +"src/five08/llm_model_profiles.json" = "five08/llm_model_profiles.json" diff --git a/packages/shared/src/five08/llm.py b/packages/shared/src/five08/llm.py index 7215054e..b2aa49cb 100644 --- a/packages/shared/src/five08/llm.py +++ b/packages/shared/src/five08/llm.py @@ -15,7 +15,19 @@ logger = logging.getLogger(__name__) _PROFILE_RESOURCE = "llm_model_profiles.json" -_OPENAI_PROVIDER_PREFIXES = ("openai/",) +_KNOWN_PROVIDER_PREFIXES = frozenset( + { + "anthropic", + "bedrock", + "cohere", + "fireworks", + "gemini", + "groq", + "openai", + "openrouter", + "vertex", + } +) class OpenAIClientKwargs(TypedDict, total=False): @@ -87,7 +99,7 @@ def client_kwargs(self) -> OpenAIClientKwargs: def chat_completion_kwargs( self, *, - messages: list[dict[str, str]], + messages: list[dict[str, Any]], temperature: float | None = None, max_tokens: int | None = None, response_format: Any | None = None, @@ -208,9 +220,11 @@ def _fallback_profile(model: str) -> ModelProfile: def _profile_lookup_key(model: str) -> str: value = (model or "").strip() - for prefix in _OPENAI_PROVIDER_PREFIXES: - if value.startswith(prefix): - return value.removeprefix(prefix) + while "/" in value: + provider, rest = value.split("/", 1) + if provider not in _KNOWN_PROVIDER_PREFIXES or not rest: + break + value = rest return value diff --git a/packages/shared/src/five08/resume_extractor.py b/packages/shared/src/five08/resume_extractor.py index 67f718ed..88c70237 100644 --- a/packages/shared/src/five08/resume_extractor.py +++ b/packages/shared/src/five08/resume_extractor.py @@ -2780,6 +2780,7 @@ def _split_name_with_llm(self, full_name: str) -> tuple[str, str] | None: ], temperature=0.0, max_tokens=80, + response_format={"type": "json_object"}, ) ) raw_content = response.choices[0].message.content diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py index c202972f..ead3c587 100644 --- a/tests/unit/test_llm.py +++ b/tests/unit/test_llm.py @@ -57,6 +57,24 @@ def test_openrouter_plain_openai_model_gets_provider_prefix_for_request() -> Non assert "temperature" not in kwargs +def test_nested_provider_prefix_model_uses_underlying_profile() -> None: + profile = get_model_profile("openrouter/openai/gpt-5-mini") + provider_model = ProviderModel.openai_compatible( + model="openrouter/openai/gpt-5-mini", + ) + + kwargs = provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + temperature=0.1, + response_format={"type": "json_object"}, + ) + + assert profile.name == "gpt-5-mini" + assert kwargs["model"] == "openrouter/openai/gpt-5-mini" + assert kwargs["response_format"] == {"type": "json_object"} + assert "temperature" not in kwargs + + def test_unknown_non_reasoning_model_preserves_temperature() -> None: profile = get_model_profile("fake-model") provider_model = ProviderModel.openai_compatible(model="fake-model") diff --git a/tests/unit/test_resume_extractor.py b/tests/unit/test_resume_extractor.py index ac46bf45..52a91c0d 100644 --- a/tests/unit/test_resume_extractor.py +++ b/tests/unit/test_resume_extractor.py @@ -192,6 +192,43 @@ def test_split_name_prefers_llm_output() -> None: mock_llm_split.assert_called_once_with("Ada Lovelace") +def test_split_name_with_llm_requests_json_object_response() -> None: + """Name split helper should request JSON because it parses JSON content.""" + fake_response = type( + "Response", + (), + { + "choices": [ + type( + "Choice", + (), + { + "message": type( + "Message", + (), + {"content": '{"firstName":"Ada","lastName":"Lovelace"}'}, + )() + }, + )() + ] + }, + )() + fake_create = Mock(return_value=fake_response) + extractor = ResumeProfileExtractor(api_key="test-key") + extractor.client = type( + "Client", + (), + { + "chat": type( + "Chat", (), {"completions": type("C", (), {"create": fake_create})()} + )() + }, + )() + + assert extractor._split_name_with_llm("Ada Lovelace") == ("Ada", "Lovelace") + assert fake_create.call_args.kwargs["response_format"] == {"type": "json_object"} + + def test_split_name_falls_back_to_heuristic_without_name_hints() -> None: """Split-name should still split names using heuristics when LLM fails.""" extractor = ResumeProfileExtractor(api_key="test-key") From 54dcc01637c1a11f47d5ee579f0781e5b43c4a5e Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Thu, 14 May 2026 22:30:41 +0800 Subject: [PATCH 4/4] Add gpt-5.1 chat completion profile --- .../src/five08/data/model-profiles.json | 18 +++++++ tests/evals/model-profiles.json | 18 +++++++ tests/unit/test_resume_extractor.py | 50 +++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/packages/shared/src/five08/data/model-profiles.json b/packages/shared/src/five08/data/model-profiles.json index 87e2d46c..bff9d4d1 100644 --- a/packages/shared/src/five08/data/model-profiles.json +++ b/packages/shared/src/five08/data/model-profiles.json @@ -154,6 +154,24 @@ } } }, + "gpt-5.1": { + "name": "gpt-5.1", + "provider": "openai-compatible", + "model": "gpt-5.1", + "api_key_env": "OPENAI_API_KEY", + "base_url": "https://api.openai.com/v1", + "request_options": { + "response_format": { + "type": "json_object" + } + }, + "chat_completion_options": { + "max_tokens_parameter": "max_completion_tokens", + "reasoning_effort": "low", + "verbosity": "low", + "supports_temperature": false + } + }, "gpt-5": { "name": "gpt-5", "provider": "openai-compatible", diff --git a/tests/evals/model-profiles.json b/tests/evals/model-profiles.json index 87e2d46c..bff9d4d1 100644 --- a/tests/evals/model-profiles.json +++ b/tests/evals/model-profiles.json @@ -154,6 +154,24 @@ } } }, + "gpt-5.1": { + "name": "gpt-5.1", + "provider": "openai-compatible", + "model": "gpt-5.1", + "api_key_env": "OPENAI_API_KEY", + "base_url": "https://api.openai.com/v1", + "request_options": { + "response_format": { + "type": "json_object" + } + }, + "chat_completion_options": { + "max_tokens_parameter": "max_completion_tokens", + "reasoning_effort": "low", + "verbosity": "low", + "supports_temperature": false + } + }, "gpt-5": { "name": "gpt-5", "provider": "openai-compatible", diff --git a/tests/unit/test_resume_extractor.py b/tests/unit/test_resume_extractor.py index 52a91c0d..e1f06028 100644 --- a/tests/unit/test_resume_extractor.py +++ b/tests/unit/test_resume_extractor.py @@ -1150,6 +1150,56 @@ def test_extract_uses_supported_gpt54_reasoning_effort() -> None: assert kwargs["verbosity"] == "low" +def test_extract_uses_supported_gpt51_reasoning_effort() -> None: + """GPT-5.1 should not use the GPT-5 minimal reasoning-effort value.""" + + response = type( + "Response", + (), + { + "choices": [ + type( + "Choice", + (), + { + "finish_reason": "stop", + "message": type( + "Message", + (), + { + "content": ( + '{"name":"Jane Doe","firstName":"Jane",' + '"lastName":"Doe","email":"jane@example.com"}' + ) + }, + )(), + }, + )() + ] + }, + )() + fake_completions = Mock() + fake_completions.create.return_value = response + extractor = ResumeProfileExtractor( + api_key="test-key", + model="gpt-5.1", + max_tokens=40, + ) + extractor.client = type( + "Client", + (), + {"chat": type("Chat", (), {"completions": fake_completions})()}, + )() + + with patch.object(extractor, "_split_name_with_llm", return_value=("Jane", "Doe")): + extractor.extract("Jane Doe\nSoftware Engineer") + + kwargs = fake_completions.create.call_args.kwargs + assert kwargs["max_completion_tokens"] == 40 + assert kwargs["reasoning_effort"] == "low" + assert kwargs["verbosity"] == "low" + + def test_extract_repairs_json_with_comments_trailing_commas_and_prose() -> None: """Common near-JSON formatting issues should not force heuristic fallback."""