diff --git a/apps/worker/src/five08/worker/crm/skills_extractor.py b/apps/worker/src/five08/worker/crm/skills_extractor.py index 1956abfc..6336dc5b 100644 --- a/apps/worker/src/five08/worker/crm/skills_extractor.py +++ b/apps/worker/src/five08/worker/crm/skills_extractor.py @@ -5,6 +5,7 @@ import re from typing import Any +from five08.llm import ProviderModel from five08.openai_fallback import FallbackOpenAIClient from five08.skills import ( DISALLOWED_RESUME_SKILLS, @@ -63,7 +64,12 @@ class SkillsExtractor: """Extract skills with LLM when configured, fallback heuristics otherwise.""" def __init__(self) -> None: - self.model = settings.resolved_resume_ai_model + self.provider_model = ProviderModel.openai_compatible( + model=settings.resolved_resume_ai_model, + api_key=settings.resolved_resume_ai_api_key, + base_url=settings.resolved_resume_ai_base_url, + ) + self.model = self.provider_model.model self.client: Any = None if settings.resolved_resume_ai_provider_attempts and OpenAIClient is not None: @@ -72,10 +78,7 @@ def __init__(self) -> None: client_factory=OpenAIClient, ) elif settings.resolved_resume_ai_api_key and OpenAIClient is not None: - self.client = OpenAIClient( - api_key=settings.resolved_resume_ai_api_key, - base_url=settings.resolved_resume_ai_base_url, - ) + self.client = OpenAIClient(**self.provider_model.client_kwargs()) def extract_skills(self, resume_text: str) -> ExtractedSkills: """Extract skills from resume text.""" @@ -85,26 +88,27 @@ def extract_skills(self, resume_text: str) -> ExtractedSkills: prompt = self._create_prompt(resume_text) try: response = self.client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "system", - "content": ( - "You extract professional skills from resumes for a CRM. " - "Focus on white-collar skills for product development orgs: " - "engineering, product, data, design, growth, and marketing. " - "Return JSON only, no prose. " - "Normalize skills to concise canonical names, lowercase. " - "Provide a strength from 1-5 when known, where 5 is strongest. " - "If uncertain, you may omit it or leave it blank. " - "Bias 3 for simple mentions, 4-5 for recent/current project usage, " - "and 1-2 for weak, outdated, or minimal exposure." - ), - }, - {"role": "user", "content": prompt}, - ], - temperature=0.1, - max_tokens=1200, + **self.provider_model.chat_completion_kwargs( + messages=[ + { + "role": "system", + "content": ( + "You extract professional skills from resumes for a CRM. " + "Focus on white-collar skills for product development orgs: " + "engineering, product, data, design, growth, and marketing. " + "Return JSON only, no prose. " + "Normalize skills to concise canonical names, lowercase. " + "Provide a strength from 1-5 when known, where 5 is strongest. " + "If uncertain, you may omit it or leave it blank. " + "Bias 3 for simple mentions, 4-5 for recent/current project usage, " + "and 1-2 for weak, outdated, or minimal exposure." + ), + }, + {"role": "user", "content": prompt}, + ], + temperature=0.1, + max_tokens=1200, + ) ) content = response.choices[0].message.content if not content: diff --git a/packages/shared/pyproject.toml b/packages/shared/pyproject.toml index 1d216df4..be119204 100644 --- a/packages/shared/pyproject.toml +++ b/packages/shared/pyproject.toml @@ -32,3 +32,4 @@ packages = ["src/five08"] [tool.hatch.build.targets.wheel.force-include] "src/five08/data/model-profiles.json" = "five08/data/model-profiles.json" +"src/five08/llm_model_profiles.json" = "five08/llm_model_profiles.json" diff --git a/packages/shared/src/five08/data/model-profiles.json b/packages/shared/src/five08/data/model-profiles.json index 87e2d46c..bff9d4d1 100644 --- a/packages/shared/src/five08/data/model-profiles.json +++ b/packages/shared/src/five08/data/model-profiles.json @@ -154,6 +154,24 @@ } } }, + "gpt-5.1": { + "name": "gpt-5.1", + "provider": "openai-compatible", + "model": "gpt-5.1", + "api_key_env": "OPENAI_API_KEY", + "base_url": "https://api.openai.com/v1", + "request_options": { + "response_format": { + "type": "json_object" + } + }, + "chat_completion_options": { + "max_tokens_parameter": "max_completion_tokens", + "reasoning_effort": "low", + "verbosity": "low", + "supports_temperature": false + } + }, "gpt-5": { "name": "gpt-5", "provider": "openai-compatible", diff --git a/packages/shared/src/five08/job_match.py b/packages/shared/src/five08/job_match.py index 0e19bca6..ec6522c3 100644 --- a/packages/shared/src/five08/job_match.py +++ b/packages/shared/src/five08/job_match.py @@ -9,6 +9,7 @@ from typing import Any from five08.discord_webhook import DiscordWebhookLogger +from five08.llm import ProviderModel from five08.skills import normalize_skill, normalize_skill_list logger = logging.getLogger(__name__) @@ -692,25 +693,31 @@ def rerank_shortlisted_candidates( except ImportError as exc: raise RuntimeError("openai package is not installed") from exc - client = _OpenAI(api_key=api_key, base_url=base_url or None) + provider_model = ProviderModel.openai_compatible( + model=model, + api_key=api_key, + base_url=base_url or None, + ) + client = _OpenAI(**provider_model.client_kwargs()) prompt = _build_rerank_prompt(posting_text, requirements, candidates) try: response = client.chat.completions.create( - model=model, - temperature=0.1, - response_format={"type": "json_object"}, - messages=[ - { - "role": "system", - "content": ( - "You are a senior recruiting coordinator. Rerank a shortlist " - "using only the provided evidence. Return only valid JSON." - ), - }, - {"role": "user", "content": prompt}, - ], - max_tokens=2500, + **provider_model.chat_completion_kwargs( + temperature=0.1, + response_format={"type": "json_object"}, + messages=[ + { + "role": "system", + "content": ( + "You are a senior recruiting coordinator. Rerank a shortlist " + "using only the provided evidence. Return only valid JSON." + ), + }, + {"role": "user", "content": prompt}, + ], + max_tokens=2500, + ) ) except Exception as exc: logger.error("OpenAI candidate rerank call failed: %s", exc) @@ -794,27 +801,33 @@ def extract_job_requirements( except ImportError as exc: raise RuntimeError("openai package is not installed") from exc - client = _OpenAI(api_key=api_key, base_url=base_url or None) + provider_model = ProviderModel.openai_compatible( + model=model, + api_key=api_key, + base_url=base_url or None, + ) + client = _OpenAI(**provider_model.client_kwargs()) hints = _regex_hints(posting_text) prompt = _build_prompt(posting_text, hints) try: response = client.chat.completions.create( - model=model, - temperature=0.1, - response_format={"type": "json_object"}, - messages=[ - { - "role": "system", - "content": ( - "You are a recruiting assistant. Extract structured hiring requirements " - "from job postings. Return only valid JSON." - ), - }, - {"role": "user", "content": prompt}, - ], - max_tokens=2048, + **provider_model.chat_completion_kwargs( + temperature=0.1, + response_format={"type": "json_object"}, + messages=[ + { + "role": "system", + "content": ( + "You are a recruiting assistant. Extract structured hiring requirements " + "from job postings. Return only valid JSON." + ), + }, + {"role": "user", "content": prompt}, + ], + max_tokens=2048, + ) ) except Exception as exc: logger.error("OpenAI job extraction call failed: %s", exc) diff --git a/packages/shared/src/five08/llm.py b/packages/shared/src/five08/llm.py new file mode 100644 index 00000000..b2aa49cb --- /dev/null +++ b/packages/shared/src/five08/llm.py @@ -0,0 +1,248 @@ +"""Provider/model helpers for OpenAI-compatible chat completions.""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass +from functools import lru_cache +from importlib import resources +from typing import Any, Mapping, TypedDict +from urllib.parse import urlparse + +from five08.model_catalog import model_chat_completion_options + +logger = logging.getLogger(__name__) + +_PROFILE_RESOURCE = "llm_model_profiles.json" +_KNOWN_PROVIDER_PREFIXES = frozenset( + { + "anthropic", + "bedrock", + "cohere", + "fireworks", + "gemini", + "groq", + "openai", + "openrouter", + "vertex", + } +) + + +class OpenAIClientKwargs(TypedDict, total=False): + """Typed OpenAI-compatible client constructor kwargs.""" + + api_key: str + base_url: str + + +@dataclass(frozen=True) +class ModelProfile: + """Runtime parameter support for a model.""" + + name: str + provider: str = "openai-compatible" + model: str | None = None + supports_temperature: bool = True + supports_response_format: bool = True + supports_reasoning_effort: bool = False + supports_verbosity: bool = False + + @property + def provider_model_name(self) -> str: + """Model name to send to the provider when the profile overrides it.""" + return self.model or self.name + + +@dataclass(frozen=True) +class ProviderModel: + """Resolved provider/model plus request-option filtering.""" + + model: str + api_key: str | None = None + base_url: str | None = None + profile: ModelProfile | None = None + + @classmethod + def openai_compatible( + cls, + *, + model: str, + api_key: str | None = None, + base_url: str | None = None, + ) -> "ProviderModel": + """Build an OpenAI-compatible provider model from app settings.""" + resolved_model = _resolve_openrouter_model(model, base_url) + profile = get_model_profile(resolved_model) + provider_model_name = ( + resolved_model + if _has_provider_prefix(resolved_model) + else profile.provider_model_name + ) + return cls( + model=provider_model_name, + api_key=api_key, + base_url=base_url, + profile=profile, + ) + + def client_kwargs(self) -> OpenAIClientKwargs: + """Keyword args for constructing an OpenAI-compatible client.""" + kwargs: OpenAIClientKwargs = {} + if self.api_key: + kwargs["api_key"] = self.api_key + if self.base_url: + kwargs["base_url"] = self.base_url + return kwargs + + def chat_completion_kwargs( + self, + *, + messages: list[dict[str, Any]], + temperature: float | None = None, + max_tokens: int | None = None, + response_format: Any | None = None, + reasoning_effort: str | None = None, + verbosity: str | None = None, + ) -> dict[str, Any]: + """Build strict-provider-safe kwargs for chat.completions calls.""" + kwargs: dict[str, Any] = { + "model": self.model, + "messages": messages, + } + model_options = model_chat_completion_options(self.model) + if max_tokens is not None: + max_tokens_parameter = model_options.get("max_tokens_parameter") + if isinstance(max_tokens_parameter, str) and max_tokens_parameter: + kwargs[max_tokens_parameter] = max_tokens + else: + kwargs["max_tokens"] = max_tokens + if response_format is not None and self.supports("response_format"): + kwargs["response_format"] = response_format + if temperature is not None and self.supports("temperature"): + kwargs["temperature"] = temperature + if reasoning_effort is not None and self.supports("reasoning_effort"): + configured_reasoning_effort = model_options.get("reasoning_effort") + kwargs["reasoning_effort"] = ( + configured_reasoning_effort + if isinstance(configured_reasoning_effort, str) + and configured_reasoning_effort + else reasoning_effort + ) + if verbosity is not None and self.supports("verbosity"): + configured_verbosity = model_options.get("verbosity") + kwargs["verbosity"] = ( + configured_verbosity + if isinstance(configured_verbosity, str) and configured_verbosity + else verbosity + ) + return kwargs + + def supports(self, option: str) -> bool: + """Return whether the configured model supports a request option.""" + model_options = model_chat_completion_options(self.model) + if option == "temperature" and "supports_temperature" in model_options: + return bool(model_options["supports_temperature"]) + if option in {"reasoning_effort", "verbosity"} and option in model_options: + return isinstance(model_options[option], str) and bool( + model_options[option] + ) + profile = self.profile or _fallback_profile(self.model) + if option == "temperature": + return profile.supports_temperature + if option == "response_format": + return profile.supports_response_format + if option == "reasoning_effort": + return profile.supports_reasoning_effort + if option == "verbosity": + return profile.supports_verbosity + return True + + +def get_model_profile(model: str) -> ModelProfile: + """Look up a model profile, handling provider-prefixed OpenAI names.""" + profiles = _load_model_profiles() + normalized = _profile_lookup_key(model) + payload = profiles.get(normalized) + if payload is None: + return _fallback_profile(model) + return _profile_from_payload(normalized, payload) + + +@lru_cache(maxsize=1) +def _load_model_profiles() -> dict[str, Mapping[str, Any]]: + try: + raw = resources.files("five08").joinpath(_PROFILE_RESOURCE).read_text() + data = json.loads(raw) + except Exception as exc: # pragma: no cover - static package data should exist + logger.warning("Failed to load LLM model profiles: %s", exc) + return {} + models = data.get("models") + if not isinstance(models, dict): + return {} + return { + str(key): value for key, value in models.items() if isinstance(value, Mapping) + } + + +def _profile_from_payload(name: str, payload: Mapping[str, Any]) -> ModelProfile: + request_options = payload.get("request_options") + if not isinstance(request_options, Mapping): + request_options = {} + return ModelProfile( + name=str(payload.get("name") or name), + provider=str(payload.get("provider") or "openai-compatible"), + model=str(payload["model"]) if payload.get("model") else None, + supports_temperature=bool(request_options.get("temperature", True)), + supports_response_format=bool(request_options.get("response_format", True)), + supports_reasoning_effort=bool(request_options.get("reasoning_effort", False)), + supports_verbosity=bool(request_options.get("verbosity", False)), + ) + + +def _fallback_profile(model: str) -> ModelProfile: + """Conservative defaults for models missing from the compiled profile.""" + lookup_key = _profile_lookup_key(model) + reasoning_without_temperature = ( + lookup_key.startswith(("o1", "o3", "o4")) + or lookup_key.startswith("gpt-5") + and lookup_key != "gpt-5-chat-latest" + ) + return ModelProfile( + name=lookup_key, + model=model, + supports_temperature=not reasoning_without_temperature, + supports_reasoning_effort=reasoning_without_temperature, + supports_verbosity=reasoning_without_temperature, + ) + + +def _profile_lookup_key(model: str) -> str: + value = (model or "").strip() + while "/" in value: + provider, rest = value.split("/", 1) + if provider not in _KNOWN_PROVIDER_PREFIXES or not rest: + break + value = rest + return value + + +def _has_provider_prefix(model: str) -> bool: + return "/" in model + + +def _resolve_openrouter_model(model: str, base_url: str | None) -> str: + candidate = model.strip() or "gpt-5-mini" + if _has_provider_prefix(candidate): + return candidate + + base = (base_url or "").strip() + if not base: + return candidate + + parsed = urlparse(base) + host = (parsed.netloc or parsed.path).split("/")[0].split(":")[0].lower() + if host.endswith("openrouter.ai"): + return f"openai/{candidate}" + return candidate diff --git a/packages/shared/src/five08/llm_model_profiles.json b/packages/shared/src/five08/llm_model_profiles.json new file mode 100644 index 00000000..17577c15 --- /dev/null +++ b/packages/shared/src/five08/llm_model_profiles.json @@ -0,0 +1,140 @@ +{ + "models": { + "gpt-5.5": { + "name": "gpt-5.5", + "provider": "openai-compatible", + "model": "gpt-5.5", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5.4": { + "name": "gpt-5.4", + "provider": "openai-compatible", + "model": "gpt-5.4", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5.4-mini": { + "name": "gpt-5.4-mini", + "provider": "openai-compatible", + "model": "gpt-5.4-mini", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5.2": { + "name": "gpt-5.2", + "provider": "openai-compatible", + "model": "gpt-5.2", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5.1": { + "name": "gpt-5.1", + "provider": "openai-compatible", + "model": "gpt-5.1", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5": { + "name": "gpt-5", + "provider": "openai-compatible", + "model": "gpt-5", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5-mini": { + "name": "gpt-5-mini", + "provider": "openai-compatible", + "model": "gpt-5-mini", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5-nano": { + "name": "gpt-5-nano", + "provider": "openai-compatible", + "model": "gpt-5-nano", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-5-chat-latest": { + "name": "gpt-5-chat-latest", + "provider": "openai-compatible", + "model": "gpt-5-chat-latest", + "request_options": { + "temperature": true, + "response_format": true, + "reasoning_effort": true, + "verbosity": true + } + }, + "gpt-4.1": { + "name": "gpt-4.1", + "provider": "openai-compatible", + "model": "gpt-4.1", + "request_options": { + "temperature": true, + "response_format": true + } + }, + "gpt-4.1-mini": { + "name": "gpt-4.1-mini", + "provider": "openai-compatible", + "model": "gpt-4.1-mini", + "request_options": { + "temperature": true, + "response_format": true + } + }, + "gpt-4o-mini": { + "name": "gpt-4o-mini", + "provider": "openai-compatible", + "model": "gpt-4o-mini", + "request_options": { + "temperature": true, + "response_format": true + } + }, + "o4-mini": { + "name": "o4-mini", + "provider": "openai-compatible", + "model": "o4-mini", + "request_options": { + "temperature": false, + "response_format": true, + "reasoning_effort": true + } + } + } +} diff --git a/packages/shared/src/five08/resume_extractor.py b/packages/shared/src/five08/resume_extractor.py index f3afbb45..88c70237 100644 --- a/packages/shared/src/five08/resume_extractor.py +++ b/packages/shared/src/five08/resume_extractor.py @@ -22,7 +22,7 @@ normalize_timezone_offset as shared_normalize_timezone_offset, normalize_website_url as shared_normalize_website_url, ) -from five08.model_catalog import model_chat_completion_options +from five08.llm import ProviderModel from five08.openai_fallback import FallbackOpenAIClient, OpenAICompatibleProvider from five08.skills import ( DISALLOWED_RESUME_SKILLS, @@ -1991,27 +1991,22 @@ def _build_completion_kwargs( messages: list[dict[str, str]], temperature: float, max_tokens: int, + response_format: Any | None = None, ) -> dict[str, Any]: - kwargs: dict[str, Any] = { - "model": self.model, - "messages": messages, - } - model_options = model_chat_completion_options(self.model) - max_tokens_parameter = model_options.get("max_tokens_parameter") - if isinstance(max_tokens_parameter, str) and max_tokens_parameter: - kwargs[max_tokens_parameter] = max_tokens - reasoning_effort = model_options.get("reasoning_effort") - if isinstance(reasoning_effort, str) and reasoning_effort: - kwargs["reasoning_effort"] = reasoning_effort - verbosity = model_options.get("verbosity") - if isinstance(verbosity, str) and verbosity: - kwargs["verbosity"] = verbosity - if model_options.get("supports_temperature", True): - kwargs["temperature"] = temperature - else: - kwargs["temperature"] = temperature - kwargs["max_tokens"] = max_tokens - return kwargs + return self._provider_model().chat_completion_kwargs( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + response_format=response_format, + reasoning_effort="minimal", + verbosity="low", + ) + + def _provider_model(self) -> ProviderModel: + return ProviderModel.openai_compatible( + model=self.model, + base_url=self.base_url, + ) def _next_length_retry_max_tokens(self, current_max_tokens: int) -> int: return max(self.max_tokens * 2, current_max_tokens * 2) @@ -2066,22 +2061,25 @@ def extract( retry_reason=retry_reason, ) attempt_temperature = 0.1 if attempt_index == 0 else 0.0 - request_kwargs = self._build_completion_kwargs( - messages=messages, - temperature=attempt_temperature, - max_tokens=attempt_max_tokens, - ) try: if use_structured_output: response = self.client.beta.chat.completions.parse( - response_format=ResumeLLMExtractionResponse, - **request_kwargs, + **self._build_completion_kwargs( + messages=messages, + temperature=attempt_temperature, + max_tokens=attempt_max_tokens, + response_format=ResumeLLMExtractionResponse, + ), ) else: response = self.client.chat.completions.create( - response_format={"type": "json_object"}, - **request_kwargs, + **self._build_completion_kwargs( + messages=messages, + temperature=attempt_temperature, + max_tokens=attempt_max_tokens, + response_format={"type": "json_object"}, + ), ) except (ValidationError, json.JSONDecodeError, ValueError): if attempt_index == 0: @@ -2763,25 +2761,27 @@ def _split_name_with_llm(self, full_name: str) -> tuple[str, str] | None: return None response = self.client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "system", - "content": ( - "Split person names into firstName and lastName for CRM fields. " - "Return JSON only with no extra keys." - ), - }, - { - "role": "user", - "content": ( - f"Name: {full_name}. " - 'If this is a single name, set lastName to "Unknown".' - ), - }, - ], - temperature=0.0, - max_tokens=80, + **self._provider_model().chat_completion_kwargs( + messages=[ + { + "role": "system", + "content": ( + "Split person names into firstName and lastName for CRM fields. " + "Return JSON only with no extra keys." + ), + }, + { + "role": "user", + "content": ( + f"Name: {full_name}. " + 'If this is a single name, set lastName to "Unknown".' + ), + }, + ], + temperature=0.0, + max_tokens=80, + response_format={"type": "json_object"}, + ) ) raw_content = response.choices[0].message.content if not raw_content: diff --git a/packages/shared/src/five08/resume_skills_extractor.py b/packages/shared/src/five08/resume_skills_extractor.py index afd5c206..bb3f9a02 100644 --- a/packages/shared/src/five08/resume_skills_extractor.py +++ b/packages/shared/src/five08/resume_skills_extractor.py @@ -5,6 +5,7 @@ import re from typing import Any +from five08.llm import ProviderModel from five08.openai_fallback import FallbackOpenAIClient, OpenAICompatibleProvider from five08.skills import ( DISALLOWED_RESUME_SKILLS, @@ -69,7 +70,12 @@ def __init__( openai_base_url: str | None, provider_attempts: tuple[OpenAICompatibleProvider, ...] | None = None, ) -> None: - self.model = model + self.provider_model = ProviderModel.openai_compatible( + model=model, + api_key=openai_api_key, + base_url=openai_base_url, + ) + self.model = self.provider_model.model self.client: Any = None if provider_attempts and OpenAIClient is not None: @@ -78,10 +84,7 @@ def __init__( client_factory=OpenAIClient, ) elif openai_api_key and OpenAIClient is not None: - self.client = OpenAIClient( - api_key=openai_api_key, - base_url=openai_base_url, - ) + self.client = OpenAIClient(**self.provider_model.client_kwargs()) def extract_skills(self, resume_text: str) -> ExtractedSkills: """Extract skills from resume text.""" @@ -91,26 +94,27 @@ def extract_skills(self, resume_text: str) -> ExtractedSkills: prompt = self._create_prompt(resume_text) try: response = self.client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "system", - "content": ( - "You extract professional skills from resumes for a CRM. " - "Focus on white-collar skills for product development orgs: " - "engineering, product, data, design, growth, and marketing. " - "Return JSON only, no prose. " - "Normalize skills to concise canonical names, lowercase. " - "Provide a strength from 1-5 when known, where 5 is strongest. " - "If uncertain, you may omit it or leave it blank. " - "Bias 3 for simple mentions, 4-5 for recent/current project usage, " - "and 1-2 for weak, outdated, or minimal exposure." - ), - }, - {"role": "user", "content": prompt}, - ], - temperature=0.1, - max_tokens=1200, + **self.provider_model.chat_completion_kwargs( + messages=[ + { + "role": "system", + "content": ( + "You extract professional skills from resumes for a CRM. " + "Focus on white-collar skills for product development orgs: " + "engineering, product, data, design, growth, and marketing. " + "Return JSON only, no prose. " + "Normalize skills to concise canonical names, lowercase. " + "Provide a strength from 1-5 when known, where 5 is strongest. " + "If uncertain, you may omit it or leave it blank. " + "Bias 3 for simple mentions, 4-5 for recent/current project usage, " + "and 1-2 for weak, outdated, or minimal exposure." + ), + }, + {"role": "user", "content": prompt}, + ], + temperature=0.1, + max_tokens=1200, + ) ) content = response.choices[0].message.content if not content: diff --git a/tests/evals/model-profiles.json b/tests/evals/model-profiles.json index 87e2d46c..bff9d4d1 100644 --- a/tests/evals/model-profiles.json +++ b/tests/evals/model-profiles.json @@ -154,6 +154,24 @@ } } }, + "gpt-5.1": { + "name": "gpt-5.1", + "provider": "openai-compatible", + "model": "gpt-5.1", + "api_key_env": "OPENAI_API_KEY", + "base_url": "https://api.openai.com/v1", + "request_options": { + "response_format": { + "type": "json_object" + } + }, + "chat_completion_options": { + "max_tokens_parameter": "max_completion_tokens", + "reasoning_effort": "low", + "verbosity": "low", + "supports_temperature": false + } + }, "gpt-5": { "name": "gpt-5", "provider": "openai-compatible", diff --git a/tests/unit/test_job_match.py b/tests/unit/test_job_match.py index 4548932c..0c76f2b9 100644 --- a/tests/unit/test_job_match.py +++ b/tests/unit/test_job_match.py @@ -175,6 +175,10 @@ def test_extract_normalizes_required_skills() -> None: assert result.required_skills == ["python", "react", "django"] assert result.seniority == "senior" assert result.title == "Backend Engineer" + create_kwargs = mock_client.chat.completions.create.call_args.kwargs + assert create_kwargs["model"] == "gpt-5-mini" + assert create_kwargs["response_format"] == {"type": "json_object"} + assert "temperature" not in create_kwargs def test_extract_backfills_hard_and_soft_from_legacy_required_skills() -> None: @@ -398,6 +402,10 @@ def test_rerank_shortlisted_candidates_parses_structured_response() -> None: missing_requirements=["live webflow projects"], ) ] + create_kwargs = mock_client.chat.completions.create.call_args.kwargs + assert create_kwargs["model"] == "gpt-5-mini" + assert create_kwargs["response_format"] == {"type": "json_object"} + assert "temperature" not in create_kwargs def test_rerank_shortlisted_candidates_parse_error_does_not_log_raw_content() -> None: diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py new file mode 100644 index 00000000..ead3c587 --- /dev/null +++ b/tests/unit/test_llm.py @@ -0,0 +1,98 @@ +"""Unit tests for provider/model request profiles.""" + +import pytest + +from five08.llm import ProviderModel, get_model_profile + + +def test_gpt_5_mini_profile_omits_temperature() -> None: + provider_model = ProviderModel.openai_compatible(model="gpt-5-mini") + + kwargs = provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + temperature=0.1, + response_format={"type": "json_object"}, + max_tokens=100, + reasoning_effort="minimal", + verbosity="low", + ) + + assert kwargs["model"] == "gpt-5-mini" + assert "temperature" not in kwargs + assert kwargs["response_format"] == {"type": "json_object"} + assert kwargs["reasoning_effort"] == "minimal" + assert kwargs["verbosity"] == "low" + + +def test_gpt_4_1_mini_profile_keeps_temperature_but_omits_reasoning_options() -> None: + provider_model = ProviderModel.openai_compatible(model="gpt-4.1-mini") + + kwargs = provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + temperature=0.1, + response_format={"type": "json_object"}, + max_tokens=100, + reasoning_effort="minimal", + verbosity="low", + ) + + assert kwargs["temperature"] == 0.1 + assert kwargs["response_format"] == {"type": "json_object"} + assert "reasoning_effort" not in kwargs + assert "verbosity" not in kwargs + + +def test_openrouter_plain_openai_model_gets_provider_prefix_for_request() -> None: + provider_model = ProviderModel.openai_compatible( + model="gpt-5-mini", + base_url="https://openrouter.ai/api/v1", + ) + + kwargs = provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + temperature=0.1, + ) + + assert provider_model.model == "openai/gpt-5-mini" + assert "temperature" not in kwargs + + +def test_nested_provider_prefix_model_uses_underlying_profile() -> None: + profile = get_model_profile("openrouter/openai/gpt-5-mini") + provider_model = ProviderModel.openai_compatible( + model="openrouter/openai/gpt-5-mini", + ) + + kwargs = provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + temperature=0.1, + response_format={"type": "json_object"}, + ) + + assert profile.name == "gpt-5-mini" + assert kwargs["model"] == "openrouter/openai/gpt-5-mini" + assert kwargs["response_format"] == {"type": "json_object"} + assert "temperature" not in kwargs + + +def test_unknown_non_reasoning_model_preserves_temperature() -> None: + profile = get_model_profile("fake-model") + provider_model = ProviderModel.openai_compatible(model="fake-model") + + kwargs = provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + temperature=0.0, + ) + + assert profile.name == "fake-model" + assert kwargs["temperature"] == 0.0 + + +def test_chat_completion_kwargs_rejects_unprofiled_extra_options() -> None: + provider_model = ProviderModel.openai_compatible(model="gpt-5-mini") + + with pytest.raises(TypeError): + provider_model.chat_completion_kwargs( + messages=[{"role": "user", "content": "Return JSON."}], + seed=1, # type: ignore[call-arg] + ) diff --git a/tests/unit/test_resume_extractor.py b/tests/unit/test_resume_extractor.py index 49cc9f71..e1f06028 100644 --- a/tests/unit/test_resume_extractor.py +++ b/tests/unit/test_resume_extractor.py @@ -192,6 +192,43 @@ def test_split_name_prefers_llm_output() -> None: mock_llm_split.assert_called_once_with("Ada Lovelace") +def test_split_name_with_llm_requests_json_object_response() -> None: + """Name split helper should request JSON because it parses JSON content.""" + fake_response = type( + "Response", + (), + { + "choices": [ + type( + "Choice", + (), + { + "message": type( + "Message", + (), + {"content": '{"firstName":"Ada","lastName":"Lovelace"}'}, + )() + }, + )() + ] + }, + )() + fake_create = Mock(return_value=fake_response) + extractor = ResumeProfileExtractor(api_key="test-key") + extractor.client = type( + "Client", + (), + { + "chat": type( + "Chat", (), {"completions": type("C", (), {"create": fake_create})()} + )() + }, + )() + + assert extractor._split_name_with_llm("Ada Lovelace") == ("Ada", "Lovelace") + assert fake_create.call_args.kwargs["response_format"] == {"type": "json_object"} + + def test_split_name_falls_back_to_heuristic_without_name_hints() -> None: """Split-name should still split names using heuristics when LLM fails.""" extractor = ResumeProfileExtractor(api_key="test-key") @@ -1113,6 +1150,56 @@ def test_extract_uses_supported_gpt54_reasoning_effort() -> None: assert kwargs["verbosity"] == "low" +def test_extract_uses_supported_gpt51_reasoning_effort() -> None: + """GPT-5.1 should not use the GPT-5 minimal reasoning-effort value.""" + + response = type( + "Response", + (), + { + "choices": [ + type( + "Choice", + (), + { + "finish_reason": "stop", + "message": type( + "Message", + (), + { + "content": ( + '{"name":"Jane Doe","firstName":"Jane",' + '"lastName":"Doe","email":"jane@example.com"}' + ) + }, + )(), + }, + )() + ] + }, + )() + fake_completions = Mock() + fake_completions.create.return_value = response + extractor = ResumeProfileExtractor( + api_key="test-key", + model="gpt-5.1", + max_tokens=40, + ) + extractor.client = type( + "Client", + (), + {"chat": type("Chat", (), {"completions": fake_completions})()}, + )() + + with patch.object(extractor, "_split_name_with_llm", return_value=("Jane", "Doe")): + extractor.extract("Jane Doe\nSoftware Engineer") + + kwargs = fake_completions.create.call_args.kwargs + assert kwargs["max_completion_tokens"] == 40 + assert kwargs["reasoning_effort"] == "low" + assert kwargs["verbosity"] == "low" + + def test_extract_repairs_json_with_comments_trailing_commas_and_prose() -> None: """Common near-JSON formatting issues should not force heuristic fallback.""" @@ -1785,6 +1872,7 @@ def test_extract_accepts_nullable_website_url_candidates() -> None: )() }, )() + extractor.model = "fake-model" result = extractor.extract("Jane Doe\nSoftware Engineer")