diff --git a/apps/discord_bot/src/five08/discord_bot/cogs/crm.py b/apps/discord_bot/src/five08/discord_bot/cogs/crm.py index fc0dc832..0213b15b 100644 --- a/apps/discord_bot/src/five08/discord_bot/cogs/crm.py +++ b/apps/discord_bot/src/five08/discord_bot/cogs/crm.py @@ -1345,6 +1345,10 @@ async def confirm_create( create_payload = self.crm_cog._build_resume_create_contact_payload( file_content=self.file_content ) + self.crm_cog._populate_name_fields( + create_payload, + source_name=str(create_payload.get("name", "")).strip(), + ) target_contact = self.crm_cog.espo_api.request( "POST", "Contact", create_payload ) @@ -1375,6 +1379,8 @@ async def confirm_create( ) except Exception as exc: status_code = getattr(self.crm_cog.espo_api, "status_code", None) + error_detail = str(exc).strip() or "Unknown error" + status_note = f" (status {status_code})" if status_code else "" logger.exception( "Failed to create contact from resume filename=%s target_scope=%s inferred_meta=%s status_code=%s payload=%s", self.filename, @@ -1399,7 +1405,7 @@ async def confirm_create( metadata=audit_metadata, ) await interaction.followup.send( - "⚠️ Could not create a contact from this resume. " + f"⚠️ Could not create a contact from this resume: `{error_detail}`{status_note}. " "Please provide `search_term` or `link_user`.", ephemeral=True, ) @@ -1532,6 +1538,11 @@ def __init__(self, bot: commands.Bot) -> None: discord_logs_webhook_wait=settings.discord_logs_webhook_wait, ) + @staticmethod + def _configured_linkedin_field() -> str: + """Return the configured field for LinkedIn profile values.""" + return str(getattr(settings, "crm_linkedin_field", "cLinkedInUrl")) + def _audit_command( self, *, @@ -3280,6 +3291,26 @@ def _to_values(raw_values: Any) -> list[str]: return "\nParsed resume identifiers: " + "; ".join(summary_parts) + def _build_resume_parsed_identity_summary(self, file_content: bytes) -> str: + """Build a short display summary of parsed contact identity fields.""" + hints = self._extract_resume_contact_hints(file_content) + parsed_name = str(hints.get("name") or "").strip() + if not parsed_name: + parsed_name = self._extract_resume_name_fallback(file_content) + + emails = hints.get("emails", []) + if not isinstance(emails, list): + emails = [] + primary_email = "No email parsed" + if emails: + raw_email = str(emails[0]).strip() + if raw_email: + primary_email = raw_email + + return ( + f"\nParsed contact details: name=`{parsed_name}`, email=`{primary_email}`" + ) + def _extract_resume_name_hint(self, file_content: bytes) -> str: """Best-effort contact name extraction from resume text.""" hints = self._extract_resume_contact_hints(file_content) @@ -3288,6 +3319,18 @@ def _extract_resume_name_hint(self, file_content: bytes) -> str: return extracted_name return self._extract_resume_name_fallback(file_content) + def _populate_name_fields( + self, payload: dict[str, str], *, source_name: str + ) -> None: + """Populate firstName and lastName fields for CRM contact creation payloads.""" + first_name, last_name = self.resume_extractor.split_name( + full_name=source_name, + first_name_hint=str(payload.get("firstName", "")).strip() or None, + last_name_hint=str(payload.get("lastName", "")).strip() or None, + ) + payload["firstName"] = first_name + payload["lastName"] = last_name + def _build_resume_create_contact_payload( self, file_content: bytes ) -> dict[str, str]: @@ -3312,6 +3355,7 @@ def _build_resume_create_contact_payload( "type": "Prospect", "name": contact_name, } + self._populate_name_fields(payload, source_name=contact_name) if emails: primary_email = emails[0] if primary_email.endswith("@508.dev"): @@ -3321,7 +3365,7 @@ def _build_resume_create_contact_payload( if github_usernames: payload["cGitHubUsername"] = github_usernames[0] if linkedin_urls: - payload["cLinkedInUrl"] = linkedin_urls[0] + payload[self._configured_linkedin_field()] = linkedin_urls[0] phone = hints.get("phone") if isinstance(phone, str) and phone.strip(): payload["phoneNumber"] = phone.strip() @@ -3370,6 +3414,9 @@ def _build_contact_payload_for_link_user( parsed_name = str(payload.get("name", "")).strip() if not parsed_name or parsed_name == "Resume Candidate": payload["name"] = self._fallback_contact_name_for_discord_user(user) + self._populate_name_fields( + payload, source_name=str(payload.get("name", "")).strip() + ) payload.update(self._discord_link_fields(user)) return payload @@ -3377,10 +3424,20 @@ async def _search_contacts_by_field( self, *, field: str, value: str, max_size: int = 10 ) -> list[dict[str, Any]]: """Search contacts using an exact field equals match.""" + select_fields = [ + "id", + "name", + "emailAddress", + "c508Email", + "cDiscordUsername", + "cGitHubUsername", + ] + if field not in select_fields: + select_fields.append(field) search_params = { "where": [{"type": "equals", "attribute": field, "value": value}], "maxSize": max_size, - "select": "id,name,emailAddress,c508Email,cDiscordUsername,cGitHubUsername,cLinkedInUrl", + "select": ",".join(select_fields), } response = self.espo_api.request("GET", "Contact", search_params) @@ -3450,7 +3507,7 @@ async def _infer_contact_from_resume( for linkedin_url in linkedin_urls: attempts.append({"method": "linkedin", "value": linkedin_url}) contacts = await self._search_contacts_by_field( - field="cLinkedInUrl", value=linkedin_url + field=self._configured_linkedin_field(), value=linkedin_url ) if len(contacts) == 1: return contacts[0], { @@ -4948,7 +5005,7 @@ async def update_contact( if linkedin is not None: clean_linkedin = linkedin.strip() if clean_linkedin: - update_data["cLinkedInUrl"] = clean_linkedin + update_data[self._configured_linkedin_field()] = clean_linkedin requested_updates.append("linkedin") if rate_range is not None: @@ -5028,9 +5085,10 @@ async def update_contact( inline=True, ) if "linkedin" in requested_updates: + linkedin_field = self._configured_linkedin_field() embed.add_field( name="🔗 LinkedIn", - value=update_data["cLinkedInUrl"], + value=update_data[linkedin_field], inline=True, ) if "skills" in requested_updates: @@ -5605,7 +5663,8 @@ async def upload_resume( await interaction.followup.send( "⚠️ Could not find a unique contact from this resume. " "Would you like to create a new contact from the parsed details?" - + inferred_attempts_text, + + inferred_attempts_text + + self._build_resume_parsed_identity_summary(file_content), view=view, ephemeral=True, ) diff --git a/packages/shared/src/five08/resume_extractor.py b/packages/shared/src/five08/resume_extractor.py index 6fd96cf2..ed69df31 100644 --- a/packages/shared/src/five08/resume_extractor.py +++ b/packages/shared/src/five08/resume_extractor.py @@ -67,6 +67,26 @@ r"(?:https?://)?(?:[\w.-]+\.)?linkedin\.com/in/[A-Za-z0-9_%-]+/?", flags=re.IGNORECASE, ) +DEFAULT_FALLBACK_FIRST_NAME = "Resume" +DEFAULT_FALLBACK_LAST_NAME = "Candidate" +SINGLE_NAME_FALLBACK_LAST_NAME = "Unknown" +NAME_PREFIXES = { + "dr", + "mr", + "mrs", + "ms", + "prof", + "miss", + "mx", +} +NAME_SUFFIXES = { + "jr", + "sr", + "iii", + "ii", + "iv", + "v", +} def _bounded_confidence(value: Any, fallback: float) -> float: @@ -85,6 +105,18 @@ def _normalize_email(value: Any) -> str | None: return normalized or None +def _normalize_name_part(value: Any) -> str | None: + if not isinstance(value, str): + return None + normalized = value.strip() + if not normalized: + return None + normalized = re.sub(r"\s+", " ", normalized).strip() + if not any(char.isalpha() for char in normalized): + return None + return normalized + + def _coerce_email_list(value: Any) -> list[str]: if value is None: return [] @@ -564,6 +596,8 @@ class ResumeExtractedProfile(BaseModel): """Normalized profile fields extracted from resume text.""" name: str | None = None + first_name: str | None = None + last_name: str | None = None email: str | None = None github_username: str | None = None linkedin_url: str | None = None @@ -652,6 +686,18 @@ def extract( raise ValueError("LLM returned empty content") parsed = _parse_json_object(raw_content) + raw_first_name = parsed.get("firstName") + if raw_first_name is None: + raw_first_name = parsed.get("first_name") + raw_last_name = parsed.get("lastName") + if raw_last_name is None: + raw_last_name = parsed.get("last_name") + extracted_name = _normalize_name(parsed.get("name")) + extracted_first_name, extracted_last_name = self.split_name( + full_name=extracted_name, + first_name_hint=raw_first_name, + last_name_hint=raw_last_name, + ) parsed_url_candidates = _extract_website_url_candidates( parsed.get("website_url_candidates") ) @@ -725,7 +771,9 @@ def extract( if _linkedin_profile_key(item) != linkedin_profile_key ] return ResumeExtractedProfile( - name=_normalize_name(parsed.get("name")), + name=extracted_name, + first_name=extracted_first_name, + last_name=extracted_last_name, email=parsed_email, additional_emails=parsed_emails, github_username=github_username, @@ -817,9 +865,12 @@ def _heuristic_extract( availability = _normalize_scalar(source_texts.get("rate")) rate_range = _normalize_scalar(source_texts.get("rate_range")) referred_by = _normalize_scalar(source_texts.get("referred_by")) + first_name, last_name = self.split_name(full_name=name_match) return ResumeExtractedProfile( name=name_match, + first_name=first_name, + last_name=last_name, email=extracted_emails[0] if extracted_emails else None, additional_emails=extracted_emails[1:], github_username=github_username, @@ -880,7 +931,8 @@ def _build_prompt( return ( "Extract candidate profile fields from all provided sources.\n" "Return JSON with exact keys and no extras:\n" - '{"name": string|null, "email": string|null, "additional_emails": string[]|null, ' + '{"name": string|null, "firstName": string|null, "lastName": string|null, ' + '"email": string|null, "additional_emails": string[]|null, ' '"github_username": string|null, "linkedin_url": string|null, ' '"website_url_candidates": [' '{"url": string|null, "kind": "personal_website|social_profile|other", ' @@ -928,6 +980,120 @@ def _build_prompt( f"Sources:\n{snippet}" ) + def split_name( + self, + full_name: str | None, + *, + first_name_hint: str | None = None, + last_name_hint: str | None = None, + ) -> tuple[str, str]: + """Return CRM-safe first/last-name pairs for a profile name.""" + first_name = _normalize_name_part(first_name_hint) + last_name = _normalize_name_part(last_name_hint) + normalized_full_name = _normalize_name(full_name) + + if first_name and last_name: + return first_name, last_name + + inferred_first: str | None = first_name + inferred_last: str | None = last_name + if normalized_full_name: + inferred = None + if self.client is not None: + try: + inferred = self._split_name_with_llm(normalized_full_name) + except Exception: + inferred = None + if inferred is None: + inferred = self._split_name_heuristically(normalized_full_name) + if inferred: + inferred_first, inferred_last = inferred + if not first_name: + first_name = inferred_first + if not last_name: + last_name = inferred_last + + return ( + first_name or DEFAULT_FALLBACK_FIRST_NAME, + last_name or inferred_last or SINGLE_NAME_FALLBACK_LAST_NAME, + ) + + def _split_name_with_llm(self, full_name: str) -> tuple[str, str] | None: + """Ask the model to split a display name into first/last.""" + if self.client is None: + return None + + response = self.client.chat.completions.create( + model=self.model, + messages=[ + { + "role": "system", + "content": ( + "Split person names into firstName and lastName for CRM fields. " + "Return JSON only with no extra keys." + ), + }, + { + "role": "user", + "content": ( + f"Name: {full_name}. " + 'If this is a single name, set lastName to "Unknown".' + ), + }, + ], + temperature=0.0, + max_tokens=80, + ) + raw_content = response.choices[0].message.content + if not raw_content: + raise ValueError("LLM returned empty name split content") + + parsed = _parse_json_object(raw_content) + split_first = _normalize_name_part(parsed.get("firstName")) + split_last = _normalize_name_part(parsed.get("lastName")) + split_first = split_first or _normalize_name_part(parsed.get("first_name")) + split_last = split_last or _normalize_name_part(parsed.get("last_name")) + if not split_first and not split_last: + return None + return split_first or full_name, split_last or SINGLE_NAME_FALLBACK_LAST_NAME + + @staticmethod + def _split_name_heuristically(full_name: str) -> tuple[str, str]: + parts = [ + token.strip() for token in re.split(r"\s+", full_name) if token.strip() + ] + if not parts: + return ( + DEFAULT_FALLBACK_FIRST_NAME, + DEFAULT_FALLBACK_LAST_NAME, + ) + + while parts and parts[0].lower().strip(".") in NAME_PREFIXES: + parts = parts[1:] + + if not parts: + return ( + DEFAULT_FALLBACK_FIRST_NAME, + DEFAULT_FALLBACK_LAST_NAME, + ) + + if len(parts) == 1: + return ( + parts[0], + SINGLE_NAME_FALLBACK_LAST_NAME, + ) + + if len(parts) >= 2 and parts[-1].lower().strip(".") in NAME_SUFFIXES: + return ( + parts[0], + parts[-2] if len(parts) >= 3 else SINGLE_NAME_FALLBACK_LAST_NAME, + ) + + return ( + parts[0], + parts[-1], + ) + @staticmethod def _extract_name(resume_text: str) -> str | None: lines = [line.strip() for line in resume_text.splitlines() if line.strip()] diff --git a/tests/unit/test_crm.py b/tests/unit/test_crm.py index 24cdce51..762086b3 100644 --- a/tests/unit/test_crm.py +++ b/tests/unit/test_crm.py @@ -1578,6 +1578,28 @@ async def test_search_contact_for_linking_includes_discord_username_filter_when_ "value": "john", } in where_filters + @pytest.mark.asyncio + async def test_search_contacts_by_field_uses_configured_linkedin_field( + self, crm_cog + ): + """Search-by-field uses the configured LinkedIn field name.""" + crm_cog.espo_api.request.return_value = {"list": []} + + await crm_cog._search_contacts_by_field( + field="cLinkedIn", value="https://linkedin.com/in/test" + ) + + call = crm_cog.espo_api.request.call_args + assert call.args[0] == "GET" + assert call.args[1] == "Contact" + params = call.args[2] + assert params["where"][0]["attribute"] == "cLinkedIn" + assert params["where"][0]["value"] == "https://linkedin.com/in/test" + assert params["where"][0]["type"] == "equals" + select_fields = params["select"].split(",") + assert "cLinkedIn" in select_fields + assert "cLinkedInUrl" not in select_fields + @pytest.mark.asyncio async def test_crm_status_success(self, crm_cog, mock_interaction): """Test successful CRM status check.""" @@ -1839,6 +1861,45 @@ async def test_update_contact_requires_updates(self, crm_cog, mock_interaction): message = mock_interaction.followup.send.call_args[0][0] assert "Provide at least one of" in message + @pytest.mark.asyncio + async def test_update_contact_uses_configured_linkedin_field( + self, crm_cog, mock_interaction + ): + """Configured LinkedIn custom field should flow through update payload and embed.""" + mock_interaction.user.id = 123456789 + + with ( + patch.object( + crm_cog, "_configured_linkedin_field", return_value="cLinkedIn" + ), + patch.object( + crm_cog, + "_find_contact_by_discord_id", + new=AsyncMock(return_value={"id": "contact123", "name": "Test User"}), + ), + ): + crm_cog.espo_api.request.return_value = {"id": "contact123"} + + await crm_cog.update_contact.callback( + crm_cog, + mock_interaction, + linkedin="https://www.linkedin.com/in/test-user/", + ) + + assert mock_interaction.followup.send.call_count == 1 + call = crm_cog.espo_api.request.call_args + assert call.args[0] == "PUT" + assert call.args[1] == "Contact/contact123" + assert call.args[2] == {"cLinkedIn": "https://www.linkedin.com/in/test-user/"} + + send_kwargs = mock_interaction.followup.send.call_args.kwargs + linkedin_value = next( + field.value + for field in send_kwargs["embed"].fields + if field.name == "🔗 LinkedIn" + ) + assert linkedin_value == "https://www.linkedin.com/in/test-user/" + @pytest.mark.asyncio async def test_update_contact_self_not_linked(self, crm_cog, mock_interaction): """Self update without a linked CRM contact should return a helpful error.""" @@ -2136,6 +2197,8 @@ def test_build_resume_create_contact_payload_sets_email_field_by_domain( assert payload["type"] == "Prospect" assert payload["name"] == "Person Example" assert payload["emailAddress"] == "person@example.com" + assert payload["firstName"] == "Person" + assert payload["lastName"] == "Example" assert "c508Email" not in payload with ( @@ -2156,6 +2219,8 @@ def test_build_resume_create_contact_payload_sets_email_field_by_domain( assert payload["type"] == "Prospect" assert payload["name"] == "Person 508" assert payload["c508Email"] == "person@508.dev" + assert payload["firstName"] == "Person" + assert payload["lastName"] == "Unknown" assert "emailAddress" not in payload def test_build_resume_create_contact_payload_populates_prospect_details( @@ -2188,6 +2253,32 @@ def test_build_resume_create_contact_payload_populates_prospect_details( assert payload["addressCountry"] == "Canada" assert payload["cSeniority"] == "senior" assert payload["skills"] == "Python, fastapi" + assert payload["firstName"] == "Jane" + assert payload["lastName"] == "Doe" + + def test_build_resume_create_contact_payload_single_name_uses_unknown_last( + self, crm_cog + ): + """Single token names should include a placeholder lastName.""" + with ( + patch.object( + crm_cog, + "_extract_resume_contact_hints", + return_value={ + "emails": ["single@example.com"], + "github_usernames": [], + "linkedin_urls": [], + }, + ), + patch.object(crm_cog, "_extract_resume_name_hint", return_value="Cher"), + ): + payload = crm_cog._build_resume_create_contact_payload(b"resume") + + assert payload["type"] == "Prospect" + assert payload["name"] == "Cher" + assert payload["firstName"] == "Cher" + assert payload["lastName"] == "Unknown" + assert payload["emailAddress"] == "single@example.com" def test_build_inference_lookup_summary_uses_attempt_text(self, crm_cog): """Test lookup summary uses attempt text when attempts are present.""" @@ -2258,6 +2349,27 @@ def test_build_inference_lookup_summary_with_non_dict_hints(self, crm_cog): assert summary == "" + def test_build_resume_parsed_identity_summary_includes_name_and_email( + self, crm_cog + ): + """Parsed name and email are included in resume identity summary.""" + with patch.object( + crm_cog, + "_extract_resume_contact_hints", + return_value={ + "name": "Jane Doe", + "emails": ["jane@example.com", "ignored@alt.example"], + }, + ): + summary = crm_cog._build_resume_parsed_identity_summary( + file_content=b"resume" + ) + + assert ( + summary + == "\nParsed contact details: name=`Jane Doe`, email=`jane@example.com`" + ) + @pytest.mark.asyncio async def test_upload_resume_link_user_shows_confirm_then_creates_contact( self, crm_cog, mock_interaction @@ -2345,6 +2457,8 @@ async def test_upload_resume_link_user_shows_confirm_then_creates_contact( "Contact", { "name": "Candidate User", + "firstName": "Candidate", + "lastName": "User", "cDiscordUsername": "candidateuser", "cDiscordUserID": "202", }, @@ -2355,6 +2469,66 @@ async def test_upload_resume_link_user_shows_confirm_then_creates_contact( ) assert mock_upload.await_args.kwargs.get("contact") == created_contact + @pytest.mark.asyncio + async def test_upload_resume_no_matching_inferred_contact_shows_name_and_email( + self, crm_cog, mock_interaction + ): + """No-match inference should show parsed name/email for the candidate.""" + mock_interaction.user.id = 101 + mock_interaction.user.name = "Requester" + steering_role = Mock() + steering_role.name = "Steering Committee" + mock_interaction.user.roles = [steering_role] + + resume_file = Mock() + resume_file.filename = "candidate.pdf" + resume_file.size = 1024 + resume_file.read = AsyncMock(return_value=b"resume-bytes") + + with ( + patch.object( + crm_cog, + "_infer_contact_from_resume", + new=AsyncMock(return_value=(None, {"reason": "no_matching_contact"})), + ), + patch.object( + crm_cog, + "_build_resume_parsed_identity_summary", + return_value=( + "\nParsed contact details: name=`Jane Doe`, email=`jane@example.com`" + ), + ), + patch.object( + crm_cog, + "_find_contact_by_discord_id", + new=AsyncMock(return_value=None), + ), + patch( + "five08.discord_bot.cogs.crm.check_user_roles_with_hierarchy", + return_value=True, + ), + patch( + "five08.discord_bot.cogs.crm.settings.api_shared_secret", + "test-shared-secret", + ), + ): + await crm_cog.upload_resume.callback( + crm_cog, + mock_interaction, + resume_file, + None, + False, + None, + ) + + message = mock_interaction.followup.send.call_args[0][0] + assert "⚠️ Could not find a unique contact from this resume." in message + assert ( + "Parsed contact details: name=`Jane Doe`, email=`jane@example.com`" + in message + ) + assert "view" in mock_interaction.followup.send.call_args.kwargs + @pytest.mark.asyncio async def test_resume_create_contact_view_logs_create_failure( self, crm_cog, mock_interaction @@ -2411,8 +2585,19 @@ async def test_resume_create_contact_view_logs_create_failure( audit_metadata = crm_cog._audit_command.call_args.kwargs["metadata"] assert audit_metadata["reason"] == "contact_create_failed" assert audit_metadata["status_code"] == 422 - assert audit_metadata["create_payload_keys"] == ["emailAddress", "name"] + assert audit_metadata["create_payload_keys"] == [ + "emailAddress", + "firstName", + "lastName", + "name", + ] mock_interaction.followup.send.assert_called_once() + failure_message = mock_interaction.followup.send.call_args.args[0] + assert ( + "Could not create a contact from this resume: `validation failed` (status 422)." + in failure_message + ) + assert "Please provide `search_term` or `link_user`." in failure_message @pytest.mark.asyncio async def test_reprocess_resume_shows_confirmation(self, crm_cog, mock_interaction): diff --git a/tests/unit/test_resume_extractor.py b/tests/unit/test_resume_extractor.py index ac04b1ac..73c00b64 100644 --- a/tests/unit/test_resume_extractor.py +++ b/tests/unit/test_resume_extractor.py @@ -1,5 +1,7 @@ """Unit tests for resume extractor helpers.""" +from unittest.mock import Mock, patch + from five08.resume_extractor import _coerce_email_list from five08.resume_extractor import ResumeProfileExtractor @@ -60,6 +62,45 @@ def test_extract_profile_links_route_social_urls_away_from_website() -> None: assert all("node.js" not in link.casefold() for link in result.website_links) +def test_split_name_prefers_llm_output() -> None: + """Split-name should prefer LLM output when it is available.""" + extractor = ResumeProfileExtractor(api_key="test-key") + extractor.client = Mock() + + with patch.object( + extractor, + "_split_name_with_llm", + return_value=("Ada", "Lovelace"), + ) as mock_llm_split: + first_name, last_name = extractor.split_name("Ada Lovelace") + + assert first_name == "Ada" + assert last_name == "Lovelace" + mock_llm_split.assert_called_once_with("Ada Lovelace") + + +def test_split_name_falls_back_to_heuristic_without_name_hints() -> None: + """Split-name should still split names using heuristics when LLM fails.""" + extractor = ResumeProfileExtractor(api_key="test-key") + extractor.client = Mock() + + with patch.object(extractor, "_split_name_with_llm", side_effect=RuntimeError()): + first_name, last_name = extractor.split_name("Dr. Grace Hopper") + + assert first_name == "Grace" + assert last_name == "Hopper" + + +def test_split_name_single_token_returns_unknown_last_name() -> None: + """Single token names should use a placeholder last name.""" + extractor = ResumeProfileExtractor(api_key=None) + + first_name, last_name = extractor.split_name("Cher") + + assert first_name == "Cher" + assert last_name == "Unknown" + + def test_extract_profile_backfills_website_and_social_urls_from_markdown() -> None: """Markdown links should be split by website vs social and routed correctly."""