Skip to content

Commit b88f052

Browse files
authored
Python: Preserve Citation Title in AnnotationContent from Azure AI Foundry Annotations (#12152)
### Motivation and Context

When invoking an Azure AI Foundry agent via `AzureAIAgent`, `url_citation` metadata such as the document filename (title) is omitted from the resulting `AnnotationContent`. Only the `url` field is populated; the filename (title) is lost, and no structured metadata is retained. This breaks traceability and rich citations in downstream responses.

This PR updates the annotation mapping to carry the `title` from `url_citation` into the resulting `AnnotationContent`. It also updates the content types so that the available information is properly shown in the `to_dict()` output and the `__str__` output, and so that the new attributes are handled by the `from_element` class method.

<!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. -->

### Description

Citations in responses returned via `AzureAIAgent` now retain the full provenance information (url, title, etc.) as delivered by the underlying agent, matching the behavior observed with direct `AIProjectClient` calls.

- Closes #12133
- Adds further unit tests to validate functionality

<!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! -->

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [X] The code builds clean without any errors or warnings
- [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [X] All unit tests pass, and I have added new tests where possible
- [X] I didn't break anyone 😄
1 parent e29fcf9 commit b88f052

File tree

7 files changed

+220
-47
lines changed

7 files changed

+220
-47
lines changed

python/semantic_kernel/agents/azure_ai/agent_content_generation.py

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
ThreadRun,
2929
)
3030

31-
from semantic_kernel.contents.annotation_content import AnnotationContent
31+
from semantic_kernel.contents.annotation_content import AnnotationContent, CitationType
3232
from semantic_kernel.contents.chat_message_content import ChatMessageContent
3333
from semantic_kernel.contents.file_reference_content import FileReferenceContent
3434
from semantic_kernel.contents.function_call_content import FunctionCallContent
@@ -526,22 +526,30 @@ def generate_streaming_code_interpreter_content(
526526
def generate_annotation_content(
527527
annotation: MessageTextFilePathAnnotation | MessageTextFileCitationAnnotation | MessageTextUrlCitationAnnotation,
528528
) -> AnnotationContent:
529-
"""Generate annotation content."""
529+
"""Generate annotation content with safe attribute access."""
530530
file_id = None
531531
url = None
532-
if isinstance(annotation, MessageTextFilePathAnnotation) and annotation.file_path is not None:
532+
title = None
533+
citation_type = None
534+
if isinstance(annotation, MessageTextFilePathAnnotation) and annotation.file_path:
533535
file_id = annotation.file_path.file_id
534-
elif isinstance(annotation, MessageTextFileCitationAnnotation) and annotation.file_citation is not None:
536+
citation_type = CitationType.FILE_PATH
537+
elif isinstance(annotation, MessageTextFileCitationAnnotation) and annotation.file_citation:
535538
file_id = annotation.file_citation.file_id
536-
elif isinstance(annotation, MessageTextUrlCitationAnnotation) and annotation.url_citation is not None:
537-
url = annotation.url_citation.url if annotation.url_citation.url else None
539+
citation_type = CitationType.FILE_CITATION
540+
elif isinstance(annotation, MessageTextUrlCitationAnnotation) and annotation.url_citation:
541+
url = annotation.url_citation.url
542+
title = annotation.url_citation.title
543+
citation_type = CitationType.URL_CITATION
538544

539545
return AnnotationContent(
540546
file_id=file_id,
541-
quote=annotation.text,
542-
start_index=annotation.start_index if annotation.start_index is not None else None,
543-
end_index=annotation.end_index if annotation.end_index is not None else None,
547+
quote=getattr(annotation, "text", None),
548+
start_index=getattr(annotation, "start_index", None),
549+
end_index=getattr(annotation, "end_index", None),
544550
url=url,
551+
title=title,
552+
citation_type=citation_type,
545553
)
546554

547555

@@ -551,24 +559,31 @@ def generate_streaming_annotation_content(
551559
| MessageDeltaTextFileCitationAnnotation
552560
| MessageDeltaTextUrlCitationAnnotation,
553561
) -> StreamingAnnotationContent:
554-
"""Generate streaming annotation content."""
562+
"""Generate streaming annotation content with defensive checks."""
555563
file_id = None
556564
url = None
557565
quote = None
566+
title = None
567+
citation_type = None
558568
if isinstance(annotation, MessageDeltaTextFilePathAnnotation) and annotation.file_path:
559-
file_id = annotation.file_path.file_id if annotation.file_path.file_id else None
560-
quote = annotation.text if annotation.text else None
569+
file_id = annotation.file_path.file_id
570+
quote = getattr(annotation, "text", None)
571+
citation_type = CitationType.FILE_PATH
561572
elif isinstance(annotation, MessageDeltaTextFileCitationAnnotation) and annotation.file_citation:
562-
file_id = annotation.file_citation.file_id if annotation.file_citation.file_id else None
563-
quote = annotation.text if annotation.text else None
573+
file_id = annotation.file_citation.file_id
574+
quote = getattr(annotation, "text", None)
575+
citation_type = CitationType.FILE_CITATION
564576
elif isinstance(annotation, MessageDeltaTextUrlCitationAnnotation) and annotation.url_citation:
565-
url = annotation.url_citation.url if annotation.url_citation.url else None
566-
quote = annotation.url_citation.title if annotation.url_citation.title else None
577+
url = annotation.url_citation.url
578+
title = annotation.url_citation.title
579+
citation_type = CitationType.URL_CITATION
567580

568581
return StreamingAnnotationContent(
569582
file_id=file_id,
570583
quote=quote,
571-
start_index=annotation.start_index if annotation.start_index is not None else None,
572-
end_index=annotation.end_index if annotation.end_index is not None else None,
584+
start_index=getattr(annotation, "start_index", None),
585+
end_index=getattr(annotation, "end_index", None),
573586
url=url,
587+
title=title,
588+
citation_type=citation_type,
574589
)

python/semantic_kernel/contents/annotation_content.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
from typing import Any, ClassVar, Literal, TypeVar
66
from xml.etree.ElementTree import Element # nosec
77

8-
from pydantic import Field
9-
from pydantic_settings import SettingsConfigDict
8+
from pydantic import ConfigDict, Field
109

1110
from semantic_kernel.contents.const import ANNOTATION_CONTENT_TAG, ContentTypes
1211
from semantic_kernel.contents.kernel_content import KernelContent
@@ -17,12 +16,13 @@
1716
_T = TypeVar("_T", bound="AnnotationContent")
1817

1918

19+
@experimental
2020
class CitationType(str, Enum):
2121
"""Citation type."""
2222

2323
URL_CITATION = "url_citation"
24+
FILE_PATH = "file_path"
2425
FILE_CITATION = "file_citation"
25-
TEXT_CITATION = "text_citation"
2626

2727

2828
@experimental
@@ -39,19 +39,21 @@ class AnnotationContent(KernelContent):
3939
title: str | None = None
4040
citation_type: CitationType | None = Field(None, alias="type")
4141

42-
model_config = SettingsConfigDict(
42+
model_config = ConfigDict(
4343
extra="ignore",
44-
case_sensitive=False,
4544
populate_by_name=True,
4645
)
4746

4847
def __str__(self) -> str:
4948
"""Return the string representation of the annotation content."""
50-
return f"AnnotationContent(file_id={self.file_id}, url={self.url}, quote={self.quote}, start_index={self.start_index}, end_index={self.end_index})" # noqa: E501
49+
ctype = self.citation_type.value if self.citation_type else None
50+
return f"AnnotationContent(type={ctype}, file_id={self.file_id}, url={self.url}, quote={self.quote}, start_index={self.start_index}, end_index={self.end_index})" # noqa: E501
5151

5252
def to_element(self) -> Element:
5353
"""Convert the annotation content to an Element."""
5454
element = Element(self.tag)
55+
if self.citation_type:
56+
element.set("type", self.citation_type)
5557
if self.file_id:
5658
element.set("file_id", self.file_id)
5759
if self.quote:
@@ -62,22 +64,27 @@ def to_element(self) -> Element:
6264
element.set("end_index", str(self.end_index))
6365
if self.url is not None:
6466
element.set("url", self.url)
67+
if self.title is not None:
68+
element.set("title", self.title)
6569
return element
6670

6771
@classmethod
6872
def from_element(cls: type[_T], element: Element) -> _T:
6973
"""Create an instance from an Element."""
7074
return cls(
75+
type=element.get("type"),
7176
file_id=element.get("file_id"),
7277
quote=element.get("quote"),
7378
start_index=int(element.get("start_index")) if element.get("start_index") else None, # type: ignore
7479
end_index=int(element.get("end_index")) if element.get("end_index") else None, # type: ignore
7580
url=element.get("url") if element.get("url") else None, # type: ignore
81+
title=element.get("title") if element.get("title") else None, # type: ignore
7682
)
7783

7884
def to_dict(self) -> dict[str, Any]:
7985
"""Convert the instance to a dictionary."""
86+
ctype = self.citation_type.value if self.citation_type else None
8087
return {
8188
"type": "text",
82-
"text": f"{self.file_id or self.url} {self.quote} (Start Index={self.start_index}->End Index={self.end_index})", # noqa: E501
89+
"text": f"type={ctype}, {self.file_id or self.url} {self.quote} (Start Index={self.start_index}->End Index={self.end_index})", # noqa: E501
8390
}

python/semantic_kernel/contents/streaming_annotation_content.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
from typing import Any, ClassVar, Literal, TypeVar
55
from xml.etree.ElementTree import Element # nosec
66

7-
from pydantic import Field
7+
from pydantic import ConfigDict, Field
88

9+
from semantic_kernel.contents.annotation_content import CitationType
910
from semantic_kernel.contents.const import STREAMING_ANNOTATION_CONTENT_TAG, ContentTypes
1011
from semantic_kernel.contents.kernel_content import KernelContent
1112
from semantic_kernel.utils.feature_stage_decorator import experimental
@@ -29,14 +30,24 @@ class StreamingAnnotationContent(KernelContent):
2930
start_index: int | None = None
3031
end_index: int | None = None
3132
url: str | None = None
33+
title: str | None = None
34+
citation_type: CitationType | None = Field(None, alias="type")
35+
36+
model_config = ConfigDict(
37+
extra="ignore",
38+
populate_by_name=True,
39+
)
3240

3341
def __str__(self) -> str:
3442
"""Return the string representation of the annotation content."""
35-
return f"StreamingAnnotationContent(file_id={self.file_id}, url={self.url}, quote={self.quote}, start_index={self.start_index}, end_index={self.end_index})" # noqa: E501
43+
ctype = self.citation_type.value if self.citation_type else None
44+
return f"StreamingAnnotationContent(type={ctype}, file_id={self.file_id}, url={self.url}, quote={self.quote}, title={self.title}, start_index={self.start_index}, end_index={self.end_index})" # noqa: E501
3645

3746
def to_element(self) -> Element:
3847
"""Convert the annotation content to an Element."""
3948
element = Element(self.tag)
49+
if self.citation_type:
50+
element.set("type", self.citation_type)
4051
if self.file_id:
4152
element.set("file_id", self.file_id)
4253
if self.quote:
@@ -47,22 +58,27 @@ def to_element(self) -> Element:
4758
element.set("end_index", str(self.end_index))
4859
if self.url is not None:
4960
element.set("url", self.url)
61+
if self.title is not None:
62+
element.set("title", self.title)
5063
return element
5164

5265
@classmethod
5366
def from_element(cls: type[_T], element: Element) -> _T:
5467
"""Create an instance from an Element."""
5568
return cls(
69+
type=element.get("type"),
5670
file_id=element.get("file_id"),
5771
quote=element.get("quote"),
5872
start_index=int(element.get("start_index")) if element.get("start_index") else None, # type: ignore
5973
end_index=int(element.get("end_index")) if element.get("end_index") else None, # type: ignore
6074
url=element.get("url") if element.get("url") else None, # type: ignore
75+
title=element.get("title") if element.get("title") else None, # type: ignore
6176
)
6277

6378
def to_dict(self) -> dict[str, Any]:
6479
"""Convert the instance to a dictionary."""
80+
ctype = self.citation_type.value if self.citation_type else None
6581
return {
6682
"type": "text",
67-
"text": f"{self.file_id or self.url} {self.quote} (Start Index={self.start_index}->End Index={self.end_index})", # noqa: E501
83+
"text": f"type={ctype}, {self.file_id or self.url}, quote={self.quote}, title={self.title} (Start Index={self.start_index}->End Index={self.end_index})", # noqa: E501
6884
}

0 commit comments

Comments
 (0)