Skip to content

Python: Preserve Citation Title in AnnotationContent from Azure AI Foundry Annotations #12152

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
ThreadRun,
)

from semantic_kernel.contents.annotation_content import AnnotationContent
from semantic_kernel.contents.annotation_content import AnnotationContent, CitationType
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents.file_reference_content import FileReferenceContent
from semantic_kernel.contents.function_call_content import FunctionCallContent
Expand Down Expand Up @@ -529,19 +529,27 @@ def generate_annotation_content(
"""Generate annotation content."""
file_id = None
url = None
title = None
citation_type = None
if isinstance(annotation, MessageTextFilePathAnnotation) and annotation.file_path is not None:
file_id = annotation.file_path.file_id
citation_type = CitationType.FILE_PATH
elif isinstance(annotation, MessageTextFileCitationAnnotation) and annotation.file_citation is not None:
file_id = annotation.file_citation.file_id
citation_type = CitationType.FILE_CITATION
elif isinstance(annotation, MessageTextUrlCitationAnnotation) and annotation.url_citation is not None:
url = annotation.url_citation.url if annotation.url_citation.url else None
title = annotation.url_citation.title if annotation.url_citation.title else None
citation_type = CitationType.URL_CITATION

return AnnotationContent(
file_id=file_id,
quote=annotation.text,
start_index=annotation.start_index if annotation.start_index is not None else None,
end_index=annotation.end_index if annotation.end_index is not None else None,
url=url,
title=title if title else None,
citation_type=citation_type,
)


Expand All @@ -555,20 +563,27 @@ def generate_streaming_annotation_content(
file_id = None
url = None
quote = None
title = None
citation_type = None
if isinstance(annotation, MessageDeltaTextFilePathAnnotation) and annotation.file_path:
file_id = annotation.file_path.file_id if annotation.file_path.file_id else None
quote = annotation.text if annotation.text else None
citation_type = CitationType.FILE_PATH
elif isinstance(annotation, MessageDeltaTextFileCitationAnnotation) and annotation.file_citation:
file_id = annotation.file_citation.file_id if annotation.file_citation.file_id else None
quote = annotation.text if annotation.text else None
citation_type = CitationType.FILE_CITATION
elif isinstance(annotation, MessageDeltaTextUrlCitationAnnotation) and annotation.url_citation:
url = annotation.url_citation.url if annotation.url_citation.url else None
quote = annotation.url_citation.title if annotation.url_citation.title else None
title = annotation.url_citation.title if annotation.url_citation.title else None
citation_type = CitationType.URL_CITATION

return StreamingAnnotationContent(
file_id=file_id,
quote=quote,
start_index=annotation.start_index if annotation.start_index is not None else None,
end_index=annotation.end_index if annotation.end_index is not None else None,
url=url,
title=title if title else None,
citation_type=citation_type,
)
13 changes: 10 additions & 3 deletions python/semantic_kernel/contents/annotation_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@
_T = TypeVar("_T", bound="AnnotationContent")


@experimental
class CitationType(str, Enum):
    """Citation type.

    String-valued enumeration; each member's value is the lowercase,
    snake_case form of its name.
    """

    # Citation that points at a URL.
    URL_CITATION = "url_citation"
    # Citation that points at a file path.
    FILE_PATH = "file_path"
    # Citation that points at a file.
    FILE_CITATION = "file_citation"
    # NOTE(review): presumably a citation of plain text; no producer of this
    # value is visible here -- confirm against callers.
    TEXT_CITATION = "text_citation"


@experimental
Expand All @@ -47,11 +48,13 @@ class AnnotationContent(KernelContent):

def __str__(self) -> str:
    """Return the string representation of the annotation content.

    The citation type is rendered first under the ``type=`` label, followed by
    the file id, URL, quote and the start/end indices.

    Returns:
        A single-line ``AnnotationContent(...)`` description of this instance.
    """
    # Only one return: the earlier format without ``type=`` used to come first
    # and made this one unreachable.
    return (
        f"AnnotationContent(type={self.citation_type}, file_id={self.file_id}, url={self.url}, "
        f"quote={self.quote}, start_index={self.start_index}, end_index={self.end_index})"
    )

def to_element(self) -> Element:
"""Convert the annotation content to an Element."""
element = Element(self.tag)
if self.citation_type:
element.set("type", self.citation_type)
if self.file_id:
element.set("file_id", self.file_id)
if self.quote:
Expand All @@ -62,22 +65,26 @@ def to_element(self) -> Element:
element.set("end_index", str(self.end_index))
if self.url is not None:
element.set("url", self.url)
if self.title is not None:
element.set("title", self.title)
return element

@classmethod
def from_element(cls: type[_T], element: Element) -> _T:
    """Build an instance from the attributes of an XML Element.

    ``type``, ``file_id`` and ``quote`` are forwarded exactly as
    ``element.get`` returns them. ``start_index`` and ``end_index`` are
    converted with ``int`` when present and non-empty, otherwise ``None``.
    ``url`` and ``title`` become ``None`` when missing or empty.

    Args:
        element: The Element whose attributes are read.

    Returns:
        A new instance of ``cls``.
    """

    def _as_int(attr: str) -> int | None:
        # A missing or empty attribute maps to None rather than int("").
        raw = element.get(attr)
        return int(raw) if raw else None

    return cls(
        type=element.get("type"),
        file_id=element.get("file_id"),
        quote=element.get("quote"),
        start_index=_as_int("start_index"),
        end_index=_as_int("end_index"),
        url=element.get("url") or None,
        title=element.get("title") or None,
    )

def to_dict(self) -> dict[str, Any]:
    """Convert the instance to a dictionary.

    Returns:
        A dict with ``"type"`` fixed to ``"text"`` and ``"text"`` holding a
        one-line summary: the citation type, then the file id (or the URL when
        the file id is falsy), the quote and the start/end indices.
    """
    # The "text" key appears once: a second, earlier entry without the
    # ``type=`` prefix was silently overridden by this one.
    return {
        "type": "text",
        "text": (
            f"type={self.citation_type}, {self.file_id or self.url} {self.quote} "
            f"(Start Index={self.start_index}->End Index={self.end_index})"
        ),
    }
20 changes: 18 additions & 2 deletions python/semantic_kernel/contents/streaming_annotation_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from xml.etree.ElementTree import Element # nosec

from pydantic import Field
from pydantic_settings import SettingsConfigDict

from semantic_kernel.contents.annotation_content import CitationType
from semantic_kernel.contents.const import STREAMING_ANNOTATION_CONTENT_TAG, ContentTypes
from semantic_kernel.contents.kernel_content import KernelContent
from semantic_kernel.utils.feature_stage_decorator import experimental
Expand All @@ -29,14 +31,24 @@ class StreamingAnnotationContent(KernelContent):
start_index: int | None = None
end_index: int | None = None
url: str | None = None
title: str | None = None
citation_type: CitationType | None = Field(None, alias="type")

model_config = SettingsConfigDict(
extra="ignore",
case_sensitive=False,
populate_by_name=True,
)

def __str__(self) -> str:
    """Return the string representation of the annotation content.

    The citation type is rendered first under the ``type=`` label, followed by
    the file id, URL, quote, title and the start/end indices.

    Returns:
        A single-line ``StreamingAnnotationContent(...)`` description.
    """
    # Only one return: the earlier format without ``type=`` and ``title=``
    # used to come first and made this one unreachable.
    return (
        f"StreamingAnnotationContent(type={self.citation_type}, file_id={self.file_id}, url={self.url}, "
        f"quote={self.quote}, title={self.title}, start_index={self.start_index}, end_index={self.end_index})"
    )

def to_element(self) -> Element:
"""Convert the annotation content to an Element."""
element = Element(self.tag)
if self.citation_type:
element.set("type", self.citation_type)
if self.file_id:
element.set("file_id", self.file_id)
if self.quote:
Expand All @@ -47,22 +59,26 @@ def to_element(self) -> Element:
element.set("end_index", str(self.end_index))
if self.url is not None:
element.set("url", self.url)
if self.title is not None:
element.set("title", self.title)
return element

@classmethod
def from_element(cls: type[_T], element: Element) -> _T:
    """Construct an instance from an Element's attributes.

    The ``type``, ``file_id`` and ``quote`` attributes are handed over as read.
    The two index attributes are parsed with ``int`` when they hold a non-empty
    string and are ``None`` otherwise; empty or absent ``url`` / ``title``
    attributes are normalised to ``None``.

    Args:
        element: The Element to read attributes from.

    Returns:
        A new instance of ``cls``.
    """
    # Read each index once so the presence check and the conversion see the same value.
    start_raw = element.get("start_index")
    end_raw = element.get("end_index")
    return cls(
        type=element.get("type"),
        file_id=element.get("file_id"),
        quote=element.get("quote"),
        start_index=int(start_raw) if start_raw else None,
        end_index=int(end_raw) if end_raw else None,
        url=element.get("url") or None,
        title=element.get("title") or None,
    )

def to_dict(self) -> dict[str, Any]:
    """Convert the instance to a dictionary.

    Returns:
        A dict with ``"type"`` fixed to ``"text"`` and ``"text"`` holding a
        one-line summary: the citation type, the file id (or the URL when the
        file id is falsy), the quote, the title and the start/end indices.
    """
    # The "text" key appears once: a second, earlier entry without the type,
    # quote= and title= labels was silently overridden by this one.
    return {
        "type": "text",
        "text": (
            f"type={self.citation_type}, {self.file_id or self.url}, quote={self.quote}, "
            f"title={self.title} (Start Index={self.start_index}->End Index={self.end_index})"
        ),
    }
Original file line number Diff line number Diff line change
Expand Up @@ -125,16 +125,20 @@ def test_generate_message_content_text_and_image():
assert out.items[2].quote == "text"
assert out.items[2].start_index == 0
assert out.items[2].end_index == 9
assert out.items[2].citation_type == "file_citation"

assert out.items[3].file_id == "file_id_2"
assert out.items[3].quote == "text again"
assert out.items[3].start_index == 1
assert out.items[3].end_index == 10
assert out.items[3].citation_type == "file_path"

assert out.items[4].url == "http://example.com"
assert out.items[4].quote == "text"
assert out.items[4].start_index == 1
assert out.items[4].end_index == 10
assert out.items[4].title == "some title"
assert out.items[4].citation_type == "url_citation"

assert out.metadata["step_id"] == "step_id"
assert out.role == AuthorRole.USER
Expand Down Expand Up @@ -178,16 +182,16 @@ def test_generate_streaming_message_content_text_annotations():
annotations=[
MessageDeltaTextFileCitationAnnotation(
index=0,
file_citation=MessageDeltaTextFileCitationAnnotationObject(file_id="file123"),
file_citation=MessageDeltaTextFileCitationAnnotationObject(file_id="file123", quote="some text"),
start_index=0,
end_index=9,
text="some text",
),
MessageDeltaTextFilePathAnnotation(
index=0,
file_path=MessageDeltaTextFilePathAnnotationObject(file_id="file123"),
start_index=0,
end_index=9,
start_index=1,
end_index=10,
text="some text",
),
MessageDeltaTextUrlCitationAnnotation(
Expand All @@ -196,8 +200,8 @@ def test_generate_streaming_message_content_text_annotations():
title="some title",
url="http://example.com",
),
start_index=0,
end_index=9,
start_index=2,
end_index=11,
),
],
),
Expand All @@ -221,18 +225,21 @@ def test_generate_streaming_message_content_text_annotations():
assert out.items[2].quote == "some text"
assert out.items[2].start_index == 0
assert out.items[2].end_index == 9
assert out.items[2].citation_type == "file_citation"

assert isinstance(out.items[3], StreamingAnnotationContent)
assert out.items[3].file_id == "file123"
assert out.items[3].quote == "some text"
assert out.items[3].start_index == 0
assert out.items[3].end_index == 9
assert out.items[3].start_index == 1
assert out.items[3].end_index == 10
assert out.items[3].citation_type == "file_path"

assert isinstance(out.items[4], StreamingAnnotationContent)
assert out.items[4].url == "http://example.com"
assert out.items[4].quote == "some title"
assert out.items[4].start_index == 0
assert out.items[4].end_index == 9
assert out.items[4].title == "some title"
assert out.items[4].start_index == 2
assert out.items[4].end_index == 11
assert out.items[4].citation_type == "url_citation"


def test_generate_streaming_function_content_with_function():
Expand Down
29 changes: 24 additions & 5 deletions python/tests/unit/contents/test_annotation_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from semantic_kernel.contents.annotation_content import AnnotationContent
from semantic_kernel.contents.annotation_content import AnnotationContent, CitationType

test_cases = [
pytest.param(AnnotationContent(file_id="12345"), id="file_id"),
Expand All @@ -13,6 +13,17 @@
pytest.param(
AnnotationContent(file_id="12345", quote="This is a quote.", start_index=5, end_index=20), id="all_fields"
),
pytest.param(
AnnotationContent(
file_id="abc",
citation_type=CitationType.URL_CITATION,
url="http://example.com",
quote="q",
start_index=0,
end_index=2,
),
id="citation_type_and_url",
),
]


Expand Down Expand Up @@ -72,8 +83,8 @@ def test_to_str():
annotation = AnnotationContent(file_id="12345", quote="This is a quote.", start_index=5, end_index=20)
assert (
str(annotation)
== "AnnotationContent(file_id=12345, url=None, quote=This is a quote., start_index=5, end_index=20)"
) # noqa: E501
== "AnnotationContent(type=None, file_id=12345, url=None, quote=This is a quote., start_index=5, end_index=20)"
)


def test_to_element():
Expand Down Expand Up @@ -101,9 +112,13 @@ def test_from_element():

def test_to_dict():
    """to_dict() yields one ``text`` entry whose summary starts with the citation type."""
    annotation = AnnotationContent(file_id="12345", quote="This is a quote.", start_index=5, end_index=20)
    expected_text = (
        f"type={annotation.citation_type}, {annotation.file_id or annotation.url} {annotation.quote} "
        f"(Start Index={annotation.start_index}->End Index={annotation.end_index})"
    )
    # Single "text" expectation: a stale entry without the ``type=`` prefix
    # used to precede this one and was silently overridden.
    assert annotation.to_dict() == {
        "type": "text",
        "text": expected_text,
    }


Expand All @@ -116,8 +131,12 @@ def test_element_roundtrip(annotation):

@pytest.mark.parametrize("annotation", test_cases)
def test_to_dict_call(annotation):
    """to_dict() matches the expected summary for every parametrized annotation."""
    expected_text = (
        f"type={annotation.citation_type}, {annotation.file_id or annotation.url} {annotation.quote} "
        f"(Start Index={annotation.start_index}->End Index={annotation.end_index})"
    )
    # Single "text" expectation: a stale entry without the ``type=`` prefix
    # used to precede this one and was silently overridden.
    expected_dict = {
        "type": "text",
        "text": expected_text,
    }
    assert annotation.to_dict() == expected_dict
2 changes: 1 addition & 1 deletion python/tests/unit/contents/test_chat_message_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def test_cmc_to_dict_keys():
},
{
"role": "user",
"content": [{"type": "text", "text": "test None (Start Index=None->End Index=None)"}],
"content": [{"type": "text", "text": "type=None, test None (Start Index=None->End Index=None)"}],
},
),
(
Expand Down
Loading
Loading