Skip to content

Commit 0a92459

Browse files
Python: Vector store updates core azs (#12114)
<!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> Updates to the core vector store setup, including renaming of params, and conceptual change in VectorStoreVectorField And the accompanying implementation for AZS <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone 😄
1 parent 8d1b3fd commit 0a92459

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+4250
-3235
lines changed

python/.coveragerc

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,10 @@
11
[run]
22
source = semantic_kernel
33
omit =
4-
semantic_kernel/connectors/memory/astradb/*
5-
semantic_kernel/connectors/memory/azure_cognitive_search/*
6-
semantic_kernel/connectors/memory/azure_cosmosdb/*
7-
semantic_kernel/connectors/memory/azure_cosmosdb_no_sql/*
8-
semantic_kernel/connectors/memory/chroma/chroma_memory_store.py
9-
semantic_kernel/connectors/memory/milvus/*
10-
semantic_kernel/connectors/memory/mongodb_atlas/mongodb_atlas_memory_store.py
11-
semantic_kernel/connectors/memory/pinecone/pinecone_memory_store.py
12-
semantic_kernel/connectors/memory/pinecone/utils.py
13-
semantic_kernel/connectors/memory/postgres/postgres_memory_store.py
14-
semantic_kernel/connectors/memory/qdrant/qdrant_memory_store.py
15-
semantic_kernel/connectors/memory/redis/redis_memory_store.py
16-
semantic_kernel/connectors/memory/usearch/*
17-
semantic_kernel/connectors/memory/weaviate/weaviate_memory_store.py
4+
semantic_kernel/connectors/memory_stores/*
185
semantic_kernel/reliability/*
196
semantic_kernel/memory/*
7+
semantic_kernel/planners/*
208

219
[report]
2210
# Regexes for lines to exclude from consideration

python/mypy.ini

Lines changed: 6 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -17,50 +17,13 @@ disable_error_code = method-assign
1717

1818
[mypy-semantic_kernel.memory.*]
1919
ignore_errors = true
20-
# TODO (eavanvalkenburg): remove this
21-
# https://github.com/microsoft/semantic-kernel/issues/6463
20+
# TODO (eavanvalkenburg): remove this when removing the memory stores
2221

23-
[mypy-semantic_kernel.connectors.memory.astradb.*]
22+
[mypy-semantic_kernel.connectors.memory_stores.*]
2423
ignore_errors = true
24+
# TODO (eavanvalkenburg): remove this when removing the memory stores
2525

26-
[mypy-semantic_kernel.connectors.memory.azure_ai_search.*]
27-
ignore_errors = false
28-
[mypy-semantic_kernel.connectors.memory.azure_cognitive_search.*]
29-
ignore_errors = true
30-
31-
[mypy-semantic_kernel.connectors.memory.azure_cosmosdb.*]
32-
ignore_errors = true
33-
34-
[mypy-semantic_kernel.connectors.memory.azure_cosmosdb_no_sql.*]
35-
ignore_errors = true
36-
37-
[mypy-semantic_kernel.connectors.memory.chroma.*]
38-
ignore_errors = true
39-
40-
[mypy-semantic_kernel.connectors.memory.milvus.*]
41-
ignore_errors = true
42-
43-
[mypy-semantic_kernel.connectors.memory.mongodb_atlas.*]
44-
ignore_errors = true
45-
46-
[mypy-semantic_kernel.connectors.memory.pinecone.pinecone_memory_store]
47-
ignore_errors = true
48-
49-
[mypy-semantic_kernel.connectors.memory.postgres.*]
50-
ignore_errors = true
51-
52-
[mypy-semantic_kernel.connectors.memory.qdrant.qdrant_vector_record_store.*]
53-
ignore_errors = true
54-
[mypy-semantic_kernel.connectors.memory.qdrant.*]
55-
ignore_errors = true
56-
57-
[mypy-semantic_kernel.connectors.memory.redis.redis_vector_record_store.*]
58-
ignore_errors = true
59-
[mypy-semantic_kernel.connectors.memory.redis.*]
60-
ignore_errors = true
61-
62-
[mypy-semantic_kernel.connectors.memory.usearch.*]
63-
ignore_errors = true
64-
65-
[mypy-semantic_kernel.connectors.memory.weaviate.*]
26+
[mypy-semantic_kernel.planners.*]
6627
ignore_errors = true
28+
# TODO (eavanvalkenburg): remove this after future of planner is decided
29+
# https://github.com/microsoft/semantic-kernel/issues/6465

python/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ select = [
215215
ignore = [
216216
"D100", #allow missing docstring in public module
217217
"D104", #allow missing docstring in public package
218+
"D418", #allow docstring on overloaded function
218219
"TD003", #allow missing link to todo issue
219220
"FIX002" #allow todo
220221
]
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Copyright (c) Microsoft. All rights reserved.
2+
3+
import asyncio
4+
5+
from samples.concepts.memory.azure_ai_search_hotel_samples.data_model import (
6+
HotelSampleClass,
7+
custom_index,
8+
load_records,
9+
)
10+
from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding
11+
from semantic_kernel.connectors.memory import AzureAISearchCollection
12+
13+
"""
14+
With the data model and records defined in data_model.py, this script will create an Azure AI Search collection,
15+
upsert the records, and then search the collection using vector and hybrid search.
16+
The script will print the first five records in the collection and the search results.
17+
The script will also delete the collection at the end.
18+
19+
Note that we add the OpenAITextEmbedding to the collection, which is used to generate the vectors.
20+
To use the built-in embedding in Azure AI Search, remove this and add that definition to the custom_index.
21+
"""
22+
23+
24+
async def main(query: str):
    """Create the hotel collection, fill it, query it two ways, then delete it.

    Args:
        query: Free-text query used for both the vector search and the hybrid search.
    """

    async def print_hits(search_results) -> None:
        # Both search flavours print their hits in exactly the same layout.
        async for hit in search_results.results:
            hotel = hit.record
            print(
                f" {hotel.HotelId} (in {hotel.Address.City}, "
                f"{hotel.Address.Country}): {hotel.Description} (score: {hit.score})"
            )

    hotels = load_records()
    # The collection is used as an async context manager for the whole run.
    async with AzureAISearchCollection[str, HotelSampleClass](
        data_model_type=HotelSampleClass, embedding_generator=OpenAITextEmbedding()
    ) as hotel_collection:
        # Only create the index when it is not there yet.
        if not await hotel_collection.does_collection_exist():
            await hotel_collection.create_collection(index=custom_index)
        await hotel_collection.upsert(hotels)

        # Read back five records, ordered by hotel name, to check that the upsert worked.
        first_five = await hotel_collection.get(order_by={"field": "HotelName", "ascending": True}, top=5)
        print("Get first five records: ")
        for hotel in first_five or []:
            print(f" {hotel.HotelId} (in {hotel.Address.City}, {hotel.Address.Country}): {hotel.Description}")

        print("\n")
        print("Search results using vector: ")
        # Search against the vector property.
        vector_hits = await hotel_collection.search(
            query,
            vector_property_name="DescriptionVector",
        )
        await print_hits(vector_hits)

        print("\n")
        print("Search results using hybrid: ")
        # Hybrid search: the vector property plus the additional `Description` property.
        hybrid_hits = await hotel_collection.hybrid_search(
            query, vector_property_name="DescriptionVector", additional_property_name="Description"
        )
        await print_hits(hybrid_hits)

        # Clean up: remove the collection again.
        await hotel_collection.delete_collection()
68+
69+
70+
if __name__ == "__main__":
    # Script entry point: run the sample once with a fixed example query.
    query = "swimming pool and good internet connection"
    asyncio.run(main(query=query))
Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
# Copyright (c) Microsoft. All rights reserved.
2+
3+
4+
import asyncio
5+
from collections.abc import Awaitable, Callable
6+
from typing import TYPE_CHECKING, Any
7+
8+
from samples.concepts.memory.azure_ai_search_hotel_samples.data_model import (
9+
HotelSampleClass,
10+
custom_index,
11+
load_records,
12+
)
13+
from semantic_kernel.agents import ChatCompletionAgent
14+
from semantic_kernel.agents.agent import AgentThread
15+
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
16+
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAITextEmbedding
17+
from semantic_kernel.connectors.memory import AzureAISearchCollection
18+
from semantic_kernel.filters import FilterTypes, FunctionInvocationContext
19+
from semantic_kernel.functions import KernelParameterMetadata
20+
from semantic_kernel.functions.kernel_plugin import KernelPlugin
21+
from semantic_kernel.kernel_types import OptionalOneOrList
22+
23+
if TYPE_CHECKING:
24+
from semantic_kernel.functions import KernelParameterMetadata
25+
26+
27+
"""
28+
This sample builds on the previous one, but can be run independently.
29+
It uses the data model defined in data_model.py, and with that creates a collection
30+
and creates two kernel functions from those that are then made available to an LLM.
31+
The first function is a search function that allows you to search for hotels, optionally filtering for a city.
32+
The second function is a details function that allows you to get details about a hotel.
33+
"""
34+
35+
36+
# Create an Azure AI Search collection.
37+
collection = AzureAISearchCollection[str, HotelSampleClass](
    data_model_type=HotelSampleClass, embedding_generator=OpenAITextEmbedding()
)
# Load the sample records.
records = load_records()
# Collect the distinct, non-empty city names of all hotels located in the USA.
cities: set[str] = {
    hotel.Address.City for hotel in records if hotel.Address.Country == "USA" and hotel.Address.City
}
47+
48+
49+
# Before we create the plugin, we want to create a function that will help the plugin work the way we want it to.
50+
# This function allows us to create the plugin with a parameter called `city` that
51+
# then gets put into a filter for address/city.
52+
# This function has to adhere to the `DynamicFilterFunction` signature.
53+
# which consists of 2 named arguments, `filter`, and `parameters`.
54+
# and kwargs.
55+
# It returns the updated filter.
56+
# The default version that is used when not supplying this, reads the parameters and if there is
57+
# a parameter that is not `query`, `top`, or `skip`, and it can find a value for it, either in the kwargs
58+
# or the default value specified in the parameter, it will add a filter to the options.
59+
# In this case, we are adding a filter to the options to filter by the city, but since the technical name
60+
# of that field in the index is `address/city`, we want to do this manually.
61+
# this can also be used to replace a complex technical name in your index with a friendly name towards the LLM.
62+
def filter_update(
63+
filter: OptionalOneOrList[Callable | str] | None = None,
64+
parameters: list["KernelParameterMetadata"] | None = None,
65+
**kwargs: Any,
66+
) -> OptionalOneOrList[Callable | str] | None:
67+
if "city" in kwargs:
68+
city = kwargs["city"]
69+
if city not in cities:
70+
raise ValueError(f"City '{city}' is not in the list of cities: {', '.join(cities)}")
71+
# we need the actual value and not a named param, otherwise the parser will not be able to find it.
72+
new_filter = f"lambda x: x.Address.City == '{city}'"
73+
if filter is None:
74+
filter = new_filter
75+
elif isinstance(filter, list):
76+
filter.append(new_filter)
77+
else:
78+
filter = [filter, new_filter]
79+
return filter
80+
81+
82+
# Next we create the Agent, with two functions.
83+
# Module-level agent that exposes two search functions, both created from `collection`,
# to the chat completion service through a single `azure_ai_search` plugin.
travel_agent = ChatCompletionAgent(
    name="TravelAgent",
    description="A travel agent that helps you find a hotel.",
    service=OpenAIChatCompletion(),
    instructions="""You are a travel agent. Your name is Mosscap and
you have one goal: help people find a hotel.
Your full name, should you need to know it, is
Splendid Speckled Mosscap. You communicate
effectively, but you tend to answer with long
flowery prose. You always make sure to include the
hotel_id in your answers so that the user can
use it to get more information.""",
    function_choice_behavior=FunctionChoiceBehavior.Auto(),
    plugins=[
        KernelPlugin(
            name="azure_ai_search",
            description="A plugin that allows you to search for hotels in Azure AI Search.",
            functions=[
                collection.create_search_function(
                    # This create-search method uses the `search` method of the text search object.
                    # Remember that the text_search object for this sample is based on
                    # the text_search method of Azure AI Search,
                    # but it can also be used with the other vector search methods.
                    # This method's description, name and parameters are what will be serialized as part of the
                    # tool call functionality of the LLM,
                    # and crafting these should be part of the prompt design process.
                    # The default parameters are `query`, `top`, and `skip`, but you can specify your own.
                    # The default parameters match the parameters of the VectorSearchOptions class.
                    description="A hotel search engine, allows searching for hotels in specific cities, "
                    "you do not have to specify that you are searching for hotels, for all, use `*`.",
                    search_type="keyword_hybrid",
                    # Next to the dynamic filters based on parameters, you can specify options that are always used.
                    # This can include the `top` and `skip` parameters, but also filters that are always applied.
                    # In this case, we filter by country, so only hotels in the USA are returned.
                    filter=lambda x: x.Address.Country == "USA",
                    parameters=[
                        KernelParameterMetadata(
                            name="query",
                            description="What to search for.",
                            type="str",
                            is_required=True,
                            type_object=str,
                        ),
                        KernelParameterMetadata(
                            name="city",
                            description="The city that you want to search for a hotel "
                            f"in, values are: {', '.join(cities)}",
                            type="str",
                            type_object=str,
                        ),
                        KernelParameterMetadata(
                            name="top",
                            description="Number of results to return.",
                            type="int",
                            default_value=5,
                            type_object=int,
                        ),
                    ],
                    # Here the `filter_update` function defined earlier in this file is passed in.
                    filter_update_function=filter_update,
                    # Finally, we specify the `string_mapper` function that is used to convert the record to a string.
                    # This is used to make sure the relevant information from the record is passed to the LLM.
                    string_mapper=lambda x: f"(hotel_id :{x.record.HotelId}) {x.record.HotelName} (rating {x.record.Rating}) - {x.record.Description}. Address: {x.record.Address.StreetAddress}, {x.record.Address.City}, {x.record.Address.StateProvince}, {x.record.Address.Country}. Number of room types: {len(x.record.Rooms)}. Last renovated: {x.record.LastRenovationDate}.",  # noqa: E501
                ),
                collection.create_search_function(
                    # This second function is a more detailed one, that uses a `HotelId` to get details about a hotel.
                    # We set `top` to 1, so that only 1 record is returned.
                    function_name="get_details",
                    description="Get details about a hotel, by ID, use the generic search function to get the ID.",
                    top=1,
                    parameters=[
                        KernelParameterMetadata(
                            name="HotelId",
                            description="The hotel ID to get details for.",
                            type="str",
                            is_required=True,
                            type_object=str,
                        ),
                    ],
                ),
            ],
        )
    ],
)
167+
168+
169+
# This filter will log all calls to the Azure AI Search plugin.
170+
# This allows us to see what parameters are being passed to the plugin.
171+
# And this gives us a way to debug the search experience and if necessary tweak the parameters and descriptions.
172+
@travel_agent.kernel.filter(filter_type=FilterTypes.FUNCTION_INVOCATION)
async def log_search_filter(
    context: FunctionInvocationContext, next: Callable[[FunctionInvocationContext], Awaitable[None]]
):
    """Print the name and arguments of an invoked function, then continue the filter chain.

    Args:
        context: The invocation context; its function name and arguments are printed.
        next: The next step of the filter pipeline, awaited after logging.
    """
    print(f"Calling Azure AI Search ({context.function.name}) with arguments:")
    for arg in context.arguments:
        # Skip only the argument that is literally named `chat_history`.
        # The previous `arg in ("chat_history")` was a substring test against a plain
        # string (parentheses alone do not make a tuple), which also hid arguments
        # named e.g. `chat` or `history`.
        if arg == "chat_history":
            continue
        print(f' {arg}: "{context.arguments[arg]}"')
    await next(context)
182+
183+
184+
async def chat():
    """Run the interactive console chat with the travel agent.

    Makes sure the collection exists and holds records, then loops over user input
    until the user types `exit` or interrupts the prompt (Ctrl-C / end of input).
    Afterwards the user is asked whether the collection should be deleted.
    """
    async with collection:
        # Create the index on first use only.
        index_present = await collection.does_collection_exist()
        if not index_present:
            await collection.create_collection(index=custom_index)
        # Only load the sample records when a one-record lookup comes back empty.
        sample = await collection.get(top=1)
        if not sample:
            await collection.upsert(records)

        thread: AgentThread | None = None
        while True:
            try:
                user_input = input("User:> ")
            except (KeyboardInterrupt, EOFError):
                # An interrupted prompt is treated the same as typing `exit`.
                user_input = "exit"

            if user_input == "exit":
                print("\n\nExiting chat...")
                break

            reply = await travel_agent.get_response(messages=user_input, thread=thread)
            print(f"Agent: {reply.content}")
            # Keep the thread so the next turn continues the same conversation.
            thread = reply.thread

        answer = input("Do you want to delete the collection? (y/n): ")
        if answer.lower() != "y":
            print("Collection not deleted.")
        else:
            await collection.delete_collection()
            print("Collection deleted.")
217+
218+
219+
async def main():
    """Print the welcome banner and then start the interactive chat."""
    banner = (
        "Welcome to the chat bot!"
        "\n Type 'exit' to exit."
        "\n Try to find a hotel to your liking!"
    )
    print(banner)
    await chat()
226+
227+
228+
if __name__ == "__main__":
    # Script entry point: run the interactive chat sample.
    # NOTE(review): `asyncio` is already imported at the top of this file, so this
    # local import is redundant (but harmless).
    import asyncio

    asyncio.run(main())

0 commit comments

Comments
 (0)