In [1]:
from agentools import *

In [2]:
from agentools.retrieval.db import EmbeddableData, EmbeddableDataCollection, EmbeddableField

EmbeddableDataCollection.use_global_client()

In [3]:
from enum import StrEnum


class OriginalStrEnum(StrEnum):
    """
    A StrEnum that does NOT convert the value to lowercase, but rather uses the original value
    """

    @staticmethod
    def _generate_next_value_(
        name: str, start: int, count: int, last_values: list[str]
    ) -> str:
        return name

In [4]:
"""
Document database that stores product metadata.
"""

from agentools.retrieval.db import (
    EmbeddableDataCollection,
    EmbeddableData,
    EmbeddableField,
)


Powertrain = OriginalStrEnum(
    "Powertrain",
    ["EV", "ICE", "Hybrid"],
)

VehicleType = OriginalStrEnum(
    "VehicleType",
    ["Car", "Truck", "SUV", "Van"],
)


class Product(EmbeddableData):
    """
    Product metadata
    """

    name: str
    description: str = EmbeddableField()
    vehicle_type: VehicleType
    powertrain_type: Powertrain
    range_in_km: float
    price_in_eur: float
    num_seats: int
    color_options: list[str]
    customization_options: list[str]

    image_url: str

In [5]:
product_db = EmbeddableDataCollection("product_db", Product)
await product_db.check_exist_and_initialize()

True

In [6]:
data = [
    Product(name="Mercedes S-Class", description="The best car ever", vehicle_type=VehicleType.Car, powertrain_type=Powertrain.Hybrid, range_in_km=1000, price_in_eur=100000, num_seats=5, color_options=["Black", "White"], customization_options=["Leather Seats", "Sunroof"], image_url="https://www.mercedes-benz.com/en/vehicles/passenger-cars/s-class/s-class-sedan/"),
    Product(name="Tesla Model 3", description="The best EV ever", vehicle_type=VehicleType.Car, powertrain_type=Powertrain.EV, range_in_km=500, price_in_eur=50000, num_seats=5, color_options=["Black", "White"], customization_options=["Leather Seats", "Sunroof"], image_url="https://www.tesla.com/model3"),
]

await product_db.add(data)

In [7]:
async for product in product_db.iterate():
    print(product)

name='Tesla Model 3' description='The best EV ever' vehicle_type=<VehicleType.Car: 'Car'> powertrain_type=<Powertrain.EV: 'EV'> range_in_km=500.0 price_in_eur=50000.0 num_seats=5 color_options=['Black', 'White'] customization_options=['Leather Seats', 'Sunroof'] image_url='https://www.tesla.com/model3'
name='Mercedes S-Class' description='The best car ever' vehicle_type=<VehicleType.Car: 'Car'> powertrain_type=<Powertrain.Hybrid: 'Hybrid'> range_in_km=1000.0 price_in_eur=100000.0 num_seats=5 color_options=['Black', 'White'] customization_options=['Leather Seats', 'Sunroof'] image_url='https://www.mercedes-benz.com/en/vehicles/passenger-cars/s-class/s-class-sedan/'


In [8]:
await product_db.query(description="The best car ever", top=2)

[(1.0000000148350376,
  Product(name='Mercedes S-Class', description='The best car ever', vehicle_type=<VehicleType.Car: 'Car'>, powertrain_type=<Powertrain.Hybrid: 'Hybrid'>, range_in_km=1000.0, price_in_eur=100000.0, num_seats=5, color_options=['Black', 'White'], customization_options=['Leather Seats', 'Sunroof'], image_url='https://www.mercedes-benz.com/en/vehicles/passenger-cars/s-class/s-class-sedan/')),
 (0.6764056073463183,
  Product(name='Tesla Model 3', description='The best EV ever', vehicle_type=<VehicleType.Car: 'Car'>, powertrain_type=<Powertrain.EV: 'EV'>, range_in_km=500.0, price_in_eur=50000.0, num_seats=5, color_options=['Black', 'White'], customization_options=['Leather Seats', 'Sunroof'], image_url='https://www.tesla.com/model3'))]

In [1]:
import json

with open("qdrant.json") as f:
    filter_schema = json.load(f)

filter_schema

{'$defs': {'FieldCondition': {'additionalProperties': False,
   'description': 'All possible payload filtering conditions',
   'properties': {'key': {'description': 'Payload key',
     'title': 'Key',
     'type': 'string'},
    'match': {'anyOf': [{'$ref': '#/$defs/MatchValue'},
      {'$ref': '#/$defs/MatchText'},
      {'$ref': '#/$defs/MatchAny'},
      {'$ref': '#/$defs/MatchExcept'},
      {'type': 'null'}],
     'default': None,
     'description': 'Check if point has field with a given value',
     'title': 'Match'},
    'range': {'anyOf': [{'$ref': '#/$defs/Range'},
      {'$ref': '#/$defs/DatetimeRange'},
      {'type': 'null'}],
     'default': None,
     'description': 'Check if points value lies in a given range',
     'title': 'Range'},
    'values_count': {'anyOf': [{'$ref': '#/$defs/ValuesCount'},
      {'type': 'null'}],
     'default': None,
     'description': 'Check number of values of the field'}},
   'required': ['key'],
   'title': 'FieldCondition',
   'type': 'o

In [20]:
from enum import StrEnum
from agentools.retrieval.db import (
    EmbeddableDataCollection,
    EmbeddableData,
    EmbeddableField,
)

class OriginalStrEnum(StrEnum):
    """
    A StrEnum that does NOT convert the value to lowercase, but rather uses the original value
    """

    @staticmethod
    def _generate_next_value_(
        name: str, start: int, count: int, last_values: list[str]
    ) -> str:
        return name

Powertrain = OriginalStrEnum(
    "Powertrain",
    ["EV", "ICE", "Hybrid"],
)

VehicleType = OriginalStrEnum(
    "VehicleType",
    ["Car", "Truck", "SUV", "Van"],
)


class Product(EmbeddableData):

    # ========= Vehicle Info ========= #
    vehicle_type: VehicleType  # build.body_type
    release_year: int  # build.year
    price_in_usd: float  # price
    is_used: bool  # inventory_type=="used"

    # ========= Technical Specs ========= #
    powertrain_type: Powertrain  # {"EV": build.powertrain_type == "BEV", "ICE": build.powertrain_type == "Combustion", "Hybrid": else}
    num_seats: int  # build.std_seating
    color: str  # base_ext_color



In [50]:
product_schema = Product.model_json_schema()
product_schema = json.dumps(product_schema)
print(product_schema)

{"$defs": {"Powertrain": {"enum": ["EV", "ICE", "Hybrid"], "title": "Powertrain", "type": "string"}, "VehicleType": {"enum": ["Car", "Truck", "SUV", "Van"], "title": "VehicleType", "type": "string"}}, "properties": {"vehicle_type": {"$ref": "#/$defs/VehicleType"}, "release_year": {"title": "Release Year", "type": "integer"}, "price_in_usd": {"title": "Price In Usd", "type": "number"}, "is_used": {"title": "Is Used", "type": "boolean"}, "powertrain_type": {"$ref": "#/$defs/Powertrain"}, "num_seats": {"title": "Num Seats", "type": "integer"}, "color": {"title": "Color", "type": "string"}}, "required": ["vehicle_type", "release_year", "price_in_usd", "is_used", "powertrain_type", "num_seats", "color"], "title": "Product", "type": "object"}


In [62]:
def extract_values(obj, key):
    """Recursively fetch values from nested JSON."""
    arr = []

    def extract(obj, arr, key):
        """Recursively search for values of key in JSON tree."""
        if isinstance(obj, dict):
            for k, v in obj.items():
                if k == key:
                    arr.append(v)
                elif isinstance(v, (dict, list)):
                    extract(v, arr, key)
        elif isinstance(obj, list):
            for item in obj:
                extract(item, arr, key)
        return arr

    results = extract(obj, arr, key)
    return results

In [65]:
extract_values(json.loads(ex), "key")

['vehicle_type',
 'powertrain_type',
 'range',
 'price_in_usd',
 'range',
 'num_seats']

In [76]:
from jsonschema import validate
import jsonschema
from agentools import *

SYSTEM = f"""
There exists a database of cars and the user wants to buy a car. 
The user gives requirements on the car he wants to buy and you must respond with a json object conforming to the following JSON schema: 
{filter_schema}
where the keys must conform to the following JSON schema:
{product_schema}
""".strip()

async def extract_condition(SYSTEM, msg, tries=3):
    keys = ["vehicle_type", "release_year", "price_in_usd", "is_used", "powertrain_type", "num_seats", "color"]
    gpt = ChatGPT(SimpleHistory.system(SYSTEM), model="gpt-4-turbo") 
    extracted = await gpt(msg, response_format={ "type": "json_object" })
    for i in range(tries):
        if extracted == "null":
            return None
        else:
            try:
                validate(json.loads(extracted), filter_schema)
            except jsonschema.exceptions.ValidationError as err:
                print(err.message)
                extracted = await gpt(err.message, response_format={ "type": "json_object"})
            extracted_keys = extract_values(json.loads(extracted), "key")
            wrong_keys = [element for element in extracted_keys if element not in keys]
            if wrong_keys == []:
                return extracted
            else:
                print(f"Please delete the following keys: {wrong_keys}")
                extracted = await gpt(f"Please remove the following keys: {wrong_keys}", response_format={ "type": "json_object"})
    return "null"

In [73]:
print(SYSTEM)

There exists a database of cars and the user wants to buy a car. 
The user gives requirements on the car he wants to buy and you must respond with a json object conforming to the following JSON schema: 
{'$defs': {'FieldCondition': {'additionalProperties': False, 'description': 'All possible payload filtering conditions', 'properties': {'key': {'description': 'Payload key', 'title': 'Key', 'type': 'string'}, 'match': {'anyOf': [{'$ref': '#/$defs/MatchValue'}, {'$ref': '#/$defs/MatchText'}, {'$ref': '#/$defs/MatchAny'}, {'$ref': '#/$defs/MatchExcept'}, {'type': 'null'}], 'default': None, 'description': 'Check if point has field with a given value', 'title': 'Match'}, 'range': {'anyOf': [{'$ref': '#/$defs/Range'}, {'$ref': '#/$defs/DatetimeRange'}, {'type': 'null'}], 'default': None, 'description': 'Check if points value lies in a given range', 'title': 'Range'}, 'values_count': {'anyOf': [{'$ref': '#/$defs/ValuesCount'}, {'type': 'null'}], 'default': None, 'description': 'Check number o

In [77]:
ex = await extract_condition(SYSTEM, "I want an electric car with a range of 500 km and a price of 50000 dollars or a car with a range of 1000 km and 4 seats")

{'should': [{'must': [{'key': 'powertrain_type', 'match': {'value': 'EV'}}, {'key': 'range', 'range': {'gte': 500}}, {'key': 'price_in_usd', 'range': {'lte': 50000}}]}, {'must': [{'key': 'range', 'range': {'gte': 1000}}, {'key': 'num_seats', 'match': {'value': 4}}]}]} is not valid under any of the given schemas
Please delete the following keys: ['range', 'range']


In [78]:
print(ex)

{
  "should": [
    {
      "must": [
        {
          "key": "powertrain_type",
          "match": {
            "value": "EV"
          }
        },
        {
          "key": "price_in_usd",
          "range": {
            "lte": 50000
          }
        }
      ]
    },
    {
      "must": [
        {
          "key": "num_seats",
          "match": {
            "value": 4
          }
        }
      ]
    }
  ]
}


In [47]:
ex = await extract_condition(SYSTEM, "I want nothing")

In [48]:
print(ex)

{
  "must": []
}


In [97]:
SYSTEM = f"""
"There exists a database of cars. You are listening to a customer and car salesman talking where the customer wants to buy a car and the car salesman is recommending cars from the database. 
If the customer does not give any information on specific conditions on the car, respond with null. If he provides conditions on the car he wants to buy, you must respond with a json object conforming to the following JSON schema: {filter_schema}."
""".strip()

gpt = ChatGPT(SimpleHistory.system(SYSTEM), model="gpt-4-turbo") 

extracted = await gpt("hi i want a car", response_format={ "type": "json_object"})

In [98]:
print(extracted)

{
  "response": null
}


In [96]:
SYSTEM = f"""
"There exists a database of cars and the user wants to buy a car. 
The user can ask for a car within a specific price range and you must respond with a json object conforming to the following JSON schema: {filter_schema}."
""".strip()

gpt = ChatGPT(SimpleHistory.system(SYSTEM), model="gpt-4-turbo") 

extracted = await gpt("hi i want a cheap car", response_format={ "type": "json_object"})

CancelledError: 

In [51]:
print(extracted)

{
  "must": [
    {
      "key": "price",
      "range": {
        "lt": 10000
      }
    }
  ]
}


In [65]:
extracted = await gpt("hi i want a car under 500k", response_format={ "type": "json_object" })

In [66]:
print(extracted)

{
  "must": [
    {
      "key": "price",
      "range": {
        "lt": 500000
      }
    }
  ]
}


In [64]:
print(validate(json.loads(extracted), filter_schema))

None


In [None]:

from agentools import *

gpt = ChatGPT(model="gpt-4-turbo", tools=extract_conditions)

