In [None]:
from dotenv import load_dotenv

# Load variables from a local `.env` file into the process environment.
# NOTE(review): presumably this supplies the OpenAI credentials that
# `settings` exposes further down — confirm against the project config.
load_dotenv()

In [None]:
from superlinked import framework as sl

class Property(sl.Schema):
    """Schema for real estate properties.

    Later cells use these fields in two ways: `description`, `sqft` and
    `price` feed the embedding spaces, while `location`, `rooms`, `baths`,
    `sqft` and `price` are also applied as hard filters in the query.
    """
    # Identifier of the listing.
    id: sl.IdField
    # Free-text listing description; embedded for semantic similarity.
    description: sl.String
    # Number of bathrooms; filtered with a lower bound in the query.
    baths: sl.Integer
    # Number of rooms; filtered with a lower bound in the query.
    rooms: sl.Integer
    # Floor area. NOTE(review): the field is named sqft, but the 20-500 range
    # configured for the size space reads more like square metres — confirm
    # the unit used in the dataset.
    sqft: sl.Integer
    # Neighbourhood name; matched exactly by the location filter.
    location: sl.String
    # Asking price (the sample queries in this notebook phrase budgets in euros).
    price: sl.Integer


# Create an instance of the schema; the spaces, index, source and query
# defined in the following cells all reference its fields.
property_schema = Property()

In [None]:
# Sentence-transformers checkpoint used to embed the listing descriptions
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Semantic space: listings are compared through the embedding of their
# free-text description
description_space = sl.TextSimilaritySpace(
    model=EMBEDDING_MODEL,
    text=property_schema.description,
)

# Value range covered by the size space
# NOTE(review): the field is called `sqft`, yet 20-500 reads like square
# metres - confirm the unit used in the dataset
MIN_SIZE = 20   # Smallest reasonable apartment
MAX_SIZE = 500  # Largest reasonable apartment

# Size space - Mode.MAXIMUM favours larger properties
size_space = sl.NumberSpace(
    number=property_schema.sqft,
    min_value=MIN_SIZE,
    max_value=MAX_SIZE,
    mode=sl.Mode.MAXIMUM,
)

# Value range covered by the price space
MIN_PRICE = 100_000     # Minimum price
MAX_PRICE = 10_000_000  # Maximum price

# Price space - Mode.MINIMUM favours cheaper properties
price_space = sl.NumberSpace(
    number=property_schema.price,
    min_value=MIN_PRICE,
    max_value=MAX_PRICE,
    mode=sl.Mode.MINIMUM,
)

In [None]:
# Spaces whose embeddings make up the index
index_spaces = [description_space, size_space, price_space]

# Schema fields registered on the index; each of them is used by a
# `.filter(...)` clause in the search query
index_fields = [
    property_schema.rooms,
    property_schema.baths,
    property_schema.sqft,
    property_schema.price,
    property_schema.location,
]

# Single index combining the spaces with the filterable fields
property_index = sl.Index(spaces=index_spaces, fields=index_fields)

In [None]:
from realtime_phone_agents.config import settings

# OpenAI client settings handed to the natural-language query step;
# the key and model name come from the project settings, not from literals
openai_config = sl.OpenAIClientConfig(
    model=settings.openai.model,
    api_key=settings.openai.api_key,
)

In [None]:
# Define the semantic search query with parameterized weights and filters.
#
# Every `sl.Param` is a named slot: it can be passed explicitly to
# `app.query(...)` or left for the natural-language step to fill from the
# user's sentence. The `description=` texts are what that step reads, so they
# must state the exact comparison each filter applies.
search_query = (
    sl.Query(
        property_index,
        # Per-space weights are parameters too, so the balance between text
        # similarity, size and price can change per request
        weights={
            description_space: sl.Param("description_weight"),
            size_space: sl.Param("size_weight"),
            price_space: sl.Param("price_weight"),
        },
    )
    # Explicit mention to the schema
    .find(property_schema)
    # Define natural query as a way to decompose the user's query
    .with_natural_query(sl.Param("natural_query"), openai_config)
    # Semantic match against the listing descriptions
    .similar(
        description_space,
        sl.Param(
            "description_query",
            description="The user's natural language query for property search.",
        ),
    )
    # Filters - these are hard constraints
    .filter(
        property_schema.location
        == sl.Param(
            "location",
            description="Used to filter appartments by neighborhood",
        )
    )
    .filter(
        property_schema.rooms
        >= sl.Param(
            "min_rooms",
            description="Used to find apartments with a room count equal to or greater than the specified number",
        )
    )
    .filter(
        property_schema.baths
        >= sl.Param(
            "min_baths",
            description="Used to find apartments with a bath count equal to or greater than the specified number",
        )
    )
    .filter(
        property_schema.sqft
        >= sl.Param(
            "sqft_bigger_than",
            description="Used to find appartments with square feet equal to or greather than the specified number",
        )
    )
    # The price bound is inclusive (<=), so the description says
    # "less than or equal to" to match requests such as "at most 900000"
    .filter(
        property_schema.price
        <= sl.Param(
            "price_smaller_than",
            description="Used to find appartments with price less than or equal to the specified number",
        )
    )
    # Number of results is chosen by the caller
    .limit(sl.Param("limit"))
    # Return every schema field with each result
    .select_all()
)

In [None]:
# Source type: `InMemorySource`, fed with pandas DataFrames that
# `DataFrameParser` maps onto the property schema
property_parser = sl.DataFrameParser(schema=property_schema)
source = sl.InMemorySource(property_schema, parser=property_parser)

# Connect the source and the index in an in-memory executor, then start it
executor = sl.InMemoryExecutor(indices=[property_index], sources=[source])
app = executor.run()

In [None]:
import pandas as pd

# Listings dataset; the path is relative to the notebook's working directory
PROPERTIES_CSV = "../data/properties.csv"
df = pd.read_csv(PROPERTIES_CSV)

In [None]:
# Preview the first rows of the loaded listings.
df.head()

Now, let's insert this data into our `InMemorySource`.

In [None]:
# Feed the loaded DataFrame into the in-memory source (passed as a
# one-element list).
source.put([df])

In [None]:
from pprint import pprint

In [None]:
# Neighbourhood + budget request; only the best match is requested
salamanca_request = "Do you have appartments in Barrio de Salamanca of at most 900000 euros?"
results = app.query(search_query, natural_query=salamanca_request, limit=1)

In [None]:
# The filters are hard constraints, so a query can legitimately match nothing;
# guard against that instead of failing with an IndexError on `entries[0]`.
if results.entries:
    pprint(results.entries[0].fields)
else:
    print("No properties matched the query.")

In [None]:
# Same kind of request for another neighbourhood with a lower budget
hortaleza_request = "Do you have appartments in Hortaleza of, at most 500000 euros? I'm not paying more than that!"
results = app.query(search_query, natural_query=hortaleza_request, limit=1)

In [None]:
# The filters are hard constraints, so a query can legitimately match nothing;
# guard against that instead of failing with an IndexError on `entries[0]`.
if results.entries:
    pprint(results.entries[0].fields)
else:
    print("No properties matched the query.")

In [None]:
# Room and bathroom minimums plus a neighbourhood. The neighbourhood name is
# written with its proper accent ("Chamartín"); the previous text was a
# mis-decoded UTF-8 sequence that would not match the real name.
results = app.query(
    search_query,
    natural_query="I want an appartment with 4 rooms and 4 bathrooms in Chamartín please",
    limit=1,
)

In [None]:
# The filters are hard constraints, so a query can legitimately match nothing;
# guard against that instead of failing with an IndexError on `entries[0]`.
if results.entries:
    pprint(results.entries[0].fields)
else:
    print("No properties matched the query.")