In [None]:
# Import python packages
import os

import pandas as pd
from snowflake.core import Root
from snowflake.snowpark import Session

# Connection settings are read from SNOWFLAKE_* environment variables so real
# credentials never have to be typed into (and saved with) the notebook.
# Any variable that is not set falls back to its placeholder value.
connection_parameters = {
    "account": os.environ.get("SNOWFLAKE_ACCOUNT", "your_account_name"),
    "user": os.environ.get("SNOWFLAKE_USER", "your_username"),
    "host": os.environ.get("SNOWFLAKE_HOST", "your_host"),
    "password": os.environ.get("SNOWFLAKE_PASSWORD", "your_password"),
    "role": os.environ.get("SNOWFLAKE_ROLE", "your_role"),
    "warehouse": os.environ.get("SNOWFLAKE_WAREHOUSE", "your_warehouse"),
    "database": os.environ.get("SNOWFLAKE_DATABASE", "your_database"),
    "schema": os.environ.get("SNOWFLAKE_SCHEMA", "your_schema"),
}

# Open the Snowpark session used for the upload and the SQL statements below.
session = Session.builder.configs(connection_parameters).create()
# Root wraps the session; it is used later to look up the search service.
root = Root(session)

First, we download the WANDS (Wayfair ANnotation Dataset) product search data.

In [None]:
# Clone the WANDS repository. The clone is skipped when ./WANDS already exists,
# because `git clone` fails on an existing destination and would break re-runs.
! test -d WANDS || git clone https://github.com/wayfair/WANDS.git

Before creating the Cortex Search service, we need a single text column that contains all the information we want to search over. We therefore build a `TEXT` column by concatenating the product name, class, description, category hierarchy, and parsed features.

In [None]:
def get_features(product_features: str):
    """Flatten a ``product_features`` string into searchable text.

    The input is expected to look like ``"key1:value1|key2:value2"``: entries
    are separated by ``|`` and each entry is split into key and value at its
    first ``:``.

    Args:
        product_features: Raw feature string. Anything that is not a
            non-empty string (``None``, ``""``, or the float ``NaN`` pandas
            uses for empty CSV cells) is treated as "no features".

    Returns:
        The well-formed entries rendered as ``"key: value "`` and concatenated
        (every entry keeps its trailing space), or ``""`` when there is
        nothing to render.
    """
    # pandas represents an empty CSV cell as float NaN, which is truthy and has
    # no .split(); guard on the type instead of on truthiness alone.
    if not isinstance(product_features, str) or not product_features:
        return ""

    rendered = []
    for entry in product_features.split("|"):
        # Split on the first ":" only so values containing a colon stay intact.
        key, separator, value = entry.partition(":")
        key, value = key.strip(), value.strip()
        # Skip entries without a separator or with an empty key/value.
        if separator and key and value:
            rendered.append(f"{key}: {value} ")
    return "".join(rendered)

def wands_text(row):
    """Build the single text value that the search index is created on.

    Args:
        row: Mapping-like record (for example a DataFrame row) indexed by
            ``product_name``, ``product_class``, ``product_description``,
            ``category hierarchy`` and ``features``.

    Returns:
        The present fields rendered as ``"Label: value "`` in the order name,
        class, description, hierarchy, followed by the ``features`` text
        appended as-is. Missing fields (``None``, ``NaN``, empty) are omitted.
    """
    labelled_columns = (
        ("Name", "product_name"),
        ("Class", "product_class"),
        ("Description", "product_description"),
        ("Hierarchy", "category hierarchy"),
    )

    parts = []
    for label, column in labelled_columns:
        value = row[column]
        # NaN is truthy, so a plain truthiness test would emit "Label: nan";
        # check for missing values first, then for empty ones.
        if pd.isna(value) or not value:
            continue
        parts.append(f"{label}: {str(value).strip()} ")

    # Only string feature text can be concatenated; a NaN here is skipped.
    features = row["features"]
    if isinstance(features, str) and features:
        parts.append(features)
    return "".join(parts)


# Read the tab-separated WANDS product table.
product_df = pd.read_csv("WANDS/dataset/product.csv", sep="\t")

# Flatten the features first: wands_text reads the "features" column per row.
product_df["features"] = product_df["product_features"].map(get_features)
product_df["TEXT"] = product_df.apply(wands_text, axis=1)

# Source column -> upper-case column name used in the uploaded table.
# The insertion order of this dict is also the column order of upload_df.
renamed_columns = {
    "product_id": "ID",
    "product_name": "NAME",
    "product_class": "CLASS",
    "rating_count": "RATING_COUNT",
    "average_rating": "RATING",
    "review_count": "REVIEW_COUNT",
}

# Keep only the renamed columns plus the combined search text.
upload_df = product_df.rename(columns=renamed_columns)[
    [*renamed_columns.values(), "TEXT"]
]

Now we can upload the data to Snowflake and create a Cortex Search service.

In [None]:
# Write the prepared frame to a table, creating it if needed and replacing
# whatever an earlier run left there.
session.write_pandas(
    df=upload_df,
    database="CORTEX_SEARCH_DB",
    schema="DATASETS",
    table_name="WANDS_PRODUCT_DATASET",
    auto_create_table=True,
    overwrite=True,
)

# The service searches the TEXT column, exposes CLASS as an attribute, and
# selects the remaining columns from the uploaded table.
create_service_sql = """CREATE OR REPLACE CORTEX SEARCH SERVICE CORTEX_SEARCH_DB.SERVICES.WANDS
ON TEXT
ATTRIBUTES CLASS 
WAREHOUSE = WH_TEST
TARGET_LAG = '60 minute'
AS (
    SELECT
        TEXT, ID, CLASS, RATING_COUNT, RATING, REVIEW_COUNT
    FROM CORTEX_SEARCH_DB.DATASETS.WANDS_PRODUCT_DATASET
)"""
session.sql(create_service_sql).collect()

Now we can query the service.

In [None]:
# Look up the search service through the Root object.
my_service = (
    root
    .databases["CORTEX_SEARCH_DB"]
    .schemas["SERVICES"]
    .cortex_search_services["WANDS"]
)

search_query = "hulmeville writing desk with hutch"
# Columns to return; all three are selected in the service definition.
result_columns = ["ID", "CLASS", "TEXT"]
# Experimental soft boost sent alongside the query.
soft_boost_options = {
    "softBoosts": [
        {"phrase": "Furniture, Office Furniture, Desks"}
    ]
}

# NOTE(review): the snowflake.core release this notebook was run with rejected
# the `experimental` keyword with a TypeError. Confirm which release accepts
# it; until then, retry without the boost instead of leaving a failing cell.
try:
    search_response = my_service.search(
        query=search_query,
        columns=result_columns,
        experimental=soft_boost_options,
    )
except TypeError as exc:
    # Only handle the unsupported-keyword case; re-raise any other TypeError.
    if "experimental" not in str(exc):
        raise
    print(
        "This snowflake.core version does not accept `experimental`; "
        "running the search without soft boosts."
    )
    search_response = my_service.search(
        query=search_query,
        columns=result_columns,
    )

search_response

TypeError: CortexSearchServiceResource.search() got an unexpected keyword argument 'experimental'