In [None]:
import os
import json, weaviate
import weaviate.classes as wvc
from snowflake.snowpark.types import StringType
from snowflake.snowpark.functions import udf
from snowflake.snowpark import Session
import pandas as pd

In [None]:
# Connect to Snowflake. Every setting, including the credentials, is read from
# environment variables; a missing variable raises KeyError right here.
# NOTE(review): the port variable is SNOWFLAKE_PORT while all the others use
# the SNOW_ prefix — confirm this matches the deployment environment.
connection_params = {
    'port': os.environ['SNOWFLAKE_PORT'],
    'protocol': 'https',
    'account': os.environ['SNOW_ACCOUNT'],
    'password': os.environ['SNOW_PASSWORD'],
    'role': os.environ['SNOW_ROLE'],
    'warehouse': os.environ['SNOW_WAREHOUSE'],
    'database': os.environ['SNOW_DATABASE'],
    'schema': os.environ['SNOW_SCHEMA'],
    'user': os.environ['SNOW_USER']
}

session = Session.builder.configs(connection_params).create()

# Connect to Weaviate at host "weaviate" without TLS on either channel.
# The ports are passed as integers, matching the integer port parameters in
# connect_to_custom's documented signature.
client = weaviate.connect_to_custom(
    http_host="weaviate",
    http_port=8080,
    http_secure=False,
    grpc_host="weaviate",
    grpc_port=50051,
    grpc_secure=False
)
print("Connected to both Weaviate and Snowflake!")

In [None]:
# Create the "products" collection, or reuse it when it already exists so that
# re-running this cell does not attempt to create a duplicate collection.
if client.collections.exists("products"):
    collection = client.collections.get("products")
    print("Collection already exists - reusing it.")
else:
    collection = client.collections.create(
        name="products",
        vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_transformers(),
        # Every property is plain text; DataType is taken from wvc.config,
        # the same namespace used for Property and Configure.
        properties=[
            wvc.config.Property(
                name=property_name,
                data_type=wvc.config.DataType.TEXT
            )
            for property_name in (
                "ASIN",
                "name",
                "review_summary",
                "features",
                "description",
            )
        ]
    )
    print("Collection Created!")

In [None]:
# Number of objects sent to Weaviate per insert_many call.
BATCH_SIZE = 100

collection = client.collections.get("products")

# Pull the whole PRODUCTS table from Snowflake into a pandas DataFrame.
df = session.table('PRODUCTS').toPandas()

# Convert df to a JSON string and then to a list of dictionaries, one per row.
data = df.to_json(orient='records')
data_list = json.loads(data)

# Map the upper-case table columns onto the collection's property names.
items_to_insert = [
    {
        "ASIN": row["ASIN"],
        "name": row["NAME"],
        "description": row["DESCRIPTION"],
        "features": row["FEATURES"],
        "review_summary": row["REVIEW_SUMMARY"]
    }
    for row in data_list
]

# Insert in slices of BATCH_SIZE. An empty table yields no slices, so
# insert_many is never called with an empty list.
for start in range(0, len(items_to_insert), BATCH_SIZE):
    result = collection.data.insert_many(items_to_insert[start:start + BATCH_SIZE])

    # insert_many reports per-object failures in its return value; surface
    # them instead of discarding the result.
    if result.errors:
        print(f"{len(result.errors)} object(s) failed in the batch starting at row {start}")
        for index, error in list(result.errors.items())[:5]:
            print(f"  row {start + index}: {error.message}")

In [None]:
# Simple near-text search that also asks for each object's vector.
response = collection.query.near_text(
    query="mic",
    limit=2,
    include_vector=True,
)

# Print the vector of every hit to confirm that vectors exist.
for hit in response.objects:
    print(hit.vector)



In [None]:
# Hybrid search: print the properties of the top five results.
response = collection.query.hybrid(query="easy to learn instrument", limit=5)

for hit in response.objects:
    print(hit.properties)

In [None]:
# Keyword (BM25) search: print the properties of the top three results.
response = collection.query.bm25(query="easy to learn instrument", limit=3)

for hit in response.objects:
    print(hit.properties)