In [None]:
import os
from json import loads
from ollama import Client
from pandas import DataFrame, concat
from snowflake.snowpark import Session

In [None]:
# Upper bound on how many product prompt rows are kept for summarisation;
# raise it to process more products per run.
product_record_limit = 50

In [None]:
# Snowflake connection settings. Everything except the protocol is read from
# environment variables; a missing variable raises KeyError immediately.
# Note the two variable prefixes in use: SNOWFLAKE_* for port/account and
# SNOW_* for the remaining settings.
connection_params = dict(
    port=os.environ['SNOWFLAKE_PORT'],
    protocol='https',
    account=os.environ['SNOWFLAKE_ACCOUNT'],
    password=os.environ['SNOW_PASSWORD'],
    role=os.environ['SNOW_ROLE'],
    warehouse=os.environ['SNOW_WAREHOUSE'],
    database=os.environ['SNOW_DATABASE'],
    schema=os.environ['SNOW_SCHEMA'],
    user=os.environ['SNOW_USER'],
)

# Open the Snowpark session that the later cells use for reads and writes.
session_builder = Session.builder.configs(connection_params)
session = session_builder.create()

In [None]:
# Build one LLM prompt per product (ASIN).
#
# The query keeps the 20 most recent reviews per ASIN (by UNIXREVIEWTIME),
# aggregates their text into an array, and concatenates a fixed instruction
# with the reviews joined by newlines. The result has two columns: ASIN and
# PROMPT.
#
# This is a regular (non-raw) Python string, so the `\n` and `\t` sequences
# below reach Snowflake as literal newline / tab characters. REGEXP_REPLACE
# therefore strips the line breaks and tabs from the multi-line instruction
# text, and array_to_string joins the reviews with a real newline.
prompts_query = """
WITH

    most_recent_reviews AS (
        SELECT *
        FROM WEAVIATE_DEMO.PUBLIC.PRODUCT_REVIEWS
        QUALIFY (ROW_NUMBER() OVER (PARTITION BY ASIN ORDER BY UNIXREVIEWTIME DESC)) <=20
    ),
    
    by_asin AS (
        SELECT
            asin,
            array_agg(reviewtext) as reviewtext
        FROM most_recent_reviews
        GROUP BY asin
    )

SELECT 
ASIN,
REGEXP_REPLACE(
'Here are product reviews for a music product with an ID of ' ||
ASIN ||
'. Respond back only as only JSON! Only provide a single record returned. 
Provide the product "description", product "name", 
a summary of all the reviews as "review_summary", 
the "ASIN" and product "features" based on the content of these reviews. 
The "features" should be a string describing the features and NOT JSON. 
Do not include the ASIN in the description field. The reviews for the product are: ',
'\n|\t', '') ||
array_to_string(reviewtext, '\n') AS prompt
FROM by_asin
"""

# Apply the record limit in Snowflake rather than after download, so only the
# rows that will actually be processed are transferred into pandas.
# The query has no ORDER BY, so which products are returned is unspecified
# (this was equally true when the frame was truncated client-side).
prompts = (
    session.sql(prompts_query)
    .limit(product_record_limit)
    .to_pandas()
)

In [None]:
# Client for the Ollama server at http://ollama:11434; used below to send
# chat requests. NOTE(review): the `ollama` hostname presumably resolves on
# the local container/compose network — confirm for other environments.
ollama = Client(host='http://ollama:11434')

In [None]:
# Ask the model for a JSON summary of each product's reviews.
#
# For every (ASIN, PROMPT) row the prompt is sent to the `mistral` model, the
# reply is parsed as JSON, and the row's ASIN is written into the parsed record
# (overriding whatever ASIN the model returned). Products whose request fails
# or whose reply is not a JSON object are reported and skipped (best effort).
summary_records = []
total_prompts = len(prompts)

for position, (asin, prompt_text) in enumerate(
    zip(prompts['ASIN'], prompts['PROMPT']), start=1
):
    print(f'Getting review summary {position} of {total_prompts}, ASIN: {asin}')
    try:
        response = ollama.chat(
            model='mistral',
            messages=[{
                'role': 'user',
                'content': prompt_text
            }]
        )
        summary = loads(response['message']['content'])
        if not isinstance(summary, dict):
            # Valid JSON, but not an object we can turn into a table row.
            raise TypeError(
                f'expected a JSON object, got {type(summary).__name__}'
            )
        summary["ASIN"] = asin
        summary_records.append(summary)
    except Exception as error:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the loop, and report the
        # cause so skipped products can be diagnosed.
        print(
            f'Issue with ASIN {asin}, skipping '
            f'({type(error).__name__}: {error})'
        )

# One row per successfully summarised product; consumed by the write cell.
review_summaries = DataFrame(summary_records)

print("Done!")

In [None]:
# Persist the summaries to the PRODUCTS table in the schema/database named by
# the SNOW_SCHEMA / SNOW_DATABASE environment variables. The table is created
# if needed (auto_create_table) and written with overwrite=True.
write_options = dict(
    table_name='PRODUCTS',
    schema=os.environ["SNOW_SCHEMA"],
    database=os.environ["SNOW_DATABASE"],
    overwrite=True,
    quote_identifiers=False,
    auto_create_table=True,
)

session.write_pandas(df=review_summaries, **write_options)