In [None]:
import os
from json import loads
from ollama import Client
from pandas import DataFrame, concat
from snowflake.snowpark import Session

In [None]:
# URL of the Ollama `/api/generate` endpoint. No cell visible in this notebook
# reads it; the summarisation cell talks to the server through `ollama.Client`.
API_BASE = "http://ollama:11434/api/generate"
# Default cap on the number of distinct products (ASINs) that get a prompt;
# used as the default `limit` of get_asins() and get_prompts().
product_record_limit = 50

In [None]:
# Snowflake connection settings. Every value except the protocol is read from
# the environment, so a missing variable fails fast with a KeyError.
connection_params = dict(
    port=os.environ['SNOWFLAKE_PORT'],
    protocol='https',
    account=os.environ['SNOWFLAKE_ACCOUNT'],
    password=os.environ["SNOW_PASSWORD"],
    role=os.environ["SNOW_ROLE"],
    warehouse=os.environ["SNOW_WAREHOUSE"],
    database=os.environ["SNOW_DATABASE"],
    schema=os.environ["SNOW_SCHEMA"],
    user=os.environ["SNOW_USER"],
)

# Single Snowpark session shared by the cells below.
session = Session.builder.configs(connection_params).create()

In [None]:
def build_prompt(asin: str, reviewtext: str):
    """Render the LLM instruction text for one product.

    Args:
        asin: Product identifier, interpolated into the opening sentence.
        reviewtext: Review text, appended verbatim after the instructions.

    Returns:
        The complete prompt as a single string.
    """
    # Adjacent literals are concatenated at compile time; every piece except
    # the last ends with the single space that separates the sentences.
    return (
        f'Here are product reviews for a music product with an ID of "{asin}". '
        'Respond back only as only JSON! '
        'Only provide a single record returned. '
        'Provide the product "description", product "name", a summary of all the reviews as "review_summary", the "ASIN" and product "features" based on the content of these reviews. '
        'The "features" should be a string describing the features and NOT JSON. '
        'Do not include the ASIN in the description field. '
        f'The reviews for the product are: {reviewtext}'
    )

In [None]:
def get_asins(limit=product_record_limit):
    """Return the distinct, non-null ASIN values found in PRODUCT_REVIEWS.

    Args:
        limit: Maximum number of ASINs to return. A falsy value (``None`` or
            ``0``) disables the cap and returns every distinct ASIN.

    Returns:
        A list of ASIN values with no ``None`` entries.
    """
    reviews = session.table('PRODUCT_REVIEWS')
    # Drop NULL ASINs in the query, *before* limiting, so a NULL row cannot
    # use up one of the `limit` slots and shrink the result.
    distinct_asins = (
        reviews
        .filter(reviews.ASIN.is_not_null())
        .select("ASIN")
        .distinct()
    )
    if limit:
        distinct_asins = distinct_asins.limit(limit)
    asins = distinct_asins.to_pandas()['ASIN'].to_list()
    # Defensive: keep the original client-side guard against None values.
    return [a for a in asins if a is not None]

In [None]:
def get_prompt(asin, review_limit=20):
    """Build a one-row DataFrame holding the LLM prompt for a single product.

    Args:
        asin: Product identifier; compared as ``str(asin)`` against the ASIN
            column of PRODUCT_REVIEWS.
        review_limit: Maximum number of review rows fetched for the product.
            No ordering is applied, so which rows are picked is left to the
            database.

    Returns:
        A pandas DataFrame with columns ``ASIN`` and ``PROMPT`` and one row.
    """
    review_table = session.table('PRODUCT_REVIEWS')
    review_rows = (
        review_table
        .filter(review_table.ASIN == str(asin))
        .select('ASIN', 'REVIEWTEXT')
        .limit(review_limit)
        .to_pandas()
    )
    # A NULL review body arrives as None/NaN, which str.join rejects with a
    # TypeError; previously that made the caller skip the whole product.
    # Drop the missing entries and coerce the rest to str before joining.
    review_texts = review_rows["REVIEWTEXT"].dropna().astype(str).to_list()
    reviewtext = '\n'.join(review_texts)
    return DataFrame(
        {
            "ASIN": [asin],
            "PROMPT": [build_prompt(asin, reviewtext)]
        }
    )

In [None]:
def get_prompts(limit=product_record_limit):
    """Build the prompt table for up to ``limit`` products.

    Products whose prompt cannot be built are reported and skipped
    (best effort), so the result may hold fewer rows than ``limit``.

    Args:
        limit: Forwarded to get_asins() as the cap on distinct products.

    Returns:
        A pandas DataFrame with columns ``ASIN`` and ``PROMPT``, one row per
        product; empty (but with both columns) when nothing could be built.
    """
    prompts = []
    for asin in get_asins(limit=limit):
        try:
            prompts.append(get_prompt(asin))
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still stop the run, and report the
        # cause instead of hiding it.
        except Exception as exc:
            print(f'Issue with ASIN {asin}, skipping: {exc!r}')
    if not prompts:
        # concat([]) raises ValueError; hand back an empty, correctly shaped
        # frame so downstream len()/iteration keep working.
        return DataFrame(columns=["ASIN", "PROMPT"])
    return concat(prompts)

In [None]:
# Build every prompt up front with the default product limit. This queries
# Snowflake once for the ASIN list and once more per ASIN for its reviews.
prompts = get_prompts()

In [None]:
# Client used by the summarisation cell. The host is hard-coded here and is
# not derived from the API_BASE constant defined in the configuration cell.
ollama = Client(host='http://ollama:11434')

In [None]:
# Ask the model for a JSON summary of each product and collect the parsed
# records. A failure on one product is reported and skipped (best effort).
summary_records = []
total_prompts = len(prompts)
counter = 0  # stays 0 when there are no prompts; otherwise set by the loop

# itertuples avoids binding a loop variable named `id`, which would shadow the
# builtin for every later cell; each row still exposes .ASIN and .PROMPT.
for counter, p in enumerate(prompts.itertuples(index=False), start=1):
    print(f'Getting review summary {counter} of {total_prompts}, ASIN: {p.ASIN}')
    try:
        response = ollama.chat(
            model='mistral',
            messages=[{
                'role': 'user',
                'content': p.PROMPT
            }],
            # Ask Ollama for JSON mode so the reply can be parsed by loads()
            # instead of relying on the prompt wording alone.
            format='json',
        )
        summary = loads(response['message']['content'])
        if not isinstance(summary, dict):
            # A JSON array or scalar cannot become a single product record.
            raise ValueError(f'expected a JSON object, got {type(summary).__name__}')
        # Trust the ASIN from the query rather than the one the model echoed.
        summary["ASIN"] = p.ASIN
        summary_records.append(summary)
    # Exception (not a bare except) so interrupting the kernel stops the loop
    # rather than just skipping the product currently being processed.
    except Exception as exc:
        print(f'Issue with ASIN {p.ASIN}, skipping: {exc!r}')

review_summaries = DataFrame(summary_records)

print("Done!")

In [None]:
# Persist the collected summaries to the PRODUCTS table in the schema and
# database named by the environment.
session.write_pandas(
    df=review_summaries,
    table_name='PRODUCTS',
    # Target location.
    schema=os.environ["SNOW_SCHEMA"],
    database=os.environ["SNOW_DATABASE"],
    # Write behaviour flags.
    auto_create_table=True,
    overwrite=True,
    quote_identifiers=False,
)