In [None]:
# Install ollama on Greatlake

# We don't have admin access on Greatlake, so ollama is run from a pre-built
# binary unpacked under $HOME/ollama instead of a system-wide install.

# Step 1: download ollama binary
#!curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz

# Step 2: unzip the binary
# !mkdir -p $HOME/ollama
# !tar -C $HOME/ollama -xzf ollama-linux-amd64.tgz

# Step 3: expose binary to system path for running
# NOTE: `!export VAR=...` executes in a temporary subshell that exits as soon
# as the line finishes, so the variable never reaches the kernel or later
# `!ollama ...` cells. Setting os.environ changes the kernel's own environment,
# which every subsequently spawned shell command inherits.
import os

_ollama_home = os.path.join(os.path.expanduser("~"), "ollama")
for _var, _directory in (
    ("PATH", os.path.join(_ollama_home, "bin")),
    ("LD_LIBRARY_PATH", os.path.join(_ollama_home, "lib", "ollama")),
):
    # Drop empty entries (an empty entry means "current directory") and only
    # prepend once so re-running the cell does not keep growing the variable.
    _entries = [p for p in os.environ.get(_var, "").split(os.pathsep) if p]
    if _directory not in _entries:
        _entries.insert(0, _directory)
    os.environ[_var] = os.pathsep.join(_entries)

In [None]:
import os
import sys
# Ensure ollama can be found in python:
# it is installed into the user-level site-packages because we have no admin
# access on Greatlake.
# sys.path entries are used verbatim ("~" is a shell feature and is NOT expanded
# by the import system), so the path must be expanded explicitly.
sys.path.append(os.path.expanduser("~/.local/lib/python3.11/site-packages/"))
# Standard library
import json
import pickle
import re

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import ollama
import pandas as pd
import requests
import seaborn as sns
from pandarallel import pandarallel
from tqdm import tqdm

# Notebook-wide configuration: seaborn "ticks" theme for plots, and pandarallel
# workers (with a progress bar) backing the later `parallel_apply` calls.
sns.set_theme(style="ticks")
pandarallel.initialize(progress_bar=True)

In [None]:
# Ollama default llama3.2:3b context window is only 2k
# Here we increase the context window to 128k (num_ctx 131072 = 128 * 1024)

# Prerequisite: first run `ollama serve` in a terminal

# Write the base model's Modelfile to settings.txt (`>` overwrites an existing file)
!ollama show llama3.2:3b --modelfile > settings.txt

# Append the larger context-window parameter to that Modelfile
!echo "PARAMETER num_ctx 131072" >> settings.txt

# Create new model (llama3.2-large-ctx) with updated settings
!ollama create llama3.2-large-ctx -f settings.txt

In [None]:
# Never truncate long cell contents (e.g. full review text) when displaying frames.
pd.set_option('display.max_colwidth', None)
# Show up to 99 columns. The full option name is spelled out: the abbreviated
# 'display.max_column' only works through pandas' pattern matching and would
# break if another option ever matched the same prefix.
pd.set_option('display.max_columns', 99)

In [None]:
# Name of the Ollama model used for every chat request in this notebook
# (the large-context variant created with `ollama create` in an earlier cell).
model = "llama3.2-large-ctx"

In [None]:
# Smoke test: send one small user message and print the model's reply to
# confirm the Ollama server and the selected model respond.
response = ollama.chat(
    model=model,
    messages=[{'role': 'user', 'content': 'What is this RGB color 97,56,57'}],
)
print(response['message']['content'])

# Get All Recommendation datasets and Reviews

In [None]:
# Load the pickled review table.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
df_reviews = pd.read_pickle(
    'data/product_sku_info_with_rgb_review_v3.pkl'
)

In [None]:
# Read the three recommendation sheets (expert, colour-cluster, skin-tone) in
# one pass, then stack them into a single frame of all recommended SKUs.
df_selected1, df_selected2, df_selected3 = (
    pd.read_excel(workbook)
    for workbook in (
        'recommendation_datasets/expertRecommendation_top_10_skus_per_RBG.xlsx',
        'recommendation_datasets/colorClusterRecommendation_top_20_skus_per_colorCluster.xlsx',
        'recommendation_datasets/skinToneRecommendation_top_20_skus_per_skinTone.xlsx',
    )
)
df_selected = pd.concat([df_selected1, df_selected2, df_selected3])

In [None]:
# Distinct recommended SKU ids, cast to str for the `isin` filter on the review table.
filter_sku_id = df_selected.skuID.astype(str).unique()

In [None]:
# Keep only reviews whose product AND SKU both appear in the recommendation sets.
df_reviews_selected = df_reviews.loc[
    df_reviews['productID'].isin(df_selected['productID'])
    & df_reviews['skuID'].isin(filter_sku_id)
]

In [None]:
# Number of distinct SKUs that have at least one selected review
df_reviews_selected.skuID.nunique()

In [None]:
# Number of distinct products that have at least one selected review
df_reviews_selected.productID.nunique()

In [None]:
# Aggregate all reviews at SKU level
df_reviews_agg_product = df_reviews_selected.groupby(['productID','skuID'])[['ReviewText']].sum().reset_index()

In [None]:
# Sanity check: (rows, columns) of the SKU-level aggregated review frame
df_reviews_agg_product.shape

# Helper Functions

In [None]:
def clean_response(text):
    """Strip boilerplate that the LLM adds around the summarized phrases.

    The prompt asks for plain phrases only, but the model does not always
    comply, so known filler is removed here.

    Removed, in order:
      * whole lines starting with ``Here``, ``Customer Review`` or ``------``;
      * from ``Please note that`` / ``here is`` to the end of that line.

    Args:
        text: Raw model response. Non-string values (e.g. NaN/None from a
            failed request) are returned unchanged.

    Returns:
        The cleaned response with surrounding whitespace removed.
    """
    if not isinstance(text, str):
        return text

    patterns = (
        # Line-anchored boilerplate (MULTILINE makes ^ match at every line start).
        r'^Here.*\n?',
        r'^Customer Review.*\n?',
        r'^------.*\n?',
        # Trailing disclaimers / introductions anywhere in a line.
        r'Please note that.*\n?',
        # \b keeps words such as "There is" / "where is" from being truncated.
        r'\bhere is.*\n?',
    )

    cleaned = text
    for pattern in patterns:
        # Each substitution must build on the previous result; substituting on
        # the original `text` every time would discard all earlier clean-ups.
        cleaned = re.sub(pattern, '', cleaned, flags=re.MULTILINE)
    return cleaned.strip()

# Generate Product-Level Review Summaries

In [None]:
# Instruction prompt for product-level review summarization.
# get_sentiment_product builds the message as `review_text + prompt_product`,
# so the concatenated reviews come first and these instructions follow them.
prompt_product = '''

Provide the reviews as input above


-------------------
Role: You are a professional sales and marketing writer specializing in creating friendly, objective, and engaging lipstick product recommendations for customers.

Task: Summarize the general product feedback from customer reviews of a lipstick product.

Input Details:

Customer reviews contain general product feedback and may also include specific comments linked to skin tones.
Output Requirements:
Each feedback entry should consist of short and concise key phrases, starting with a consistent and suitable emoji that matches the content of the phrase (e.g., 👍 for positive feedback, 🎨 for color-related comments, 💄 for application, etc.).
Formatting Example:
"""
👍 Beautifully pigmented and long-lasting.
💄 Glides smoothly for easy application.
"""
Additional Notes:
Only output the summarized phrases as plain text, with no additional explanation or introductory text.
Ensure feedback is concise and reflects key customer sentiments.
'''

In [None]:
# Define a function that summarizes a SKU's reviews using Ollama's chat API
def get_sentiment_product(review_text):
    """Summarize the concatenated reviews of one SKU with the LLM.

    The reviews are placed first and the instruction prompt
    (``prompt_product``) is appended after them, in a single user message.

    Args:
        review_text: All review text for one SKU, as a single string.

    Returns:
        The text content of the model's reply.
    """
    response = ollama.chat(
        model=model,
        messages=[
            {
                'role': 'user',
                'content': review_text + prompt_product,
            },
        ],
        # Sampling parameters belong in `options`; keys placed inside the
        # message dict are not treated as generation settings.
        options={'temperature': 0.1, 'top_p': 0.9},
    )
    return response['message']['content']

In [None]:
## Uncomment to Test the code with small data
# df_reviews_agg_product['ReviewText'].head(5).parallel_apply(get_sentiment_product)

In [None]:
# Summarize every SKU's reviews in parallel via pandarallel
# (about 45 minutes with 3 V100 GPUs)
df_reviews_agg_product['Sentiment'] = (
    df_reviews_agg_product['ReviewText']
    .parallel_apply(get_sentiment_product)
)

In [None]:
## Uncomment to save the data on disk drive
# df_reviews_agg_product.to_pickle('data/product_level_reviews_summarized.pkl')

In [None]:
# Post-process each generated summary with clean_response to remove LLM filler text
df_reviews_agg_product['Sentiment'] = (
    df_reviews_agg_product['Sentiment'].map(clean_response)
)

# Add the Sentiment column to Recommendation datasets

In [None]:
# The raw review text is not written to the recommendation sheets.
# Rebinding (instead of inplace=True) plus errors='ignore' keeps this cell
# idempotent: re-running it after the column is gone no longer raises KeyError.
df_reviews_agg_product = df_reviews_agg_product.drop(columns=['ReviewText'], errors='ignore')

In [None]:
# Cast skuID to int ahead of the merges below
# (presumably to match the skuID dtype read from the Excel sheets — TODO confirm).
df_reviews_agg_product['skuID'] = df_reviews_agg_product['skuID'].astype(int)

In [None]:
# Left-join the summarized reviews onto each recommendation set (so every
# recommended SKU is kept even without a summary) and write one workbook each.
for recommendations, output_path in (
    (df_selected1, 'recommendation_datasets/expertRecommendation_top_10_skus_per_RBG_w_review_summarized.xlsx'),
    (df_selected2, 'recommendation_datasets/colorClusterRecommendation_top_20_skus_per_colorCluster_w_review_summarized.xlsx'),
    (df_selected3, 'recommendation_datasets/skinToneRecommendation_top_20_skus_per_skinTone_w_review_summarized.xlsx'),
):
    with_summary = recommendations.merge(
        df_reviews_agg_product,
        on=['productID', 'skuID'],
        how='left',
    )
    with_summary.to_excel(output_path)