In [1]:
import pandas as pd
from accelerate import init_empty_weights, infer_auto_device_map
from llama_cpp import Llama

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Bring in the review data produced by the earlier models; the preview below
# should show the sentiment column alongside the review text.

# Location of the sentiment checkpoint CSV
reviews_csv = './Models output/sentiment_checkpoint.csv'
processed_reviews = pd.read_csv(reviews_csv)

# Sanity check: preview the first few rows
reviews_preview = processed_reviews.head()
print(reviews_preview)

   rating              title  \
0     5.0         Work great   
1     5.0  excellent product   
2     5.0    Happy customer!   
3     5.0      Amazing value   
4     5.0        Dryer parts   

                                                text parent_asin sentiment  \
0              work great. use a new one every month  B01N0TQ0OH  positive   
1                            Little on the thin side  B07DD37QPZ  positive   
2                   Quick delivery, fixed the issue!  B082W3Z9YK  positive   
3  I wasn't sure whether these were worth it or n...  B078W2BJY8  positive   
4  Easy to install got the product expected to re...  B08C9LPCQV  positive   

                                       combined_text  \
0   Work great work great. use a new one every month   
1          excellent product Little on the thin side   
2   Happy customer! Quick delivery, fixed the issue!   
3  Amazing value I wasn't sure whether these were...   
4  Dryer parts Easy to install got the product ex...   

 

In [3]:
# Restore the product metadata frames saved as pickles.
# NOTE: unpickling executes arbitrary code, so only load files you created yourself.
meta_pickle = "./Saved datasets/meta.pkl"
meta_cat_pickle = "./Saved datasets/meta_cat.pkl"

meta = pd.read_pickle(meta_pickle)
meta_cat = pd.read_pickle(meta_cat_pickle)

In [4]:
# Merge categories to the meta dataframe
# Product titles are not guaranteed to be unique, so keep a single category per
# title before joining; otherwise every repeated title in `meta_cat` would fan
# out into extra rows in the merged frame.
title_categories = meta_cat[['title', 'category_new']].drop_duplicates(subset='title')

# validate='many_to_one' makes pandas raise if the right side is not unique on
# `title`, so row duplication cannot creep back in silently.
processed_meta = meta.merge(title_categories, on='title', how='left', validate='many_to_one')
print(processed_meta.head())

              main_category  \
0   Industrial & Scientific   
1  Tools & Home Improvement   
2  Tools & Home Improvement   
3  Tools & Home Improvement   
4  Tools & Home Improvement   

                                               title  average_rating  \
0  ROVSUN Ice Maker Machine Countertop, Make 44lb...             3.7   
1  HANSGO Egg Holder for Refrigerator, Deviled Eg...             4.2   
2  Clothes Dryer Drum Slide, General Electric, Ho...             3.5   
3  154567702 Dishwasher Lower Wash Arm Assembly f...             4.5   
4                        Whirlpool W10918546 Igniter             3.8   

   rating_number                                           features  \
0             61  [【Quick Ice Making】This countertop ice machine...   
1             75  [Plastic, Practical Kitchen Storage - Our egg ...   
2             18                                                 []   
3             26  [MODEL NUMBER:154567702 Dishwasher Lower Wash ...   
4             12         

In [5]:
# Score every product from its average rating and its number of ratings

# Dataset-wide means used as the prior in the formula below
mean_rating = processed_meta['average_rating'].mean()
mean_rating_count = processed_meta['rating_number'].mean()

# Bayesian average: each product's rating total is blended with a prior worth
# `mean_rating_count` ratings at `mean_rating`
rating_counts = processed_meta['rating_number']
blended_rating_total = processed_meta['average_rating'] * rating_counts + mean_rating * mean_rating_count
blended_rating_count = rating_counts + mean_rating_count
processed_meta['ranking_score'] = blended_rating_total / blended_rating_count

print(processed_meta[['title', 'ranking_score']])

                                                    title  ranking_score
0       ROVSUN Ice Maker Machine Countertop, Make 44lb...       4.015724
1       HANSGO Egg Holder for Refrigerator, Deviled Eg...       4.155630
2       Clothes Dryer Drum Slide, General Electric, Ho...       4.071319
3       154567702 Dishwasher Lower Wash Arm Assembly f...       4.185644
4                             Whirlpool W10918546 Igniter       4.111839
...                                                   ...            ...
135548                                          AMI PARTS       4.130818
135549  WP10442411 Refrigerator Defrost Thermostat Rep...       4.145603
135550    3957749 DRYER KNOB WHIRLPOOL ROPER USED PART fc       4.140343
135551  5 Dryer Timer Knob Replacement for Frigidaire ...       4.140343
135552  Edgewater Parts AP2039084-4 Rack Rollers Compa...       4.145603

[135553 rows x 2 columns]


In [6]:
# Trim both frames down to the columns the article generation uses
review_columns = ['parent_asin', 'combined_text', 'sentiment']  # the predicted labels could be used here as well
meta_columns = ['parent_asin', 'category_new', 'title', 'ranking_score', 'features']  # the predicted categories could be used here as well

reviews_clean = processed_reviews[review_columns]
meta_clean = processed_meta[meta_columns]

In [32]:
# Location of the GGUF model file
model_path = "./amber.Q4_K_M.gguf"

# Settings handed to the Llama constructor, gathered in one place
# NOTE(review): confirm that n_ctx=6000 does not exceed the context length the model supports
llama_settings = dict(
    model_path=model_path,
    n_ctx=6000,
    n_threads=8,
    n_gpu_layers=0,
)

# Initialize the model
llm = Llama(**llama_settings)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [8]:
# Create smaller dataset with only top n products

def get_top_products(df, category=None, top_n=3):
    """Return the highest-scoring unique products, optionally within one category.

    Args:
        df: DataFrame with 'category_new', 'ranking_score' and 'parent_asin' columns.
        category: Value of 'category_new' to restrict to; a falsy value keeps all rows.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with at most ``top_n`` rows, ordered by descending
        'ranking_score', keeping only the best-scoring row per 'parent_asin'.
    """
    # Restrict to the requested category only when one was given
    candidates = df[df['category_new'] == category] if category else df

    return (
        candidates
        .sort_values(by='ranking_score', ascending=False)  # best score first
        .drop_duplicates(subset=['parent_asin'])           # one row per product
        .head(top_n)
    )

In [16]:
# Smoke test of get_top_products on a single category
category = 'refrigerators, freezers and ice makers'
top_n = 3  # how many top products to keep

# Run the selection and inspect the result
top_products = get_top_products(meta_clean, category=category, top_n=top_n)
print(top_products)

       parent_asin                            category_new  \
16056   B000BQWN2G  refrigerators, freezers and ice makers   
108045  B083Y6FJD6  refrigerators, freezers and ice makers   
102513  B07R3YF9Q7  refrigerators, freezers and ice makers   

                                                    title  ranking_score  \
16056   Fluidmaster 12IM72 Braided Stainless Steel Ice...       4.736266   
108045  Shark Industrial 25 FT Stainless Steel Braided...       4.704098   
102513  Refrigerator Lock,Fridge Lock with Keys,Freeze...       4.668703   

                                                 features  
16056   [QUALITY WATER SUPPLY LINE: Ice maker connecto...  
108045  [ONE 304 stainless steel braided premium icema...  
102513  [Make You More Peace:Refrigerator lock is perf...  


In [60]:
# Break the prompt down into separate steps (features, positive reviews, negative reviews) because of context length restrictions

# Step 1: Summarize the features for each product

def summarize_features(llm, product_title, features):
    """Ask the model for a three-sentence summary of a product's features.

    Args:
        llm: Callable invoked as ``llm(prompt, max_tokens=..., stop=..., echo=...)``
            that returns a dict holding the completion under
            ``['choices'][0]['text']``.
        product_title: Product name inserted into the prompt.
        features: Feature descriptions, either a single string or an iterable
            of strings. ``None``, blank and non-string entries are ignored.

    Returns:
        The generated text, or a fixed fallback sentence when there is no
        usable feature text (the model is not called in that case).
    """
    # Normalise the input to a list so strings, lists, arrays and missing
    # values (None / NaN) are all handled the same way.
    if features is None:
        feature_items = []
    elif isinstance(features, str):
        feature_items = [features]
    else:
        try:
            feature_items = list(features)
        except TypeError:
            # Non-iterable scalar such as a float NaN
            feature_items = [features]

    usable_features = [item for item in feature_items if isinstance(item, str) and item.strip()]
    if not usable_features:
        # Nothing to summarise: an empty prompt only invites made-up content
        return "No feature information available."

    # Send plain text instead of the Python list repr, which the model tends
    # to echo back verbatim (brackets and quotes included).
    features_text = " ".join(usable_features)
    prompt = f"Summarize the features of the product in 3 sentences '{product_title}':\nFeatures: {features_text}\n"
    output = llm(prompt, max_tokens=300, stop=["</s>"], echo=False)
    return output['choices'][0]['text']

# Step 2: Summarize the positive reviews

def summarize_positive_reviews(llm, product_title, positive_reviews):
    """Ask the model to summarise the positive reviews of a product.

    Args:
        llm: Callable invoked as ``llm(prompt, max_tokens=..., stop=..., echo=...)``
            that returns a dict holding the completion under
            ``['choices'][0]['text']``.
        product_title: Product name inserted into the prompt.
        positive_reviews: Iterable of review texts. ``None``, blank and
            non-string entries (e.g. NaN) are ignored.

    Returns:
        The generated text, or a fixed fallback sentence when there are no
        usable reviews (the model is not called in that case).
    """
    if positive_reviews is None:
        positive_reviews = []

    # Drop missing / non-text entries, which would make str.join raise
    usable_reviews = [review for review in positive_reviews if isinstance(review, str) and review.strip()]
    if not usable_reviews:
        # With no review text the model would invent a summary
        return "No positive reviews available."

    reviews_text = " ".join(usable_reviews)
    prompt = f"Summarize the top 3 positive reviews for '{product_title}':\nReviews: {reviews_text}\n"
    output = llm(prompt, max_tokens=300, stop=["</s>"], echo=False)
    return output['choices'][0]['text']

# Step 3: Summarize the negative reviews

def summarize_negative_reviews(llm, product_title, negative_reviews):
    """Ask the model to summarise the negative reviews of a product.

    Args:
        llm: Callable invoked as ``llm(prompt, max_tokens=..., stop=..., echo=...)``
            that returns a dict holding the completion under
            ``['choices'][0]['text']``.
        product_title: Product name inserted into the prompt.
        negative_reviews: Iterable of review texts. ``None``, blank and
            non-string entries (e.g. NaN) are ignored.

    Returns:
        The generated text, or a fixed fallback sentence when there are no
        usable reviews (the model is not called in that case).
    """
    if negative_reviews is None:
        negative_reviews = []

    # Drop missing / non-text entries, which would make str.join raise
    usable_reviews = [review for review in negative_reviews if isinstance(review, str) and review.strip()]
    if not usable_reviews:
        # With no review text the model would invent complaints
        return "No negative reviews available."

    reviews_text = " ".join(usable_reviews)
    prompt = f"Summarize the top 3 negative reviews for '{product_title}':\nReviews: {reviews_text}\n"
    output = llm(prompt, max_tokens=300, stop=["</s>"], echo=False)
    return output['choices'][0]['text']

In [61]:
# Step 4: Concatenate into the final article
def generate_final_article(llm, products, reviews_df, max_reviews=5):
    """Build the article text: one summarised section per product.

    For every row of ``products`` the features, the positive reviews and the
    negative reviews are summarised by the model (three calls per product) and
    the results are appended as one labelled section.

    Args:
        llm: Model object passed through to the ``summarize_*`` helpers.
        products: DataFrame with 'parent_asin', 'title' and 'features' columns.
        reviews_df: DataFrame with 'parent_asin', 'sentiment' and
            'combined_text' columns.
        max_reviews: Maximum number of reviews per sentiment sent to the model
            for each product. Defaults to 5 to keep prompts short.

    Returns:
        The concatenated sections as a single string; an empty string when
        ``products`` has no rows.
    """
    sections = []

    for _, product in products.iterrows():
        title = product['title']

        # Reviews belonging to this product, split by sentiment label
        product_reviews = reviews_df[reviews_df['parent_asin'] == product['parent_asin']]
        sentiment = product_reviews['sentiment']
        positive_reviews = product_reviews[sentiment == 'positive']['combined_text'].tolist()
        negative_reviews = product_reviews[sentiment == 'negative']['combined_text'].tolist()

        # Three model calls per product, each fed a capped number of reviews
        features_summary = summarize_features(llm, title, product['features'])
        positive_reviews_summary = summarize_positive_reviews(llm, title, positive_reviews[:max_reviews])
        negative_reviews_summary = summarize_negative_reviews(llm, title, negative_reviews[:max_reviews])

        sections.append(
            f"\nProduct: {title}\n"
            f"Features Summary:\n{features_summary}\n"
            f"Positive Reviews Summary:\n{positive_reviews_summary}\n"
            f"Negative Reviews Summary:\n{negative_reviews_summary}\n"
        )

    # Join once at the end instead of growing a string inside the loop
    return "".join(sections)

In [62]:
# List the distinct categories so one can be picked by its position
unique_categories = pd.unique(meta_clean["category_new"])
categories = unique_categories.tolist()
print(categories)

['refrigerators, freezers and ice makers', 'parts and accessories', 'small appliances', 'mixed', 'microwaves, ovens and grills', 'dishwashers, washers and dryers']


In [64]:
# Pick the category by its position in `categories`
category = categories[0]
top_n = 3  # how many top products the article should cover

# Select the best-ranked products of that category
top_products = get_top_products(meta_clean, category=category, top_n=top_n)

# Summarise each selected product with the model and show the resulting article
final_article = generate_final_article(llm, products=top_products, reviews_df=reviews_clean)
print(final_article)

Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit



Product: Fluidmaster 12IM72 Braided Stainless Steel Ice Maker Connector Water Line with Dual 1/4-In. x 1/4-In. Female Compression Threads, 6 Ft. (72-In.) Length
Features Summary:
Specifications : ['INCLUDES: One 1/4 in. x [email protected] (72 inches) stainless steel ice maker connector', "One Ice Maker Connections Assembly", 'Four Nickel-plated Brass Hex Nuts, Washers and Splicing Tape'
Warranty : ['Limited Lifetime']]
Product dimensions: [email protected], 72.0 x .154" (3/8")
Positive Reviews Summary:
See all 3,106 customer reviews on Amazon USA
Negative Reviews Summary:
Reviews: Doesn't last and will ruin a floor - Review from Home Depot Started leaking two years after installing .Starts Leaked at area just above nut part ,made in China ... One Star didnt replace my plastic icemaker tubing which crack... maybe its a peculiarity of the fridge, don't know.
Reviews: Fluidmaster 12IM74 Stainless Steel Ice Making Connector Kit with Dual Compression Fittings and Adapter - White (3-Pack) 