In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import text, create_engine
import warnings
warnings.filterwarnings("ignore")

In [2]:
DATABASE_URL = 'mysql+pymysql://root:Lihasigen%404444@localhost:3306/amazon_data_db'
engine = create_engine(DATABASE_URL)

In [3]:
# for testing data
query = "SELECT * FROM amazon_dataframe LIMIT 10000"  
df = pd.read_sql(query, engine)

In [4]:
df['category'].value_counts()

category
All Beauty                  7747
Health & Personal Care       851
Premium Beauty               753
Amazon Home                  173
Tools & Home Improvement     145
AMAZON FASHION                56
Industrial & Scientific       50
Grocery                       24
Toys & Games                  16
Pet Supplies                   9
Sports & Outdoors              8
Appliances                     7
Baby                           6
All Electronics                4
Office Products                4
Arts, Crafts & Sewing          2
Computers                      1
Name: count, dtype: int64

In [5]:
df[['review_title', 'category', 'review_headline', 'rating', 'price']].head(3)

Unnamed: 0,review_title,category,review_headline,rating,price
0,Terra Tattoos Tropical Hawaiian Metallic Tatto...,All Beauty,Gasoline!! Seriously reeks of gasoline!,1.0,9.87
1,Palmer's Coconut Oil Formula Moisture Boost Co...,All Beauty,This stuff is a luxurious vacation in a bottle.,5.0,19.85
2,Chloven 55 Pcs Premium Velvet Hair Scrunchies ...,All Beauty,A+,5.0,11.99


In [7]:
df.head(3)

Unnamed: 0,rating,review_headline,reviews,parent_asin,category,product_title,average_rating,rating_number,features,description,price,store,categories,details
0,1.0,Gasoline!! Seriously reeks of gasoline!,Opened the package & instant migraine. I canno...,B00Z03RC80,All Beauty,Terra Tattoos Tropical Hawaiian Metallic Tatto...,4.3,882,OVER 75+ New tropical mermaid temporary metall...,,9.87,Terra Tattoos,"Beauty & Personal Care, Makeup, Body, Temporar...","Color: Gold,Silver, Brand: Terra Tattoos, Size..."
1,5.0,This stuff is a luxurious vacation in a bottle.,It smells heavenly but not overwhelming at all...,B0BHP2ZS4G,All Beauty,Palmer's Coconut Oil Formula Moisture Boost Co...,4.6,1507,"Hydrating Shampoo: This rich, creamy, non stri...",Palmer\u2019s Coconut Oil Formula Moisture Boo...,19.85,Palmer's,"Beauty & Personal Care, Hair Care, Shampoo & C...","Is Discontinued By Manufacturer: No, Product D..."
2,5.0,A+,Excellent,B081RW1PLY,All Beauty,Chloven 55 Pcs Premium Velvet Hair Scrunchies ...,4.7,11451,"?55 PIECES of HIGH QUALITY SCRUNCHIES?, Our ve...",,11.99,Chloven,"Beauty & Personal Care, Hair Care, Hair Access...","Brand: Chloven, Hair Type: Long, Material: Vel..."


In [8]:
df.columns

Index(['rating', 'review_headline', 'reviews', 'parent_asin', 'category',
       'product_title', 'average_rating', 'rating_number', 'features',
       'description', 'price', 'store', 'categories', 'details'],
      dtype='object')

In [None]:
# group by parent_asin

products = df.groupby('parent_asin').agg({
    'product_title': 'first',          # Take first product title
    'category': 'first',                # Take first category
    'price': df['category']'mean',                    # Average price
    'rating': 'mean',                   # Average rating
    'rating_number': 'first',           # Number of reviews
    'reviews': lambda x: ' '.join(x.astype(str)[:10]),  # Combine first 10 reviews
    'features': 'first',                # Product features
    'description': 'first',             # Product description
    'details': 'first'                  # Product details
}).reset_index()

In [10]:
sample = products.iloc[0]
print(f"Product: {sample['product_title']}")
print(f"Category: {sample['category']}")
print(f"Avg Rating: {sample['rating']:.1f}")
print(f"Price: ${sample['price']:.2f}")

Product: Bond No 9 Chez Bond Cologne for Men 1.7 oz Eau De Parfum Spray
Category: All Beauty
Avg Rating: 3.0
Price: $217.17


In [11]:
df.columns

Index(['rating', 'review_headline', 'reviews', 'parent_asin', 'category',
       'product_title', 'average_rating', 'rating_number', 'features',
       'description', 'price', 'store', 'categories', 'details'],
      dtype='object')

In [12]:
# create text embeddings

def create_embeddings(row):
    
    parts = []
    parts.append(row['product_title'])

    if pd.notna(row['category']):
        parts.append(f"Category:{row['category']}")

    if pd.notna(row['features']):
        parts.append(f"Features:{str(row['features'])[:300]}")
        
    if pd.notna(row['description']):
        parts.append(f"Description:{str(row['description'])[:300]}")

    if pd.notna(row['reviews']):
        parts.append(f"Customer Reviews:{str(row['reviews'])[:600]}")

    if pd.notna(row['price']):
        parts.append(f"Price:${row['price']:.2f}")
    
    parts.append(f"Rating:{row['rating']}/5")

    return " | ".join(parts)

In [13]:
products['combined_text'] = products.apply(create_embeddings, axis=1)
print("Combined text:")
print(products['combined_text'].iloc[3][:800] + "...\n")

Combined text:
Face Moisturizer by Olay, Complete All Day Moisturizer With Sunscreen Broad Spectrum SPF 15 - Sensitive, 4 fl. Oz, 1 unit | Category:All Beauty | Features:Gentle formula provides 8 hours of long-lasting hydration for sensitive skin, Fragrance-free moisturizer helps prevent skin damage by protecting against the suns harmful rays with SolaSheer Sensitive Technology, 100% fragrance-free, and PABA-free formula. Non-greasy, Conditions skin\u2019s surface w | Description:Protect against the #1 cause of aging skin - damage from the sun. Recommended by the Skin Cancer Foundation, Olay Complete Daily Moisturizer with Broad Spectrum Sunscreen SPF 15 Facial Moisturizer, with UVA/UVB protection, provides 8 hours of hydration for sensitive skin. This gentle, lightweight lo | Customer Reviews:I've had i...



In [14]:
products.head()

Unnamed: 0,parent_asin,product_title,category,price,rating,rating_number,reviews,features,description,details,combined_text
0,9790770952,Bond No 9 Chez Bond Cologne for Men 1.7 oz Eau...,All Beauty,217.17,3.0,32,"The product itself smells good. However, the b...",,"Product Description, Bond No 9 Chez Bond Colog...","Brand: Bond No. 9, Item Form: Spray, Item Volu...",Bond No 9 Chez Bond Cologne for Men 1.7 oz Eau...
1,9839215027,Nu Skin Enhancer Skin Conditioning Gel,All Beauty,20.0,4.0,651,I like it,"Nuskin Nu Skin Enhancer Skin Conditioning Gel,...",With conditioning panthenol and soothing aloe ...,"Brand: Nu Skin, Scent: Aloe Vera, Item Form: G...",Nu Skin Enhancer Skin Conditioning Gel | Categ...
2,B000052YN9,"Eucerin Original Healing Cream, Fragrance Free...",All Beauty,29.06,5.0,6593,This is wonderful for dry skin and is very hea...,Helps to Heal and Protect: This Eucerin Healin...,Eucerin Original Healing Cream is a time-teste...,"Brand: Eucerin, Scent: Fragrance Free, Item Fo...","Eucerin Original Healing Cream, Fragrance Free..."
3,B000052YQ2,"Face Moisturizer by Olay, Complete All Day Moi...",All Beauty,9.49,5.0,3135,I've had issues with some moisturizers lately ...,Gentle formula provides 8 hours of long-lastin...,Protect against the #1 cause of aging skin - d...,"Brand: Olay, Scent: Unscented, Item Form: Loti...","Face Moisturizer by Olay, Complete All Day Moi..."
4,B000052YQN,Pond's Cold Cream Cleanser 3.5 oz,All Beauty,7.29,2.0,7691,"No security seal on jar, had to send it back! ...","3.5oz jar of Ponds Cold Cream Cleanser, Dissol...",Dissolves all traces of makeup and moisturizes...,"Item Form: Cream, Skin Type: Dry, Brand: Ponds...",Pond's Cold Cream Cleanser 3.5 oz | Category:A...


In [15]:
# Generate embeddings

from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

print("Model loaded")

Model loaded


In [16]:
embeddings = model.encode(
    products['combined_text'].tolist(),
    show_progress_bar=True,
    batch_size=32
)

print(f"Embedding shape: {embeddings.shape}")

Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 256/256 [01:25<00:00,  2.99it/s]

Embedding shape: (8187, 384)





In [17]:
# client.delete_collection(name="products")

In [24]:
metadata_df

Unnamed: 0,product_title,category,price,rating
0,Bond No 9 Chez Bond Cologne for Men 1.7 oz Eau...,All Beauty,217.17,3.0
1,Nu Skin Enhancer Skin Conditioning Gel,All Beauty,20.00,4.0
2,"Eucerin Original Healing Cream, Fragrance Free...",All Beauty,29.06,5.0
3,"Face Moisturizer by Olay, Complete All Day Moi...",All Beauty,9.49,5.0
4,Pond's Cold Cream Cleanser 3.5 oz,All Beauty,7.29,2.0
...,...,...,...,...
8182,DRMTLGY Tinted Moisturizer with SPF 46. Univer...,All Beauty,27.95,5.0
8183,TROIAREUKE A.G.T HYDRO ESSENCE l Deep Hydratin...,All Beauty,52.00,4.0
8184,Upgraded 10x Magnifying Lighted Makeup Mirror ...,Amazon Home,19.99,5.0
8185,"Dove Sensitive Skin Micellar Water Body Wash, ...",0.0,22.94,5.0


In [19]:
# Set-up Vector search

import chromadb
client = chromadb.Client()

collection = client.create_collection(name="products")

metadata_df = products[['product_title', 'category', 'price', 'rating']].copy()

metadata_df = metadata_df.where(pd.notna(metadata_df), (0.0))           #replace the null value with 0

metadatas = metadata_df.to_dict('records')

collection.add(
    embeddings=embeddings.tolist(),
    documents=products['combined_text'].tolist(),
    ids=products['parent_asin'].tolist(),
    metadatas=metadatas
)

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event CollectionAddEvent: capture() takes 1 positional argument but 3 were given


In [20]:
test_query = "facewash for oily skin"

# Encode the query
query_embedding = model.encode([test_query])

# Search
results = collection.query(
    query_embeddings=query_embedding.tolist(),
    n_results=5
)

print(f"\n‚úÖ Found {len(results['ids'][0])} results:")

Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given



‚úÖ Found 5 results:


In [21]:
for i, (id, metadata, distance) in enumerate(zip(
    results['ids'][0], 
    results['metadatas'][0], 
    results['distances'][0]
)):
    print(f"\n{i+1}. {metadata['product_title']}")
    print(f"   Category: {metadata['category']}")
    print(f"   Rating: {metadata['rating']:.1f}/5.0")
    print(f"   Price: ${metadata['price']:.2f}")
    print(f"   Similarity: {1 - distance:.3f}")


1. Garnier SkinActive Face Wash with Green Tea, Oily Skin, 6.7 Fluid Ounce (Pack of 2)
   Category: All Beauty
   Rating: 4.0/5.0
   Price: $32.99
   Similarity: 0.411

2. Golden Grooming Co. Men Face Wash - -Nature-Based & Organic Mens Face Wash - Rich Foaming Daily Face Cleanser For Men - Ideal for Sensitive, Oily, Normal & Combo Skin - Black Men Skin Care
   Category: All Beauty
   Rating: 3.0/5.0
   Price: $12.99
   Similarity: 0.365

3. facetheory Clarifying Cleanser C2 - Oil Free Salicylic Acid Face Wash, Exfoliating Face Scrub and Chemical Peel, Deeply Cleanse Pores, Vegan and Cruelty-Free, Made in the UK | 5.7 fl oz
   Category: All Beauty
   Rating: 5.0/5.0
   Price: $16.00
   Similarity: 0.334

4. Basis Cleaner Clean Face Wash, 6 Ounce (Pack of 3)
   Category: All Beauty
   Rating: 2.0/5.0
   Price: $9.87
   Similarity: 0.329

5. Acne Control Face Wash For Oily Acne Skin Prone to Breakouts, 8oz
   Category: All Beauty
   Rating: 4.0/5.0
   Price: $29.99
   Similarity: 0.301


In [22]:
# Create a Search function 

def search_products(query, k=5):
    """
    Args: 
    query: User search
    k: No of similar search results
    
    Returns:
    List of matching products with metadata
    """
    
    query_embedding = model.encode([query])

    results = collection.query(
        query_embeddings= query_embedding.tolist(),
        n_results=k
    )

    return results

In [23]:
search_products("shampoo for hairfall", k=3)

{'ids': [['B0C4LYR6Z3', 'B001A6L7UA', 'B00FXH7YZ4']],
 'distances': [[0.7068009376525879, 0.7318345904350281, 0.7630177140235901]],
 'metadatas': [[{'category': 'All Beauty',
    'price': 15.3,
    'product_title': 'WOW Skin Science Apple Cider Vinegar Shampoo & Conditioner Set with Coconut & Avocado Oil - Men and Women Gentle Shampoo Set - Hair Growth Shampoo for Thinning Hair & Loss - Sulfate & Paraben Free',
    'rating': 5.0},
   {'category': 'All Beauty',
    'price': 33.99,
    'product_title': 'Hair Loss Therapy Conditioner | Best conditioner to promote and encourage healthy looking hair | Nutritive ingredients the feed and hydrate your hair',
    'rating': 2.0},
   {'category': 'All Beauty',
    'price': 22.97,
    'product_title': 'Hair Shampoo for Damaged Hair - Fragrance Free Moisturizing Shampoo with Organic Aloe, Coconut Oil & Rosemary - Mens & Womens Shampoo - Clarifying Shampoo Best for Curly, Fine, Thick, Dry or Oily Hair',
    'rating': 5.0}]],
 'embeddings': None,
 'd

In [37]:
#Connect to LLM(Ollama)

import requests

def generate_response(prompt):
    response = requests.post(
        'http://localhost:11434/api/generate',
        json={
            'model':'llama3.1:8b',
            'prompt': prompt,
            'stream':False,
        }
    )
    return response.json()['response']


In [41]:
print(generate_response("Nature-Based & Organic Mens Face Wash"))

Here are some great options for Nature-Based and Organic Men's Face Wash:

1. **Burt's Bees Sensitive Skin Facial Cleanser**: Made with aloe vera, chamomile, and green tea extract to soothe and calm sensitive skin.
2. **Acure Organics Mint To Be Facial Cleanser**: Contains burdock root, argan stem cells, and argan oil to cleanse and balance the skin.
3. **Andalou Naturals Fruit Stem Cell Revitalizing Mist & Face Wash**: Enriched with resveratrol, vitamin C, and fruit stem cells to revitalize and protect the skin.
4. **Desert Essence Thoroughly Clean Facial Cleanser**: Made with aloe vera, tea tree oil, and burdock root to gently clean and balance the skin.
5. **Dr. Bronner's Castile Soap**: A natural, biodegradable soap made from organic oils like coconut, olive, and jojoba.
6. **The Body Shop Tea Tree Oil Facial Cleanser**: Contains a blend of tea tree oil, witch hazel, and aloe vera to cleanse and reduce acne.
7. **Jason Natural Purifying Neem Face Wash**: Made with neem extract, tea

In [47]:
def chatbot(user_query, k=5):
    """
    Complete RAG chatbot
    """
    
    # 1. Search for relevant products
    print("\nüîç Searching for relevant products...")
    results = search_products(user_query, k=k)
    
    # 2. Format context
    context = "Here are the relevant products:\n\n"
    for i, meta in enumerate(results['metadatas'][0]):
        context += f"{i+1}. {meta['product_title']}\n"
        context += f"   Category: {meta['category']}\n"
        context += f"   Rating: {meta['rating']:.1f}/5.0\n"
        context += f"   Price: ${meta['price']:.2f}\n\n"
    
    # 3. Create prompt
    prompt = f"""You are a helpful e-commerce assistant. 

User asked: "{user_query}"

{context}

Provide a helpful recommendation explaining which product(s) best match their needs and why. Be frindly, kind and specific."""

    # 4. Generate response
    print("ü§ñ Generating response...")
    response = generate_response(prompt)
    
    print(f"\nüí¨ Chatbot: {response}")
    
    
    return response

# Test it!
chatbot("HOw do you recommend WOW Skin Science Apple Cider Vinegar Shampoo & Conditioner?")


üîç Searching for relevant products...
ü§ñ Generating response...

üí¨ Chatbot: I'm so happy to help you find the perfect Apple Cider Vinegar Shampoo & Conditioner set that suits your hair type and needs!

Based on our product selection, I highly recommend **Option 1: WOW Skin Science Apple Cider Vinegar Shampoo & Conditioner Set** ($15.30). Here's why:

* **Excellent rating**: This product has a perfect 5.0/5.0 rating from customers who have used it, which means they're extremely satisfied with the results.
* **Multi-benefit formula**: The set contains coconut and avocado oil, making it suitable for men and women with dry or damaged hair, as well as those looking to promote hair growth and reduce thinning.
* **Sulfate-free and paraben-free**: This product is gentle on your scalp and locks in moisture without stripping your hair of its natural oils.
* **Affordable price**: At $15.30, this set offers excellent value for the quality and benefits it provides.

While the other options 

"I'm so happy to help you find the perfect Apple Cider Vinegar Shampoo & Conditioner set that suits your hair type and needs!\n\nBased on our product selection, I highly recommend **Option 1: WOW Skin Science Apple Cider Vinegar Shampoo & Conditioner Set** ($15.30). Here's why:\n\n* **Excellent rating**: This product has a perfect 5.0/5.0 rating from customers who have used it, which means they're extremely satisfied with the results.\n* **Multi-benefit formula**: The set contains coconut and avocado oil, making it suitable for men and women with dry or damaged hair, as well as those looking to promote hair growth and reduce thinning.\n* **Sulfate-free and paraben-free**: This product is gentle on your scalp and locks in moisture without stripping your hair of its natural oils.\n* **Affordable price**: At $15.30, this set offers excellent value for the quality and benefits it provides.\n\nWhile the other options are also great products, I wouldn't recommend them as much as WOW Skin Sci