# Recommendation System 

In [2]:
import pandas as pd
import re
import json

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load Dataset

## metadata 

In [2]:
file_path_meta = 'data/meta_Appliances.jsonl'
data = []

In [3]:
with open(file_path_meta, 'r', encoding='utf-8') as file:
    for line in file:
        try:
            data.append(json.loads(line))
        except json.JSONDecodeError:
            continue

df_meta = pd.DataFrame(data)
df_meta.head(5)

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Industrial & Scientific,"ROVSUN Ice Maker Machine Countertop, Make 44lb...",3.7,61,[【Quick Ice Making】This countertop ice machine...,[],,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Our Point of View on the Euhomy Ic...,ROVSUN,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'Brand': 'ROVSUN', 'Model Name': 'ICM-2005', ...",B08Z743RRD,,,
1,Tools & Home Improvement,"HANSGO Egg Holder for Refrigerator, Deviled Eg...",4.2,75,"[Plastic, Practical Kitchen Storage - Our egg ...",[],,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': '10 Eggs Egg Holder for Refrigerato...,HANSGO,"[Appliances, Parts & Accessories, Refrigerator...","{'Manufacturer': 'HANSGO', 'Part Number': 'HAN...",B097BQDGHJ,,,
2,Tools & Home Improvement,"Clothes Dryer Drum Slide, General Electric, Ho...",3.5,18,[],"[Brand new dryer drum slide, replaces General ...",,[{'thumb': 'https://m.media-amazon.com/images/...,[],GE,"[Appliances, Parts & Accessories]","{'Manufacturer': 'RPI', 'Part Number': 'WE1M33...",B00IN9AGAE,,,
3,Tools & Home Improvement,154567702 Dishwasher Lower Wash Arm Assembly f...,4.5,26,[MODEL NUMBER:154567702 Dishwasher Lower Wash ...,[MODEL NUMBER:154567702 Dishwasher Lower Wash ...,,[{'thumb': 'https://m.media-amazon.com/images/...,[],folosem,"[Appliances, Parts & Accessories, Dryer Parts ...","{'Manufacturer': 'folosem', 'Part Number': '15...",B0C7K98JZS,,,
4,Tools & Home Improvement,Whirlpool W10918546 Igniter,3.8,12,[This is a Genuine OEM Replacement Part.],[Whirlpool Igniter],25.07,[{'thumb': 'https://m.media-amazon.com/images/...,[],Whirlpool,"[Appliances, Parts & Accessories]","{'Manufacturer': 'Whirlpool', 'Part Number': '...",B07QZHQTVJ,,,


## Review data

In [4]:
file_path = 'data/review_data.jsonl'
data = []

In [5]:
with open(file_path, 'r', encoding='utf-8') as file:
    for line in file:
        try:
            data.append(json.loads(line))
        except json.JSONDecodeError:
            continue

df_review = pd.DataFrame(data)
df_review.head(5)

Unnamed: 0,rating,title,text,images,asin,parent_asin,user_id,timestamp,helpful_vote,verified_purchase
0,5.0,Work great,work great. use a new one every month,[],B01N0TQ0OH,B01N0TQ0OH,AGKHLEW2SOWHNMFQIJGBECAF7INQ,1519317108692,0,True
1,5.0,excellent product,Little on the thin side,[],B07DD2DMXB,B07DD37QPZ,AHWWLSPCJMALVHDDVSUGICL6RUCA,1664746863446,0,True
2,5.0,Happy customer!,"Quick delivery, fixed the issue!",[],B082W3Z9YK,B082W3Z9YK,AHZIJGKEWRTAEOZ673G5B3SNXEGQ,1607225435363,0,True
3,5.0,Amazing value,I wasn't sure whether these were worth it or n...,[],B078W2BJY8,B078W2BJY8,AFGUPTDFAWOHHL4LZDV27ERDNOYQ,1534104184306,0,True
4,5.0,Dryer parts,Easy to install got the product expected to re...,[],B08C9LPCQV,B08C9LPCQV,AELFJFAXQERUSMTXJQ6SYFFRDWMA,1620176603754,0,True


# Data Merge

In [6]:
# Merge two datasets on the common 'parent_asin' column 
# 'how=inner' argument used to ensure that only rows with matching 'parent_asin' values in both datasets are included in the result 
# inner: use intersection of keys from both frames

merged_df = pd.merge(df_review, df_meta, on='parent_asin', how='inner')

In [7]:
merged_df.head(10)

Unnamed: 0,rating,title_x,text,images_x,asin,parent_asin,user_id,timestamp,helpful_vote,verified_purchase,...,description,price,images_y,videos,store,categories,details,bought_together,subtitle,author
0,5.0,Work great,work great. use a new one every month,[],B01N0TQ0OH,B01N0TQ0OH,AGKHLEW2SOWHNMFQIJGBECAF7INQ,1519317108692,0,True,...,[],9.99,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Mr Coffee Water Filter Instruction...,Geesta,"[Small Appliance Parts & Accessories, Coffee &...","{'Manufacturer': 'Geesta', 'Part Number': 'Gee...",,,
1,5.0,excellent product,Little on the thin side,[],B07DD2DMXB,B07DD37QPZ,AHWWLSPCJMALVHDDVSUGICL6RUCA,1664746863446,0,True,...,[],22.99,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'How To Install An Indoor Dryer Ven...,Essential Values,"[Appliances, Parts & Accessories, Dryer Parts ...","{'Manufacturer': 'Essential Values', 'Part Num...",,,
2,5.0,Happy customer!,"Quick delivery, fixed the issue!",[],B082W3Z9YK,B082W3Z9YK,AHZIJGKEWRTAEOZ673G5B3SNXEGQ,1607225435363,0,True,...,[],,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Blutoget make life more convenient...,Romalon,"[Appliances, Parts & Accessories, Dryer Parts ...","{'Manufacturer': 'Romalon', 'Part Number': '27...",,,
3,5.0,Amazing value,I wasn't sure whether these were worth it or n...,[],B078W2BJY8,B078W2BJY8,AFGUPTDFAWOHHL4LZDV27ERDNOYQ,1534104184306,0,True,...,[FilterLogic FL-RF13 Replacement Refrigerator ...,,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Overbest NSF Certified Refrigerato...,FilterLogic,"[Appliances, Parts & Accessories, Refrigerator...","{'Material': 'Carbon,Coconut Shell', 'Product ...",,,
4,5.0,Dryer parts,Easy to install got the product expected to re...,[],B08C9LPCQV,B08C9LPCQV,AELFJFAXQERUSMTXJQ6SYFFRDWMA,1620176603754,0,True,...,[],,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Your Dryer Has Heating Problem?', ...",Sikawai,"[Appliances, Parts & Accessories, Dryer Parts ...","{'Part Number': '279816', 'Item Weight': '1.8 ...",,,
5,5.0,DO NOT purchase this ice machine.,After buying this ice machine just 15 months a...,[],B08D6RFV6D,B099ZKQJHK,AEUH4EH6XHROLT7UZPUYU2YKTYMA,1663078878875,0,True,...,"[2-in-1 ice maker, you can get ice water while...",,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Poor fill design and not made for ...,COOLLIFE,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'Brand': 'COOLLIFE', 'Model Name': 'Counterto...",,,
6,2.0,They don't fit properly,Not the best quality,[],B001TH7GZA,B001TH7H0O,AHCV2CNCOCG6WECDROOUYPDZIFEQ,1610219023865,0,True,...,[],,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Blutoget creates a happy life for ...,Stanco,"[Appliances, Parts & Accessories, Range Parts ...",{'Product Dimensions': '9.75 x 9.75 x 2.5 inch...,,,
7,5.0,Five Stars,Part came quickly and fit my LG dryer. Thanks!,[],B00AF7WZTM,B00AF7WZTM,AH6PLOGWYIVIWLJTY756BHNFD4YA,1480334300000,0,True,...,"[Product Description, This high quality Genuin...",46.27,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Whole Parts General Video', 'url':...",LG,"[Appliances, Parts & Accessories, Dryer Parts ...","{'Brand Name': 'LG', 'Model Info': '4774EL2001...",,,
8,5.0,Five Stars,Always arrive in a fast manner. Descriptions ...,[],B001H05AXY,B001H05AXY,AGCIVNDDRX3PXIUINEG4NCRGWCRA,1427130850000,0,True,...,[Maytag Amana Whirlpool Refrigerator Icemaker ...,,[{'thumb': 'https://m.media-amazon.com/images/...,[],AMANA,"[Appliances, Parts & Accessories]","{'Manufacturer': 'Amana', 'Part Number': 'Wate...",,,
9,2.0,Company is phenomenal.,The company responded very quickly. Refunded p...,[],B085C6C7WH,B085C6C7WH,AFUOYIZBU3MTBOLYKOJE5Z35MBDA,1630449561325,0,True,...,[],,[{'thumb': 'https://m.media-amazon.com/images/...,[],Northair,"[Appliances, Refrigerators, Freezers & Ice Mak...","{'Brand': 'Northair', 'Model Name': '2 in1 Ice...",,,


In [8]:
# For timestamp, we need to convert the current format to YYYY-M-D type

merged_df['datetime'] = pd.to_datetime(merged_df['timestamp'], unit='ms')

# If the timestamp is already in seconds, you can use the code below
# merged_df['datetime'] = pd.to_datetime(merged_df['timestamp'], unit='s')
merged_df.head(5)

Unnamed: 0,rating,title_x,text,images_x,asin,parent_asin,user_id,timestamp,helpful_vote,verified_purchase,...,price,images_y,videos,store,categories,details,bought_together,subtitle,author,datetime
0,5.0,Work great,work great. use a new one every month,[],B01N0TQ0OH,B01N0TQ0OH,AGKHLEW2SOWHNMFQIJGBECAF7INQ,1519317108692,0,True,...,9.99,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Mr Coffee Water Filter Instruction...,Geesta,"[Small Appliance Parts & Accessories, Coffee &...","{'Manufacturer': 'Geesta', 'Part Number': 'Gee...",,,,2018-02-22 16:31:48.692
1,5.0,excellent product,Little on the thin side,[],B07DD2DMXB,B07DD37QPZ,AHWWLSPCJMALVHDDVSUGICL6RUCA,1664746863446,0,True,...,22.99,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'How To Install An Indoor Dryer Ven...,Essential Values,"[Appliances, Parts & Accessories, Dryer Parts ...","{'Manufacturer': 'Essential Values', 'Part Num...",,,,2022-10-02 21:41:03.446
2,5.0,Happy customer!,"Quick delivery, fixed the issue!",[],B082W3Z9YK,B082W3Z9YK,AHZIJGKEWRTAEOZ673G5B3SNXEGQ,1607225435363,0,True,...,,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Blutoget make life more convenient...,Romalon,"[Appliances, Parts & Accessories, Dryer Parts ...","{'Manufacturer': 'Romalon', 'Part Number': '27...",,,,2020-12-06 03:30:35.363
3,5.0,Amazing value,I wasn't sure whether these were worth it or n...,[],B078W2BJY8,B078W2BJY8,AFGUPTDFAWOHHL4LZDV27ERDNOYQ,1534104184306,0,True,...,,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Overbest NSF Certified Refrigerato...,FilterLogic,"[Appliances, Parts & Accessories, Refrigerator...","{'Material': 'Carbon,Coconut Shell', 'Product ...",,,,2018-08-12 20:03:04.306
4,5.0,Dryer parts,Easy to install got the product expected to re...,[],B08C9LPCQV,B08C9LPCQV,AELFJFAXQERUSMTXJQ6SYFFRDWMA,1620176603754,0,True,...,,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Your Dryer Has Heating Problem?', ...",Sikawai,"[Appliances, Parts & Accessories, Dryer Parts ...","{'Part Number': '279816', 'Item Weight': '1.8 ...",,,,2021-05-05 01:03:23.754


In [9]:
merged_df.to_csv('data/merged_df_rec.csv', index=False)

# Filter Dataset

In [3]:
# Load dataset

df = pd.read_csv('data/merged_df_rec.csv')

  df = pd.read_csv('data/merged_df_rec.csv')


In [4]:
# Filter columns 

df_rec = df[['parent_asin', 'main_category', 'user_id', 'title_y', 'title_x', 'images_y', 'datetime', 'text', 'rating', 'rating_number', 'features', 'description', 'price', 'store', 'categories', 'details']]
                      
df_rec.head(20)

Unnamed: 0,parent_asin,main_category,user_id,title_y,title_x,images_y,datetime,text,rating,rating_number,features,description,price,store,categories,details
0,B01N0TQ0OH,Tools & Home Improvement,AGKHLEW2SOWHNMFQIJGBECAF7INQ,Geesta 12-Pack Premium Activated Charcoal Wate...,Work great,[{'thumb': 'https://m.media-amazon.com/images/...,2018-02-22 16:31:48.692,work great. use a new one every month,5.0,4939,"['EXCEPTIONAL QUALITY AND VALUE: Brew clean, d...",[],9.99,Geesta,"['Small Appliance Parts & Accessories', 'Coffe...","{'Manufacturer': 'Geesta', 'Part Number': 'Gee..."
1,B07DD37QPZ,Tools & Home Improvement,AHWWLSPCJMALVHDDVSUGICL6RUCA,Essential Values 18 Pack Compatible Replacemen...,excellent product,[{'thumb': 'https://m.media-amazon.com/images/...,2022-10-02 21:41:03.446,Little on the thin side,5.0,3186,['BEST VALUE - Our 18 Pack Of Fine Polyester R...,[],22.99,Essential Values,"['Appliances', 'Parts & Accessories', 'Dryer P...","{'Manufacturer': 'Essential Values', 'Part Num..."
2,B082W3Z9YK,Tools & Home Improvement,AHZIJGKEWRTAEOZ673G5B3SNXEGQ,279838 Dryer Heating Element by Romalon with R...,Happy customer!,[{'thumb': 'https://m.media-amazon.com/images/...,2020-12-06 03:30:35.363,"Quick delivery, fixed the issue!",5.0,444,"[""★【BUY WITH CONFIDENCE】 For any reason you're...",[],,Romalon,"['Appliances', 'Parts & Accessories', 'Dryer P...","{'Manufacturer': 'Romalon', 'Part Number': '27..."
3,B078W2BJY8,Amazon Home,AFGUPTDFAWOHHL4LZDV27ERDNOYQ,"Filterlogic UKF8001 Water Filter, Replacement ...",Amazing value,[{'thumb': 'https://m.media-amazon.com/images/...,2018-08-12 20:03:04.306,I wasn't sure whether these were worth it or n...,5.0,355,['The replacement for Maytag UKF8001 refrigera...,['FilterLogic FL-RF13 Replacement Refrigerator...,,FilterLogic,"['Appliances', 'Parts & Accessories', 'Refrige...","{'Material': 'Carbon,Coconut Shell', 'Product ..."
4,B08C9LPCQV,Tools & Home Improvement,AELFJFAXQERUSMTXJQ6SYFFRDWMA,Sikawai 279816 Dryer Thermal Cut-off Kit Repla...,Dryer parts,[{'thumb': 'https://m.media-amazon.com/images/...,2021-05-05 01:03:23.754,Easy to install got the product expected to re...,5.0,55,[],[],,Sikawai,"['Appliances', 'Parts & Accessories', 'Dryer P...","{'Part Number': '279816', 'Item Weight': '1.8 ..."
5,B099ZKQJHK,Industrial & Scientific,AEUH4EH6XHROLT7UZPUYU2YKTYMA,COOLLIFE Compact Countertop Ice Maker Machine ...,DO NOT purchase this ice machine.,[{'thumb': 'https://m.media-amazon.com/images/...,2022-09-13 14:21:18.875,After buying this ice machine just 15 months a...,5.0,506,['Highly Efficient Ice Production - Countertop...,"[""2-in-1 ice maker, you can get ice water whil...",,COOLLIFE,"['Appliances', 'Refrigerators, Freezers & Ice ...","{'Brand': 'COOLLIFE', 'Model Name': 'Counterto..."
6,B001TH7H0O,Tools & Home Improvement,AHCV2CNCOCG6WECDROOUYPDZIFEQ,"Stanco 5557 Drip Bowl Universal, Porcelain coa...",They don't fit properly,[{'thumb': 'https://m.media-amazon.com/images/...,2021-01-09 19:03:43.865,Not the best quality,2.0,11035,['Fits most plug-in electric ranges * Fits mos...,[],,Stanco,"['Appliances', 'Parts & Accessories', 'Range P...",{'Product Dimensions': '9.75 x 9.75 x 2.5 inch...
7,B00AF7WZTM,Appliances,AH6PLOGWYIVIWLJTY756BHNFD4YA,LG 4774EL2001A Genuine OEM Door Hinge for LG D...,Five Stars,[{'thumb': 'https://m.media-amazon.com/images/...,2016-11-28 11:58:20.000,Part came quickly and fit my LG dryer. Thanks!,5.0,129,['The LG 4774EL2001A Door Hinge is a genuine O...,"['Product Description', ""This high quality Gen...",46.27,LG,"['Appliances', 'Parts & Accessories', 'Dryer P...","{'Brand Name': 'LG', 'Model Info': '4774EL2001..."
8,B001H05AXY,Tools & Home Improvement,AGCIVNDDRX3PXIUINEG4NCRGWCRA,Amana Refrigerator Icemaker Water Valve R00002...,Five Stars,[{'thumb': 'https://m.media-amazon.com/images/...,2015-03-23 17:14:10.000,Always arrive in a fast manner. Descriptions ...,5.0,59,['Water Valve Inlet allows water exchange and ...,['Maytag Amana Whirlpool Refrigerator Icemaker...,,AMANA,"['Appliances', 'Parts & Accessories']","{'Manufacturer': 'Amana', 'Part Number': 'Wate..."
9,B085C6C7WH,Appliances,AFUOYIZBU3MTBOLYKOJE5Z35MBDA,Northair 2 in1 Ice Maker and Water Dispenser w...,Company is phenomenal.,[{'thumb': 'https://m.media-amazon.com/images/...,2021-08-31 22:39:21.325,The company responded very quickly. Refunded p...,2.0,223,['【2-in-1 Ice Maker】 2-in-1 Water Dispenser wi...,[],,Northair,"['Appliances', 'Refrigerators, Freezers & Ice ...","{'Brand': 'Northair', 'Model Name': '2 in1 Ice..."


In [5]:
# rename columns

df_rec.rename(columns={'title_y': 'product_name', 'title_x': 'review_title', 'text': 'review_text', 'parent_asin': 'product_id', 'images_y': 'images'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_rec.rename(columns={'title_y': 'product_name', 'title_x': 'review_title', 'text': 'review_text', 'parent_asin': 'product_id', 'images_y': 'images'}, inplace=True)


In [6]:
df_rec.head(5)

Unnamed: 0,product_id,main_category,user_id,product_name,review_title,images,datetime,review_text,rating,rating_number,features,description,price,store,categories,details
0,B01N0TQ0OH,Tools & Home Improvement,AGKHLEW2SOWHNMFQIJGBECAF7INQ,Geesta 12-Pack Premium Activated Charcoal Wate...,Work great,[{'thumb': 'https://m.media-amazon.com/images/...,2018-02-22 16:31:48.692,work great. use a new one every month,5.0,4939,"['EXCEPTIONAL QUALITY AND VALUE: Brew clean, d...",[],9.99,Geesta,"['Small Appliance Parts & Accessories', 'Coffe...","{'Manufacturer': 'Geesta', 'Part Number': 'Gee..."
1,B07DD37QPZ,Tools & Home Improvement,AHWWLSPCJMALVHDDVSUGICL6RUCA,Essential Values 18 Pack Compatible Replacemen...,excellent product,[{'thumb': 'https://m.media-amazon.com/images/...,2022-10-02 21:41:03.446,Little on the thin side,5.0,3186,['BEST VALUE - Our 18 Pack Of Fine Polyester R...,[],22.99,Essential Values,"['Appliances', 'Parts & Accessories', 'Dryer P...","{'Manufacturer': 'Essential Values', 'Part Num..."
2,B082W3Z9YK,Tools & Home Improvement,AHZIJGKEWRTAEOZ673G5B3SNXEGQ,279838 Dryer Heating Element by Romalon with R...,Happy customer!,[{'thumb': 'https://m.media-amazon.com/images/...,2020-12-06 03:30:35.363,"Quick delivery, fixed the issue!",5.0,444,"[""★【BUY WITH CONFIDENCE】 For any reason you're...",[],,Romalon,"['Appliances', 'Parts & Accessories', 'Dryer P...","{'Manufacturer': 'Romalon', 'Part Number': '27..."
3,B078W2BJY8,Amazon Home,AFGUPTDFAWOHHL4LZDV27ERDNOYQ,"Filterlogic UKF8001 Water Filter, Replacement ...",Amazing value,[{'thumb': 'https://m.media-amazon.com/images/...,2018-08-12 20:03:04.306,I wasn't sure whether these were worth it or n...,5.0,355,['The replacement for Maytag UKF8001 refrigera...,['FilterLogic FL-RF13 Replacement Refrigerator...,,FilterLogic,"['Appliances', 'Parts & Accessories', 'Refrige...","{'Material': 'Carbon,Coconut Shell', 'Product ..."
4,B08C9LPCQV,Tools & Home Improvement,AELFJFAXQERUSMTXJQ6SYFFRDWMA,Sikawai 279816 Dryer Thermal Cut-off Kit Repla...,Dryer parts,[{'thumb': 'https://m.media-amazon.com/images/...,2021-05-05 01:03:23.754,Easy to install got the product expected to re...,5.0,55,[],[],,Sikawai,"['Appliances', 'Parts & Accessories', 'Dryer P...","{'Part Number': '279816', 'Item Weight': '1.8 ..."


In [7]:
df_rec.shape

(2128605, 16)

In [8]:
# Check duplicates

duplicate_rows = df_rec[df_rec.duplicated()]
print(duplicate_rows.shape)

(22657, 16)


In [9]:
# Remove duplicates 

df_rec = df_rec.drop_duplicates()

In [10]:
# re-check duplicates after removal 

duplicate_rows = df_rec[df_rec.duplicated()]
print(duplicate_rows.shape)

(0, 16)


In [11]:
# Check missing data 

df_rec.isnull().sum()

product_id            0
main_category     28172
user_id               0
product_name         15
review_title        865
images                0
datetime              0
review_text         931
rating                0
rating_number         0
features              0
description           0
price            522690
store              6030
categories            0
details               0
dtype: int64

In [12]:
# remove missing data 

df_rec = df_rec.dropna()

In [13]:
# Re-check missing value after removal 

df_rec.isnull().sum()

product_id       0
main_category    0
user_id          0
product_name     0
review_title     0
images           0
datetime         0
review_text      0
rating           0
rating_number    0
features         0
description      0
price            0
store            0
categories       0
details          0
dtype: int64

In [14]:
# unique category check

category_unique = df_rec['main_category'].unique()
print(category_unique)
print(f"Total unique 'category' in the reviews dataset: {len(category_unique)}")

['Tools & Home Improvement' 'Appliances' 'Sports & Outdoors' 'Amazon Home'
 'Industrial & Scientific' 'Health & Personal Care' 'Automotive'
 'Office Products' 'Grocery' 'Baby' 'Books' 'AMAZON FASHION'
 'All Electronics' 'Pet Supplies' 'Camera & Photo' 'Toys & Games'
 'All Beauty' 'Home Audio & Theater' 'Cell Phones & Accessories'
 'Arts, Crafts & Sewing' 'Portable Audio & Accessories' 'Computers'
 'Musical Instruments' 'Car Electronics' 'Movies & TV' 'Video Games'
 'Digital Music']
Total unique 'category' in the reviews dataset: 27


In [15]:
# unique product name check

product_title_unique = df_rec['product_name'].unique()
print(product_title_unique)
print(f"Total unique 'products' in the reviews dataset: {len(product_title_unique)}")

['Geesta 12-Pack Premium Activated Charcoal Water Filter Disk for All Mr. Coffee Models'
 'Essential Values 18 Pack Compatible Replacement Filters (90 Dryer Loads Total) for Bettervent Indoor Dryer Vent'
 'LG 4774EL2001A Genuine OEM Door Hinge for LG Dryers' ...
 '3-Pack Replacement for Kenmore/Sears 10650512003 Refrigerator Water Filter - Compatible with Kenmore/Sears 46-9002 Fridge Water Filter Cartridge'
 'W10161215 Lower Rack & wheels Fits for Whirlpool Dishwasher 8561705 8519478 8539225'
 'Primeco 4417EA1002K Washer Motor Stator Compatible for Washer Made by OEM Parts Manufacturer AP5229785, 1519490, 4417EA1002X, AH3522950, EA3522950, PS3522950']
Total unique 'products' in the reviews dataset: 43471


# Content based Filtering


 

In [16]:
# Filter columns

df_rec = df_rec[['product_id', 'main_category', 'categories', 'user_id', 'product_name', 'images', 'datetime', 'rating', 'features', 'description', 'price']]
                      
df_rec.head(20)

Unnamed: 0,product_id,main_category,categories,user_id,product_name,images,datetime,rating,features,description,price
0,B01N0TQ0OH,Tools & Home Improvement,"['Small Appliance Parts & Accessories', 'Coffe...",AGKHLEW2SOWHNMFQIJGBECAF7INQ,Geesta 12-Pack Premium Activated Charcoal Wate...,[{'thumb': 'https://m.media-amazon.com/images/...,2018-02-22 16:31:48.692,5.0,"['EXCEPTIONAL QUALITY AND VALUE: Brew clean, d...",[],9.99
1,B07DD37QPZ,Tools & Home Improvement,"['Appliances', 'Parts & Accessories', 'Dryer P...",AHWWLSPCJMALVHDDVSUGICL6RUCA,Essential Values 18 Pack Compatible Replacemen...,[{'thumb': 'https://m.media-amazon.com/images/...,2022-10-02 21:41:03.446,5.0,['BEST VALUE - Our 18 Pack Of Fine Polyester R...,[],22.99
7,B00AF7WZTM,Appliances,"['Appliances', 'Parts & Accessories', 'Dryer P...",AH6PLOGWYIVIWLJTY756BHNFD4YA,LG 4774EL2001A Genuine OEM Door Hinge for LG D...,[{'thumb': 'https://m.media-amazon.com/images/...,2016-11-28 11:58:20.000,5.0,['The LG 4774EL2001A Door Hinge is a genuine O...,"['Product Description', ""This high quality Gen...",46.27
10,B09CBF2XCF,Sports & Outdoors,"['Small Appliance Parts & Accessories', 'Coffe...",AHPUT3ITXCHQJO7OMF74LEMYHIVA,Perfect Pod EZ-Scoop Coffee Scooper & Funnel f...,[{'thumb': 'https://m.media-amazon.com/images/...,2021-03-17 21:09:01.916,4.0,['Coffee Scoop with Integrated Funnel- fastest...,[],8.99
11,B09W5PMK5X,Appliances,"['Appliances', 'Laundry Appliances', 'Washers ...",AHGAOIZVODNHYMNCBV4DECZH42UQ,COMFEE’ Washing Machine 2.4 Cu.ft LED Portable...,[{'thumb': 'https://m.media-amazon.com/images/...,2022-04-17 00:01:07.807,5.0,"[""AUTO CLEAN - COMFEE' 2.4 Cu.ft portable wash...",['Feeling unsafe going to Laundromat? Tired of...,399.0
12,B07BFGZQ65,Amazon Home,"['Small Appliance Parts & Accessories', 'Coffe...",AHGAOIZVODNHYMNCBV4DECZH42UQ,Blendin Replacement Pod Holder Part with Exit ...,[{'thumb': 'https://m.media-amazon.com/images/...,2018-07-02 21:18:12.904,5.0,['WIDELY COMPATIBLE With KEURIG: This K Cup Ho...,"[""Welcome to Blendin! We give you dependable K...",9.99
14,B072JXR3MW,Amazon Home,"['Small Appliance Parts & Accessories', 'Coffe...",AHGAOIZVODNHYMNCBV4DECZH42UQ,MG Coffee Refillable Capsule Reusable Capsules...,[{'thumb': 'https://m.media-amazon.com/images/...,2018-04-28 21:52:00.094,5.0,"['Reusbale- can be use over hundred time,user ...",[],8.99
15,B08FDB6W59,Tools & Home Improvement,"['Appliances', 'Parts & Accessories', 'Dryer P...",AHGAOIZVODNHYMNCBV4DECZH42UQ,"Deflecto Dryer Lint Trap Kit, Indoor Venting w...",[{'thumb': 'https://m.media-amazon.com/images/...,2017-12-18 00:35:34.944,5.0,['Designed For Safe Indoor Dryer Venting Where...,"['Product Description', 'When Outside Venting ...",18.0
16,B00EORXEUS,Amazon Home,"['Small Appliance Parts & Accessories', 'Coffe...",AHGAOIZVODNHYMNCBV4DECZH42UQ,Brew Rite Wrap Around Percolator Coffee Filter...,[{'thumb': 'https://m.media-amazon.com/images/...,2017-07-22 23:53:29.568,4.0,"['Made in USA', '55 ct Wraparound Style Filter...",['In order to provide superior coffee filtrati...,5.99
17,B07VN6QRQL,Amazon Home,"['Small Appliance Parts & Accessories', 'Coffe...",AHGAOIZVODNHYMNCBV4DECZH42UQ,Melitta 3.5 Inch White Disc Coffee Filters,[{'thumb': 'https://m.media-amazon.com/images/...,2017-07-15 03:42:32.427,1.0,['Each package contains 100 Filters (200 total...,"[""Specially designed to let coffee's full flav...",6.18


In [17]:
# check how many reviews are registered, per category 

review_counts = df_rec.groupby('main_category').size()
print(review_counts)


main_category
AMAZON FASHION                     930
All Beauty                         471
All Electronics                   1956
Amazon Home                     443516
Appliances                      408604
Arts, Crafts & Sewing              137
Automotive                        6243
Baby                              2224
Books                              159
Camera & Photo                    1698
Car Electronics                     95
Cell Phones & Accessories          505
Computers                          228
Digital Music                        1
Grocery                          10581
Health & Personal Care            5788
Home Audio & Theater               440
Industrial & Scientific          76509
Movies & TV                          3
Musical Instruments                270
Office Products                   1189
Pet Supplies                       343
Portable Audio & Accessories       251
Sports & Outdoors                 4177
Tools & Home Improvement        603314
Toys & Game

### Recommendation for Appliances Categories

In [153]:
# we will create a recommendation system for products from Appliances category 

target_categories = [
    'Appliances',
]

# .isin() method for target categories
elec_df = df_rec[df_rec['main_category'].isin(target_categories)]
elec_df.head(5)

Unnamed: 0,product_id,main_category,categories,user_id,product_name,images,datetime,rating,features,description,price
7,B00AF7WZTM,Appliances,"['Appliances', 'Parts & Accessories', 'Dryer P...",AH6PLOGWYIVIWLJTY756BHNFD4YA,LG 4774EL2001A Genuine OEM Door Hinge for LG D...,[{'thumb': 'https://m.media-amazon.com/images/...,2016-11-28 11:58:20.000,5.0,['The LG 4774EL2001A Door Hinge is a genuine O...,"['Product Description', ""This high quality Gen...",46.27
11,B09W5PMK5X,Appliances,"['Appliances', 'Laundry Appliances', 'Washers ...",AHGAOIZVODNHYMNCBV4DECZH42UQ,COMFEE’ Washing Machine 2.4 Cu.ft LED Portable...,[{'thumb': 'https://m.media-amazon.com/images/...,2022-04-17 00:01:07.807,5.0,"[""AUTO CLEAN - COMFEE' 2.4 Cu.ft portable wash...",['Feeling unsafe going to Laundromat? Tired of...,399.0
37,B00ECV6EEO,Appliances,"['Appliances', 'Parts & Accessories', 'Oven Pa...",AE2TA5GQH4JI5RQ4W5H5PQOVYBGA,GE WB2X9719 Support Bracket Silver,[{'thumb': 'https://m.media-amazon.com/images/...,2020-05-24 20:10:39.354,1.0,['The GE Appliances Broil Element Support is a...,"['Product Description', 'The high quality GE A...",8.75
57,B0C57WMPJQ,Appliances,"['Appliances', 'Refrigerators, Freezers & Ice ...",AGJXTLEOLLTIX5AAGFPBZ7CNNVOQ,"Silonn Ice Makers Countertop, 9 Cubes Ready in...",[{'thumb': 'https://m.media-amazon.com/images/...,2022-06-10 16:22:20.651,5.0,['Note : Please check the dimension and item w...,['Ice in 6 mins - Self Cleaning - Portable - 2...,80.97
89,B0C2HMPZRB,Appliances,"['Appliances', 'Laundry Appliances', 'Washers ...",AGYEAZK4OEYF2MSSTGJ5WNJDVZKA,"COSTWAY Compact Laundry Dryer, 110V Electric P...",[{'thumb': 'https://m.media-amazon.com/images/...,2019-09-23 22:44:56.891,5.0,['【Compact & Portable】The weight of this cloth...,[],259.99


In [154]:
elec_df.shape

(408604, 11)

In [155]:
# unique categories check

elec_category_unique = elec_df['categories'].unique()
print(elec_category_unique)
print(f"Total unique 'products' in the reviews dataset: {len(elec_category_unique)}")

["['Appliances', 'Parts & Accessories', 'Dryer Parts & Accessories', 'Replacement Parts']"
 "['Appliances', 'Laundry Appliances', 'Washers & Dryers', 'Portable Washers']"
 "['Appliances', 'Parts & Accessories', 'Oven Parts & Accessories']"
 "['Appliances', 'Refrigerators, Freezers & Ice Makers', 'Ice Makers']"
 "['Appliances', 'Laundry Appliances', 'Washers & Dryers', 'Portable Dryers']"
 "['Appliances', 'Refrigerators, Freezers & Ice Makers', 'Freezers', 'Chest Freezers']"
 "['Appliances', 'Refrigerators, Freezers & Ice Makers', 'Refrigerators']"
 "['Appliances', 'Parts & Accessories', 'Range Parts & Accessories', 'Drip Pans']"
 "['Appliances', 'Dishwashers', 'Countertop Dishwashers']"
 "['Appliances', 'Parts & Accessories', 'Refrigerator Parts & Accessories', 'Water Filters']"
 "['Appliances', 'Ranges, Ovens & Cooktops', 'Range Hoods']"
 "['Appliances', 'Refrigerators, Freezers & Ice Makers', 'Freezers', 'Upright Freezers']"
 "['Appliances', 'Parts & Accessories', 'Refrigerator Parts

In [156]:
# main category and detailed categories check

elec_df[['main_category', 'categories']].head(5)

Unnamed: 0,main_category,categories
7,Appliances,"['Appliances', 'Parts & Accessories', 'Dryer P..."
11,Appliances,"['Appliances', 'Laundry Appliances', 'Washers ..."
37,Appliances,"['Appliances', 'Parts & Accessories', 'Oven Pa..."
57,Appliances,"['Appliances', 'Refrigerators, Freezers & Ice ..."
89,Appliances,"['Appliances', 'Laundry Appliances', 'Washers ..."


In [157]:
# convert list to str for categorise values 
import ast 

def list_to_string(categories):
    try:
        if isinstance(categories, str):
            list_obj = ast.literal_eval(categories)
        elif isinstance(categories, list):
            list_obj = categories
        else:
            return str(categories)
        
        return ' '.join(list_obj)
    except Exception as e:
        # Return the original value if pasing fails
        print(f"Error processing value: {categories}. Error: {e}")
        return str(categories)
    

In [158]:
elec_df['categories_string'] = elec_df['categories'].apply(list_to_string)
elec_df.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  elec_df['categories_string'] = elec_df['categories'].apply(list_to_string)


Unnamed: 0,product_id,main_category,categories,user_id,product_name,images,datetime,rating,features,description,price,categories_string
7,B00AF7WZTM,Appliances,"['Appliances', 'Parts & Accessories', 'Dryer P...",AH6PLOGWYIVIWLJTY756BHNFD4YA,LG 4774EL2001A Genuine OEM Door Hinge for LG D...,[{'thumb': 'https://m.media-amazon.com/images/...,2016-11-28 11:58:20.000,5.0,['The LG 4774EL2001A Door Hinge is a genuine O...,"['Product Description', ""This high quality Gen...",46.27,Appliances Parts & Accessories Dryer Parts & A...
11,B09W5PMK5X,Appliances,"['Appliances', 'Laundry Appliances', 'Washers ...",AHGAOIZVODNHYMNCBV4DECZH42UQ,COMFEE’ Washing Machine 2.4 Cu.ft LED Portable...,[{'thumb': 'https://m.media-amazon.com/images/...,2022-04-17 00:01:07.807,5.0,"[""AUTO CLEAN - COMFEE' 2.4 Cu.ft portable wash...",['Feeling unsafe going to Laundromat? Tired of...,399.0,Appliances Laundry Appliances Washers & Dryers...
37,B00ECV6EEO,Appliances,"['Appliances', 'Parts & Accessories', 'Oven Pa...",AE2TA5GQH4JI5RQ4W5H5PQOVYBGA,GE WB2X9719 Support Bracket Silver,[{'thumb': 'https://m.media-amazon.com/images/...,2020-05-24 20:10:39.354,1.0,['The GE Appliances Broil Element Support is a...,"['Product Description', 'The high quality GE A...",8.75,Appliances Parts & Accessories Oven Parts & Ac...
57,B0C57WMPJQ,Appliances,"['Appliances', 'Refrigerators, Freezers & Ice ...",AGJXTLEOLLTIX5AAGFPBZ7CNNVOQ,"Silonn Ice Makers Countertop, 9 Cubes Ready in...",[{'thumb': 'https://m.media-amazon.com/images/...,2022-06-10 16:22:20.651,5.0,['Note : Please check the dimension and item w...,['Ice in 6 mins - Self Cleaning - Portable - 2...,80.97,"Appliances Refrigerators, Freezers & Ice Maker..."
89,B0C2HMPZRB,Appliances,"['Appliances', 'Laundry Appliances', 'Washers ...",AGYEAZK4OEYF2MSSTGJ5WNJDVZKA,"COSTWAY Compact Laundry Dryer, 110V Electric P...",[{'thumb': 'https://m.media-amazon.com/images/...,2019-09-23 22:44:56.891,5.0,['【Compact & Portable】The weight of this cloth...,[],259.99,Appliances Laundry Appliances Washers & Dryers...


In [159]:
def text_cleaning(text):
    text = str(text).lower()
    # remove special characters
     # Remove any remaining non-alphabetic characters
    text = re.sub(r'[^a-z\s]', '', text)
    return text 

In [160]:
elec_df['sub_categories'] = elec_df['categories_string'].apply(text_cleaning)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  elec_df['sub_categories'] = elec_df['categories_string'].apply(text_cleaning)


In [161]:
elec_df.head(5)

Unnamed: 0,product_id,main_category,categories,user_id,product_name,images,datetime,rating,features,description,price,categories_string,sub_categories
7,B00AF7WZTM,Appliances,"['Appliances', 'Parts & Accessories', 'Dryer P...",AH6PLOGWYIVIWLJTY756BHNFD4YA,LG 4774EL2001A Genuine OEM Door Hinge for LG D...,[{'thumb': 'https://m.media-amazon.com/images/...,2016-11-28 11:58:20.000,5.0,['The LG 4774EL2001A Door Hinge is a genuine O...,"['Product Description', ""This high quality Gen...",46.27,Appliances Parts & Accessories Dryer Parts & A...,appliances parts accessories dryer parts acc...
11,B09W5PMK5X,Appliances,"['Appliances', 'Laundry Appliances', 'Washers ...",AHGAOIZVODNHYMNCBV4DECZH42UQ,COMFEE’ Washing Machine 2.4 Cu.ft LED Portable...,[{'thumb': 'https://m.media-amazon.com/images/...,2022-04-17 00:01:07.807,5.0,"[""AUTO CLEAN - COMFEE' 2.4 Cu.ft portable wash...",['Feeling unsafe going to Laundromat? Tired of...,399.0,Appliances Laundry Appliances Washers & Dryers...,appliances laundry appliances washers dryers ...
37,B00ECV6EEO,Appliances,"['Appliances', 'Parts & Accessories', 'Oven Pa...",AE2TA5GQH4JI5RQ4W5H5PQOVYBGA,GE WB2X9719 Support Bracket Silver,[{'thumb': 'https://m.media-amazon.com/images/...,2020-05-24 20:10:39.354,1.0,['The GE Appliances Broil Element Support is a...,"['Product Description', 'The high quality GE A...",8.75,Appliances Parts & Accessories Oven Parts & Ac...,appliances parts accessories oven parts acce...
57,B0C57WMPJQ,Appliances,"['Appliances', 'Refrigerators, Freezers & Ice ...",AGJXTLEOLLTIX5AAGFPBZ7CNNVOQ,"Silonn Ice Makers Countertop, 9 Cubes Ready in...",[{'thumb': 'https://m.media-amazon.com/images/...,2022-06-10 16:22:20.651,5.0,['Note : Please check the dimension and item w...,['Ice in 6 mins - Self Cleaning - Portable - 2...,80.97,"Appliances Refrigerators, Freezers & Ice Maker...",appliances refrigerators freezers ice makers ...
89,B0C2HMPZRB,Appliances,"['Appliances', 'Laundry Appliances', 'Washers ...",AGYEAZK4OEYF2MSSTGJ5WNJDVZKA,"COSTWAY Compact Laundry Dryer, 110V Electric P...",[{'thumb': 'https://m.media-amazon.com/images/...,2019-09-23 22:44:56.891,5.0,['【Compact & Portable】The weight of this cloth...,[],259.99,Appliances Laundry Appliances Washers & Dryers...,appliances laundry appliances washers dryers ...


In [162]:
elec_df['sub_categories'].value_counts()

sub_categories
appliances parts  accessories refrigerator parts  accessories water filters                                                                    73802
appliances parts  accessories dryer parts  accessories replacement parts                                                                       52783
appliances refrigerators freezers  ice makers ice makers                                                                                       44098
appliances laundry appliances washers  dryers portable washers                                                                                 34874
appliances parts  accessories                                                                                                                  22145
                                                                                                                                               ...  
appliances ranges ovens  cooktops warming drawers                                          

In [163]:
elec_df.sub_categories.head(5)

7     appliances parts  accessories dryer parts  acc...
11    appliances laundry appliances washers  dryers ...
37    appliances parts  accessories oven parts  acce...
57    appliances refrigerators freezers  ice makers ...
89    appliances laundry appliances washers  dryers ...
Name: sub_categories, dtype: object

In [164]:
# Calculate the average rating for each product 

df_agg = elec_df.groupby('product_name').agg(
    # Calculate the mean rating
    rating = ('rating', 'mean'),
    sub_categories = ('sub_categories', 'first')
).reset_index()

In [165]:
df_unique = df_agg.copy()

In [166]:
df_unique = df_unique.reset_index(drop=True)

In [167]:
print(f"Original product count (with reviews): {len(elec_df)}")
print(f"Unique product count: {len(df_unique)}")
print("DataFrame successfully aggregated to unique products.")

Original product count (with reviews): 408604
Unique product count: 11378
DataFrame successfully aggregated to unique products.


In [168]:
_# vectorization using CountVectorizer

cv_unique = CountVectorizer()
category_features_unique = cv_unique.fit_transform(df_unique['sub_categories'])

In [169]:
category_features_unique.shape

(11378, 96)

In [170]:
# 2. Calculate the Cosine Similarity Matrix using the unique features
category_sim_matrix_unique = cosine_similarity(category_features_unique)

In [171]:
category_sim_matrix_unique.shape

(11378, 11378)

In [175]:
# function to recommend similar products based on product name 
def recommend_product(
        user_product_input: str, 
        df: pd.DataFrame,
        category_similarity,
        n_recommendations: int = 5,      
        min_rating: float = 4.0,
        similarity_threshold: float = 0.1) -> pd.DataFrame:

    # Find all product name containing the user input 
    matching_products = df[df['product_name'].str.contains(user_product_input, case=False, na=False)]
 
    if matching_products.empty:
      print(f" No Product found matching '{user_product_input}'")
      return pd.DataFrame()

    # product matching    
    seed_product_index = matching_products.index[0]  
    seed_product_name = df.loc[seed_product_index, 'product_name']
    # print(f" Using '{seed_product_name}' as the seed product.")  

   # Get the similarity score for the seed product against all other products 
    category_scores = pd.Series(category_similarity[seed_product_index]) 

   # combine scores with the dataframe and sort 
    sim_df = df.copy()
    sim_df['similarity_score'] = category_scores 

   # filter out the seed product itself 
    sim_df = sim_df[sim_df.index != seed_product_index]

   # Filter by a minimum similarity threshold
    sim_df = sim_df[sim_df['similarity_score'] > similarity_threshold]

   # Filter products below the minimum required rating 
    rated_sim_df = sim_df[sim_df['rating'] >= min_rating] 
  
    if rated_sim_df.empty:
      print(f" No highly rated (>{min_rating}) similar products found. Returning best matches regardless of rating.")
      # If no highly rated products, return the best matches by similarity 
      final_df = sim_df 
    else:
      final_df = rated_sim_df 

   # Final sort by similarity score (descending)
    final_recommendations = final_df.sort_values(by=['similarity_score', 'rating'], ascending=[False, False]).head(n_recommendations)

    return final_recommendations[[ 'product_name', 'sub_categories', 'rating', 'similarity_score' ]]  

## Testing the function 

* To validate that the recommend_product function is working as intended, we will execute it with a sample user query. 
* The function should identify a seed product containing 'washer' in its name, calculate the similarity scores based on its sub-categories, filter by the minimum rating, 
  and return the top recommended products, ranked by similarity.



In [176]:
# sample query is washer 
user_query = "washer"
min_acceptable_rating = 4.0

recommendations = recommend_product(
    user_product_input=user_query,
    df= df_unique,
    category_similarity = category_sim_matrix_unique,
    min_rating=min_acceptable_rating,
    n_recommendations=5 # Get top 5
)

print("\n--- Recommendations ---")
(recommendations)


--- Recommendations ---


Unnamed: 0,product_name,sub_categories,rating,similarity_score
37,(NEW 1oz size) Uber Goop DARK GREY Dishwasher ...,appliances parts accessories dishwasher parts...,5.0,1.0
460,25 Universal Gray Grey Dishwasher Rack Tip Tin...,appliances parts accessories dishwasher parts...,5.0,1.0
938,630076 White Vinyl Dishwasher Rack Repair Coat...,appliances parts accessories dishwasher parts...,5.0,1.0
2403,CoreCentric Remanufactured Dishwasher Control ...,appliances parts accessories dishwasher parts...,5.0,1.0
2407,CoreCentric Remanufactured Dishwasher Control ...,appliances parts accessories dishwasher parts...,5.0,1.0


Upon running the function with the query 'washer', the system successfully returned the top 5 most relevant products. 