In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

import os
from scipy.sparse import coo_matrix

In [2]:
# Read the product/review export from the relative path 'amazon.csv' and
# display the column index so the available fields are visible.
train_data = pd.read_csv('amazon.csv')
train_data.columns

Index(['product_id', 'product_name', 'category', 'discounted_price',
       'actual_price', 'discount_percentage', 'rating', 'rating_count',
       'about_product', 'user_id', 'user_name', 'review_id', 'review_title',
       'review_content', 'img_link', 'product_link'],
      dtype='object')

In [3]:
# Keep only the product-level fields; the description, user, and review
# columns listed in the loaded frame are left out of the selection.
train_data = train_data[[
    'product_id', 'product_name', 'category',
    'discounted_price', 'actual_price', 'discount_percentage',
    'rating', 'rating_count',
    'img_link', 'product_link',
]]
train_data.iloc[:3]

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,img_link,product_link
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,₹399,"₹1,099",64%,4.2,24269,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,₹199,₹349,43%,4.0,43994,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...
2,B096MSW6CT,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,₹199,"₹1,899",90%,3.9,7928,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...


In [4]:
# Display the (n_rows, n_columns) tuple of train_data.
train_data.shape

(1465, 10)

In [5]:
# Per-column count of missing values.
train_data.isna().sum()

product_id             0
product_name           0
category               0
discounted_price       0
actual_price           0
discount_percentage    0
rating                 0
rating_count           2
img_link               0
product_link           0
dtype: int64

In [6]:
# Replace missing rating counts with 0.
# Assign the result back instead of calling fillna(..., inplace=True) on the
# column selection: that chained in-place form operates on an intermediate
# object and, per the pandas FutureWarning, will not update train_data in
# pandas 3.0.
train_data['rating_count'] = train_data['rating_count'].fillna(0)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_data['rating_count'].fillna(0, inplace=True)


In [7]:
# Count rows that repeat an earlier row across all columns.
train_data.duplicated().sum()

np.int64(0)

In [8]:
# Attach a 1-based sequential key to every row, then preview the result.
train_data['id'] = range(1, train_data.shape[0] + 1)
train_data.iloc[:3]

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,img_link,product_link,id
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,₹399,"₹1,099",64%,4.2,24269,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,1
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,₹199,₹349,43%,4.0,43994,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...,2
2,B096MSW6CT,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,₹199,"₹1,899",90%,3.9,7928,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...,3


In [9]:
# Work on a copy of train_data. The regex keeps only the first run of digits
# found in each product_id (e.g. 'B07JW9H4J1' -> 7.0) and stores it as a float.
train_data_numeric_id = train_data.copy()
train_data_numeric_id['product_id_numeric'] = (
    train_data_numeric_id['product_id']
    .str.extract(r'(\d+)')
    .astype(float)
)
train_data_numeric_id.iloc[:3]

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,img_link,product_link,id,product_id_numeric
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,₹399,"₹1,099",64%,4.2,24269,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,1,7.0
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,₹199,₹349,43%,4.0,43994,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...,2,98.0
2,B096MSW6CT,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,₹199,"₹1,899",90%,3.9,7928,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...,3,96.0


In [10]:
def alphanum_to_digit(s):
    """Turn an alphanumeric string into a float by replacing letters with digits.

    Each ASCII letter is replaced by its zero-based alphabet position modulo 10
    (a/A -> 0, b/B -> 1, ..., k/K -> 0, ...). Every other character is kept as
    is, and the resulting string is passed to ``float``.

    Raises:
        ValueError: if the translated string is not a valid float literal.
    """
    letter_to_digit = {}
    for first_letter in ('a', 'A'):
        for offset in range(26):
            letter_to_digit[chr(ord(first_letter) + offset)] = str(offset % 10)

    translated = ''.join(letter_to_digit.get(ch, ch) for ch in s)
    return float(translated)

# Overwrite the numeric id with the letter-to-digit translation of the full
# product_id string.
train_data_numeric_id['product_id_numeric'] = (
    train_data_numeric_id['product_id'].map(alphanum_to_digit)
)
train_data_numeric_id.iloc[:3]

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,img_link,product_link,id,product_id_numeric
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,₹399,"₹1,099",64%,4.2,24269,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,1,1079297000.0
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,₹199,₹349,43%,4.0,43994,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...,2,1098387000.0
2,B096MSW6CT,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,₹199,"₹1,899",90%,3.9,7928,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...,3,1096283000.0


In [11]:
# Remove every '0' character from the text form of each value (including the
# one after the decimal point) and parse what is left back into a float.
train_data_numeric_id['product_id_numeric'] = (
    train_data_numeric_id['product_id_numeric']
    .map(lambda value: float(str(value).replace('0', '')))
)
train_data_numeric_id.iloc[:3]

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,img_link,product_link,id,product_id_numeric
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,₹399,"₹1,099",64%,4.2,24269,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,1,179297491.0
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,₹199,₹349,43%,4.0,43994,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...,2,198386516.0
2,B096MSW6CT,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,₹199,"₹1,899",90%,3.9,7928,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...,3,196282629.0


In [17]:
# Replace the original string product_id in train_data with the derived
# numeric id (assignment aligns on the row index).
# NOTE(review): the saved output for this cell already shows `id` as the first
# column and a `tags` column, neither of which this cell produces — the
# notebook appears to have been executed out of order; confirm it still works
# under Restart Kernel -> Run All.
train_data['product_id'] = train_data_numeric_id['product_id_numeric']
train_data.head(3)

Unnamed: 0,id,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,img_link,product_link,tags
0,1,179297491.0,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,₹399,"₹1,099",64%,4.2,24269,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,"[computers&accessories, accessories&peripheral..."
1,2,198386516.0,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,₹199,₹349,43%,4.0,43994,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...,"[computers&accessories, accessories&peripheral..."
2,3,196282629.0,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,₹199,"₹1,899",90%,3.9,7928,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...,"[computers&accessories, accessories&peripheral..."


In [20]:
# Put `id` first and keep every other column in its current relative order.
train_data = train_data[['id', *(col for col in train_data.columns if col != 'id')]]
train_data.iloc[:3]

Unnamed: 0,id,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,img_link,product_link,tags
0,1,179297491.0,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,₹399,"₹1,099",64%,4.2,24269,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,"computers&accessories, accessories&peripherals..."
1,2,198386516.0,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,₹199,₹349,43%,4.0,43994,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...,"computers&accessories, accessories&peripherals..."
2,3,196282629.0,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,₹199,"₹1,899",90%,3.9,7928,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...,"computers&accessories, accessories&peripherals..."


In [19]:
def extract_tags_csv(row):
    """Build a comma-separated tag string from a row's ``category`` value.

    ``row['category']`` is split on '|'; each piece is stripped of surrounding
    whitespace and lower-cased, and the pieces are joined with ', '.
    """
    pieces = row['category'].split('|')
    normalized = map(lambda piece: piece.strip().lower(), pieces)
    return ', '.join(normalized)

# Derive the `tags` text for every row, then show it next to its source column.
train_data['tags'] = train_data.apply(extract_tags_csv, axis='columns')
train_data[['category', 'tags']].iloc[:3]

Unnamed: 0,category,tags
0,Computers&Accessories|Accessories&Peripherals|...,"computers&accessories, accessories&peripherals..."
1,Computers&Accessories|Accessories&Peripherals|...,"computers&accessories, accessories&peripherals..."
2,Computers&Accessories|Accessories&Peripherals|...,"computers&accessories, accessories&peripherals..."


['id', 'product_id', 'product_name', 'category', 'discounted_price', 'actual_price', 'discount_percentage', 'rating', 'rating_count', 'img_link', 'product_link', 'tags']


In [31]:
# Parse `rating` as a number; values that cannot be parsed become NaN.
train_data['rating'] = pd.to_numeric(train_data['rating'], errors='coerce')

# Mean rating per distinct combination of the listing fields below,
# flattened back into ordinary columns.
average_ratings = (
    train_data
    .groupby([
        'product_name',
        'rating_count',
        'img_link',
        'discounted_price',
        'actual_price',
        'discount_percentage',
    ])['rating']
    .mean()
    .reset_index()
)

In [34]:
# Per-column count of missing values in the aggregated frame.
average_ratings.isna().sum()

product_name           0
rating_count           0
img_link               0
discounted_price       0
actual_price           0
discount_percentage    0
rating                 0
dtype: int64

In [33]:
# Treat groups whose mean rating is missing as rated 0.
# Assign the result back instead of calling fillna(..., inplace=True) on the
# column selection: that chained in-place form operates on an intermediate
# object and, per the pandas FutureWarning, will not update average_ratings in
# pandas 3.0.
average_ratings['rating'] = average_ratings['rating'].fillna(0)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  average_ratings['rating'].fillna(0, inplace=True)


In [35]:
# Order by mean rating, highest first, and keep the ten leading rows.
top_rated_items = average_ratings.sort_values('rating', ascending=False)
rating_base_recommendation = top_rated_items.iloc[:10]

In [36]:
# Show the ten top-rated products.
# The previous self-assignment (df[cols] = df[cols]) changed no values and
# only raised SettingWithCopyWarning, because rating_base_recommendation is a
# slice of top_rated_items; it has been dropped.
print("Rating Base Recommendation System: (Trending Products)")
rating_base_recommendation

Rating Base Recommendation System: (Trending Products)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_base_recommendation[['product_name','rating','rating_count','img_link','discounted_price','actual_price','discount_percentage']] = rating_base_recommendation[['product_name','rating','rating_count','img_link','discounted_price','actual_price','discount_percentage']]


Unnamed: 0,product_name,rating_count,img_link,discounted_price,actual_price,discount_percentage,rating
1126,Syncwire LTG to USB Cable for Fast Charging Co...,5,https://m.media-amazon.com/images/I/317OoQfs1g...,₹399,"₹1,999",80%,5.0
86,Amazon Basics Wireless Mouse | 2.4 GHz Connect...,23,https://m.media-amazon.com/images/I/31+Rg6Z46d...,₹499,"₹1,000",50%,5.0
940,"REDTECH USB-C to Lightning Cable 3.3FT, [Apple...",0,https://m.media-amazon.com/images/I/31-q0xhaTA...,₹249,₹999,75%,5.0
1124,Swiffer Instant Electric Water Heater Faucet T...,53803,https://m.media-amazon.com/images/I/41+t2HWvwF...,"₹1,439","₹1,999",28%,4.8
552,"Instant Pot Air Fryer, Vortex 2QT, Touch Contr...",3964,https://m.media-amazon.com/images/W/WEBP_40237...,"₹4,995","₹20,049",75%,4.8
804,"Oratech Coffee Frother electric, milk frother ...",28,https://m.media-amazon.com/images/W/WEBP_40237...,₹279,₹499,44%,4.8
742,Multifunctional 2 in 1 Electric Egg Boiling St...,2300,https://m.media-amazon.com/images/W/WEBP_40237...,₹699,"₹1,599",56%,4.7
262,Campfire Spring Chef Prolix Instant Portable W...,2591,https://m.media-amazon.com/images/W/WEBP_40237...,"₹1,499","₹3,500",57%,4.7
395,"FIGMENT Handheld Milk Frother Rechargeable, 3-...",1729,https://m.media-amazon.com/images/I/51oN+8Zs5Y...,₹699,"₹1,599",56%,4.7
1105,Spigen EZ Fit Tempered Glass Screen Protector ...,7779,https://m.media-amazon.com/images/I/51EiPNlJDg...,₹999,"₹2,899",66%,4.7


In [39]:
# Hard-coded list literal of column names; the cell just echoes it back.
# NOTE(review): presumably meant to mirror train_data's columns — it is not
# derived from the frame, so confirm it still matches.
['id', 'product_id', 'product_name', 'category', 'discounted_price', 'actual_price', 'discount_percentage', 'rating', 'rating_count', 'img_link', 'product_link', 'tags']

['id',
 'product_id',
 'product_name',
 'category',
 'discounted_price',
 'actual_price',
 'discount_percentage',
 'rating',
 'rating_count',
 'img_link',
 'product_link',
 'tags']

In [40]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def content_based_recommendations(train_data, item_name, top_n=10):
    """Recommend the items whose ``tags`` text is most similar to a given item.

    The ``tags`` column is vectorized with TF-IDF (English stop words removed)
    and every row is scored by cosine similarity against the first row whose
    ``product_name`` equals ``item_name``.

    Args:
        train_data: DataFrame with at least the columns ``product_name``,
            ``tags``, ``rating_count``, ``discounted_price``, ``actual_price``,
            ``discount_percentage``, ``img_link`` and ``rating``.
        item_name: Exact product name to look up.
        top_n: Maximum number of recommendations to return.

    Returns:
        A DataFrame with the detail columns of the most similar rows, ordered
        by descending similarity (ties keep their original row order). The
        query row itself is never included. An empty DataFrame is returned
        (after printing a message) when ``item_name`` is not present.
    """
    name_matches = (train_data['product_name'] == item_name).to_numpy()
    if not name_matches.any():
        print(f"Item '{item_name}' not found in the training data.")
        return pd.DataFrame()

    # Row *position* of the first match. The similarity scores and ``iloc``
    # below are positional, so the index label must not be used here.
    item_pos = int(np.flatnonzero(name_matches)[0])

    # TF-IDF representation of every item's tag text.
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix_content = tfidf_vectorizer.fit_transform(train_data['tags'])

    # Only the query row's similarities are needed, so compare that single row
    # (kept 2-D via a slice) against the whole matrix instead of building N x N.
    similarity_scores = cosine_similarity(
        tfidf_matrix_content[item_pos:item_pos + 1], tfidf_matrix_content
    ).ravel()

    # Stable descending sort: equal scores stay in ascending row order.
    ranked_positions = np.argsort(-similarity_scores, kind='stable')

    # Exclude the query item explicitly. Items with identical tags tie with it
    # at the top score, so it is not guaranteed to be the first sorted entry.
    ranked_positions = ranked_positions[ranked_positions != item_pos]
    recommended_item_indices = ranked_positions[:max(top_n, 0)]

    detail_columns = ['product_name', 'rating_count', 'discounted_price',
                      'actual_price', 'discount_percentage', 'img_link', 'rating']
    recommended_items_details = train_data.iloc[recommended_item_indices][detail_columns]

    return recommended_items_details

In [44]:
item_name = 'AmazonBasics Flexible Premium HDMI Cable (Black, 4K@60Hz, 18Gbps), 3-Foot'
content_based_rec = content_based_recommendations(train_data, item_name, top_n=10)

content_based_rec

Unnamed: 0,product_name,rating_count,discounted_price,actual_price,discount_percentage,img_link,rating
21,tizum HDMI to VGA Adapter Cable 1080P for Proj...,10962,₹279,₹499,44%,https://m.media-amazon.com/images/W/WEBP_40237...,3.7
25,Tizum High Speed HDMI Cable with Ethernet | Su...,12153,₹199,₹699,72%,https://m.media-amazon.com/images/I/41da4tk7N+...,4.2
47,"Amazon Basics High-Speed HDMI Cable, 6 Feet - ...",426973,₹309,₹475,35%,https://m.media-amazon.com/images/I/41bCxnHksn...,4.4
65,"Amazon Basics High-Speed HDMI Cable, 6 Feet (2...",426973,₹309,"₹1,400",78%,https://m.media-amazon.com/images/I/41eJqkFjCR...,4.4
125,AmazonBasics 6-Feet DisplayPort (not USB port)...,28638,₹799,"₹1,700",53%,https://m.media-amazon.com/images/I/41F6ukNxcC...,4.1
126,AmazonBasics 3 Feet High Speed HDMI Male to Fe...,12835,₹229,₹595,62%,https://m.media-amazon.com/images/I/41Rg-JkRGg...,4.3
137,"10k 8k 4k HDMI Cable, Certified 48Gbps 1ms Ult...",3664,₹999,"₹2,399",58%,https://m.media-amazon.com/images/W/WEBP_40237...,4.6
158,ZEBRONICS HAA2021 HDMI version 2.1 cable with ...,24,₹637,"₹1,499",58%,https://m.media-amazon.com/images/W/WEBP_40237...,4.1
204,BlueRigger High Speed HDMI Cable with Ethernet...,44054,₹467,₹599,22%,https://m.media-amazon.com/images/W/WEBP_40237...,4.4
217,Posh 1.5 Meter High Speed Gold Plated HDMI Mal...,1237,₹173,₹999,83%,https://m.media-amazon.com/images/W/WEBP_40237...,4.3
