## Rating base recommendation system

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

import os
from scipy.sparse import coo_matrix

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
nltk.download("punkt")
nltk.download("stopwords")
from sklearn.feature_extraction.text import TfidfVectorizer

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\theoh\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\theoh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
#read dataset
df = pd.read_csv(r'cleaned_train_data.csv')
df.columns

Index(['product_id', 'product_name', 'category', 'discounted_price',
       'actual_price', 'rating', 'rating_count', 'user_id', 'product_link',
       'about_product'],
      dtype='object')

In [4]:
average_ratings = df.groupby(['product_id','product_name','actual_price','category','about_product','product_link','rating_count'])['rating'].mean().reset_index()

In [5]:
top_rated_items = average_ratings.sort_values(by='rating', ascending=False)

rating_base_recommendation = top_rated_items.head(10)

In [6]:
rating_base_recommendation.loc[:,'rating'] = rating_base_recommendation['rating'].astype(int)
rating_base_recommendation.loc[:,'rating_count'] = rating_base_recommendation['rating_count'].astype(int)

In [7]:
print("Rating Base Recommendation System: (Trending Products)")
rating_base_recommendation[['product_id','product_name','actual_price','category','about_product','product_link','rating_count']] = rating_base_recommendation[['product_id','product_name','actual_price','category','about_product','product_link','rating_count']]
rating_base_recommendation

Rating Base Recommendation System: (Trending Products)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_base_recommendation[['product_id','product_name','actual_price','category','about_product','product_link','rating_count']] = rating_base_recommendation[['product_id','product_name','actual_price','category','about_product','product_link','rating_count']]


Unnamed: 0,product_id,product_name,actual_price,category,about_product,product_link,rating_count,rating
1342,1342,Syncwire LTG to USB Cable for Fast Charging Co...,1999.0,Computers&Accessories|Accessories&Peripherals|...,This sturdy and durable cable made of tpe and ...,https://www.amazon.in/Syncwire-Cable-Charging-...,5.0,5.0
1349,1349,"REDTECH USB-C to Lightning Cable 3.3FT, [Apple...",999.0,Computers&Accessories|Accessories&Peripherals|...,💎[The Fastest Charge] - This iPhone USB C cabl...,https://www.amazon.in/REDTECH-Lightning-Certif...,0.0,5.0
1121,1121,Amazon Basics Wireless Mouse | 2.4 GHz Connect...,1000.0,Computers&Accessories|Accessories&Peripherals|...,Reliable wireless connection up to 10m|Advance...,https://www.amazon.in/Wireless-Connection-Batt...,23.0,5.0
1350,1350,Swiffer Instant Electric Water Heater Faucet T...,1999.0,"Home&Kitchen|Heating,Cooling&AirQuality|WaterH...",✔Quick Electric Hot Water Tap Heating tube: hi...,https://www.amazon.in/Swiffer-Instant-Electric...,53803.0,4.0
1205,1205,"Instant Pot Air Fryer, Vortex 2QT, Touch Contr...",20049.0,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,VOLTAGE (230 Volts): Instant Vortex Air Fryer ...,https://www.amazon.in/Instant-Vortex-2QT-EvenC...,3964.0,4.0
1348,1348,"Oratech Coffee Frother electric, milk frother ...",499.0,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,-Make delicious milk foam creamer for your dri...,https://www.amazon.in/Oratech-electric-cappucc...,28.0,4.0
1343,1343,Campfire Spring Chef Prolix Instant Portable W...,3500.0,"Home&Kitchen|Heating,Cooling&AirQuality|WaterH...",Prolix Auto cut off feature: It is available w...,https://www.amazon.in/Spring-Chef-Stainless-Re...,2591.0,4.0
1145,1145,Spigen EZ Fit Tempered Glass Screen Protector ...,2899.0,Electronics|Mobiles&Accessories|MobileAccessor...,Compatibility ; Glass Screen Protector Compati...,https://www.amazon.in/Spigen-Tempered-Screen-P...,7779.0,4.0
1078,1078,Sony Bravia 164 cm (65 inches) 4K Ultra HD Sma...,139900.0,"Electronics|HomeTheater,TV&Video|Televisions|S...",Resolution: 4K Ultra HD (3840 x 2160) | Refres...,https://www.amazon.in/Sony-Bravia-inches-Googl...,5935.0,4.0
1318,1318,Zuvexa USB Rechargeable Electric Foam Maker - ...,1299.0,Home&Kitchen|Kitchen&HomeAppliances|SmallKitch...,The food taste stainless steel whisking head i...,https://www.amazon.in/Zuvexa-Rechargeable-Elec...,54.0,4.0


In [8]:
# Specify the file path where you want to save the cleaned data
output_file_path = 'top_rated_products.csv'

# Save the cleaned DataFrame to a CSV file
rating_base_recommendation.to_csv(output_file_path, index=False)

## Creating Tags

In [10]:
# Define the set of stop words
stop_words = set(stopwords.words('english'))

def clean_and_extract_tags(text):
    # Tokenize the text and convert to lowercase
    tokens = word_tokenize(text.lower())
    # Filter out non-alphanumeric tokens and stop words
    tags = [token for token in tokens if token.isalnum() and token not in stop_words]
    return ', '.join(tags)

columns_to_extract_tags_from = ['category', 'about_product']

for column in columns_to_extract_tags_from:
    df[column] = df[column].apply(clean_and_extract_tags)

In [11]:
# Concatenate the cleaned tags from all relevant columns
df['Tags'] = df[columns_to_extract_tags_from].apply(lambda row: ', '.join(row), axis=1)

## Conetent Based Reccomendation

In [13]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix_content = tfidf_vectorizer.fit_transform(df['Tags'])
cosine_similarities_content = cosine_similarity(tfidf_matrix_content,tfidf_matrix_content)

In [14]:
item_name = 'Portronics Konnect L POR-1081 Fast Charging 3A Type-C Cable 1.2Meter with Charge & Sync Function for All Type-C Devices (Grey)'
item_index = df[df['product_name']==item_name].index[0]

In [15]:
similar_items = list(enumerate(cosine_similarities_content[item_index]))

In [16]:
similar_items = sorted(similar_items, key=lambda x:x[1], reverse=True)
top_similar_items = similar_items[1:10]

recommended_items_indics = [x[0] for x in top_similar_items]

In [17]:
df.iloc[recommended_items_indics][['product_name','rating_count']]

Unnamed: 0,product_name,rating_count
588,Portronics Konnect L 1.2M Fast Charging 3A 8 P...,16905.0
939,Portronics Konnect L 1.2M POR-1401 Fast Chargi...,4768.0
938,Portronics Konnect L POR-1403 Fast Charging 3A...,1717.0
587,"Portronics Konnect L 1.2Mtr, Fast Charging 3A ...",7064.0
1342,Syncwire LTG to USB Cable for Fast Charging Co...,5.0
987,Flix (Beetel) Usb To Type C Pvc Data Sync And ...,9378.0
989,FLiX (Beetel) USB to Type C PVC Data Sync & 2A...,9377.0
555,Portronics Konnect CL 20W POR-1067 Type-C to 8...,2262.333333
471,Wecool Nylon Braided Multifunction Fast Chargi...,9792.0


## Function for content based reccomendation

In [84]:
def content_based_recommendations(df, item_name, top_n=10):
    # Check if the item name exists in the training data
    if item_name not in df['product_name'].values:
        print(f"Item '{item_name}' not found in the training data.")
        return pd.DataFrame()

    # Create a TF-IDF vectorizer for item descriptions
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')

    # Apply TF-IDF vectorization to item descriptions
    tfidf_matrix_content = tfidf_vectorizer.fit_transform(df['Tags'])

    # Calculate cosine similarity between items based on descriptions
    cosine_similarities_content = cosine_similarity(tfidf_matrix_content, tfidf_matrix_content)

    # Find the index of the item
    item_index = df[df['product_name'] == item_name].index[0]

    # Get the cosine similarity scores for the item
    similar_items = list(enumerate(cosine_similarities_content[item_index]))

    # Sort similar items by similarity score in descending order
    similar_items = sorted(similar_items, key=lambda x: x[1], reverse=True)

    # Get the top N most similar items (including the item itself)
    top_similar_items = similar_items[:top_n]

    # Get the indices of the top similar items
    recommended_item_indices = [x[0] for x in top_similar_items]

    # Get the details of the top similar items
    recommended_items_details = df.iloc[recommended_item_indices][['product_name', 'rating_count', 'rating']]

    return recommended_items_details

In [86]:
# Example: Get content-based recommendations for a specific item
item_name = 'Portronics Konnect L POR-1081 Fast Charging 3A Type-C Cable 1.2Meter with Charge & Sync Function for All Type-C Devices (Grey)'
content_based_rec = content_based_recommendations(df, item_name, top_n=8)

content_based_rec

Unnamed: 0,product_name,rating_count,rating
589,Portronics Konnect L POR-1081 Fast Charging 3A...,13391.0,4.3
588,Portronics Konnect L 1.2M Fast Charging 3A 8 P...,16905.0,4.2
939,Portronics Konnect L 1.2M POR-1401 Fast Chargi...,4768.0,4.1
938,Portronics Konnect L POR-1403 Fast Charging 3A...,1717.0,4.1
587,"Portronics Konnect L 1.2Mtr, Fast Charging 3A ...",7064.0,4.3
1342,Syncwire LTG to USB Cable for Fast Charging Co...,5.0,5.0
987,Flix (Beetel) Usb To Type C Pvc Data Sync And ...,9378.0,4.0
989,FLiX (Beetel) USB to Type C PVC Data Sync & 2A...,9377.0,4.0


In [88]:
# Example: Get content-based recommendations for a specific item
item_name = 'Classmate Long Book - Unruled, 160 Pages, 314 mm x 194 mm - Pack Of 3'
content_based_rec = content_based_recommendations(df, item_name, top_n=8)

content_based_rec

Unnamed: 0,product_name,rating_count,rating
563,"Classmate Long Book - Unruled, 160 Pages, 314 ...",1674.0,4.5
564,"Classmate Drawing Book - Unruled, 40 Pages, 21...",4951.0,4.5
103,Classmate Pulse Spiral Notebook - 240 mm x 180...,1269.0,4.1
60,"Classmate Long Notebook - 140 Pages, Single Li...",3182.0,4.4
102,"Classmate Pulse 6 Subject Notebook - Unruled, ...",8938.0,4.2
104,Classmate Soft Cover 6 Subject Spiral Binding ...,4428.0,4.5
83,Classmate Soft Cover 6 Subject Spiral Binding ...,8618.0,4.5
82,Classmate 2100117 Soft Cover 6 Subject Spiral ...,6537.0,4.4


## Collaborative Filtering 

In [40]:
user_item_matrix = df.pivot_table(index='user_id', columns='product_id', values='rating',aggfunc='mean').fillna(0).astype(int)

In [42]:
user_similarity = cosine_similarity(user_item_matrix)


In [43]:
target_user_id = 4
target_user_index = user_item_matrix.index.get_loc(target_user_id)


In [44]:
user_similarities = user_similarity[target_user_index]

similar_user_indices = user_similarities.argsort()[::-1][1:]


In [48]:
recommend_items = []

for user_index in similar_user_indices:
    rated_by_similar_user = user_item_matrix.iloc[user_index]
    not_rated_by_target_user = (rated_by_similar_user==0) & (user_item_matrix.iloc[target_user_index]==0)
    
    recommend_items.extend(user_item_matrix.columns[not_rated_by_target_user][:10])

recommended_items_details = df[df['product_id'].isin(recommend_items)][['product_name','rating_count','rating']]


In [50]:
recommended_items_details.head(10)

Unnamed: 0,product_name,rating_count,rating
0,D-Link DWA-131 300 Mbps Wireless Nano USB Adap...,8131.0,4.1
1,TP-Link Nano USB WiFi Dongle 150Mbps High Gain...,179692.0,4.2
2,Duracell Plus AAA Rechargeable Batteries (750 ...,27201.0,4.3
3,"Logitech B100 Wired USB Mouse, 3 yr Warranty, ...",31534.0,4.3
4,"Logitech M235 Wireless Mouse, 1000 DPI Optical...",54405.0,4.5
5,SanDisk Cruzer Blade 32GB USB Flash Drive,253105.0,4.3
6,BlueRigger Digital Optical Audio Toslink Cable...,30023.0,4.2
7,BlueRigger Digital Optical Audio Toslink Cable...,30023.0,4.2
8,Amazon Basics 16-Gauge Speaker Wire - 50 Feet,12091.0,4.4
9,Bajaj ATX 4 750-Watt Pop-up Toaster (White),9331.0,4.3


## Item Reccomendation Function

In [53]:
def collaborative_filtering_recommendations(df, target_user_id, top_n=10):
    # Create the user-item matrix
    user_item_matrix = df.pivot_table(index='user_id', columns='product_id', values='rating', aggfunc='mean').fillna(0)

    # Calculate the user similarity matrix using cosine similarity
    user_similarity = cosine_similarity(user_item_matrix)

    # Find the index of the target user in the matrix
    target_user_index = user_item_matrix.index.get_loc(target_user_id)

    # Get the similarity scores for the target user
    user_similarities = user_similarity[target_user_index]

    # Sort the users by similarity in descending order (excluding the target user)
    similar_users_indices = user_similarities.argsort()[::-1][1:]

    # Generate recommendations based on similar users
    recommended_items = []

    for user_index in similar_users_indices:
        # Get items rated by the similar user but not by the target user
        rated_by_similar_user = user_item_matrix.iloc[user_index]
        not_rated_by_target_user = (rated_by_similar_user == 0) & (user_item_matrix.iloc[target_user_index] == 0)

        # Extract the item IDs of recommended items
        recommended_items.extend(user_item_matrix.columns[not_rated_by_target_user][:top_n])

    # Get the details of recommended items
    recommended_items_details = df[df['product_id'].isin(recommended_items)][['product_name', 'rating_count','rating']]

    return recommended_items_details.head(10)

# Example usage
target_user_id = 4
top_n = 5
collaborative_filtering_rec = collaborative_filtering_recommendations(df, target_user_id)
print(f"Top {top_n} recommendations for User {target_user_id}:")
collaborative_filtering_rec

Top 5 recommendations for User 4:


Unnamed: 0,product_name,rating_count,rating
0,D-Link DWA-131 300 Mbps Wireless Nano USB Adap...,8131.0,4.1
1,TP-Link Nano USB WiFi Dongle 150Mbps High Gain...,179692.0,4.2
2,Duracell Plus AAA Rechargeable Batteries (750 ...,27201.0,4.3
3,"Logitech B100 Wired USB Mouse, 3 yr Warranty, ...",31534.0,4.3
4,"Logitech M235 Wireless Mouse, 1000 DPI Optical...",54405.0,4.5
5,SanDisk Cruzer Blade 32GB USB Flash Drive,253105.0,4.3
6,BlueRigger Digital Optical Audio Toslink Cable...,30023.0,4.2
7,BlueRigger Digital Optical Audio Toslink Cable...,30023.0,4.2
8,Amazon Basics 16-Gauge Speaker Wire - 50 Feet,12091.0,4.4
9,Bajaj ATX 4 750-Watt Pop-up Toaster (White),9331.0,4.3


## Hybrid Reccomendations

In [56]:
# Hybrid Recommendations (Combine Content-Based and Collaborative Filtering)
def hybrid_recommendations(df,target_user_id, item_name, top_n=10):
    # Get content-based recommendations
    content_based_rec = content_based_recommendations(df,item_name, top_n)

    # Get collaborative filtering recommendations
    collaborative_filtering_rec = collaborative_filtering_recommendations(df,target_user_id, top_n)
    
    # Merge and deduplicate the recommendations
    hybrid_rec = pd.concat([content_based_rec, collaborative_filtering_rec]).drop_duplicates()
    
    return hybrid_rec.head(10)

In [58]:
# Example usage: Get hybrid recommendations for a specific user and item
target_user_id = 4 # Change this to the user_id you want recommendations for
item_name = "Classmate Long Book - Unruled, 160 Pages, 314 mm x 194 mm - Pack Of 3"  # Change this to the item name
hybrid_rec = hybrid_recommendations(df,target_user_id, item_name, top_n=10)

print(f"Top 10 Hybrid Recommendations for User {target_user_id} and Item '{item_name}':")
hybrid_rec

Top 10 Hybrid Recommendations for User 4 and Item 'Classmate Long Book - Unruled, 160 Pages, 314 mm x 194 mm - Pack Of 3':


Unnamed: 0,product_name,rating_count,rating
564,"Classmate Drawing Book - Unruled, 40 Pages, 21...",4951.0,4.5
103,Classmate Pulse Spiral Notebook - 240 mm x 180...,1269.0,4.1
60,"Classmate Long Notebook - 140 Pages, Single Li...",3182.0,4.4
102,"Classmate Pulse 6 Subject Notebook - Unruled, ...",8938.0,4.2
104,Classmate Soft Cover 6 Subject Spiral Binding ...,4428.0,4.5
83,Classmate Soft Cover 6 Subject Spiral Binding ...,8618.0,4.5
82,Classmate 2100117 Soft Cover 6 Subject Spiral ...,6537.0,4.4
860,Classmate Pulse 1 Subject Notebook - 240mm x 1...,419.0,4.2
70,"Luxor 5 Subject Single Ruled Notebook - A4, 70...",3785.0,4.5
79,Camel Oil Pastel with Reusable Plastic Box - 5...,9427.0,4.5


In [59]:
# Example usage: Get hybrid recommendations for a specific user and item
target_user_id = 10 # Change this to the user_id you want recommendations for
item_name = 'Usha CookJoy (CJ1600WPC) 1600 Watt Induction cooktop (Black)'

hybrid_rec = hybrid_recommendations(df,target_user_id, item_name, top_n=10)

print(f"Top 10 Hybrid Recommendations for User {target_user_id} and Item '{item_name}':")
hybrid_rec

Top 10 Hybrid Recommendations for User 10 and Item 'Usha CookJoy (CJ1600WPC) 1600 Watt Induction cooktop (Black)':


Unnamed: 0,product_name,rating_count,rating
839,PHILIPS Digital Air Fryer HD9252/90 with Touch...,2981.0,4.4
1252,"Pigeon Healthifry Digital Air Fryer, 360° High...",136.0,4.2
470,realme Buds Wireless in Ear Bluetooth Earphone...,72563.0,4.1
95,Prestige PIC 16.0+ 1900W Induction Cooktop wit...,15034.0,4.0
1322,White Feather Portable Heat Sealer Mini Sealin...,12.0,3.3
171,Pigeon by Stovekraft Cruise 1800 watt Inductio...,54032.0,3.8
1239,Green Tales Heat Seal Mini Food Sealer-Impulse...,24.0,2.6
245,Prestige PIC 15.0+ 1900-Watt Induction Cooktop...,6919.0,4.2
478,AmazonBasics Induction Cooktop 1600 Watt (Black),780.0,4.2
1154,Shakti Technology S5 High Pressure Car Washer ...,170.0,4.2
