In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Load the product data
product_data1 = pd.read_csv('product_data.csv')

product_data1.head()


Unnamed: 0,index,product,category,sub_category,brand,sale_price,market_price,type,rating,description
0,1,Garlic Oil - Vegetarian Capsule 500 mg,Beauty & Hygiene,Hair Care,Sri Sri Ayurveda,220.0,220.0,Hair Oil & Serum,4.1,This Product contains Garlic Oil that is known...
1,2,Water Bottle - Orange,"Kitchen, Garden & Pets",Storage & Accessories,Mastercook,180.0,180.0,Water & Fridge Bottles,2.3,"Each product is microwave safe (without lid), ..."
2,3,"Brass Angle Deep - Plain, No.2",Cleaning & Household,Pooja Needs,Trm,119.0,250.0,Lamp & Lamp Oil,3.4,"A perfect gift for all occasions, be it your m..."
3,4,Cereal Flip Lid Container/Storage Jar - Assort...,Cleaning & Household,Bins & Bathroom Ware,Nakoda,149.0,176.0,"Laundry, Storage Baskets",3.7,Multipurpose container with an attractive desi...
4,5,Creme Soft Soap - For Hands & Body,Beauty & Hygiene,Bath & Hand Wash,Nivea,162.0,162.0,Bathing Bars & Soaps,4.4,Nivea Creme Soft Soap gives your skin the best...


In [43]:
product_data1[['product','category']]

Unnamed: 0,product,category
0,Garlic Oil - Vegetarian Capsule 500 mg,Beauty & Hygiene
1,Water Bottle - Orange,"Kitchen, Garden & Pets"
2,"Brass Angle Deep - Plain, No.2",Cleaning & Household
3,Cereal Flip Lid Container/Storage Jar - Assort...,Cleaning & Household
4,Creme Soft Soap - For Hands & Body,Beauty & Hygiene
...,...,...
27550,"Wottagirl! Perfume Spray - Heaven, Classic",Beauty & Hygiene
27551,Rosemary,Gourmet & World Food
27552,Peri-Peri Sweet Potato Chips,Gourmet & World Food
27553,Green Tea - Pure Original,Beverages


In [3]:
product_data1.count()

index           27555
product         27554
category        27555
sub_category    27555
brand           27554
sale_price      27555
market_price    27555
type            27555
rating          18929
description     27440
dtype: int64

In [17]:
product_data=product_data1.iloc[:10000]
#product_data=product_data1.head(10000)

In [18]:
print(product_data.count())
product_data.head()

index           10000
product         10000
category        10000
sub_category    10000
brand            9999
sale_price      10000
market_price    10000
type            10000
rating           6843
description      9958
dtype: int64


Unnamed: 0,index,product,category,sub_category,brand,sale_price,market_price,type,rating,description
0,1,Garlic Oil - Vegetarian Capsule 500 mg,Beauty & Hygiene,Hair Care,Sri Sri Ayurveda,220.0,220.0,Hair Oil & Serum,4.1,This Product contains Garlic Oil that is known...
1,2,Water Bottle - Orange,"Kitchen, Garden & Pets",Storage & Accessories,Mastercook,180.0,180.0,Water & Fridge Bottles,2.3,"Each product is microwave safe (without lid), ..."
2,3,"Brass Angle Deep - Plain, No.2",Cleaning & Household,Pooja Needs,Trm,119.0,250.0,Lamp & Lamp Oil,3.4,"A perfect gift for all occasions, be it your m..."
3,4,Cereal Flip Lid Container/Storage Jar - Assort...,Cleaning & Household,Bins & Bathroom Ware,Nakoda,149.0,176.0,"Laundry, Storage Baskets",3.7,Multipurpose container with an attractive desi...
4,5,Creme Soft Soap - For Hands & Body,Beauty & Hygiene,Bath & Hand Wash,Nivea,162.0,162.0,Bathing Bars & Soaps,4.4,Nivea Creme Soft Soap gives your skin the best...


In [19]:
# Preprocess the data (e.g., lowercase, remove punctuation)
product_data['description'] = product_data['description'].str.lower()
product_data['description'] = product_data['description'].str.replace('[^\w\s]', '')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  product_data['description'] = product_data['description'].str.lower()
  product_data['description'] = product_data['description'].str.replace('[^\w\s]', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  product_data['description'] = product_data['description'].str.replace('[^\w\s]', '')


In [20]:
# Handle missing values (if any)
product_data['description'].fillna('', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  product_data['description'].fillna('', inplace=True)


In [56]:
# Convert the product descriptions into TF-IDF feature vectors
tfidf = TfidfVectorizer()
feature_matrix = tfidf.fit_transform(product_data['description'])

# TfidfVectorizer is used to convert the product descriptions into TF-IDF (Term Frequency-Inverse Document Frequency)-
    #-feature vectors, The product descriptions are textual data, and to perform similarity calculations, we need to convert
    #-them into a numerical representation. TfidfVectorizer helps transform the text data into a numerical feature matrix 
    #-suitable for further analysis.
#TfidfVectorizer takes into account the importance of terms in a document and across the corpus. 
    #It assigns higher weights to terms that are more informative and discriminative, considering both their frequency 
    #-in the current document (term frequency) and their rarity across the entire dataset (inverse document frequency).


In [57]:
# Compute the cosine similarity matrix
similarity_matrix = cosine_similarity(feature_matrix, feature_matrix)


In [58]:
def get_recommendations(product, top_n=5):
    # Find the index of the target product
    product_index = product_data[product_data['product'] == product].index[0]

    # Get the similarity scores for the target product
    similarity_scores = similarity_matrix[product_index]

    # Sort the similarity scores and get the indices of top-n similar products
    top_indices = similarity_scores.argsort()[:-top_n-1:-1]

    # Get the product IDs of the top-n similar products
    top_product_ids = product_data.iloc[top_indices]['product'].values

    return top_product_ids



In [59]:
# Example usage:
product = 'Water Bottle - Orange'
recommendations = get_recommendations(product)
print(recommendations)


['Water Bottle - Orange'
 'Premium Rectangular Plastic Container With Lid - Multicolour'
 'Premium Round Plastic Container With Lid - Yellow'
 'Plastic Container - Square, Pink'
 'Premium Square Plastic Container - Green']


In [60]:
#describtion
print(product_data[product_data['product']=='Water Bottle - Orange'][['description','category']],'\n\n')
print(product_data[product_data['product']=='Premium Rectangular Plastic Container With Lid - Multicolour'][['description','category']],'\n\n')
print(product_data[product_data['product']=='Premium Round Plastic Container With Lid - Yellow'][['description','category']],'\n\n')
print(product_data[product_data['product']=='Plastic Container - Square, Pink'][['description','category']],'\n\n')
print(product_data[product_data['product']=='Premium Square Plastic Container - Green'][['description','category']],'\n\n')

                                         description                category
1  each product is microwave safe without lid ref...  Kitchen, Garden & Pets 


                                            description  \
6163  these containers are microwave safe without li...   

                    category  
6163  Kitchen, Garden & Pets   


                                            description  \
9546  these containers are microwave safe without li...   

                    category  
9546  Kitchen, Garden & Pets   


                                            description  \
8588  each product is microwave safe but without a l...   

                    category  
8588  Kitchen, Garden & Pets   


                                           description                category
125  these containers are microwave safe but withou...  Kitchen, Garden & Pets 


