In [1]:
import pandas as pd
import numpy as np
import nltk

In [2]:
# Load the product catalogue from a CSV located relative to the working
# directory, then preview the first rows.
df = pd.read_csv('amazon_product.csv')
df.head()

Unnamed: 0,id,Title,Description,Category
0,1,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,2,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,5,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,6,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,8,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [3]:
# Remove the 'id' column before any further processing.
# Rebinding `df` (instead of inplace=True) together with errors='ignore'
# keeps this cell idempotent: re-running it no longer raises KeyError
# once 'id' has already been removed.
df = df.drop(columns='id', errors='ignore')
df.head()

Unnamed: 0,Title,Description,Category
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [4]:
# Per-column count of missing values.
df.isna().sum()

Title          0
Description    0
Category       0
dtype: int64

In [5]:
from nltk.stem import PorterStemmer

In [6]:
# Fetch the 'punkt' and 'punkt_tab' NLTK resources.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

# Porter stemmer instance used by tokenize_stem
stemmer = PorterStemmer()

# Lowercase, tokenize and stem a piece of text
def tokenize_stem(text):
    """Return `text` lowercased, word-tokenized and stemmed.

    Each token produced by ``nltk.word_tokenize`` is passed through the
    module-level ``stemmer``; the results are joined back into a single
    space-separated string.
    """
    words = nltk.word_tokenize(text.lower())
    return " ".join(map(stemmer.stem, words))

# Build the search text for every product: stem "<Title> <Description>".
# Concatenating the two columns as whole Series and applying the function
# once per value avoids the per-row Series construction of
# DataFrame.apply(axis=1) while producing the same strings.
df['stemmed_tokens'] = (df["Title"] + " " + df["Description"]).apply(tokenize_stem)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [7]:
# Preview the frame, now including the stemmed_tokens column.
df.head()

Unnamed: 0,Title,Description,Category,stemmed_tokens
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...,swissmar capstor select storag rack for 18-pac...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...,gemini200 delta cv-880 gold crown liveri aircr...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S...",superior thread 10501-2172 magnifico cream puf...
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...,fashion angel color rox hair chox kit experi w...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...,union creativ giant kill figur 05 : daisuk tsu...


In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Create vectorizer with custom tokenizer (each text is stemmed via
# tokenize_stem and split on whitespace).
# token_pattern=None states explicitly that the default regex is unused,
# since a custom tokenizer overrides it, and silences scikit-learn's
# "token_pattern will not be used" UserWarning on fit.
vectorizer = TfidfVectorizer(
    tokenizer=lambda x: tokenize_stem(x).split(),
    token_pattern=None,
)

# Define similarity function
def cosine_sim(txt1, txt2):
    """Return the TF-IDF cosine similarity between two texts.

    The module-level ``vectorizer`` is re-fitted on just this pair of
    texts on every call, so the TF-IDF weights are derived from these two
    documents only.

    Parameters
    ----------
    txt1, txt2 : str
        Texts to compare.

    Returns
    -------
    float
        The similarity score; 0.0 when both texts are blank.
    """
    # Two blank texts yield an empty vocabulary, for which fit_transform
    # raises ValueError. They share no terms, so report zero similarity.
    if not txt1.strip() and not txt2.strip():
        return 0.0
    tfidf_matrix = vectorizer.fit_transform([txt1, txt2])
    sim = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])
    return sim[0][0]

In [9]:
def search_product(query, top_n=10):
    """Return the products most similar to a free-text query.

    The query is stemmed with ``tokenize_stem`` and compared against every
    row's ``stemmed_tokens`` via ``cosine_sim``.

    Parameters
    ----------
    query : str
        Free-text search string.
    top_n : int, default 10
        Number of best-matching rows to return.

    Returns
    -------
    pandas.DataFrame
        The ``Title``, ``Description`` and ``Category`` columns of the
        ``top_n`` rows with the highest similarity, best match first.

    Notes
    -----
    Side effect: the per-row scores are written to ``df['similarity']``
    on the module-level ``df``, overwriting the scores of any previous
    search.
    """
    stemmed_query = tokenize_stem(query)
    # calculating cosine similarity between the query and the stemmed_tokens column
    df['similarity'] = df['stemmed_tokens'].apply(lambda x: cosine_sim(stemmed_query, x))
    res = df.sort_values(by=['similarity'], ascending=False).head(top_n)[['Title', 'Description', 'Category']]
    return res

In [10]:
# Sanity check: query with the full title of the product stored at index 10.
search_product(' PURELL ES8 Professional HEALTHY SOAP Foam Refill, Fresh Scent Fragrance, 1200 mL Soap Refill for PURELL ES8 Touch-Free Dispenser (Pack of 2) - 7777-02 ')



Unnamed: 0,Title,Description,Category
10,PURELL ES8 Professional HEALTHY SOAP Foam Ref...,1200 ml refill for Purell ES8 touch-free soap ...,Industrial & Scientific › Janitorial & Sanita...
606,Aquaphor Healing Skin Ointment Advanced Thera...,"For Dry, Cracked Or Irritated Skin, Preservati...",Health & Household › Health Care › Medication...
585,"Etude House Missing U Hand Cream, Fairy Pengu...","Formulated With Natural Mixture Of Herbs, Shea...","Beauty & Personal Care Foot, Hand & Nail Care..."
261,Premium Compatibles DR720-PCI Imaging Drum Un...,Brother DR-720 DR720 Drum Unit 35K - GSA and T...,Office Products Office & School Supplies Prin...
427,"St. Ives Naturally Clear Apricot Scrub, Blemi...","For the most up to date information, we recomm...",Beauty & Personal Care › Skin Care › Face › C...
619,Premium Compatibles LC51YPC PCI Brother LC-51...,PCI Brother LC-51Y LC51Y Yellow InkJet Cartrid...,Office Products › Office & School Supplies › ...
333,"TOPS Time Cards, Weekly, 1-Sided, Numbered Da...","Since 1952, TOPS has provided quality products...",Office Products Office & School Supplies Time...
349,Fixodent Free Denture Adhesive Cream 2.40 Oun...,"For the most up to date information, we recomm...",Beauty & Personal Care Oral Care Denture Care...
322,"EO PRODUCTS SOAP,EVERYONE,CITRUS&MINT, 32 FZ ...",EO Products Everyone Soap Citrus and Mint is f...,Beauty & Personal Care Skin Care Body Cleanse...
452,Department 56 Christmas Basics Decor Titanium...,Department 56 is committed to bringing joy eve...,Home & Kitchen › Home Décor › Home Décor Acce...


In [11]:
# Title of the row labelled 10, looked up in a single .loc access.
df.loc[10, 'Title']

' PURELL ES8 Professional HEALTHY SOAP Foam Refill, Fresh Scent Fragrance, 1200 mL Soap Refill for PURELL ES8 Touch-Free Dispenser (Pack of 2) - 7777-02 '