In [16]:
import numpy as np
import pandas as pd
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sachk\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

In [17]:
data = pd.read_csv('amazon_product.csv')
data.head()

Unnamed: 0,id,Title,Description,Category
0,1,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,2,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,5,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,6,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,8,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [18]:
data = data.drop('id', axis=1)
data.head()

Unnamed: 0,Title,Description,Category
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [19]:
data.isnull().sum()

Title          0
Description    0
Category       0
dtype: int64

In [20]:
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer('english')
def tokenize_and_stem(text):
    tokens = nltk.word_tokenize(text.lower())
    stems = [stemmer.stem(t) for t in tokens]
    return stems

In [21]:
data['stemmed_tokens'] = data.apply(lambda row: tokenize_and_stem(row['Title'] + ' ' + row['Description']), axis=1)

In [22]:
data.head()

Unnamed: 0,Title,Description,Category,stemmed_tokens
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...,"[swissmar, capstor, select, storag, rack, for,..."
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...,"[gemini200, delta, cv-880, gold, crown, liveri..."
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S...","[superior, thread, 10501-2172, magnifico, crea..."
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...,"[fashion, angel, color, rox, hair, chox, kit, ..."
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...,"[union, creativ, giant, kill, figur, 05, :, da..."


In [23]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
tfidf_vectorizer = TfidfVectorizer(tokenizer=tokenize_and_stem)
def cosine_sim(text1, text2):
    text1_concatenated = ' '.join(text1)
    text2_concatenated = ' '.join(text2)
    tfidf_matrix = tfidf_vectorizer.fit_transform([text1_concatenated, text2_concatenated])
    return cosine_similarity(tfidf_matrix)[0][1]


In [24]:
def search_products(query):
    query_stemmed = tokenize_and_stem(query)
    data['similarity'] = data['stemmed_tokens'].apply(lambda x: cosine_sim(query_stemmed, x))
    results = data.sort_values(by=['similarity'], ascending=False).head(10)[['Title', 'Description', 'Category']]
    return results

In [28]:
res = search_products(data['Title'][0])
res

Unnamed: 0,Title,Description,Category
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
541,Remington SP290 for F4790 Shaver (2-Pack),Technical Features for Remington SP290-2 The R...,Beauty & Personal Care › Shave & Hair Removal...
34,Gear Aid Replacement Triglide Buckle Kit,When you need to lengthen or shorten straps an...,Sports & Outdoors › Outdoor Recreation › Camp...
551,"C2G 03836 2-Port Cat5e Surface Mount Box, Ivory","Ideal for surface mount applications, especial...",Electronics Computers & Accessories Computer ...
285,SleepRight Ultra-Comfort Dental Guard,"For the most up to date information, we recomm...",Beauty & Personal Care › Oral Care › Oral Pai...
240,C-Line Poly 3-Compartment Storage Box with Sn...,C-Line's durable storage box features three co...,Office Products Office & School Supplies Offi...
381,"Zoom 6"" Lizard Plastic Fishing Baits 9-Pack",The Zoom Lizard gives anglers a wide color sel...,Sports & Outdoors Sports & Fitness Hunting & ...
349,Fixodent Free Denture Adhesive Cream 2.40 Oun...,"For the most up to date information, we recomm...",Beauty & Personal Care Oral Care Denture Care...
463,Versio Mobile 3-Pack Screen Protector for LG ...,Clear 3 Pack Screen Protector For LG G2,Cell Phones & Accessories Accessories Screen ...
40,Rivacase Ladies 15.6 Inch Laptop Shoulder Bag,Slim and stylish long-handle laptop purse for ...,Electronics › Computers & Accessories › Lapto...
