In [20]:
import numpy as np
import pandas as pd
import nltk
from nltk.stem.snowball import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_csv("amazon_product.csv")
df.head()

Unnamed: 0,id,Title,Description,Category
0,1,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,2,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,5,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,6,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,8,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [3]:
df.shape

(668, 4)

In [4]:
df.drop('id', axis=1, inplace=True)

In [6]:
df.head()

Unnamed: 0,Title,Description,Category
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [7]:
df.isnull().sum()

Title          0
Description    0
Category       0
dtype: int64

In [11]:
stemmer = SnowballStemmer('english')

In [13]:
def tokenize_stem(text):
    tokens = nltk.word_tokenize(text.lower())
    stemmed = [stemmer.stem(w) for w in tokens]
    return " ".join(stemmed)

In [14]:
df['stemmed_tokens'] =  df.apply(lambda x: tokenize_stem(x['Title'] + " " + x['Description']), axis=1)

In [17]:
df['stemmed_tokens']

0      swissmar capstor select storag rack for 18-pac...
1      gemini200 delta cv-880 gold crown liveri aircr...
2      superior thread 10501-2172 magnifico cream puf...
3      fashion angel color rox hair chox kit experi w...
4      union creativ giant kill figur 05 : daisuk tsu...
                             ...                        
663    rosemeri ( rosemari ) - box of six 20 stick he...
664    interdesign linus stack organ bin , extra larg...
665    gourmet rubber stamp diagon stripe stencil , 6...
666    spenco rx arch cushion full length comfort sup...
667                                  car kit kit for car
Name: stemmed_tokens, Length: 668, dtype: object

In [18]:
df.head()

Unnamed: 0,Title,Description,Category,stemmed_tokens
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...,swissmar capstor select storag rack for 18-pac...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...,gemini200 delta cv-880 gold crown liveri aircr...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S...",superior thread 10501-2172 magnifico cream puf...
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...,fashion angel color rox hair chox kit experi w...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...,union creativ giant kill figur 05 : daisuk tsu...


In [22]:
tfidf = TfidfVectorizer()
def cosine_sim(txt1, txt2):
    matrix = tfidf.fit_transform([txt1, txt2])
    return cosine_similarity(matrix)

In [32]:
def search_product(query):
    stemmed_query = tokenize_stem(query)
    # calculating cosine similarity between query and stemmed token columns
    df['Similarity'] = df['stemmed_tokens'].apply(lambda x: cosine_sim(stemmed_query, x))
    res = df.assign(Similarity=df['Similarity'].apply(lambda x: np.max(x))).sort_values(by='Similarity', ascending=False).head(10)[['Title', 'Description', 'Category']]
    
    return res

In [34]:
search_product('Handling Label')

Unnamed: 0,Title,Description,Category
233,Beautiful Girl Jason Freddy Krueger Second Ed...,Kotobukiya brings a bold new direction to thei...,Toys & Games › Action Figures & Statues › Act...
284,"C&F Home Buffalo Checks King Bed Skirt, 78 by...",Outdoorsy Timberline pieces together geometric...,Home & Kitchen › Bedding › Bed Skirts
407,One Bella Casa I Love My Mom Hearts Throw Pil...,Add a great conversation piece with this brigh...,"Home & Kitchen Bedding Decorative Pillows, In..."
632,wet n wild Megalast Liquid Catsuit Lipstick B...,"What glides on like butter, feels like a secon...",Beauty & Personal Care › Makeup › Lips › Lips...
341,Deny Designs Leonidas Oxby Theres More To New...,"Wanna transform a serious room into a fun, inv...","Home & Kitchen Bedding Decorative Pillows, In..."
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
100,"Inkology 1-Subject Spiral Bound Notebook, 5 x...",Who ever said school supplies had to be plain ...,Office Products › Office & School Supplies › ...
255,Enclume Premier 10-Inch Finishing Touches Din...,Come and get it. These dinner triangles are ma...,Home & Kitchen Kitchen & Dining Cookware
133,"freshMinerals Mineral Loose Eyeshadow, Cancun...",Freshminerals eyeshadows are formulated using ...,Beauty & Personal Care › Makeup › Eyes › Eyes...
253,KISS COVER + CARE ACF06 NEUTRAL COOL 30,FOR MEDIUM CHOCOLATE TONED SKIN. GOOD IF YOU H...,Beauty & Personal Care › Makeup › Face › Conc...


In [33]:
df['Title'][5]

' Tape Logic Shipping & Handling Label, Legend"FRAGILE - GLASS - HANDLE WITH CARE" (DL1570) '