In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
# Load the dataset
amazon_df = pd.read_csv("Dataset/amazon_product_search.csv")
amazon_df.head()

Unnamed: 0,id,Title,Description,Category
0,1,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,2,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,5,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,6,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,8,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [3]:
# Drop the "id" axis
amazon_df.drop("id", axis=1, inplace=True)
amazon_df.head()

Unnamed: 0,Title,Description,Category
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [4]:
# Check null values
amazon_df.isnull().sum()

Title          0
Description    0
Category       0
dtype: int64

In [5]:
from nltk.stem.snowball import SnowballStemmer

# Create a SnowballStemmer instance for English
stemmer = SnowballStemmer("english")

# Define a function for tokenization and stemming
def tokenize_stem(text):
    
    # Tokenize the text into words and convert to lowercase
    tokens = nltk.word_tokenize(text.lower())
    
    # Stem each token using SnowballStemmer
    stemmed = [stemmer.stem(w) for w in tokens]
    
    return " ".join(stemmed)

In [6]:
# Apply "tokenize_stem()" to create a new column in a DataFrame
amazon_df["Stemmed Tokens"] = amazon_df.apply(lambda row: tokenize_stem(row["Title"] + " " + row["Category"]), axis=1)
amazon_df.head()

Unnamed: 0,Title,Description,Category,Stemmed Tokens
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...,swissmar capstor select storag rack for 18-pac...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...,gemini200 delta cv-880 gold crown liveri aircr...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S...",superior thread 10501-2172 magnifico cream puf...
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...,fashion angel color rox hair chox kit beauti &...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...,union creativ giant kill figur 05 : daisuk tsu...


In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Create a TfidfVectorizer instance with the custom tokenizer
tfidf_vectorizer = TfidfVectorizer(tokenizer=tokenize_stem)

# Define a function to check cosine similarity between two texts
def check_cosine_similarity(text1, text2):
    # Create a TF-IDF matrix for the given texts
    matrix = tfidf_vectorizer.fit_transform([text1, text2])
    
    # Calculate cosine similarity between the vectors in the matrix
    similarity_matrix = cosine_similarity(matrix)[0][1]
    
    # Return the cosine similarity value
    return similarity_matrix

In [8]:
def search_product(query):
    # Tokenize and stem the user query
    stemmed_query = tokenize_stem(query)
    
    # Apply the check_cosine_similarity function to calculate similarity for each product
    amazon_df["Similarity"] = amazon_df["Stemmed Tokens"].apply(lambda x: check_cosine_similarity(stemmed_query, x))
    
    # Sort the DataFrame by similarity in descending order and take the top 10 results
    result = amazon_df.sort_values(by=["Similarity"], ascending=False).head(10)[["Title", "Description", "Category"]]
    
    # Return the result containing product titles, descriptions, and categories
    return result

In [9]:
search_product("Toys")

Unnamed: 0,Title,Description,Category
380,Hasbro Nerf Weather Blitz Football - Colors M...,The NERF WEATHER BLITZ is tough! Designed with...,Toys & Games Sports & Outdoor Play Sports Toy...
330,KOLE IMPORTS GIANT HOT DOG SQUEAKY TOY,Giant Hot Dog Squeaky Dog Toy . Give dogs a bi...,Pet Supplies Dogs Toys Squeak Toys
144,Hog Wild Raccoon Popper Toy,The Raccoon Popper is making more mischief. Sq...,Toys & Games Sports & Outdoor Play Blasters &...
218,Green Toys Rescue Boat with Helicopter,No water rescue is out of reach when the Green...,Toys & Games Baby & Toddler Toys Car Seat & S...
124,GUESS Boys' Little Printed Twill Cargo Pocket...,Little boys printed twill cargo short pant wit...,"Clothing, Shoes & Jewelry Boys Clothing Shorts"
72,FyrFlyz Green White LEDs Cyclone,Get your hands on a one of a kind toy with the...,Toys & Games › Novelty & Gag Toys › Light-Up ...
243,"The Body Shop Drops of Youth Liquid Peel, 4.9...",The Drops of Youth™ Youth Liquid Peel exfoliat...,Automotive Motorcycle & Powersports Parts
67,ALEX Toys Rub a Dub Tub Tunes Symphony,ALEX Toys Rub a Dub Tub Tunes Symphony has eve...,Toys & Games › Baby & Toddler Toys › Bath Toys
520,"Goliath 31601 Zoingo Boingo, The Flexible Fre...",Zoingo Boingo is the fantastic flexible freest...,Sports & Outdoors › Sports & Fitness › Leisur...
436,KMC Closer Missing Link,No further description available.,Sports & Outdoors › Outdoor Recreation › Cycl...
