In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv("amazon_product.csv")

In [3]:
df.head()

Unnamed: 0,id,Title,Description,Category
0,1,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,2,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,5,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,6,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,8,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [4]:
df.drop(columns=['id'], inplace = True)

In [5]:
df["Title"][0]

' Swissmar Capstore Select Storage Rack for 18-Pack '

In [6]:
df['Description'][1]

'Welcome to the exciting world of GeminiJets! Our models are highly collectible and are all limited editions. '

In [7]:
df['Category'][3]

' Beauty & Personal Care Hair Care Hair Coloring Products Hair Color '

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 668 entries, 0 to 667
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Title        668 non-null    object
 1   Description  668 non-null    object
 2   Category     668 non-null    object
dtypes: object(3)
memory usage: 15.8+ KB


In [9]:
df.isnull().sum()

Title          0
Description    0
Category       0
dtype: int64

In [10]:
df["Num_Title"] = df['Title'].apply(lambda x:x.split())

In [11]:
df.head()

Unnamed: 0,Title,Description,Category,Num_Title
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...,"[Swissmar, Capstore, Select, Storage, Rack, fo..."
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...,"[Gemini200, Delta, CV-880, Gold, Crown, Livery..."
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S...","[Superior, Threads, 10501-2172, Magnifico, Cre..."
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...,"[Fashion, Angels, Color, Rox, Hair, Chox, Kit]"
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...,"[Union, Creative, Giant, Killing, Figure, 05:,..."


In [12]:
df["Num_Title"][0]

['Swissmar', 'Capstore', 'Select', 'Storage', 'Rack', 'for', '18-Pack']

In [13]:
import nltk

In [14]:
nltk.sent_tokenize("How are you bro.? I am always doing very good")

['How are you bro.?', 'I am always doing very good']

In [15]:
nltk.word_tokenize("How are you bro.? I am always doing very good")

['How',
 'are',
 'you',
 'bro.',
 '?',
 'I',
 'am',
 'always',
 'doing',
 'very',
 'good']

In [16]:
from nltk.stem.snowball import SnowballStemmer

In [17]:
stemmer = SnowballStemmer('english')

### Tokenizes and stems the input text

This function performs the following steps:  
1. Converts the input text to lowercase.  
2. Tokenizes the text into individual words using **NLTK's** `word_tokenize`.  
3. Applies stemming to each token using the **PorterStemmer**.  
4. Joins the stemmed tokens back into a single string.

#### Parameters:
**text** : `str`  
&emsp;The input string of text to be processed.

#### Returns:
**str**  
&emsp;A single string containing the stemmed tokens, separated by spaces.

#### Example:
```python
tokenize_stem("Running runners run quickly.")
# Output: 'run runner run quickli'


In [18]:
def tokenize_stem(text):
    # Convert text to lowercase and tokenize it
    tokens = nltk.word_tokenize(text.lower())
    
    # Apply stemming to each token
    stemmed = [stemmer.stem(w) for w in tokens]
    
    # Join stemmed words into a single string
    return " ".join(stemmed)

In [19]:
df["stemmed_tokens"]= df.apply(lambda row:tokenize_stem(row['Title'] + "" + row['Description']), axis = 1 )

In [20]:
df.head()

Unnamed: 0,Title,Description,Category,Num_Title,stemmed_tokens
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...,"[Swissmar, Capstore, Select, Storage, Rack, fo...",swissmar capstor select storag rack for 18-pac...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...,"[Gemini200, Delta, CV-880, Gold, Crown, Livery...",gemini200 delta cv-880 gold crown liveri aircr...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S...","[Superior, Threads, 10501-2172, Magnifico, Cre...",superior thread 10501-2172 magnifico cream puf...
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...,"[Fashion, Angels, Color, Rox, Hair, Chox, Kit]",fashion angel color rox hair chox kit experi w...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...,"[Union, Creative, Giant, Killing, Figure, 05:,...",union creativ giant kill figur 05 : daisuk tsu...


In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [22]:
from sklearn.metrics.pairwise import cosine_similarity  

In [23]:
tfidfv = TfidfVectorizer(tokenizer= tokenize_stem)

In [24]:
def cosine_sim(text1, text2):
    matrix = tfidfv.fit_transform([text1, text2])
    return cosine_similarity(matrix)    

In [34]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")

- text1 and text2: any two text strings (e.g., Title and Description)

- fit_transform([text1, text2]): learns vocabulary and vectorizes both texts

- cosine_similarity(matrix): computes a similarity matrix between them

In [39]:
def search_product(query):
    stemmed_query = tokenize_stem(query)
    
    # Calculate cosine similarity between the query and each product
    df['similarity'] = df['stemmed_tokens'].apply(lambda x: cosine_sim(stemmed_query, x)[0]
                                                 [0]) ## cosine_sim must return float
    
    # Sort and select top 10 most similar products
    res = df.sort_values(by=['similarity'], ascending=False).head(10)[['Title', 'Description', 'Category']]
    
    return res


## 🔍 Product Search Function Using Cosine Similarity

### 📌 Purpose
The `search_product(query)` function allows users to **search for products** by comparing a text query against a database of product descriptions or tokens. It ranks and returns the **top 10 most similar products** based on cosine similarity.

---

### 🧠 Function Breakdown

```python
def search_product(query):
    stemmed_query = tokenize(query)
    
    # Calculating the cosine similarity between the query and each product's stemmed tokens
    df['similarity'] = df['stemmed_tokens'].apply(lambda x: cosine_sim(stemmed_query, x)) ##cosine_sim must return float
    
    # Sorting the products by similarity score (highest first) and selecting top 10
    res = df.sort_values(by=['similarity'], ascending=False).head(10)[['Title', 'Description', 'Category']]
    
    return res


In [40]:

 search_product("' Swissmar Capstore Select Storage Rack for 18-Pack'")

Unnamed: 0,Title,Description,Category
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
69,EARTHLITE Professional Massage Creme - Multi ...,Our Multi-Purpose Massage Crème is long lastin...,Beauty & Personal Care Tools & Accessories Sa...
652,"Alexander Dolls 10"" Morticia - Broadway Music...",The Addams Family Musical - Based on character...,Toys & Games › Dolls & Accessories › Dolls
563,Rosalina White Multi-Dot Tin Tea Set 15Pc Chi...,Our great toys will engage and entertain littl...,Toys & Games Dress Up & Pretend Play Pretend ...
169,GALLERY SOLUTIONS 8x10 Distressed Turquoise &...,Display your most cherished photos with this 8...,Home & Kitchen › Wall Art › Posters & Prints
544,GUM Proxabrush Go-Betweens Interdental Brush ...,GUM Proxabrush Go-Betweens handle and refills ...,Industrial & Scientific Professional Dental S...
646,Dunroven House Winter Snowman Bird Brain Desi...,Towel Embroidery Set 1 Snowman. A perfect way ...,"Arts, Crafts & Sewing Needlework Embroidery"
478,Olivia Garden Fingerbrush Paddle - Medium - F...,- FBCOMB-MD - Special blend of ionic & 100% bo...,Beauty & Personal Care › Hair Care › Styling ...
467,"Martin Yale T15 Premier SharpCut 15"" Cutting ...",The T15 SharpCut trimmer has self-sharpening s...,Office Products › Office & School Supplies › ...
271,2.25 Inch Dog Double Wall Hook,This gorgeous 2.25 inch dog double wall hook h...,Home & Kitchen Storage & Organization Home St...


In [41]:
df['Title'][0]

' Swissmar Capstore Select Storage Rack for 18-Pack '