In [1]:
from sentence_transformers import SentenceTransformer, util
import torch
import numpy as np
import pandas as pd
import os
import pickle
import time
import re

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = SentenceTransformer('all-MiniLM-L6-v2')


In [3]:
# import data
df = pd.read_csv('myDatabase.items.csv')

In [4]:
df.isnull().sum()


_id        0
name    4035
dtype: int64

In [5]:
# drop rows with null values
df = df.dropna()

In [6]:
ids = df['_id'].tolist()
names = df['name'].tolist()

In [7]:
for name in names:
    name = re.sub(r'[^\w\s]', '', name)
    name = name.lower()

In [8]:
# Embed the dataset 
name_embeddings = model.encode(names, convert_to_tensor=True)

In [11]:
name_embeddings.shape

torch.Size([61142, 384])

In [12]:
def semantic_search(query, embeddings, texts, ids, top_k=100):
    # Encode the query
    query_embedding = model.encode(query, convert_to_tensor=True)
  
    # Compute cosine similarity between the query and texts
    cos_similarities = util.cos_sim(query_embedding, embeddings)
  
    # Sort the results by cosine similarity in descending order
    sorted_indices = cos_similarities.argsort(descending=True)
  
    # Return the top-k most similar texts and their corresponding indices
    top_k_indices = sorted_indices[0][:top_k]
    return [(texts[i], ids[i]) for i in top_k_indices]




In [15]:
query = input('Enter your query: ')
results = semantic_search(query, name_embeddings, names, ids, top_k=100)
for name, idx in results:
    print(f'name: {name} -  id: {idx}')


name: Cat What Murderous Black Cat With Knife Magnet Stickers Kids Children Home Decor Cute Colorful Holder Funny Toy Baby -  id: 1005004829830973
name: Catry Cat Tree with Scratching Post - Animal Cat Tower – Cat Climber with Jute Cat Scratch Post – Cat Condo -  id: B09QHKZTDN
name: OurPets Alpine Cat Scratcher -  id: B005BP8MQ8
name: Super Difficult Puzzle Cute Cat Toy -  id: -p-1637014974600065024
name: Cats Catch Board And On The Platform -  id: -p-1589888222472187904
name: Pet Cat Toy Natural Catnip Pet Cat Matatabi Bell Sound Plush Mouse Cat Toy Bell Funny Cat Suitable Toy For Cats To Play Supplies -  id: 1005002358585669
name: Decor Cat Cat Saying If Loving Cats is Wrong I Don't Want to Be Right Decor Cat -  id: B08W3DMY8X
name: Cat bell collar -  id: 1005004050080941
name: A Bag Of Snack Pillow For The Cat -  id: -p-1614888279319523328
name: Purrfect Cat Box , Blue -  id: B07K3YRPDV
name: Luxury Cat Climb Pet Cat Tree Tower Condo Cat House Multi-Level Cat House Cat Scratching P

In [14]:
# Save the embeddings
torch.save(name_embeddings, 'name_embeddings_items.pt')
