In [1]:
from elasticsearch import Elasticsearch
import os
from dotenv import load_dotenv
import pandas as pd

In [2]:
load_dotenv()

# Get the values
ELASTIC_USERNAME = os.getenv("ELASTIC_USERNAME")
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD")
ELASTIC_HOST = os.getenv("ELASTIC_HOST")
ELASTIC_HOST="https://localhost:9200"
ELASTIC_HOST


'https://localhost:9200'

In [3]:
elastic_DB = Elasticsearch(
    ELASTIC_HOST,
    basic_auth=(ELASTIC_USERNAME, ELASTIC_PASSWORD),
    ca_certs=r"G:\elasticsearch-9.0.4\config\certs\http_ca.crt"
)

elastic_DB.ping()

True

## Prepare Data

In [4]:
df =pd.read_csv('myntra_products_catalog.csv').loc[:499]
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White


In [5]:
df.isna().value_counts()

ProductID  ProductName  ProductBrand  Gender  Price (INR)  NumImages  Description  PrimaryColor
False      False        False         False   False        False      False        False           468
                                                                                   True             32
Name: count, dtype: int64

In [6]:
df.fillna("None",inplace=True)

In [7]:
df.isna().value_counts()

ProductID  ProductName  ProductBrand  Gender  Price (INR)  NumImages  Description  PrimaryColor
False      False        False         False   False        False      False        False           500
Name: count, dtype: int64

## Convert Relvent data to vector

In [8]:
from sentence_transformers import SentenceTransformer

# Load https://huggingface.co/sentence-transformers/all-mpnet-base-v2
model = SentenceTransformer("all-mpnet-base-v2")


  from .autonotebook import tqdm as notebook_tqdm


In [9]:
df["DescriptionVector"]= df["Description"].apply(lambda x: model.encode(x))


In [47]:
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor,DescriptionVector
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black,"[0.027645713, -0.0026341556, -0.003588426, 0.0..."
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige,"[-0.024660744, -0.028755462, -0.02033244, 0.03..."
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink,"[-0.046943355, 0.08182783, 0.048335187, -0.000..."
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue,"[-0.015098742, -0.010285478, 0.009487344, -0.0..."
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White,"[-0.017746514, 0.0062095993, 0.021813946, 0.02..."


In [11]:
elastic_DB.ping()

True

In [10]:
from indexMapping import indexMapping
elastic_DB.indices.create(index="all_products",mappings=indexMapping)

BadRequestError: BadRequestError(400, 'resource_already_exists_exception', 'index [all_products/BCv7TqFsRNaOGWjtzFNYAQ] already exists')

In [12]:
record_list = df.to_dict("records")

In [15]:
for record in record_list:
    try:
       elastic_DB.index(index="all_products",document=record,id = record["ProductID"])
    except Exception as e:
        print(e)

In [17]:
elastic_DB.count(index="all_products")

ObjectApiResponse({'count': 500, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

## Search The Data

In [18]:
input_keyword = "blue Shoes"
vector_of_input_keyword = model.encode(input_keyword)

In [20]:
query = {
    "filed": "DescriptionVector",
    "query_selector": vector_of_input_keyword,
    "k":2,
    "num_candidates":500,

}

res= elastic_DB.knn_search(index="all_products",knn=query,source = ["ProductName","Description"])
res["hits"]["hits"]

AttributeError: 'Elasticsearch' object has no attribute 'knn_search'

In [None]:
embeddings = model.encode([
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium.",
])
similarities = model.similarity(embeddings, embeddings)