In [14]:
from elasticsearch import Elasticsearch

In [15]:
import os
from dotenv import load_dotenv
import pandas as pd

In [53]:
load_dotenv()

# Get the values
ELASTIC_USERNAME = os.getenv("ELASTIC_USERNAME")
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD")
ELASTIC_HOST = os.getenv("ELASTIC_HOST")
# ELASTIC_HOST="https://localhost:9200"
ELASTIC_HOST


'ht'

In [58]:
load_dotenv()

ELASTIC_CLOUD_URL = os.getenv("Elasticsearch_URL")
ELASTIC_API_KEY=os.getenv("ELASTIC_API_KEY")

In [62]:
es = Elasticsearch(
    ELASTIC_CLOUD_URL,
    api_key=ELASTIC_API_KEY
)


In [63]:
if es.ping():
    print("✅ Connected to Elasticsearch!")
else:
    print("❌ Failed to connect to Elasticsearch.")

✅ Connected to Elasticsearch!


## Prepare the data

In [20]:
import pandas as pd

df = pd.read_csv("myntra_products_catalog.csv").loc[:499]
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White


In [21]:
df.isna().value_counts()

ProductID  ProductName  ProductBrand  Gender  Price (INR)  NumImages  Description  PrimaryColor
False      False        False         False   False        False      False        False           468
                                                                                   True             32
Name: count, dtype: int64

In [22]:
df.fillna("None", inplace=True)

## Convert the relevant field to Vector using BERT model

In [28]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-mpnet-base-v2')

In [29]:
df["DescriptionVector"] = df["Description"].apply(lambda x: model.encode(x))

In [59]:
df.head()

Unnamed: 0,ProductID,ProductName,ProductBrand,Gender,Price (INR),NumImages,Description,PrimaryColor,DescriptionVector
0,10017413,DKNY Unisex Black & Grey Printed Medium Trolle...,DKNY,Unisex,11745,7,"Black and grey printed medium trolley bag, sec...",Black,"[0.027645713, -0.0026341556, -0.003588426, 0.0..."
1,10016283,EthnoVogue Women Beige & Grey Made to Measure ...,EthnoVogue,Women,5810,7,Beige & Grey made to measure kurta with churid...,Beige,"[-0.024660744, -0.028755462, -0.02033244, 0.03..."
2,10009781,SPYKAR Women Pink Alexa Super Skinny Fit High-...,SPYKAR,Women,899,7,Pink coloured wash 5-pocket high-rise cropped ...,Pink,"[-0.046943355, 0.08182783, 0.048335187, -0.000..."
3,10015921,Raymond Men Blue Self-Design Single-Breasted B...,Raymond,Men,5599,5,Blue self-design bandhgala suitBlue self-desig...,Blue,"[-0.015098742, -0.010285478, 0.009487344, -0.0..."
4,10017833,Parx Men Brown & Off-White Slim Fit Printed Ca...,Parx,Men,759,5,"Brown and off-white printed casual shirt, has ...",White,"[-0.017746514, 0.0062095993, 0.021813946, 0.02..."


In [64]:
es.ping()

True

## Create new index in ElasticSearch!

In [66]:
from indexMapping import indexMapping

if not es.indices.exists(index="all_products"):
    es.indices.create(index="all_products", body={"mappings": indexMapping})
    print("Index created with mappings!")
else:
    print("Index already exists.")

Index already exists.


In [65]:

es.indices.create(index="all_products", mappings=indexMapping)

BadRequestError: BadRequestError(400, 'resource_already_exists_exception', 'index [all_products/7M7VmE4rRy6T0h_BAoMp-Q] already exists')

## Ingest the data into index

In [32]:
record_list = df.to_dict("records")

In [67]:
for record in record_list:
    try:
        es.index(index="all_products", document=record, id=record["ProductID"])
    except Exception as e:
        print(e)

In [69]:
es.count(index="all_products")

ObjectApiResponse({'count': 500, '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}})

## Search the data

In [70]:
# Replace the problematic code block with this:

input_keyword = "Blue Shoes"
vector_of_input_keyword = model.encode(input_keyword)

# Modern approach without body parameter
res = es.search(
    index="all_products",
    knn={
        "field": "DescriptionVector",
        "query_vector": vector_of_input_keyword.tolist(),  # Convert numpy array to list
        "k": 2,
        "num_candidates": 500
    },
    source=["ProductName", "Description"]
)

results = res["hits"]["hits"]



In [71]:
results

[{'_index': 'all_products',
  '_id': '10018013',
  '_score': 0.84302884,
  '_source': {'ProductName': 'Puma Men Blue Sneakers',
   'Description': 'A pair of round-toe blue sneakers, has regular styling, lace-up detailTextile upperCushioned footbedTextured and patterned outsoleWarranty: 3 monthsWarranty provided by brand/manufacturer'}},
 {'_index': 'all_products',
  '_id': '10018075',
  '_score': 0.84302884,
  '_source': {'ProductName': 'Puma Men Blue Sneakers',
   'Description': 'A pair of round-toe blue sneakers, has regular styling, lace-up detailTextile upperCushioned footbedTextured and patterned outsoleWarranty: 3 monthsWarranty provided by brand/manufacturer'}}]

In [72]:
# Display results
for hit in results:
    print(f"Score: {hit['_score']}")
    print(f"Product: {hit['_source']['ProductName']}")
    print(f"Description: {hit['_source']['Description']}")
    print("-" * 50)

Score: 0.84302884
Product: Puma Men Blue Sneakers
Description: A pair of round-toe blue sneakers, has regular styling, lace-up detailTextile upperCushioned footbedTextured and patterned outsoleWarranty: 3 monthsWarranty provided by brand/manufacturer
--------------------------------------------------
Score: 0.84302884
Product: Puma Men Blue Sneakers
Description: A pair of round-toe blue sneakers, has regular styling, lace-up detailTextile upperCushioned footbedTextured and patterned outsoleWarranty: 3 monthsWarranty provided by brand/manufacturer
--------------------------------------------------


In [22]:
res = es.search(
    index="all_products",
    size=2,
    query={
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                "source": "cosineSimilarity(params.query_vector, 'DescriptionVector') + 1.0",
                "params": {"query_vector": vector_of_input_keyword.tolist()}
            }
        }
    },
    _source=["ProductName", "Description"]
)


ConnectionTimeout: Connection timed out