# One time:

In [2]:
import pandas as pd
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [3]:
# 2) Load CSV
csv_path = "./dataset/Nykaa_Product_Review_Cleaned.csv"  # place the file in working dir
df = pd.read_csv(csv_path)

In [4]:
import re


def safe(x):
    return "" if pd.isna(x) else str(x)


def safe_int(x):
    if pd.isna(x):
        return 0
    s = str(x).strip()
    # Check if it's a valid integer or float
    if re.fullmatch(r"[+-]?\d+(\.\d+)?", s):
        return int(float(s))
    return 0  # fallback for invalid strings


# Build texts
texts = (
    df["Product Name"].map(safe) + " | " +
    df["Product Category"].map(safe) + " | " +
    df["Product Brand"].map(safe) + " | " +
    df["Product Tags"].map(safe) + " | " +
    df["Product Contents"].map(safe) + " | " +
    df["Product Description"].map(safe)
).tolist()

# Build metadatas (FIXED)
metadatas = [
    {
        "product_id": safe(r["Product Id"]),
        "brand_code": safe(r["Product Brand Code"]),
        "retailer": safe(r["Retailer"]),
        "category": safe(r["Product Category"]),
        "brand": safe(r["Product Brand"]),
        "name": safe(r["Product Name"]),
        "price": float(r["Product Price"]) if pd.notna(r["Product Price"]) else 0.0,
        "url": safe(r["Product Url"]),
        "market": safe(r["Market"]),
        "currency": safe(r["Product Currency"]),
        "image_url": safe(r["Product Image Url"]),
        "tags": safe(r["Product Tags"]),
        "contents": safe(r["Product Contents"]),
        "rating": float(r["Product Rating"]) if pd.notna(r["Product Rating"]) else 0.0,
        "reviews_count": safe_int(r["Product Reviews Count"]),
        "exp_cat_count": safe_int(r["Expected Category Count"]),
        "exp_brand_count": safe_int(r["Expected Brand Count"]),

    }
    for _, r in df.iterrows()
]

In [5]:
# metadatas

In [6]:
# texts


In [7]:
# 4) Embeddings (Sentence-Transformers via HuggingFace)
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)

  embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)
  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# 5) Create FAISS vector store and persist
faiss_store = FAISS.from_texts(texts=texts, embedding=embeddings, metadatas=metadatas)
INDEX_DIR = "faiss_index"
faiss_store.save_local(INDEX_DIR)

In [9]:
# from langchain.schema import Document
# faiss_store.add_documents([Document("timepass")], embedding=embeddings)

In [10]:
resp = faiss_store.similarity_search("lipstick", k=5)
resp

[Document(id='c66a7da1-dfb6-4f7e-aaf8-d9eb8d7faedd', metadata={'product_id': 'c157e14d2f2c994b106d670227ab2739', 'brand_code': 'BZ1000', 'retailer': 'nykaa.com', 'category': 'Makeup > Lips > Lipstick', 'brand': 'Kay Beauty', 'name': 'Kay Beauty Matte Lipstick - Award Night', 'price': 999.0, 'url': 'https://www.nykaa.com/c/p/1010793?skuId=772943', 'market': 'IN', 'currency': 'INR', 'image_url': 'https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/a/w/award-night_2.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/2/_/2_light_18.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/3/_/3_medium_18.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/4/_/4_shade-card_15.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/o/n/on-screen_4.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize

In [11]:
import json
useful = []

for prod in resp:
    prod_data = prod.metadata
    useful.append(prod_data)
    
with open("temp_faiss.json", "w") as f:
    json.dump(useful, f, indent=4)


# Run Time

In [12]:
# 6) Reload FAISS later (new session / new cell)
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS



In [13]:
embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)
faiss_loaded = FAISS.load_local(INDEX_DIR, embeddings, allow_dangerous_deserialization=True)

In [14]:
# 7) Example similarity search with optional category filter
query = "lipstick"
candidates = faiss_loaded.similarity_search_with_score(query, k=5)

In [15]:
candidates

[(Document(id='c66a7da1-dfb6-4f7e-aaf8-d9eb8d7faedd', metadata={'product_id': 'c157e14d2f2c994b106d670227ab2739', 'brand_code': 'BZ1000', 'retailer': 'nykaa.com', 'category': 'Makeup > Lips > Lipstick', 'brand': 'Kay Beauty', 'name': 'Kay Beauty Matte Lipstick - Award Night', 'price': 999.0, 'url': 'https://www.nykaa.com/c/p/1010793?skuId=772943', 'market': 'IN', 'currency': 'INR', 'image_url': 'https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/a/w/award-night_2.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/2/_/2_light_18.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/3/_/3_medium_18.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/4/_/4_shade-card_15.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/o/n/on-screen_4.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resiz

In [16]:
# filter by category if needed
category = "Makeup"  # example category
filtered = [(doc, score) for doc, score in candidates 
            if doc.metadata.get("category", "").lower() == category.lower()]

In [17]:
top10 = filtered[:10]
for doc, score in top10:
    print(doc.metadata["product_id"], doc.metadata["name"], score)

In [18]:
#debugg
print("Total candidates retrieved:", len(candidates))
print("Filtered by category:", len(filtered))

# Show first few candidate categories to check if 'Makeup' exists
for doc, score in candidates[:5]:
    print("Candidate category:", doc.metadata.get("category"))


Total candidates retrieved: 5
Filtered by category: 0
Candidate category: Makeup > Lips > Lipstick
Candidate category: Makeup > Lips > Lipstick
Candidate category: Makeup > Lips > Lipstick
Candidate category: Makeup > Lips > Lipstick
Candidate category: Makeup > Lips > Lip Liner


In [19]:
## code fixed
query = "lipstick"
candidates = faiss_loaded.similarity_search_with_score(query, k=50)

category = "Makeup"  # broader match

filtered = [
    (doc, score) for doc, score in candidates
    if doc.metadata.get("category", "").lower().startswith(category.lower())
]

top10 = filtered[:10]
for doc, score in top10:
    print(doc.metadata["product_id"], doc.metadata["name"], doc.metadata["category"], score)


c157e14d2f2c994b106d670227ab2739 Kay Beauty Matte Lipstick - Award Night Makeup > Lips > Lipstick 0.7151291
ce8e085f6e6a0f4153861f8928e204a4 Jaquline USA Matte Stick Velvet Matte Lipstick - Boss Girl Makeup > Lips > Lipstick 0.7269218
91b6095937645fb33cc1da6ed38f1b3a Kay Beauty Matte Lipstick - On Screen Makeup > Lips > Lipstick 0.73444396
bb6f24059ebc1a764a00815b75deb271 Dermacol Iconic Lips 2-In-1 Lipstick & Lipliner - 1 Makeup > Lips > Lipstick 0.74202996
30e44a225f4e6dd2f06749326bc990ff Charlotte Tilbury Lip Cheat - Hot Gossip Makeup > Lips > Lip Liner 0.76888466
4432406fa34344aec70a8a89d4e5c640 Kay Beauty Metallic Lip Highlighter - Cover Girl Makeup > Lips > Liquid Lipstick 0.7955445
b12108251840bc0bf7cfc81e8282c5a7 Nicka K Matte Lip Color - Liatris Makeup > Lips > Liquid Lipstick 0.80097365
b22678e308247f2a60b1c819d094ee85 Lime Crime Velvetines Liquid Matte Lipstick - Lulu Makeup > Lips > Liquid Lipstick 0.8095227
4d9a3ff7951c4fea10942ade607e7d38 The Face Shop Club Ryan Velvet Li

In [26]:
### data in json, name + image url
results = [
    {
        "name": doc.metadata.get("name"),
        "category": doc.metadata.get("category"),
        "image_url": doc.metadata.get("image_url")
    }
    for doc, _ in filtered[:10]
]


In [27]:
# filtered[:20]

In [28]:
results

[{'name': 'Kay Beauty Matte Lipstick - Award Night',
  'category': 'Makeup > Lips > Lipstick',
  'image_url': 'https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/a/w/award-night_2.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/2/_/2_light_18.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/3/_/3_medium_18.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/4/_/4_shade-card_15.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/o/n/on-screen_4.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/a/w/award-night_1.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/7/_/7_all_looks_15.jpg|https://images-static.nykaa.com/media/catalog/product/tr:h-800,w-800,cm-pad_resize/8/_/8_claims_14.jpg|https://images-static.nykaa.com/media/catalog/product/t