In [9]:
from data import get_fashion_data
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import time
start=time.time()  #time calculation starts

load_dotenv()
embedding=GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")  #Google embedding model
df=get_fashion_data()

GENERATING VECTORS 

In [10]:
store_embeddings=[]  #storage for emdeddings of description of clothes data
for desc in df["desc"]:
    vector=embedding.embed_query(desc)
    store_embeddings.append(vector)
user="enegetic, urban, chic"   #vibes user wants
user_query_vec=embedding.embed_query(user)  #embeddings of user query
dataframe=pd.DataFrame(store_embeddings)
scores = cosine_similarity([user_query_vec], store_embeddings)[0]  # flatten the 2D array
indices = np.argsort(scores) [-3:][::-1] # indices of top three matches


MATCHING SIMILARITY VECTORS THAT OF QUERY WITH DATA EMBEDDING VECTORS

In [11]:
print("\n TOP THREE MATCHES")
for rank, idx in enumerate (indices,start=1):
    print(f"{rank} {df.iloc[idx]['name']} - Similarity :{scores[idx]:.3f}")
    print(f"Description {df.iloc[idx]['desc']}")
    
print("Similarity Scores",scores)
print("\n")
if max(scores) <0.4 :
    print("No similar vibe found")   # no accurate match
else:
    match_found=np.argmax(scores) #index of best match
    print("Perfect Match","\n",df.iloc[match_found])
end=time.time()   #time calculation


 TOP THREE MATCHES
1 Street Hoodie - Similarity :0.865
Description Bold colors, oversized fit for an energetic street look
2 Graphic Tee - Similarity :0.846
Description Pop-culture prints with a modern street vibe
3 Boho Dress - Similarity :0.815
Description Flowy, earthy tones for festival vibes
Similarity Scores [0.8146538  0.86485727 0.79301806 0.80090898 0.80157798 0.84564956
 0.80587567 0.7721266  0.78957215 0.78221655 0.78213877]


Perfect Match 
 name                                         Street Hoodie
desc     Bold colors, oversized fit for an energetic st...
vibes                                        [urban, chic]
Name: 1, dtype: object


CALCULATING LATENCY (TIME TAKEN TO EXECUTE THE PROGRAM)

In [12]:
print("LATENCY",end-start,"seconds")

LATENCY 11.166866540908813 seconds


TEST & EVALUATION

In [13]:
best_score = max(scores)

if best_score >= 0.7:
    quality = "Good Match ✅"
elif best_score >= 0.4:
    quality = "Moderate Match ⚠️"
else:
    quality = "Poor Match ❌"

print(f"\nMatch Quality: {quality} (Score: {best_score:.3f})")



Match Quality: Good Match ✅ (Score: 0.865)


Improvement Ideas: The system can be enhanced by integrating a vector database like Pinecone or FAISS to store and therefore I tried similarity search with FAISS vector storage databse. Retrieve embeddings more efficiently and results are visible (LATENCY).  Now one thing needs to be checked between Pinecone and FAISS which one is more accurate, precise and has low latency.

Scalability: Currently works program works but batch embedding generation and caching should be added.

Edge Case Handling: Added a fallback check — if all cosine similarity scores are below 0.4, the system returns “No similar vibe found” instead of random matches.

Model Flexibility: Switched from OpenAI’s paid embeddings to Google Gemini Embeddings, keeping the system free-tier compatible while maintaining reasonable semantic accuracy.

Future Improvements: Could add a frontend UI where users select vibes via tags or emojis, and display top-3 results visually with images and similarity scores. To make the program visually phenomenal.

CODE USING FAISS VECTOR STORAGE

In [16]:
from data import get_fashion_data
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import faiss
import time

# TIME CALCULATION START
start = time.time()

load_dotenv()

# Google Embedding Model
embedding = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

# Get fashion dataset
df = get_fashion_data()

print("GENERATING VECTORS...")

# embeddings for each clothing description
store_embeddings = []
for desc in df["desc"]:
    vector = embedding.embed_query(desc)
    store_embeddings.append(vector)

# Convert to numpy array
store_embeddings = np.array(store_embeddings).astype("float32")

# FAISS INDEX CREATION
# Create FAISS index for cosine similarity 
dimension = store_embeddings.shape[1]
faiss_index = faiss.IndexFlatIP(dimension)

# Normalize vectors for cosine similarity   
#To simplify things for speed and accuracy, we normalize each vector so that its length = 1.
faiss.normalize_L2(store_embeddings)

# Add vectors to FAISS index
faiss_index.add(store_embeddings)

# USER QUERY EMBEDDING

user = "energetic, urban, chic"
user_query_vec = np.array([embedding.embed_query(user)]).astype("float32")

# Normalize user vector too
faiss.normalize_L2(user_query_vec)

# SEARCH IN FAISS INDEX

k = 3  # top 3 results
scores, indices = faiss_index.search(user_query_vec, k)

# Flatten results
scores = scores.flatten()
indices = indices.flatten()

# DISPLAY RESULTS
print("\nTOP THREE MATCHES:")
for rank, (idx, score) in enumerate(zip(indices, scores), start=1):
    print(f"{rank}. {df.iloc[idx]['name']} - Similarity: {score:.3f}")
    print(f"   Description: {df.iloc[idx]['desc']}\n")

print("All Similarity Scores:", scores)
print("\n")

if max(scores) < 0.4:
    print("No similar vibe found ")
else:
    match_found = indices[np.argmax(scores)]
    print("Perfect Match \n", df.iloc[match_found])

# LATENCY CALCULATION

end = time.time()
print("\nLATENCY:", round(end - start, 3), "seconds")

# TEST & EVALUATION
#
best_score = max(scores)

if best_score >= 0.7:
    quality = "Good Match ✅"
elif best_score >= 0.4:
    quality = "Moderate Match ⚠️"
else:
    quality = "Poor Match ❌"

print(f"\nMatch Quality: {quality} (Score: {best_score:.3f})")


GENERATING VECTORS...

TOP THREE MATCHES:
1. Street Hoodie - Similarity: 0.861
   Description: Bold colors, oversized fit for an energetic street look

2. Graphic Tee - Similarity: 0.847
   Description: Pop-culture prints with a modern street vibe

3. Boho Dress - Similarity: 0.814
   Description: Flowy, earthy tones for festival vibes

All Similarity Scores: [0.86058563 0.8470954  0.8138657 ]


Perfect Match 
 name                                         Street Hoodie
desc     Bold colors, oversized fit for an energetic st...
vibes                                        [urban, chic]
Name: 1, dtype: object

LATENCY: 5.303 seconds

Match Quality: Good Match ✅ (Score: 0.861)
