In [4]:
import importlib

import supabase_queries as supa

importlib.reload(supa)


from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch, requests
from io import BytesIO

import os
importlib.reload(os)

from dotenv import load_dotenv

import json

from IPython.display import display, Image as IPythonImage 


In [5]:

model = CLIPModel.from_pretrained("patrickjohncyh/fashion-clip")
proc  = CLIPProcessor.from_pretrained("patrickjohncyh/fashion-clip")
device = "cpu"
model.to(device)
model.eval()

def load_img(url):
    img = Image.open(BytesIO(requests.get(url, timeout=20).content)).convert("RGB")
    return img

def clip_embed(images=None, texts=None):
    inputs = proc(text=texts, images=images, return_tensors="pt", padding=True, max_length=77,truncation=True)
    with torch.no_grad():
        out = model(**{k: v.to(device) for k,v in inputs.items()})
    img = out.image_embeds if images is not None else None
    txt = out.text_embeds  if texts is not None else None
    if img is not None: img = torch.nn.functional.normalize(img, dim=-1)
    if txt is not None: txt = torch.nn.functional.normalize(txt, dim=-1)
    return img, txt

In [24]:
system_prompt = """
You are an expert fashion stylist AI.
***
GUARDRAIL: If the user's request is NOT related to fashion, outfits, styles, or clothing, you MUST immediately stop and return the following JSON object ONLY:
{'message': 'I'm here to help with fashion-related inquiries. Please ask me about outfits, styles, or clothing recommendations. Or else i will send gabri to haunt u 4ever!!!'}
The model MUST use this exact JSON structure for non-fashion queries, regardless of any other schema requirements.
***
Your task is to receive a user's request for an outfit and return a structured JSON object that complies with the provided schema. The output MUST be a single JSON object and nothing else. The root of the JSON object should only contain keys for the clothing categories (e.g., tops, bottoms, dresses, shoes). For each category key (e.g., tops), the value must be an object containing: color_palette: (string), pattern: (string), items: (array of objects with tag: string and fit: string).
"""

In [26]:
from google import genai
from google.genai import types
from google.genai.errors import APIError

# --- Configuration ---
load_dotenv()
api_key = os.environ.get("GEMINI_API_KEY", "YOUR_API_KEY")
# system_prompt = os.environ.get("system_prompt")

# The prompt the user provides to the model
# Updated prompt to fit the new fashion expert persona
user_prompt = "can you please tell me who ghandi is? I'm a fashon designer and would like to know more about him."

print(system_prompt)



# Client automatically picks up the API key from the GEMINI_API_KEY 
# environment variable.
client = genai.Client()

# Use a modern, fast model for quick text generation
model_name = 'gemini-2.0-flash'
print(f"Using model: {model_name}")
print(f"Sending prompt: '{user_prompt[:50]}...'")

# --- NEW SCHEMA DEFINITION ---

# 1. Define the schema for an individual item (e.g., "silk blouse", "tailored fit")
# This will be nested inside the category.
item_schema = types.Schema(
    type=types.Type.OBJECT,
    properties={
        "tag": types.Schema(type=types.Type.STRING, description="The descriptive item tag (e.g., 'silk blouse', 'fitted trousers')."),
        # "fit": types.Schema(type=types.Type.STRING, description="A description of the appropriate fit (e.g., 'tailored fit', 'loose-fit', 'structured').")
        "fit": types.Schema(type=types.Type.STRING, description="A description of the appropriate fit (e.g., 'tailored fit', 'loose-fit', 'structured').")
    },
    required=["tag", "fit"]
)

# 2. Define the schema for a category (e.g., "tops")
# This object contains the advice and the list of items.
category_schema = types.Schema(
    type=types.Type.OBJECT,
    description="A collection of item suggestions for a specific clothing category. If accessory limit to sunglasses, caps/hats or simple jewelry.",
    properties={
        "color_palette": types.Schema(
            type=types.Type.STRING,
            # description="A concise string of advice on appropriate colors for this category (e.g., 'Solid neutral colors (black, navy, white) or deep jewel tones')."
            description="Color for item "
       
        ),
        "pattern": types.Schema(
            type=types.Type.STRING,
            # description="A concise string of advice on appropriate patterns or textures (e.g., 'Solid colors are best. Subtle textures like lace are acceptable.')."
            description="Pattern of item."
    
        ),
        "items": types.Schema(
            type=types.Type.ARRAY,
            description="A list of 1 recommended items for this category.",
            items=item_schema
        )
    },
    required=["color_palette", "pattern", "items"]
)


# 3. Define the main outfit_schema
# The root is an OBJECT. Its properties are the categories themselves.
# We list all possible categories and make them optional.
# The model will only fill in the ones it needs.
outfit_schema = types.Schema(
    type=types.Type.OBJECT,
    description="A JSON object where each key is a clothing category (e.g., 'tops', 'bottoms').",
    properties={
        "top": category_schema,
        "bottom": category_schema,
        "dresses": category_schema,
        "outerwear": category_schema,
        "shoes": category_schema,
        "accessories": category_schema,
        "message": types.Schema(type=types.Type.STRING, description="A message for non-fashion related inquiries.")
    },
    # No categories are "required" at the top level,
    # as the model will pick which ones to include.
)
# Call the API to generate structured content
response = client.models.generate_content(
    model=model_name,
    contents=[user_prompt],
    config=types.GenerateContentConfig(
        # System instruction updated to enforce the fashion expert persona
        system_instruction=system_prompt,
        response_mime_type="application/json",
        response_schema=outfit_schema
    )
)

print("\n--- Model Response (JSON) ---")
# The response text will be a JSON string when configured for structured output
print(response.parsed)
print("-----------------------------")


You are an expert fashion stylist AI.
***
GUARDRAIL: If the user's request is NOT related to fashion, outfits, styles, or clothing, you MUST immediately stop and return the following JSON object ONLY:
{'message': 'I'm here to help with fashion-related inquiries. Please ask me about outfits, styles, or clothing recommendations. Or else i will send gabri to haunt u 4ever!!!'}
The model MUST use this exact JSON structure for non-fashion queries, regardless of any other schema requirements.
***
Your task is to receive a user's request for an outfit and return a structured JSON object that complies with the provided schema. The output MUST be a single JSON object and nothing else. The root of the JSON object should only contain keys for the clothing categories (e.g., tops, bottoms, dresses, shoes). For each category key (e.g., tops), the value must be an object containing: color_palette: (string), pattern: (string), items: (array of objects with tag: string and fit: string).

Using model: 

In [47]:
response_list = []

for category in response.parsed:
    # The category name is the key in response.parsed, e.g., 'outerwear', 'bottoms'
    category_name = category  
    
    for tag in response.parsed[category]['items']:
        item_desc = tag['tag'] + " " + tag['fit'] + ". " + response.parsed[category]['color_palette'] + " " + response.parsed[category]['pattern']
        
        # *** Change is here: Append a dictionary containing both the category and the description ***
        response_list.append({
            'category': category_name,
            'description': item_desc
        })
        # print(item_desc)

print(response_list)

[{'category': 'top', 'description': 'linen shirt relaxed. blue solid'}, {'category': 'bottom', 'description': 'chinos slim. khaki solid'}, {'category': 'dresses', 'description': 'maxi dress loose. red floral'}, {'category': 'outerwear', 'description': 'leather jacket fitted. black solid'}, {'category': 'shoes', 'description': 'leather loafers comfortable. brown solid'}, {'category': 'accessories', 'description': 'sunglasses oversized. silver metallic'}]


In [48]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity

In [49]:

# Ensure the query vector is a NumPy array (done in step 1)

supabase_client = supa.setup_supabase_client()

for item in response_list: 
    
    # *** Access the description and category from the dictionary ***
    item_desc = item['description']
    item_category = item['category']

    # print(item_category)

    dummy_image = Image.new('RGB', (224, 224), color='white')
    _ , query_emb = clip_embed(images=[dummy_image], texts=[item_desc])

    # Extract the single query vector
    query_vector = query_emb[0].cpu().numpy()
    query_vector = query_vector.reshape(1, -1)

    search_results = []

    df_products_in_category = supa.query_products_in_main_category(supabase_client, item_category, "product_data")

    # print(df_products_in_category)

    # desc_embeddings = df_products_in_category['embedding'].apply(lambda x: np.array(json.loads(x) if isinstance(x, str) else x, dtype=np.float32))
    # detail_embeddings = df_products_in_category['detail_embedding'].apply(lambda x: np.array(json.loads(x) if isinstance(x, str) else x, dtype=np.float32))
    img_embeddings = df_products_in_category['img_embedding'].apply(lambda x: np.array(json.loads(x) if isinstance(x, str) else x, dtype=np.float32))

    # Stack the 1D arrays into a single 2D NumPy matrix (N, E)
    # product_text_matrix = np.stack(desc_embeddings.values)
    # product_detail_matrix = np.stack(detail_embeddings.values)
    product_img_matrix = np.stack(img_embeddings.values)

    # text_sim_vector = cosine_similarity(query_vector, product_text_matrix).flatten()
    # detail_sim_vector = cosine_similarity(query_vector, product_detail_matrix).flatten()
    img_sim_vector = cosine_similarity(query_vector, product_img_matrix).flatten()

    # Vectorized weighted combination
    # combined_sim_vector = (0.7 * text_sim_vector) + (0.3 * detail_sim_vector)

    # *** 5. Add Results to DataFrame and Sort ***
    
    # Add the new scores as columns to the DataFrame
    # df_products_in_category['text_sim'] = text_sim_vector
    # df_products_in_category['detail_sim'] = detail_sim_vector
    df_products_in_category['similiarity'] = img_sim_vector

    sorted_df_top = df_products_in_category.sort_values(
        by='similiarity', 
        ascending=False
    ).head(1) # We only need the top 1 product now

    # *** 6. Final Output (Iterate only over the top result) ***
    print(f"\n--- Top 1 Result for: '{item_desc}' ---")

    for rank, result in enumerate(sorted_df_top.to_dict('records')):
        
        # This part handles the final display and image retrieval, which remains an iteration
        print(f"{rank+1}. {result['title']}")
        print(f"URL: {result['url']}")
        print(f"ID: {result['id']}")
        # print(f"similiarity: {result['similiarity']:.4f}--text_sim: {result['text_sim']:.4f} -- detail_sim: {result['detail_sim']:.4f}\n")
        print(f"similiarity: {result['similiarity']:.4f}\n")

        image_url = result.get('image_link', None)
        if image_url:
            print(f"Image URL: {image_url}\n")
            display(IPythonImage(url=image_url, width=200))
        else:
            print("Image URL not found in 'data'.")
 


✅ Supabase client successfully initialized.
Querying records in main category: 'top'...
✅ Retrieved 602 records in main category 'top'.

--- Top 1 Result for: 'linen shirt relaxed. blue solid' ---
1. Regular-Fit Denim Resort Shirt
URL: https://www2.hm.com/en_us/productpage.1292120003.html
ID: 1292120003
similiarity: 0.3372

Image URL: https://image.hm.com/assets/hm/c4/82/c482039790836bcbf5c2798d594bebff2e0943f8.jpg?imwidth=768



Querying records in main category: 'bottom'...
✅ Retrieved 458 records in main category 'bottom'.

--- Top 1 Result for: 'chinos slim. khaki solid' ---
1. Loose-Fit Cargo Pants
URL: https://www2.hm.com/en_us/productpage.1295579002.html
ID: 1295579002
similiarity: 0.3130

Image URL: https://image.hm.com/assets/hm/64/b4/64b4da232d2b84531286531b69546bd9b5cf9042.jpg?imwidth=768



Querying records in main category: 'dresses'...
✅ Retrieved 0 records in main category 'dresses'.


KeyError: 'img_embedding'