In [40]:
import importlib

import supabase_queries as supa

importlib.reload(supa)


from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch, requests
from io import BytesIO

import os
importlib.reload(os)

from dotenv import load_dotenv

import json

from IPython.display import display, Image as IPythonImage 


In [41]:

model = CLIPModel.from_pretrained("patrickjohncyh/fashion-clip")
proc  = CLIPProcessor.from_pretrained("patrickjohncyh/fashion-clip")
device = "cpu"
model.to(device)
model.eval()

def load_img(url):
    img = Image.open(BytesIO(requests.get(url, timeout=20).content)).convert("RGB")
    return img

def clip_embed(images=None, texts=None):
    inputs = proc(text=texts, images=images, return_tensors="pt", padding=True, max_length=77,truncation=True)
    with torch.no_grad():
        out = model(**{k: v.to(device) for k,v in inputs.items()})
    img = out.image_embeds if images is not None else None
    txt = out.text_embeds  if texts is not None else None
    if img is not None: img = torch.nn.functional.normalize(img, dim=-1)
    if txt is not None: txt = torch.nn.functional.normalize(txt, dim=-1)
    return img, txt

In [42]:

model = CLIPModel.from_pretrained("patrickjohncyh/fashion-clip")
proc  = CLIPProcessor.from_pretrained("patrickjohncyh/fashion-clip")
device = "cpu"
model.to(device)
model.eval()

def load_img(url):
    # Add a User-Agent header to mimic a web browser
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    
    # Make the request with the headers
    response = requests.get(url, headers=headers, timeout=20)
    
    # Check if the request was successful
    if response.status_code != 200:
        raise ConnectionError(f"Request failed with status code {response.status_code} for URL: {url}")
        
    # Open the image from the content
    img = Image.open(BytesIO(response.content)).convert("RGB")
    return img

def clip_embed(images=None, texts=None):
    inputs = proc(text=texts, images=images, return_tensors="pt", padding=True, max_length=77,truncation=True)
    with torch.no_grad():
        out = model(**{k: v.to(device) for k,v in inputs.items()})
    img = out.image_embeds if images is not None else None
    txt = out.text_embeds  if texts is not None else None
    if img is not None: img = torch.nn.functional.normalize(img, dim=-1)
    if txt is not None: txt = torch.nn.functional.normalize(txt, dim=-1)
    return img, txt

In [None]:
# Ensure the query vector is a NumPy array (done in step 1)

item_1_url = "https://image.hm.com/assets/hm/45/c5/45c538c59a9386480da9acd855d2dcff99591a1f.jpg?imwidth=768"
item_2_url = "https://static.zara.net/assets/public/f37d/0305/83004adf9a2a/d8a08df9e15b/04696310803-e1/04696310803-e1.jpg?ts=1758534493243&w=750"

query = "Relaxed fit knit polo shirt made from a cotton blend yarn. Features a lapel collar with a front zip closure and long sleeves. Ribbed trim"


img_1 = load_img(item_1_url)
img_emb_1, query_emb = clip_embed(images=[img_1], texts=[query])

img_2 = load_img(item_2_url)
img_emb_2, _ = clip_embed(images=[img_2], texts=[query])



In [53]:
import numpy as np

def calculate_cosine_similarity(vec_a, vec_b):
    """Calculates the cosine similarity between two NumPy vectors."""
    # Ensure a single dimension for the dot product if necessary, 
    # though numpy.dot usually handles this.
    vec_a = vec_a.squeeze()
    vec_b = vec_b.squeeze()

    # Dot product
    dot_product = np.dot(vec_a, vec_b)
    
    # Magnitudes (Euclidean norms)
    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)
    
    # Check for zero vector to prevent division by zero
    if norm_a == 0 or norm_b == 0:
        return 0.0 # Or raise an error, but 0.0 is common for similarity
        
    return dot_product / (norm_a * norm_b)

# 1. Similarity for Item 1
similarity_1 = calculate_cosine_similarity(img_emb_1, query_emb)
print(f"Similarity (Item 1 vs Query): {similarity_1:.4f}")

# 2. Similarity for Item 2
similarity_2 = calculate_cosine_similarity(img_emb_2, query_emb)
print(f"Similarity (Item 2 vs Query): {similarity_2:.4f}")

Similarity (Item 1 vs Query): 0.2597
Similarity (Item 2 vs Query): 0.3298


  dot_product = np.dot(vec_a, vec_b)


In [54]:
client = supa.setup_supabase_client()

url = 'https://www.zara.com/us/en/knit-quarter-zip-polo-shirt-p04696310.html'

product = supa.query_product_url(client, url=url, table_name="product_data")


✅ Supabase client successfully initialized.
Querying record with url: 'https://www.zara.com/us/en/knit-quarter-zip-polo-shirt-p04696310.html'...
✅ Retrieved 1 record with url 'https://www.zara.com/us/en/knit-quarter-zip-polo-shirt-p04696310.html'.


In [55]:
img_embedding_str = product['img_embedding'].item()

# 1. Parse the string into a Python list
img_list = json.loads(img_embedding_str)

# 2. Convert the list to a NumPy array with the correct float type
vec_a = np.array(img_list, dtype=np.float32)
similarity_3 = calculate_cosine_similarity(vec_a, query_emb)
print(f"Similarity (Item from supabase vs Query): {similarity_3:.4f}")

Similarity (Item from supabase vs Query): 0.3420


  dot_product = np.dot(vec_a, vec_b)
