In [3]:
from transformers import CLIPProcessor, CLIPModel
import torch
from PIL import Image
import os

# Step 1: Load FashionCLIP
model_name = "patrickjohncyh/fashion-clip"
image_processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(model_name)


def _list_images(directory, extensions):
    """Return the sorted paths of the files in *directory* with a given extension.

    Args:
        directory: Folder to scan (not recursive).
        extensions: Tuple of lower-case suffixes, e.g. ``(".jpeg",)``.

    Returns:
        A list of full paths. The extension match is case-insensitive so
        ``.JPEG`` files are not silently skipped, and the list is sorted
        because ``os.listdir`` returns entries in arbitrary order, which
        would make result indices differ between runs or machines.
    """
    return sorted(
        os.path.join(directory, file_name)
        for file_name in os.listdir(directory)
        if file_name.lower().endswith(extensions)
    )


def _embed_images(image_paths, processor, clip_model):
    """Return L2-normalised image embeddings, one row per path.

    Args:
        image_paths: Paths of the images to embed, in output-row order.
        processor: Object called as ``processor(images=..., return_tensors="pt",
            padding=True)`` to build the model inputs.
        clip_model: Model exposing ``get_image_features(**inputs)``.

    Returns:
        A tensor made by concatenating the per-image embeddings along dim 0.

    Raises:
        ValueError: If *image_paths* is empty (``torch.cat`` on an empty list
            would otherwise fail with a far less helpful message).
    """
    if not image_paths:
        raise ValueError("No images to embed: the list of image paths is empty.")

    embeddings = []
    with torch.no_grad():  # inference only, no autograd graph needed
        for image_path in image_paths:
            # Close the file handle as soon as the pixels are converted;
            # convert() yields a separate in-memory image.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
            inputs = processor(images=image, return_tensors="pt", padding=True)
            embedding = clip_model.get_image_features(**inputs)
            # keepdim=True keeps the norm broadcastable per row, so the
            # normalisation stays correct for any batch size.
            embedding = embedding / embedding.norm(p=2, dim=-1, keepdim=True)
            embeddings.append(embedding)
    return torch.cat(embeddings, dim=0)


# Step 2: Load and preprocess the dataset
dataset_dir = r"C:\Users\hp\Downloads\business_case\data\DAM"
dataset_image_paths = _list_images(dataset_dir, (".jpeg",))
dataset_embeddings = _embed_images(dataset_image_paths, image_processor, model)

# Step 3: Process the test dataset
dataset_test_dir = r"C:\Users\hp\Downloads\business_case\data\test_image_headmind"
dataset_imagetests_paths = _list_images(dataset_test_dir, (".jpg",))
dataset_test_embeddings = _embed_images(dataset_imagetests_paths, image_processor, model)

# Step 4: Compute cosine similarity
def compute_scores(emb_one, emb_two):
    """Return the cosine similarity between two embedding tensors.

    The similarity is taken along dim 1 and the inputs are broadcast against
    each other, so a single ``(1, D)`` query compared with an ``(N, D)``
    matrix yields ``N`` scores.
    """
    # dim=1 and eps=1e-8 are the library defaults, spelled out for clarity.
    return torch.nn.functional.cosine_similarity(emb_one, emb_two, dim=1, eps=1e-8)

def fetch_similar(test_embeddings, dataset_embeddings, dataset_image_paths, top_k=5):
    """Find the most similar dataset images for every test embedding.

    Args:
        test_embeddings: Iterable of 1-D embedding tensors (for example a 2-D
            tensor, which is iterated row by row).
        dataset_embeddings: 2-D tensor with one embedding per row.
        dataset_image_paths: Sequence of paths indexed by dataset row number;
            presumably in the same order as ``dataset_embeddings`` (verify
            against the caller).
        top_k: Maximum number of matches per test embedding. Values larger
            than the number of dataset rows are clamped to that number.

    Returns:
        A list with one dict per test embedding:
        ``{"test_image_index": i, "similar_images": [(path, score), ...]}``
        where the pairs are ordered by descending cosine similarity.

    Raises:
        ValueError: If ``top_k`` is negative. With plain slicing a negative
            value would silently mean "all but the last |top_k|" matches.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")
    # torch.topk rejects k larger than the number of candidates.
    k = min(top_k, dataset_embeddings.shape[0])

    results = []
    for i, test_embedding in enumerate(test_embeddings):
        # (1, D) query broadcast against (N, D) rows -> N cosine scores.
        scores = torch.nn.functional.cosine_similarity(
            test_embedding.unsqueeze(0), dataset_embeddings
        )
        # topk returns the k best scores already sorted in descending order,
        # avoiding a full sort of all N scores.
        top_scores, top_indices = torch.topk(scores, k)
        # One tolist() per tensor gives plain ints/floats for list indexing
        # and for the returned scores.
        similar_images = [
            (dataset_image_paths[idx], score)
            for idx, score in zip(top_indices.tolist(), top_scores.tolist())
        ]
        results.append({"test_image_index": i, "similar_images": similar_images})
    return results

# Step 5: Retrieve the closest dataset images for every test image
top_k = 5
results = fetch_similar(
    dataset_test_embeddings,
    dataset_embeddings,
    dataset_image_paths,
    top_k=top_k,
)

# Report each test image (by index) followed by its ranked matches
for result in results:
    print(f"Test Image Index: {result['test_image_index']}")
    for match_path, match_score in result["similar_images"]:
        print(f"  Similar Image: {match_path}, Score: {match_score:.4f}")







preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer_config.json:   0%|          | 0.00/568 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/862k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.46k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

Test Image Index: 0
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\S0856OWCBM74P.jpeg, Score: 0.7348
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0538OCALM52R.jpeg, Score: 0.7342
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0505OCEAM68P.jpeg, Score: 0.7320
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M505SOUCGM54P.jpeg, Score: 0.7233
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0505OAZEXM43R.jpeg, Score: 0.7220
Test Image Index: 1
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0531NWDDM900.jpeg, Score: 0.7575
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0505SLOIM989.jpeg, Score: 0.7504
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0538BCALM900.jpeg, Score: 0.7463
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\CAL44551M83B.jpeg, Score: 0.7451
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0505SNEAM900.jpeg, Score: 0.7391
