In [8]:
! pip install img2vec-pytorch

import os
import pickle
from img2vec_pytorch import Img2Vec
from PIL import Image
import torch



In [14]:

img2vec = Img2Vec(model='resnet50') 


dataset_dir = r"C:\Users\hp\Downloads\business_case\data\DAM"
dataset_test_dir = r"C:\Users\hp\Downloads\business_case\data\test_image_headmind"

# Function to extract embeddings
def extract_embeddings(img_dir):
    embeddings = []
    image_paths = []

    for img_name in os.listdir(img_dir):
        img_path = os.path.join(img_dir, img_name)
        try:
            img = Image.open(img_path)
            
            vec = torch.tensor(img2vec.get_vec(img))
            embeddings.append(vec)
            image_paths.append(img_path)
        except Exception as e:
            print(f"Error processing {img_path}: {e}")

    return torch.stack(embeddings), image_paths

# Extract embeddings for dataset images
dataset_embeddings, dataset_image_paths = extract_embeddings(dataset_dir)

# Extract embeddings for test images
test_embeddings, test_image_paths = extract_embeddings(dataset_test_dir)

# Function to compute cosine similarity scores
def compute_scores(emb_one, emb_two):
    scores = torch.nn.functional.cosine_similarity(emb_one, emb_two)
    return scores

# Function to fetch the most similar images for each test image
def fetch_similar(test_embeddings, dataset_embeddings, dataset_image_paths, top_k=1):
    results = []

    for i, test_embedding in enumerate(test_embeddings):
        
        scores = compute_scores(test_embedding.unsqueeze(0), dataset_embeddings)
        
        
        top_k_indices = torch.argsort(scores, descending=True)[:top_k]
        
        
        top_k_images = [(dataset_image_paths[idx], scores[idx].item()) for idx in top_k_indices]
        
        results.append({
            "test_image_index": i,
            "test_image_path": test_image_paths[i],
            "similar_images": top_k_images
        })

    return results

# Fetch the most similar images for each test image
top_k = 5  
results = fetch_similar(
    test_embeddings=test_embeddings,
    dataset_embeddings=dataset_embeddings,
    dataset_image_paths=dataset_image_paths,
    top_k=top_k
)

# Print results for each test image
for result in results:
    print(f"Test Image: {result['test_image_path']}")
    for similar_image in result['similar_images']:
        print(f"  Similar Image: {similar_image[0]}, Score: {similar_image[1]:.4f}")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\hp/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:07<00:00, 13.2MB/s]


Test Image: C:\Users\hp\Downloads\business_case\data\test_image_headmind\image-20210928-102713-12d2869d.jpg
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0538OCALM35R.jpeg, Score: 0.7730
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0538OCALM52R.jpeg, Score: 0.7677
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0505OAWAXM25Y.jpeg, Score: 0.7551
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\CRO44500SM334.jpeg, Score: 0.7534
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\S0856ORWOM70H.jpeg, Score: 0.7531
Test Image: C:\Users\hp\Downloads\business_case\data\test_image_headmind\image-20210928-102718-2474636a.jpg
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\S0856ORWOM70H.jpeg, Score: 0.7767
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0538OCALM22Y.jpeg, Score: 0.7698
  Similar Image: C:\Users\hp\Downloads\business_case\data\DAM\M0538OCEAM39U.jpeg, Score: 0.7675
  Similar Image

In [16]:
import random

# Select a random test image
random_index = random.randint(0, len(test_image_paths) - 1)
random_test_image = test_image_paths[random_index]

# Extract the embedding for the random test image
random_test_embedding = test_embeddings[random_index]

# Compute cosine similarity with all dataset embeddings
random_scores = torch.nn.functional.cosine_similarity(
    random_test_embedding.unsqueeze(0), dataset_embeddings
)

# Find the most similar image
best_match_idx = torch.argmax(random_scores)
best_match_image = dataset_image_paths[best_match_idx]
best_score = random_scores[best_match_idx].item()

# Display results
print(f"Random Test Image: {random_test_image}")
print(f"Best Match: {best_match_image}")
print(f"Similarity Score: {best_score:.4f}")

Random Test Image: C:\Users\hp\Downloads\business_case\data\test_image_headmind\IMG_6940.jpg
Best Match: C:\Users\hp\Downloads\business_case\data\DAM\KCK276NKRS900.jpeg
Similarity Score: 0.7572
