# 3 Ways to Find the Image Most Similar to a Query Image

In [2]:
import os
import numpy as np
from PIL import Image
from scipy.spatial.distance import euclidean

def extract_histogram(image_path, normalize=False):
    """Return the colour histogram of an image as a 1-D NumPy array.

    The file is opened with Pillow, converted to RGB, and the result of
    ``Image.histogram()`` is wrapped in ``np.array``.

    Parameters
    ----------
    image_path : str or path-like
        Location of the image file.
    normalize : bool, optional
        When True, the bin counts are divided by their total so the
        histogram sums to 1. This makes images of different pixel counts
        comparable. Defaults to False, which returns the raw counts.

    Returns
    -------
    numpy.ndarray
        Raw bin counts, or float fractions when ``normalize`` is True.
    """
    # The context manager closes the underlying file deterministically instead
    # of relying on garbage collection of the temporary Image object.
    with Image.open(image_path) as image:
        histogram = np.array(image.convert('RGB').histogram())

    if normalize:
        histogram = histogram.astype(float)
        total = histogram.sum()
        if total > 0:  # avoid dividing by zero for a degenerate histogram
            histogram /= total
    return histogram

def find_most_similar_image(query_image_path, database_folder):
    """Rank every image in ``database_folder`` against the query by histogram distance.

    For each regular file in the folder the Euclidean distance between its
    histogram and the query histogram is computed. Distances are then rescaled
    to a 0-100 "similarity" score, where the closest image gets 100 and the
    farthest gets 0. One line is printed per image, followed by the most and
    least similar images.

    Parameters
    ----------
    query_image_path : str
        Path of the query image.
    database_folder : str
        Folder whose files are compared with the query.

    Returns
    -------
    None
        Results are printed only.
    """
    query_histogram = extract_histogram(query_image_path)

    similarities = []
    # os.listdir returns entries in arbitrary order; sorting keeps the printed
    # report (and tie-breaking) reproducible between runs.
    for image_name in sorted(os.listdir(database_folder)):
        image_path = os.path.join(database_folder, image_name)
        # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints) which
        # cannot be opened as images.
        if not os.path.isfile(image_path):
            continue
        distance = euclidean(query_histogram, extract_histogram(image_path))
        similarities.append((image_name, distance))

    if not similarities:
        print(f"No images found in {database_folder!r}")
        return

    # min()/max() return the first extreme element, matching a strict
    # "<" / ">" scan, and also work when every distance is 0.
    most_similar_image, min_distance = min(similarities, key=lambda item: item[1])
    least_similar_image, max_distance = max(similarities, key=lambda item: item[1])
    distance_range = max_distance - min_distance

    for image_name, distance in similarities:
        if distance_range > 0:
            similarity_percentage = 100 - ((distance - min_distance) / distance_range * 100)
        else:
            # Single image, or all images equally distant: rescaling would
            # divide by zero, so report them all as the best match.
            similarity_percentage = 100.0
        print(f"{image_name}: {similarity_percentage:.2f}% similarity")

    print(f"\nMost similar image: {most_similar_image} ({min_distance:.2f} distance)")
    print(f"Least similar image: {least_similar_image} ({max_distance:.2f} distance)")

# Inputs for the histogram-based search. Both are relative paths, so they are
# resolved against the notebook's current working directory.
query_image_path = 'query.jpg'
database_folder = 'database'

# Prints one similarity line per database image plus the best and worst match.
find_most_similar_image(query_image_path, database_folder)


12.jpg: 96.38% similarity
4.jpg: 39.56% similarity
3.jpg: 59.67% similarity
14.jpg: 100.00% similarity
10.jpg: 67.20% similarity
5.jpg: 69.83% similarity
6.jpg: 78.52% similarity
2.jpg: 43.48% similarity
13.jpg: 21.20% similarity
7.jpg: 0.00% similarity
11.jpg: 97.22% similarity
1.jpg: 15.61% similarity

Most similar image: 14.jpg (1206648.01 distance)
Least similar image: 7.jpg (5555478.37 distance)


In [6]:
import cv2
import os
import numpy as np

def extract_orb_features(image_path):
    """Compute ORB descriptors for the image at ``image_path``.

    The image is read in grayscale and passed to a freshly created ORB
    detector; only the descriptors are returned (keypoints are discarded).

    Parameters
    ----------
    image_path : str
        Path of the image file.

    Returns
    -------
    The descriptors produced by ``detectAndCompute``, or ``None`` when the
    image could not be read. Callers in this notebook already treat ``None``
    as "no features available".
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports an unreadable or non-image path by returning None
        # rather than raising; passing that on to ORB would fail.
        return None

    orb = cv2.ORB_create()
    _keypoints, descriptors = orb.detectAndCompute(image, None)
    return descriptors

def find_most_similar_image(query_image_path, database_folder):
    """Find the database image with the most ORB descriptor matches to the query.

    Descriptors of the query are matched against each database image with a
    brute-force Hamming matcher (cross-check enabled). The image with the
    highest number of matches is printed.

    Note: this redefines the ``find_most_similar_image`` from the histogram
    cell earlier in the notebook.

    Parameters
    ----------
    query_image_path : str
        Path of the query image.
    database_folder : str
        Folder whose files are compared with the query.

    Returns
    -------
    None
        The result is printed only.

    Raises
    ------
    ValueError
        If no descriptors could be extracted from the query image.
    """
    query_descriptors = extract_orb_features(query_image_path)
    if query_descriptors is None:
        # Without query descriptors every bf.match call would fail with an
        # opaque OpenCV error; fail early with a clear message instead.
        raise ValueError(
            f"No ORB descriptors could be extracted from query image {query_image_path!r}"
        )

    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    max_matches = 0
    most_similar_image = None

    # Sorted so that ties are resolved the same way on every run
    # (os.listdir order is arbitrary).
    for image_name in sorted(os.listdir(database_folder)):
        image_path = os.path.join(database_folder, image_name)
        # Skip sub-directories such as .ipynb_checkpoints.
        if not os.path.isfile(image_path):
            continue

        database_descriptors = extract_orb_features(image_path)
        if database_descriptors is None:
            continue

        num_matches = len(bf.match(query_descriptors, database_descriptors))
        if num_matches > max_matches:
            max_matches = num_matches
            most_similar_image = image_name

    print(f"Most similar image: {most_similar_image} with {max_matches} matches")

# Inputs for the ORB-based search. Both are relative paths, so they are
# resolved against the notebook's current working directory.
query_image_path = 'query.jpg'
database_folder = 'database'

# Prints the database image with the most descriptor matches to the query.
find_most_similar_image(query_image_path, database_folder)



Most similar image: 14.jpg with 147 matches


In [8]:
import os
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np


# Load VGG16 with pretrained weights; on first use the checkpoint is downloaded
# into the torch hub cache (see the download log in this cell's output).
# NOTE(review): newer torchvision releases reportedly prefer `weights=` over
# `pretrained=` — confirm against the installed version before changing.
model = models.vgg16(pretrained=True)
# Drop the last three child modules of the classifier head so the network
# outputs an intermediate feature vector instead of class scores.
model.classifier = nn.Sequential(*list(model.classifier.children())[:-3])
# Switch to inference mode (affects layers such as dropout).
model.eval()


# Preprocessing applied to every image before it is fed to `model`:
# resize to 224x224, convert to a tensor, then normalise each channel.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics expected by torchvision's pretrained models — confirm.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def extract_cnn_features(image_path, model):
    """Run an image through ``model`` and return its output as a flat NumPy array.

    The image is opened with Pillow, converted to RGB, preprocessed with the
    notebook-level ``transform`` pipeline, given a leading batch dimension and
    passed to ``model`` with gradient tracking disabled.

    Parameters
    ----------
    image_path : str or path-like
        Location of the image file.
    model : callable
        Network (or any callable) applied to the preprocessed batch tensor.

    Returns
    -------
    numpy.ndarray
        The model output flattened to one dimension.
    """
    # The context manager closes the underlying file deterministically;
    # convert() yields an independent image that stays valid afterwards.
    with Image.open(image_path) as source:
        rgb_image = source.convert('RGB')

    batch = transform(rgb_image).unsqueeze(0)  # add batch dimension
    with torch.no_grad():
        features = model(batch)
    return features.flatten().numpy()

def find_most_similar_image(query_image_path, database_folder):
    """Find the database image whose CNN features are closest to the query's.

    Features are extracted with ``extract_cnn_features`` using the
    notebook-level ``model``; closeness is the L2 norm of the feature
    difference. The best match and its distance are printed.

    Note: this redefines the ``find_most_similar_image`` from the earlier
    cells of the notebook.

    Parameters
    ----------
    query_image_path : str
        Path of the query image.
    database_folder : str
        Folder whose files are compared with the query.

    Returns
    -------
    None
        The result is printed only.
    """
    query_features = extract_cnn_features(query_image_path, model)
    min_distance = float('inf')
    most_similar_image = None

    # Sorted so that ties are resolved the same way on every run
    # (os.listdir order is arbitrary).
    for image_name in sorted(os.listdir(database_folder)):
        image_path = os.path.join(database_folder, image_name)
        # Skip sub-directories such as .ipynb_checkpoints, which cannot be
        # opened as images.
        if not os.path.isfile(image_path):
            continue

        database_features = extract_cnn_features(image_path, model)
        distance = np.linalg.norm(query_features - database_features)

        if distance < min_distance:
            min_distance = distance
            most_similar_image = image_name

    if most_similar_image is None:
        # Nothing was compared; avoid the misleading "None with distance inf".
        print(f"No comparable images found in {database_folder!r}")
        return

    print(f"Most similar image: {most_similar_image} with distance {min_distance:.2f}")


# Inputs for the CNN-feature search. Both are relative paths, so they are
# resolved against the notebook's current working directory.
query_image_path = 'query.jpg'
database_folder = 'database'

# Prints the database image whose feature vector is closest to the query's.
find_most_similar_image(query_image_path, database_folder)




  from .autonotebook import tqdm as notebook_tqdm
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /home/cvpr/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:50<00:00, 11.0MB/s] 


Most similar image: 14.jpg with distance 23.19
