In [1]:
import numpy as np
import os
import cv2
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

In [2]:
# Load VGG16 with ImageNet weights and without its top (classifier) layers.
model = VGG16(include_top=False, weights='imagenet')


In [3]:
def extract_features(img_path, model):
    """Compute a flat feature vector for a single image file.

    The image is read with OpenCV, converted from OpenCV's BGR channel order
    to RGB, resized to 224x224, wrapped in a batch of one, passed through
    ``preprocess_input`` and then through ``model.predict``. The prediction
    is flattened to one dimension.

    Args:
        img_path: Path of the image file to read.
        model: Object exposing ``predict(batch)``.

    Returns:
        The prediction for the image, reshaped to a 1-D array.

    Raises:
        ValueError: If OpenCV cannot read ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        raise ValueError(f"Could not read image: {img_path!r}")

    # imread yields BGR, while the VGG16 preprocess_input expects RGB input
    # (it performs the RGB -> BGR swap and mean subtraction itself).
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))

    batch = np.expand_dims(img, axis=0)
    batch = preprocess_input(batch)
    features = model.predict(batch)
    return features.reshape((-1,))


In [4]:
def retrieve_similar_images(query_path, dataset_path, model, top_k=5):
    """Rank dataset images by cosine similarity to a query image.

    ``dataset_path`` is scanned one level deep: every sub-directory is
    listed and each regular file inside it is passed to
    ``extract_features``. Entries of ``dataset_path`` that are not
    directories, and entries of a sub-directory that are not regular files,
    are skipped.

    Args:
        query_path: Path of the query image.
        dataset_path: Directory whose sub-directories hold candidate images.
        model: Forwarded unchanged to ``extract_features``.
        top_k: Maximum number of results to return.

    Returns:
        A list of at most ``top_k`` ``(filepath, similarity)`` tuples sorted
        by descending similarity.
    """
    query_features = extract_features(query_path, model)
    # The query norm is the same for every candidate, so compute it once.
    query_norm = np.linalg.norm(query_features)

    similarities = []
    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of equally-scored results reproducible across runs.
    for foldername in sorted(os.listdir(dataset_path)):
        folder = os.path.join(dataset_path, foldername)
        if not os.path.isdir(folder):
            # A stray file here would make os.listdir raise NotADirectoryError.
            continue
        for name in sorted(os.listdir(folder)):
            filepath = os.path.join(folder, name)
            if not os.path.isfile(filepath):
                continue
            features = extract_features(filepath, model)
            denom = query_norm * np.linalg.norm(features)
            # A zero-length vector would give 0/0 = nan, which breaks the
            # sort below; score it as 0.0 instead.
            similarity = np.dot(query_features, features) / denom if denom else 0.0
            similarities.append((filepath, similarity))

    similarities.sort(key=lambda item: item[1], reverse=True)
    return similarities[:top_k]


In [5]:
# Search inputs: the query image, the dataset directory, and how many matches to keep.
query_path = 'query.jpeg'
dataset_path = 'dataset'
top_k = 5

# Run the search with the model loaded earlier in the notebook.
results = retrieve_similar_images(
    query_path=query_path,
    dataset_path=dataset_path,
    model=model,
    top_k=top_k,
)



In [6]:

# List the ranked matches, then show each readable one in its own OpenCV window.
print(f'Top {top_k} most similar images to {query_path}:')
shown = 0
try:
    for rank, (filename, similarity) in enumerate(results, start=1):
        print(f'{rank}. {filename} (similarity: {similarity:.4f})')
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread returns None for unreadable files; imshow would raise on it.
            print(f'   could not read {filename}; not displayed')
            continue
        cv2.imshow(f"match{rank}", img)
        shown += 1
    if shown:
        # Block until a key is pressed while one of the windows has focus.
        cv2.waitKey(0)
finally:
    # Close any opened windows even if displaying failed part-way through.
    cv2.destroyAllWindows()


Top 5 most similar images to query.jpeg:
