In [1]:
import numpy as np
import matplotlib.image as mpimg
from PIL import Image
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt




In [2]:
# VGG16 with ImageNet weights, built without the classification head and with
# max pooling, so it is used below as an embedding extractor.
vgg16 = VGG16(
    weights='imagenet',
    include_top=False,
    pooling='max',
    input_shape=(224, 224, 3),
)

# The network is never trained in this notebook: mark every layer as
# non-trainable.
for layer in vgg16.layers:
    layer.trainable = False

def load_and_preprocess_image(image_path):
    """
    Read one image file and turn it into an input batch for VGG16.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, and passed through VGG16's `preprocess_input`.
    """
    pil_image = image.load_img(image_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_image)
    # Prepend the batch axis so the array holds a batch of one image.
    single_image_batch = pixels[np.newaxis, ...]
    return preprocess_input(single_image_batch)

def get_image_embeddings(img_array, verbose=0):
    """
    Get image embeddings using the VGG16 model.

    Args:
        img_array: Preprocessed image batch; passed unchanged to
            `vgg16.predict`.
        verbose: Verbosity forwarded to `vgg16.predict`. Defaults to 0 so that
            calling this function inside a loop does not print one progress
            bar per call.

    Returns:
        The result of `vgg16.predict` for the batch.
    """
    return vgg16.predict(img_array, verbose=verbose)

def get_similarity_score(first_image_path, second_image_path):
    """
    Return the cosine similarity between the VGG16 embeddings of two images.
    """
    image_paths = (first_image_path, second_image_path)
    # Load and preprocess both files first, then embed them in the same order.
    batches = [load_and_preprocess_image(path) for path in image_paths]
    first_embedding, second_embedding = [
        get_image_embeddings(batch) for batch in batches
    ]
    # cosine_similarity returns a matrix of pairwise scores; the score for
    # this pair is read from position [0][0].
    similarity_matrix = cosine_similarity(first_embedding, second_embedding)
    return similarity_matrix[0][0]

def draw_images(*image_paths, figsize=(8, 8)):
    """
    Display the given images side by side in a single row.

    Args:
        *image_paths: Paths of the image files to show, one subplot each.
        figsize: Figure size forwarded to `plt.subplots`.

    Raises:
        ValueError: If no image path is given.
    """
    if not image_paths:
        raise ValueError("draw_images() needs at least one image path")

    # squeeze=False always yields a 2-D array of axes. Without it, a single
    # image makes plt.subplots return a bare Axes object, which cannot be
    # iterated by zip().
    fig, axes = plt.subplots(1, len(image_paths), figsize=figsize, squeeze=False)
    for ax, image_path in zip(axes[0], image_paths):
        img = mpimg.imread(image_path)
        ax.imshow(img)
        ax.axis('off')
    plt.show()





In [6]:
import os
import random

def pick_two_random_images(image_directory):
    """
    Pick two distinct image files at random from a directory.

    Args:
        image_directory: Path of the directory to search (not recursive).

    Returns:
        A list with the file names (not full paths) of two different images.

    Raises:
        ValueError: If the directory holds fewer than two image files.
    """
    image_extensions = ('.jpeg', '.jpg', '.png', '.webp')
    # Sort the listing because os.listdir returns entries in arbitrary order;
    # a stable candidate list lets a seeded `random` reproduce the same picks.
    images = sorted(
        file for file in os.listdir(image_directory)
        # Compare extensions case-insensitively so "IMG.JPG" is accepted too.
        if file.lower().endswith(image_extensions)
        # Skip directories whose names merely look like image files.
        and os.path.isfile(os.path.join(image_directory, file))
    )
    if len(images) < 2:
        raise ValueError(
            f"Need at least two images in {image_directory!r}, found {len(images)}"
        )
    # random.sample draws without replacement, so the two names differ.
    return random.sample(images, 2)

In [7]:
# Experiment: VGG16 cosine similarity for 50 randomly drawn pairs of distinct
# images from the dataset directory.
similarity_scores = []
image_directory = 'images_dataset'

for i in range(50):
    random_images = pick_two_random_images(image_directory)
    # Build the paths from image_directory instead of repeating the folder
    # name, so changing the directory above is the only edit needed.
    random_images = [os.path.join(image_directory, x) for x in random_images]
    similarity_score = get_similarity_score(random_images[0], random_images[1])
    similarity_scores.append(similarity_score)



In [9]:
# First line: every collected score; second line: their arithmetic mean.
print(similarity_scores, sum(similarity_scores) / len(similarity_scores), sep="\n")

[0.37437832, 0.44788152, 0.35394078, 0.3487333, 0.4075293, 0.28552586, 0.4715221, 0.3959198, 0.30082694, 0.3250595, 0.42805463, 0.41222394, 0.15829447, 0.301834, 0.31155407, 0.34102622, 0.61908424, 0.449053, 0.27050802, 0.3419294, 0.23727755, 0.41815513, 0.5046177, 0.51089966, 0.51966983, 0.46939695, 0.45255134, 0.34645164, 0.39024067, 0.36121836, 0.57808304, 0.2815311, 0.39767712, 0.3544037, 0.46438825, 0.350104, 0.48360658, 0.4322598, 0.48374, 0.40060586, 0.43448043, 0.46017373, 0.44232988, 0.40160435, 0.44389543, 0.3482908, 0.3600008, 0.53659683, 0.50571513, 0.25043538]
0.39930560857057573


In [10]:
# Experiment: VGG16 cosine similarity of 50 randomly drawn images, each
# compared with itself.
similarity_scores = []
image_directory = 'images_dataset'

for i in range(50):
    random_images = pick_two_random_images(image_directory)
    # Build the paths from image_directory instead of repeating the folder
    # name, so changing the directory above is the only edit needed.
    random_images = [os.path.join(image_directory, x) for x in random_images]
    # NOTE(review): the first path is passed for both arguments, so this
    # measures self-similarity (expected to be ~1.0). If two different images
    # were intended, the second argument should be random_images[1].
    similarity_score = get_similarity_score(random_images[0], random_images[0])
    similarity_scores.append(similarity_score)



In [11]:
# First line: every collected score; second line: their arithmetic mean.
print(similarity_scores, sum(similarity_scores) / len(similarity_scores), sep="\n")

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0000001, 1.0, 0.9999999, 0.99999994, 1.0, 1.0000002, 1.0, 1.0000002, 0.9999999, 0.99999994, 0.9999999, 1.0000001, 1.0, 0.99999994, 1.0000001, 1.0000001, 1.0, 1.0, 1.0, 1.0000001, 1.0, 0.9999999, 1.0000001, 1.0, 1.0, 1.0, 0.99999994, 0.9999999, 1.0, 0.99999994, 0.99999994, 1.0, 1.0000002, 1.0000002, 1.0, 1.0, 1.0000002, 1.0, 0.99999994, 1.0000001, 1.0, 1.0, 0.9999999, 1.0000001, 0.9999999]
1.0000000178813935
