<a href="https://colab.research.google.com/github/KhangTheKangaroo/Image-Retrieval/blob/main/Image_Retrieval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [32]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
!gdown --id 1msLVo0g0LFmL9-qZ73vq9YEVZwbzOePF # Download the dataset
!unzip -q data.zip

In [25]:
def plot_results(query_path, ls_path_score, reverse=False, top_k=5):
    """Show the query image followed by the best-ranked retrieved images.

    Args:
        query_path: Path of the query image file.
        ls_path_score: List of ``(image_path, score)`` pairs. The list itself
            is left untouched; ranking is done on a sorted copy.
        reverse: Sort direction handed to ``sorted``. Keep ``False`` when a
            lower score is better (distances); pass ``True`` when a higher
            score is better (similarities).
        top_k: Maximum number of results to display. Defaults to 5.
    """
    # Rank on a copy so the caller's list keeps its original order.
    ranked = sorted(ls_path_score, key=lambda x: x[1], reverse=reverse)
    # Never index past the end when fewer than top_k candidates exist.
    n_shown = max(0, min(top_k, len(ranked)))

    # Display query image
    query_image = plt.imread(query_path)
    plt.figure(figsize=(5, 5))
    plt.imshow(query_image)
    plt.title("Query Image")
    plt.axis('off')
    plt.show()

    # Display the top results side by side
    plt.figure(figsize=(20, 10))
    for i, (image_path, score) in enumerate(ranked[:n_shown]):
        image = plt.imread(image_path)
        plt.subplot(1, n_shown, i + 1)
        plt.imshow(image)

        # The class name is the parent folder of the image; paths in this
        # notebook are always built with '/' separators.
        class_name = image_path.split('/')[-2]
        plt.title(f"{class_name}")

        plt.axis('off')
    plt.suptitle(f"Top {n_shown} Results")
    plt.show()

In [3]:
# Top-level folder the dataset archive is extracted into.
ROOT = 'data'
# Class labels are the entry names found under the training folder, in sorted order.
CLASS_NAME = sorted(os.listdir(f"{ROOT}/train"))

In [11]:
def read_image_from_path(path, size):
  """Load an image file as a fixed-size RGB pixel array.

  Args:
    path: Location of the image file.
    size: Target size handed to ``Image.resize`` (PIL expects
      ``(width, height)``).

  Returns:
    ``np.ndarray`` built from the RGB-converted, resized image.
  """
  # The context manager closes the underlying file deterministically instead
  # of leaving that to garbage collection.
  with Image.open(path) as img:
    resized = img.convert('RGB').resize(size)
  return np.array(resized)

def folder_to_images(folder, size):
  """Load every readable image of ``folder`` into a single array.

  Args:
    folder: Directory whose entries are image files.
    size: ``(width, height)`` every image is resized to.

  Returns:
    Tuple ``(images, images_path)``. ``images`` is a float array of shape
    ``(n, height, width, 3)`` and ``images_path[i]`` is the path of
    ``images[i]``. Entries that cannot be opened as an image are skipped with
    a warning, so ``n`` can be smaller than the number of directory entries.
  """
  import warnings  # local import: only used when an entry has to be skipped

  # os.listdir returns entries in arbitrary order; sort for reproducible runs.
  # Paths are joined with '/' on purpose: plot_results recovers the class
  # name with split('/').
  list_dir = [folder + '/' + name for name in sorted(os.listdir(folder))]
  # PIL sizes are (width, height) while pixel arrays are (height, width, 3).
  width, height = size
  images = np.zeros(shape=(len(list_dir), height, width, 3))
  images_path = []

  for path in list_dir:
    try:
      pixels = read_image_from_path(path, size)
    except OSError as err:
      # Covers unreadable/non-image files and sub-directories.
      warnings.warn(f"Skipping unreadable image {path}: {err}")
      continue
    # Fill rows contiguously so skipped entries leave no blank rows.
    images[len(images_path)] = pixels
    images_path.append(path)

  # Drop the unused tail rows left by skipped entries.
  return images[:len(images_path)], images_path

In [9]:
# Using L1/Manhattan Distance

def abs_diff(query, data):
  """L1 (Manhattan) distance between ``query`` and every item of ``data``.

  Args:
    query: Array broadcastable against a single item of ``data``.
    data: Array whose first axis indexes the items to compare.

  Returns:
    Float array holding one distance per item along the first axis of
    ``data``.
  """
  # Promote to float before subtracting: unsigned integer inputs (e.g. uint8
  # pixels) would otherwise wrap around instead of going negative.
  query = np.asarray(query, dtype=np.float64)
  data = np.asarray(data, dtype=np.float64)
  # Reduce over every axis except the first (item) axis.
  axis_batch_size = tuple(range(1, data.ndim))
  return np.sum(np.abs(data - query), axis=axis_batch_size)

def get_L1_Score(root_img_path, query_path, size):
  """Score every class image against the query with the L1 distance.

  Args:
    root_img_path: Directory holding one sub-folder per class. A trailing
      '/' is optional.
    query_path: Path of the query image.
    size: Size the query and every candidate image are resized to.

  Returns:
    Tuple ``(query, ls_path_score)``: the query pixel array and a list of
    ``(image_path, score)`` pairs, where a lower score means a closer match.
  """
  query = read_image_from_path(query_path, size)
  # Normalise the root so the joined path is correct with or without a
  # trailing slash; '/' is kept as separator for the class-name lookup.
  root = root_img_path.rstrip('/')
  ls_path_score = []

  # os.listdir order is arbitrary; sort so results are reproducible.
  for folder in sorted(os.listdir(root_img_path)):
    if folder not in CLASS_NAME:
      continue  # only folders that are known classes are scored
    images_np, images_path = folder_to_images(root + '/' + folder, size)
    score = abs_diff(query, images_np)
    ls_path_score.extend(zip(images_path, score))

  return query, ls_path_score

In [None]:
# L1/Manhattan Distance Test

root_img_path = f"{ROOT}/train/"
size = (448, 448)

# Run the same retrieval for each query; a smaller L1 distance ranks first.
for query_path in (
    f"{ROOT}/test/Orange_easy/0_100.jpg",
    f"{ROOT}/test/African_crocodile/n01697457_18534.JPEG",
):
  query, ls_path_score = get_L1_Score(root_img_path, query_path, size)
  plot_results(query_path, ls_path_score, reverse=False)

In [None]:
# Using L2/Euclidean Distance (scored as the mean squared difference, which ranks same-sized images identically)

def mean_square_diff(query, data):
  """Mean squared difference between ``query`` and every item of ``data``.

  Args:
    query: Array broadcastable against a single item of ``data``.
    data: Array whose first axis indexes the items to compare.

  Returns:
    Float array holding one mean squared difference per item along the first
    axis of ``data``.
  """
  # Promote to float first: with unsigned integer inputs (e.g. uint8 pixels)
  # both the subtraction and the squaring would wrap around.
  query = np.asarray(query, dtype=np.float64)
  data = np.asarray(data, dtype=np.float64)
  # Average over every axis except the first (item) axis.
  axis_batch_size = tuple(range(1, data.ndim))
  return np.mean((data - query)**2, axis=axis_batch_size)

def get_L2_Score(root_img_path, query_path, size):
  """Score every class image against the query with the mean squared difference.

  Args:
    root_img_path: Directory holding one sub-folder per class. A trailing
      '/' is optional.
    query_path: Path of the query image.
    size: Size the query and every candidate image are resized to.

  Returns:
    Tuple ``(query, ls_path_score)``: the query pixel array and a list of
    ``(image_path, score)`` pairs, where a lower score means a closer match.
  """
  query = read_image_from_path(query_path, size)
  # Normalise the root so the joined path is correct with or without a
  # trailing slash; '/' is kept as separator for the class-name lookup.
  root = root_img_path.rstrip('/')
  ls_path_score = []

  # os.listdir order is arbitrary; sort so results are reproducible.
  for folder in sorted(os.listdir(root_img_path)):
    if folder not in CLASS_NAME:
      continue  # only folders that are known classes are scored
    images_np, images_path = folder_to_images(root + '/' + folder, size)
    score = mean_square_diff(query, images_np)
    ls_path_score.extend(zip(images_path, score))

  return query, ls_path_score

In [None]:
# L2/Euclidean Distance Test

root_img_path = f"{ROOT}/train/"
size = (448, 448)

# Run the same retrieval for each query; a smaller squared difference ranks first.
for query_path in (
    f"{ROOT}/test/Orange_easy/0_100.jpg",
    f"{ROOT}/test/African_crocodile/n01697457_18534.JPEG",
):
  query, ls_path_score = get_L2_Score(root_img_path, query_path, size)
  plot_results(query_path, ls_path_score, reverse=False)

In [39]:
# Using Cosine Similarity

def cosine_similarity(query, data):
  """Cosine similarity between ``query`` and every item of ``data``.

  Note: this definition shadows the ``cosine_similarity`` imported from
  ``sklearn.metrics.pairwise`` at the top of the notebook.

  Args:
    query: Array broadcastable against a single item of ``data``.
    data: Array whose first axis indexes the items to compare.

  Returns:
    Float array holding one similarity per item along the first axis of
    ``data``; higher means more similar.
  """
  # Promote to float first: squaring or multiplying uint8 pixels overflows
  # and silently corrupts the norms and the dot product.
  query = np.asarray(query, dtype=np.float64)
  data = np.asarray(data, dtype=np.float64)
  # Reduce over every axis except the first (item) axis.
  axis_batch_size = tuple(range(1, data.ndim))
  query_norm = np.sqrt(np.sum(query**2))
  data_norm = np.sqrt(np.sum(data**2, axis=axis_batch_size))
  # eps keeps the division defined when either vector is all zeros.
  return np.sum(data * query, axis=axis_batch_size) / (query_norm*data_norm + np.finfo(float).eps)


def get_CosineSimilarity_Score(root_img_path, query_path, size):
  """Score every class image against the query with the cosine similarity.

  Args:
    root_img_path: Directory holding one sub-folder per class. A trailing
      '/' is optional.
    query_path: Path of the query image.
    size: Size the query and every candidate image are resized to.

  Returns:
    Tuple ``(query, ls_path_score)``: the query pixel array and a list of
    ``(image_path, score)`` pairs, where a higher score means a closer match.
  """
  query = read_image_from_path(query_path, size)
  # Normalise the root so the joined path is correct with or without a
  # trailing slash; '/' is kept as separator for the class-name lookup.
  root = root_img_path.rstrip('/')
  ls_path_score = []

  # os.listdir order is arbitrary; sort so results are reproducible.
  for folder in sorted(os.listdir(root_img_path)):
    if folder not in CLASS_NAME:
      continue  # only folders that are known classes are scored
    images_np, images_path = folder_to_images(root + '/' + folder, size)
    score = cosine_similarity(query, images_np)
    ls_path_score.extend(zip(images_path, score))

  return query, ls_path_score

In [None]:
# Cosine Similarity Test

root_img_path = f"{ROOT}/train/"
size = (448, 448)

# Run the same retrieval for each query; a larger similarity ranks first.
for query_path in (
    f"{ROOT}/test/Orange_easy/0_100.jpg",
    f"{ROOT}/test/African_crocodile/n01697457_18534.JPEG",
):
  query, ls_path_score = get_CosineSimilarity_Score(root_img_path, query_path, size)
  plot_results(query_path, ls_path_score, reverse=True)

In [41]:
# Using Correlation Coefficient

def correlation_coefficient(query, data):
  """Correlation coefficient between ``query`` and every item of ``data``.

  Both inputs are mean-centred (``data`` per item) and the centred values are
  then compared with a normalised dot product.

  Args:
    query: Array broadcastable against a single item of ``data``.
    data: Array whose first axis indexes the items to compare.

  Returns:
    Array holding one coefficient per item along the first axis of ``data``.
  """
  # Every axis except the first (item) axis is reduced.
  reduce_axes = tuple(range(1, data.ndim))

  query_centered = query - np.mean(query)
  data_centered = data - np.mean(data, axis=reduce_axes, keepdims=True)

  query_scale = np.sqrt(np.sum(query_centered**2))
  data_scale = np.sqrt(np.sum(data_centered**2, axis=reduce_axes))
  numerator = np.sum(data_centered * query_centered, axis=reduce_axes)

  # eps keeps the division defined for constant (zero-variance) inputs.
  denominator = query_scale*data_scale + np.finfo(float).eps
  return numerator / denominator

def get_CorrCoef_Score(root_img_path, query_path, size):
  """Score every class image against the query with the correlation coefficient.

  Args:
    root_img_path: Directory holding one sub-folder per class. A trailing
      '/' is optional.
    query_path: Path of the query image.
    size: Size the query and every candidate image are resized to.

  Returns:
    Tuple ``(query, ls_path_score)``: the query pixel array and a list of
    ``(image_path, score)`` pairs, where a higher score means a closer match.
  """
  query = read_image_from_path(query_path, size)
  # Normalise the root so the joined path is correct with or without a
  # trailing slash; '/' is kept as separator for the class-name lookup.
  root = root_img_path.rstrip('/')
  ls_path_score = []

  # os.listdir order is arbitrary; sort so results are reproducible.
  for folder in sorted(os.listdir(root_img_path)):
    if folder not in CLASS_NAME:
      continue  # only folders that are known classes are scored
    images_np, images_path = folder_to_images(root + '/' + folder, size)
    score = correlation_coefficient(query, images_np)
    ls_path_score.extend(zip(images_path, score))

  return query, ls_path_score

In [None]:
# Correlation Coefficient Test

root_img_path = f"{ROOT}/train/"
size = (448, 448)

# Run the same retrieval for each query; a larger coefficient ranks first.
for query_path in (
    f"{ROOT}/test/Orange_easy/0_100.jpg",
    f"{ROOT}/test/African_crocodile/n01697457_18534.JPEG",
):
  query, ls_path_score = get_CorrCoef_Score(root_img_path, query_path, size)
  plot_results(query_path, ls_path_score, reverse=True)