In [9]:
# Cell 1: Install required packages
!pip install -r requirements.txt





[notice] A new release of pip is available: 23.2.1 -> 24.1.1
[notice] To update, run: C:\Users\PiyushSAMANT\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [10]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import euclidean_distances

In [11]:
# Folder names of the two image sets that are compared below (relative paths).
image_path_set_1, image_path_set_2 = 'set_1', 'set_2'

In [12]:
def load_image_from_folder(folder_path):
    """Load every readable image found directly inside ``folder_path``.

    Entries are visited in sorted filename order so that the position of an
    image in the returned list is reproducible between runs and machines
    (``os.listdir`` alone returns entries in arbitrary order). Entries for
    which ``cv2.imread`` returns ``None`` are skipped.

    Args:
        folder_path: Path of the directory to scan (sub-folders are not
            traversed).

    Returns:
        list: The loaded images, in sorted filename order. Empty when the
        folder contains nothing readable.
    """
    images = []
    # Sorting makes image indices (used later when reporting the closest
    # match) stable instead of depending on filesystem enumeration order.
    for filename in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is not None:
            images.append(img)
    return images

def detect_sift_features(image, max_features=500):
    """Return a flat SIFT descriptor vector for ``image``.

    The image is converted from BGR to grayscale, SIFT descriptors are
    computed, and the descriptor matrix is brought to exactly
    ``max_features`` rows of 128 values before flattening:

    * no descriptors at all -> all zeros;
    * fewer than ``max_features`` rows -> zero rows are appended;
    * more than ``max_features`` rows -> surplus rows are dropped.

    Args:
        image: BGR image as accepted by ``cv2.cvtColor``.
        max_features: Number of descriptor rows to keep; also passed to
            ``cv2.SIFT_create`` as ``nfeatures``.

    Returns:
        numpy.ndarray: 1-D array of length ``max_features * 128`` when
        ``max_features`` is positive.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create(nfeatures=max_features)
    _, descriptors = sift.detectAndCompute(gray, None)

    if descriptors is None:
        return np.zeros((max_features, 128)).flatten()

    # NOTE(review): OpenCV may hand back slightly more than `nfeatures`
    # keypoints (e.g. keypoints tied on response score). Without this cap the
    # vector length would differ between images and stacking would fail.
    # A non-positive limit is left alone, as in the original code path.
    if 0 < max_features < descriptors.shape[0]:
        descriptors = descriptors[:max_features]

    missing_rows = max_features - descriptors.shape[0]
    if missing_rows > 0:
        # Zero-pad so every image yields the same number of rows.
        descriptors = np.vstack([descriptors, np.zeros((missing_rows, 128))])
    return descriptors.flatten()

def extract_hog_features(image):
    """Compute a flat HOG descriptor for ``image``.

    The image is converted from BGR to grayscale and passed to a
    default-constructed ``cv2.HOGDescriptor``.

    NOTE(review): the length of the result appears to depend on the image
    size, so images of different sizes would not stack - confirm that all
    inputs share the same dimensions.

    Args:
        image: BGR image as accepted by ``cv2.cvtColor``.

    Returns:
        numpy.ndarray: 1-D array holding the HOG descriptor values.
    """
    descriptor = cv2.HOGDescriptor()
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return descriptor.compute(grayscale).flatten()

def extract_color_histogram(image, bins=(8, 8, 8)):
    """Compute a normalized, flattened 3-channel color histogram.

    Args:
        image: Three-channel image as accepted by ``cv2.calcHist``.
        bins: Number of histogram bins per channel.

    Returns:
        numpy.ndarray: 1-D array with one entry per histogram cell, after
        normalization with ``cv2.normalize``'s default settings.
    """
    channel_indices = [0, 1, 2]
    # Each channel is binned over the value range [0, 256).
    value_ranges = [0, 256] * 3
    histogram = cv2.calcHist([image], channel_indices, None, bins, value_ranges)
    # Normalizes in place: the histogram is both source and destination.
    cv2.normalize(histogram, histogram)
    return histogram.flatten()

def extract_features(images):
    """Build one combined feature vector per image and stack them row-wise.

    For every image the outputs of ``detect_sift_features``,
    ``extract_hog_features`` and ``extract_color_histogram`` are
    concatenated in that order.

    Args:
        images: Iterable of images accepted by the three extractors.

    Returns:
        numpy.ndarray: 2-D array with one row per image.

    Raises:
        ValueError: If ``images`` is empty, or if the images do not all
            produce feature vectors of the same length.
    """
    features = []
    for index, img in enumerate(images):
        combined_features = np.hstack((
            detect_sift_features(img),
            extract_hog_features(img),
            extract_color_histogram(img),
        ))
        # Fail early with a readable message instead of a shape error from
        # np.vstack once all images have been processed.
        if features and combined_features.shape != features[0].shape:
            raise ValueError(
                f"Image {index} produced {combined_features.shape[0]} features, "
                f"expected {features[0].shape[0]}; all images must yield "
                "feature vectors of the same length."
            )
        features.append(combined_features)

    if not features:
        raise ValueError("Cannot extract features: no images were provided.")
    return np.vstack(features)


In [13]:

# Load every readable image from the two configured folders.
set1_images = load_image_from_folder(image_path_set_1)
set2_images = load_image_from_folder(image_path_set_2)

# Extract features: one combined (SIFT + HOG + color histogram) row per image.
set1_features = extract_features(set1_images)
set2_features = extract_features(set2_images)

# Check the shapes of the feature arrays (rows = images, columns = features).
# Both arrays need the same number of columns for the distance computation.
print(f"Shape of set1_features: {set1_features.shape}")
print(f"Shape of set2_features: {set2_features.shape}")


Shape of set1_features: (4, 1274112)
Shape of set2_features: (8, 1274112)


In [14]:
# Calculate Euclidean distances between the features of each image in set 1
# and all images in set 2. Rows correspond to set 1 images, columns to set 2
# images.
distances = euclidean_distances(set1_features, set2_features)


print("Distance matrix between set 1 and set 2 features:")
print(distances)

Distance matrix between set 1 and set 2 features:
[[ 3738.7277601   2461.66064252  2707.10101711 11392.00238735
   6738.60172045 11423.46654499 11473.30115333  3958.16172091]
 [ 3663.38039837  2711.0592997   2705.3938658  11414.73679792
   6668.60448983 11487.23537059 11525.51426032  3850.11456763]
 [ 4433.68616573  4416.01117424  4506.53801961 11425.76272782
   6626.01604367 11528.06708508 11567.45576438  4496.1254242 ]
 [ 5479.35970008  5532.94958961  5606.46996819 11412.81879513
   6674.10656569 11555.58421099 11666.05262858  5699.28451586]]


Analyzing the distances to differentiate sets


In [6]:
# For each image in set 1, find the index of the closest image in set 2
# (argmin along each row of the distance matrix).
closest_images = np.argmin(distances, axis=1)
print("Closest images in set 2 for each image in set 1:")
print(closest_images)

# Calculate, for each image in set 1, the average distance to ALL images in
# set 2 (mean over the whole row, not only the closest match).
average_distances = np.mean(distances, axis=1)
print("Average distance to images in set 2 for each image in set 1:")
print(average_distances)



Closest images in set 2 for each image in set 1:
[1 2 1 0]
Average distance to images in set 2 for each image in set 1:
[6736.62786834 6753.25488127 7374.95780059 7953.32824677]
