In [None]:
pip install annoy
pip install Pillow
pip install matplotlib 
pip install opencv-python 
pip install scikit-image 
pip install numpy 

In [None]:
import os
import cv2
import numpy as np
from annoy import AnnoyIndex
from skimage.metrics import structural_similarity as ssim
from PIL import Image
import matplotlib.pyplot as plt
from itertools import combinations
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA

## COMPUTE SIMILARITY AMONG CHOSEN IMAGES

In [None]:
# Folder containing the images to analyse.
# Set the IMAGE_FOLDER environment variable to point at your own data; the
# original hard-coded location is kept as the fallback.
FOLDER_PATH = os.environ.get("IMAGE_FOLDER", "/Users/name/Downloads/images")

In [None]:
# Size (200x200 pixels) that get_orb_features passes to cv2.resize so every
# image is brought to the same dimensions before ORB keypoints are extracted.
IMAGE_SIZE = (200, 200)

In [None]:
# ORB feature extractor shared by every call to get_orb_features.
# nfeatures=1000 is the value handed to OpenCV's ORB_create as the cap on
# keypoints retained per image.
orb = cv2.ORB_create(nfeatures=1000)

In [None]:
# Function to compute ORB descriptors
def get_orb_features(image_path, feature_dim=128):
    """Return a fixed-length ORB feature vector for one image.

    The image is read as grayscale, resized to IMAGE_SIZE, and run through the
    shared ``orb`` extractor. The descriptor matrix is flattened and the first
    ``feature_dim`` values are kept.

    Parameters
    ----------
    image_path : str
        Path of the image file to read.
    feature_dim : int, optional
        Length of the returned vector (default 128, matching the Annoy index).

    Returns
    -------
    numpy.ndarray or None
        1-D array of exactly ``feature_dim`` values, or None when the file
        cannot be read or ORB yields no descriptors.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        return None
    image = cv2.resize(image, IMAGE_SIZE)
    _, descriptors = orb.detectAndCompute(image, None)
    if descriptors is None:
        return None

    features = descriptors.flatten()[:feature_dim]
    if features.size < feature_dim:
        # Images with very few keypoints produce a short vector; zero-pad so
        # every image has the same dimensionality (required by Annoy and PCA).
        features = np.pad(features, (0, feature_dim - features.size))
    return features

In [None]:
# Function to compute pHash
def get_phash(image_path, hash_size=8, highfreq_factor=4):
    """Compute a DCT-based perceptual hash of an image as a bit string.

    The image is converted to grayscale and resized to a square of side
    ``hash_size * highfreq_factor`` (32 by default). A 2-D DCT is taken and
    only the top-left ``hash_size`` x ``hash_size`` block (the low-frequency
    coefficients) is kept, so fine detail and noise do not influence the hash.
    Each kept coefficient becomes "1" if it exceeds the block's median,
    otherwise "0".

    Parameters
    ----------
    image_path : str
        Path of the image file to hash.
    hash_size : int, optional
        Side of the low-frequency block; the hash has ``hash_size ** 2`` bits
        (default 8, i.e. 64 bits).
    highfreq_factor : int, optional
        Oversampling factor for the DCT input. ``hash_size * highfreq_factor``
        must be even because cv2.dct only accepts even-sized arrays.

    Returns
    -------
    str
        String of "0"/"1" characters in row-major order.
    """
    side = hash_size * highfreq_factor
    # The context manager closes the underlying file handle promptly instead
    # of leaving it open until garbage collection.
    with Image.open(image_path) as img:
        pixels = np.asarray(img.convert("L").resize((side, side)), dtype=np.float32)

    dct = cv2.dct(pixels)
    low_freq = dct[:hash_size, :hash_size]
    median = np.median(low_freq)
    return "".join("1" if coeff > median else "0" for coeff in low_freq.flatten())


In [None]:
# Load images and extract features
# - lower() makes the extension filter case-insensitive (.JPG, .PNG, ...).
# - sorted() gives a stable order; os.listdir returns entries in arbitrary
#   order, which would make index-based selection irreproducible.
image_paths = sorted(
    os.path.join(FOLDER_PATH, f)
    for f in os.listdir(FOLDER_PATH)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)
image_features = {}  # path -> ORB feature vector (only images with descriptors)
image_hashes = {}    # path -> perceptual-hash bit string

for img_path in image_paths:
    try:
        phash = get_phash(img_path)
    except OSError as exc:
        # PIL raises (an OSError subclass) on corrupt/unsupported files, whereas
        # cv2.imread just returns None; skip such files instead of aborting.
        print(f"Skipping unreadable image {img_path}: {exc}")
        continue
    image_hashes[img_path] = phash

    orb_feat = get_orb_features(img_path)
    if orb_feat is not None:
        image_features[img_path] = orb_feat

In [None]:
# Build Annoy index for similarity search
feature_dim = 128
annoy_index = AnnoyIndex(feature_dim, metric='euclidean')
# Annoy builds its trees with random splits; seed it (before adding items) so
# Restart & Run All returns the same neighbours every time.
annoy_index.set_seed(42)

# Item id i corresponds to the i-th entry of image_features (insertion order).
for i, (img_path, feat) in enumerate(image_features.items()):
    if len(feat) != feature_dim:
        raise ValueError(
            f"Feature vector for {img_path} has length {len(feat)}, expected {feature_dim}"
        )
    annoy_index.add_item(i, feat)

# 10 trees, as in the original configuration.
annoy_index.build(10)

In [None]:
# Find similar images
def find_similar_images(img_path, top_n=5, phash_threshold=0.50):
    """Return images similar to ``img_path`` according to ORB features and pHash.

    Candidates are the nearest neighbours of the query's ORB vector in
    ``annoy_index``; a candidate is kept only if the fraction of hash bits it
    shares with the query is strictly greater than ``phash_threshold``.

    Parameters
    ----------
    img_path : str
        Path of the query image; must be a key of ``image_features``.
    top_n : int, optional
        Maximum number of neighbours to consider (default 5).
    phash_threshold : float, optional
        Minimum fraction of matching hash bits, exclusive (default 0.50).

    Returns
    -------
    list of str
        Paths of matching images, nearest first. Empty when the query has no
        ORB features.
    """
    if img_path not in image_features:
        return []

    # Annoy item ids follow the insertion order of image_features; build the
    # id -> path lookup once rather than once per neighbour.
    indexed_paths = list(image_features)

    vector = image_features[img_path]
    # Request one extra neighbour and drop the query by identity: approximate
    # search and duplicate vectors mean the query is not guaranteed to be the
    # first hit, so blindly slicing off element 0 can discard a real match.
    neighbour_ids = annoy_index.get_nns_by_vector(vector, top_n + 1)
    similar_images = [
        indexed_paths[idx] for idx in neighbour_ids if indexed_paths[idx] != img_path
    ][:top_n]

    # Filter by pHash
    phash_query = image_hashes[img_path]
    final_matches = []
    for sim_img in similar_images:
        matching_bits = sum(a == b for a, b in zip(phash_query, image_hashes[sim_img]))
        phash_sim = matching_bits / len(phash_query)
        if phash_sim > phash_threshold:
            final_matches.append(sim_img)

    return final_matches

In [None]:
# Example: Find similar images for a sample image
if not image_paths:
    raise FileNotFoundError(f"No images found in {FOLDER_PATH}")

# Use the 16th image when available; clamp the index so the example also runs
# on folders that contain fewer than 16 images.
sample_image = image_paths[min(15, len(image_paths) - 1)]
similar_images = find_similar_images(sample_image, top_n=5)

print(f"Images similar to {sample_image}:")
for img in similar_images:
    print(img)

## COMPUTE AVG. SSIM
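SSIM (Structural Similarity Index): measures the structural closeness between two images on a scale from -1 to 1, where 1 means the images are identical.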

Method: Compute SSIM for all image pairs and take the mean SSIM score.


In [None]:
# Load drawings
# Case-insensitive extension match and a sorted listing keep the set and order
# of images stable across platforms and runs.
image_paths = sorted(
    os.path.join(FOLDER_PATH, f)
    for f in os.listdir(FOLDER_PATH)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# cv2.imread returns None (it does not raise) for files it cannot decode;
# leave those out so the SSIM step never receives None.
images = {}
for path in image_paths:
    img = cv2.imread(path)
    if img is None:
        print(f"Skipping unreadable image {path}")
        continue
    images[path] = img

In [None]:
# Function to compute SSIM
def compute_ssim(img1, img2, size=(200, 200)):
    """Return the SSIM between two BGR images.

    Each image is resized to ``size`` and converted from BGR to grayscale;
    the two grayscale images are then compared with skimage's
    ``structural_similarity`` and its result is returned unchanged.
    """
    prepared = []
    for bgr_image in (img1, img2):
        resized = cv2.resize(bgr_image, size)
        prepared.append(cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY))
    first_gray, second_gray = prepared
    return ssim(first_gray, second_gray)

In [None]:
# Compute SSIM
# One score per unordered pair of loaded images.
ssim_scores = [
    compute_ssim(images[img1_path], images[img2_path])
    for img1_path, img2_path in combinations(images.keys(), 2)
]

if ssim_scores:
    average_ssim = np.mean(ssim_scores)
    std_ssim = np.std(ssim_scores)
else:
    # With fewer than two images there are no pairs; report that explicitly
    # instead of letting np.mean([]) emit a RuntimeWarning and a bare nan.
    average_ssim = std_ssim = float("nan")
    print("Need at least two readable images to compute pairwise SSIM.")

print(f"Average SSIM: {average_ssim:.4f}")
print(f"SSIM Standard Deviation: {std_ssim:.4f}")

## COMPUTE DBSCAN CLUSTERS

In [None]:
# Turn ORB feature vectors into a list
feature_vectors = list(image_features.values())

if len(feature_vectors) < 2:
    # DBSCAN with min_samples=2 cannot form a cluster from fewer than two
    # points, and PCA is not meaningful on a single sample.
    print("Need at least two images with ORB features to cluster.")
    num_clusters = 0
else:
    # Reduce dimensionality with PCA
    # PCA requires n_components <= min(n_samples, n_features), so cap the
    # requested 10 components for small image sets.
    n_components = min(10, len(feature_vectors), len(feature_vectors[0]))
    pca = PCA(n_components=n_components)
    reduced_features = pca.fit_transform(feature_vectors)

    # Apply DBSCAN clustering
    dbscan = DBSCAN(eps=50, min_samples=2).fit(reduced_features)

    # Label -1 marks noise points, not a cluster, so it is excluded from the count.
    num_clusters = len(set(dbscan.labels_)) - (1 if -1 in dbscan.labels_ else 0)

print(f"Number of Image Clusters: {num_clusters}")