In [5]:
import cv2
import numpy as np
from skimage.feature import hog
from sklearn.preprocessing import normalize
from itertools import combinations
from scipy.spatial.distance import cosine
import os

# Resize all images to fixed size to ensure consistent HOG dimensions
def resize_image(img, size=(128, 128)):
    return cv2.resize(img, size, interpolation=cv2.INTER_LINEAR)

# Extract SIFT descriptor, average-pooled to fixed length
def extract_sift(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    kp, des = sift.detectAndCompute(gray, None)
    if des is None:
        return np.zeros(128)
    return np.mean(des, axis=0)

# Extract fixed-length HOG descriptor
def extract_hog(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    hog_feat = hog(gray, pixels_per_cell=(8, 8),
                   cells_per_block=(2, 2),
                   feature_vector=True)
    return hog_feat

# Extract normalized HSV histogram
def extract_hsv_hist(img, bins=(8, 8, 8)):
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
                        [0, 180, 0, 256, 0, 256])
    return cv2.normalize(hist, hist).flatten()

# Combined feature vector: SIFT + HOG + HSV
def extract_combined_features(img):
    img = resize_image(img)  # Force consistent size
    sift_feat = extract_sift(img)
    hog_feat = extract_hog(img)
    hsv_feat = extract_hsv_hist(img)
    combined = np.concatenate([sift_feat, hog_feat, hsv_feat])
    return normalize(combined.reshape(1, -1))[0]

# Build graph of similar images using cosine similarity
def build_similarity_graph(feature_vectors, threshold=0.9):
    n = len(feature_vectors)
    graph = [[] for _ in range(n)]
    for i, j in combinations(range(n), 2):
        sim = 1 - cosine(feature_vectors[i], feature_vectors[j])
        if sim > threshold:
            graph[i].append(j)
            graph[j].append(i)
    return graph

def connected_components(graph):
    visited = set()
    clusters = []

    def iterative_dfs(start):
        stack = [start]
        cluster = []
        while stack:
            node = stack.pop()
            if node not in visited:
                visited.add(node)
                cluster.append(node)
                stack.extend(graph[node])
        return cluster

    for i in range(len(graph)):
        if i not in visited:
            cluster = iterative_dfs(i)
            clusters.append(cluster)
    return clusters


# Load all images from a folder
def load_images_from_folder(folder):
    images = []
    filenames = []
    for filename in os.listdir(folder):
        path = os.path.join(folder, filename)
        img = cv2.imread(path)
        if img is not None:
            images.append(img)
            filenames.append(filename)
    return images, filenames

# === Main Execution ===
folder_path = "dataset/logos"
images, names = load_images_from_folder(folder_path)

# Feature extraction
features = [extract_combined_features(img) for img in images]

# Optional: sanity check on vector lengths
vec_lengths = [len(f) for f in features]
assert all(l == vec_lengths[0] for l in vec_lengths), "Inconsistent feature vector lengths!"

# Clustering
graph = build_similarity_graph(features, threshold=0.9)
clusters = connected_components(graph)

# Output results
for idx, cluster in enumerate(clusters):
    print(f"Cluster {idx + 1}: {[names[i] for i in cluster]}")


Cluster 1: ['263_net_cn.png', 'zkteco_me.png', 'zkteco_vn.png', 'zkteco_in.png', 'zkteco_co_za.png', 'zkteco_com_eg.png', 'zkteco_com_bd.png', 'zkteco_com_pk.png', 'zhcpa_cn.png', 'zktecoma_com.png', 'yves-rocher_se.png', 'yves-rocher_nl.png', 'yves-rocher_it.png', 'yves-rocher_ch.png', 'yvesrocher_com_tr.png', 'ymcaup_org.png', 'yves-rocher_pt.png', 'ymcatrivalley_org.png', 'zenithplasticsurgery_com.png', 'ymcawaycross_com.png', 'yves-rocher_de.png', 'xwiftracingevents_be.png', 'yves-rocher_at.png', 'ymcaswv_com.png', 'ymcatoledo_org.png', 'ymcatriangle_org.png', 'yord_pl.png', 'yonyou_com_hk.png', 'ymcaokc_org.png', 'ymcasf_org.png', 'ymcalouisville_org.png', 'ymcasteuben_org.png', 'ymcachicago_org.png', 'ymca-ywca_org.png', 'ymca-cnw_org.png', 'wwf_ro.png', 'xtransfer_cn.png', 'wwfcz_org.png', 'wwf_or_th.png', 'wwf_org_nz.png', 'wwf_org.png', 'wwf_no.png', 'wwf_es.png', 'wwf_at.png', 'wwfchina_org.png', 'wurth_ua.png', 'wurth_sk.png', 'wurth_rs.png', 'wurth_pt.png', 'wurth_pe.png', 