In [17]:
import os
import cv2
import numpy as np
from scipy.io import loadmat
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, adjusted_rand_score
from scipy.stats import mode
from skimage.feature import hog, local_binary_pattern
from google.colab import drive
from tqdm import tqdm

In [18]:
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

In [19]:
IMG_DIR = "/content/drive/MyDrive/Dataset/102flowers/jpg"
LABELS_FILE = "/content/drive/MyDrive/Dataset/imagelabels.mat"
CHOOSEN_CLASSES = [1, 47, 69, 88, 97]
NUM_IMAGES = 8189
IMG_SIZE = (128, 128)

In [20]:
def crop_center(img):
    y, x, _ = img.shape
    startx = x // 4
    starty = y // 4
    return img[starty:starty+(y//2), startx:startx+(x//2)]

In [21]:
def extract_advanced_features(img_path):
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, IMG_SIZE)
    center_img = crop_center(img)
    hsv = cv2.cvtColor(center_img, cv2.COLOR_RGB2HSV)
    hist_h = cv2.calcHist([hsv], [0], None, [64], [0, 180])
    hist_s = cv2.calcHist([hsv], [1], None, [32], [0, 256])
    hist_h = cv2.normalize(hist_h, None).flatten()
    hist_s = cv2.normalize(hist_s, None).flatten()

    gray = cv2.cvtColor(center_img, cv2.COLOR_RGB2GRAY)

    radius = 3
    n_points = 8 * radius
    lbp = local_binary_pattern(gray, n_points, radius, method="uniform")
    (hist_lbp, _) = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    hist_lbp = hist_lbp.astype("float")
    hist_lbp /= (hist_lbp.sum() + 1e-7)

    gray_full = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    hog_feats = hog(gray_full, orientations=9, pixels_per_cell=(16, 16),
                    cells_per_block=(2, 2), block_norm='L2-Hys', visualize=False)

    return np.hstack([hist_h, hist_s, hist_lbp, hog_feats])

In [22]:
print("Processing images with Advanced Handcrafted Features...")
image_files, labels = get_filtered_image_paths()

data = []
valid_labels = []

for i, img_path in tqdm(enumerate(image_files), total=len(image_files)):
    try:
        features = extract_advanced_features(img_path)
        data.append(features)
        valid_labels.append(labels[i])
    except Exception as e:
        print(f"Error {img_path}: {e}")

Processing images with Advanced Handcrafted Features...


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 381/381 [00:07<00:00, 53.93it/s]


In [23]:
X = np.array(data)
y_original = np.array(valid_labels)

label_map = {c: i for i, c in enumerate(np.unique(y_original))}
y = np.array([label_map[l] for l in y_original])

print(f"Features Shape (Before PCA): {X.shape}")
print("Running Scaling, PCA and K-Means...")

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components=50, random_state=42)
X_pca = pca.fit_transform(X_scaled)

kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=42, n_init=50)
kmeans.fit(X_pca)
cluster_labels = kmeans.labels_

Features Shape (Before PCA): (381, 1886)
Running Scaling, PCA and K-Means...


In [24]:
def cluster_accuracy(true_labels, cluster_labels):
    labels = np.zeros_like(cluster_labels)
    for cluster in np.unique(cluster_labels):
        mask = cluster_labels == cluster
        if np.sum(mask) > 0:
            labels[mask] = mode(true_labels[mask], keepdims=True)[0]
    return accuracy_score(true_labels, labels)

In [25]:
acc = cluster_accuracy(y, cluster_labels)
ari = adjusted_rand_score(y, cluster_labels)

print("\n" + "="*40)
print(f"ðŸŽ¯ Accuracy {acc:f}%")
print(f"ðŸ“Š ARI: {ari:.4f}")
print("="*40)


ðŸŽ¯ Accuracy 0.619423%
ðŸ“Š ARI: 0.2521
