# 1 Caltect-101

## 1.1 Data preprocessing

In [None]:
# import kagglehub

# # Download latest version
# path = kagglehub.dataset_download("imbikramsaha/caltech-101")

# print("Path to dataset files:", path)

In [13]:
import numpy as np
import cv2
import pywt
from tqdm import tqdm
from scipy import stats
from skimage import color, transform, filters
import os
from skimage.feature import hog, local_binary_pattern
from scipy.ndimage import convolve
from sklearn.decomposition import PCA

# Function to extract Gabor features
def extract_gabor(image):
    kernels = []
    # 6 orientations x 4 wavelengths = 24 kernels
    for theta in np.linspace(0, np.pi, 6):  # 6 orientations
        for lambd in [4, 8, 12, 16]:        # 4 wavelengths
            kernel = cv2.getGaborKernel((31, 31), 4.0, theta, lambd, 0.5, 0, ktype=cv2.CV_32F)
            kernels.append(kernel)
    
    features = []
    for kernel in kernels:
        filtered = cv2.filter2D(image, cv2.CV_32F, kernel)
        features.extend([np.mean(filtered), np.std(filtered)])
    
    return np.array(features)[:48]

# Function to extract Wavelet-Moment features (40-D)
def extract_wavelet_moment(image):
    # 3-level wavelet decomposition
    coeffs1 = pywt.dwt2(image, 'haar')
    cA1, (cH1, cV1, cD1) = coeffs1
    coeffs2 = pywt.dwt2(cA1, 'haar')
    cA2, (cH2, cV2, cD2) = coeffs2
    coeffs3 = pywt.dwt2(cA2, 'haar')
    cA3, (cH3, cV3, cD3) = coeffs3

    # Collect all 10 subbands (9 details + 1 approx)
    subbands = [cH1, cV1, cD1, cH2, cV2, cD2, cH3, cV3, cD3, cA3]
    
    features = []
    for sub in subbands:
        features.extend([np.mean(sub), np.var(sub), 
                        stats.skew(sub.ravel()), stats.kurtosis(sub.ravel())])
    
    return np.array(features)[:40]  # Ensure 40-D

# Function to extract CENTRIST features (254-D) - Placeholder
def extract_centrist(image):
    # Census Transform with 3x3 neighborhood
    centrist = np.zeros_like(image, dtype=np.uint8)
    for y in range(1, image.shape[0]-1):
        for x in range(1, image.shape[1]-1):
            bits = 0
            center = image[y,x]
            for i, (dy, dx) in enumerate([(-1,-1), (-1,0), (-1,1),
                                         (0,1), (1,1), (1,0), 
                                         (1,-1), (0,-1)]):
                bits |= (1 << i) if image[y+dy, x+dx] > center else 0
            centrist[y,x] = bits
    
    # Exclude first and last bins to get 254-D
    hist = np.histogram(centrist, bins=256, range=(0,256))[0]
    return hist[1:-1]  # 254 elements

def extract_hog(image):
    # Special parameters to achieve 1984-D
    fd = hog(image, orientations=8, pixels_per_cell=(16,16),
             cells_per_block=(3,3), visualize=False, 
             feature_vector=True, channel_axis=None)
    
    # Zero-pad or truncate to exact 1984-D
    return np.resize(fd, 1984)

# Function to extract GIST features (512-D)
def extract_gist(image):
    # Simplified spatial envelope approach
    fft = np.fft.fft2(image)
    magnitude = np.log1p(np.abs(fft))
    
    # Downsample and select components
    gist = cv2.resize(magnitude, (32, 32)).flatten()
    return gist[:512]

def extract_lbp(image):
    # Multi-region uniform LBP
    radius = 3
    n_points = 24
    grid = (4, 4)  # 16 regions
    
    height, width = image.shape
    features = []
    
    # Divide image into 4x4 grid
    for i in range(grid[0]):
        for j in range(grid[1]):
            y_start = i * height//grid[0]
            y_end = (i+1) * height//grid[0]
            x_start = j * width//grid[1]
            x_end = (j+1) * width//grid[1]
            
            patch = image[y_start:y_end, x_start:x_end]
            lbp = local_binary_pattern(patch, n_points, radius, 'uniform')
            hist = np.histogram(lbp, bins=59, range=(0, 59))[0]
            features.extend(hist)
    
    return np.array(features)[:928]  # Ensure 928-D

# Function to load and preprocess images
def load_and_extract_features(dataset_path):
    # List to store features and labels
    features_list = [[] for _ in range(6)]
    labels = []

    # Read dataset
    print("reading dataset...")
    for label in os.listdir(dataset_path):
        label_path = os.path.join(dataset_path, label)
        if os.path.isdir(label_path) and label in ["Faces", "Motorbikes", "dollar_bill", "garfield", "stop_sign", "windsor_chair"]:
            for image_name in tqdm(os.listdir(label_path)):
                image_path = os.path.join(label_path, image_name)
                if image_path.endswith('.jpg') or image_path.endswith('.png'):
                    # Load the image and preprocess
                    image = cv2.imread(image_path)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
                    image = cv2.resize(image, (128, 128))  # Resize image
                    
                    # Extract features
                    gabor_features = extract_gabor(image)
                    wavelet_features = extract_wavelet_moment(image)
                    centrist_features = extract_centrist(image)
                    hog_features = extract_hog(image)
                    gist_features = extract_gist(image)
                    lbp_features = extract_lbp(image).flatten()

                    # Store the features in respective arrays
                    features_list[0].append(gabor_features)
                    features_list[1].append(wavelet_features)
                    features_list[2].append(centrist_features)
                    features_list[3].append(hog_features)
                    features_list[4].append(gist_features)
                    features_list[5].append(lbp_features)

                    # Store the label
                    labels.append(label)
    
    # Convert feature list to numpy arrays
    features_list = [np.array(feature) for feature in features_list]
    labels = np.array(labels)
    
    return features_list, labels

# Example usage
dataset_path = 'caltech-101'
feature_list, labels = load_and_extract_features(dataset_path)

for i, X in enumerate(feature_list):
    print(f"Feature set {i+1}: X shape {X.shape}")
    
class_number = 6

100%|██████████| 56/56 [00:01<00:00, 28.71it/s]
100%|██████████| 64/64 [00:02<00:00, 27.04it/s]
100%|██████████| 798/798 [00:27<00:00, 28.79it/s]
100%|██████████| 34/34 [00:01<00:00, 28.87it/s]
100%|██████████| 435/435 [00:15<00:00, 27.82it/s]

Feature set 1: X shape (1439, 48)
Feature set 2: X shape (1439, 40)
Feature set 3: X shape (1439, 254)
Feature set 4: X shape (1439, 1984)
Feature set 5: X shape (1439, 512)
Feature set 6: X shape (1439, 928)





## 1.2 Single view graph

In [14]:
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel
from tqdm import tqdm 
import cvxpy as cp

def compute_laplacian(S):
    S_sym = (S.T + S) / 2  # Compute symmetric part
    D = np.diag(S_sym.sum(axis=0))  # Compute diagonal matrix D
    L = D - S_sym  # Compute Laplacian matrix
    return L

def update_Q(L, c):
    eigenvalues, eigenvectors = np.linalg.eigh(L)
    Q = eigenvectors[:, :c]
    return Q

def update_S(Q, beta): # equation (9)
    n, c = Q.shape
    S = np.zeros((n, n))
    
    for j in range(n):
        g_j = np.array([np.linalg.norm(Q[j] - Q[i])**2 for i in range(n)])
        intermediate = g_j / (2 * beta)
        eta = (1 + np.sum(intermediate)) / n
        s_j = np.maximum(0, -intermediate + eta)
        
        S[j] = s_j
    
    return S

def make_single_view_graph(single_view_graph_X, class_number, default_beta=1.0):
    
    single_view_graph = []
    
    for i in tqdm(range(len(single_view_graph_X))):
        
        # init
        beta = default_beta
        S = update_S(single_view_graph_X[i], beta)
        L = compute_laplacian(S)
        Q = update_Q(L, class_number)

        for j in range(100):
            S = update_S(Q, beta)
            L = compute_laplacian(S)
            Q = update_Q(L, class_number)

            L_rank = np.linalg.matrix_rank(L)
            # print("L_rank",L_rank, "beta", beta)
            if L_rank == X.shape[0] - class_number:
                tqdm.write(f"{i+1}th graph end at {j}th iteration, L's rank is {L_rank}")
                break
            elif L_rank > X.shape[0] - class_number:
                beta *= 0.9 
            else:
                beta *= 1.1
                
        single_view_graph.append(L)
        
    return single_view_graph


In [15]:
single_view_graph = make_single_view_graph(feature_list, class_number)


100%|██████████| 6/6 [54:45<00:00, 547.53s/it]


## 1.3 Global view graph

In [16]:
def init_W(single_view_graph):
    W = [np.full(single_view_graph[0].shape, 1/len(single_view_graph))] * len(single_view_graph)
    return W

def init_A(single_view_graph, W):
    A = np.sum(single_view_graph, axis=0) * W[0]
    return A

def init_P(A,c):
    L = compute_laplacian(A)
    P = update_Q(L, c)
    return P

def update_A(P, w_list, s_list, gamma=1.0):
    n = P.shape[0]
    c = P.shape[1]
    m = len(w_list)

    H = np.sum((P[:, np.newaxis, :] - P)**2, axis=2)
    
    A = np.zeros((n, n))
    
    for j in range(c):
        h_j = H[:, j]
    
        sum_term = np.zeros(n)
        for v in range(m):
            w_jv = w_list[v][:, j]
            s_jv = s_list[v][:, j] 
            sum_term += w_jv * s_jv  
        intermediate = (((gamma / 2.0) * (h_j)) - sum_term)
        
    eta = (1 + np.sum(intermediate)) / n
    a_j = np.maximum(0, -intermediate + eta)
    
    A[j] = a_j

    return A


def update_P(L, c):
    eigenvalues, eigenvectors = np.linalg.eigh(L)
    Q = eigenvectors[:, :c]
    return Q


def compute_W(a, s_list):
    v, n, _ = np.shape(s_list) 
    w_list = []

    for i in range(v):
        wv = np.zeros((n, n)) 
        for j in range(n):
            Z_j = a[:,j] - s_list[i][:,j]
            Z_j = Z_j.reshape(1, -1) 
            one_vector = np.ones((n, 1)) 
            
            # try: # takes forever
            #     print("not triggered", j)
            #     ZTZ_inv = np.linalg.pinv(Z_j.T @ Z_j)  # (Z_j^T Z_j)^{-1}
            #     w_jv = (ZTZ_inv @ one_vector) * (1 / (one_vector.T @ ZTZ_inv @ one_vector))
            # except:
            #     print("triggered", j)
            #     w_jv = np.zeros((1, n))
                
            sum_Z = np.sum(Z_j)
            if np.isclose(sum_Z, 0.0):
                w_jv = np.zeros((1, n))
            else:
                w_jv = Z_j.T / sum_Z

            wv[:,j] = w_jv.reshape(-1) / np.sum(w_jv)
        w_list.append(wv)

    return w_list

def make_global_graph(single_view_graph, class_number, default_gamma=1.0):
    
    # init
    W = init_W(single_view_graph)
    A = init_A(single_view_graph, W)
    P = init_P(A, class_number)
    gamma = default_gamma
    
    for j in tqdm(range(100)):
        A = update_A(P, W, single_view_graph)
        L = compute_laplacian(A)
        P = update_P(L, class_number)
        W = compute_W(A, single_view_graph)
  
        L_rank = np.linalg.matrix_rank(L)
        tqdm.write(f"iteration: {j}, L_rank: {L_rank}, gamma: {gamma}")
        if L_rank == X.shape[0] - class_number:
            tqdm.write(f"end at {j}th iteration, L's rank is {L_rank}")
            break
        elif L_rank < X.shape[0] - class_number:
            gamma *= 0.9 
        else:
            gamma *= 1.1
        
    return L


In [17]:
global_graph = make_global_graph(single_view_graph, class_number)


  wv[:,j] = w_jv.reshape(-1) / np.sum(w_jv)
  1%|          | 1/100 [00:02<04:01,  2.44s/it]

iteration: 0, L_rank: 0, gamma: 1.0


  1%|          | 1/100 [00:04<08:09,  4.95s/it]

iteration: 1, L_rank: 1433, gamma: 0.9
end at 1th iteration, L's rank is 1433





## 1.4 Cluster

In [18]:
import numpy as np
from sklearn.cluster import KMeans

def cluster(laplacian, n_clusters):
    eigenvalues, eigenvectors = np.linalg.eigh(laplacian)
    X = eigenvectors[:, :n_clusters]
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(X)
    return kmeans.labels_

# get clustering results
single_view_graph_labels = []
for i in range(len(single_view_graph)):
    single_view_graph_labels.append(cluster(single_view_graph[i], class_number))

global_graph_labels = cluster(global_graph, class_number)


## 1.5 evaluation

In [19]:
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score
from sklearn.metrics.cluster import contingency_matrix

def cluster_accuracy(y_true, y_pred):
    acc = np.mean(y_pred == y_true)
    return acc

def purity_score(y_true, y_pred):
    contingency = contingency_matrix(y_true, y_pred)
    return np.sum(np.amax(contingency, axis=0)) / np.sum(contingency)

def pairwise_precision_recall_fscore(y_true, y_pred):

    def get_pairs(labels):
        pairs = set()
        for label in np.unique(labels):
            indices = np.where(labels == label)[0]
            for i in range(len(indices)):
                for j in range(i + 1, len(indices)):
                    pairs.add((indices[i], indices[j]))
        return pairs

    true_pairs = get_pairs(y_true)
    pred_pairs = get_pairs(y_pred)
    
    tp = len(true_pairs & pred_pairs)
    fp = len(pred_pairs - true_pairs)
    fn = len(true_pairs - pred_pairs)

    precision = tp / (tp + fp) if tp + fp > 0 else 0
    recall = tp / (tp + fn) if tp + fn > 0 else 0
    f_score = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0
    
    return precision, recall, f_score

def evaluate_clustering(y_true, y_pred):
    
    # remapping 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    assert y_true.shape == y_pred.shape

    labels = np.unique(y_true)
    pred_labels = np.unique(y_pred)
    cost_matrix = -contingency_matrix(y_true, y_pred)

    row_ind, col_ind = linear_sum_assignment(cost_matrix)
    best_mapping = {pred_labels[col]: labels[row] for row, col in zip(row_ind, col_ind)}

    y_pred_mapped = np.array([best_mapping[label] for label in y_pred])

    # evaluate
    acc = cluster_accuracy(y_true, y_pred_mapped)
    nmi = normalized_mutual_info_score(y_true, y_pred)
    purity = purity_score(y_true, y_pred_mapped)
    precision, recall, f_score = pairwise_precision_recall_fscore(y_true, y_pred_mapped)
    ari = adjusted_rand_score(y_true, y_pred_mapped)

    return {
        "ACC": acc,
        "NMI": nmi,
        "Purity": purity,
        "Precision": precision,
        "Recall": recall,
        "F-score": f_score,
        "ARI": ari
    }


In [21]:
metrics = evaluate_clustering(labels, global_graph_labels)
print(metrics)


{'ACC': 0.5552466990965949, 'NMI': 0.01966058694104668, 'Purity': 0.5580264072272412, 'Precision': 0.40643489144600137, 'Recall': 0.9994136525616203, 'F-score': 0.5778667710969749, 'ARI': 0.008627131249400025}
