# 1 Caltect-101

## 1.1 Data preprocessing

In [4]:
import numpy as np
import tensorflow_datasets as tfds
import cv2
from skimage.feature import hog, local_binary_pattern
from skimage import color
import pywt
from scipy import stats
from gist import extract_gist_feature  # Requires installation: pip install git+https://github.com/tuttieee/lear-gist-python.git

# Function to extract Gabor features (48-D)
def extract_gabor(img_gray):
    features = []
    for theta in np.linspace(0, np.pi, 6):  # 6 orientations
        for sigma in [1, 2, 3, 4]:         # 4 scales
            filt_real, filt_imag = pywt.filters.gabor(img_gray, frequency=1.0/sigma, theta=theta)
            magnitude = np.sqrt(filt_real**2 + filt_imag**2)
            features.append(np.mean(magnitude))
            features.append(np.std(magnitude))
    return np.array(features)[:48]  # Ensure 48-D

# Function to extract Wavelet-Moment features (40-D)
def extract_wavelet_moment(img_gray):
    coeffs = pywt.wavedec2(img_gray, 'db1', level=3)
    features = []
    # Process Approximation (Level 3)
    cA = coeffs[0]
    features.extend([np.mean(cA), np.var(cA), stats.skew(cA.ravel()), stats.kurtosis(cA.ravel())])
    # Process Details (Levels 1-3)
    for detail in coeffs[1:]:
        for d in detail:
            flattened = d.ravel()
            features.extend([np.mean(flattened), np.var(flattened), stats.skew(flattened), stats.kurtosis(flattened)])
    return np.array(features)[:40]  # Truncate to 40-D

# Function to extract CENTRIST features (254-D) - Placeholder
def extract_centrist(img_gray):
    # Implement custom CENTRIST feature extraction here
    return np.zeros(254)  # Replace with actual implementation

# Function to extract HOG features (1984-D)
def extract_hog(img_gray):
    fd = hog(img_gray, orientations=9, pixels_per_cell=(16, 16),
             cells_per_block=(3, 3), feature_vector=True)
    # Adjust parameters to reach 1984-D if necessary
    return fd[:1984] if len(fd) >= 1984 else np.pad(fd, (0, 1984 - len(fd)))

# Function to extract GIST features (512-D)
def extract_gist(img_rgb):
    gist_feat = extract_gist_feature(img_rgb)
    return gist_feat[:512]  # Ensure 512-D

# Function to extract LBP features (928-D)
def extract_lbp(img_gray):
    lbp = local_binary_pattern(img_gray, P=24, R=3, method='uniform')
    hist, _ = np.histogram(lbp, bins=59, range=(0, 58))
    # Spatial pyramid (4x4 grid)
    height, width = lbp.shape
    hist_features = []
    for i in range(4):
        for j in range(4):
            cell = lbp[i*height//4:(i+1)*height//4, j*width//4:(j+1)*width//4]
            cell_hist, _ = np.histogram(cell, bins=59, range=(0, 58))
            hist_features.extend(cell_hist)
    return np.array(hist_features)[:928]  # Ensure 928-D

# Main processing
def process_caltech101():
    # Load dataset
    ds, info = tfds.load('caltech101', split='train', shuffle_files=True, with_info=True)
    images, labels = [], []

    # Collect data
    for example in tfds.as_numpy(ds):
        images.append(example['image'])
        labels.append(example['label'])

    # Preprocess and extract features
    feature_list = [[] for _ in range(6)]
    label_list = []

    for img, label in zip(images, labels):
        # Resize and convert to RGB & Grayscale
        img_rgb = cv2.resize(img, (128, 128))  # Resize for consistency
        img_gray = color.rgb2gray(img_rgb) * 255
        img_gray = img_gray.astype(np.uint8)

        # Extract features
        feature_list[0].append(extract_gabor(img_gray))
        feature_list[1].append(extract_wavelet_moment(img_gray))
        feature_list[2].append(extract_centrist(img_gray))
        feature_list[3].append(extract_hog(img_gray))
        feature_list[4].append(extract_gist(img_rgb))
        feature_list[5].append(extract_lbp(img_gray))
        label_list.append(label)

    # Convert to numpy arrays
    feature_list = [np.array(lst) for lst in feature_list]
    label_list = np.array(label_list)

    return feature_list, label_list

# Execute
feature_list, label_list = process_caltech101()

ModuleNotFoundError: No module named 'tensorflow_datasets'

## 1.2 Single view graph

In [2]:
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel
from tqdm import tqdm 

def estimate_sigma(X):
    pairwise_dists = np.linalg.norm(X[:, np.newaxis] - X, axis=2)  # Compute pairwise L2 distances
    sigma = np.median(pairwise_dists)  # Use the median distance as sigma
    return sigma

def compute_laplacian(S):
    S_sym = (S.T + S) / 2  # Compute symmetric part
    D = np.diag(S_sym.sum(axis=0))  # Compute diagonal matrix D
    L = D - S_sym  # Compute Laplacian matrix
    return L

def update_Q(L, c):
    eigenvalues, eigenvectors = np.linalg.eigh(L)
    Q = eigenvectors[:, :c]
    return Q

def project_to_simplex(v):# equation (9)
    n = len(v)
    u = np.sort(v)[::-1] 
    cumsum_u = np.cumsum(u)
    rho = np.where(u > (cumsum_u - 1) / (np.arange(n) + 1))[0][-1]
    theta = (cumsum_u[rho] - 1) / (rho + 1)
    return np.maximum(v - theta, 0)

def update_S(Q, beta): # equation (9)
    n, c = Q.shape
    S = np.zeros((n, n))
    
    for j in range(n):
        g_j = np.array([np.linalg.norm(Q[j] - Q[i])**2 for i in range(n)])
        raw_sj = -g_j / (2 * beta)
        S[j] = project_to_simplex(raw_sj)
    
    return S

def make_single_view_graph(single_view_graph_X, class_number, default_beta=1.0):
    
    single_view_graph = []
    
    for i in tqdm(range(len(single_view_graph_X))):
        
        # init
        beta = default_beta
        S = update_S(single_view_graph_X[i], beta)
        L = compute_laplacian(S)
        Q = update_Q(L, class_number)

        for j in tqdm(range(100)):
            S = update_S(Q, beta)
            L = compute_laplacian(S)
            Q = update_Q(L, class_number)

            L_rank = np.linalg.matrix_rank(L)
            if L_rank == X.shape[0] - class_number:
                tqdm.write(f"{i}th graph end at {j}th iteration, L's rank is {L_rank}")
                break
            elif L_rank > X.shape[0] - class_number:
                beta *= 0.9 
            else:
                beta *= 1.1
        single_view_graph.append(L)
        
    return single_view_graph


In [None]:
single_view_graph = make_single_view_graph(feature_list, class_number)


                                     
 12%|█▏        | 12/100 [02:53<21:13, 14.47s/it]
 17%|█▋        | 1/6 [03:01<15:08, 181.76s/it]

0th graph end at 12th iteration, L's rank is 1990


                                              
  7%|▋         | 7/100 [01:40<22:14, 14.35s/it]
 33%|███▎      | 2/6 [04:50<09:15, 138.88s/it]

1th graph end at 7th iteration, L's rank is 1990




## 1.3 Global view graph

In [None]:
def init_W(single_view_graph):
    W = [np.full(single_view_graph[0].shape, 1/len(single_view_graph))] * len(single_view_graph)
    return W

def init_A(single_view_graph, W):
    A = np.sum(single_view_graph, axis=0) * W[0]
    return A

def init_P(A,c):
    L = compute_laplacian(A)
    P = update_Q(L, c)
    return P

def update_A(P, w_list, s_list, gamma=1.0):
    n = P.shape[0]
    c = P.shape[1]
    m = len(w_list)

    H = np.sum((P[:, np.newaxis, :] - P)**2, axis=2)
    
    A = np.zeros((n, n))
    
    for j in range(c):
        h_j = H[:, j]
    
        sum_term = np.zeros(n)
        for v in range(m):
            w_jv = w_list[v][:, j]
            s_jv = s_list[v][:, j] 
            sum_term += w_jv * s_jv  
        intermediate = -(((gamma / 2.0) * (h_j)) - sum_term)
        
    A[j] = project_to_simplex(intermediate)

    return A


def update_P(L, c):
    eigenvalues, eigenvectors = np.linalg.eigh(L)
    Q = eigenvectors[:, :c]
    return Q


def compute_W(a, s_list):
    v, n, _ = np.shape(s_list) 
    w_list = []

    for i in range(v):
        wv = np.zeros((n, n)) 
        for j in range(n):
            Z_j = a[:,j] - s_list[i][:,j]
            Z_j = Z_j.reshape(1, -1) 
            ZTZ_inv = np.linalg.pinv(Z_j.T @ Z_j)  # (Z_j^T Z_j)^{-1}
            one_vector = np.ones((n, 1)) 
            w_jv = (ZTZ_inv @ one_vector) * (1 / (one_vector.T @ ZTZ_inv @ one_vector))
            wv[:,j] = w_jv.reshape(-1) / np.sum(w_jv)
        w_list.append(wv)

    return w_list

def make_global_graph(single_view_graph, class_number, default_gamma=1.0):
    
    # init
    W = init_W(single_view_graph)
    A = init_A(single_view_graph, W)
    P = init_P(A, class_number)
    gamma = default_gamma
    
    for j in tqdm(range(100)):
        A = update_A(P, W, single_view_graph)
        L = compute_laplacian(A)
        P = update_P(L, class_number)
        W = compute_W(A, single_view_graph)

        tqdm.write(f"iteration: {j}, L_rank: {np.linalg.matrix_rank(L)}, gamma: {gamma}")
        print(sum(W[0][:,0]))
        L_rank = np.linalg.matrix_rank(L)
        if L_rank == X.shape[0] - class_number:
            tqdm.write(f"end at {j}th iteration, L's rank is {L_rank}")
            break
        elif L_rank < X.shape[0] - class_number:
            gamma *= 0.9 
        else:
            gamma *= 1.1
        
    return L


In [None]:
global_graph = make_global_graph(single_view_graph, class_number)


## 1.4 Cluster

In [None]:
import numpy as np
from sklearn.cluster import KMeans

def cluster(laplacian, n_clusters):
    eigenvalues, eigenvectors = np.linalg.eigh(laplacian)
    X = eigenvectors[:, :n_clusters]
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(X)
    return kmeans.labels_

# get clustering results
single_view_graph_labels = []
for i in range(len(single_view_graph)):
    single_view_graph_labels.append(cluster(single_view_graph[i], class_number))

global_graph_labels = cluster(global_graph, class_number)


## 1.5 evaluation

In [None]:
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score
from sklearn.metrics.cluster import contingency_matrix

def cluster_accuracy(y_true, y_pred):
    acc = np.mean(y_pred == y_true)
    return acc

def purity_score(y_true, y_pred):
    contingency = contingency_matrix(y_true, y_pred)
    return np.sum(np.amax(contingency, axis=0)) / np.sum(contingency)

def pairwise_precision_recall_fscore(y_true, y_pred):

    def get_pairs(labels):
        pairs = set()
        for label in np.unique(labels):
            indices = np.where(labels == label)[0]
            for i in range(len(indices)):
                for j in range(i + 1, len(indices)):
                    pairs.add((indices[i], indices[j]))
        return pairs

    true_pairs = get_pairs(y_true)
    pred_pairs = get_pairs(y_pred)
    
    tp = len(true_pairs & pred_pairs)
    fp = len(pred_pairs - true_pairs)
    fn = len(true_pairs - pred_pairs)

    precision = tp / (tp + fp) if tp + fp > 0 else 0
    recall = tp / (tp + fn) if tp + fn > 0 else 0
    f_score = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0
    
    return precision, recall, f_score

def evaluate_clustering(y_true, y_pred):
    
    # remapping 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    assert y_true.shape == y_pred.shape

    labels = np.unique(y_true)
    pred_labels = np.unique(y_pred)
    cost_matrix = -contingency_matrix(y_true, y_pred)

    row_ind, col_ind = linear_sum_assignment(cost_matrix)
    best_mapping = {pred_labels[col]: labels[row] for row, col in zip(row_ind, col_ind)}

    y_pred_mapped = np.array([best_mapping[label] for label in y_pred])

    # evaluate
    acc = cluster_accuracy(y_true, y_pred_mapped)
    nmi = normalized_mutual_info_score(y_true, y_pred)
    purity = purity_score(y_true, y_pred_mapped)
    precision, recall, f_score = pairwise_precision_recall_fscore(y_true, y_pred_mapped)
    ari = adjusted_rand_score(y_true, y_pred_mapped)

    return {
        "ACC": acc,
        "NMI": nmi,
        "Purity": purity,
        "Precision": precision,
        "Recall": recall,
        "F-score": f_score,
        "ARI": ari
    }


In [None]:
metrics = evaluate_clustering(y, global_graph_labels)
print(metrics)
