EEE3032 – Computer Vision and Pattern Recognition
Coursework Assignment
Prof Miroslaw Bober


In [5]:
import os
import numpy as np
import cv2
import scipy.io as sio
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import auc, precision_recall_curve, average_precision_score, f1_score, confusion_matrix
import glob  # For file handling
from sklearn.cluster import MiniBatchKMeans  # For codebook
from sklearn.preprocessing import StandardScaler
import shutil  # To clear old descriptors
from sklearn.decomposition import PCA
from sklearn.covariance import EmpiricalCovariance
from skimage.feature import local_binary_pattern

In [6]:
## Images and Descriptors Location 

DATASET_FOLDER = 'MSRC_ObjCategImageDatabase_v2'
OUT_FOLDER = 'descriptors'
OUT_SUBFOLDER = 'globalHSVhisto'  ## For global HSV
OUT_SUBFOLDER_GRID = 'gridHSV_LBP'  ## For grid + LBP
OUT_SUBFOLDER_BOVW = 'bovw_ORB'  ## Output Folder for BoVW with ORB
FIXED_SIZE = (240, 240)  # Fixed resize for consistency across phases

# Task 1 : Create Global Color Histogram

In [7]:
def extract_global_hsv_hist(img, Q):
    img_uint8 = (img * 255).astype(np.uint8)
    hsv = cv2.cvtColor(img_uint8, cv2.COLOR_RGB2HSV)
    h, s, v = cv2.split(hsv)
    h_bins = np.floor((h / 180.0) * Q).astype(int)
    s_bins = np.floor((s / 255.0) * Q).astype(int)
    v_bins = np.floor((v / 255.0) * Q).astype(int)
    hist_h = np.bincount(h_bins.flatten(), minlength=Q)
    hist_s = np.bincount(s_bins.flatten(), minlength=Q)
    hist_v = np.bincount(v_bins.flatten(), minlength=Q)
    hist = np.concatenate([hist_h, hist_s, hist_v])
    norm = np.linalg.norm(hist)
    if norm > 0:
        hist = hist / norm
    return hist

In [8]:
def compute_and_save_descriptors(Q, dataset_folder, out_folder, out_subfolder):
    # Ensure the output directory exists
    os.makedirs(os.path.join(out_folder, out_subfolder), exist_ok=True)

    # Iterate through all BMP files in the dataset folder using glob
    for img_path in glob.glob(os.path.join(dataset_folder, 'Images', '*.bmp')):
        filename = os.path.basename(img_path)
        print(f"Processing file {filename}")
        img = cv2.imread(img_path).astype(np.float64) / 255.0  # Normalize the image
        fout = os.path.join(out_folder, out_subfolder, filename.replace('.bmp', '.mat'))
        
        # Extract HSV histogram
        F = extract_global_hsv_hist(img, Q)
        
        # Save the descriptor to a .mat file
        sio.savemat(fout, {'F': F})
    print("Successfully Created Global HSV Color Descriptors")

In [9]:
# Experiment with quantization levels: 4,8,16,32,48
quant_levels = [4,8,16,32,48]
for Q in quant_levels:
    print(f"Testing Q={Q}; expects better color separation but higher compute cost")
    compute_and_save_descriptors(Q, DATASET_FOLDER, OUT_FOLDER, OUT_SUBFOLDER)

Testing Q=4; expects better color separation but higher compute cost
Processing file 17_15_s.bmp
Processing file 16_15_s.bmp
Processing file 14_27_s.bmp
Processing file 3_2_s.bmp
Processing file 1_20_s.bmp
Processing file 8_5_s.bmp
Processing file 2_12_s.bmp
Processing file 3_12_s.bmp
Processing file 18_20_s.bmp
Processing file 19_20_s.bmp
Processing file 13_3_s.bmp
Processing file 18_4_s.bmp
Processing file 10_2_s.bmp
Processing file 20_18_s.bmp
Processing file 17_28_s.bmp
Processing file 16_28_s.bmp
Processing file 8_10_s.bmp
Processing file 9_10_s.bmp
Processing file 7_18_s.bmp
Processing file 1_7_s.bmp
Processing file 9_1_s.bmp
Processing file 6_18_s.bmp
Processing file 2_6_s.bmp
Processing file 11_6_s.bmp
Processing file 5_17_s.bmp
Processing file 4_17_s.bmp
Processing file 12_7_s.bmp
Processing file 7_25_s.bmp
Processing file 6_25_s.bmp
Processing file 12_22_s.bmp
Processing file 13_22_s.bmp
Processing file 10_10_s.bmp
Processing file 11_10_s.bmp
Processing file 7_27_s.bmp
Proces

In [10]:
def load_descriptors(image_folder, descriptor_folder, descriptor_subfolder):
    ALLFEAT = []
    ALLFILES = []
    for filename in os.listdir(os.path.join(descriptor_folder, descriptor_subfolder)):
        if filename.endswith('.mat'):
            img_path = os.path.join(descriptor_folder, descriptor_subfolder, filename)
            img_actual_path = os.path.join(image_folder, 'Images', filename.replace(".mat", ".bmp"))
            img_data = sio.loadmat(img_path)
            ALLFILES.append(img_actual_path)
            ALLFEAT.append(img_data['F'].flatten())  # Ensure 1D
    return ALLFILES, np.array(ALLFEAT)

In [11]:
def cvpr_compare(F1, F2, dist_type='L2'):
    if dist_type == 'L2':
        return np.linalg.norm(F1 - F2)
    elif dist_type == 'L1':
        return np.sum(np.abs(F1 - F2))
    elif dist_type == 'Chi2':
        return np.sum(((F1 - F2)**2) / (F1 + F2 + 1e-10))
    elif dist_type == 'Cosine':
        return 1 - np.dot(F1, F2) / (np.linalg.norm(F1) * np.linalg.norm(F2) + 1e-10)
    elif dist_type == 'Mahalanobis':
        diff = F1 - F2
        return np.sqrt(np.dot(np.dot(diff, cov_inv), diff))  # cov_inv from phase 4

In [12]:
def extract_class_from_filename(file_path):
    filename = os.path.basename(file_path)
    parts = filename.split('_')
    class_no = int(parts[0])
    return class_no

In [13]:
def evaluate_pr_auc(response_class, query_class, all_classes_true, all_classes_pred, fig_title):
    precisions, recalls, _ = precision_recall_curve(response_class == query_class, np.ones(len(response_class)))
    ap = average_precision_score(response_class == query_class, np.ones(len(response_class)))
    f1 = f1_score(all_classes_true, all_classes_pred, average='macro')
    cm = confusion_matrix(all_classes_true, all_classes_pred)
    plt.plot(recalls, precisions, label=f'AP={ap:.3f}')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve for ' + fig_title)
    plt.legend()
    plt.savefig(fig_title + '.png')
    plt.show()
    return ap, f1, cm

In [14]:
# Req1 experiments
quant_levels = [4,8,16,32,48]
results_req1 = {'Q': [], 'AUC/AP': [], 'F1': []}
for Q in quant_levels:
    ALLFILES, FEAT = load_descriptors(DATASET_FOLDER, OUT_FOLDER, OUT_SUBFOLDER)
    query_image = os.path.join(DATASET_FOLDER, '7_9_s.bmp')
    position = ALLFILES.index(query_image)
    query = FEAT[position]
    dst = [(cvpr_compare(query, FEAT[i]), i) for i in range(len(FEAT))]
    dst.sort()
    response_class_no = [extract_class_from_filename(ALLFILES[dst[j][1]]) for j in range(len(ALLFILES))]
    query_class = response_class_no[0]
    response_class = np.array(response_class_no[1:])
    all_classes_true = [extract_class_from_filename(f) for f in ALLFILES[1:]]
    all_classes_pred = response_class
    fig_title = f'pr_q{Q}'
    ap, f1, cm = evaluate_pr_auc(response_class, query_class, all_classes_true, all_classes_pred, fig_title)
    results_req1['Q'].append(Q)
    results_req1['AUC/AP'].append(ap)
    results_req1['F1'].append(f1)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (591,) + inhomogeneous part.

In [None]:
# Req3 grid + LBP
def grid_lbp_descriptor(img, Q_color, grid_size=3, lbp_points=16):
    h, w, _ = img.shape
    cell_h, cell_w = h // grid_size, w // grid_size
    desc = []
    for i in range(grid_size):
        for j in range(grid_size):
            cell = img[i*cell_h:(i+1)*cell_h, j*cell_w:(j+1)*cell_w]
            hist = extract_global_hsv_hist(cell, Q_color)
            desc.append(hist)
            gray = cv2.cvtColor((cell*255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
            lbp = local_binary_pattern(gray, lbp_points, 3, method='uniform')
            lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, lbp_points + 3))
            lbp_hist = lbp_hist.astype("float") / (lbp_hist.sum() + 1e-7)
            desc.append(lbp_hist)
    return np.concatenate(desc)

In [None]:
def compute_and_save_grid_lbp(Q_color, lbp_points, dataset_folder, out_folder, out_subfolder):
    os.makedirs(os.path.join(out_folder, out_subfolder), exist_ok=True)
    for img_path in glob.glob(os.path.join(dataset_folder, 'Images', '*.bmp')):
        filename = os.path.basename(img_path)
        img = cv2.imread(img_path).astype(np.float64) / 255.0
        F = grid_lbp_descriptor(img, Q_color, lbp_points=lbp_points)
        fout = os.path.join(out_folder, out_subfolder, filename.replace('.bmp', '.mat'))
        sio.savemat(fout, {'F': F})
    print("Successfully Created Grid HSV + LBP Descriptors")

In [None]:
compute_and_save_grid_lbp(32, 16, DATASET_FOLDER, OUT_FOLDER, OUT_SUBFOLDER_GRID)
ALLFILES, FEAT = load_descriptors(DATASET_FOLDER, OUT_FOLDER, OUT_SUBFOLDER_GRID)
query_image = os.path.join(DATASET_FOLDER, 'Images', '7_9_s.bmp')
position = ALLFILES.index(query_image)
query = FEAT[position]
dst = [(cvpr_compare(query, FEAT[i]), i) for i in range(len(FEAT))]
dst.sort()
response_class = [extract_class_from_filename(ALLFILES[d[1]]) for d in dst[1:]]
query_class = extract_class_from_filename(query_image)
y_true = np.array([cls == query_class for cls in response_class])
fig_title = 'pr_grid_lbp16'
ap, f1, cm = evaluate_pr_auc(y_true, query_class, response_class, response_class, fig_title)

In [None]:
# Req4: PCA + Mahalanobis
FEAT_reduced, pca = apply_pca(FEAT)
cov = EmpiricalCovariance().fit(FEAT_reduced)
cov_inv = np.linalg.pinv(cov.covariance_)
query_reduced = FEAT_reduced[position]
dst = [(cvpr_compare(query_reduced, FEAT_reduced[i], 'Mahalanobis', cov_inv), i) for i in range(len(FEAT_reduced))]
dst.sort()
response_class = [extract_class_from_filename(ALLFILES[d[1]]) for d in dst[1:]]
query_class = extract_class_from_filename(query_image)
y_true = np.array([cls == query_class for cls in response_class])
fig_title = 'pr_pca_mahal'
ap, f1, cm = evaluate_pr_auc(y_true, query_class, response_class, response_class, fig_title)

In [None]:
subfolder = compute_and_save_bovw(100, DATASET_FOLDER, OUT_FOLDER, OUT_SUBFOLDER_BOVW)
ALLFILES, FEAT = load_descriptors(DATASET_FOLDER, OUT_FOLDER, subfolder)
query_image = os.path.join(DATASET_FOLDER, 'Images', '7_9_s.bmp')
position = ALLFILES.index(query_image)
query = FEAT[position]
dst = [(cvpr_compare(query, FEAT[i], 'Chi2'), i) for i in range(len(FEAT))]
dst.sort()
response_class = [extract_class_from_filename(ALLFILES[d[1]]) for d in dst[1:]]
query_class = extract_class_from_filename(query_image)
y_true = np.array([cls == query_class for cls in response_class])
fig_title = 'pr_bovw100'
ap, f1, cm = evaluate_pr_auc(y_true, query_class, response_class, response_class, fig_title)