## Setup

In [None]:
# Week 4 CBIR Experiments
# -----------------------
# Feature extraction, matching, and hyperparameter search for QSD1-W4.

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import json
import itertools


## Load images

In [None]:
def load_images_from_folder(folder):
    """Read every JPEG/PNG image in *folder*, in sorted filename order.

    Files are selected by a case-insensitive extension check
    (``.jpg``, ``.png``, ``.jpeg``). Files that ``cv2.imread`` cannot
    decode (it returns ``None``) are skipped silently.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        A ``(images, filenames)`` tuple of two parallel lists: the decoded
        images as returned by ``cv2.imread`` and their file names.
    """
    image_suffixes = ('.jpg', '.png', '.jpeg')
    candidates = [
        name for name in sorted(os.listdir(folder))
        if name.lower().endswith(image_suffixes)
    ]

    images = []
    filenames = []
    for name in candidates:
        decoded = cv2.imread(os.path.join(folder, name))
        if decoded is None:
            # Unreadable or corrupt file: leave it out of both lists.
            continue
        images.append(decoded)
        filenames.append(name)

    print(f"Loaded {len(images)} images from {folder}")
    return images, filenames


## Keypoint Detection

In [None]:
def detect_keypoints(img, method='SIFT'):
    """Detect keypoints in a BGR image using the named detector.

    Args:
        img: BGR image (it is converted to grayscale before detection).
        method: Detector name, case-insensitive. One of ``'HARRIS'``,
            ``'FAST'``, ``'SIFT'``, ``'ORB'`` or ``'SURF'``.

    Returns:
        The detected keypoints. For Harris this is a list of
        ``cv2.KeyPoint`` objects of size 3 built from the thresholded
        corner response; for the other methods it is whatever the OpenCV
        detector's ``detect`` returns.

    Raises:
        ValueError: If *method* is not one of the supported names.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    name = method.upper()

    if name == 'HARRIS':
        # cornerHarris needs a float32 image; 2, 3, 0.04 are passed through
        # as its positional parameters after the image.
        response = cv2.cornerHarris(np.float32(gray), 2, 3, 0.04)
        response = cv2.dilate(response, None)
        # Keep pixels whose response exceeds 1% of the strongest response.
        corners = np.argwhere(response > 0.01 * response.max())
        # argwhere yields (row, col); KeyPoint expects (x, y) = (col, row).
        return [cv2.KeyPoint(float(col), float(row), 3) for row, col in corners]

    if name == 'FAST':
        detector = cv2.FastFeatureDetector_create()
    elif name == 'SIFT':
        detector = cv2.SIFT_create()
    elif name == 'ORB':
        detector = cv2.ORB_create()
    elif name == 'SURF':
        detector = cv2.xfeatures2d.SURF_create(400)
    else:
        raise ValueError("Unknown method")

    return detector.detect(gray, None)


## Descriptor Extraction

In [None]:
def extract_descriptors(img, method='SIFT', kp=None):
    """Compute local descriptors for a BGR image.

    SIFT, ORB and SURF use their own descriptor; HARRIS and FAST keypoints
    are described with BRIEF, which is a descriptor-only extractor.

    Args:
        img: BGR image (converted to grayscale internally).
        method: Case-insensitive name: ``'SIFT'``, ``'ORB'``, ``'SURF'``,
            ``'HARRIS'`` or ``'FAST'``.
        kp: Optional precomputed keypoints. When ``None``, keypoints are
            detected here: with the extractor's own detector for
            SIFT/ORB/SURF, and with ``detect_keypoints`` for HARRIS/FAST.

    Returns:
        A ``(keypoints, descriptors)`` tuple as returned by OpenCV.
        ``descriptors`` may be ``None`` when no keypoint could be described.

    Raises:
        ValueError: If *method* is not a supported name. This is checked
            before any image processing so bad input fails fast.
    """
    method = method.upper()
    if method not in ('SIFT', 'ORB', 'SURF', 'HARRIS', 'FAST'):
        raise ValueError("Unknown method")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    if method == 'SIFT':
        extractor = cv2.SIFT_create()
    elif method == 'ORB':
        extractor = cv2.ORB_create()
    elif method == 'SURF':
        extractor = cv2.xfeatures2d.SURF_create(400)
    else:  # 'HARRIS' or 'FAST'
        extractor = cv2.xfeatures2d.BriefDescriptorExtractor_create()

    if kp is None:
        if method in ('HARRIS', 'FAST'):
            # BRIEF cannot detect keypoints, so detectAndCompute would fail;
            # run the matching detector first and describe its keypoints.
            kp = detect_keypoints(img, method)
            kp, desc = extractor.compute(gray, kp)
        else:
            kp, desc = extractor.detectAndCompute(gray, None)
    else:
        kp, desc = extractor.compute(gray, kp)
    return kp, desc


## Visualize Keypoints

In [None]:
def visualize_keypoints(img, kp):
    """Show *img* with its keypoints *kp* drawn in green.

    Args:
        img: BGR image the keypoints were detected on.
        kp: Keypoints to overlay, as accepted by ``cv2.drawKeypoints``.

    Displays a 6x6 inch matplotlib figure without axes; returns nothing.
    """
    overlay = cv2.drawKeypoints(img, kp, None, color=(0, 255, 0))
    plt.figure(figsize=(6, 6))
    # OpenCV images are BGR; matplotlib expects RGB.
    overlay_rgb = cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB)
    plt.imshow(overlay_rgb)
    plt.axis('off')
    plt.show()


## Matching & Hyperparameter Experiment

In [None]:
def ratio_test(matches, ratio=0.75):
    """Filter k-nearest-neighbour matches with a distance-ratio test.

    A best match is kept when its distance is strictly smaller than
    ``ratio`` times the distance of the second-best match.

    Args:
        matches: Iterable of per-query neighbour sequences, e.g. the result
            of ``knnMatch(..., k=2)``. Each element needs a ``distance``
            attribute.
        ratio: Threshold applied to the second-best distance.

    Returns:
        List of best matches that passed the test. Entries with fewer than
        two neighbours are skipped, because the ratio cannot be evaluated
        for them (the matcher can return short lists, for instance when the
        train set holds a single descriptor).
    """
    good = []
    for pair in matches:
        if len(pair) < 2:
            # No second neighbour to compare against: cannot judge ambiguity.
            continue
        best, second = pair[0], pair[1]
        if best.distance < ratio * second.distance:
            good.append(best)
    return good

def match_and_score(desc1, desc2, metric='L2', ratio=0.75, crosscheck=False):
    """Count the good matches between two descriptor sets.

    Matches are found with a brute-force 2-nearest-neighbour search from
    ``desc1`` to ``desc2`` and filtered with the ratio test. With
    ``crosscheck`` enabled, a match is additionally kept only if its
    ``desc2`` descriptor has the ``desc1`` descriptor as its own nearest
    neighbour.

    Args:
        desc1: Query descriptors, or ``None``.
        desc2: Train descriptors, or ``None``.
        metric: ``'L2'`` or ``'HAMMING'``.
        ratio: Ratio-test threshold (see ``ratio_test``).
        crosscheck: Whether to require mutually consistent matches.

    Returns:
        Number of matches that survive the filters; ``0`` when either
        descriptor set is ``None`` or empty.

    Raises:
        ValueError: If *metric* is not recognised (only checked when both
            descriptor sets are non-empty).
    """
    if desc1 is None or desc2 is None:
        return 0
    if len(desc1) == 0 or len(desc2) == 0:
        # Nothing to match against; avoid handing empty arrays to OpenCV.
        return 0

    if metric == 'L2':
        norm = cv2.NORM_L2
    elif metric == 'HAMMING':
        norm = cv2.NORM_HAMMING
    else:
        raise ValueError("Unknown metric")

    # BFMatcher's built-in crossCheck only supports k=1, so combining it with
    # knnMatch(k=2) fails inside OpenCV. The matcher is therefore always
    # created without crossCheck and the mutual check is done manually below.
    matcher = cv2.BFMatcher(norm, crossCheck=False)

    matches = matcher.knnMatch(desc1, desc2, k=2)
    # The ratio test needs two neighbours; short lists occur when desc2 has
    # a single descriptor.
    pairs = [pair for pair in matches if len(pair) == 2]
    good = ratio_test(pairs, ratio)

    if crosscheck and good:
        # Nearest desc1 descriptor for every desc2 descriptor.
        reverse_best = {
            m.queryIdx: m.trainIdx for m in matcher.match(desc2, desc1)
        }
        good = [m for m in good if reverse_best.get(m.trainIdx) == m.queryIdx]

    return len(good)


## Hyperparameter Search Loop

In [None]:
def hyperparam_experiments(query_descs, museum_descs, method="SIFT"):
    """Grid-search matching hyperparameters over all query images.

    For each combination of metric, ratio threshold and cross-check flag,
    every query is matched against every museum image with
    ``match_and_score`` and the best score per query is kept. Each
    ``min_matches`` value is then applied as an acceptance threshold to
    those best scores.

    Args:
        query_descs: One descriptor array (or ``None``) per query image.
        museum_descs: One descriptor array (or ``None``) per museum image.
        method: Descriptor name (case-insensitive). ``'SIFT'`` and
            ``'SURF'`` are matched with L2, anything else with Hamming.

    Returns:
        A list of dicts, one per (metric, ratio, crosscheck, min_matches)
        combination, with keys ``metric``, ``ratio``, ``crosscheck``,
        ``min_matches``, ``avg_best_score`` (mean best score over the
        queries) and ``frac_above_min`` (fraction of queries whose best
        score reaches ``min_matches``). Both statistics are ``0.0`` when
        there are no queries.
    """
    # Upper-case so that e.g. "sift" selects L2, as in the other helpers.
    metrics = ['L2'] if method.upper() in ('SIFT', 'SURF') else ['HAMMING']
    ratios = [0.6, 0.7, 0.8]
    crosschecks = [False, True]
    min_matches = [5, 10, 20]

    results_summary = []

    for metric, ratio, cc in itertools.product(metrics, ratios, crosschecks):
        # Matching does not depend on min_matches, so it is done once per
        # (metric, ratio, crosscheck) instead of once per threshold.
        total_best = []
        for qdesc in tqdm(query_descs, desc=f"{metric} r={ratio} cc={cc}"):
            scores = (
                match_and_score(qdesc, mdesc, metric, ratio, cc)
                for mdesc in museum_descs
            )
            # default=0 keeps an empty museum set from raising in max().
            total_best.append(max(scores, default=0))

        avg_score = float(np.mean(total_best)) if total_best else 0.0

        for min_m in min_matches:
            if total_best:
                accepted = sum(1 for best in total_best if best >= min_m)
                frac_above = accepted / len(total_best)
            else:
                frac_above = 0.0
            results_summary.append({
                "metric": metric,
                "ratio": ratio,
                "crosscheck": cc,
                "min_matches": min_m,
                "avg_best_score": avg_score,
                "frac_above_min": frac_above,
            })
    return results_summary


## Run Full Experiment

In [None]:
# Descriptor family used for both the museum gallery and the queries
method = "SIFT"

# Load both image sets (file names are not needed for this experiment)
museum_imgs, _ = load_images_from_folder("datasets/museum")
query_imgs, _ = load_images_from_folder("datasets/qsd1_w4")

# Keep only the descriptor array (index 1) of each (keypoints, descriptors) pair
museum_descs = [
    extract_descriptors(image, method)[1] for image in tqdm(museum_imgs)
]
query_descs = [
    extract_descriptors(image, method)[1] for image in tqdm(query_imgs)
]

# Grid search over the matching hyperparameters, then dump one row per config
summary = hyperparam_experiments(query_descs, museum_descs, method)
for s in summary:
    print(s)


## Plot/Compare Results

In [None]:
import pandas as pd

# One row per hyperparameter configuration returned by the search.
df = pd.DataFrame(summary)
# sort_values returns a new frame rather than sorting in place, so the
# result must be rebound for the ordering to show up in the plot.
df = df.sort_values("avg_best_score", ascending=False)
# NOTE(review): several configurations share the same ratio, so the x tick
# labels repeat; consider a combined label if bars must be told apart.
df.plot.bar(x='ratio', y='avg_best_score', title=f"Performance ({method})")
plt.show()
