In [1]:
import cv2
import numpy as np
import os
import pickle
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from background_removal_exp import background_remover_w2 as background_remover
# from background_removal_exp import bckg_rmv as background_remover
from descriptors import preprocess_image, extract_descriptor, extract_descriptors
from image_split import split_images

# Paths
# Folder listed for the '.jpg' query images processed by the loop further down.
IMG_FOLDER = "../Data/Week3/qsd2_w3/"
# Folder with the reference (ground-truth database) images that get described once.
IMG_FOLDER_GT = "../Data/Week3/BBDD/"
# Pickle with the expected database match(es) for each query image.
GT_CORRESPS_PATH = "../Data/Week3/qsd2_w3/gt_corresps.pkl"
# Pickle cache holding {"desc_gt", "gt_names"} so the database is not re-described on every run.
DESC_GT_PATH = "results/descriptors_gt.pkl"

# --- 1. Load or compute GT descriptors ---

def crop_to_mask_rectangle(image, mask):
    """Crop ``image`` to the bounding rectangle of the foreground of ``mask``.

    Defined before ``build_gt_descriptors`` is first called so that a cold run
    (no descriptor cache on disk) does not fail with a ``NameError``.

    Args:
        image: Array indexed as ``image[row, col, ...]``.
        mask: Foreground mask; every value > 0 counts as foreground.
            NOTE(review): assumed single-channel and aligned with ``image`` --
            confirm against the background remover's output.

    Returns:
        The cropped slice of ``image``, or ``image`` itself when the mask has
        no foreground pixel at all.
    """
    # Binarise so that any positive value is treated as foreground.
    binary = (mask > 0).astype(np.uint8)

    coords = cv2.findNonZero(binary)
    if coords is None:
        return image  # empty mask: fall back to the uncropped image

    x, y, w, h = cv2.boundingRect(coords)
    return image[y:y + h, x:x + w]


def build_gt_descriptors(gt_folder, extractor):
    """Describe every image in ``gt_folder`` with the same pipeline as the queries.

    Each image is preprocessed, background-removed, cropped to the mask's
    bounding box, described with ``extractor`` and L2-normalised.

    Args:
        gt_folder: Directory containing the reference images
            ('.jpg', '.png' or '.jpeg', case-insensitive).
        extractor: Callable mapping a cropped image to a 1-D descriptor.

    Returns:
        ``(descriptors, names)`` where ``descriptors`` stacks one row per image
        and ``names`` is the sorted list of file names, in the same order.

    Raises:
        ValueError: If the folder contains no image file.
        OSError: If an image cannot be decoded. Skipping it instead would shift
            the row positions that the evaluation compares against GT labels.
    """
    names = sorted(
        f for f in os.listdir(gt_folder)
        if f.lower().endswith(('.jpg', '.png', '.jpeg'))
    )
    if not names:
        raise ValueError(f"No reference images found in {gt_folder!r}")

    descs = []
    for name in names:
        path = os.path.join(gt_folder, name)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read reference image: {path}")

        # Same steps as the query side: preprocess -> mask -> crop -> describe.
        img_p = preprocess_image(img)
        _, mask, _, _ = background_remover.remove_background_morphological_gradient(img_p)
        img_c = crop_to_mask_rectangle(img_p, mask)

        d = np.asarray(extractor(img_c))
        # The epsilon keeps an all-zero descriptor from dividing by zero.
        descs.append(d / (np.linalg.norm(d) + 1e-12))

    return np.vstack(descs), names


if os.path.exists(DESC_GT_PATH):
    # The cache is not invalidated automatically: delete the file to rebuild
    # the descriptors after changing the pipeline.
    # NOTE: pickle can execute arbitrary code while loading; only load cache
    # files that were produced by this script.
    with open(DESC_GT_PATH, "rb") as f:
        data = pickle.load(f)
    desc_gt = data["desc_gt"]
    gt_names = data["gt_names"]
else:
    desc_gt, gt_names = build_gt_descriptors(IMG_FOLDER_GT, extract_descriptor)
    os.makedirs(os.path.dirname(DESC_GT_PATH), exist_ok=True)
    with open(DESC_GT_PATH, "wb") as f:
        pickle.dump({"desc_gt": desc_gt, "gt_names": gt_names}, f)

# --- 2. Load GT correspondences ---
# NOTE: same pickle caveat as above -- the file must come from a trusted source.
with open(GT_CORRESPS_PATH, "rb") as f:
    gt_corresps = pickle.load(f)

# --- 3. Process all images in the folder ---
def _l2norm(x):
    """Return ``x`` as a float32 array scaled to unit Euclidean length.

    A tiny epsilon is added to the norm so an all-zero input yields zeros
    instead of a division-by-zero warning.
    """
    vec = np.asarray(x, dtype=np.float32)
    return vec / (np.linalg.norm(vec) + 1e-12)

def _describe_artwork(artwork):
    """Return the L2-normalised descriptor of one artwork image.

    Applies the shared pipeline: preprocess, estimate the foreground mask,
    crop to the mask's bounding box (no black borders), then describe.
    """
    prepared = preprocess_image(artwork)
    _, mask, _, _ = background_remover.remove_background_morphological_gradient(prepared)
    cropped = crop_to_mask_rectangle(prepared, mask)
    return _l2norm(extract_descriptor(cropped))


image_names = sorted(f for f in os.listdir(IMG_FOLDER) if f.endswith('.jpg'))

# One entry per query image, each a list with one descriptor per artwork found.
desc_query = []

for img_name in image_names:
    print(f"Processing {img_name} ...")
    img = cv2.imread(os.path.join(IMG_FOLDER, img_name))

    if img is None:
        print(f"⚠️ Skipping {img_name}: could not read image.")
        # Keep an empty placeholder: compute_map_at_k pairs desc_query[i] with
        # gt_corresps[i] by position, so dropping the entry would misalign
        # every following query. An empty list contributes no AP value.
        desc_query.append([])
        continue

    # split_images returns a flag at [0] and the artwork(s) at [1].
    split_result = split_images(img)

    if split_result[0] is True:
        # Two artworks detected: describe the left and right one separately.
        left_artwork, right_artwork = split_result[1]
        desc_query.append([
            _describe_artwork(left_artwork),
            _describe_artwork(right_artwork),
        ])
    else:
        # Single artwork; still wrapped in a list to keep the structure uniform.
        desc_query.append([_describe_artwork(split_result[1])])

# --- 4. Compute mAP@1 and mAP@5 ---
def _average_precision(ranked_indices, relevant):
    """Return the average precision of ``ranked_indices`` against ``relevant``.

    Precision is accumulated at every rank that hits a relevant label and the
    sum is divided by ``len(relevant)``; an empty ``relevant`` scores 0.
    """
    if not relevant:
        return 0.0

    hits = 0
    precisions = []
    for rank, idx in enumerate(ranked_indices, start=1):
        if idx in relevant:
            hits += 1
            precisions.append(hits / rank)
    return float(np.sum(precisions)) / len(relevant)


def compute_map_at_k(desc_query, desc_gt, gt_corresps, k=5):
    """Compute mean Average Precision at K over all query artworks.

    Args:
        desc_query: One entry per query image, each a sequence of descriptors
            (one per artwork found in that image). Empty entries are ignored.
        desc_gt: 2-D array with one reference descriptor per row; row positions
            are what the ground-truth labels are compared against.
        gt_corresps: Indexable by query position; each item is a label or a
            list/tuple/array of labels.
        k: Number of top-ranked references considered per descriptor.

    Returns:
        The mean of the per-descriptor AP values as a float, or 0.0 when there
        is nothing to score.

    When an image has as many descriptors as labels they are paired 1:1
    (``descs[j]`` -> ``labels[j]``), which makes AP equal to 1/rank of the
    label within the top-k (0 if absent). Otherwise every label of the image
    counts as relevant for each of its descriptors.
    """
    aps = []

    for i, descs in enumerate(desc_query):
        labels = gt_corresps[i]
        # Normalise the label container; previously only ``list`` was accepted,
        # so a tuple or array was mistaken for a single label.
        if isinstance(labels, np.ndarray):
            labels = labels.ravel().tolist()
        elif isinstance(labels, (list, tuple)):
            labels = list(labels)
        else:
            labels = [labels]

        paired = len(descs) == len(labels)

        for j, desc in enumerate(descs):
            sims = cosine_similarity([desc], desc_gt)[0]
            # Negate so argsort yields the most similar references first.
            top_k = np.argsort(-sims)[:k]
            relevant = [labels[j]] if paired else labels
            aps.append(_average_precision(top_k, relevant))

    return float(np.mean(aps)) if aps else 0.0



# Score the retrieval at both cut-offs (top-1 and top-5).
map1, map5 = (
    compute_map_at_k(desc_query, desc_gt, gt_corresps, k=cutoff)
    for cutoff in (1, 5)
)

print(f"\n✅ mAP@1 = {map1:.4f}")
print(f"✅ mAP@5 = {map5:.4f}")


Processing 00000.jpg ...
Processing 00001.jpg ...
Processing 00002.jpg ...
Processing 00003.jpg ...
Processing 00004.jpg ...
Processing 00005.jpg ...
Processing 00006.jpg ...
Processing 00007.jpg ...
Processing 00008.jpg ...
Processing 00009.jpg ...
Processing 00010.jpg ...
Processing 00011.jpg ...
Processing 00012.jpg ...
Processing 00013.jpg ...
Processing 00014.jpg ...
Processing 00015.jpg ...
Processing 00016.jpg ...
Processing 00017.jpg ...
Processing 00018.jpg ...
Processing 00019.jpg ...
Processing 00020.jpg ...
Processing 00021.jpg ...
Processing 00022.jpg ...
Processing 00023.jpg ...
Processing 00024.jpg ...
Processing 00025.jpg ...
Processing 00026.jpg ...
Processing 00027.jpg ...
Processing 00028.jpg ...
Processing 00029.jpg ...

✅ mAP@1 = 0.3611
✅ mAP@5 = 0.4255
