In [12]:
import os
import pickle
import numpy as np
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from noise_filter import *

from Week2.week2_histograms import SpatialPyramidHistogram
from Week2.similarity_measures_optimized import (
    l1_distance_matrix,
    histogram_intersection_matrix, 
    kl_divergence_matrix,
    normalize_hist
)
from Week2.mapk import mapk
from Week2.background_remover import remove_background_morphological_gradient

# --- Configuration ---
DB_PATH = "../Data/BBDD/"
QUERY_PATH = "../Data/Week3/qsd1_w3/"  # Changed to qsd1_w3
GT_PATH = "../Data/Week3/qsd1_w3/gt_corresps.pkl"  # Changed to qsd1_w3
CACHE_DIR = "../Week2/best_method_cache"

In [10]:
# Define Utility Functions
import cv2


def pil_to_cv2(img):
    """Convert PIL image to OpenCV format."""
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)


In [22]:

PYRAMID_CONFIG = {
    "bins": (4, 4, 4),
    "levels": 4,
    "weights": "uniform"
}

def load_image_cv(path):
    """Load image in OpenCV BGR format"""
    pil = Image.open(path).convert("RGB")
    cv_img = pil_to_cv2(pil)
    return cv_img

def filename_to_id(fname):
    """Convert filename to image ID"""
    import re
    base = os.path.splitext(os.path.basename(fname))[0]
    m = re.search(r"\d+", base)
    return int(m.group()) if m else None

def get_bounding_box_mask(mask):
    """Get minimum bounding rectangle from polygon mask"""
    if mask is None or mask.size == 0 or mask.max() == 0:
        # fallback: whole image if mask is empty
        h, w = mask.shape if mask is not None else (0, 0)
        rect_mask = np.ones((h, w), dtype=np.uint8)
        return rect_mask, (0, h, 0, w)

    rows = np.any(mask, axis=1)
    cols = np.any(mask, axis=0)
    y_idx = np.where(rows)[0]
    x_idx = np.where(cols)[0]
    y_min, y_max = y_idx[[0, -1]]
    x_min, x_max = x_idx[[0, -1]]
    rect_mask = np.zeros_like(mask, dtype=np.uint8)
    rect_mask[y_min:y_max+1, x_min:x_max+1] = 1
    return rect_mask, (y_min, y_max+1, x_min, x_max+1)

def build_descriptors_cached(image_list, base_path, color_space, cache_name, crop_background=False):
    """Build pyramid descriptors with caching (+ optional background crop)."""
    os.makedirs(CACHE_DIR, exist_ok=True)
    # Make cache path include folder name to avoid collisions between different base_paths
    folder_tag = os.path.basename(os.path.normpath(base_path))
    cache_file = os.path.join(CACHE_DIR, f"{cache_name}_{folder_tag}_{color_space}_bg{int(crop_background)}.pkl")
    
    if os.path.exists(cache_file):
        try:
            with open(cache_file, "rb") as f:
                cached = pickle.load(f)
            if cached.get("image_list") == image_list:
                print(f"Loaded {color_space} descriptors from cache ({cache_name}/{folder_tag})")
                return cached["descriptors"]
        except Exception as e:
            print(f"Cache load failed: {e}")
            
    print(f"Computing {color_space} descriptors for {cache_name}/{folder_tag}...")
    pyramid = SpatialPyramidHistogram(
        bins=PYRAMID_CONFIG["bins"],
        levels=PYRAMID_CONFIG["levels"],
        color_space=color_space,
        weights=PYRAMID_CONFIG["weights"]
    )
    
    descriptors = []
    for fname in tqdm(image_list):
        img_path = os.path.join(base_path, fname)
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Missing image: {img_path}")
        img = load_image_cv(img_path)

        if crop_background:
            # Compute polygon mask and crop to bounding rect
            _, poly_mask, _, _ = remove_background_morphological_gradient(img)
            rect_mask, (y1, y2, x1, x2) = get_bounding_box_mask(poly_mask)
            img = img[y1:y2, x1:x2]

        desc = pyramid.compute(img)
        desc = normalize_hist(desc)
        descriptors.append(desc)
        
    descriptors = np.vstack(descriptors) if len(descriptors) else np.empty((0, np.prod(PYRAMID_CONFIG["bins"]) * (4**PYRAMID_CONFIG["levels"])))
    
    with open(cache_file, "wb") as f:
        pickle.dump({"image_list": image_list, "descriptors": descriptors}, f)
        
    return descriptors

def denoise_and_list_queries(src_folder, out_folder, threshold=40, radius_ratio=0.75,
                             median_ksize=3, nl_h=5, nl_t=3, nl_s=21):
    """Denoise all images from src_folder to out_folder, keep filenames list."""
    os.makedirs(out_folder, exist_ok=True)
    names = sorted([f for f in os.listdir(src_folder) if f.lower().endswith('.jpg')])
    for fname in tqdm(names, desc="Denoising queries"):
        noisy = cv2.imread(os.path.join(src_folder, fname), cv2.IMREAD_COLOR)
        if noisy is None:
            raise FileNotFoundError(f"Cannot read image {os.path.join(src_folder, fname)}")
        score = fourier_noise_score(noisy, radius_ratio=radius_ratio)
        if score > threshold:
            img = remove_noise_median(noisy, ksize=median_ksize)
            img = remove_noise_nlmeans(img, h=nl_h, templateWindowSize=nl_t, searchWindowSize=nl_s)
        else:
            img = noisy
        cv2.imwrite(os.path.join(out_folder, fname), img)
    return names  # filenames preserved

def assert_2d_nonempty(name, arr):
    if arr is None or arr.ndim != 2 or arr.shape[0] == 0 or arr.shape[1] == 0:
        raise ValueError(f"{name} must be a non-empty 2D array, got shape {None if arr is None else arr.shape}")

def evaluate_best_method():
    print("=== Evaluating Best Week 2 Method (with denoised queries) ===")
    print("Method: Pyramid Histogram Fusion")
    print(f"Configuration: {PYRAMID_CONFIG}")
    
    # --- 0) Prepare lists ---
    db_images = sorted([f for f in os.listdir(DB_PATH) if f.lower().endswith('.jpg')])
    print(f"Found {len(db_images)} database images")

    # Denoise queries from Week3 noisy folder into a temp folder (same filenames)
    denoised_folder = "./denoised_images"
    query_images = denoise_and_list_queries(QUERY_PATH, denoised_folder, threshold=40)

    print(f"Found {len(query_images)} query images (after denoising)")
    
    # --- 1) Load GT (keep your path; change if you actually have Week3 GT elsewhere) ---
    with open(GT_PATH, "rb") as f:
        gt = pickle.load(f)
    print(f"Ground truth length: {len(gt)}")
    
    # --- 2) Build descriptors ---
    # DB (no background crop)
    db_hsv = build_descriptors_cached(db_images, DB_PATH, "HSV", "db", crop_background=False)
    db_hls = build_descriptors_cached(db_images, DB_PATH, "HLS", "db", crop_background=False)
    # QUERIES (on denoised images; keep crop if you want the same ROI behavior as before)
    query_hsv = build_descriptors_cached(query_images, denoised_folder, "HSV", "query", crop_background=True)
    query_hls = build_descriptors_cached(query_images, denoised_folder, "HLS", "query", crop_background=True)

    # Safety checks
    assert_2d_nonempty("db_hsv", db_hsv)
    assert_2d_nonempty("db_hls", db_hls)
    assert_2d_nonempty("query_hsv", query_hsv)
    assert_2d_nonempty("query_hls", query_hls)

    # --- 3) Similarities ---
    print("\nComputing similarities...")
    sim_funcs = [l1_distance_matrix, histogram_intersection_matrix, kl_divergence_matrix]

    sim_hsv = np.stack([f(query_hsv, db_hsv) for f in sim_funcs], axis=-1)
    sim_hls = np.stack([f(query_hls, db_hls) for f in sim_funcs], axis=-1)

    # Convert histogram intersection (higher = better) to a distance by negating
    sim_hsv[..., 1] *= -1
    sim_hls[..., 1] *= -1

    # --- 4) Fuse ---
    w1 = np.array([0.0, 1.0, 0.5])  # HSV weights
    w2 = np.array([0.5, 0.5, 0.0])  # HLS weights
    weighted_hsv = np.tensordot(sim_hsv, w1, axes=([2], [0]))
    weighted_hls = np.tensordot(sim_hls, w2, axes=([2], [0]))
    combined = 0.5 * (weighted_hsv + weighted_hls)

    # --- 5) Rankings ---
    print("\nEvaluating rankings...")
    predictions = []
    for q_idx in range(combined.shape[0]):
        top_k = np.argsort(combined[q_idx])[:5]
        pred_ids = [filename_to_id(db_images[idx]) for idx in top_k]
        predictions.append(pred_ids)

    # --- 6) MAP ---
    map1 = mapk(gt, predictions, k=1)
    map5 = mapk(gt, predictions, k=5)

    print(f"\n=== Results ===")
    print(f"MAP@1: {map1:.4f}")
    print(f"MAP@5: {map5:.4f}")
    return map1, map5

if __name__ == "__main__":
    evaluate_best_method()


=== Evaluating Best Week 2 Method (with denoised queries) ===
Method: Pyramid Histogram Fusion
Configuration: {'bins': (4, 4, 4), 'levels': 4, 'weights': 'uniform'}
Found 287 database images


Denoising queries: 100%|██████████| 31/31 [00:11<00:00,  2.76it/s]


Found 31 query images (after denoising)
Ground truth length: 30
Computing HSV descriptors for db/BBDD...


100%|██████████| 287/287 [00:36<00:00,  7.89it/s]


Computing HLS descriptors for db/BBDD...


100%|██████████| 287/287 [00:38<00:00,  7.40it/s]


Computing HSV descriptors for query/denoised_images...


100%|██████████| 31/31 [00:10<00:00,  2.93it/s]


Computing HLS descriptors for query/denoised_images...


100%|██████████| 31/31 [00:09<00:00,  3.15it/s]



Computing similarities...

Evaluating rankings...

=== Results ===
MAP@1: 0.5333
MAP@5: 0.5833
