# Global ROI Analysis (Per Image)

This notebook processes every image (`.png`, `.jpg`, `.jpeg`) found in `../data/raw/` (path relative to this notebook).
For **each image**, it:
1.  Computes an ensemble score from 5 filters (Harris, Variance, Edge, Finder, Gabor).
2.  Extracts up to the **Top 200** ROIs (fewer if not enough candidates survive) using IoU-based Non-Maximum Suppression (NMS).
3.  Visualizes these ROIs directly on the image.


In [14]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact
from pathlib import Path
from tqdm.notebook import tqdm

%matplotlib inline

## 1. Filter Implementations

In [15]:
def normalize01(x: np.ndarray) -> np.ndarray:
    """Min-max rescale an array into the range [0, 1] as float32.

    Parameters
    ----------
    x : np.ndarray or None
        Numeric array of any shape. ``None`` is passed straight through.

    Returns
    -------
    np.ndarray or None
        * ``None`` when ``x`` is ``None``.
        * An empty float32 array of the same shape when ``x`` has no elements.
        * All zeros when ``max - min`` is below ``1e-6`` (constant input).
        * Otherwise ``(x - min) / (max - min)``.
    """
    if x is None:
        return None
    x = x.astype(np.float32)
    # min()/max() raise ValueError on zero-size arrays; there is nothing to
    # rescale, so hand back the (already float32) empty array.
    if x.size == 0:
        return x
    mn, mx = float(x.min()), float(x.max())
    if mx - mn < 1e-6:
        # Constant input: avoid dividing by (almost) zero.
        return np.zeros_like(x, dtype=np.float32)
    return (x - mn) / (mx - mn)

# --- 1. Harris Corner Density ---
def filter_harris(gray, block_size=2, k=0.04, density_radius=15, thresh_rel=0.01):
    """Corner-density map built from the Harris response of ``gray``.

    The response from ``cv2.cornerHarris`` (third argument fixed to 3) is
    binarised at ``thresh_rel`` times its maximum, box-averaged over a
    ``2 * density_radius + 1`` square window, and rescaled with ``normalize01``.
    """
    response = cv2.cornerHarris(gray, block_size, 3, k)
    is_corner = response > thresh_rel * response.max()
    window = 2 * density_radius + 1
    corner_density = cv2.blur(is_corner.astype(np.float32), (window, window))
    return normalize01(corner_density)

# --- 2. Local Variance (Texture Energy) ---
def filter_variance(gray, window_size=21):
    """Local standard-deviation map of ``gray``, rescaled with ``normalize01``.

    ``gray`` is divided by 255 and the windowed variance is computed as
    ``mean(x^2) - mean(x)^2`` using box filters of side ``window_size``
    (an even ``window_size`` is bumped to the next odd value).
    """
    side = window_size + 1 if window_size % 2 == 0 else window_size
    ksize = (side, side)

    scaled = gray.astype(np.float32) / 255.0
    local_mean = cv2.blur(scaled, ksize)
    local_mean_sq = cv2.blur(scaled * scaled, ksize)

    # Clamp negatives (e.g. from rounding) before taking the square root.
    local_var = np.maximum(local_mean_sq - local_mean * local_mean, 0)
    return normalize01(np.sqrt(local_var))

# --- 3. Edge Density ---
def filter_edge_density(gray, thresh1=50, thresh2=150, radius=15):
    """Edge-density map: box-averaged Canny edge mask, rescaled with ``normalize01``.

    ``thresh1`` / ``thresh2`` are passed to ``cv2.Canny``; the averaging
    window is a square of side ``2 * radius + 1``.
    """
    window = 2 * radius + 1
    edge_mask = cv2.Canny(gray, thresh1, thresh2) > 0
    edge_density = cv2.blur(edge_mask.astype(np.float32), (window, window))
    return normalize01(edge_density)

# --- 4. Finder Pattern Match ---
def filter_finder_pattern(gray, module_size_px=3):
    """Score each pixel by its similarity to a concentric-square template.

    The template is a ``7 * module_size_px`` square made of a one-module outer
    ring (-1), a one-module inner ring (+1) and a 3x3-module centre (-1). It is
    matched against ``gray`` with ``cv2.TM_CCOEFF_NORMED``; negative
    correlations are clipped to 0.

    Parameters
    ----------
    gray : np.ndarray
        Single-channel image (2-D array).
    module_size_px : int
        Side of one template module in pixels; values below 1 are treated as 1.

    Returns
    -------
    np.ndarray
        float32 map with the same height/width as ``gray``, rescaled with
        ``normalize01``. Each response is placed at the centre of the window it
        was computed on; a border of half the template size is zero because no
        full window fits there. If ``gray`` is smaller than the template an
        all-zero map is returned.
    """
    ms = max(1, module_size_px)
    template_size = 7 * ms

    # Build the ring template from the outside in.
    tpl = np.full((template_size, template_size), -1.0, dtype=np.float32)
    tpl[ms:template_size - ms, ms:template_size - ms] = 1.0
    tpl[2 * ms:template_size - 2 * ms, 2 * ms:template_size - 2 * ms] = -1.0
    tpl = tpl - tpl.mean()

    img_h, img_w = gray.shape[:2]
    if img_h < template_size or img_w < template_size:
        # matchTemplate needs an image at least as large as the template.
        return np.zeros((img_h, img_w), dtype=np.float32)

    gray_f = gray.astype(np.float32)
    match = cv2.matchTemplate(gray_f, tpl, cv2.TM_CCOEFF_NORMED)
    match = np.maximum(match, 0)

    # matchTemplate returns an (H - t + 1, W - t + 1) map whose entry [y, x]
    # scores the window with top-left corner (x, y). Stretching that map back
    # to (H, W) would shift and scale the responses relative to the image, so
    # pad instead: each score lands on its window centre and the result stays
    # pixel-aligned with the other filter maps.
    lead = template_size // 2
    trail = template_size - 1 - lead
    match = np.pad(match, ((lead, trail), (lead, trail)), mode="constant")
    return normalize01(match)

# --- 5. Gabor Cross-Energy ---
def filter_gabor_cross(gray, wavelength=5.0, sigma=3.0, k_size=15):
    """Product of the magnitudes of two Gabor responses, rescaled with ``normalize01``.

    ``gray`` is filtered with two ``k_size`` x ``k_size`` Gabor kernels that
    differ only in orientation (theta = 0 and theta = pi/2); the output is
    ``sqrt(r0**2 * r1**2)`` of the two responses.
    """
    image = gray.astype(np.float32)
    ksize = (k_size, k_size)

    responses = [
        cv2.filter2D(
            image,
            cv2.CV_32F,
            cv2.getGaborKernel(ksize, sigma, theta, wavelength, 1.0, 0, ktype=cv2.CV_32F),
        )
        for theta in (0, np.pi / 2)
    ]
    resp_0, resp_90 = responses

    return normalize01(np.sqrt((resp_0 ** 2) * (resp_90 ** 2)))

## 2. Processing Pipeline with IoU-based NMS

In [16]:
def get_combined_score(gray):
    """Average the five filter maps for ``gray`` and rescale with ``normalize01``.

    Each filter is called with fixed parameters and returns a map in [0, 1];
    the maps are summed in order (Harris, variance, edge density, finder
    pattern, Gabor) and divided by 5.
    """
    filter_maps = (
        filter_harris(gray, block_size=2, k=0.04, density_radius=15),
        filter_variance(gray, window_size=21),
        filter_edge_density(gray, thresh1=50, thresh2=150, radius=15),
        filter_finder_pattern(gray, module_size_px=3),
        filter_gabor_cross(gray, wavelength=5.0, sigma=3.0),
    )

    total = filter_maps[0]
    for extra in filter_maps[1:]:
        total = total + extra

    return normalize01(total / 5.0)

def _collect_peak_candidates(score_map, patch_size, min_score, search_limit):
    """Greedily pick score peaks and return a ``patch_size`` box per peak.

    Returns ``(boxes, scores)`` where each box is ``[x, y, w, h]`` and
    ``scores`` holds the peak value that produced it.
    """
    h, w = score_map.shape[:2]
    s_map = score_map.copy()
    half = patch_size // 2

    # Largest top-left offsets that still keep a full patch inside the image
    # (0 when the image is smaller than the patch).
    max_x = max(0, w - patch_size)
    max_y = max(0, h - patch_size)

    # After each pick, zero a small window (~20% of the patch) around the peak
    # so the next iteration finds a different one. The radius is at least 1 so
    # the peak pixel itself is always cleared, even for tiny patch sizes.
    radius = max(1, int(patch_size * 0.2) // 2)

    boxes, scores = [], []
    for _ in range(search_limit):
        _, max_val, _, (cx, cy) = cv2.minMaxLoc(s_map)
        if max_val < min_score:
            break

        # Centre the box on the peak, then shift it back inside the image on
        # every side so NMS sees the same rectangle that will be drawn.
        x1 = int(min(max(0, cx - half), max_x))
        y1 = int(min(max(0, cy - half), max_y))
        boxes.append([x1, y1, patch_size, patch_size])
        scores.append(float(max_val))

        s_map[max(0, cy - radius):min(h, cy + radius),
              max(0, cx - radius):min(w, cx + radius)] = 0

    return boxes, scores


def process_image_and_draw_boxes(img_path, patch_size=128, top_k=200,
                                 min_score=0.1, iou_thresh=0.6, search_limit=2000):
    """Load an image, find high-scoring ROIs and draw them as green boxes.

    Parameters
    ----------
    img_path : str or Path
        Image file to read with ``cv2.imread``.
    patch_size : int
        Side length in pixels of each ROI box.
    top_k : int
        Maximum number of boxes drawn after NMS.
    min_score : float
        Peaks of the combined score map below this value are ignored; the same
        value is passed to ``cv2.dnn.NMSBoxes`` as ``score_threshold``.
    iou_thresh : float
        ``nms_threshold`` passed to ``cv2.dnn.NMSBoxes``.
    search_limit : int
        Maximum number of peaks collected before NMS.

    Returns
    -------
    np.ndarray or None
        A BGR copy of the image with the boxes drawn, or ``None`` if the image
        could not be read. When no peak reaches ``min_score`` the unmodified
        copy is returned.
    """
    img = cv2.imread(str(img_path))
    if img is None:
        return None
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # 1. Compute score map
    score_map = get_combined_score(gray)
    h, w = score_map.shape[:2]
    out_img = img.copy()

    # 2. Collect candidate boxes around score peaks
    boxes, scores = _collect_peak_candidates(score_map, patch_size, min_score, search_limit)
    if not boxes:
        return out_img

    # 3. IoU-based NMS
    keep = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=min_score, nms_threshold=iou_thresh)
    # The return type varies across OpenCV versions (empty tuple, Nx1 array or
    # flat array); normalise it to a flat integer array.
    keep = np.asarray(keep, dtype=int).reshape(-1)

    # 4. Draw the first top_k kept boxes, in the order NMSBoxes returned them.
    for i in keep[:top_k]:
        x, y, w_box, h_box = boxes[i]
        # Clip for drawing safety (only matters when the image is smaller than the patch).
        x2 = min(w, x + w_box)
        y2 = min(h, y + h_box)
        cv2.rectangle(out_img, (x, y), (x2, y2), (0, 255, 0), 2)

    return out_img

In [19]:
# Run Batch Processing
data_dir = Path("../data/raw")

# Match extensions case-insensitively so files such as "IMG_01.JPG" are not
# silently skipped on case-sensitive filesystems.
IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg"}
files = sorted(
    p for p in (data_dir.iterdir() if data_dir.is_dir() else [])
    if p.is_file() and p.suffix.lower() in IMAGE_SUFFIXES
)

# Cache results in memory for quick browsing
results_cache = {}
skipped = []  # names of files that could not be read as images

print(f"Processing {len(files)} images...")
for f in tqdm(files):
    res_img = process_image_and_draw_boxes(f, patch_size=256, top_k=200)
    if res_img is None:
        skipped.append(f.name)
        continue
    # Convert BGR to RGB for matplotlib
    results_cache[f.name] = cv2.cvtColor(res_img, cv2.COLOR_BGR2RGB)

if skipped:
    print(f"Skipped {len(skipped)} unreadable file(s): {skipped}")
print("Done!")

Processing 50 images...


  0%|          | 0/50 [00:00<?, ?it/s]

Done!


## 3. Visualization

In [18]:
img_options = sorted(results_cache)


def view_result(filename):
    """Show the cached annotated image for ``filename``.

    Does nothing when ``filename`` is not a key of ``results_cache``.
    """
    if filename in results_cache:
        fig, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(results_cache[filename])
        ax.axis("off")
        ax.set_title(f"{filename} - Top 200 ROIs (IoU<0.6)")
        plt.show()


# Wire the viewer to a dropdown of processed images, if there are any.
if not img_options:
    print("No results to show.")
else:
    image_picker = widgets.Dropdown(options=img_options, description="Image:")
    interact(view_result, filename=image_picker)

interactive(children=(Dropdown(description='Image:', options=('001.png', '002.png', '003.png', '004.png', '005…