# TP3: Detección del logo de Coca-Cola

## 1: Detección única por imagen

Encontrar el logotipo de la gaseosa dentro de las imágenes provistas.

### Estrategia

Para cada imagen se selecciona el método más adecuado según sus características:

| Imagen | Método | Justificación |
|--------|--------|---------------|
| coca_logo_1.png | Edge TM | Fondo limpio, bordes preservados tras Canny |
| coca_logo_2.png | SIFT | Superficie curva, distorsión de perspectiva |
| coca_multi.png | TM invertido | Múltiples logos, contraste invertido |
| coca_retro_1.png | SIFT | Logo retro estructuralmente diferente |
| coca_retro_2.png | SIFT | Logo curvado en emblema circular |
| COCA-COLA-LOGO.jpg | SIFT | Logo grande, fondo complejo |
| logo_1.png | TM invertido | Reflejos en vidrio, variaciones de iluminación |

In [20]:
%matplotlib qt
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from typing import List, Tuple, Dict, Optional

### Utility Functions

In [21]:
def load_image(path: str, max_size: int = 1200) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Read an image from disk and return it in three colour layouts.

    The image is shrunk (aspect ratio preserved) when its longest side is
    larger than ``max_size``; smaller images are returned at native size.

    Args:
        path: Location of the image file.
        max_size: Upper bound, in pixels, for the longest image side.

    Returns:
        ``(rgb, gray, bgr)`` versions of the (possibly resized) image.

    Raises:
        FileNotFoundError: If ``cv.imread`` returned ``None`` for ``path``.
    """
    bgr = cv.imread(path)
    if bgr is None:
        raise FileNotFoundError(f"Image not found: {path}")

    longest_side = max(bgr.shape[:2])
    if longest_side > max_size:
        factor = max_size / longest_side
        bgr = cv.resize(bgr, None, fx=factor, fy=factor)

    rgb = cv.cvtColor(bgr, cv.COLOR_BGR2RGB)
    gray = cv.cvtColor(bgr, cv.COLOR_BGR2GRAY)
    return rgb, gray, bgr


def load_template(path: str, max_size: int = 400) -> np.ndarray:
    """Read the template image as single-channel grayscale.

    The template is shrunk (aspect ratio preserved) when its longest side is
    larger than ``max_size``.

    Args:
        path: Location of the template file.
        max_size: Upper bound, in pixels, for the longest template side.

    Returns:
        The grayscale template, possibly resized.

    Raises:
        FileNotFoundError: If ``cv.imread`` returned ``None`` for ``path``.
    """
    # Flag 0 asks OpenCV to decode directly to grayscale.
    gray = cv.imread(path, 0)
    if gray is None:
        raise FileNotFoundError(f"Template not found: {path}")

    longest_side = max(gray.shape[:2])
    if longest_side <= max_size:
        return gray

    factor = max_size / longest_side
    return cv.resize(gray, None, fx=factor, fy=factor)


def preprocess_image(img_gray: np.ndarray, method: str = 'clahe') -> np.ndarray:
    """Apply one of the supported preprocessing recipes to a grayscale image.

    Supported ``method`` values:
        - 'clahe': Contrast Limited Adaptive Histogram Equalization
        - 'smooth': 3x3 Gaussian blur
        - 'equalize': global histogram equalization
        - 'clahe_smooth': CLAHE followed by a 3x3 Gaussian blur

    Any other value returns ``img_gray`` itself, unmodified.
    """
    if method == 'smooth':
        return cv.GaussianBlur(img_gray, (3, 3), 0)

    if method == 'equalize':
        return cv.equalizeHist(img_gray)

    if method in ('clahe', 'clahe_smooth'):
        equalizer = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        equalized = equalizer.apply(img_gray)
        if method == 'clahe':
            return equalized
        return cv.GaussianBlur(equalized, (3, 3), 0)

    # Unknown method: pass the input through untouched.
    return img_gray


def create_template_variants(template: np.ndarray) -> Dict[str, np.ndarray]:
    """Build the named template variants used for matching.

    Returns:
        A dict with, in this order, the keys 'original' (the input itself),
        'clahe' (CLAHE-equalized) and 'smooth' (3x3 Gaussian blur).
    """
    variants = {'original': template}
    variants['clahe'] = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(template)
    variants['smooth'] = cv.GaussianBlur(template, (3, 3), 0)
    return variants


def rotate_template(template: np.ndarray, angle: float) -> np.ndarray:
    """Rotate ``template`` by ``angle`` degrees on an enlarged canvas.

    The output canvas is sized to the bounding box of the rotated template,
    and the area uncovered by the rotation is filled with 255.
    """
    height, width = template.shape[:2]
    rot = cv.getRotationMatrix2D((width // 2, height // 2), angle, 1.0)

    # |cos| and |sin| of the rotation give the size of the enclosing box.
    abs_cos, abs_sin = np.abs(rot[0, 0]), np.abs(rot[0, 1])
    out_w = int(height * abs_sin + width * abs_cos)
    out_h = int(height * abs_cos + width * abs_sin)

    # Shift the translation so the rotated content sits in the new canvas.
    rot[0, 2] += (out_w - width) / 2
    rot[1, 2] += (out_h - height) / 2

    return cv.warpAffine(template, rot, (out_w, out_h), borderValue=255)


def compute_iou(box1: Tuple, box2: Tuple) -> float:
    """Return the intersection-over-union of two ``(x, y, w, h)`` boxes.

    Only the first four entries of each box are read, so longer detection
    tuples can be passed directly. Returns 0 when the union area is not
    positive.
    """
    left_a, top_a, wid_a, hei_a = box1[:4]
    left_b, top_b, wid_b, hei_b = box2[:4]

    # Overlap extent along each axis, floored at zero for disjoint boxes.
    overlap_w = max(0, min(left_a + wid_a, left_b + wid_b) - max(left_a, left_b))
    overlap_h = max(0, min(top_a + hei_a, top_b + hei_b) - max(top_a, top_b))

    intersection = overlap_w * overlap_h
    union = wid_a * hei_a + wid_b * hei_b - intersection

    if union > 0:
        return intersection / union
    return 0


def nms_global(detections: List[Tuple], iou_threshold: float = 0.3) -> List[Tuple]:
    """Greedy Non-Maximum Suppression over all detections at once.

    Detections are tuples whose first four entries are ``(x, y, w, h)`` and
    whose entry at index 4 is the score. They are visited from highest to
    lowest score; a detection survives only if its IoU with every
    already-accepted detection is below ``iou_threshold``.

    Returns:
        The surviving detections, ordered by descending score.
    """
    if not detections:
        return []

    ranked = sorted(detections, key=lambda det: det[4], reverse=True)

    survivors = []
    for candidate in ranked:
        suppressed = any(
            not (compute_iou(kept[:4], candidate[:4]) < iou_threshold)
            for kept in survivors
        )
        if not suppressed:
            survivors.append(candidate)

    return survivors


def draw_detection(img_rgb: np.ndarray, bbox: Tuple, color=(0, 255, 0), thickness=3) -> np.ndarray:
    """Return a copy of ``img_rgb`` with ``bbox`` drawn as a rectangle.

    ``bbox`` is ``(x, y, w, h, ...)``; only the first four entries are used.
    A falsy ``bbox`` (``None`` or empty) yields an unmarked copy. The input
    image is never modified.
    """
    canvas = img_rgb.copy()
    if not bbox:
        return canvas

    left, top, width, height = bbox[:4]
    cv.rectangle(canvas, (left, top), (left + width, top + height), color, thickness)
    return canvas


def plot_result(img_rgb: np.ndarray, bbox: Tuple, title: str, score: float = None):
    """Show one image with its detection box and a descriptive title.

    The title is ``title``, followed by the score (when ``score`` is not
    ``None``) and the box size (when ``bbox`` is truthy).
    """
    suffixes = []
    if score is not None:
        suffixes.append(f" | Score: {score:.3f}")
    if bbox:
        suffixes.append(f" | Size: {bbox[2]}x{bbox[3]}")
    title_str = title + "".join(suffixes)

    annotated = draw_detection(img_rgb, bbox)

    plt.figure(figsize=(10, 8))
    plt.imshow(annotated)
    plt.title(title_str)
    plt.axis('off')
    plt.tight_layout()
    plt.show()

### Core Detection Functions

In [22]:
def template_match_multiscale(
    img_gray: np.ndarray,
    template_variants: Dict[str, np.ndarray],
    scales: np.ndarray,
    threshold: float = 0.30,
    preprocess: Optional[str] = None,
    aspect_filter: Optional[Tuple[float, float]] = None,
    min_width_ratio: float = 0.05
) -> Tuple[Optional[Tuple], float, str]:
    """
    Multi-scale template matching that returns the single best match.

    Every template variant is resized to every scale and correlated against
    the preprocessed image with TM_CCOEFF_NORMED. For each response map the
    peak is accepted only if it reaches
    ``max(mean + 2.5 * std, threshold)``; the highest accepted peak over all
    (scale, variant) pairs wins, earlier pairs winning ties.

    Args:
        img_gray: Grayscale image (2-D array).
        template_variants: Mapping of variant name -> grayscale template.
        scales: Scale factors applied to each template.
        threshold: Absolute lower bound for the acceptance threshold.
        preprocess: Method name forwarded to ``preprocess_image``. A falsy
            value does NOT disable preprocessing: it selects the default
            Gaussian blur + histogram equalization.
        aspect_filter: Exclusive ``(min, max)`` bounds on the scaled
            template's width/height ratio; defaults to ``(0.5, 4.0)``.
        min_width_ratio: Minimum scaled-template width as a fraction of the
            image width.

    Returns: (bbox, score, method_info) where bbox is ``(x, y, w, h)`` in
        plain Python ints, or ``(None, 0.0, "No detection")``.
    """
    h, w = img_gray.shape

    # Preprocess image
    if preprocess:
        img_processed = preprocess_image(img_gray, preprocess)
    else:
        img_processed = cv.GaussianBlur(img_gray, (3, 3), 0)
        img_processed = cv.equalizeHist(img_processed)

    min_det_width = w * min_width_ratio
    if aspect_filter:
        aspect_lo, aspect_hi = aspect_filter[0], aspect_filter[1]
    else:
        aspect_lo, aspect_hi = 0.5, 4.0

    # Only the top-scoring detection is ever returned, so track the running
    # best instead of storing every above-threshold pixel and running NMS.
    best = None  # (x, y, w, h, score, variant_name, scale)

    for scale in scales:
        for name, tmpl in template_variants.items():
            th, tw = tmpl.shape
            new_w, new_h = int(tw * scale), int(th * scale)

            # Skip invalid sizes
            if new_w > w or new_h > h or new_w < 20 or new_h < 10:
                continue
            if new_w < min_det_width:
                continue

            # The aspect ratio depends only on the scaled size, so reject it
            # here, before paying for the correlation.
            if not (aspect_lo < new_w / new_h < aspect_hi):
                continue

            scaled_tmpl = cv.resize(tmpl, (new_w, new_h))
            res = cv.matchTemplate(img_processed, scaled_tmpl, cv.TM_CCOEFF_NORMED)

            # Adaptive threshold
            adaptive_thresh = max(np.mean(res) + 2.5 * np.std(res), threshold)

            # argmax returns the first peak in row-major order, matching the
            # order in which candidates used to be enumerated.
            y, x = np.unravel_index(np.argmax(res), res.shape)
            score = res[y, x]
            if not score >= adaptive_thresh:
                continue

            # Strict '>' keeps the earliest candidate on ties.
            if best is None or score > best[4]:
                best = (int(x), int(y), new_w, new_h, float(score), name, scale)

    if best is None:
        return None, 0.0, "No detection"

    bbox = (best[0], best[1], best[2], best[3])
    method_info = f"TM-{best[5]}@{best[6]:.2f}"

    return bbox, best[4], method_info


def template_match_edges(
    img_gray: np.ndarray,
    template: np.ndarray,
    scales: np.ndarray,
    threshold: float = 0.25,
    canny_low: int = 50,
    canny_high: int = 150,
    y_range: Optional[Tuple[int, int]] = None
) -> Tuple[Optional[Tuple], float, str]:
    """
    Edge-based template matching - robust to contrast inversion.

    Canny edge maps of the image and the template are dilated, the template
    edge map is resized to every scale, and both are correlated with
    TM_CCOEFF_NORMED. For each scale the peak inside the allowed rows is
    accepted only if it reaches ``max(mean + 2.5 * std, threshold)``
    (statistics taken over the whole response map); the highest accepted
    peak wins, earlier scales winning ties.

    Args:
        img_gray: Grayscale image
        template: Grayscale template
        scales: Array of scales to test
        threshold: Minimum score threshold
        canny_low/high: Canny edge detection thresholds
        y_range: Optional inclusive (y_min, y_max) bounds on the top edge of
            the detection, used to restrict the detection area

    Returns: (bbox, score, method_info) where bbox is ``(x, y, w, h)`` in
        plain Python ints, or ``(None, 0.0, "No detection (edges)")``.
    """
    h, w = img_gray.shape
    th, tw = template.shape

    # Create edge versions
    template_edges = cv.Canny(template, canny_low, canny_high)
    img_edges = cv.Canny(img_gray, canny_low, canny_high)

    # Dilate edges for better matching
    kernel = np.ones((2, 2), np.uint8)
    template_edges = cv.dilate(template_edges, kernel, iterations=1)
    img_edges = cv.dilate(img_edges, kernel, iterations=1)

    # Only the top-scoring detection is ever returned, so track the running
    # best instead of storing every above-threshold pixel and running NMS.
    best = None  # (x, y, w, h, score, scale)

    for scale in scales:
        new_w, new_h = int(tw * scale), int(th * scale)

        # new_h < 1 guards against resizing to an empty template.
        if new_w >= w or new_h >= h or new_w < 40 or new_h < 1:
            continue

        scaled_edges = cv.resize(template_edges, (new_w, new_h))
        res = cv.matchTemplate(img_edges, scaled_edges, cv.TM_CCOEFF_NORMED)

        # Adaptive threshold
        adaptive_thresh = max(np.mean(res) + 2.5 * np.std(res), threshold)

        # Y-range filter (if specified): search only the permitted rows.
        row_lo, row_hi = 0, res.shape[0] - 1
        if y_range:
            row_lo = max(row_lo, int(np.ceil(y_range[0])))
            row_hi = min(row_hi, int(np.floor(y_range[1])))
            if row_lo > row_hi:
                continue
        window = res[row_lo:row_hi + 1]

        # argmax returns the first peak in row-major order, matching the
        # order in which candidates used to be enumerated.
        dy, x = np.unravel_index(np.argmax(window), window.shape)
        score = window[dy, x]
        if not score >= adaptive_thresh:
            continue

        # Strict '>' keeps the earliest candidate on ties.
        if best is None or score > best[4]:
            best = (int(x), int(row_lo + dy), new_w, new_h, float(score), scale)

    if best is None:
        return None, 0.0, "No detection (edges)"

    bbox = (best[0], best[1], best[2], best[3])
    method_info = f"TM-edges@{best[5]:.2f}"

    return bbox, best[4], method_info


def template_match_with_rotation(
    img_gray: np.ndarray,
    template_variants: Dict[str, np.ndarray],
    scales: np.ndarray,
    rotations: List[float],
    threshold: float = 0.30,
    preprocess: Optional[str] = None,
    min_width_ratio: float = 0.10
) -> Tuple[Optional[Tuple], float, str]:
    """
    Multi-scale + multi-rotation template matching, returning the best match.

    Each template variant is rotated by every angle, resized to every scale
    and correlated against the preprocessed image with TM_CCOEFF_NORMED.
    For each response map the peak is accepted only if it reaches
    ``max(mean + 2.5 * std, threshold)``; the highest accepted peak wins,
    earlier (rotation, scale, variant) combinations winning ties.

    Args:
        img_gray: Grayscale image (2-D array).
        template_variants: Mapping of variant name -> grayscale template.
        scales: Scale factors applied to each rotated template.
        rotations: Angles in degrees; 0 uses the template unrotated.
        threshold: Absolute lower bound for the acceptance threshold.
        preprocess: Method name forwarded to ``preprocess_image``. A falsy
            value selects the default Gaussian blur + histogram equalization.
        min_width_ratio: Minimum scaled-template width as a fraction of the
            image width.

    Returns: (bbox, score, method_info) where bbox is ``(x, y, w, h)`` in
        plain Python ints, or ``(None, 0.0, "No detection")``.
    """
    h, w = img_gray.shape

    # Preprocess image
    if preprocess:
        img_processed = preprocess_image(img_gray, preprocess)
    else:
        img_processed = cv.GaussianBlur(img_gray, (3, 3), 0)
        img_processed = cv.equalizeHist(img_processed)

    min_det_width = w * min_width_ratio

    # Only the top-scoring detection is ever returned, so track the running
    # best instead of storing every above-threshold pixel and running NMS.
    best = None  # (x, y, w, h, score, variant_name, scale, rotation)

    for rotation in rotations:
        # Rotation is independent of scale: rotate each variant once per
        # angle rather than once per (angle, scale) pair.
        if rotation != 0:
            rotated_variants = {
                name: rotate_template(tmpl, rotation)
                for name, tmpl in template_variants.items()
            }
        else:
            rotated_variants = template_variants

        for scale in scales:
            for name, tmpl_rot in rotated_variants.items():
                th, tw = tmpl_rot.shape
                new_w, new_h = int(tw * scale), int(th * scale)

                if new_w > w or new_h > h or new_w < 20 or new_h < 10:
                    continue
                if new_w < min_det_width:
                    continue

                scaled_tmpl = cv.resize(tmpl_rot, (new_w, new_h))
                res = cv.matchTemplate(img_processed, scaled_tmpl, cv.TM_CCOEFF_NORMED)

                adaptive_thresh = max(np.mean(res) + 2.5 * np.std(res), threshold)

                # argmax returns the first peak in row-major order, matching
                # the order in which candidates used to be enumerated.
                y, x = np.unravel_index(np.argmax(res), res.shape)
                score = res[y, x]
                if not score >= adaptive_thresh:
                    continue

                # Strict '>' keeps the earliest candidate on ties.
                if best is None or score > best[4]:
                    best = (int(x), int(y), new_w, new_h, float(score), name, scale, rotation)

    if best is None:
        return None, 0.0, "No detection"

    bbox = (best[0], best[1], best[2], best[3])
    method_info = f"TM-{best[5]}@{best[6]:.2f},rot={best[7]}"

    return bbox, best[4], method_info


def detect_by_sift(
    img_gray: np.ndarray,
    template: np.ndarray,
    min_matches: int = 8,
    ratio: float = 0.75,
    min_bbox_ratio: float = 0.02,
    max_bbox_ratio: float = 0.95
) -> Tuple[Optional[Tuple], float, str]:
    """
    SIFT-based detection with homography.

    The template is tried as given and then inverted (``255 - template``);
    the first polarity that yields a plausible box is returned. For each
    polarity, SIFT descriptors are matched with FLANN, filtered with Lowe's
    ratio test, and a RANSAC homography projects the template corners into
    the image. The bounding rectangle of the projected corners, clipped to
    the image, is the detection.

    Args:
        img_gray: Grayscale image
        template: Grayscale template
        min_matches: Minimum number of good matches required
        ratio: Lowe's ratio test threshold (0.7 stricter, 0.75-0.8 more relaxed)
        min_bbox_ratio: Minimum bbox area as ratio of image area
        max_bbox_ratio: Maximum bbox area as ratio of image area

    Returns: (bbox, score, method_info) where bbox is ``(x, y, w, h)`` and
        score is ``good_matches / 100``; ``(None, 0.0, reason)`` on failure.
    """
    img_h, img_w = img_gray.shape
    img_area = img_h * img_w

    sift = cv.SIFT_create(nfeatures=2000)
    kp2, des2 = sift.detectAndCompute(img_gray, None)

    if des2 is None or len(des2) < min_matches:
        return None, 0.0, "SIFT: insufficient features in image"

    # A homography needs at least 4 correspondences, whatever the caller's
    # min_matches says.
    required_matches = max(min_matches, 4)

    # FLANN matcher (built once; it does not depend on the template polarity)
    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv.FlannBasedMatcher(index_params, search_params)

    # Try both normal and inverted template
    for tmpl_name, tmpl in [('normal', template), ('inverted', 255 - template)]:
        kp1, des1 = sift.detectAndCompute(tmpl, None)

        if des1 is None or len(des1) < 4:
            continue

        matches = flann.knnMatch(des1, des2, k=2)

        # Lowe's ratio test with configurable ratio; pairs with fewer than
        # two neighbours cannot be tested and are dropped.
        good = [
            pair[0] for pair in matches
            if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance
        ]

        if len(good) < required_matches:
            continue

        src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
        dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

        M, _ = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 5.0)

        if M is None:
            continue

        th, tw = tmpl.shape
        pts = np.float32([[0, 0], [0, th - 1], [tw - 1, th - 1], [tw - 1, 0]]).reshape(-1, 1, 2)
        dst = cv.perspectiveTransform(pts, M)
        x, y, bw, bh = cv.boundingRect(dst)

        # Clip the rectangle to the image. Both corners are clipped so that
        # a box starting off-image shrinks instead of being shifted inward
        # at its original size.
        x0, y0 = max(0, x), max(0, y)
        x1, y1 = min(img_w, x + bw), min(img_h, y + bh)
        bw, bh = x1 - x0, y1 - y0

        # Absolute minimum size (also rejects boxes entirely off-image,
        # whose clipped width/height is <= 0)
        if bw < 20 or bh < 10:
            continue

        # Validate bounding box size relative to image
        bbox_area = bw * bh
        if bbox_area < img_area * min_bbox_ratio:
            continue  # Too small
        if bbox_area > img_area * max_bbox_ratio:
            continue  # Too large (likely bad homography)

        if bw / bh > 10 or bh / bw > 5:
            continue  # Unreasonable aspect ratio

        score = len(good) / 100.0  # Normalize score
        return (x0, y0, bw, bh), score, f"SIFT-{tmpl_name}({len(good)} matches)"

    return None, 0.0, "SIFT: no valid homography found"

### Load Template

In [23]:
# Read the reference logo once and derive the variants shared by later cells
TEMPLATE_PATH = 'template/pattern.png'
template = load_template(TEMPLATE_PATH)
template_variants = create_template_variants(template)

print(f"Template shape: {template.shape}")
print(f"Variants: {list(template_variants.keys())}")

# One panel per variant, side by side
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
for ax, name in zip(axes, template_variants):
    img = template_variants[name]
    ax.imshow(img, cmap='gray')
    ax.set_title(name)
    ax.axis('off')
plt.suptitle('Template Variants')
plt.tight_layout()
plt.show()

# Per-image detection records are accumulated here by the cells that follow
results_summary = []

Template shape: (175, 400)
Variants: ['original', 'clahe', 'smooth']


---
### Image Detection

#### 1 coca_logo_1.png

Se utilizó template matching basado en bordes porque el fondo es limpio y el contorno del logotipo se preserva bien tras Canny, logrando una coincidencia muy estable. Además, el uso de un rango de escalas reducido y un filtro por posición vertical permite evitar falsas detecciones en áreas sin contenido relevante.

**Características:** Botella con etiqueta frontal, texto BLANCO sobre fondo ROJO.  
**Problema:** En escala de grises, el contraste está invertido respecto al template.  
**Estrategia:** Template matching basado en bordes (Canny es invariante a la inversión de contraste).  
**Escalas:** 0.30 - 0.55

In [24]:
# coca_logo_1.png - bottle with white text on a red background
# Problem: the contrast is inverted in grayscale
# Solution: match on edges, which do not depend on contrast polarity
img_name = "coca_logo_1.png"
img_rgb, img_gray, img_bgr = load_image(f"images/{img_name}")

print(f"Image size: {img_gray.shape}")

# Scale band tuned for this image
scales = np.linspace(0.30, 0.55, 20)

# Express the vertical search band as fractions of the image height so it
# stays valid if the image is resized on load
h, w = img_gray.shape
y_min = int(0.3 * h)
y_max = int(0.7 * h)
label_band = (y_min, y_max)  # restrict to the label area (avoid false positives)

bbox, score, method = template_match_edges(
    img_gray, template, scales=scales, threshold=0.25, y_range=label_band
)

print(
    f"Image: {img_name}",
    f"Method: {method}",
    f"Score: {score:.3f}",
    f"BBox: {bbox}",
    sep="\n",
)

results_summary.append({'image': img_name, 'method': method, 'score': score, 'bbox': bbox})

Image size: (500, 207)
Image: coca_logo_1.png
Method: TM-edges@0.42
Score: 0.411
BBox: (np.int64(31), np.int64(198), 167, 73)


#### 2 coca_logo_2.png

Para esta imagen se aplicó SIFT debido a la curvatura del envase y las variaciones de iluminación, que afectan la correlación directa de plantillas pero no la coincidencia de puntos clave. La homografía obtenida a partir de los matches permite recuperar con precisión la región del logotipo pese a la deformación de perspectiva.

**Características:** Lata con texto BLANCO sobre fondo ROJO, superficie curva, gotas de agua.  
**Problema:** Template matching falla debido a la distorsión por curvatura e inversión de contraste.  
**Estrategia:** SIFT con template invertido (el texto blanco genera features que coinciden con el template invertido).  
**Nota:** SIFT maneja la distorsión de perspectiva de la superficie curva.

In [25]:
# coca_logo_2.png - can with a curved surface
# Problem: template matching fails (surface curvature + contrast inversion)
# Solution: SIFT, which also tries the inverted template
img_name = "coca_logo_2.png"
img_rgb, img_gray, img_bgr = load_image(f"images/{img_name}")

print(f"Image size: {img_gray.shape}")

# Keypoint matching + homography copes with the perspective distortion
bbox, score, method = detect_by_sift(img_gray, template, min_matches=8)

print(
    f"Image: {img_name}",
    f"Method: {method}",
    f"Score: {score:.3f}",
    f"BBox: {bbox}",
    sep="\n",
)

results_summary.append({'image': img_name, 'method': method, 'score': score, 'bbox': bbox})

Image size: (363, 233)
Image: coca_logo_2.png
Method: SIFT-inverted(48 matches)
Score: 0.480
BBox: (0, 95, 233, 133)


#### 3 coca_multi.png

Se utilizó template matching invertido y un rango de escalas estrecho porque todos los logos aparecen a tamaño similar y contrastan de manera consistente con el fondo. La supresión global de no-máximos permite conservar solo la mejor detección individual, cumpliendo con la restricción de una única coincidencia por imagen.

**Características:** Estante con múltiples botellas, texto BLANCO sobre etiquetas ROJAS.  
**Problema:** Múltiples logos similares, inversión de contraste en escala de grises.  
**Estrategia:** Template matching invertido con CLAHE (corrige la inversión de contraste).  
**Escalas:** Derivadas del tamaño esperado del logo (~10% del ancho de imagen).  
**Nota:** Para Assignment 1, se detecta solo UN logo (mejor coincidencia).

In [26]:
# coca_multi.png - shelf with many bottles
# Problem: white text on red background = contrast inversion
# Solution: inverted template variants, CLAHE on the image
img_name = "coca_multi.png"
img_rgb, img_gray, img_bgr = load_image(f"images/{img_name}")

print(f"Image size: {img_gray.shape}")

# Inverted template variants (for white-on-red labels)
template_inv = 255 - template
inverted_variants = {
    'inverted': template_inv,
    'inverted_clahe': preprocess_image(template_inv, 'clahe'),
}

# The scale band is derived from the expected logo size: a bottle-label logo
# is taken to be ~80px wide in an 800px-wide shelf image, which gives a
# width ratio that carries over to any image size.
h, w = img_gray.shape
th, tw = template.shape

reference_logo_width = 80  # expected logo width in pixels (for ~800px wide image)
reference_image_width = 800
expected_logo_ratio = reference_logo_width / reference_image_width  # ~0.10

# Translate the ratio into a template scale for the actual image width
expected_logo_width = w * expected_logo_ratio
center_scale = expected_logo_width / tw  # scale at which the template has the expected width

# Scale band: from 0.6x to 1.5x the center scale
scale_min, scale_max = center_scale * 0.6, center_scale * 1.5
scales = np.linspace(scale_min, scale_max, 20)

print(f"Expected logo width: ~{expected_logo_width:.0f}px")
print(f"Scale range: {scale_min:.2f} - {scale_max:.2f}")

# Accept logos down to half of the expected width
min_width_ratio = expected_logo_ratio * 0.5

bbox, score, method = template_match_multiscale(
    img_gray,
    inverted_variants,
    scales=scales,
    threshold=0.35,
    preprocess='clahe',
    aspect_filter=(1.8, 3.5),  # Coca-Cola logo aspect ratio
    min_width_ratio=min_width_ratio,
)

# NOTE: Assignment 2 (multiple detections) needs every surviving detection,
# not only the best one returned here.

print(
    f"Image: {img_name}",
    f"Method: {method}",
    f"Score: {score:.3f}",
    f"BBox: {bbox}",
    sep="\n",
)

results_summary.append({'image': img_name, 'method': method, 'score': score, 'bbox': bbox})

Image size: (598, 799)
Expected logo width: ~80px
Scale range: 0.12 - 0.30
Image: coca_multi.png
Method: TM-inverted_clahe@0.23
Score: 0.501
BBox: (np.int64(274), np.int64(146), 93, 40)


#### 4 coca_retro_1.png

Se aplicó SIFT porque el logotipo retro difiere estructuralmente del template moderno, lo que hace que la correlación clásica falle al no haber similitud pixel-a-pixel. Los puntos clave permiten encontrar correspondencias parciales y estimar una homografía incluso cuando la forma global del logotipo no coincide con la plantilla.

**Características:** Etiqueta vintage B/N, logotipo estilizado diferente al template.  
**Problema:** Diferencias estructurales entre el logo retro y el template moderno.  
**Estrategia:** SIFT con `min_matches=6` (menor umbral por diferencias estructurales).  
**Fallback:** Template matching con aspect ratio amplio (1.2, 4.0) para formas retro.

In [27]:
# coca_retro_1.png - vintage label (structurally different from the template)
img_name = "coca_retro_1.png"
img_rgb, img_gray, img_bgr = load_image(f"images/{img_name}")

print(f"Image size: {img_gray.shape}")

# SIFT goes first. min_matches is lowered to 6 because the vintage logo has
# a different shape and therefore fewer strong correspondences.
bbox, score, method = detect_by_sift(img_gray, template, min_matches=6)

# Template matching is only the fallback for a failed SIFT detection
sift_failed = bbox is None
if sift_failed:
    print("SIFT failed, trying template matching...")
    scales = np.linspace(0.5, 2.0, 25)
    bbox, score, method = template_match_multiscale(
        img_gray,
        template_variants,
        scales=scales,
        threshold=0.25,            # lower threshold for a difficult case
        preprocess='clahe',
        aspect_filter=(1.2, 4.0),  # wider range for the retro logo shape
    )

print(
    f"Image: {img_name}",
    f"Method: {method}",
    f"Score: {score:.3f}",
    f"BBox: {bbox}",
    sep="\n",
)

results_summary.append({'image': img_name, 'method': method, 'score': score, 'bbox': bbox})

Image size: (493, 715)
Image: coca_retro_1.png
Method: SIFT-normal(45 matches)
Score: 0.450
BBox: (62, 84, 579, 206)


#### 5 coca_retro_2.png

La detección se resolvió con SIFT ya que el logotipo aparece rotado, curvado y dentro de un disco circular, condiciones que degradan el desempeño del template matching. SIFT permite identificar características locales invariantes y recuperar la transformación geométrica del emblema con alta estabilidad.

**Características:** Póster vintage con emblema circular rojo, texto BLANCO sobre fondo ROJO.  
**Problema:** Texto curvado en emblema circular, inversión de contraste.  
**Estrategia:** SIFT con template invertido (maneja curvatura y contraste).  
**Nota:** SIFT funciona mejor que template matching para logos curvados/distorsionados.

In [28]:
# coca_retro_2.png - vintage poster with a circular badge
# Problem: white text on red, logo curved along the badge
# Solution: SIFT, which also tries the inverted template
img_name = "coca_retro_2.png"
img_rgb, img_gray, img_bgr = load_image(f"images/{img_name}")

print(f"Image size: {img_gray.shape}")

# Keypoint matching tolerates the curved/distorted lettering
bbox, score, method = detect_by_sift(img_gray, template, min_matches=8)

print(
    f"Image: {img_name}",
    f"Method: {method}",
    f"Score: {score:.3f}",
    f"BBox: {bbox}",
    sep="\n",
)

results_summary.append({'image': img_name, 'method': method, 'score': score, 'bbox': bbox})

Image size: (429, 715)
Image: coca_retro_2.png
Method: SIFT-inverted(33 matches)
Score: 0.330
BBox: (61, 187, 162, 63)


#### 6 COCA-COLA-LOGO.jpg

Se empleó SIFT porque el logotipo ocupa una región grande, con gradientes complejos, sombras y un fondo texturizado que altera fuertemente la correlación normalizada. Los descriptores locales permiten detectar el texto independientemente del color y la iluminación, obteniendo una caja bien ajustada mediante homografía.

**Características:** Imagen grande (1389x1389), texto BLANCO sobre emblema circular ROJO.  
**Problema:** Logo grande, inversión de contraste, fondo complejo (botella, hielo, burbujas).  
**Estrategia:** SIFT con template invertido (maneja escala y contraste).  
**Nota:** SIFT encuentra ~61 matches (ver la salida de la celda), proporcionando detección robusta.

In [29]:
# COCA-COLA-LOGO.jpg - large image with a complex background
# Problem: white text on red, very large logo
# Solution: SIFT, which also tries the inverted template
img_name = "COCA-COLA-LOGO.jpg"
img_rgb, img_gray, img_bgr = load_image(f"images/{img_name}")

print(f"Image size: {img_gray.shape}")

# Keypoint matching copes with the large scale difference to the template
bbox, score, method = detect_by_sift(img_gray, template, min_matches=8)

print(
    f"Image: {img_name}",
    f"Method: {method}",
    f"Score: {score:.3f}",
    f"BBox: {bbox}",
    sep="\n",
)

results_summary.append({'image': img_name, 'method': method, 'score': score, 'bbox': bbox})

Image size: (1200, 1200)
Image: COCA-COLA-LOGO.jpg
Method: SIFT-inverted(61 matches)
Score: 0.610
BBox: (16, 339, 1144, 478)


#### 7 logo_1.png

Se aplicó template matching con preprocesamiento CLAHE porque la imagen presenta reflejos, variaciones de iluminación y textura sobre el vidrio que dificultan la correlación directa. La ecualización adaptativa y el suavizado previo permiten estabilizar el contraste del logotipo y mejorar la respuesta del método en un entorno visual ruidoso.

**Características:** Botellas de vidrio con texto BLANCO sobre etiquetas ROJAS, reflejos, sombras.  
**Problema:** Inversión de contraste, variaciones de iluminación, reflejos en el vidrio.  
**Estrategia:** Template matching con preprocesamiento (Gaussian blur + CLAHE), fallback a SIFT.  
**Escalas:** Derivadas del ratio esperado del logo (~35% del ancho de imagen para tomas cercanas).

In [30]:
# logo_1.png - glass bottles with glare and shadows
# Problem: white text on red, lighting variations, reflections
# Strategy: template matching on a preprocessed image first, SIFT as fallback
img_name = "logo_1.png"
img_rgb, img_gray, img_bgr = load_image(f"images/{img_name}")

print(f"Image size: {img_gray.shape}")

# Light Gaussian blur followed by CLAHE to tame the reflections
img_blur = cv.GaussianBlur(img_gray, (3, 3), 0)
clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
img_preprocessed = clahe.apply(img_blur)

# Inverted template variants (white text on red = inverted contrast)
template_inv = 255 - template
inverted_variants = {
    'inverted': template_inv,
    'inverted_clahe': preprocess_image(template_inv, 'clahe'),
}

# Scale band: the logo is taken to span ~35% of the image width in close-up
# bottle shots; the band runs from 0.7x to 1.4x the matching template scale.
h, w = img_gray.shape
th, tw = template.shape

expected_logo_ratio = 0.35
expected_logo_width = w * expected_logo_ratio
center_scale = expected_logo_width / tw
scale_min, scale_max = center_scale * 0.7, center_scale * 1.4
scales = np.linspace(scale_min, scale_max, 20)

print(f"Scale range: {scale_min:.2f} - {scale_max:.2f}")

# NOTE(review): template_match_multiscale treats a falsy `preprocess` as
# "use the default Gaussian blur + equalizeHist", so img_preprocessed is
# filtered a second time here rather than being used as-is. The recorded
# result was obtained with this behaviour; confirm before changing it.
bbox, score, method = template_match_multiscale(
    img_preprocessed,
    inverted_variants,
    scales=scales,
    threshold=0.35,
    preprocess=None,
    aspect_filter=(1.5, 4.0),
    min_width_ratio=0.10,
)

# Hand over to SIFT when template matching finds nothing or scores too low
if bbox is None or score < 0.40:
    reason = 'failed' if bbox is None else f'score too low ({score:.3f})'
    print(f"Template matching {reason}, trying SIFT...")
    bbox, score, method = detect_by_sift(img_gray, template, min_matches=8)

print(
    f"Image: {img_name}",
    f"Method: {method}",
    f"Score: {score:.3f}",
    f"BBox: {bbox}",
    sep="\n",
)

results_summary.append({'image': img_name, 'method': method, 'score': score, 'bbox': bbox})

Image size: (450, 687)
Scale range: 0.42 - 0.84
Image: logo_1.png
Method: TM-inverted_clahe@0.73
Score: 0.426
BBox: (np.int64(198), np.int64(191), 292, 127)


---
### Results Summary

In [31]:
# Tabular recap of every detection recorded so far
print("Resumen de detecciones:")
print(f"{'Image':<25} {'Method':<35} {'Score':>8} {'Size':>12}")
print("-" * 80)

detected_count = 0
for r in results_summary:
    img, score, bbox = r['image'], r['score'], r['bbox']
    # Cap the method label at 33 characters so the column stays aligned
    method = r['method'][:33]

    size = f"{bbox[2]}x{bbox[3]}" if bbox else "N/A"
    if bbox:
        detected_count += 1

    print(f"{img:<25} {method:<35} {score:>8.3f} {size:>12}")

print("-" * 80)
print(f"Total detected: {detected_count}/{len(results_summary)}")
print("=" * 80)

Resumen de detecciones:
Image                     Method                                 Score         Size
--------------------------------------------------------------------------------
coca_logo_1.png           TM-edges@0.42                          0.411       167x73
coca_logo_2.png           SIFT-inverted(48 matches)              0.480      233x133
coca_multi.png            TM-inverted_clahe@0.23                 0.501        93x40
coca_retro_1.png          SIFT-normal(45 matches)                0.450      579x206
coca_retro_2.png          SIFT-inverted(33 matches)              0.330       162x63
COCA-COLA-LOGO.jpg        SIFT-inverted(61 matches)              0.610     1144x478
logo_1.png                TM-inverted_clahe@0.73                 0.426      292x127
--------------------------------------------------------------------------------
Total detected: 7/7


In [32]:
# Final grid visualization: one panel per recorded detection, three per row
import os

# Size the grid from the number of results so that more than nine entries do
# not overflow the axes; 7-9 results still give the 3x3, 15x15-inch layout.
grid_cols = 3
grid_rows = max(1, -(-len(results_summary) // grid_cols))  # ceiling division
fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(15, 5 * grid_rows))
axes = axes.flatten()

for idx, r in enumerate(results_summary):
    # Images are reloaded from disk because only the bbox was stored
    img_rgb, _, _ = load_image(f"images/{r['image']}")
    img_out = draw_detection(img_rgb, r['bbox'])

    axes[idx].imshow(img_out)
    axes[idx].set_title(f"{r['image']}\nScore: {r['score']:.3f}")
    axes[idx].axis('off')

# Hide unused axes
for idx in range(len(results_summary), len(axes)):
    axes[idx].axis('off')

plt.suptitle('Assignment 1: Single Detection per Image', fontsize=14, fontweight='bold')
plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists
output_path = 'results/TP3-v2_results.png'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path, dpi=120, bbox_inches='tight')
plt.show()

print(f"Saved: {output_path}")

Saved: results/TP3-v2_results.png


---
## 2: Detección múltiple por imagen

Plantear y validar un algoritmo para múltiples detecciones en la imagen `coca_multi.png` con el mismo template del ítem 1.

### Enfoque

En `coca_multi` hay muchas botellas alineadas con logos a diferentes escalas según el estante.

**Mejoras implementadas para reducir falsos positivos:**

1. **Filtro por banda horizontal (y_ranges):** Los logos aparecen solo en franjas específicas de la imagen. Restricción a zonas donde realmente hay etiquetas.

2. **Validación de consistencia de color:** Después de cada detección, se verifica que la región sea predominantemente roja (R > 110, R > G+25, R > B+25).

3. **Filtros de forma (aspect ratio + height):** Filtro estricto de aspect ratio (2.0–3.8) y altura (28–75px) para rechazar detecciones con proporciones incorrectas.

4. **Test de aislamiento de picos:** Rechaza picos de correlación aislados (ruido) verificando que el vecindario local también tenga respuesta alta.

In [33]:
def detect_multiple_logos(
    img_gray: np.ndarray,
    img_rgb: Optional[np.ndarray],
    template: np.ndarray,
    scales: np.ndarray,
    threshold_sigma: float = 2.5,
    min_threshold: float = 0.35,
    iou_threshold: float = 0.3,
    size_filter: Optional[Tuple[int, int]] = None,
    height_filter: Optional[Tuple[int, int]] = None,
    aspect_filter: Optional[Tuple[float, float]] = (2.6, 3.8),
    y_ranges: Optional[List[Tuple[float, float]]] = None,
    local_max_kernel: int = 7,
    peak_isolation_ratio: float = 0.5,
    color_check: bool = True,
    min_red_dominance: int = 30,
    min_red_value: int = 120
) -> List[Tuple]:
    """
    Multi-detection using template matching with local maxima and validation filters.

    The image is blurred and histogram-equalized, then correlated
    (TM_CCOEFF_NORMED) against the template at every requested scale. Local
    maxima of each correlation map that pass all filters are collected across
    scales and merged with ``nms_global``.

    Args:
        img_gray: Grayscale image (2-D array)
        img_rgb: RGB image used for the color validation; when None the color
            check is skipped even if ``color_check`` is True
        template: Single grayscale template (use best variant, e.g., inverted + CLAHE)
        scales: Iterable of scale factors applied to the template size
        threshold_sigma: Number of std devs above the mean of the correlation
            map used for the adaptive threshold
        min_threshold: Lower bound for the adaptive threshold
        iou_threshold: IoU threshold forwarded to ``nms_global``
        size_filter: Optional (min_width, max_width) for the scaled template
        height_filter: Optional (min_height, max_height) for the scaled template
        aspect_filter: Optional (min_aspect, max_aspect) for width / height of
            the scaled template; when falsy, scales whose aspect differs from
            the template's by more than 0.5 are rejected instead
        y_ranges: Optional list of (y_min_ratio, y_max_ratio), relative to the
            image height, in which the top edge of a detection must fall
        local_max_kernel: Kernel size for local maxima detection
        peak_isolation_ratio: Minimum ratio of neighborhood mean to peak score
        color_check: Whether to validate red color dominance
        min_red_dominance: Minimum margin of mean R over mean G and over mean B
        min_red_value: Minimum mean R channel value for color check

    Returns: List of detections [(x, y, w, h, score), ...] where (x, y) is the
        top-left corner of the box.
    """
    h, w = img_gray.shape
    th, tw = template.shape
    template_aspect = tw / th

    # Preprocess image
    img_processed = cv.GaussianBlur(img_gray, (3, 3), 0)
    img_processed = cv.equalizeHist(img_processed)

    # Values that do not change across scales or candidates are computed once.
    y_bands = [(int(h * lo), int(h * hi)) for lo, hi in y_ranges] if y_ranges else []
    kernel = np.ones((local_max_kernel, local_max_kernel), np.uint8)
    use_color = color_check and img_rgb is not None

    all_detections = []

    for scale in scales:
        new_w, new_h = int(tw * scale), int(th * scale)

        # Skip invalid sizes
        if new_w > w or new_h > h or new_w < 20 or new_h < 10:
            continue

        # Width filter
        if size_filter and not (size_filter[0] <= new_w <= size_filter[1]):
            continue

        # Height filter
        if height_filter and not (height_filter[0] <= new_h <= height_filter[1]):
            continue

        # Aspect ratio filter. It only depends on the scaled template size, so
        # it is evaluated before the (expensive) correlation instead of once
        # per candidate peak.
        det_aspect = new_w / new_h
        if aspect_filter:
            if not (aspect_filter[0] <= det_aspect <= aspect_filter[1]):
                continue
        elif abs(det_aspect - template_aspect) > 0.5:
            continue

        # Resize template and correlate
        scaled_tmpl = cv.resize(template, (new_w, new_h))
        res = cv.matchTemplate(img_processed, scaled_tmpl, cv.TM_CCOEFF_NORMED)

        # Adaptive threshold: mean + k * std of this map, never below min_threshold
        mean_val, std_val = np.mean(res), np.std(res)
        adaptive_thresh = max(mean_val + threshold_sigma * std_val, min_threshold)

        # Keep only local maxima: points equal to the dilated (neighborhood max) map
        local_max = cv.dilate(res, kernel)
        mask = (res == local_max) & (res >= adaptive_thresh)

        ys, xs = np.where(mask)

        for x, y in zip(xs, ys):
            score = res[y, x]

            # Y-range filter (horizontal bands where labels appear)
            if y_bands and not any(lo <= y <= hi for lo, hi in y_bands):
                continue

            # Peak isolation test: reject sparse bright noise, keep peaks whose
            # 5x5 neighborhood (clipped at the borders) also responds strongly
            y1 = max(0, y - 2)
            y2 = min(res.shape[0], y + 3)
            x1 = max(0, x - 2)
            x2 = min(res.shape[1], x + 3)
            window = res[y1:y2, x1:x2]
            if window.size > 0 and window.mean() < score * peak_isolation_ratio:
                continue

            # Color consistency check (red dominance) over the bounding box
            if use_color:
                box_y1 = max(0, y)
                box_y2 = min(img_rgb.shape[0], y + new_h)
                box_x1 = max(0, x)
                box_x2 = min(img_rgb.shape[1], x + new_w)

                if box_y2 > box_y1 and box_x2 > box_x1:
                    patch = img_rgb[box_y1:box_y2, box_x1:box_x2]
                    mean_color = patch.mean(axis=(0, 1))  # R, G, B average

                    # Mean R must exceed min_red_value and beat both G and B
                    # by at least min_red_dominance
                    r, g, b = mean_color[0], mean_color[1], mean_color[2]
                    if not (r > min_red_value and
                            r > g + min_red_dominance and
                            r > b + min_red_dominance):
                        continue

            all_detections.append((x, y, new_w, new_h, score, scale))

    if not all_detections:
        return []

    # Apply NMS across all scales
    final_detections = nms_global(all_detections, iou_threshold=iou_threshold)

    # Return as (x, y, w, h, score), dropping the scale
    return [(d[0], d[1], d[2], d[3], d[4]) for d in final_detections]


def draw_multiple_detections(img_rgb: np.ndarray, detections: List[Tuple], 
                              color=(0, 255, 0), thickness=2, 
                              show_numbers: bool = True) -> np.ndarray:
    """Return a copy of the image with one rectangle per detection.

    Each detection supplies (x, y, w, h) in its first four positions. When
    ``show_numbers`` is set, the 1-based index of the detection is written
    near the bottom-left corner of its box.
    """
    canvas = img_rgb.copy()
    for label, det in enumerate(detections, start=1):
        left, top, box_w, box_h = map(int, (det[0], det[1], det[2], det[3]))
        cv.rectangle(canvas, (left, top), (left + box_w, top + box_h),
                     color, thickness)
        if not show_numbers:
            continue
        cv.putText(canvas, str(label), (left + 2, top + box_h - 5),
                   cv.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 0), 1)
    return canvas

In [34]:
# Assignment 2: multi-detection on coca_multi.png

img_name = "coca_multi.png"
img_rgb, img_gray, img_bgr = load_image(f"images/{img_name}")

(h, w), (th, tw) = img_gray.shape, template.shape

print(f"Image size: {w}x{h}")
print(f"Template size: {tw}x{th}")

# Single best template variant: contrast-inverted, then CLAHE-equalized
clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
template_inv_clahe = clahe.apply(255 - template)

# Template scales covering the expected logo widths (~76px to ~168px)
scale_min, scale_max = 0.19, 0.42
scales = np.linspace(scale_min, scale_max, 15)
print(f"Scale range: {scale_min:.2f} - {scale_max:.2f}")

# Strict size limits (pixels) for the scaled template
w_min, w_max = 72, 170
h_min, h_max = 28, 75
print(f"Width filter: {w_min}px - {w_max}px")
print(f"Height filter: {h_min}px - {h_max}px")

# Relative vertical bands where labels appear: top shelf, then bottom shelf
y_ranges = [(0.08, 0.35), (0.68, 0.95)]
print(f"Y-ranges (relative): {y_ranges}")

# Accepted width / height ratio of a detection
aspect_filter = (2.0, 3.8)
print(f"Aspect ratio filter: {aspect_filter}")

# Run the multi-logo detector; the RGB image is passed for the red-color check
detection_params = dict(
    scales=scales,
    threshold_sigma=2.5,
    min_threshold=0.34,
    iou_threshold=0.20,
    size_filter=(w_min, w_max),
    height_filter=(h_min, h_max),
    aspect_filter=aspect_filter,
    y_ranges=y_ranges,
    local_max_kernel=11,
    peak_isolation_ratio=0.5,
    color_check=True,
    min_red_dominance=25,   # R must be > G+25 and > B+25
    min_red_value=110,      # R must be > 110
)
detections = detect_multiple_logos(img_gray, img_rgb, template_inv_clahe,
                                   **detection_params)

banner = '=' * 60
print(f"\n{banner}")
print(f"Detected: {len(detections)} logos")
print(banner)

# Detections whose top edge lies in the upper half count as "top shelf"
top_count = len([d for d in detections if d[1] < h * 0.5])
bottom_count = len(detections) - top_count
print(f"Top shelf: {top_count}, Bottom shelf: {bottom_count}\n")

for rank, (x, y, bw, bh, score) in enumerate(detections, start=1):
    if y < h * 0.5:
        shelf = "Top"
    else:
        shelf = "Bottom"
    print(f"  {rank:2d}. [{shelf:6s}] pos=({x:3d}, {y:3d}), size={bw}x{bh}, score={score:.3f}")

# Draw, show and save the annotated image
img_out = draw_multiple_detections(img_rgb, detections, color=(0, 255, 0), thickness=2)

plt.figure(figsize=(14, 10))
plt.imshow(img_out)
plt.title(f"Assignment 2: {len(detections)} logos detected")
plt.axis('off')
plt.tight_layout()
plt.savefig('results/TP3-v2_assignment2.png', dpi=120, bbox_inches='tight')
plt.show()

print("\nSaved: results/TP3-v2_assignment2.png")

Image size: 799x598
Template size: 400x175
Scale range: 0.19 - 0.42
Width filter: 72px - 170px
Height filter: 28px - 75px
Y-ranges (relative): [(0.08, 0.35), (0.68, 0.95)]
Aspect ratio filter: (2.0, 3.8)

Detected: 18 logos
Top shelf: 9, Bottom shelf: 9

   1. [Bottom] pos=( 72, 422), size=89x39, score=0.437
   2. [Bottom] pos=(564, 428), size=89x39, score=0.436
   3. [Bottom] pos=(487, 424), size=89x39, score=0.432
   4. [Bottom] pos=(405, 430), size=89x39, score=0.432
   5. [Bottom] pos=(303, 429), size=95x41, score=0.426
   6. [Top   ] pos=( 29, 147), size=89x39, score=0.425
   7. [Top   ] pos=(276, 147), size=89x39, score=0.425
   8. [Bottom] pos=(148, 428), size=89x39, score=0.419
   9. [Bottom] pos=(234, 429), size=89x39, score=0.416
  10. [Top   ] pos=(109, 151), size=89x39, score=0.409
  11. [Bottom] pos=(  0, 419), size=82x36, score=0.402
  12. [Bottom] pos=(655, 429), size=76x33, score=0.400
  13. [Top   ] pos=(707, 155), size=76x33, score=0.397
  14. [Top   ] pos=(201, 153),

---
## 3: Generalización del algoritmo

Generalizar el algoritmo del ítem 2 para todas las imágenes.

### Enfoque: Meta-Detector con Fusión de Detectores

En lugar de un único detector, implementamos un **meta-detector** que:

1. **Ejecuta múltiples detectores especializados:**
   - Template Matching (single) - bueno para fondos limpios
   - Template Matching con bordes - robusto a inversión de contraste
   - SIFT (single) - bueno para logos curvados/rotados/retro
   - Multi-detection - para imágenes con múltiples logos

2. **Normaliza las puntuaciones** a rango [0, 1] para comparación justa:
   - TM: `norm = clip((max_val - 0.2) / 0.5, 0, 1)`
   - SIFT: `norm = min(1.0, num_inliers / 25)`
   - Multi: `norm = 0.6 * norm_TM(media de scores TM) + 0.4 * min(1.0, num / 10)`

3. **Selecciona el mejor resultado** con lógica de meta-decisión:
   - Single mode: Si SIFT_norm > TM_norm + 0.15 → elegir SIFT
   - Auto mode: Si multi_tm ≥ 5 detecciones AND norm > 0.5 → elegir multi

In [35]:
# Meta-Detector: Unified Logo Detection Framework

from dataclasses import dataclass
from typing import List, Optional, Tuple, Dict

@dataclass
class DetectionResult:
    """Standardized detection result from any detector.

    Every detector returns this same structure so that their outputs can be
    compared through ``norm_score``. A result with ``num == 0`` and an empty
    ``bboxes`` list means the detector found nothing.
    """
    method: str           # Detector name: "single_tm", "edge_tm", "sift_single", "multi_tm"
    bboxes: List[Tuple]   # Bounding boxes [(x, y, w, h), ...]
    raw_score: float      # Native score from detector (not comparable across detectors)
    norm_score: float     # Normalized score [0, 1] for comparison
    num: int              # Number of detections
    details: str = ""     # Additional human-readable info, printed in summaries


class UnifiedLogoDetector:
    """
    Meta-detector that runs multiple specialized detectors and selects the best result.

    Score Normalization Strategy (everything is mapped to [0, 1]):
    - TM scores: norm = clip((max_val - 0.2) / 0.5, 0, 1)
    - SIFT scores: norm = min(1.0, num_inliers / 25)
    - Multi scores: norm = 0.6 * norm_TM(mean TM score) + 0.4 * min(1.0, num_detections / 10)

    Modes:
        - "single": Assignment 1 - returns best single detection
        - "multi": Assignment 2 - returns all detections from multi_tm
        - "auto": Automatically choose based on multi_tm count
    """

    # Modes accepted by detect()
    VALID_MODES = ("single", "multi", "auto")

    def __init__(self, template: np.ndarray):
        """Store the grayscale template and precompute its variants.

        Args:
            template: 2-D grayscale template image.
        """
        self.template = template
        self.th, self.tw = template.shape

        # Create template variants (CLAHE-equalized, inverted, inverted + CLAHE)
        clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        self.template_clahe = clahe.apply(template)
        self.template_inv = 255 - template
        self.template_inv_clahe = clahe.apply(self.template_inv)

        # SIFT configuration
        self.sift = cv.SIFT_create(nfeatures=2000)
        self.sift_min_matches = 8

    def _normalize_tm_score(self, max_val: float) -> float:
        """Normalize TM score to [0, 1] range: 0.2 maps to 0 and 0.7 maps to 1."""
        # float() so callers get a builtin float rather than a NumPy scalar
        return float(np.clip((max_val - 0.2) / 0.5, 0.0, 1.0))

    def _normalize_sift_score(self, num_inliers: int) -> float:
        """Normalize SIFT score to [0, 1] range; saturates at 25 inliers."""
        return min(1.0, num_inliers / 25.0)

    def _normalize_multi_score(self, mean_tm_score: float, num_detections: int) -> float:
        """Normalize multi-detection score to [0, 1] range.

        Combines the normalized mean TM score (weight 0.6) with the detection
        count, which saturates at 10 detections (weight 0.4).
        """
        tm_component = self._normalize_tm_score(mean_tm_score)
        count_component = min(1.0, num_detections / 10.0)
        return 0.6 * tm_component + 0.4 * count_component

    def run_single_tm(self, img_gray: np.ndarray) -> DetectionResult:
        """Single-logo template matching with multiple variants.

        Tries the CLAHE and inverted-CLAHE templates over 25 scales and keeps
        the location with the highest TM_CCOEFF_NORMED score above 0.25.
        """
        h, w = img_gray.shape

        clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        img_proc = clahe.apply(cv.GaussianBlur(img_gray, (3, 3), 0))

        # Template width from 8% to 60% of the image width, scale clamped to [0.1, 2.0]
        scale_min = (w * 0.08) / self.tw
        scale_max = (w * 0.60) / self.tw
        scales = np.linspace(max(0.1, scale_min), min(2.0, scale_max), 25)

        best_result = None
        best_score = -1.0

        templates = [('normal', self.template_clahe), ('inverted', self.template_inv_clahe)]

        for tmpl_name, tmpl in templates:
            for scale in scales:
                new_w, new_h = int(self.tw * scale), int(self.th * scale)
                if new_w >= w or new_h >= h or new_w < 30 or new_h < 15:
                    continue

                # The aspect ratio only depends on the scaled size, so reject
                # the scale before running the correlation.
                aspect = new_w / new_h
                if not (1.5 < aspect < 4.5):
                    continue

                scaled_tmpl = cv.resize(tmpl, (new_w, new_h))
                res = cv.matchTemplate(img_proc, scaled_tmpl, cv.TM_CCOEFF_NORMED)
                _, max_val, _, max_loc = cv.minMaxLoc(res)

                if max_val > best_score and max_val > 0.25:
                    best_score = max_val
                    best_result = {'bbox': (max_loc[0], max_loc[1], new_w, new_h),
                                   'raw_score': max_val,
                                   'scale': scale, 'variant': tmpl_name}

        if best_result is None:
            return DetectionResult("single_tm", [], 0.0, 0.0, 0, "No detection")

        norm_score = self._normalize_tm_score(best_result['raw_score'])

        return DetectionResult("single_tm", [best_result['bbox']], best_result['raw_score'],
                              norm_score, 1, 
                              f"{best_result['variant']}@{best_result['scale']:.2f}")

    def run_edge_tm(self, img_gray: np.ndarray) -> DetectionResult:
        """Edge-based template matching - robust to contrast inversion.

        Correlates dilated Canny edge maps of the template and the image over
        25 scales and keeps the best score above 0.20.
        """
        h, w = img_gray.shape

        template_edges = cv.Canny(self.template, 50, 150)
        img_edges = cv.Canny(img_gray, 50, 150)
        # Thicken the edges slightly on both maps before correlating them
        kernel = np.ones((2, 2), np.uint8)
        template_edges = cv.dilate(template_edges, kernel, iterations=1)
        img_edges = cv.dilate(img_edges, kernel, iterations=1)

        scale_min = (w * 0.08) / self.tw
        scale_max = (w * 0.60) / self.tw
        scales = np.linspace(max(0.1, scale_min), min(2.0, scale_max), 25)

        best_result = None
        best_score = -1.0

        for scale in scales:
            new_w, new_h = int(self.tw * scale), int(self.th * scale)
            if new_w >= w or new_h >= h or new_w < 30:
                continue

            scaled_edges = cv.resize(template_edges, (new_w, new_h))
            res = cv.matchTemplate(img_edges, scaled_edges, cv.TM_CCOEFF_NORMED)
            _, max_val, _, max_loc = cv.minMaxLoc(res)

            if max_val > best_score and max_val > 0.20:
                best_score = max_val
                best_result = {'bbox': (max_loc[0], max_loc[1], new_w, new_h),
                               'raw_score': max_val, 'scale': scale}

        if best_result is None:
            return DetectionResult("edge_tm", [], 0.0, 0.0, 0, "No detection")

        norm_score = self._normalize_tm_score(best_result['raw_score'])

        return DetectionResult("edge_tm", [best_result['bbox']], best_result['raw_score'],
                              norm_score, 1, f"edges@{best_result['scale']:.2f}")

    def run_sift_single(self, img_gray: np.ndarray) -> DetectionResult:
        """SIFT-based detection with homography.

        Matches the normal and inverted templates against the image with
        FLANN + ratio test, fits a RANSAC homography and keeps the variant
        with the most inliers whose projected box passes the sanity checks.
        """
        img_h, img_w = img_gray.shape
        img_area = img_h * img_w

        kp2, des2 = self.sift.detectAndCompute(img_gray, None)
        if des2 is None or len(des2) < self.sift_min_matches:
            return DetectionResult("sift_single", [], 0.0, 0.0, 0, "Insufficient features")

        best_result = None

        for tmpl_name, tmpl in [('normal', self.template), ('inverted', self.template_inv)]:
            kp1, des1 = self.sift.detectAndCompute(tmpl, None)
            if des1 is None or len(des1) < 4:
                continue

            FLANN_INDEX_KDTREE = 1
            index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
            search_params = dict(checks=50)
            flann = cv.FlannBasedMatcher(index_params, search_params)
            matches = flann.knnMatch(des1, des2, k=2)

            # Ratio test: keep a match only if it is clearly better than the runner-up
            good = [pair[0] for pair in matches
                    if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance]

            if len(good) < self.sift_min_matches:
                continue

            src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
            dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

            M, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC, 5.0)
            if M is None:
                continue

            # Builtin int so raw_score is not stored as a NumPy scalar
            num_inliers = int(mask.ravel().sum()) if mask is not None else len(good)

            # Project the template corners and take their axis-aligned bounding box
            th, tw = tmpl.shape
            pts = np.float32([[0,0], [0,th-1], [tw-1,th-1], [tw-1,0]]).reshape(-1,1,2)
            dst = cv.perspectiveTransform(pts, M)
            x, y, bw, bh = cv.boundingRect(dst)

            # Clip the box to the image
            x, y = max(0, x), max(0, y)
            bw = min(bw, img_w - x)
            bh = min(bh, img_h - y)

            # Reject degenerate homographies: tiny, near full-frame or extreme-aspect boxes
            bbox_area = bw * bh
            if bbox_area < img_area * 0.01 or bbox_area > img_area * 0.95:
                continue
            if bw < 20 or bh < 10 or bw / bh > 10 or bh / bw > 5:
                continue

            if best_result is None or num_inliers > best_result['num_inliers']:
                best_result = {'bbox': (x, y, bw, bh), 'num_inliers': num_inliers,
                               'num_matches': len(good), 'variant': tmpl_name}

        if best_result is None:
            return DetectionResult("sift_single", [], 0.0, 0.0, 0, "No valid homography")

        norm_score = self._normalize_sift_score(best_result['num_inliers'])
        raw_score = best_result['num_inliers']

        return DetectionResult("sift_single", [best_result['bbox']], 
                              raw_score, norm_score, 1,
                              f"{best_result['variant']}({best_result['num_inliers']} inliers)")

    def run_multi_tm(self, img_gray: np.ndarray, img_rgb: np.ndarray) -> DetectionResult:
        """Multi-logo detection using template matching with color validation.

        Uses the inverted + CLAHE template over 20 scales, keeps local maxima
        above an adaptive threshold that pass the peak-isolation and
        red-dominance checks, and merges them with ``nms_global``.
        """
        h, w = img_gray.shape
        template = self.template_inv_clahe
        img_proc = cv.equalizeHist(cv.GaussianBlur(img_gray, (3, 3), 0))

        # Use relative size filters based on image dimensions:
        # logo width 8% - 35% of image width, height 4% - 20% of image height
        min_logo_w = int(w * 0.08)
        max_logo_w = int(w * 0.35)
        min_logo_h = int(h * 0.04)
        max_logo_h = int(h * 0.20)

        # Scale range based on image size, clamped to [0.10, 0.60]
        scale_min = max(0.10, min_logo_w / self.tw)
        scale_max = min(0.60, max_logo_w / self.tw)
        scales = np.linspace(scale_min, scale_max, 20)

        # Neighborhood used to find local maxima; identical for every scale
        kernel = np.ones((11, 11), np.uint8)

        all_detections = []

        for scale in scales:
            new_w, new_h = int(self.tw * scale), int(self.th * scale)
            if new_w >= w or new_h >= h or new_w < 40 or new_h < 15:
                continue

            # Relative size filter
            if not (min_logo_w <= new_w <= max_logo_w):
                continue
            if not (min_logo_h <= new_h <= max_logo_h):
                continue

            # Aspect ratio depends only on the scaled size: check it before matching
            det_aspect = new_w / new_h
            if not (1.8 <= det_aspect <= 4.0):
                continue

            scaled_tmpl = cv.resize(template, (new_w, new_h))
            res = cv.matchTemplate(img_proc, scaled_tmpl, cv.TM_CCOEFF_NORMED)

            mean_val, std_val = np.mean(res), np.std(res)
            adaptive_thresh = max(mean_val + 2.5 * std_val, 0.30)

            local_max = cv.dilate(res, kernel)
            mask = (res == local_max) & (res >= adaptive_thresh)

            ys, xs = np.where(mask)

            for x, y in zip(xs, ys):
                score = res[y, x]

                # Peak isolation test
                y1, y2 = max(0, y - 2), min(res.shape[0], y + 3)
                x1, x2 = max(0, x - 2), min(res.shape[1], x + 3)
                window = res[y1:y2, x1:x2]
                if window.size > 0 and window.mean() < score * 0.5:
                    continue

                # Color check (red dominance) on the mean color of the box
                box_y1, box_y2 = max(0, y), min(h, y + new_h)
                box_x1, box_x2 = max(0, x), min(w, x + new_w)
                if box_y2 > box_y1 and box_x2 > box_x1:
                    patch = img_rgb[box_y1:box_y2, box_x1:box_x2]
                    r, g, b = patch.mean(axis=(0, 1))
                    if not (r > 100 and r > g + 20 and r > b + 20):
                        continue

                all_detections.append((x, y, new_w, new_h, score, scale))

        if not all_detections:
            return DetectionResult("multi_tm", [], 0.0, 0.0, 0, "No detections")

        final = nms_global(all_detections, iou_threshold=0.25)

        if final:
            mean_score = np.mean([d[4] for d in final])
            norm_score = self._normalize_multi_score(mean_score, len(final))
        else:
            mean_score = 0.0
            norm_score = 0.0

        bboxes = [(d[0], d[1], d[2], d[3]) for d in final]

        return DetectionResult("multi_tm", bboxes, mean_score, norm_score, 
                              len(final), f"{len(final)} logos, avg={mean_score:.3f}")

    def _select_single(self, results: Dict) -> Tuple[DetectionResult, str]:
        """Pick the best single-logo result among SIFT and the two TM detectors.

        Returns the chosen result and the rule that selected it: "sift"
        (SIFT beats the best TM by more than 0.15), "tm" (best TM result),
        "sift_fallback" (only SIFT detected something) or "none".
        """
        sift = results['sift_single']
        tm_results = [results['single_tm'], results['edge_tm']]
        # Detectors without a detection compete with score 0
        best_tm = max(tm_results, key=lambda r: r.norm_score if r.num > 0 else 0)

        if sift.num > 0 and sift.norm_score > best_tm.norm_score + 0.15:
            return sift, "sift"
        if best_tm.num > 0:
            return best_tm, "tm"
        if sift.num > 0:
            return sift, "sift_fallback"
        return results['single_tm'], "none"

    def detect(self, img_rgb: np.ndarray, img_gray: np.ndarray,
               mode: str = "single") -> Dict:
        """
        Run detectors and select the best result based on mode.

        Selection Rules:
        - SINGLE MODE: If SIFT_norm > TM_norm + 0.15 → choose SIFT, else TM
        - AUTO MODE: If multi_tm >= 5 AND norm > 0.5 → choose multi, else single rules
        - MULTI MODE: always the multi_tm result

        Args:
            img_rgb: RGB image (used by the multi detector's color check)
            img_gray: Grayscale version of the same image
            mode: One of "single", "multi" or "auto"

        Returns:
            Dict with 'best' (the selected DetectionResult), 'all_results'
            (every detector's result by name) and 'reason' (why it was chosen).

        Raises:
            ValueError: If ``mode`` is not one of the supported modes.
        """
        if mode not in self.VALID_MODES:
            raise ValueError(
                f"Unknown mode {mode!r}; expected one of {self.VALID_MODES}")

        results = {}

        results['single_tm'] = self.run_single_tm(img_gray)
        results['edge_tm'] = self.run_edge_tm(img_gray)
        results['sift_single'] = self.run_sift_single(img_gray)

        # The multi detector is only run when the mode can actually select it
        if mode in ('multi', 'auto'):
            results['multi_tm'] = self.run_multi_tm(img_gray, img_rgb)
        else:
            results['multi_tm'] = DetectionResult("multi_tm", [], 0.0, 0.0, 0, "Skipped")

        print("Detector Results:")
        for name, r in results.items():
            print(f"  {name:12s}: num={r.num:2d}, norm={r.norm_score:.3f}, "
                  f"raw={r.raw_score:.3f}, {r.details}")

        # META-DECISION LOGIC
        if mode == "multi":
            best = results['multi_tm']
            reason = "Mode=multi"
        elif mode == "single":
            best, rule = self._select_single(results)
            if rule == "sift":
                reason = f"SIFT ({best.raw_score:.0f} inliers, norm={best.norm_score:.2f})"
            elif rule == "tm":
                reason = f"Best TM (norm={best.norm_score:.2f})"
            elif rule == "sift_fallback":
                reason = "SIFT fallback"
            else:
                reason = "No valid detection"
        else:  # auto
            multi = results['multi_tm']
            if multi.num >= 5 and multi.norm_score > 0.5:
                best = multi
                reason = f"Auto: Multi found {multi.num} logos (norm={multi.norm_score:.2f})"
            else:
                best, rule = self._select_single(results)
                if rule == "sift":
                    reason = f"Auto: SIFT ({best.raw_score:.0f} inliers)"
                elif rule == "tm":
                    reason = f"Auto: Best TM (norm={best.norm_score:.2f})"
                else:
                    reason = "Auto: Fallback"

        print(f"\nSelected: {best.method} - {reason}")

        return {'best': best, 'all_results': results, 'reason': reason}


def draw_result(img_rgb: np.ndarray, result: DetectionResult,
                color=(0, 255, 0), thickness=2) -> np.ndarray:
    """Return a copy of ``img_rgb`` annotated with the boxes in ``result``.

    Every entry of ``result.bboxes`` is read as ``(x, y, w, h)``, truncated to
    integers, and outlined with a rectangle in ``color`` using line width
    ``thickness``. When ``result.num`` is greater than 1, each box is also
    tagged with its 1-based position in the list, drawn just inside the
    bottom-left corner of the box. The input image itself is left untouched.
    """
    canvas = img_rgb.copy()
    for label, bbox in enumerate(result.bboxes, start=1):
        left, top, width, height = (int(bbox[k]) for k in range(4))
        cv.rectangle(canvas, (left, top), (left + width, top + height),
                     color, thickness)
        if not result.num > 1:
            # A lone detection needs no index label.
            continue
        cv.putText(canvas, str(label), (left + 2, top + height - 5),
                   cv.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 0), 1)
    return canvas

In [36]:
# Single-detection pass: run the unified detector on every test image and
# print a one-line-per-image summary table.

# One detector instance, built from the template, is reused for all images
detector = UnifiedLogoDetector(template)

# File names (looked up under images/) to evaluate
test_images = [
    "coca_logo_1.png",
    "coca_logo_2.png",
    "coca_multi.png",
    "coca_retro_1.png",
    "coca_retro_2.png",
    "COCA-COLA-LOGO.jpg",
    "logo_1.png"
]

# Separator lines reused by the per-image headers and by the summary table
section_rule = "=" * 70
summary_rule = "=" * 80

print("SINGLE DETECTION MODE")

# One dict per image: file name, selected detection, reason, all candidates
unified_results = []

for img_name in test_images:
    print(f"\n{section_rule}")
    print(f"Processing: {img_name}")
    print(section_rule)

    img_rgb, img_gray, img_bgr = load_image(f"images/{img_name}")

    # mode="single" asks the detector for one detection per image
    result = detector.detect(img_rgb, img_gray, mode="single")

    unified_results.append(dict(
        image=img_name,
        result=result['best'],
        reason=result['reason'],
        all_results=result['all_results'],
    ))

# Summary table: image, chosen method, normalized score, detector details
print(f"\n{summary_rule}")
print("SINGLE DETECTION - SUMMARY")
print(summary_rule)
print(f"{'Image':<25} {'Method':<15} {'norm':>8} {'Details':<30}")
print("-" * 80)

for r in unified_results:
    img, best = r['image'], r['result']
    print(f"{img:<25} {best.method:<15} {best.norm_score:>8.3f} {best.details:<30}")

print(summary_rule)

SINGLE DETECTION MODE

Processing: coca_logo_1.png
Detector Results:
  single_tm   : num= 1, norm=0.511, raw=0.456, inverted@0.10
  edge_tm     : num= 1, norm=0.165, raw=0.282, edges@0.12
  sift_single : num= 1, norm=1.000, raw=42.000, inverted(42 inliers)
  multi_tm    : num= 0, norm=0.000, raw=0.000, Skipped

Selected: sift_single - SIFT (42 inliers, norm=1.00)

Processing: coca_logo_2.png
Detector Results:
  single_tm   : num= 1, norm=0.515, raw=0.457, inverted@0.10
  edge_tm     : num= 1, norm=0.251, raw=0.326, edges@0.10
  sift_single : num= 1, norm=1.000, raw=29.000, inverted(29 inliers)
  multi_tm    : num= 0, norm=0.000, raw=0.000, Skipped

Selected: sift_single - SIFT (29 inliers, norm=1.00)

Processing: coca_multi.png
Detector Results:
  single_tm   : num= 1, norm=0.563, raw=0.482, inverted@0.33
  edge_tm     : num= 1, norm=0.116, raw=0.258, edges@0.25
  sift_single : num= 1, norm=0.240, raw=6.000, inverted(6 inliers)
  multi_tm    : num= 0, norm=0.000, raw=0.000, Skipped

Se

In [37]:
# Visualize Single Detection Results
#
# Draws the selected detection for every entry of `unified_results` on a grid
# with three panels per row and saves the figure under results/.
import os

# Size the grid from the number of results instead of assuming at most nine
# images; seven results still give the original 3x3 grid at 16x14 inches.
n_cols = 3
n_rows = max(1, (len(unified_results) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 14 * n_rows / 3))
axes = axes.flatten()

for idx, r in enumerate(unified_results):
    # Only the file name is kept in `unified_results`, so reload the pixels
    img_rgb, _, _ = load_image(f"images/{r['image']}")
    img_out = draw_result(img_rgb, r['result'])

    axes[idx].imshow(img_out)

    best = r['result']
    title = f"{r['image']}\n{best.method} | norm={best.norm_score:.2f}"
    axes[idx].set_title(title, fontsize=9)
    axes[idx].axis('off')

# Hide unused axes
for idx in range(len(unified_results), len(axes)):
    axes[idx].axis('off')

plt.suptitle('Unified Meta-Detector - Single Detection Mode', fontsize=14, fontweight='bold')
plt.tight_layout()

# savefig does not create missing directories, so make sure results/ exists
os.makedirs('results', exist_ok=True)
plt.savefig('results/TP3-v2_assignment3_single.png', dpi=120, bbox_inches='tight')
plt.show()

print("Saved: results/TP3-v2_assignment3_single.png")

Saved: results/TP3-v2_assignment3_single.png


In [38]:
# Multi Detection Mode (coca_multi.png + coca_retro_2.png)
#
# Runs the unified detector with mode="multi" on each listed image, prints the
# per-image scores, then shows all results side by side and saves the figure
# under results/.
import os

print("="*80)
print("MULTI DETECTION MODE")
print("="*80)

# Test images with multiple logos
multi_test_images = [
    "coca_multi.png",      # Shelf with many bottles
    "coca_retro_2.png"     # Poster with logo on circular badge + logo on glass
]

# One dict per image: file name, multi-mode result, and the loaded RGB image
multi_results = []

for img_name in multi_test_images:
    print(f"\n{'='*70}")
    print(f"Processing: {img_name}")
    print('='*70)
    
    img_rgb, img_gray, img_bgr = load_image(f"images/{img_name}")
    
    # Run unified detector in multi mode
    result = detector.detect(img_rgb, img_gray, mode="multi")
    multi_result = result['best']
    
    # Keep the image so the plotting step below does not reload it from disk
    multi_results.append({
        'image': img_name,
        'result': multi_result,
        'img_rgb': img_rgb
    })
    
    print(f"\nMulti-detection found {multi_result.num} logos")
    print(f"Average TM score: {multi_result.raw_score:.3f}")
    print(f"Normalized score: {multi_result.norm_score:.3f}")

# Visualize results
# One panel per image, 8 inches wide each (two images -> the original 16x8).
# squeeze=False makes subplots return an array even for a single panel, so the
# indexing below works for any number of images.
n_panels = max(1, len(multi_results))
fig, axes = plt.subplots(1, n_panels, figsize=(8 * n_panels, 8), squeeze=False)
axes = axes.flatten()

for idx, r in enumerate(multi_results):
    img_out = draw_result(r['img_rgb'], r['result'])
    axes[idx].imshow(img_out)
    axes[idx].set_title(f"{r['image']}\n{r['result'].num} logos detected | norm={r['result'].norm_score:.2f}")
    axes[idx].axis('off')

# Hide the placeholder panel that exists only when there are no results
for idx in range(len(multi_results), len(axes)):
    axes[idx].axis('off')

plt.suptitle('Test 3.2: Unified Meta-Detector - Multi Detection Mode', fontsize=14, fontweight='bold')
plt.tight_layout()

# savefig does not create missing directories, so make sure results/ exists
os.makedirs('results', exist_ok=True)
plt.savefig('results/TP3-v2_assignment3_multi.png', dpi=120, bbox_inches='tight')
plt.show()

print("\nSaved: results/TP3-v2_assignment3_multi.png")

MULTI DETECTION MODE

Processing: coca_multi.png
Detector Results:
  single_tm   : num= 1, norm=0.563, raw=0.482, inverted@0.33
  edge_tm     : num= 1, norm=0.116, raw=0.258, edges@0.25
  sift_single : num= 1, norm=0.240, raw=6.000, inverted(6 inliers)
  multi_tm    : num=18, norm=0.640, raw=0.400, 18 logos, avg=0.400

Selected: multi_tm - Mode=multi

Multi-detection found 18 logos
Average TM score: 0.400
Normalized score: 0.640

Processing: coca_retro_2.png
Detector Results:
  single_tm   : num= 1, norm=0.922, raw=0.661, inverted@0.38
  edge_tm     : num= 1, norm=0.296, raw=0.348, edges@0.38
  sift_single : num= 1, norm=1.000, raw=28.000, inverted(28 inliers)
  multi_tm    : num= 7, norm=0.508, raw=0.390, 7 logos, avg=0.390

Selected: multi_tm - Mode=multi

Multi-detection found 7 logos
Average TM score: 0.390
Normalized score: 0.508

Saved: results/TP3-v2_assignment3_multi.png
