# TP3 - Encontrar el logotipo de la gaseosa

In [11]:
%matplotlib qt
import os
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt

## Encontrar el logotipo de la gaseosa dentro de las imágenes provistas

1. Obtener una detección del logo en cada imagen sin falsos positivos

In [12]:
# UTILITY FUNCTIONS

# Image I/O

def load_image(path):
    """Load an image from disk and return RGB and grayscale versions.

    Parameters:
        path: Filesystem path of the image to read.

    Returns:
        Tuple ``(img_rgb, img_gray)`` converted from the BGR image that
        ``cv.imread`` produces.

    Raises:
        FileNotFoundError: If OpenCV could not read ``path``.
    """
    img = cv.imread(path)
    # cv.imread reports failure by returning None instead of raising; without
    # this check the problem only shows up as an opaque cvtColor error.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path!r}")
    img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    return img_rgb, img_gray


def load_template(path):
    """Load the template image as a single-channel grayscale array.

    Parameters:
        path: Filesystem path of the template image.

    Returns:
        The grayscale template returned by ``cv.imread``.

    Raises:
        FileNotFoundError: If OpenCV could not read ``path``.
    """
    # IMREAD_GRAYSCALE is the named form of the flag value 0.
    template = cv.imread(path, cv.IMREAD_GRAYSCALE)
    # cv.imread returns None on failure; fail here rather than deep inside
    # the later resize/matchTemplate calls.
    if template is None:
        raise FileNotFoundError(f"Could not read template: {path!r}")
    return template


# Image Processing

def scale_template(template, scale):
    """Resize ``template`` by the same factor ``scale`` on both axes."""
    # No explicit output size is given (None); the fx/fy factors drive it.
    resized = cv.resize(template, None, fx=scale, fy=scale)
    return resized


def get_relative_scales(img_shape, template_shape, target_ratios):
    """
    Convert width ratios into template scale factors.

    For each ratio, the returned scale is the factor by which the template
    width must be multiplied to span that fraction of the image width
    (e.g. ratio=0.5 -> scaled template is half as wide as the image).

    Parameters:
        img_shape: Shape tuple of the image; index 1 is read as the width.
        template_shape: Shape tuple of the template; index 1 is its width.
        target_ratios: Iterable of desired template-width / image-width ratios.

    Returns:
        List of scale factors, one per ratio, in the same order.
    """
    image_w = img_shape[1]
    tpl_w = template_shape[1]

    factors = []
    for fraction in target_ratios:
        desired_width = image_w * fraction
        factors.append(desired_width / tpl_w)
    return factors


# Template Matching Core

def match_template(img, template, method_id):
    """
    Run ``cv.matchTemplate`` once and report the best-scoring position.

    Parameters:
        img: Image to search in.
        template: Template to look for; its shape must unpack into exactly
            two values (rows, columns).
        method_id: OpenCV template-matching method constant.

    Returns:
        Dict with keys 'score', 'location', 'size' (width, height) and 'res'
        (the full response map). For the SQDIFF methods the minimum of the
        response is reported, otherwise the maximum.
    """
    tpl_h, tpl_w = template.shape
    response = cv.matchTemplate(img, template, method_id)
    lowest, highest, lowest_at, highest_at = cv.minMaxLoc(response)

    # Squared-difference methods score a perfect match as the smallest value.
    lower_is_better = method_id in (cv.TM_SQDIFF, cv.TM_SQDIFF_NORMED)
    if lower_is_better:
        score, location = lowest, lowest_at
    else:
        score, location = highest, highest_at

    return {'score': score, 'location': location, 'size': (tpl_w, tpl_h), 'res': response}


def find_best_scale(img, template, scales, method_name, target_ratios=None):
    """
    Try the template at every scale and keep the best-scoring match.

    Parameters:
        img: Image to search in.
        template: Unscaled template.
        scales: Sequence of scale factors to apply to the template.
        method_name: Name of the OpenCV matching constant, e.g.
            'cv.TM_CCOEFF_NORMED' (an optional module prefix is ignored).
        target_ratios: Optional sequence parallel to ``scales``; the entry
            for the winning scale is stored under 'ratio'.

    Returns:
        The ``match_template`` result dict for the best scale, extended with
        'scale', 'method' and 'ratio', or None if every scaled template was
        larger than the image.

    Raises:
        AttributeError: If ``method_name`` does not name an attribute of ``cv``.
    """
    # Resolve the constant by attribute lookup instead of eval(): same result
    # for 'cv.TM_*' names without executing an arbitrary string.
    method_id = getattr(cv, method_name.rsplit('.', 1)[-1])
    is_sqdiff = 'SQDIFF' in method_name
    best_score = float('inf') if is_sqdiff else -float('inf')
    best_result = None

    for idx, scale in enumerate(scales):
        scaled_template = scale_template(template, scale)

        # matchTemplate needs the template to fit inside the image.
        if scaled_template.shape[0] > img.shape[0] or scaled_template.shape[1] > img.shape[1]:
            continue

        result = match_template(img, scaled_template, method_id)
        # SQDIFF methods are minimised, all others maximised.
        is_better = result['score'] < best_score if is_sqdiff else result['score'] > best_score

        if is_better:
            best_score = result['score']
            best_result = {
                **result,
                'scale': scale,
                'method': method_name,
                # "is not None" also works when target_ratios is a NumPy
                # array, whose truth value is ambiguous.
                'ratio': target_ratios[idx] if target_ratios is not None else None,
            }

    return best_result


# Non-Maximum Suppression

def remove_overlapping(detections, overlap_thresh=0.3):
    """
    Greedy non-maximum suppression over (x, y, w, h, score) detections.

    The highest-scoring detection is kept, every remaining detection whose
    IoU with it exceeds ``overlap_thresh`` is discarded, and the process
    repeats on what is left.

    Parameters:
        detections: Sequence of (x, y, w, h, score) entries.
        overlap_thresh: Largest IoU a detection may have with a kept one.

    Returns:
        List of the surviving detections, ordered by descending score.
    """
    if len(detections) == 0:
        return []

    # One row per detection: left, top, right, bottom, score.
    table = np.array([[d[0], d[1], d[0]+d[2], d[1]+d[3], d[4]] for d in detections])
    left, top, right, bottom, scores = table.T
    areas = (right - left) * (bottom - top)

    remaining = scores.argsort()[::-1]
    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)

        # Intersection rectangle between the kept box and every other box.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        intersection = np.maximum(0, inter_right - inter_left) * np.maximum(0, inter_bottom - inter_top)
        union = areas[best] + areas[rest] - intersection
        iou = intersection / union

        # Only boxes that overlap little enough stay in the running.
        remaining = rest[iou <= overlap_thresh]

    return [detections[i] for i in kept]


# Visualization

def draw_detection(img_rgb, result):
    """
    Return a copy of ``img_rgb`` with one detection outlined in green.

    ``result`` must provide 'location' as (x, y) and 'size' as (w, h).
    The input image is not modified.
    """
    canvas = img_rgb.copy()
    left, top = result['location']
    width, height = result['size']
    top_left = (left, top)
    bottom_right = (left + width, top + height)
    cv.rectangle(canvas, top_left, bottom_right, (0, 255, 0), 3)
    return canvas


def draw_detections(img_rgb, detections):
    """
    Return a copy of ``img_rgb`` with every detection outlined in green.

    Each detection is an (x, y, w, h, score) entry; the score is not drawn.
    The input image is not modified.
    """
    canvas = img_rgb.copy()
    for left, top, width, height, _score in detections:
        top_left = (left, top)
        bottom_right = (left + width, top + height)
        cv.rectangle(canvas, top_left, bottom_right, (0, 255, 0), 2)
    return canvas


def plot_results_grid(results, title, save_path=None, n_cols=3):
    """
    Plot detection results in a grid layout.

    Parameters:
        results: List of (img_rgb, detections, img_name) tuples.
        title: Figure-level title.
        save_path: Optional file path; when given the figure is also saved
            there, creating the parent directory if needed.
        n_cols: Number of grid columns (default 3).

    Returns:
        None. Does nothing when ``results`` is empty.
    """
    n = len(results)
    if n == 0:
        # A grid with zero rows cannot be created, so there is nothing to plot.
        return

    n_rows = (n + n_cols - 1) // n_cols

    # squeeze=False always yields a 2-D array of axes, so flattening works
    # even for a 1x1 grid.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
    axes = axes.flatten()

    for idx, (img_rgb, detections, img_name) in enumerate(results):
        img_result = draw_detections(img_rgb, detections)
        axes[idx].imshow(img_result)
        axes[idx].set_title(f"{img_name}\n{len(detections)} detection(s)")
        axes[idx].axis('off')

    # Hide the unused cells of the last row.
    for idx in range(n, len(axes)):
        axes[idx].axis('off')

    fig.suptitle(title, fontsize=14, fontweight='bold')
    plt.tight_layout()

    if save_path:
        # savefig does not create missing directories on its own.
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        plt.savefig(save_path, dpi=120, bbox_inches='tight')
    plt.show()

In [13]:
# DETECTION FUNCTIONS

# Single Detection

def find_best_detection(img_gray, template, scales, method_name, target_ratios):
    """
    Find the best single detection over the gray and inverted image variants.

    Parameters:
        img_gray: Grayscale image (inversion is computed as ``255 - img_gray``,
            which presumes 8-bit intensities — confirm against the caller).
        template: Unscaled template.
        scales: Scale factors forwarded to ``find_best_scale``.
        method_name: Name of the OpenCV matching constant.
        target_ratios: Ratios parallel to ``scales``.

    Returns:
        The winning ``find_best_scale`` result extended with 'variant'
        ('gray' or 'gray_inv') and 'use_inversion', or None when neither
        variant produced a match. Ties go to the non-inverted variant.
    """
    img_inv = 255 - img_gray

    result_gray = find_best_scale(img_gray, template, scales, method_name, target_ratios)
    result_inv = find_best_scale(img_inv, template, scales, method_name, target_ratios)

    if not result_gray and not result_inv:
        return None

    if not result_gray:
        inverted_wins = True
    elif not result_inv:
        inverted_wins = False
    elif 'SQDIFF' in method_name:
        # Squared-difference scores are minimised, matching the rule used in
        # find_best_scale; a plain ">" would pick the worse match here.
        inverted_wins = result_inv['score'] < result_gray['score']
    else:
        inverted_wins = result_inv['score'] > result_gray['score']

    if inverted_wins:
        result_inv['variant'] = 'gray_inv'
        result_inv['use_inversion'] = True
        return result_inv

    result_gray['variant'] = 'gray'
    result_gray['use_inversion'] = False
    return result_gray


# Multiple Detection

def detect_at_scale(img, template, scale, threshold, method_name):
    """
    Detect every match at a single template scale that passes ``threshold``.

    Parameters:
        img: Image to search in.
        template: Unscaled template.
        scale: Scale factor applied to the template.
        threshold: Response cut-off; responses ``<= threshold`` pass for
            SQDIFF methods, responses ``>= threshold`` pass otherwise.
        method_name: Name of the OpenCV matching constant, e.g.
            'cv.TM_CCOEFF_NORMED' (an optional module prefix is ignored).

    Returns:
        List of (x, y, w, h, score) tuples, one per passing response pixel;
        empty when the scaled template does not fit inside the image.

    Raises:
        AttributeError: If ``method_name`` does not name an attribute of ``cv``.
    """
    # Attribute lookup replaces eval(): same constant, no string execution.
    method_id = getattr(cv, method_name.rsplit('.', 1)[-1])
    scaled_template = scale_template(template, scale)
    w, h = scaled_template.shape[::-1]

    if scaled_template.shape[0] > img.shape[0] or scaled_template.shape[1] > img.shape[1]:
        return []

    res = cv.matchTemplate(img, scaled_template, method_id)

    if 'SQDIFF' in method_name:
        passing = res <= threshold
    else:
        passing = res >= threshold

    # Row/column indices of all passing pixels; the scores are gathered in a
    # single fancy-indexing step instead of one lookup per point.
    ys, xs = np.nonzero(passing)
    scores = res[ys, xs]
    return [(x, y, w, h, score) for x, y, score in zip(xs, ys, scores)]


def detect_multiscale(img, template, scales, threshold, method_name):
    """
    Collect the detections from ``detect_at_scale`` for every scale.

    Returns one flat list, with the detections of each scale appended in
    the order the scales are given.
    """
    return [
        hit
        for scale in scales
        for hit in detect_at_scale(img, template, scale, threshold, method_name)
    ]


def get_adaptive_threshold(img, template, scales, method_name, factor=0.70):
    """
    Compute a threshold as ``factor`` times the strongest response found.

    Parameters:
        img: Image to search in.
        template: Unscaled template.
        scales: Scale factors to evaluate; scales whose template does not
            fit inside the image are skipped.
        method_name: Name of the OpenCV matching constant, e.g.
            'cv.TM_CCOEFF_NORMED' (an optional module prefix is ignored).
        factor: Fraction of the maximum response used as the threshold.

    Returns:
        ``factor`` times the largest response over all usable scales. The
        running maximum starts at 0, so the result is 0 when no scale fits
        or no response is positive.

    Raises:
        AttributeError: If ``method_name`` does not name an attribute of ``cv``.
    """
    # Attribute lookup replaces eval(): same constant, no string execution.
    method_id = getattr(cv, method_name.rsplit('.', 1)[-1])
    # NOTE(review): only the maximum response is tracked, which suits the
    # correlation methods; for SQDIFF methods the best match is the minimum,
    # so this threshold is probably not meaningful there — confirm before
    # using this with SQDIFF.
    max_corr = 0

    for scale in scales:
        scaled_template = scale_template(template, scale)
        if scaled_template.shape[0] > img.shape[0] or scaled_template.shape[1] > img.shape[1]:
            continue
        res = cv.matchTemplate(img, scaled_template, method_id)
        max_corr = max(max_corr, res.max())

    return max_corr * factor


def detect_multiple_logos(img_gray, template, scales, method_name, threshold_factor,
                          use_inversion, overlap_thresh=0.3):
    """
    Detect multiple logos on the chosen image variant.

    Parameters:
        img_gray: Grayscale image.
        template: Unscaled template.
        scales: Scale factors to search.
        method_name: Name of the OpenCV matching constant.
        threshold_factor: Fraction of the maximum response used as the
            detection threshold (see ``get_adaptive_threshold``).
        use_inversion: If True, search on ``255 - img_gray`` instead of
            ``img_gray``.
        overlap_thresh: Largest IoU allowed between kept detections during
            non-maximum suppression (default 0.3, the former fixed value).

    Returns:
        List of (x, y, w, h, score) detections after non-maximum suppression.
    """
    img_processed = 255 - img_gray if use_inversion else img_gray
    threshold = get_adaptive_threshold(img_processed, template, scales, method_name, threshold_factor)
    detections = detect_multiscale(img_processed, template, scales, threshold, method_name)
    return remove_overlapping(detections, overlap_thresh=overlap_thresh)


# Generalized Detection

def detect_logos(img_gray, template, method='cv.TM_CCOEFF_NORMED',
                 single_detection=False, threshold_factor=0.70):
    """
    Entry point for logo detection on any image.

    Steps:
    1. Search a wide range of template widths on both the gray and the
       inverted image to pick the better variant.
    2. With ``single_detection`` the winning match alone is returned.
    3. Otherwise a second search over a narrower width range returns every
       match above the adaptive threshold, on the winning variant.

    Parameters:
        img_gray: Grayscale image to search.
        template: Grayscale template.
        method: Name of the OpenCV matching constant.
        single_detection: If True, return only the best match.
        threshold_factor: Sensitivity for multi-detection (0.70 = 70% of max).

    Returns:
        List of (x, y, w, h, score) detections; empty if nothing matched.
    """
    # Template-width / image-width ratios tried in each mode.
    wide_ratios = [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90]
    narrow_ratios = [0.15, 0.18, 0.20, 0.22, 0.25, 0.28, 0.30, 0.35, 0.40]

    # The wide search decides between the plain and the inverted image.
    wide_scales = get_relative_scales(img_gray.shape, template.shape, wide_ratios)
    winner = find_best_detection(img_gray, template, wide_scales, method, wide_ratios)
    if not winner:
        return []

    if single_detection:
        location = winner['location']
        size = winner['size']
        return [(location[0], location[1], size[0], size[1], winner['score'])]

    # The multi-detection pass reuses the variant that won above.
    invert = winner.get('use_inversion', True)
    narrow_scales = get_relative_scales(img_gray.shape, template.shape, narrow_ratios)
    return detect_multiple_logos(img_gray, template, narrow_scales, method, threshold_factor, invert)

In [14]:
# Paths and matching method shared by all cells below
TEMPLATE_PATH = 'template/pattern.png'
IMAGES_DIR = 'images/'
METHOD = 'cv.TM_CCOEFF_NORMED'

# The template is read once and reused for every image
template = load_template(TEMPLATE_PATH)

# PNG/JPG files of the images folder, in sorted name order
image_files = sorted(
    entry for entry in os.listdir(IMAGES_DIR)
    if entry.endswith(('.png', '.jpg'))
)

print("="*70)
print("ASSIGNMENT 1: Single Detection per Image")
print("="*70)

# Best single match for every image
results = []
for img_name in image_files:
    img_path = os.path.join(IMAGES_DIR, img_name)
    img_rgb, img_gray = load_image(img_path)
    detections = detect_logos(img_gray, template, METHOD, single_detection=True)
    results.append((img_rgb, detections, img_name))
    print(f"  {img_name}: {len(detections)} detection")

# Show all results in one figure and save it
plot_results_grid(
    results,
    "ASSIGNMENT 1: Single Detection per Image",
    'results/Figure_1.png',
)

ASSIGNMENT 1: Single Detection per Image
  COCA-COLA-LOGO.jpg: 1 detection
  coca_logo_1.png: 1 detection
  coca_logo_2.png: 1 detection
  coca_multi.png: 1 detection
  coca_retro_1.png: 1 detection
  coca_retro_2.png: 1 detection
  logo_1.png: 1 detection


2. Plantear y validar un algoritmo para múltiples detecciones en la imagen coca_multi.png con el mismo template del ítem 1

In [None]:
# Image containing several logos
img_rgb, img_gray = load_image('images/coca_multi.png')

print("="*70)
print("ASSIGNMENT 2: Multiple Detections on coca_multi.png")
print("="*70)

# Compare how many detections survive at each threshold factor
print("\nTesting threshold factors:")
for factor in (0.80, 0.75, 0.70, 0.65):
    detections = detect_logos(
        img_gray, template, METHOD,
        single_detection=False, threshold_factor=factor,
    )
    print(f"  factor={factor}: {len(detections)} detections")

# Final run with the chosen threshold factor
THRESHOLD_FACTOR = 0.70
detections = detect_logos(
    img_gray, template, METHOD,
    single_detection=False, threshold_factor=THRESHOLD_FACTOR,
)
print(f"\nFinal: {len(detections)} detections (threshold_factor={THRESHOLD_FACTOR})")

# Draw the final detections, save the figure, then display it
img_result = draw_detections(img_rgb, detections)
plt.figure(figsize=(12, 10))
plt.imshow(img_result)
plt.title(f"ASSIGNMENT 2: coca_multi.png | {len(detections)} detections")
plt.axis('off')
plt.savefig('results/Figure_2.png', dpi=120, bbox_inches='tight')
plt.show()

3. Generalizar el algoritmo del ítem 2 para todas las imágenes

In [None]:
print("="*70)
print("ASSIGNMENT 3: Generalized Algorithm for All Images")
print("="*70)

# Threshold factor used for the multi-detection pass on every image
THRESHOLD_FACTOR = 0.70

results_single = []
results_multi = []

for img_name in image_files:
    img_path = os.path.join(IMAGES_DIR, img_name)
    img_rgb, img_gray = load_image(img_path)

    # Best single match
    det_single = detect_logos(img_gray, template, METHOD, single_detection=True)
    results_single.append((img_rgb, det_single, img_name))

    # All matches above the adaptive threshold
    det_multi = detect_logos(
        img_gray, template, METHOD,
        single_detection=False, threshold_factor=THRESHOLD_FACTOR,
    )
    results_multi.append((img_rgb, det_multi, img_name))

    print(f"  {img_name}: single={len(det_single)}, multi={len(det_multi)}")

# One figure per mode: single detections first, then multiple detections
print("\n--- Single Detection Results")
plot_results_grid(
    results_single,
    "ASSIGNMENT 3: Generalized Single Detection",
    'results/Figure_3a_single.png',
)

print("\n--- Multiple Detection Results")
plot_results_grid(
    results_multi,
    "ASSIGNMENT 3: Generalized Multiple Detection",
    'results/Figure_3b_multi.png',
)