# Product Recognition on Store Shelves

### Marco Scaramuzzi 
- Student ID: 0001057167
- email: marco.scaramuzzi@studio.unibo.it

## Task
Develop a computer vision system that, given a reference image for each product, is able to identify boxes of cereals of different brands from one picture of a store shelf. For each type of product displayed on the shelf, the system should report:

1. Number of instances.
2. Dimensions of each instance (width and height, in pixels, of the bounding box that encloses it).

In [None]:
#Import required modules
import numpy as np
from matplotlib import pyplot as plt
import cv2
from typing import Tuple
from collections import defaultdict
from glob import glob



# Only for jupyter notebook visualization
%matplotlib inline

%load_ext autoreload
%autoreload 2

# Environment variables

#### Train on model image: {0.png, 1.png, 11.png, 19.png, 24.png, 25.png, 26.png}
#### Test on scene image: {e1.png, e2.png, e3.png, e4.png, e5.png}


In [2]:
def centroid(vertexes):
    """
    Returns the centroid of a set of points, rounded to integer coordinates.

    Parameters:
        vertexes (array-like): Sequence of points, one point per row.

    Returns:
        tuple: Per-column mean of the points, rounded with np.round and cast to int.
    """
    points = np.array(vertexes)

    # Average over the rows, i.e. one mean value per coordinate column
    mean_point = points.mean(axis=0)

    rounded_point = np.round(mean_point).astype(int)
    return tuple(rounded_point)

def euclidian_distance(point_1, point_2):
    """
    Returns the Euclidean distance between two points.

    Parameters:
        point_1 (array-like): Coordinates of the first point.
        point_2 (array-like): Coordinates of the second point.

    Returns:
        float: Norm of the difference vector, as computed by np.linalg.norm.
    """
    difference = np.array(point_1) - np.array(point_2)
    return np.linalg.norm(difference)


In [3]:
# File names of the reference ("model") images, one per product id.
# NOTE(review): the notebook's header text lists the model images as .png,
# while the names built here end in .jpg - confirm which extension is on disk.
model_names = [f"{model_id}.jpg" for model_id in (0, 1, 11, 19, 24, 25, 26)]

# Read each reference image from the models/ directory with cv2.imread and
# key the result by its file name.
model_images = dict(
    zip(
        model_names,
        (cv2.imread(f"models/{file_name}") for file_name in model_names),
    )
)


In [4]:
def findmodel(i, check=False):
    """
    Looks up the product name associated with a model identifier.

    Parameters:
        i (int): Model identifier (one of 0, 1, 11, 19, 24, 25, 26).
        check (bool): When True, print the outcome of the lookup.

    Returns:
        tuple: (i, name) when the identifier is known, (None, None) otherwise.
    """
    models = {
        0: "Nesquik Cioccomilk",
        1: "ChocoKrave al latte",
        11: "ChocoKrave Nocciole",
        19: "Country crisp",
        24: "Fitness",
        25: "Coco Pops",
        26: "Nesquik Duo"
    }

    # Single lookup; no product name is None, so None reliably means "unknown id"
    name = models.get(i)

    if name is None:
        if check:
            print(f"⚠️ Attention: model {i} not found!")
        return None, None

    if check:
        print(f"model {i}: {name}")
    return i, name


In [None]:
def build_r_table(center, source_vectors):
    """
    Generates an R-Table for the Generalized Hough Transform.

    Every keypoint gets one entry, keyed by its row index in
    ``source_vectors``, holding the displacement that leads from the keypoint
    to ``center`` together with a placeholder size.

    Parameters:
        center (tuple): (x, y) coordinates of the centroid.
        source_vectors (array-like): Keypoint positions, one per row; column 0
            is read as x and column 1 as y. May be empty.

    Returns:
        defaultdict(list): Maps keypoint index -> [(dx, dy, size)], where
        dx = center_x - x, dy = center_y - y and size is the constant 1.
        The table is empty when no keypoints are given.
    """
    r_table = defaultdict(list)

    source_vectors = np.asarray(source_vectors)

    # An empty list converts to a 1-D array, which the column indexing below
    # cannot handle; no keypoints simply means an empty table.
    if source_vectors.size == 0:
        return r_table

    # Displacement from every keypoint to the centre, computed for all rows at once
    delta_x = center[0] - source_vectors[:, 0]
    delta_y = center[1] - source_vectors[:, 1]

    # Third column is the keypoint size; fixed to 1 for now
    entries = np.column_stack((delta_x, delta_y, np.ones_like(delta_x)))

    # One (dx, dy, size) tuple per keypoint index
    for index, entry in enumerate(entries):
        r_table[index].append(tuple(entry))

    return r_table


In [None]:


def accumulate_votes(r_table, shelf_image, scene_keypoints, matches_per_model_keypoint=3):
    """
    Casts votes for the barycentre position using the R-Table.

    Parameters:
        r_table (dict): R-Table mapping a model keypoint index to a list of
            (dx, dy, model_size) entries.
        shelf_image (numpy.ndarray): The image of the shelf; only its first
            two dimensions (height, width) are used, to size the accumulator.
        scene_keypoints (iterable): Entries of the form ((x, y), size), one
            per scene keypoint.
        matches_per_model_keypoint (int): Number of consecutive scene
            keypoints attributed to each model keypoint; scene keypoint
            ``idx`` uses R-Table entry ``idx // matches_per_model_keypoint``.
            Defaults to 3.

    Returns:
        numpy.ndarray: int32 accumulator of shape (height, width); the most
        voted cells are the barycentre candidates.

    Raises:
        ValueError: If matches_per_model_keypoint is smaller than 1.
    """
    if matches_per_model_keypoint < 1:
        raise ValueError("matches_per_model_keypoint must be a positive integer")

    # One vote counter per pixel of the shelf image
    height, width = shelf_image.shape[:2]
    accumulator = np.zeros((height, width), dtype=np.int32)

    # scene_keypoints is iterated as-is: every entry pairs a 2-tuple with a
    # scalar, and turning such a ragged sequence into an array with np.array
    # raises ValueError on NumPy >= 1.24.
    for idx, (scene_pos, scene_size) in enumerate(scene_keypoints):
        x_scene, y_scene = scene_pos

        # Consecutive scene keypoints share the same model keypoint
        model_index = idx // matches_per_model_keypoint

        if model_index not in r_table:
            continue  # No R-Table entry for this model keypoint

        for dx, dy, model_size in r_table[model_index]:
            # Rescale the stored displacement to the scene keypoint's size
            scale_ratio = scene_size / model_size
            x_accum = int(round(x_scene + scale_ratio * dx))
            y_accum = int(round(y_scene + scale_ratio * dy))

            # Votes that fall outside the image are discarded
            if 0 <= x_accum < width and 0 <= y_accum < height:
                accumulator[y_accum, x_accum] += 1

    return accumulator


In [None]:
def plot_accumulator(accumulator, shelf_image):
    """
    Plots the accumulator on a white image and finds barycentre candidates.

    Every cell with at least one vote is drawn as a black point, and every
    candidate (more than 2 votes) is marked with a diagonal cross.

    Parameters:
        accumulator (numpy.ndarray): The 2-D voting accumulator matrix.
        shelf_image (numpy.ndarray): The shelf image; the canvas takes its
            shape and dtype. Its first two dimensions must match the
            accumulator's shape.

    Returns:
        list: Barycentre candidates as [row, column] pairs, i.e. [y, x],
        in the order produced by np.argwhere.
    """
    height, width = accumulator.shape

    # White canvas with the same shape and dtype as the shelf image
    white_image = np.full_like(shelf_image, 255)

    # Find all candidate barycentres (votes > 2); rows are (y, x)
    barycentres = np.argwhere(accumulator > 2)

    # Draw black points where votes > 0
    white_image[accumulator > 0] = 0

    # Draw a diagonal cross around each barycentre
    radius = 10
    offsets = np.arange(-radius, radius + 1)
    for y, x in barycentres:
        rows = y + offsets
        # Main diagonal (x grows with y), then the anti-diagonal
        for cols in (x + offsets, x - offsets):
            # Keep only the pixels of the arm that fall inside the image
            inside = (rows >= 0) & (rows < height) & (cols >= 0) & (cols < width)
            white_image[rows[inside], cols[inside]] = 0

    # Plot the image
    plt.figure(figsize=(20, 10))
    plt.imshow(white_image, cmap='gray', vmin=0, vmax=255)
    plt.show()

    return barycentres.tolist()
