[![Labellerr](https://storage.googleapis.com/labellerr-cdn/%200%20Labellerr%20template/notebook.webp)](https://www.labellerr.com)

# **Fine-Tune YOLO for Product Recognition for Price Verification**

---

[![labellerr](https://img.shields.io/badge/Labellerr-BLOG-black.svg)](https://www.labellerr.com/blog/)
[![Youtube](https://img.shields.io/badge/Labellerr-YouTube-b31b1b.svg)](https://www.youtube.com/@Labellerr)
[![Github](https://img.shields.io/badge/Labellerr-GitHub-green.svg)](https://github.com/Labellerr/Hands-On-Learning-in-Computer-Vision)
[![Scientific Paper](https://img.shields.io/badge/Official-Paper-blue.svg)](<PAPER LINK>)

## **Dataset Creation**

In [None]:
"""

import kagglehub

# Download latest version
path = kagglehub.dataset_download("diyer22/retail-product-checkout-dataset")

print("Path to dataset files:", path)

"""

In [None]:
import json
import os
import shutil
import random
from collections import defaultdict
import yaml

def convert_coco_to_yolo_flat(
    json_path: str,
    images_dir: str,
    output_dir: str,
    max_images: int = None,
    seed: int = 42,
    split: bool = True,
    train_ratio: float = 0.8
):
    """
    Convert a COCO-like JSON file to flat YOLO detection format with bounding
    boxes, optional train/val split, and generate both data.yaml and classes.json.

    Args:
        json_path: Path to the JSON file; it must contain the top-level keys
            'images', 'annotations' and 'categories'.
        images_dir: Directory that each image's 'file_name' is relative to.
        output_dir: Destination root. Images go to 'images/' and labels to
            'labels/' (each with 'train'/'val' sub-folders when split is True).
        max_images: Optional cap on the number of images. An equal quota is
            drawn from every class first, then the rest is filled at random.
            None or 0 keeps every image.
        seed: Seed for the private random generator used for sampling and
            shuffling; the global `random` state is left untouched.
        split: When True, shuffle and split into 'train' and 'val' subsets.
        train_ratio: Fraction of images assigned to 'train' when split is True.

    Returns:
        None. Files are written under output_dir and a summary is printed.
    """
    # A private generator keeps the run reproducible without reseeding the
    # process-wide RNG that other code may depend on.
    rng = random.Random(seed)

    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    images = data['images']
    annotations = data['annotations']
    categories = data['categories']

    # Category id → name, with a synthetic name for ids that only appear in
    # annotations and have no entry in 'categories'.
    cat_id_to_name = {c['id']: c['name'] for c in categories}
    for ann in annotations:
        cat_id_to_name.setdefault(ann['category_id'], f"class_{ann['category_id']}")
    sorted_cat_ids = sorted(cat_id_to_name)
    # YOLO needs contiguous 0-based class indices.
    cat_id_map = {cid: idx for idx, cid in enumerate(sorted_cat_ids)}
    class_names = [cat_id_to_name[cid] for cid in sorted_cat_ids]
    num_classes = len(class_names)

    ann_by_image = defaultdict(list)
    for ann in annotations:
        ann_by_image[ann['image_id']].append(ann)

    if max_images:
        images = _sample_images_per_class(
            images, ann_by_image, sorted_cat_ids, max_images, rng
        )

    if split:
        rng.shuffle(images)
        n_train = int(len(images) * train_ratio)
        subsets = {'train': images[:n_train], 'val': images[n_train:]}
    else:
        subsets = {'all': images}

    converted = 0
    for subset, imgs in subsets.items():
        img_out, lbl_out = _subset_dirs(output_dir, subset, split)
        os.makedirs(img_out, exist_ok=True)
        os.makedirs(lbl_out, exist_ok=True)

        for img in imgs:
            src_img = os.path.join(images_dir, img['file_name'])
            if not os.path.exists(src_img):
                print(f"Warning: {src_img} does not exist")
                continue

            w, h = img['width'], img['height']
            if w <= 0 or h <= 0:
                # Normalising by a zero dimension would raise; skip the image.
                print(f"Warning: {src_img} has invalid size {w}x{h}")
                continue

            # Output is flat: only the base name is kept for image and label.
            base_name = os.path.basename(img['file_name'])
            shutil.copy2(src_img, os.path.join(img_out, base_name))

            lines = _yolo_label_lines(ann_by_image.get(img['id'], []), cat_id_map, w, h)
            label_path = os.path.join(lbl_out, os.path.splitext(base_name)[0] + '.txt')
            with open(label_path, 'w', encoding='utf-8') as lf:
                lf.write("\n".join(lines))
            converted += 1

    # NOTE(review): 'path' is written exactly as passed in. If output_dir is
    # relative, the training framework may resolve it against its own dataset
    # root rather than the current directory — confirm for your setup.
    data_yaml = {
        'path': output_dir,
        'train': 'images/train' if split else 'images',
        'val':   'images/val'   if split else 'images',
        'nc':    num_classes,
        # Instead of a list, build a dict of index → class name
        'names': {idx: name for idx, name in enumerate(class_names)},
    }
    with open(os.path.join(output_dir, 'data.yaml'), 'w', encoding='utf-8') as yf:
        yaml.dump(data_yaml, yf, sort_keys=False)

    classes_json = {'names': class_names}
    with open(os.path.join(output_dir, 'classes.json'), 'w', encoding='utf-8') as jf:
        json.dump(classes_json, jf, ensure_ascii=False, indent=2)

    # Report only images that were actually copied, not ones skipped above.
    print(f"Conversion complete: {converted} images")
    print(f"data.yaml at {os.path.join(output_dir, 'data.yaml')}")
    print(f"classes.json at {os.path.join(output_dir, 'classes.json')}")


def _sample_images_per_class(images, ann_by_image, sorted_cat_ids, max_images, rng):
    """Return at most max_images images, drawing an equal per-class quota first."""
    # Dict keys give an ordered, de-duplicated set of image ids per class, so an
    # image with several boxes of one class is a single candidate.
    image_ids_by_class = defaultdict(dict)
    for img in images:
        for ann in ann_by_image.get(img['id'], []):
            image_ids_by_class[ann['category_id']][img['id']] = None

    # With no classes at all there is no quota; everything comes from the fill step.
    per_class = max_images // len(sorted_cat_ids) if sorted_cat_ids else 0
    selected_ids = set()
    for cid in sorted_cat_ids:
        cands = list(image_ids_by_class.get(cid, ()))
        if cands:
            selected_ids.update(rng.sample(cands, min(per_class, len(cands))))

    # Top up with random unselected images until the cap is reached.
    remaining = max_images - len(selected_ids)
    if remaining > 0:
        others = [img['id'] for img in images if img['id'] not in selected_ids]
        selected_ids.update(rng.sample(others, min(remaining, len(others))))

    return [img for img in images if img['id'] in selected_ids]


def _subset_dirs(output_dir, subset, split):
    """Return the (images, labels) output directories for one subset."""
    if split:
        return (
            os.path.join(output_dir, 'images', subset),
            os.path.join(output_dir, 'labels', subset),
        )
    return os.path.join(output_dir, 'images'), os.path.join(output_dir, 'labels')


def _yolo_label_lines(anns, cat_id_map, width, height):
    """Build 'class xc yc w h' lines (normalised by image size) from COCO boxes."""
    lines = []
    for ann in anns:
        cls_idx = cat_id_map[ann['category_id']]
        bbox = ann.get('bbox', None)
        if not bbox or len(bbox) != 4:
            continue
        # COCO boxes are [x_min, y_min, width, height]; YOLO wants the centre.
        x_min, y_min, bw, bh = bbox
        xc = (x_min + bw / 2) / width
        yc = (y_min + bh / 2) / height
        lines.append(f"{cls_idx} {xc:.6f} {yc:.6f} {bw/width:.6f} {bh/height:.6f}")
    return lines


In [None]:
# Convert the training annotations of the retail product checkout dataset.
# Paths are built with os.path.join so the cell works on any OS (the previous
# backslash-separated literals only resolved on Windows).
# With split=False everything lands in images/ and labels/, and data.yaml
# points both 'train' and 'val' at that same folder.
convert_coco_to_yolo_flat(
    json_path=os.path.join("archive", "instances_train2019.json"),
    images_dir=os.path.join("archive", "train2019"),
    output_dir="yolo_train_format",
    max_images=5000,
    split=False,
)

# **Model Training**

In [None]:
# IPython shell escape: print the current working directory, which the
# relative paths used below are resolved against.
!pwd

In [None]:
# Root of the converted dataset; matches the output_dir passed to
# convert_coco_to_yolo_flat above, so data.yaml lives directly inside it.
dataset_path = "yolo_train_format"

In [None]:
from ultralytics import YOLO

In [None]:
# Start from the pretrained checkpoint (the weights file needs the .pt suffix)
model = YOLO("yolov8x.pt")

# Training settings, gathered in one place before launching the run
train_args = {
    "data": f"{dataset_path}/data.yaml",
    "epochs": 100,
    "imgsz": 640,
    "batch": 10,
    "save_period": 10,  # write a checkpoint every 10 epochs
}

# Fine-tune on the converted dataset
model.train(**train_args)

In [None]:
# Replace the in-memory model with the fine-tuned weights from disk.
# NOTE(review): assumes the training run wrote to runs/detect/train; a repeated
# run may be saved under a different directory — confirm before loading.
model = YOLO("runs/detect/train/weights/best.pt")

In [None]:
# Run inference on the test_imgz directory (the sample images used below live in it)
results = model.predict(source="test_imgz")

In [None]:
from PIL import Image

# Open one sample test image; as the cell's last expression it is shown inline
Image.open("test_imgz/20180829-10-52-03-1253.jpg")

In [None]:
# Run inference on a single image; `results` is reused by the annotation cell below.
# save=True presumably asks the library to persist its output — see its docs.
results = model.predict(source="test_imgz/20180829-10-52-03-1253.jpg", save= True)

print(results[0].boxes)  # Print the detected boxes of the first result



# **CHECKOUT SYSTEM CREATION**

In [None]:
import cv2
import random
import matplotlib.pyplot as plt
from collections import defaultdict

# Superclass name → list of model class indices that belong to it
SUPERCLASS_MAP = {
    'puffed_food': [*range(0, 12)],
    'dried_fruit': [*range(12, 21)],
    'dried_food': [*range(21, 30)],
    'instant_drink': [*range(30, 41)],
    'instant_noodles': [*range(41, 53)],
    'dessert': [*range(53, 70)],
    # drink and alcohol each cover two separate index ranges
    'drink': [*range(70, 78), *range(80, 87)],
    'alcohol': [*range(78, 80), *range(87, 96)],
    'milk': [*range(96, 107)],
    'canned_food': [*range(107, 121)],
    'chocolate': [*range(121, 133)],
    'gum': [*range(133, 141)],
    'candy': [*range(141, 151)],
    'seasoner': [*range(151, 163)],
    'personal_hygiene': [*range(163, 173)],
    'tissue': [*range(173, 193)],
    'stationery': [*range(193, 200)],
}

# Reverse lookup: class index → superclass name
INDEX_TO_SUPER = {
    class_idx: group_name
    for group_name, members in SUPERCLASS_MAP.items()
    for class_idx in members
}

# One pseudo-random color per superclass (used as BGR by the OpenCV drawing code).
# The fixed seed keeps colors identical between runs; channels stay in 50..255.
random.seed(42)
SUPERCLASS_COLORS = {}
for group_name in SUPERCLASS_MAP:
    SUPERCLASS_COLORS[group_name] = tuple(random.choices(range(50, 256), k=3))

# Show the inverse mapping as the cell output
INDEX_TO_SUPER


In [None]:
def annotate_and_count_superclasses(image, results, confidence_threshold=0.25):
    """
    Annotate YOLO detections with superclass colors and return superclass counts.

    Only the first entry of `results` is used. Detections below the confidence
    threshold, or whose class index is not listed in INDEX_TO_SUPER, are
    neither drawn nor counted.

    Args:
        image: Input image (numpy array); it is copied, not modified in place
        results: YOLO results object from model inference
        confidence_threshold: Minimum confidence to consider detection

    Returns:
        tuple: (annotated_image, superclass_counts)
            - annotated_image: Copy of the image with bounding boxes and labels
            - superclass_counts: Dictionary with superclass names as keys and counts as values
    """
    annotated_image = image.copy()
    superclass_counts = defaultdict(int)

    # Nothing to draw when there are no results or the first result has no boxes.
    if not hasattr(results, '__len__') or len(results) == 0:
        return annotated_image, dict(superclass_counts)
    r = results[0]
    if r.boxes is None or len(r.boxes) == 0:
        return annotated_image, dict(superclass_counts)

    boxes = r.boxes.xyxy.cpu().numpy()  # x1, y1, x2, y2
    confidences = r.boxes.conf.cpu().numpy()
    class_ids = r.boxes.cls.cpu().numpy().astype(int)
    has_names = hasattr(r, 'names')

    # Text settings are the same for every label, so set them once.
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    thickness = 1

    for box, conf, class_id in zip(boxes, confidences, class_ids):
        if conf < confidence_threshold:
            continue

        # Use a plain int for dictionary lookups instead of a numpy scalar.
        class_id = int(class_id)
        superclass = INDEX_TO_SUPER.get(class_id, 'unknown')
        if superclass == 'unknown':
            continue

        superclass_counts[superclass] += 1
        color = SUPERCLASS_COLORS.get(superclass, (128, 128, 128))

        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)

        original_class_name = r.names[class_id] if has_names else f"class_{class_id}"
        label = f"{superclass}: {original_class_name} ({conf:.2f})"
        (text_width, text_height), _baseline = cv2.getTextSize(
            label, font, font_scale, thickness
        )

        # Put the label above the box when it fits; for boxes touching the top
        # edge, draw it just inside the box so it is not cut off.
        if y1 - text_height - 10 >= 0:
            bg_top, bg_bottom, text_y = y1 - text_height - 10, y1, y1 - 5
        else:
            bg_top, bg_bottom, text_y = y1, y1 + text_height + 10, y1 + text_height + 5

        # Filled background in the superclass color, then white text on top.
        cv2.rectangle(annotated_image,
                      (x1, bg_top),
                      (x1 + text_width, bg_bottom),
                      color, -1)
        cv2.putText(annotated_image, label,
                    (x1, text_y),
                    font, font_scale,
                    (255, 255, 255), thickness)

    return annotated_image, dict(superclass_counts)


In [None]:
sample_image_path = "test_imgz/20180829-10-52-03-1253.jpg"
image = cv2.imread(sample_image_path)
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than later inside the annotator.
if image is None:
    raise FileNotFoundError(f"Could not read image: {sample_image_path}")

# `results` is expected to come from the single-image predict cell above.
result_img, count = annotate_and_count_superclasses(image, results)

In [None]:
# Display the per-superclass detection counts as the cell output
count

In [None]:
# Show the annotated image without axes.
plt.figure(figsize=(16, 16))
plt.axis('off')
# Reverse the channel axis: the image was loaded with OpenCV (BGR), matplotlib expects RGB
plt.imshow(result_img[..., ::-1])

In [None]:
def pipeline(image_path, model=None):
    """
    Detect products in one image, show the annotated result and return counts.

    Args:
        image_path: Path to the image to run detection on
        model: Optional already-loaded YOLO model. When omitted, the weights at
            runs/detect/train/weights/best.pt are loaded for this call; pass a
            model to avoid reloading them on every invocation.

    Returns:
        dict: Superclass names mapped to the number of detections

    Raises:
        FileNotFoundError: If the image cannot be read from image_path
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    if model is None:
        model = YOLO("runs/detect/train/weights/best.pt")

    results = model.predict(source=image_path)
    result_img, count = annotate_and_count_superclasses(image, results)

    print(f"Detected superclasses: {count}")

    plt.figure(figsize=(16, 16))
    plt.axis('off')
    # Reverse the channel axis: OpenCV images are BGR, matplotlib expects RGB
    plt.imshow(result_img[..., ::-1])
    plt.show()

    return count

In [None]:
# Run the whole detect → annotate → display pipeline on the sample image
count = pipeline("test_imgz/20180829-10-52-03-1253.jpg")

## **Adding Total Price Counting Feature**

In [None]:
# Price of a single item for each superclass; keys match SUPERCLASS_MAP.
# NOTE(review): the currency unit is not stated here (checkout prints a "$") — confirm.
SUPERCLASS_PRICE = {
    'puffed_food': 100,
    'dried_fruit': 90,
    'dried_food': 80,
    'instant_drink': 70,
    'instant_noodles': 55,
    'dessert': 60,
    'drink': 50,
    'alcohol': 100,
    'milk': 40,
    'canned_food': 30,
    'chocolate': 2,
    'gum': 1,
    'candy': 5,
    'seasoner': 25,
    'personal_hygiene': 20,
    'tissue': 15,
    'stationery': 10,
}

In [None]:
def calculate_total_price(counts: dict, prices: dict = None):
    """
    Calculate and print total price based on detection counts and superclass prices

    Args:
        counts: Dictionary with superclass names as keys and detection counts as values
        prices: Dictionary with superclass names as keys and prices as values.
            Defaults to SUPERCLASS_PRICE when omitted.

    Returns:
        Total calculated price: the sum of count × price over every superclass
        that has a price (an int when all prices are ints, otherwise a float).
        Superclasses without a price are reported but contribute nothing.
    """
    if prices is None:
        prices = SUPERCLASS_PRICE
    total_price = 0
    print("Price Breakdown:")
    print("-" * 40)

    for superclass, count in counts.items():
        if superclass not in prices:
            print(f"{superclass:15} | {count:2d} × ??? = ???  (Price not found)")
            continue

        item_price = prices[superclass]
        subtotal = item_price * count
        total_price += subtotal
        # A bare width (no 'd' type code) prints ints exactly as before and
        # also accepts non-integer prices instead of raising ValueError.
        print(f"{superclass:15} | {count:2d} × {item_price:3} = {subtotal:4}")

    print("-" * 40)
    print(f"{'TOTAL':15} |            = {total_price:4}")

    return total_price


In [None]:
# Price the counts produced by the pipeline cell above
total = calculate_total_price(count)


## **Final Checkout Function**

In [None]:
def checkout(image_path):
    """
    Perform checkout by calculating total price based on detected items in the image.

    Runs the detection pipeline on the image (which also displays the annotated
    result), prints the price breakdown and a one-line summary.

    Args:
        image_path: Path to the input image for detection

    Returns:
        Total price of detected items, as computed by calculate_total_price
    """
    counts = pipeline(image_path)
    total_price = calculate_total_price(counts)
    print(f"Total price for items in {image_path}: ${total_price}")
    # Hand the total back to the caller, as the documented contract promises.
    return total_price

In [None]:
# Full checkout on the first sample image
checkout("test_imgz/20180829-10-52-03-1253.jpg")

In [None]:
# Open a second sample image; as the cell's last expression it is shown inline
Image.open("test_imgz/20180927-09-49-23-1945.jpg")

In [None]:
# Full checkout on the second sample image
checkout("test_imgz/20180927-09-49-23-1945.jpg")