# Dataset Label Verification

This notebook helps you visually verify that the labels in your dataset match the currency actually shown in each image.
It reads the class names from your `data.yaml`, then plots a few random samples from your `train` folder with their YOLO-format bounding boxes and class names drawn on top.

In [None]:
import os
import random
import yaml
import cv2
import matplotlib.pyplot as plt
from pathlib import Path

# --- Configuration ---
# Root of the local dataset export: the folder that holds the clean data.yaml
# together with train/images and train/labels.
# Set the DATASET_EXPORT_DIR environment variable to point at a different export
# without editing this cell; when it is unset or empty the default below is used.
PROJECT_ROOT = os.environ.get("DATASET_EXPORT_DIR") or (
    r"D:\I5\advance_programming\CamCurrencyProject\CurrencyFullStack\notebooks\dataset_export"
)
DATA_YAML = os.path.join(PROJECT_ROOT, "data.yaml")
TRAIN_IMAGES_DIR = os.path.join(PROJECT_ROOT, "train", "images")
TRAIN_LABELS_DIR = os.path.join(PROJECT_ROOT, "train", "labels")

# Load Class Names
# Read the file as UTF-8 explicitly: without an encoding argument open() uses the
# platform's default text encoding (often not UTF-8 on Windows), which can fail or
# mis-decode non-ASCII class names.
with open(DATA_YAML, 'r', encoding='utf-8') as f:
    data_config = yaml.safe_load(f)

# The 'names' entry of data.yaml provides the class names used to caption boxes.
class_names = data_config['names']

print(f"Loaded {len(class_names)} classes: {class_names}")

In [None]:
def _class_label(class_names, class_id):
    """Return the caption for ``class_id``: its class name, or the id as text if unknown."""
    if isinstance(class_names, dict):
        # data.yaml may store names as an {id: name} mapping rather than a list.
        return str(class_names.get(class_id, class_id))
    # Reject negative ids explicitly; otherwise Python would index from the end of
    # the list and caption the box with an unrelated class name.
    if 0 <= class_id < len(class_names):
        return str(class_names[class_id])
    return str(class_id)


def plot_image_with_boxes(image_path, label_path, class_names):
    """Show one image with its YOLO-format boxes and class names drawn on top.

    Args:
        image_path: Path to the image file (string or path-like).
        label_path: Path to the matching label text file. Each non-blank line is
            expected to be ``class_id x_center y_center width height``, with the
            four box values given as fractions of the image width/height.
        class_names: Class names looked up by class id; a list, or an
            ``{id: name}`` mapping. Ids without a name are captioned with the number.

    Returns:
        None. The figure is displayed with ``plt.show()``. Problems (missing files,
        an unreadable image, malformed label lines) are reported with ``print`` and
        the affected image or line is skipped instead of raising.
    """
    if not os.path.exists(image_path) or not os.path.exists(label_path):
        print(f"Missing image or label for {image_path}")
        return

    # Read Image
    # cv2.imread reports failure by returning None rather than raising, so check
    # before handing the result to cvtColor.
    image = cv2.imread(str(image_path))
    if image is None:
        print(f"Could not read image {image_path}")
        return
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    h, w = image.shape[:2]

    # Read Labels
    with open(label_path, 'r') as f:
        lines = f.readlines()

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image)

    for line_no, line in enumerate(lines, start=1):
        parts = line.split()
        if not parts:
            # Blank lines (e.g. a trailing newline) carry no annotation.
            continue

        try:
            class_id = int(parts[0])
            # Unpacking also raises ValueError when there are not exactly 4 box values.
            x_center, y_center, width, height = map(float, parts[1:])
        except ValueError:
            print(f"Skipping malformed label line {line_no} in {label_path}: {line.strip()!r}")
            continue

        # Convert Yolo format (normalized center + size) to a matplotlib rect
        # (pixel top-left corner + size).
        x1 = (x_center - width / 2) * w
        y1 = (y_center - height / 2) * h
        box_w = width * w
        box_h = height * h

        rect = plt.Rectangle((x1, y1), box_w, box_h, fill=False, color='red', linewidth=2)
        ax.add_patch(rect)

        # Caption sits just above the box's top-left corner.
        label_text = _class_label(class_names, class_id)
        ax.text(x1, y1 - 5, label_text, color='white', fontsize=12, backgroundcolor='red')

    ax.axis('off')
    ax.set_title(f"Sample: {os.path.basename(image_path)}")
    plt.show()

# --- Run Verification ---
# Number of random training images to display per run.
NUM_SAMPLES = 3
# File extensions treated as images. Suffixes are compared in lower case so that
# files such as "photo.JPG" or "photo.jpeg" are not silently left out, which a
# plain "*.jpg" / "*.png" glob would do on case-sensitive filesystems.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png"}

all_images = sorted(
    path
    for path in Path(TRAIN_IMAGES_DIR).glob("*")
    if path.is_file() and path.suffix.lower() in IMAGE_EXTENSIONS
)

if not all_images:
    print(f"No images found in {TRAIN_IMAGES_DIR}")
else:
    # Pick up to NUM_SAMPLES random images (fewer if the folder is smaller)
    samples = random.sample(all_images, min(NUM_SAMPLES, len(all_images)))

    for img_path in samples:
        # Infer label path: same file stem, ".txt" extension, in the labels folder
        label_name = img_path.stem + ".txt"
        label_path = os.path.join(TRAIN_LABELS_DIR, label_name)

        plot_image_with_boxes(img_path, label_path, class_names)