# Create YOLO dataset (bounding-box labels, images, and train/val/test splits)

In [None]:
%matplotlib inline


In [None]:
import os
import glob
import numpy as np

def convert_seg_to_bbox(seg_label_path, bbox_label_path):
    """Convert a polygon label file into a YOLO bounding-box label file.

    Each non-blank input line is read as ``<class> x1 y1 x2 y2 ...``. The
    axis-aligned box enclosing those points is written as
    ``<class> x_center y_center width height`` (six decimals), in the same
    coordinate units as the input. Every output box gets class id 0,
    whatever class id the input line carried. Blank lines are skipped.

    Args:
        seg_label_path: Path of the polygon label file to read.
        bbox_label_path: Path of the bounding-box label file to write
            (overwritten if it already exists).

    Raises:
        ValueError: If a non-blank line has no coordinates, an odd number
            of coordinates, or a coordinate that is not a number.
    """
    class_id = 0  # every polygon is written as the single class 0

    bbox_lines = []
    with open(seg_label_path, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            parts = line.split()
            if not parts:
                # Blank or whitespace-only line: nothing to convert.
                continue

            # Everything after the class id is a flat x, y, x, y, ... list.
            coords = [float(value) for value in parts[1:]]
            if not coords or len(coords) % 2:
                raise ValueError(
                    f"{seg_label_path}:{line_no}: expected an even, non-zero "
                    f"number of polygon coordinates, got {len(coords)}"
                )
            xs = coords[0::2]
            ys = coords[1::2]

            # Extremes of the polygon give the enclosing box.
            x_min, x_max = min(xs), max(xs)
            y_min, y_max = min(ys), max(ys)

            # YOLO boxes are stored as centre + size rather than corners.
            x_center = (x_min + x_max) / 2
            y_center = (y_min + y_max) / 2
            width = x_max - x_min
            height = y_max - y_min

            bbox_lines.append(
                f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
            )

    # One box per line, no trailing newline.
    with open(bbox_label_path, 'w') as f:
        f.write('\n'.join(bbox_lines))

# Turn every polygon label file in gt_masks into a bounding-box label file
# of the same name inside the YOLO dataset's labels folder.
seg_label_dir = "../data/processed/meatballs/gt_masks"
bbox_label_dir = "../data/processed/meatballs_yolo/labels"
os.makedirs(bbox_label_dir, exist_ok=True)

seg_pattern = os.path.join(seg_label_dir, "*.txt")
for seg_path in glob.glob(seg_pattern):
    # The output keeps the input's file name; only the directory changes.
    bbox_path = os.path.join(bbox_label_dir, os.path.basename(seg_path))
    convert_seg_to_bbox(seg_path, bbox_path)

In [None]:
import shutil

# Copy the source PNG images, unmodified, into the YOLO dataset's
# images folder (created on first run).
src_img_dir = "../data/processed/meatballs/images"
dst_img_dir = "../data/processed/meatballs_yolo/images"
os.makedirs(dst_img_dir, exist_ok=True)

png_paths = glob.glob(os.path.join(src_img_dir, "*.png"))
for png_path in png_paths:
    shutil.copy(png_path, dst_img_dir)


In [None]:
import os
import random
import shutil
from pathlib import Path

def reorganize_existing_dataset(dataset_dir, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1, seed=42):
    """
    Reorganize a flat images/labels dataset into train/val/test splits.

    Images found directly in ``<dataset_dir>/images`` (.jpg, .jpeg, .png) are
    shuffled and moved into ``images/train``, ``images/val`` and
    ``images/test``. For each image, a ``.txt`` file with the same base name
    in ``<dataset_dir>/labels`` is moved into the matching ``labels/<split>``
    folder when it exists. A ``data.yaml`` describing the splits is then
    written into ``dataset_dir``.

    Args:
        dataset_dir: Directory containing the ``images`` and ``labels`` folders.
        train_ratio: Fraction of the images placed in the train split.
        val_ratio: Fraction of the images placed in the val split.
        test_ratio: Not used in the computation; the test split receives
            every image left over after train and val have been taken.
        seed: Seed for the shuffle. The file list is sorted before shuffling
            and a private generator is used, so the same file names and seed
            always give the same split and the global ``random`` state is
            left untouched.

    Returns:
        Path of the written ``data.yaml`` file.

    Raises:
        ValueError: If ``train_ratio`` or ``val_ratio`` is negative, or if
            together they exceed 1.
    """
    # Reject ratios that would otherwise slice the file list in surprising
    # ways (negative indices, empty val/test) before touching any file.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at most 1, "
            f"got train_ratio={train_ratio}, val_ratio={val_ratio}"
        )

    # Input directories
    img_dir = os.path.join(dataset_dir, "images")
    label_dir = os.path.join(dataset_dir, "labels")

    # Collect the images before the split folders are created. os.listdir()
    # returns entries in arbitrary order, so sort first: otherwise the same
    # seed could produce different splits on different machines.
    image_files = sorted(
        f for f in os.listdir(img_dir) if f.endswith(('.jpg', '.jpeg', '.png'))
    )
    random.Random(seed).shuffle(image_files)

    # Create subdirectories
    split_names = ("train", "val", "test")
    for split in split_names:
        os.makedirs(os.path.join(img_dir, split), exist_ok=True)
        os.makedirs(os.path.join(label_dir, split), exist_ok=True)

    # Calculate split indices; int() truncates, so any remainder lands in test.
    n_samples = len(image_files)
    train_end = int(n_samples * train_ratio)
    val_end = train_end + int(n_samples * val_ratio)

    splits = {
        "train": image_files[:train_end],
        "val": image_files[train_end:val_end],
        "test": image_files[val_end:],
    }

    # Move files to respective directories
    for split, files in splits.items():
        for filename in files:
            shutil.move(
                os.path.join(img_dir, filename),
                os.path.join(img_dir, split, filename),
            )

            # Move the label too, if the image has one.
            label_filename = os.path.splitext(filename)[0] + ".txt"
            src_label = os.path.join(label_dir, label_filename)
            if os.path.exists(src_label):
                shutil.move(src_label, os.path.join(label_dir, split, label_filename))

    # Print statistics
    print("Dataset reorganization complete:")
    print(f"  Train: {len(splits['train'])} images")
    print(f"  Validation: {len(splits['val'])} images")
    print(f"  Test: {len(splits['test'])} images")

    # Create YAML file
    yaml_path = os.path.join(dataset_dir, "data.yaml")
    class_names = _infer_class_names(label_dir, split_names)

    yaml_content = f"""
# YOLOv8 dataset config
path: {os.path.abspath(dataset_dir)}
train: images/train
val: images/val
test: images/test

# Classes
nc: {len(class_names)}
names: {class_names}
"""

    with open(yaml_path, 'w') as f:
        f.write(yaml_content)

    print(f"Dataset YAML created at: {yaml_path}")
    print("Note: Update the class names in the YAML file to match your actual classes.")

    return yaml_path


def _infer_class_names(label_dir, split_names):
    """Return placeholder names ``class0..classN`` covering every class id in the split label files."""
    class_ids = set()
    for split in split_names:
        split_dir = os.path.join(label_dir, split)
        for label_name in os.listdir(split_dir):
            if not label_name.endswith('.txt'):
                continue
            with open(os.path.join(split_dir, label_name), 'r') as f:
                for line in f:
                    parts = line.split()
                    if parts:
                        # First token of each label line is the class id.
                        class_ids.add(int(parts[0]))

    if not class_ids:
        # No labels at all: fall back to the two-class placeholder.
        return ["class0", "class1"]
    return [f"class{i}" for i in range(max(class_ids) + 1)]

# Example usage
dataset_dir = "../data/processed/meatballs_yolo"  # Contains images/ and labels/ folders
yaml_file = reorganize_existing_dataset(dataset_dir)

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

def show_yolo_labels(img_path, label_path=None):
    """
    Display an image with its YOLO bounding-box labels drawn in red.

    Label lines with at least five fields are read as
    ``<class> x_center y_center width height``, with the coordinates taken as
    fractions of the image size. Lines with fewer fields are ignored. If the
    label file is missing or cannot be parsed, a message is printed and the
    image is shown without boxes.

    Args:
        img_path (str): Path to the image.
        label_path (str, optional): Path to the label file. If None, it is
            derived from ``img_path`` by renaming the last ``images``
            directory in the path to ``labels`` and changing the file
            extension to ``.txt``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # Load image
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising; fail here with a clear message.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB for matplotlib
    height, width, _ = img.shape

    # Auto-detect label path if not provided
    if label_path is None:
        label_path = _default_label_path(img_path)

    # Only reading/parsing is guarded, so a plotting failure is not
    # misreported as a label problem.
    try:
        boxes = _read_yolo_boxes(label_path)
    except FileNotFoundError:
        print(f"Label file not found: {label_path}")
        _show_image(img, "Image (no labels found)")
        return
    except (OSError, ValueError) as e:
        print(f"Error reading labels: {e}")
        _show_image(img, "Image (error reading labels)")
        return

    # Red color for all bounding boxes (R,G,B)
    red_color = (255, 0, 0)
    for x_center, y_center, w, h in boxes:
        # Convert normalized centre/size to pixel corner coordinates
        x1 = int((x_center - w/2) * width)
        y1 = int((y_center - h/2) * height)
        x2 = int((x_center + w/2) * width)
        y2 = int((y_center + h/2) * height)

        # Thin (1 px) rectangle, no label text
        cv2.rectangle(img, (x1, y1), (x2, y2), red_color, 1)

    # The title counts the boxes actually drawn, not the lines in the file.
    _show_image(img, f"Image with YOLO labels: {len(boxes)} objects detected")


def _default_label_path(img_path):
    """Return the label path for an image: last ``images`` directory -> ``labels``, extension -> ``.txt``."""
    import os
    import re

    stem = os.path.splitext(os.fspath(img_path))[0]
    # Split on either separator but keep the separators so the path can be
    # rebuilt unchanged apart from the renamed directory.
    pieces = re.split(r'([\\/])', stem)
    # Skip the final piece (the file name) so only directories are renamed.
    for i in range(len(pieces) - 2, -1, -1):
        if pieces[i] == 'images':
            pieces[i] = 'labels'
            break
    return ''.join(pieces) + '.txt'


def _read_yolo_boxes(label_path):
    """Return the (x_center, y_center, width, height) tuples from a YOLO label file."""
    boxes = []
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.split()
            if len(parts) >= 5:
                boxes.append(tuple(map(float, parts[1:5])))
    return boxes


def _show_image(img, title):
    """Show ``img`` in a 12x8 matplotlib figure with the axes hidden and the given title."""
    plt.figure(figsize=(12, 8))
    plt.imshow(img)
    plt.axis('off')
    plt.title(title)
    plt.show()

In [None]:
# Preview one image from the test split. The path is relative, like the
# dataset paths used in the cells that build the dataset, so the notebook
# does not depend on one user's absolute directory layout.
img = '../data/processed/meatballs_yolo/images/test/frame_000072.png'

# show_yolo_labels only displays the figure; it has no return value.
result = show_yolo_labels(img)