# 🔧 Werkplek Inspectie AI - YOLO11 Object Detection Training

Training notebook voor Google Colab - **Object Detection**

**✅ FEATURES:**
- Gebruikt **YOLO11s** (nieuwste versie, Small model voor beste balans)
- Detecteert specifieke objecten (Hamer, Schaar, Sleutel)
- Gebruikt bounding boxes (YOLO format)
- Genereert automatisch data.yaml

**⚠️ BELANGRIJK: Zet Runtime op GPU!**
- Runtime → Change runtime type → GPU (T4)

## 1️⃣ Setup Omgeving

In [None]:
# Check GPU: print the NVIDIA driver/device status to confirm a GPU runtime is attached
!nvidia-smi

In [None]:
# Install dependencies (-q keeps pip output quiet)
!pip install ultralytics opencv-python pillow -q

In [None]:
# Imports
import os
import shutil
from pathlib import Path
import random
import yaml
from ultralytics import YOLO
import torch

# Report the PyTorch build and whether a CUDA device can be used for training.
cuda_available = torch.cuda.is_available()
print(f"‚úÖ PyTorch versie: {torch.__version__}")
print(f"‚úÖ CUDA beschikbaar: {cuda_available}")
if cuda_available:
    # Device index 0 is the first visible GPU
    print(f"‚úÖ GPU: {torch.cuda.get_device_name(0)}")

## 2️⃣ Upload Dataset (CVAT Export)

**Verwacht formaat (YOLO 1.1 van CVAT):**
- `obj_train_data/` (bevat alle images)
- `obj.data`
- `obj.names`
- `train.txt`

Of een simpele ZIP met:
- `images/`
- `labels/`

In [None]:
# OPTION A: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Adjust this to the location of the export inside your own Drive
DATASET_SOURCE = '/content/drive/MyDrive/AI_CVAT_Export.zip'

# Copy into the Colab filesystem under the fixed name that the unzip cell reads
!cp "{DATASET_SOURCE}" /content/dataset_raw.zip
print("‚úÖ Dataset gekopieerd")

In [None]:
# OPTION B: Direct ZIP upload
from google.colab import files
import zipfile

print("Upload je dataset.zip (CVAT export)...")
uploaded = files.upload()

# Move every uploaded .zip to the fixed name that the unzip cell reads.
# NOTE(review): if several .zip files are uploaded, each mv overwrites the
# previous one, so only the last archive survives.
for filename in uploaded.keys():
    if filename.endswith('.zip'):
        !mv "{filename}" /content/dataset_raw.zip
        print(f"‚úÖ {filename} hernoemd naar dataset_raw.zip")

In [None]:
# Unzip the dataset into a fresh scratch directory (any previous extraction is removed first)
!rm -rf /content/temp_raw
!mkdir /content/temp_raw
!unzip -q /content/dataset_raw.zip -d /content/temp_raw
# List the extracted top-level entries so the archive layout can be checked by eye
!ls -la /content/temp_raw

## 3️⃣ Prepareer Dataset voor YOLO11

In [None]:
# Configuration
BASE_DIR = Path("/content/yolo_dataset")  # working copy that training reads from
RAW_DIR = Path("/content/temp_raw")       # where the uploaded archive was extracted

# Fallback used only when the dataset ships no usable class metadata.
DEFAULT_CLASS_NAMES = ["schaar", "sleutel", "whiteboard"]


def normalize_class_names(names):
    """Return class names as a list ordered by class id, or None if unusable.

    Args:
        names: The ``names`` entry of a data.yaml. Either a list (already in
            class-id order) or a dict mapping class id -> name.

    Returns:
        A non-empty list of names, or None when ``names`` is empty or of an
        unsupported type.
    """
    if isinstance(names, list):
        return list(names) or None

    if isinstance(names, dict):
        def by_class_id(key):
            # Numeric ids (int or numeric strings) sort numerically and first;
            # anything else falls back to string order after them.
            try:
                return (0, int(key), "")
            except (TypeError, ValueError):
                return (1, 0, str(key))

        # Sort by id: the label files refer to classes by index, so relying on
        # the dict's insertion order could silently permute the classes.
        ordered = [names[key] for key in sorted(names, key=by_class_id)]
        return ordered or None

    return None


def load_class_names(data_yaml_path, raw_dir):
    """Determine the class list of the raw dataset.

    Sources are tried in this order:
      1. the ``names`` entry of ``data_yaml_path`` (list or id->name dict);
      2. an ``obj.names`` file anywhere below ``raw_dir`` (the CVAT YOLO 1.1
         export layout: one class name per line);
      3. ``DEFAULT_CLASS_NAMES``.

    Args:
        data_yaml_path: Path of the data.yaml to look for.
        raw_dir: Root directory of the extracted dataset.

    Returns:
        A list of class names; index i is the name of class id i.
    """
    if data_yaml_path.exists():
        with open(data_yaml_path, 'r', encoding='utf-8') as f:
            raw_data = yaml.safe_load(f)

        # safe_load returns None for an empty file and may return a non-mapping.
        names = raw_data.get('names') if isinstance(raw_data, dict) else None
        if names is None:
            print(f"Warning: 'names' field not found in {data_yaml_path}.")
        else:
            class_names = normalize_class_names(names)
            if class_names is not None:
                return class_names
            print(f"Warning: 'names' field in {data_yaml_path} is empty or of unexpected type.")
    else:
        print(f"Warning: {data_yaml_path} not found.")

    if raw_dir.is_dir():
        # sorted() makes the choice deterministic if several obj.names exist.
        for names_file in sorted(raw_dir.rglob('obj.names')):
            lines = names_file.read_text(encoding='utf-8').splitlines()
            class_names = [line.strip() for line in lines if line.strip()]
            if class_names:
                print(f"CLASS_NAMES geladen uit {names_file}")
                return class_names

    print("Warning: no class names found in dataset. Using default CLASS_NAMES.")
    return list(DEFAULT_CLASS_NAMES)


# Load CLASS_NAMES from the raw dataset so that the class order matches the
# class ids used in the label files.
raw_data_yaml_path = RAW_DIR / 'data.yaml'
CLASS_NAMES = load_class_names(raw_data_yaml_path, RAW_DIR)

print(f"Gebruikte CLASS_NAMES: {CLASS_NAMES}")

def setup_yolo_structure():
    """
    Prepare BASE_DIR from the extracted raw dataset.

    If RAW_DIR contains both a "train" and a "valid" folder it is treated as
    pre-split: the whole tree is copied to BASE_DIR, and a data.yaml is
    generated when the copy does not contain one. Otherwise empty
    train/val "images" and "labels" folders are created for a manual split.
    An existing BASE_DIR is removed first in both cases.

    Returns:
        True if the dataset was already split, False if only the empty
        folder structure was created.
    """
    has_train = (RAW_DIR / "train").exists()
    if not has_train or not (RAW_DIR / "valid").exists():
        # Not pre-split: build an empty skeleton for the manual split
        if BASE_DIR.exists():
            shutil.rmtree(BASE_DIR)

        for split_name in ("train", "val"):
            for kind in ("images", "labels"):
                (BASE_DIR / split_name / kind).mkdir(parents=True, exist_ok=True)

        print("‚úÖ YOLO mappen structuur aangemaakt")
        return False

    print("‚úÖ Dataset is al in YOLO formaat met train/valid/test folders!")

    # Replace the working directory with a fresh copy of the raw tree
    if BASE_DIR.exists():
        shutil.rmtree(BASE_DIR)
    shutil.copytree(RAW_DIR, BASE_DIR)

    # Training needs a data.yaml; generate one when the export did not ship it
    yaml_present = (BASE_DIR / 'data.yaml').exists()
    if not yaml_present:
        print("‚ö†Ô∏è data.yaml niet gevonden, cre√´ren...")
        create_data_yaml()

    return True

def create_data_yaml():
    """Write BASE_DIR/data.yaml describing the dataset layout.

    The file contains the dataset root ('path'), the image folders of the
    train and validation splits relative to that root, an optional 'test'
    entry when BASE_DIR/test exists, and 'names' as a class-id -> name mapping
    built from CLASS_NAMES.
    """
    # The training split lives in train/images in both layouts (the previous
    # conditional chose between two identical values).
    train_path = 'train/images'
    # Pre-split exports name the validation folder 'valid'; the manual split
    # performed by this notebook creates 'val'.
    val_path = 'valid/images' if (BASE_DIR / 'valid').exists() else 'val/images'

    yaml_data = {
        'path': str(BASE_DIR),
        'train': train_path,
        'val': val_path,
        'names': {i: name for i, name in enumerate(CLASS_NAMES)}
    }

    # Only advertise a test split when one is actually present
    if (BASE_DIR / 'test').exists():
        yaml_data['test'] = 'test/images'

    # Explicit encoding so the output does not depend on the platform default
    with open(BASE_DIR / 'data.yaml', 'w', encoding='utf-8') as f:
        yaml.dump(yaml_data, f)

    print("‚úÖ data.yaml aangemaakt")

def find_images_and_labels(search_path):
    """Collect (image, label) path pairs below ``search_path``.

    An image is any file whose suffix (case-insensitive) is .jpg, .jpeg, .png
    or .bmp. Its label is the ``<stem>.txt`` file next to it, or otherwise the
    first ``<stem>.txt`` found anywhere below ``search_path`` (in sorted path
    order). Images without a label file are left out.

    Args:
        search_path: Directory (pathlib.Path) that is searched recursively.

    Returns:
        List of ``(image_path, label_path)`` tuples, sorted by image path so
        the result does not depend on filesystem enumeration order.
    """
    extensions = {'.jpg', '.jpeg', '.png', '.bmp'}

    # Walk the tree once; sorting makes both the output order and the choice
    # among same-named label files deterministic.
    all_files = [p for p in sorted(search_path.rglob("*")) if p.is_file()]

    # Index label files by exact file name. This replaces one recursive glob
    # per image and avoids treating the image stem as a glob pattern (stems
    # containing '[', ']', '*' or '?' would otherwise match the wrong files).
    labels_by_name = {}
    for p in all_files:
        if p.suffix == '.txt':
            labels_by_name.setdefault(p.name, p)

    image_files = []
    for p in all_files:
        if p.suffix.lower() not in extensions:
            continue

        # Prefer the label stored right next to the image (CVAT obj_train_data
        # layout); fall back to a same-named .txt elsewhere in the tree
        # (parallel images/ and labels/ folders).
        label_path = p.with_suffix('.txt')
        if not label_path.exists():
            label_path = labels_by_name.get(p.stem + '.txt')

        if label_path is not None:
            image_files.append((p, label_path))

    return image_files

def clean_and_copy_label(src_label_path, dst_label_path):
    """Rewrite a label file so every line is a YOLO detection box.

    Each non-empty source line is ``<class_id> <numbers...>``:
      * 4 numbers are taken as ``x_center y_center width height``;
      * an even count of 6 or more numbers is taken as polygon points
        ``x1 y1 x2 y2 ...`` and replaced by their axis-aligned bounding box;
      * anything else (wrong count, non-numeric token) is skipped with a
        warning.
    All four output values are clamped to [0, 1]; the class id token is
    copied through unchanged.

    Args:
        src_label_path: Label file to read.
        dst_label_path: File to write; it is created even if no line survives.
    """
    cleaned_lines = []
    with open(src_label_path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split()
            if not parts:  # Skip empty lines
                continue

            class_id = parts[0]
            try:
                raw_coords = [float(x) for x in parts[1:]]
            except ValueError:
                # A non-numeric token is a malformed line, not a reason to
                # abort the whole dataset preparation.
                print(f"Warning: Skipping malformed or unsupported label line in {src_label_path}: {line.strip()}")
                continue

            if len(raw_coords) == 4:
                # Already a YOLO box: x_center, y_center, width, height
                x_center, y_center, width, height = raw_coords
            elif len(raw_coords) >= 6 and len(raw_coords) % 2 == 0:
                # Polygon: collapse the points to their bounding box
                x_coords = raw_coords[0::2]
                y_coords = raw_coords[1::2]

                min_x, max_x = min(x_coords), max(x_coords)
                min_y, max_y = min(y_coords), max(y_coords)

                width = max_x - min_x
                height = max_y - min_y
                x_center = min_x + (width / 2)
                y_center = min_y + (height / 2)
            else:
                print(f"Warning: Skipping malformed or unsupported label line in {src_label_path}: {line.strip()}")
                continue

            # Clamp every value to the normalized [0, 1] range
            x_center, y_center, width, height = (
                max(0.0, min(1.0, value))
                for value in (x_center, y_center, width, height)
            )

            cleaned_lines.append(f"{class_id} {x_center} {y_center} {width} {height}")

    with open(dst_label_path, 'w', encoding='utf-8') as f:
        f.writelines(cleaned + '\n' for cleaned in cleaned_lines)

def split_dataset(train_ratio=0.8, seed=None):
    """
    Set up the working dataset, using a pre-split export or creating a split.

    When setup_yolo_structure() reports a pre-split dataset nothing else is
    done. Otherwise all image/label pairs below RAW_DIR are shuffled, divided
    into train and val, copied into BASE_DIR (labels are normalized on the
    way) and a data.yaml is written.

    Args:
        train_ratio: Fraction of the pairs that goes to the training split;
            must lie strictly between 0 and 1. Defaults to 0.8.
        seed: Optional seed for the shuffle. None (the default) gives a
            different split on every run; any fixed value makes the split
            reproducible.

    Returns:
        Path of BASE_DIR/data.yaml, or None when no image/label pair was found.

    Raises:
        ValueError: If train_ratio is not strictly between 0 and 1.
    """
    if not 0 < train_ratio < 1:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    config_path = BASE_DIR / 'data.yaml'

    if setup_yolo_structure():
        print("‚úÖ Dataset al gesplit, ready to train!")
        return config_path

    # Sort before shuffling so that a fixed seed yields the same split no
    # matter in which order the filesystem lists the files.
    pairs = sorted(find_images_and_labels(RAW_DIR))
    print(f"‚úì Gevonden correcte paren (img+txt): {len(pairs)}")

    if not pairs:
        print("‚ùå GEEN DATA GEVONDEN! Check je zip file structuur.")
        return None

    # A private Random instance leaves the global random state untouched
    random.Random(seed).shuffle(pairs)
    split_idx = int(len(pairs) * train_ratio)
    subsets = {"train": pairs[:split_idx], "val": pairs[split_idx:]}

    # Copy images and write cleaned labels for each subset
    for subset_name, subset_pairs in subsets.items():
        images_dir = BASE_DIR / subset_name / "images"
        labels_dir = BASE_DIR / subset_name / "labels"
        for img, lbl in subset_pairs:
            shutil.copy2(img, images_dir / img.name)
            # Name the label after the image so the two are always matched
            # by stem, whatever the source label file was called.
            clean_and_copy_label(lbl, labels_dir / f"{img.stem}.txt")

    print(f"‚úÖ Split: {len(subsets['train'])} train, {len(subsets['val'])} val")

    create_data_yaml()
    return config_path

config_path = split_dataset()
if config_path is None:
    # split_dataset() returns None when no image/label pairs were found;
    # stop here with a clear message instead of failing inside open(None).
    raise RuntimeError(
        "Geen dataset gevonden in /content/temp_raw - controleer de ZIP structuur en pak opnieuw uit."
    )

# Show the generated/copied dataset config for a visual sanity check
with open(config_path, 'r') as f:
    print(f.read())

## 4️⃣ Train YOLO11 Detector

In [None]:
# Training configuration
EPOCHS = 100
IMG_SIZE = 640
BATCH = 16
MODEL = 'yolo11s.pt'  # YOLO11 Small checkpoint used as the starting weights

model = YOLO(MODEL)

print("üöÄ START TRAINING (YOLO11 Object Detection)...")

# Collect the settings first so they can be inspected before the run starts.
# project/name together determine the output folder that the evaluation cells read.
train_settings = {
    'data': '/content/yolo_dataset/data.yaml',
    'epochs': EPOCHS,
    'imgsz': IMG_SIZE,
    'batch': BATCH,
    'project': 'runs/detect',
    'name': 'werkplek_tools_yolo11',
    'exist_ok': True,
    'patience': 15,
    'save': True,
}
results = model.train(**train_settings)

## 5️⃣ Evaluatie & Download

In [None]:
# Display the results.png image from the training run's output folder
from IPython.display import Image, display

results_plot_path = 'runs/detect/werkplek_tools_yolo11/results.png'
display(Image(results_plot_path))

In [None]:
# Run the best checkpoint on a few validation images and show the predictions
import glob
from IPython.display import display
# fromarray() is a PIL.Image function; IPython.display.Image does not have it.
# Imported under an alias so IPython's Image is not shadowed.
from PIL import Image as PILImage

# Pre-split exports store validation data in 'valid/', the manual split in
# 'val/'; images may also be PNG/JPEG/BMP rather than .jpg only.
val_images = []
for split_dir in ('val', 'valid'):
    for ext in ('jpg', 'jpeg', 'png', 'bmp'):
        val_images.extend(glob.glob(f'/content/yolo_dataset/{split_dir}/images/*.{ext}'))
val_images = sorted(val_images)[:3]

if not val_images:
    print("Warning: geen validatie images gevonden in /content/yolo_dataset/val(id)/images")

best_model = YOLO('runs/detect/werkplek_tools_yolo11/weights/best.pt')

for img in val_images:
    results = best_model(img)
    for r in results:
        im_array = r.plot()  # BGR numpy array with the predictions drawn in
        im = PILImage.fromarray(im_array[..., ::-1])  # reverse channels -> RGB PIL image
        display(im)

In [None]:
# Download Model
from google.colab import files

!cp runs/detect/werkplek_tools/weights/best.pt /content/werkplek_detector.pt
files.download('/content/werkplek_detector.pt')