In [None]:
import os
import shutil
from pathlib import Path
from tqdm import tqdm
import torch
from ultralytics import YOLO
import logging

# Configure root logging: timestamp, level and message on every record
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
logging.info(f"Using device: {DEVICE}")

# Project layout: source dataset (images + labels) and the output directory
base_dir = Path("/kaggle/input/hardhat-vest-dataset-v3")
img_dir, label_dir = base_dir / "images", base_dir / "labels"
output_dir = Path("/kaggle/working")

# Classes kept for training ('person' has been removed from this list)
classes = ['helmet', 'vest', 'head']
num_classes = len(classes)

# Fail fast when any split is missing its image or label directory
for split in ('train', 'val', 'test'):
    img_split_dir = img_dir / split
    label_split_dir = label_dir / split
    if img_split_dir.exists() and label_split_dir.exists():
        # Both directories are present for this split; check the next one.
        continue
    logging.error(f"{split} directory for images or labels not found!")
    raise FileNotFoundError(f"Ensure 'images/{split}' and 'labels/{split}' directories exist in {base_dir}.")

# Copy every split into the working directory and drop 'person' annotations
# (class ID 3) from the label files while copying them.
PERSON_CLASS_ID = "3"
IMAGE_SUFFIXES = {'.png', '.jpg', '.jpeg'}

for split in ['train', 'val', 'test']:
    split_img_dir = output_dir / "images" / split
    split_label_dir = output_dir / "labels" / split
    split_img_dir.mkdir(parents=True, exist_ok=True)
    split_label_dir.mkdir(parents=True, exist_ok=True)

    # Compare extensions case-insensitively so files such as 'IMG_01.JPG' are
    # not silently skipped; sort for a reproducible copy order.
    img_files = sorted(
        entry.name
        for entry in (img_dir / split).iterdir()
        if entry.is_file() and entry.suffix.lower() in IMAGE_SUFFIXES
    )
    logging.info(f"Copying {len(img_files)} {split} files...")
    for img_file in tqdm(img_files, desc=f"Copying {split} files"):
        shutil.copy(img_dir / split / img_file, split_img_dir / img_file)
        label_file = Path(img_file).stem + '.txt'
        label_src = label_dir / split / label_file
        label_dst = split_label_dir / label_file

        if not label_src.exists():
            # The image is still copied; only the missing label is reported.
            logging.warning(f"Label file {label_file} not found for {img_file}.")
            continue

        with open(label_src, 'r', encoding='utf-8') as f_in, \
                open(label_dst, 'w', encoding='utf-8') as f_out:
            for line in f_in:
                # Compare the first whitespace-separated token instead of a raw
                # prefix so rows with leading spaces or tab separators are
                # filtered as well.
                fields = line.split(maxsplit=1)
                if fields and fields[0] == PERSON_CLASS_ID:
                    continue  # filter out 'person'
                f_out.write(line)

# Generate data.yaml: dataset root, per-split image folders, class count and
# the index -> name mapping.
yaml_path = output_dir / "data.yaml"
yaml_lines = [
    # Derive the dataset root from output_dir so the YAML cannot drift away
    # from where the files were actually written; POSIX separators keep the
    # value free of backslashes.
    f"path: {output_dir.as_posix()}\n",
    "train: images/train\n",
    "val: images/val\n",
    "test: images/test\n",
    f"nc: {num_classes}\n",
    "names:\n",
]
yaml_lines.extend(f"  {i}: {name}\n" for i, name in enumerate(classes))
with open(yaml_path, "w", encoding="utf-8") as f:
    f.writelines(yaml_lines)
logging.info(f"Generated data.yaml at {yaml_path}")

# Hyperparameter configuration for augmentation specifically for PPE detection.
# Keys use the Ultralytics setting names (lrf, hsv_h, hsv_s, hsv_v); the
# previous names (lr1, hsv_hue, hsv_saturation, hsv_value) are not recognised
# by Ultralytics.
# NOTE(review): nothing in this cell reads hyp.yaml back - confirm whether it
# is consumed elsewhere.
hyp_path = output_dir / "hyp.yaml"
hyp_settings = {
    "lr0": 0.01,             # initial learning rate
    # NOTE(review): originally written as 'lr1' ("final learning rate").
    # Ultralytics' 'lrf' is a fraction of lr0 - confirm 0.02 is the intended value.
    "lrf": 0.02,
    "momentum": 0.937,
    "weight_decay": 0.0005,
    # 'fl_gamma: 0.0' (focal loss gamma) is no longer written.
    # NOTE(review): presumably not accepted by every Ultralytics version - confirm.
    "hsv_h": 0.015,          # hue augmentation
    "hsv_s": 0.7,            # saturation augmentation
    "hsv_v": 0.4,            # value augmentation
    "degrees": 5.0,          # slight random rotation for PPE detection
    "translate": 0.1,        # translation (shifting the image)
    "scale": 0.5,            # scaling
    "shear": 0.0,            # no shear needed
    "flipud": 0.5,           # 50% chance of vertical flip
    "fliplr": 0.5,           # 50% chance of horizontal flip
    "mosaic": 1.0,           # mosaic augmentation for detecting small objects like helmets
    "mixup": 0.0,            # no mixup, keeping individual PPE detection intact
}
with open(hyp_path, "w", encoding="utf-8") as f:
    f.writelines(f"{key}: {value}\n" for key, value in hyp_settings.items())
logging.info(f"Generated hyp.yaml for PPE augmentation at {hyp_path}")

# Train YOLOv8 model with PPE-specific augmentation.
# Ultralytics takes hyperparameters as keyword overrides on train(), so the
# augmentation values are passed here directly; previously none of them
# reached the trainer and the library defaults were used instead.
# Optimizer settings (learning rates, momentum, weight decay) are left at the
# library defaults.
logging.info("Starting YOLOv8 model training with PPE-specific augmentation...")
ppe_augmentation = {
    "hsv_h": 0.015,    # hue augmentation
    "hsv_s": 0.7,      # saturation augmentation
    "hsv_v": 0.4,      # value augmentation
    "degrees": 5.0,    # slight random rotation
    "translate": 0.1,  # translation (shifting the image)
    "scale": 0.5,      # scaling
    "shear": 0.0,      # no shear
    "flipud": 0.5,     # 50% chance of vertical flip
    "fliplr": 0.5,     # 50% chance of horizontal flip
    "mosaic": 1.0,     # mosaic augmentation for small objects like helmets
    "mixup": 0.0,      # no mixup
}
model = YOLO("yolov8n.pt")
# `augment=True` is intentionally not passed: in the Ultralytics configuration
# it is a prediction-time setting and does not switch on training augmentation.
results = model.train(
    data=str(yaml_path),
    epochs=50,
    imgsz=640,
    batch=16,
    name="helmet_vest_detection",
    project="runs/train",
    device=DEVICE,
    **ppe_augmentation,
)
logging.info(f"Model training completed. Weights saved to {results.save_dir}/weights/best.pt")

2025-05-24 22:37:15,746 - INFO - Using device: cuda
2025-05-24 22:37:15,749 - INFO - Found existing weights at c:\WORK STUFF LOL\NTU file\NTU file\NTU courses\3rd year\Deep learning\Individual Project2\Hard Hat - Vest\runs\train\helmet_vest_detection\weights\best.pt. Loading model...
