# Selects the best available device for PyTorch computations

In [1]:
import torch

# Pick the compute backend in priority order: CUDA, then MPS, then CPU.
# The conditional expression is evaluated lazily, so the MPS check only runs
# when CUDA is not available.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

print(f"Using device: {device}")

Using device: mps


# Data Augmentation

In [2]:
import albumentations as A

# Bounding-box settings shared by every pipeline below: boxes are given in
# 'yolo' format and their class ids travel in the 'class_labels' field.
bbox_params = A.BboxParams(
    format='yolo',
    label_fields=['class_labels'],
    min_visibility=0.0,
    check_each_transform=True,
)


def _night_pipeline(*transforms):
    """Compose ``transforms`` (in the given order) with the shared ``bbox_params``."""
    return A.Compose(list(transforms), bbox_params=bbox_params)


# Pipeline 0: grayscale, darkening, Gaussian noise, motion blur, JPEG artifacts, small rotation.
night_filter_0 = _night_pipeline(
    A.ToGray(p=0.7),
    A.RandomBrightnessContrast(brightness_limit=(-0.4, -0.2), contrast_limit=(-0.2, 0.1), p=0.9),
    A.GaussNoise(std_range=(0.039, 0.196), p=0.5),
    A.MotionBlur(blur_limit=(3, 7), p=0.3),
    A.ImageCompression(quality_range=(20, 40), p=0.4),
    A.Affine(rotate=(-5, 5), p=0.4),
)

# Pipeline 1: always grayscale and always darkened, with Gaussian blur, ISO noise and strong compression.
night_filter_1 = _night_pipeline(
    A.ToGray(p=1.0),
    A.RandomBrightnessContrast(brightness_limit=(-0.5, -0.3), contrast_limit=(-0.3, -0.1), p=1.0),
    A.GaussianBlur(blur_limit=(5, 9), p=0.5),
    A.ISONoise(intensity=(0.2, 0.4), p=0.7),
    A.ImageCompression(quality_range=(5, 25), p=0.4),
    A.Affine(rotate=(-10, 10), p=0.4),
)

# Pipeline 2: gamma shift applied before ISO noise, motion blur and darkening.
night_filter_2 = _night_pipeline(
    A.ToGray(p=0.9),
    A.RandomGamma(gamma_limit=(80, 120), p=0.7),
    A.ISONoise(intensity=(0.15, 0.35), p=0.4),
    A.MotionBlur(blur_limit=(3, 7), p=0.3),
    A.RandomBrightnessContrast(brightness_limit=(-0.3, -0.1), contrast_limit=(-0.2, 0.1), p=0.7),
    A.ImageCompression(quality_range=(20, 40), p=0.5),
    A.Affine(rotate=(-10, 10), p=0.4),
)

# Pipeline 3: the only pipeline without ToGray; mild darkening plus blur/defocus.
# NOTE(review): alias_blur=True passes a boolean where a numeric value/range
# looks expected -- confirm against the albumentations Defocus documentation.
night_filter_3 = _night_pipeline(
    A.RandomBrightnessContrast(brightness_limit=(-0.2, 0.0), contrast_limit=(-0.2, 0.05), p=1.0),
    A.GaussianBlur(blur_limit=(3, 7), p=0.4),
    A.ImageCompression(quality_range=(30, 50), p=0.4),
    A.Defocus(radius=(3, 5), alias_blur=True, p=0.3),
    A.Affine(rotate=(-5, 5), p=0.3),
)

# Pipeline 4: grayscale with heavier motion blur and a rotation that is always applied.
night_filter_4 = _night_pipeline(
    A.ToGray(p=1.0),
    A.ISONoise(intensity=(0.2, 0.4), p=0.5),
    A.MotionBlur(blur_limit=(5, 11), p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.4, -0.1), contrast_limit=(-0.2, 0.1), p=0.8),
    A.Affine(rotate=(-90, 90), p=1.0),  # rotation is mandatory here (p=1.0)
)

# Pipeline 5: downscaling, noise and compression, plus a combined
# translate / rotate / scale affine step.
# NOTE(review): same alias_blur=True question as in pipeline 3.
night_filter_5 = _night_pipeline(
    A.ToGray(p=1.0),
    A.Downscale(scale_range=(0.3, 0.6), p=0.6),
    A.GaussNoise(std_range=(0.05, 0.2), p=0.5),
    A.ImageCompression(quality_range=(1, 30), p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.3, 0), contrast_limit=(-0.2, 0.1), p=0.7),
    A.Defocus(radius=(3, 5), alias_blur=True, p=0.3),
    A.Affine(
        translate_percent={"x": (-0.05, 0.05), "y": (-0.05, 0.05)},
        rotate=(-5, 5),
        scale=(0.9, 1.1),
        p=0.3,
    ),
)

# Pipeline 6: darkening with zero-filled rectangular dropouts and a slight colour shift.
night_filter_6 = _night_pipeline(
    A.ToGray(p=0.8),
    A.RandomBrightnessContrast(brightness_limit=(-0.4, -0.2), contrast_limit=(-0.3, -0.1), p=1.0),
    A.CoarseDropout(
        num_holes_range=(1, 3),
        hole_height_range=(30, 60),
        hole_width_range=(30, 60),
        fill=0,
        p=0.5,
    ),
    A.MotionBlur(blur_limit=(3, 7), p=0.3),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=15, val_shift_limit=10, p=0.2),
    A.Affine(rotate=(-10, 10), p=0.3),
)

# The position in this list is the integer "mode" used to select a pipeline.
night_aug_list = [
    night_filter_0,
    night_filter_1,
    night_filter_2,
    night_filter_3,
    night_filter_4,
    night_filter_5,
    night_filter_6,
]

In [3]:
from pathlib import Path
import re

def add_items_in_dataset(dir_path, mode):
    """Create one augmented copy (pipeline ``mode``) of every original image.

    Scans ``<dir_path>/images`` for files whose name does not already end in
    ``_t<digits>.jpg`` (i.e. skips previously generated augmentations). For
    each such image with a matching ``<dir_path>/labels/<stem>.txt`` it calls
    ``apply_transformation`` to produce ``<stem>_t<mode>.jpg`` and
    ``<stem>_t<mode>.txt`` next to the originals.

    Images without a label file are reported and skipped. Any exception raised
    while transforming a single image is printed and does not stop the loop.

    Args:
        dir_path: Dataset split directory containing ``images`` and ``labels``.
        mode: Value embedded in the output file names and forwarded to
            ``apply_transformation``.
    """
    split_root = Path(dir_path)
    dir_images = split_root / "images"
    dir_labels = split_root / "labels"
    augmented_name = re.compile(r"_t\d+\.jpg$")

    originals = [
        entry for entry in dir_images.iterdir()
        if entry.is_file() and augmented_name.search(entry.name) is None
    ]
    originals.sort()

    for image in originals:
        image_stem = image.stem
        label_path = dir_labels / f"{image_stem}.txt"

        if label_path.exists():
            # Outputs live in the same folders as the originals.
            image_output = dir_images / f"{image_stem}_t{mode}.jpg"
            label_output = dir_labels / f"{image_stem}_t{mode}.txt"
            try:
                apply_transformation(
                    image_input=str(image),
                    image_output=str(image_output),
                    label_input=str(label_path),
                    label_output=str(label_output),
                    mode=mode,
                )
            except Exception as e:
                # Best effort: report the failure and move on to the next image.
                print(f"❌ Errore su {image.name}: {e}")
        else:
            print(f"⚠️ Label mancante per {image.name}, salto.")


def normalize_yolo_bbox(bbox, epsilon=1e-7):
    """Clamp a YOLO-format box so that it lies inside the unit square.

    The box is converted to corner coordinates, every edge is clamped to
    ``[0.0, 1.0 - epsilon]``, and the result is converted back to
    centre/size form. A box that lies entirely outside the image collapses
    to a zero-size box on the nearest border, so the returned centre is
    always within range as well.

    Args:
        bbox: Sequence ``(cx, cy, w, h)`` of normalized coordinates.
        epsilon: Margin kept below 1.0 for the right and bottom edges.

    Returns:
        ``[cx, cy, w, h]`` of the clamped box; ``w`` and ``h`` are never
        negative.
    """
    # Round first to drop float noise beyond the 7th decimal place.
    cx, cy, w, h = (round(value, 7) for value in bbox)

    upper = 1.0 - epsilon

    def _clamp(value):
        # Both bounds are applied to every edge; clamping only one side lets
        # a fully out-of-image box produce a centre outside [0, 1].
        return min(max(value, 0.0), upper)

    x_min = _clamp(cx - w / 2)
    y_min = _clamp(cy - h / 2)
    x_max = _clamp(cx + w / 2)
    y_max = _clamp(cy + h / 2)

    new_cx = (x_min + x_max) / 2
    new_cy = (y_min + y_max) / 2
    # Guard against negative sizes (e.g. negative input width/height).
    new_w = max(0.0, x_max - x_min)
    new_h = max(0.0, y_max - y_min)

    return [new_cx, new_cy, new_w, new_h]


def apply_transformation(image_input, image_output, mode, label_input, label_output):
    """Augment one image and its YOLO labels with pipeline ``night_aug_list[mode]``.

    Label rows are expected as ``<class_id> <cx> <cy> <w> <h>``; rows that do
    not have exactly five whitespace-separated fields are ignored. Each box is
    passed through ``normalize_yolo_bbox`` before augmentation. After
    augmentation every coordinate is clamped to ``[0, 1]`` and boxes whose
    width or height is not above 0.001 are dropped.

    Args:
        image_input: Path of the source image.
        image_output: Path where the augmented image is written.
        mode: Index into ``night_aug_list`` selecting the pipeline.
        label_input: Path of the source YOLO label file.
        label_output: Path where the augmented labels are written.

    Raises:
        OSError: If the source image cannot be read or the augmented image
            cannot be written.
    """
    import cv2

    image = cv2.imread(image_input)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError(f"Could not read image: {image_input}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    bboxes = []
    class_labels = []
    with open(label_input, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) == 5:
                class_id = int(parts[0])
                bbox = list(map(float, parts[1:]))

                bboxes.append(normalize_yolo_bbox(bbox))
                class_labels.append(class_id)

    augmented = night_aug_list[mode](
        image=image,
        bboxes=bboxes,
        class_labels=class_labels
    )

    assert len(augmented['bboxes']) == len(augmented['class_labels']), \
        f"bbox-label length mismatch: {len(augmented['bboxes'])} vs {len(augmented['class_labels'])}"

    augmented_bgr = cv2.cvtColor(augmented['image'], cv2.COLOR_RGB2BGR)
    if not cv2.imwrite(image_output, augmented_bgr):
        # cv2.imwrite returns False on failure; without this check a label
        # file would be produced for an image that does not exist.
        raise OSError(f"Could not write image: {image_output}")

    clipped_bboxes = []
    clipped_labels = []
    for bbox, label in zip(augmented['bboxes'], augmented['class_labels']):
        x, y, w, h = (min(max(v, 0.0), 1.0) for v in bbox)
        # Drop boxes that became (almost) empty after the transform.
        if w > 0.001 and h > 0.001:
            clipped_bboxes.append([x, y, w, h])
            # Class ids were parsed as ints above; cast again so the file holds
            # a plain integer even if the pipeline hands the label back as a
            # float or numpy scalar (presumably version dependent -- TODO confirm).
            clipped_labels.append(int(label))

    with open(label_output, 'w', encoding='utf-8') as f:
        for label, bbox in zip(clipped_labels, clipped_bboxes):
            bbox_str = " ".join(f"{v:.6f}" for v in bbox)
            f.write(f"{label} {bbox_str}\n")

def clean_dataset(dir_path):
    """Delete previously generated augmented files from a dataset split.

    Removes every file in ``<dir_path>/images`` whose name ends in
    ``_t<digits>.jpg`` and every file in ``<dir_path>/labels`` whose name ends
    in ``_t<digits>.txt``. All other files are left untouched.

    Args:
        dir_path: Split directory (``str`` or path-like) that contains the
            ``images`` and ``labels`` sub-directories.
    """
    # Build paths with pathlib so both str and Path inputs are accepted
    # (string concatenation raised TypeError for Path objects).
    root = Path(dir_path)
    dir_images = root / "images"
    dir_labels = root / "labels"

    # List both directories up front, before anything is deleted.
    images = sorted(f for f in dir_images.iterdir() if f.is_file())
    labels = sorted(f for f in dir_labels.iterdir() if f.is_file())

    for image in images:
        if re.search(r"_t\d+\.jpg$", image.name):
            image.unlink(missing_ok=True)

    for label in labels:
        if re.search(r"_t\d+\.txt$", label.name):
            label.unlink(missing_ok=True)

In [4]:
def apply_data_augmentation(dataset_path):
    """Regenerate the augmented files for the test, train and valid splits.

    First removes augmented files left over from an earlier run
    (``clean_dataset``), then runs ``add_items_in_dataset`` once per split for
    every pipeline index in ``night_aug_list``.

    Args:
        dataset_path: Dataset root (``str`` or path-like) containing the
            ``test``, ``train`` and ``valid`` directories.
    """
    # Paths are handed on as strings because the helpers are called with str.
    split_dirs = [
        str(Path(dataset_path) / split) for split in ("test", "train", "valid")
    ]

    for split_dir in split_dirs:
        clean_dataset(split_dir)

    # Derive the number of modes from the pipeline list so the two cannot
    # drift apart when pipelines are added or removed.
    for mode in range(len(night_aug_list)):
        for split_dir in split_dirs:
            add_items_in_dataset(split_dir, mode)

## Execute Data Augmentation

In [6]:
# Clean and re-augment the test/train/valid splits of the dataset.
# NOTE(review): absolute, machine-specific path -- consider making the dataset
# root configurable.
dataset_root = "/Users/lorenzodimaio/PyCharmMiscProject/Dataset_v3.v1i.yolov8"
apply_data_augmentation(dataset_root)

KeyboardInterrupt: 

# Training

In [None]:
from ultralytics import YOLO

# Load the pretrained YOLOv8 nano checkpoint (another size such as
# yolov8s.pt can be used instead).
model = YOLO('yolov8n.pt')

# Start training on the device selected in the first cell.
model.train(
    data='/Users/lorenzodimaio/PyCharmMiscProject/Dataset_v3.v1i.yolov8/data.yaml',
    epochs=1,
    imgsz=320,
    batch=16,
    device=device,
)