# Selects the best available device for PyTorch computations

In [181]:
import torch

# Pick the compute backend in order of preference: CUDA GPU, then Apple's
# Metal (MPS) backend, and finally the CPU when neither is available.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

print(f"Using device: {device}")

Using device: mps


# Data Augmentation

This code defines five image augmentation pipelines using Albumentations, tailored for YOLO-style object detection. Each pipeline applies a series of transformations—such as grayscale conversion, blurring, noise injection, brightness/contrast adjustment, affine transformations, and compression—while ensuring that bounding boxes remain valid. These augmentations simulate challenging visual conditions like poor lighting, motion blur, or camera noise, and are stored in a list (night_aug_list) for easy selection during dataset expansion or training.

In [213]:
import albumentations as A
import cv2

# Bounding boxes are supplied in YOLO format (normalized cx, cy, w, h) and their
# class ids travel alongside them in the `class_labels` field.
# min_visibility=0.0 never drops a box because of how little of it stays visible;
# check_each_transform=True has the boxes validated after each transform rather
# than only at the end of the pipeline.
bbox_params = A.BboxParams(format='yolo', label_fields=['class_labels'], min_visibility=0.0, check_each_transform=True)

# Pipeline 0: darkened colour image with optional blur, sensor noise and
# desaturation, followed by a rotation.
filter_0 = A.Compose([
    A.RandomBrightnessContrast(brightness_limit=(-0.4, -0.2), contrast_limit=(-0.3, 0), p=1.0),
    A.MotionBlur(blur_limit=(5, 15), p=0.6),
    A.GaussianBlur(blur_limit=(5, 11), p=0.4),
    A.MedianBlur(blur_limit=7, p=0.3),
    A.GaussNoise(p=0.6),
    A.ISONoise(color_shift=(0.01, 0.03), intensity=(0.1, 0.4), p=0.3),
    A.HueSaturationValue(hue_shift_limit=(-2, 2), sat_shift_limit=(-25, -10), val_shift_limit=(-10, 0), p=0.5),
    A.Affine(rotate=(-90, 90), p=1)],
    bbox_params= bbox_params
)

# Pipeline 1: dark grayscale image with moderate blur and light noise.
filter_1 = A.Compose([
    A.ToGray(p=1.0),
    A.RandomBrightnessContrast(
        brightness_limit=(-0.5, -0.3),  # darken, but not too much
        contrast_limit=(-0.3, -0.1),    # moderate contrast reduction
        p=1.0
    ),
    # Medium blur. Gaussian kernel sizes must be odd, so the upper bound is 11
    # instead of the previous even value 10.
    A.GaussianBlur(blur_limit=(5, 11), p=0.5),
    A.ISONoise(color_shift=(0.01, 0.03), intensity=(0.15, 0.35), p=0.4),  # lighter ISO noise
    A.SaltAndPepper(p=0.07),  # lighter salt-and-pepper noise
    A.HueSaturationValue(hue_shift_limit=0, sat_shift_limit=(-5, -2), val_shift_limit=(-10, -5), p=0.4),
    A.Affine(rotate=(-90, 90), p=1)
], bbox_params=bbox_params)

# Pipeline 2: grayscale image with optional gamma change, compression
# artifacts, motion blur and noise.
filter_2 = A.Compose([
    A.ToGray(p=1.0),
    A.RandomGamma(gamma_limit=(80, 120), p=0.5),
    A.ImageCompression(p=0.4),
    A.MotionBlur(blur_limit=(3, 7), p=0.4),
    A.GaussNoise(p=0.3),
    A.Affine(rotate=(-90, 90), p=1)
    ],
    bbox_params= bbox_params
)

# Pipeline 3: slightly darkened colour image with optional local/global
# histogram equalization, mild blur, compression and a small shift/rotation.
filter_3 = A.Compose([
    A.RandomBrightnessContrast(brightness_limit=(-0.2, -0.05), contrast_limit=(-0.2, 0.05), p=1.0),
    A.CLAHE(clip_limit=(1, 3), tile_grid_size=(8, 8), p=0.4),
    A.Equalize(mode='cv', p=0.3),
    A.GaussianBlur(blur_limit=(3, 7), p=0.4),
    A.ImageCompression(p=0.3),
    A.Affine(translate_percent={"x": (-0.05, 0.05), "y": (-0.05, 0.05)}, rotate=(-3, 3),p=0.3),
    A.Affine(rotate=(-90, 90), p=1)
    ],
    bbox_params= bbox_params
)

# Pipeline 4: heavily blurred image (largest kernels of all pipelines).
filter_4 = A.Compose([
    A.GaussianBlur(blur_limit=(21, 31), p=1.0),             # strongest Gaussian blur
    A.MotionBlur(blur_limit=(25, 35), p=0.9),               # strong motion blur
    A.MedianBlur(blur_limit=15, p=0.7),                     # additional blur
    A.GaussNoise(p=0.7),                                    # noise with library-default strength (optional)
    A.ImageCompression(p=0.7),                              # compression artifacts with library-default quality
    A.RandomBrightnessContrast(brightness_limit=(-0.2, 0.0), contrast_limit=(-0.2, 0.05), p=0.8),
    A.Affine(rotate=(-10, 10), p=0.6),                      # small rotations
    A.Affine(rotate=(-90, 90), p=1.0)                       # final rotation applied by every pipeline
], bbox_params=bbox_params)

# The position in this list is the `mode` index used to select a pipeline.
night_aug_list = [filter_0, filter_1,filter_2,filter_3,filter_4]

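This cell defines two functions. The helper `normalize_yolo_bbox` clips a YOLO-format bounding box so that it stays within the [0.0, 1.0] range. `apply_transformation` reads an image and its YOLO-format annotations, normalizes every bounding box with that helper, and then applies the augmentation selected from night_aug_list. The transformed image is saved, and the new bounding boxes are clipped again, boxes narrower or shorter than 0.001 are discarded, and the remaining ones are written to the output label file, ensuring valid annotations after transformation.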

In [184]:
def normalize_yolo_bbox(bbox, epsilon=1e-7):
    """Clip a YOLO bounding box so that it lies inside the unit square.

    Args:
        bbox: Four numbers ``(cx, cy, w, h)``: box centre and size, expressed
            as fractions of the image width/height.
        epsilon: Margin kept below 1.0 so that no edge is ever exactly 1.0.

    Returns:
        A list ``[cx, cy, w, h]`` describing the clipped box. Every edge is
        limited to ``[0.0, 1.0 - epsilon]``; a box lying completely outside
        the image therefore collapses to zero width and/or height on the
        nearest border instead of keeping an out-of-range centre.
    """
    # Round first to suppress floating-point noise such as 1.0000000002.
    cx, cy, w, h = (round(value, 7) for value in bbox)

    upper = 1.0 - epsilon

    def _clamp(value):
        # Both box edges must respect both limits; clamping only one side of
        # each edge lets fully out-of-image boxes produce an invalid centre.
        return min(max(value, 0.0), upper)

    x_min = _clamp(cx - w / 2)
    y_min = _clamp(cy - h / 2)
    x_max = _clamp(cx + w / 2)
    y_max = _clamp(cy + h / 2)

    # Rebuild centre/size from the clipped corners; sizes never go negative.
    return [
        (x_min + x_max) / 2,
        (y_min + y_max) / 2,
        max(0.0, x_max - x_min),
        max(0.0, y_max - y_min),
    ]


def apply_transformation(image_input, image_output, mode, label_input, label_output):
    """Augment one image together with its YOLO label file.

    Args:
        image_input: Path of the image to read.
        image_output: Path where the augmented image is written.
        mode: Index into ``night_aug_list`` selecting the pipeline to apply.
        label_input: Path of the YOLO label file (``class cx cy w h`` per line).
        label_output: Path where the augmented labels are written.

    Raises:
        FileNotFoundError: If the input image cannot be read.
        OSError: If the augmented image cannot be written.
    """
    import cv2

    image = cv2.imread(image_input)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError(f"Could not read image: {image_input}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    bboxes = []
    class_labels = []
    with open(label_input, 'r') as f:
        for line in f:
            parts = line.split()
            if len(parts) != 5:
                # Not a plain "class cx cy w h" row; ignore it.
                continue
            class_id = int(parts[0])
            # Clip the box into the unit square before handing it to
            # Albumentations, which rejects out-of-range coordinates.
            box = normalize_yolo_bbox(list(map(float, parts[1:])))
            if box[2] <= 0.0 or box[3] <= 0.0:
                # A zero-area box annotates nothing and would not pass the
                # bounding-box validation, so it is dropped here.
                continue
            bboxes.append(box)
            class_labels.append(class_id)

    augmented = night_aug_list[mode](
        image=image,
        bboxes=bboxes,
        class_labels=class_labels
    )

    augmented_bgr = cv2.cvtColor(augmented['image'], cv2.COLOR_RGB2BGR)
    if not cv2.imwrite(image_output, augmented_bgr):
        # cv2.imwrite returns False instead of raising when it cannot write.
        raise OSError(f"Could not write image: {image_output}")

    with open(label_output, 'w') as f:
        for bbox, label in zip(augmented['bboxes'], augmented['class_labels']):
            # Clip once more for safety and discard boxes that became tiny.
            x, y, w, h = (min(max(v, 0.0), 1.0) for v in bbox)
            if w > 0.001 and h > 0.001:
                bbox_str = " ".join(f"{v:.6f}" for v in (x, y, w, h))
                # int() keeps the class id integral even if the pipeline
                # hands the label back as a float or NumPy scalar.
                f.write(f"{int(label)} {bbox_str}\n")

This code cleans previously augmented files and applies new augmentations to images and their labels in a dataset, saving the results with a suffix indicating the augmentation mode. It skips images missing labels.

In [185]:
from pathlib import Path
import re

def clean_dataset(dir):
    """Delete generated augmentation files from one dataset split.

    Looks inside ``<dir>/images`` and ``<dir>/labels`` and removes every file
    whose name ends in ``_t<digits>.jpg`` or ``_t<digits>.txt`` respectively.
    All other files are left untouched.

    Args:
        dir: Path of the split directory, as a string.
    """
    image_folder = Path(dir+"/images")
    label_folder = Path(dir+"/labels")

    # Both folders are listed before anything is removed, so a missing folder
    # raises before any file has been deleted.
    image_files = sorted(entry for entry in image_folder.iterdir() if entry.is_file())
    label_files = sorted(entry for entry in label_folder.iterdir() if entry.is_file())

    stale = [entry for entry in image_files if re.search(r"_t\d+\.jpg$", entry.name)]
    stale += [entry for entry in label_files if re.search(r"_t\d+\.txt$", entry.name)]

    for entry in stale:
        entry.unlink(missing_ok=True)


def add_items_in_dataset(dir, mode):
    """Create one augmented copy of every labelled ``.jpg`` image in a split.

    For each ``<dir>/images/<name>.jpg`` that is not itself an augmentation
    output, the matching ``<dir>/labels/<name>.txt`` is looked up and
    ``apply_transformation`` writes ``<name>_t<mode>.jpg`` and
    ``<name>_t<mode>.txt`` next to the originals. Images without a label file
    are reported and skipped; files with another extension are ignored.

    Args:
        dir: Path of the split directory (contains ``images`` and ``labels``).
        mode: Augmentation index forwarded to ``apply_transformation``; it is
            also embedded in the output file names.
    """
    dir_images = Path(dir) / "images"
    dir_labels = Path(dir) / "labels"

    images = sorted(
        f for f in dir_images.iterdir()
        if f.is_file()
        and f.suffix == ".jpg"
        and not re.search(r"_t\d+\.jpg$", f.name)
    )

    for image in images:
        # Derive every path from the file's stem. Splitting or replacing text
        # in the full path string breaks as soon as ".jpg", ".txt" or
        # "/images/" also appears in a parent directory or inside the name.
        label_path = dir_labels / f"{image.stem}.txt"

        if not label_path.exists():
            print(f"⚠️ Label mancante per {image.name}, salto.")
            continue

        image_output_path = image.with_name(f"{image.stem}_t{mode}.jpg")
        label_output_path = label_path.with_name(f"{image.stem}_t{mode}.txt")

        apply_transformation(
            image_input=str(image),
            label_input=str(label_path),
            image_output=str(image_output_path),
            label_output=str(label_output_path),
            mode=mode
        )

In [192]:
def clean_all_dataset(dataset_path):
    """Run ``clean_dataset`` on the test, train and valid splits, in that order.

    Args:
        dataset_path: Dataset root directory as a string; each split is
            expected directly beneath it.
    """
    for split_name in ("test", "train", "valid"):
        clean_dataset(str(dataset_path + f"/{split_name}"))

# Remove the augmented files left by earlier runs from the test, train and
# valid splits so the augmentation cells start from the original data only.
# NOTE(review): this deletes files on disk and uses an absolute,
# machine-specific path — adjust it before running on another machine.
clean_all_dataset("/Users/lorenzodimaio/PyCharmMiscProject/second_try_face_detection.v1i.yolov8")

In [187]:
# Generate one augmented copy per pipeline for every image of every split.
# NOTE(review): absolute, machine-specific dataset location — adjust it before
# running on another machine.
DATASET_ROOT = "/Users/lorenzodimaio/PyCharmMiscProject/second_try_face_detection.v1i.yolov8"

# Splits are processed in the same order as before: test, train, valid.
DATASET_SPLITS = ("test", "train", "valid")

# Augmentation modes 0-4, i.e. one per pipeline stored in night_aug_list.
AUGMENTATION_MODES = range(5)

for split in DATASET_SPLITS:
    for mode in AUGMENTATION_MODES:
        add_items_in_dataset(f"{DATASET_ROOT}/{split}", mode)

In [None]:
from ultralytics import YOLO

# Load a pre-trained model (or an empty one).
# 'yolov8n.pt' is the pre-trained nano model; other sizes such as yolov8s.pt
# can be chosen instead.
model = YOLO('yolov8n.pt')

# Start the training run on the device selected at the top of the notebook.
model.train(
    data='/Users/lorenzodimaio/PyCharmMiscProject/second_try_face_detection.v1i.yolov8/data.yaml',
    epochs=1,
    imgsz=640,
    batch=16,
    device=device,
)