In [1]:
from PIL import Image
import os
import cv2
import numpy as np
import math

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F 
import torch.utils.data as data 
from torch.utils.data import DataLoader
import torch.optim as optim

from torchvision import transforms, datasets
from torchvision.io import read_image, read_video

## Matplotlib
import matplotlib.pyplot as plt

## Yolo
from ultralytics import YOLO

In [2]:
def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)


set_seed(42)

### Preparazione dei dati
Il dataset originario era composto da 26067 immagini, mentre motii logistici in questo notebook è stato ridotto a 11860 immagini e 14 classi (invece delle 32 originarie). Per carciare le immagini usiamo la funzionalità `ImageLoader` presente in Pytorch che permette di estrarre tutte le immagini presenti in una cartella (e nelle sue sottocartelle) e assegna come lable a tali immagini il nome della cartella stessa.

In [5]:
dataset_path = "../archive"

class ImageFolderWithIndices(datasets.ImageFolder):
    def __getitem__(self, index):
        
        image, label = super().__getitem__(index)

        path = self.imgs[index][0]
        filename = os.path.basename(path)

        str_num = ''
        for elem in filename:
            if elem.isdigit():
                str_num += elem

        num = (int) (str_num)
    
        return image, label, num
    
dataset = ImageFolderWithIndices(root=dataset_path)

'''
    Il dataset viene suddiviso nel modo seguente:
        - 80% training_set
        - 10% validation_set
        - 10% test_set
'''

train_set, val_set, test_set = torch.utils.data.random_split(dataset, [9488, 1186, 1186])

#### Estrazione dei bounding boxes

In [7]:
bounding_boxes = []

entries = sorted(os.listdir(dataset_path))
for folder in entries:
    sub_directory = os.path.join(dataset_path, folder)
    tensor_list = []

    for filename in os.listdir(sub_directory):
        if filename.endswith("groundtruth_rect.txt"):
            path = os.path.join(sub_directory, filename)
            f = open(path, "r")
            lines = f.readlines()
            len_file = len(lines)

            for i in range(len_file):
                line = lines[i].split()
                floates = [float(x) for x in line]
                index = (float)(i)
                floates.append(index)
                coordinates = torch.tensor(floates)
                tensor_list.append(coordinates)
    bounding_boxes.append(tensor_list)

#### Preparazione del dataset per l'addestramento di YOLOv8

In [5]:
dataset_dir = "datasets/dataset"

image_dirs = {
    "train" : os.path.join(dataset_dir, "images/train"),
    "val": os.path.join(dataset_dir, "images/val"),
    "test": os.path.join(dataset_dir, "images/test")
}

label_dirs = {
    "train": os.path.join(dataset_dir, "labels/train"),
    "val": os.path.join(dataset_dir, "labels/val"),
    "test": os.path.join(dataset_dir, "labels/test")
}

# Creazione delle cartelle

for dir_path in image_dirs.values():
    os.makedirs(dir_path, exist_ok=True)
for dir_path in label_dirs.values():
    os.makedirs(dir_path, exist_ok=True)

# Salvataggio delle delle immagini e dei bounding boxes

def save_images_and_labels(data, dataset_type):
    image_dir = image_dirs[dataset_type]
    label_dir = label_dirs[dataset_type]

    for image, label, index in data:
        img_save_path = os.path.join(image_dir, f"{label}_{index}.jpg")
        image.save(img_save_path)

        label_save_path = os.path.join(label_dir, f"{label}_{index}.txt")
        with open(label_save_path, 'w') as f:
            width = image.width
            height = image.height

            bounding_box = bounding_boxes[label][index-1]
            class_id = label
            x_center = bounding_box[0] / width
            y_center = bounding_box[1] / height
            width = bounding_box[2] / width
            height = bounding_box[3] / height

            f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

In [6]:
save_images_and_labels(train_set, "train")
save_images_and_labels(test_set, "test")
save_images_and_labels(val_set, "val")

20855
2606
2606


In [None]:
dataset_path = "../archive"

transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
    #transforms.Normalize([0.1987, 0.4163, 0.4947], [0.1501, 0.1751, 0.1693])
])

dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

'''
    Il dataset viene suddiviso nel modo seguente:
        - 80% training_set
        - 10% validation_set
        - 10% test_set
'''
train_set, val_set, test_set = torch.utils.data.random_split(dataset, [21182, 2648, 2648])

train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=32, shuffle=True)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

print('tranin_set size:', len(train_set))
print('val_set size:', len(val_set))
print('test_set size:', len(test_set))

class_names = dataset.classes

In [16]:
dataset_size = len(dataset)

mean = 0.0
var = 0.0

for i in range(dataset_size):
    image, _ = dataset[i]
    #print(len(image))
    mean += image.mean(dim=(1, 2))
    var += image.var(dim=(1, 2))
    

mean /= dataset_size
std = torch.sqrt(var/dataset_size)

print(f"Mean: {mean}")
print(f"std: {std}")


'''
Mean: tensor([0.1989, 0.4162, 0.4942])
std: tensor([0.1524, 0.1773, 0.1716])
'''


Mean: tensor([0.1989, 0.4161, 0.4941])
std: tensor([0.1524, 0.1773, 0.1716])


'\nMean: tensor([0.1989, 0.4162, 0.4942])\nstd: tensor([0.1501, 0.1751, 0.1693])\n'

In [15]:
# imgs, _ = next(iter(train_loader))
# print(f"Mean: {imgs.mean().item():5.3f}")
# print(f"Standard deviation: {imgs.std().item():5.3f}")
# print(f"Maximum: {imgs.max().item():5.3f}")
# print(f"Minimum: {imgs.min().item():5.3f}")

imgs, _ = next(iter(train_loader))
print("Batch mean", imgs.mean(dim=[0,2,3]))
print("Batch std", imgs.std(dim=[0,2,3]))


Batch mean tensor([0.1955, 0.4156, 0.4451])
Batch std tensor([0.1688, 0.2383, 0.2593])


In [30]:
#print(type(dataset))
#print("Mean", (dataset.data.float() / 255.0).mean().item())
#print("Std", (dataset.data.float() / 255.0).std().item())

means = []
stds = []

for images, _ in train_loader:
    # Calculate mean and std per image
    # images is of shape (1, C, H, W), so we need to flatten the pixels for each channel
    mean = images.mean([0, 2, 3])  # Compute mean across height and width, for each channel
    std = images.std([0, 2, 3])    # Compute std across height and width, for each channel
    
    means.append(mean)
    stds.append(std)

# Convert lists to tensors for easier manipulation
means = torch.stack(means)
stds = torch.stack(stds)

overall_mean = means.mean(0)  # Average across all images
overall_std = stds.mean(0)    # Average across all images

print("Overall Mean:", overall_mean)
print("Overall Std:", overall_std)

''' 
    Overall Mean: tensor([0.1987, 0.4163, 0.4947])
    Overall Std: tensor([0.1841, 0.2392, 0.2734])
'''

Overall Mean: tensor([0.1987, 0.4163, 0.4947])
Overall Std: tensor([0.1841, 0.2392, 0.2734])


In [None]:
images, _ = next(iter(train_loader))

for i in range(len(images)):
    image = cv2.imread(images[i])
    break

In [None]:
images, labels = next(iter(train_loader))

for i in range(len(images)-20):
    img = images[i].permute(1, 2, 0).numpy()

    plt.imshow(img)
    plt.title(class_names[labels[i]])
    plt.axis("off")
    plt.show()

### Yolov8

In [12]:
model = YOLO("yolov8n.pt")

result = model.train(data="file.yaml", epochs=8, batch=32)

New https://pypi.org/project/ultralytics/8.2.81 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.2.79 🚀 Python-3.10.12 torch-2.4.0+cu121 CPU (AMD Ryzen 7 5700U with Radeon Graphics)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=file.yaml, epochs=8, time=None, patience=100, batch=32, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=Fa

[34m[1mtrain: [0mScanning /home/gjergj/Desktop/Lab AI/Underwater_object_tracking/datasets/dataset/labels/train.cache... 20855 images, 0 backgrounds, 22 corrupt: 100%|██████████| 20855/20855 [00:00<?, ?it/s]




[34m[1mval: [0mScanning /home/gjergj/Desktop/Lab AI/Underwater_object_tracking/datasets/dataset/labels/val.cache... 2606 images, 0 backgrounds, 2 corrupt: 100%|██████████| 2606/2606 [00:00<?, ?it/s]

Plotting labels to runs/detect/train/labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000278, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 8 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/8         0G      2.479      5.101      2.066          4        640: 100%|██████████| 652/652 [1:31:57<00:00,  8.46s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 41/41 [02:23<00:00,  3.49s/it]

                   all       2604       2604      0.599      0.538      0.545      0.299






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/8         0G      1.839      2.985       1.52         72        640:   3%|▎         | 17/652 [02:24<1:30:03,  8.51s/it]


KeyboardInterrupt: 