# CONTAINER LEVEL MODEL TRAINING

### CONFIG VARS

In [5]:
from utils import *
import cv2
from shapely.geometry import Polygon, Point, GeometryCollection
from ultralytics import YOLO

In [2]:
DATASET_PATH="../Datasets/Dataset_ContainerLevel"
PREPROCESSED_PATH="../Preprocessed_Datasets/ContainerLevel"
AUGMENTED_PATH="../Augmented_Datasets/ContainerLevel"
AUGMENTED_PATH_TRAIN_EX="../Augmented_Datasets/ContainerLevel_train" # Data exclusively for training, not validating
YOLO_PATH="../YOLO_Datasets/ContainerLevel"
YOLO_PATH_TRAIN_EX="../YOLO_Datasets/ContainerLevel_train" # Data exclusively for training, not validating

### AUGMENTATION

For this model we will apply the following augmentation techniques:
- Hue transformations (-100º to +100º)
- Contrast inversion (To simulate dark and light modes)

In [7]:
resize_dataset_images(DATASET_PATH, AUGMENTED_PATH, 640, 360)

In [8]:
hue_augmentation(AUGMENTED_PATH, AUGMENTED_PATH_TRAIN_EX, 0.15, 100)

In [9]:
contrast_inversion_augmentation(AUGMENTED_PATH, AUGMENTED_PATH_TRAIN_EX, 0.15)

In [10]:
# Add the augmented data to the train exclusively folder
for file in os.listdir(AUGMENTED_PATH):
    shutil.copy(os.path.join(AUGMENTED_PATH, file), AUGMENTED_PATH_TRAIN_EX)

### FORMAT CONVERSION

Up to now, we have treated with labelme format datasets, but we need to convert it to YOLOv8 format if we want to train
a model, which has the following format:

```
YOLOv8_Dataset/
├── data.yaml
├── train/
│   ├── images/
│   │   ├── img1.jpg
│   │   ├── img2.jpg
│   │   └── ...
│   ├── labels/
│   │   ├── img1.txt
│   │   ├── img2.txt
│   │   └── ...
├── valid/
│   ├── images/
│   │   ├── img1.jpg
│   │   ├── img2.jpg
│   │   └── ...
│   ├── labels/
│   │   ├── img1.txt
│   │   ├── img2.txt
│   │   └── ...
└── test/ (OPTIONAL)
    ├── images/
    │   ├── img1.jpg
    │   ├── img2.jpg
    │   └── ...
    └── labels/
        ├── img1.txt
        ├── img2.txt
        └── ...
```

The format of the data.yml file is:
```
path: <path_to_dataset_root_dit>
train: <path_to_train_images>
val: <path_to_validation_images>
test: <path_to_test_images> (OPTIONAL)

nc: <number_of_classes>
names: ['class1', 'class2', 'class3', ...]
```

The labels for Instance segmentation have the following format for each annotation:
```
<class-index> <x1> <y1> <x2> <y2> ... <xn> <yn>
```

In [11]:
labelme_to_yolo(AUGMENTED_PATH_TRAIN_EX, YOLO_PATH_TRAIN_EX, 0.7,[
                "TabActive",
                "TabInactive",
                "Sidebar",
                "Navbar",
                "Container",
                "Image",
                "BrowserURLInput",
                ], "seg")

### TRAIN

We will perform fine-tuning over the mobile-sam model using the hyperparameter tuning provided by Ultralytics to get the
best results we can. Since this is a non-standard dataset in terms of object features it is not clear what are the values
we should use.

We will also configure the training to not do any augmentation over the train set

In [2]:
from ultralytics import SAM

model = SAM('mobile_sam.pt')

In [3]:
model.tune(
    data="../YOLO_Datasets/ContainerLevel_train/data.yaml",
    workers=1,
    epochs=100,
    iterations=50,
    patience=15,
    optimizer="AdamW",
    plots=False,
    save=True,
    hsv_h=0.0,
    hsv_s=0.0,
    hsv_v=0.0,
    fliplr=0.0,
)

[34m[1mTuner: [0mInitialized Tuner instance with 'tune_dir=runs\detect\tune10'
[34m[1mTuner: [0m Learn about tuning at https://docs.ultralytics.com/guides/hyperparameter-tuning
[34m[1mTuner: [0mStarting iteration 1/50 with hyperparameters: {'lr0': 0.01, 'lrf': 0.01, 'momentum': 0.937, 'weight_decay': 0.0005, 'warmup_epochs': 3.0, 'warmup_momentum': 0.8, 'box': 7.5, 'cls': 0.5, 'dfl': 1.5, 'hsv_h': 0.0, 'hsv_s': 0.0, 'hsv_v': 0.0, 'degrees': 0.0, 'translate': 0.1, 'scale': 0.5, 'shear': 0.0, 'perspective': 0.0, 'flipud': 0.0, 'fliplr': 0.0, 'mosaic': 1.0, 'mixup': 0.0, 'copy_paste': 0.0}
Saved runs\detect\tune10\tune_scatter_plots.png
Saved runs\detect\tune10\tune_fitness.png

[34m[1mTuner: [0m1/50 iterations complete  (520.88s)
[34m[1mTuner: [0mResults saved to [1mruns\detect\tune10[0m
[34m[1mTuner: [0mBest fitness=0.58822 observed at iteration 1
[34m[1mTuner: [0mBest fitness metrics are {'metrics/precision(B)': 0.73157, 'metrics/recall(B)': 0.72583, 'metrics/mAP

In [7]:
# Validate the model
metrics = model.val(workers=1)  # no arguments needed, dataset and settings remembered

Ultralytics YOLOv8.0.208  Python-3.9.10 torch-1.13.1+cu116 CUDA:0 (NVIDIA GeForce GTX 1050 Ti, 4096MiB)
Model summary (fused): 168 layers, 3007013 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mScanning D:\Code\Screen2SOM-Training\YOLO_Datasets\ContainerLevel_train\val\labels.cache... 38 images, 1 backgrounds, 0 corrupt: 100%|██████████| 38/38 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:01<00:00,  1.75it/s]
                   all         38        442      0.857      0.771      0.837      0.643
             TabActive         38         28      0.876      0.857      0.943      0.789
           TabInactive         38         58      0.953      0.703      0.863      0.654
               Sidebar         38         18      0.937      0.827      0.898      0.828
                Navbar         38         19       0.74       0.75      0.784       0.52
             Container         38        145     

In [11]:
metrics.seg.map    # map50-95
metrics.seg.map50  # map50
metrics.seg.map75  # map75
metrics.seg.maps   # a list contains map50-95 of each category

array([    0.19274,     0.32869,     0.40419,     0.39927,       0.561,     0.28032,     0.10029,     0.16814,     0.26469,     0.19651,     0.35886,     0.31838,     0.15703])

In [12]:
metrics.box.map    # map50-95
metrics.box.map50  # map50
metrics.box.map75  # map75
metrics.box.maps   # a list contains map50-95 of each category

array([    0.44217,     0.57988,      0.5628,     0.66923,     0.72803,     0.39053,     0.33478,     0.53026,     0.41717,     0.49925,     0.55777,     0.43547,      0.2592])

In [10]:
inference = model("../YOLO_Datasets/Elementlevel_train/val/images/Captura de pantalla (44)_2.jpg", save=True)


image 1/1 d:\Code\Screen2SOM-Training\Training\..\YOLO_Datasets\Elementlevel_train\val\images\Captura de pantalla (44)_2.jpg: 384x640 33 WebIcons, 4 Icons, 3 BtnPills, 3 TextInputs, 2 Links, 29.0ms
Speed: 3.0ms preprocess, 29.0ms inference, 7.0ms postprocess per image at shape (1, 3, 384, 640)
Results saved to [1mruns\segment\predict[0m
