# <font color="#5fa8d3"> Yolo model


## <font color="#62b6cb"> Table of Contents <a name="ToC"></a>
0. [Library Imports, Parameters & Function Definitions](#id0)<br>
1. [Preparation of the data](#id1)<br>
    1.1 [Copy Images to Yolo Folder](#id11)<br>
    1.2 [Obtain the labels for YOLO](#id12)<br>


## <font color="#62b6cb"> 0. Library Imports, Parameters & Function Definitions <a name="id0"></a>

In [None]:
# TODO: things to move into the config file (this path is hard-coded in later cells)
'./Notebooks/data.yaml'

In [1]:
import json
import os
from ultralytics import YOLO
import shutil
import yaml # for importing a yaml file

In [2]:
# Make the project root the working directory so that the relative paths used
# below (config.yaml, ./Data, ./Models, ./Notebooks) and the `src` package resolve.
PROJECT_ROOT_NAME = "project-danielteresa"

current_directory = os.getcwd()
last_folder = os.path.basename(current_directory)

if last_folder != PROJECT_ROOT_NAME:
    # Walk upwards one level at a time. The search position must advance on every
    # iteration, otherwise a start more than one level below the root never ends.
    while last_folder != PROJECT_ROOT_NAME:
        parent_directory = os.path.dirname(current_directory)
        if parent_directory == current_directory:
            # dirname() of the filesystem root is the root itself: nothing left to try.
            raise FileNotFoundError(
                f"Could not find a '{PROJECT_ROOT_NAME}' folder above {os.getcwd()}"
            )
        current_directory = parent_directory
        last_folder = os.path.basename(current_directory)

    # Only change directory once the root has actually been found.
    os.chdir(current_directory)
    print(f"Changed directory to: {current_directory}")
else:
    print("Already in the project root directory.")

# our modules
# NOTE(review): the star import is kept because later cells use `joblib` and `plt`
# without importing them here; presumably they arrive through this import — confirm.
from src.mymodule import * # for importing our functions

Changed directory to: /home/sagemaker-user/project-danielteresa


**Configuration Variables**

In [25]:
# Load the YAML file (explicit encoding so the result does not depend on the
# platform's default text encoding)
with open('config.yaml', 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)

# Function to set variables globally and store their names
def set_variables(config, prefix='', var_dict=None):
    """Flatten a nested mapping into module-level global variables.

    Every non-dict value is assigned to a global whose name is the chain of
    keys leading to it, joined with underscores (e.g. ``{'path': {'test': x}}``
    defines ``path_test``).

    Args:
        config: Mapping with string keys; values may be nested dicts.
        prefix: String prepended to every generated variable name.
        var_dict: Optional dict that collects the generated names and values.
            A fresh dict is created when omitted, so separate calls never
            share state.

    Returns:
        The dict mapping each generated variable name to its value.
    """
    if var_dict is None:
        var_dict = {}
    for key, value in config.items():
        if isinstance(value, dict):
            # Recurse with the extended prefix, collecting into the same dict
            set_variables(value, prefix + key + '_', var_dict)
        else:
            globals()[prefix + key] = value
            var_dict[prefix + key] = value
    return var_dict

# Define one global per config entry and keep the flattened name -> value mapping
set_vars = set_variables(config)

# Report every variable that was defined
print("Variables set from YAML file:")
for entry in set_vars.items():
    print("{}: {}".format(*entry))

Variables set from YAML file:
seed: 123
color1: #62b6cb
color2: #fb8500
color3: #023047
color4: #FFB703
path_annotations: ./Data
path_train: ./Data/train/original
path_train_train: ./Data/train/train
path_train_aug: ./Data/train/train_aug
path_train_val: ./Data/train/val
path_test: ./Data/test
path_yolo: ./Data/Yoloimages/
kaggle_train_annotations: annotations_train.json
train_annotations_name_temp: annotations_train_temp.json
train_annotations_name: annotations_train_updated.json
aug_train_annotations_name: annotations_train_updated_aug.json
val_annotations_name: annotations_val_updated.json
kaggle_test_annotations: annotations_test.json
test_annotations_name_temp: annotations_test_temp.json
test_annotations_name: annotations_test_updated.json
weights_yolo_path: Models/yolo_weights
runs_path: Models/runs


## <font color="#62b6cb"> 1. Preparation of data <a name="id1"></a>

### <font color="#62b6cb"> 1.1 Copy Images to Yolo Folder  <a name="id11"></a> 

In [4]:
# Sub-folders of the YOLO data root used by this notebook: <split>/images and <split>/labels
directories = [
    os.path.join(path_yolo, split, content)
    for split in ("train", "val", "test")
    for content in ("images", "labels")
]

# Create each folder if needed, then remove everything it already contains
failed_deletions = []
for directory in directories:
    os.makedirs(directory, exist_ok=True)
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)  # Remove the file
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)  # Remove the directory and its contents
        except Exception as e:
            # Best effort: keep going, but remember the failure for the final report
            print(f"Failed to delete {file_path}. Reason: {e}")
            failed_deletions.append(file_path)

# Copy the augmented-train, validation and test images into the matching YOLO split
for source_folder, split in (
    (path_train_aug, "train"),
    (path_train_val, "val"),
    (path_test, "test"),
):
    shutil.copytree(source_folder, os.path.join(path_yolo, split, "images"), dirs_exist_ok=True)

if failed_deletions:
    # Do not claim a clean state when stale items are still present
    print(f"Files have been copied, but {len(failed_deletions)} existing item(s) could not be deleted.")
else:
    print("Folders have been cleaned and files have been copied successfully.")

Folders have been cleaned and files have been copied successfully.


Check the annotations and ids of the COCO JSON files

In [5]:
# Run validate_coco_dataset on each annotation file together with the image
# folder of the matching YOLO split, printing the returned value after a header.
coco_checks = (
    ("Check that annotation for training is correct", aug_train_annotations_name, "train/images"),
    ("Check that annotation for validation is correct", val_annotations_name, "val/images"),
    ("Check that annotation for test is correct", test_annotations_name, "test/images"),
)

for header, annotations_name, images_subfolder in coco_checks:
    print(header)
    annotations_file = os.path.join(path_annotations, annotations_name)
    images_folder = os.path.join(path_yolo, images_subfolder)
    print(validate_coco_dataset(annotations_file, images_folder))

Check that annotation for training is correct
True
Check that annotation for validation is correct
True
Check that annotation for test is correct
True


Some of the points of the polygons are out of the range of the width and the height

In [6]:
# Some of the points of the polygons are out of the range of the images.
# The same path is passed as both arguments of process_coco_annotations
# (presumably input and output, i.e. the file is rewritten in place — confirm in src.mymodule).
for annotations_name in (aug_train_annotations_name, val_annotations_name):  # train, val
    annotations_file = os.path.join(path_annotations, annotations_name)
    process_coco_annotations(annotations_file, annotations_file)

### <font color="#62b6cb"> 1.2 Obtain the labels for YOLO  <a name="id12"></a> 

Obtain the YOLO txt label file for each image from the COCO annotations using `convert_coco` from ultralytics. The annotations JSON to convert should be in a folder that contains only that JSON.

In [7]:
# Obtain the yolo labels for every split from its COCO annotation file
annotations_per_split = (
    (aug_train_annotations_name, 'train'),
    (val_annotations_name, 'val'),
    (test_annotations_name, 'test'),
)
for annotations_name, split in annotations_per_split:
    convert_coco_to_yolo_segmentation(path_annotations, annotations_name, path_yolo, split)

Annotations /home/sagemaker-user/project-danielteresa/Data/Yoloimages/train/annotations_train_updated_aug.json: 100%|██████████| 46485/46485 [00:22<00:00, 2031.49it/s]

COCO data converted successfully.
Results saved to /home/sagemaker-user/project-danielteresa/Data/Yoloimages/aux





Removed auxiliary directory ./Data/Yoloimages/aux
Yolo labels saved in ./Data/Yoloimages/train/labels



Annotations /home/sagemaker-user/project-danielteresa/Data/Yoloimages/val/annotations_val_updated.json: 100%|██████████| 2324/2324 [00:00<00:00, 11728.94it/s]

COCO data converted successfully.
Results saved to /home/sagemaker-user/project-danielteresa/Data/Yoloimages/aux





Removed auxiliary directory ./Data/Yoloimages/aux
Yolo labels saved in ./Data/Yoloimages/val/labels



Annotations /home/sagemaker-user/project-danielteresa/Data/Yoloimages/test/annotations_test_updated.json: 100%|██████████| 2324/2324 [00:00<00:00, 12081.64it/s]

COCO data converted successfully.
Results saved to /home/sagemaker-user/project-danielteresa/Data/Yoloimages/aux





Removed auxiliary directory ./Data/Yoloimages/aux
Yolo labels saved in ./Data/Yoloimages/test/labels



We check the number of elements in the folder:

In [9]:
# Define the directory path (joined, so a trailing slash in path_yolo does not produce '//')
directory_path = os.path.join(path_yolo, 'train', 'labels')

# Count the regular files directly inside the directory (sub-directories are ignored)
with os.scandir(directory_path) as entries:
    file_count = sum(1 for entry in entries if entry.is_file())

print(f"Number of files in '{directory_path}': {file_count}")

Number of files in './Data/Yoloimages//train/labels': 46485


We define a subset of images

In [13]:
# Folder set-up: source splits and destination folders, all derived from the
# configured YOLO data root instead of a second hard-coded copy of that path
train_folder = os.path.join(path_yolo, 'train')
val_folder = os.path.join(path_yolo, 'val')

destination_train_folder = os.path.join(path_yolo, 'train_prueba')
destination_val_folder = os.path.join(path_yolo, 'val_prueba')

# Number of images requested from each folder for the reduced subset
SUBSET_SIZE = 2000

# Select SUBSET_SIZE images from each folder; 'images' and 'labels' are the
# sub-folder names handed to select_images
select_images(train_folder, destination_train_folder, SUBSET_SIZE, 'images', 'labels')
select_images(val_folder, destination_val_folder, SUBSET_SIZE, 'images', 'labels')

The yaml file is created for the yolo code.

In [16]:
# Read the category list from the validation COCO annotations
with open(os.path.join(path_annotations, val_annotations_name), 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

# Class names in the order of the COCO "categories" list
# NOTE(review): assumes this order matches the class indices written by the
# label-conversion step — TODO confirm.
names = [class_name['name'] for class_name in coco_data["categories"]]
names_categories = names  # same list; name kept in case another cell still reads it
nc = len(names)

# Specify the paths and information: absolute image folders of the reduced subset,
# built from the configured YOLO data root
actual_path = os.getcwd()
train_path = os.path.normpath(os.path.join(actual_path, path_yolo, 'train_prueba', 'images'))
val_path = os.path.normpath(os.path.join(actual_path, path_yolo, 'val_prueba', 'images'))

file_path = './Notebooks/data.yaml'

# Create the YAML file
create_yaml_file(file_path, train_path, val_path, nc, names)

We train the model, or load it if it was already trained:

In [None]:
# Path to save/load the trained model. An Ultralytics '.pt' checkpoint is used
# instead of a joblib pickle: it is the library's own format and needs no extra import.
model_path = "./Models/yolo_model.pt"
data_yaml_path = "./Notebooks/data.yaml"

# Reuse the saved checkpoint when it exists; otherwise train from the pretrained weights
if os.path.exists(model_path):
    print("Loading existing model...")
    yolo_model = YOLO(model_path)
else:
    print("Training new model...")
    try:
        os.makedirs(weights_yolo_path, exist_ok=True)
        # Initialize the YOLO model with the specified weights
        yolo_model = YOLO(os.path.join(weights_yolo_path, "yolov8m-seg.pt"))

        # Train the YOLO model
        yolo_model.train(data=data_yaml_path,
                         batch=100,
                         epochs=10,
                         optimizer='Adam',
                         task='segment',
                         project=runs_path)
        # Save the model
        yolo_model.save(model_path)
        print(f"Model saved to {model_path}")
    except Exception as e:
        print("Error fitting the model:", e)
        raise  # re-raise with the original traceback

Training new model...
Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m-seg.pt to 'Models/yolo_weights/yolov8m-seg.pt'...


100%|██████████| 52.4M/52.4M [00:00<00:00, 427MB/s]

New https://pypi.org/project/ultralytics/8.2.70 available 😃 Update with 'pip install -U ultralytics'





Ultralytics YOLOv8.2.69 🚀 Python-3.10.14 torch-2.0.0.post104 CPU (Intel Xeon Platinum 8488C)
[34m[1mengine/trainer: [0mtask=segment, mode=train, model=Models/yolo_weights/yolov8m-seg.pt, data=./Notebooks/data.yaml, epochs=10, time=None, patience=100, batch=100, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=Models/runs, name=train, exist_ok=False, pretrained=True, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop

[34m[1mtrain: [0mScanning /home/sagemaker-user/project-danielteresa/Data/Yoloimages/train_prueba/labels.cache... 1999 images, 0 backgrounds, 1 corrupt: 100%|██████████| 2000/2000 [00:00<?, ?it/s]




[34m[1mval: [0mScanning /home/sagemaker-user/project-danielteresa/Data/Yoloimages/val_prueba/labels.cache... 1996 images, 0 backgrounds, 4 corrupt: 100%|██████████| 2000/2000 [00:00<?, ?it/s]






Plotting labels to Models/runs/train/labels.jpg... 
[34m[1moptimizer:[0m Adam(lr=0.01, momentum=0.937) with parameter groups 86 weight(decay=0.0), 97 weight(decay=0.00078125), 96 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mModels/runs/train[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G      2.048      4.513      4.405       2.14        241        640:  50%|█████     | 10/20 [10:42<10:36, 63.66s/it]

In [None]:
# epochs	100	
# patience	100
# batch	16	Batch size, with three modes: set as an integer (e.g., batch=16), auto mode for 60% GPU memory utilization (batch=-1), or auto mode with specified utilization fraction (batch=0.70).
# imgsz	640	
# lr0	0.01	Initial learning rate (i.e. SGD=1E-2, Adam=1E-3) . Adjusting this value is crucial for the optimization process, influencing how rapidly model weights are updated.
# weight_decay	
# dropout

# Using Adam helps the model converge faster

# verbose =False

In [None]:
# Search space for the tuner: hyperparameter name -> (min, max)
space = {
    'lr0': (0.001, 0.01),  # Initial learning rate
}

# Tune the model. `yolo_model` is the YOLO object created by the train/load cell;
# no variable called `model` is defined anywhere in this notebook.
yolo_model.tune(space=space, data='./Notebooks/data.yaml')

In [17]:
# Full-length training run (100 epochs), cached so that re-running the notebook
# does not retrain. The checkpoint gets its own file name so that it is not
# mistaken for the model saved by the shorter run.
# Path to save/load the model (relative to the project root, the working directory)
model_path = "./Models/yolo_model_100_epochs.pt"

if os.path.exists(model_path):
    print("Loading existing model...")
    yolo_model = YOLO(model_path)
else:
    print("Training new model...")
    try:
        # Continue from the YOLO object created by the train/load cell. train()
        # is called once, and its return value is not assigned to `yolo_model`,
        # which must remain the model object used by the prediction cells.
        yolo_model.train(data="./Notebooks/data.yaml",
                         epochs=100,
                         optimizer='Adam',
                         task='segment',
                         project=runs_path)
        # Save the model
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        yolo_model.save(model_path)
        print(f"Model saved to {model_path}")
    except Exception as e:
        print("Error fitting the model:", e)
        raise  # re-raise with the original traceback

[34m[1mTuner: [0mInitialized Tuner instance with 'tune_dir=C:\Users\teres\OneDrive\Documentos\UCD\Summer\project-danielteresa\runs\segment\tune9'
[34m[1mTuner: [0m Learn about tuning at https://docs.ultralytics.com/guides/hyperparameter-tuning
[34m[1mTuner: [0mStarting iteration 1/10 with hyperparameters: {'lr0': 0.01}


In [9]:

! pip install -U ultralytics



In [8]:
! python.exe -m pip install --upgrade pip

Collecting pip
  Downloading pip-24.2-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-24.2-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
    --------------------------------------- 0.0/1.8 MB 660.6 kB/s eta 0:00:03
   ---- ----------------------------------- 0.2/1.8 MB 2.0 MB/s eta 0:00:01
   -------- ------------------------------- 0.4/1.8 MB 3.1 MB/s eta 0:00:01
   ------------ --------------------------- 0.6/1.8 MB 3.0 MB/s eta 0:00:01
   ---------------- ----------------------- 0.8/1.8 MB 3.2 MB/s eta 0:00:01
   --------------------- ------------------ 1.0/1.8 MB 3.4 MB/s eta 0:00:01
   ------------------------ --------------- 1.1/1.8 MB 3.3 MB/s eta 0:00:01
   ------------------------------ --------- 1.4/1.8 MB 3.6 MB/s eta 0:00:01
   ----------------------------------- ---- 1.6/1.8 MB 3.8 MB/s eta 0:00:01
   ---------------------------------------  1.8/1.8 MB 3.9 MB/s eta 0:00:01
   ---------------------------------------- 1.8/1

In [19]:
# Predict on a single image with the YOLO object created by the train/load cell
# (`model` is never defined in this notebook)
results = yolo_model.predict("03bda226ad62553c0c73.jpg")


image 1/1 c:\Users\teres\OneDrive\Documentos\UCD\Summer\project-danielteresa\03bda226ad62553c0c73.jpg: 480x640 300 met_scratchs, 501.6ms
Speed: 5.4ms preprocess, 501.6ms inference, 145.0ms postprocess per image at shape (1, 3, 480, 640)


In [34]:
# Masks object of the first entry in `results`, displayed as the cell output
results[0].masks

ultralytics.engine.results.Masks object with attributes:

data: tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,

In [22]:
# Run predictions, keeping only high-confidence detections
results = yolo_model.predict("03bda226ad62553c0c73.jpg", conf=0.9, iou=0.7)

# Visualize results
# NOTE(review): `plt` is not imported in this notebook's import cell; presumably it
# arrives through the star import from src.mymodule — confirm.
for result in results:
    # plot() returns the annotated image as a BGR array. `show=True` is dropped
    # because the image is already rendered by matplotlib just below.
    img = result.plot(labels=True, masks=True, boxes=True)
    plt.imshow(img[..., ::-1])  # reverse the channel axis: BGR -> RGB for matplotlib
    plt.show()


image 1/1 c:\Users\teres\OneDrive\Documentos\UCD\Summer\project-danielteresa\03bda226ad62553c0c73.jpg: 480x640 132 met_scratchs, 412.3ms
Speed: 0.0ms preprocess, 412.3ms inference, 58.0ms postprocess per image at shape (1, 3, 480, 640)


<Figure size 640x480 with 1 Axes>