# General notebook preparation

In [1]:
# Standard library imports (plus PyYAML's `yaml`, which is a third-party package)
import os
import sys
import yaml

In [2]:
# Detect where the notebook runs and gather the shared settings in one place
IN_COLAB = 'google.colab' in sys.modules        # true when the google.colab module is loaded
IN_LOCAL_CONTAINER = 'HOMELAB' in os.environ    # true when the HOMELAB environment variable exists
# Third-party packages this notebook needs
required_modules = ['ultralytics', 'clearml']
# Central settings dictionary; DIRS and FILES start empty and are filled by later cells
env = dict(
    PROJECT_NAME='LVGL UI Detector',
    PROJECT_TAG='lvgl-ui-detector',
    IN_COLAB=IN_COLAB,
    IN_LOCAL_CONTAINER=IN_LOCAL_CONTAINER,
    DIRS={},
    FILES={},
    ENV=os.environ,
)

# Environment setups

In [3]:
# Pick the root directory that matches the detected environment:
#   - Colab: /tmp (mounting Google Drive is currently disabled:
#       from google.colab import drive; drive.mount('/content/drive'))
#   - local container: /usr/src (required modules come pre-installed there)
#   - local development: the current directory (required modules must be installed manually)
env['DIRS']['root'] = (
    os.path.join("/tmp") if IN_COLAB
    else "/usr/src" if IN_LOCAL_CONTAINER
    else os.path.curdir
)

In [4]:
# Try to import every dependency and remember whether any of them is missing
try:
    from ultralytics import YOLO
    from clearml import Task, TaskTypes, Dataset, StorageManager, Logger
    import yaml
except ImportError as import_error:
    missing_deps = True
    print(f"Failed to import required dependencies: {import_error}")
    print("Run the next cell to install the required dependencies and then restart the runtime.")
else:
    # All imports succeeded, nothing has to be installed
    missing_deps = False

In [5]:
if missing_deps and not IN_COLAB:
    %pip install -q {*required_modules}
    print("Installed required dependencies. Please restart the runtime.")

In [6]:
# Show the used versions of the required modules
# (skipped when IN_COLAB is true)
if not IN_COLAB:
    for module in required_modules:
        print(f"Module: {module}")
        # IPython substitutes {module} with the loop variable before running `pip show`
        %pip show {module}

Module: ultralytics


Name: ultralytics
Version: 8.2.2
Summary: Ultralytics YOLOv8 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification.
Home-page: 
Author: Glenn Jocher, Ayush Chaurasia, Jing Qiu
Author-email: 
License: AGPL-3.0
Location: /home/rini-debian/git-stash/lvgl-ui-detector/.venv/lib/python3.11/site-packages
Requires: matplotlib, opencv-python, pandas, pillow, psutil, py-cpuinfo, pyyaml, requests, scipy, seaborn, thop, torch, torchvision, tqdm
Required-by: 
Note: you may need to restart the kernel to use updated packages.
Module: clearml
Name: clearml
Version: 1.15.1
Summary: ClearML - Auto-Magical Experiment Manager, Version Control, and MLOps for AI
Home-page: https://github.com/allegroai/clearml
Author: ClearML
Author-email: support@clear.ml
License: Apache License 2.0
Location: /home/rini-debian/git-stash/lvgl-ui-detector/.venv/lib/python3.11/site-packages
Requires: attrs, furl, jsonschema, numpy, pathlib2, Pillow, psutil, pyjwt, pyp

In [7]:
# Datasets live in a "datasets" folder directly below the environment root
env['DIRS'].update(data=os.path.join(env['DIRS']['root'], "datasets"))

In [8]:
# Ask ClearML for the completed "UI Randomizer" datasets of this project and tag
datasets = Dataset.list_datasets(
    env['PROJECT_NAME'],
    partial_name="UI Randomizer",
    tags=[env['PROJECT_TAG']],
    only_completed=True,
)
# Index the results by dataset ID:
#   env['FILES'][id]    -> YAML files contained in the dataset
#   env['DATASETS'][id] -> the dataset entry as returned by list_datasets
env['DATASETS'] = {}
for entry in datasets:
    entry_id = entry['id']
    env['FILES'][entry_id] = Dataset.get(entry_id).list_files("*.yaml")
    env['DATASETS'][entry_id] = entry




In [9]:
# Debugging output: YAML file names per dataset, then the dataset entries themselves
print(env['FILES'], env['DATASETS'], sep="\n")

{'d1c4ec5c8f7e4492ab7b2a8d0b584c88': ['ui_randoms.yaml'], 'e7f05a856ca34cd88940c4b8774f4c45': ['ui_randoms.yaml'], '50e10f640d7548458d9c38ab9aac571b': ['ui_randoms.yaml'], 'e113a14fec574025a15a5c9868746c4b': ['dataset_upload_test.yaml'], 'c520737c392c47608ffd52f33fc593f6': ['dataset_upload_test.yaml'], '12f2a7ee7a7f45b8866d2c5f75e92413': ['dataset_upload_test.yaml'], '640fc2e67b3b4b498fe6d9b1da764911': ['dataset_upload_test.yaml'], 'cef7f5e021484b089a5eccf0c014d150': ['dataset_upload_test.yaml'], '28b1c17f1fef47a793f9ac086b2d6b10': ['dataset_upload_test.yaml'], 'af33141c123843d4bf0c50db76635bba': ['dataset_upload_test.yaml'], '2b561087cf974658b576fbd17a87bc87': ['dataset_upload_test.yaml'], '366058db107e485ca273a11601e68e7a': ['dataset_upload_test.yaml'], 'f4d7bdcb58534f1ebbc5c5578b6801a5': ['dataset_upload_test.yaml'], '90648781cf4f4848a2a66ca572e1edf5': ['dataset_upload_test.yaml'], 'a4b6cc7161334df1ad94143a819842d6': ['dataset_upload_test.yaml'], '853a677148c5417e8cc003d6761236e3': 

# Helper functions

In [10]:
def download_dataset(id: str, overwrite: bool = True):
    """Fetch a ClearML dataset into the notebook's data directory.

    Args:
        id: ClearML dataset ID to look up with ``Dataset.get``.
        overwrite: Forwarded unchanged to ``get_mutable_local_copy``.

    Returns:
        The value returned by ``get_mutable_local_copy`` for ``env['DIRS']['data']``
        (callers in this notebook use it as the local dataset folder).
    """
    return Dataset.get(id).get_mutable_local_copy(env['DIRS']['data'], overwrite=overwrite)

In [11]:
def fix_dataset_path(file: str, replacement_path: str):
    """Rewrite the ``path`` entry of a dataset YAML file in place.

    Args:
        file: Path of the dataset YAML file to modify.
        replacement_path: New value for the top-level ``path`` key, so the dataset
            description matches the current environment.

    Returns:
        The parsed YAML content (a dict) with ``path`` already replaced.

    Raises:
        ValueError: If the file does not contain a YAML mapping (e.g. it is empty).
    """
    with open(file, 'r+') as f:
        dataset_content = yaml.safe_load(f)
        if not isinstance(dataset_content, dict):
            raise ValueError(f"Dataset file '{file}' does not contain a YAML mapping")
        # Print before touching 'path', otherwise the "original" would already show the new value
        print(f"Original dataset:\n{dataset_content}")
        dataset_content['path'] = replacement_path
        # Overwrite from the start and cut off whatever is left of the old, possibly longer content
        f.seek(0)
        yaml.dump(dataset_content, f)
        f.truncate()
        # Read the file back to show what was actually written
        f.seek(0)
        print(f"Adjusted dataset:\n{f.read()}")
    return dataset_content

In [12]:
def training_task(model_variant: str, dataset_id: str, args: dict, project: str = "LVGL UI Detector"):
    """Train a YOLO model on a ClearML dataset and track the run as a ClearML task.

    Args:
        model_variant: Name of the weights file without extension (e.g. "yolov8n");
            ``f"{model_variant}.pt"`` is loaded.
        dataset_id: Dataset ID; must be a key of ``env['DATASETS']`` and ``env['FILES']``.
        args: Keyword arguments forwarded to ``model.train``. The dict is connected to the
            task and its ``'data'`` entry is set to the downloaded dataset YAML file.
        project: ClearML project the task is created in.

    Returns:
        Tuple ``(results, task_id)``: the value returned by ``model.train`` and the ID of
        the ClearML task.

    Side effects:
        Stores the download folder in ``env['DIRS']['target']`` and rewrites the ``path``
        entry of the first YAML file listed for the dataset.
    """
    # Create a ClearML Task in the requested project
    task = Task.init(
        project_name=project,
        task_name=f"Train {model_variant} ({env['DATASETS'][dataset_id]['name']})",
        task_type=TaskTypes.training
    )
    # Everything after task creation runs under try/finally so the task is closed
    # even if the download, the model loading or the training itself fails.
    try:
        task.connect(args)
        # Download & modify dataset
        env['DIRS']['target'] = download_dataset(dataset_id)
        dataset_file = os.path.join(env['DIRS']['target'], env['FILES'][dataset_id][0])
        dataset_content = fix_dataset_path(dataset_file, env['DIRS']['target'])
        args['data'] = dataset_file
        # Log "model_variant" parameter and the dataset description to the task
        task.set_parameter("model_variant", model_variant)
        task.connect_configuration(name="Dataset YAML", configuration=args['data'])
        task.connect_configuration(name="Dataset Content", configuration=dataset_content)

        # Load the YOLOv8 model
        model = YOLO(f'{model_variant}.pt')

        # Train the model
        # If running remotely, the arguments may be overridden by ClearML if they were changed in the UI
        results = model.train(**args)
    finally:
        task.close()
    return results, task.id

# Choose dataset

In [13]:
# Dataset to train on (Colab form field); it must be one of the IDs stored in env['DATASETS']
dataset_choice = '50e10f640d7548458d9c38ab9aac571b'#@param {type:"string"}
valid_choice = dataset_choice in env['DATASETS']
if not valid_choice:
    # The ID is a fixed value in this cell, so retrying in a loop could never succeed
    # and would spin forever; fail loudly instead so the value can be corrected.
    raise ValueError(f"Dataset ID '{dataset_choice}' not found, please choose one of the available dataset IDs.")

# Model training

In [14]:
# Training inputs: model weights to start from and the arguments passed on to the trainer
model_variant = "yolov8n"
args = {'epochs': 10, 'imgsz': 480}
# NOTE: `id` holds the ClearML task ID and shadows the builtin of the same name;
# the name is kept because later cells read it.
results, id = training_task(model_variant, dataset_choice, args)


ClearML Task: created new task id=423648e8ed394b7d897ffa395c503ada
2024-04-27 18:20:04,677 - clearml.Task - INFO - Storing jupyter notebook directly as code
ClearML results page: https://app.clear.ml/projects/5606a5f963624e2991df71add8ec09ce/experiments/423648e8ed394b7d897ffa395c503ada/output/log
2024-04-27 18:20:09,868 - clearml - INFO - Dataset.get() did not specify alias. Dataset information will not be automatically logged in ClearML Server.
Original dataset:
{'names': {0: 'arc', 1: 'bar', 2: 'button', 3: 'buttonmatrix', 4: 'calendar', 5: 'chart', 6: 'checkbox', 7: 'dropdown', 8: 'image', 9: 'imagebutton', 10: 'keyboard', 11: 'label', 12: 'led', 13: 'line', 14: 'list', 15: 'menu', 16: 'messagebox', 17: 'roller', 18: 'scale', 19: 'slider', 20: 'spangroup', 21: 'spinbox', 22: 'spinner', 23: 'switch', 24: 'table', 25: 'tabview', 26: 'textarea', 27: 'tileview', 28: 'window'}, 'path': '/home/rini-debian/git-stash/lvgl-ui-detector/datasets', 'test': 'images/test', 'train': 'images/train'

[34m[1mtrain: [0mScanning /home/rini-debian/git-stash/lvgl-ui-detector/datasets/labels/train... 370 images, 0 backgrounds, 6 corrupt: 100%|██████████| 370/370 [00:00<00:00, 519.28it/s]

[34m[1mtrain: [0mNew cache created: /home/rini-debian/git-stash/lvgl-ui-detector/datasets/labels/train.cache



[34m[1mval: [0mScanning /home/rini-debian/git-stash/lvgl-ui-detector/datasets/labels/val... 65 images, 0 backgrounds, 0 corrupt: 100%|██████████| 65/65 [00:00<00:00, 485.55it/s]

[34m[1mval: [0mNew cache created: /home/rini-debian/git-stash/lvgl-ui-detector/datasets/labels/val.cache





Plotting labels to /home/rini-debian/git-stash/lvgl-ui-detector/runs/detect/train10/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000303, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 480 train, 480 val
Using 4 dataloader workers
Logging results to [1m/home/rini-debian/git-stash/lvgl-ui-detector/runs/detect/train10[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      1.48G      1.181      4.588      1.114         34        480: 100%|██████████| 23/23 [00:08<00:00,  2.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  3.35it/s]

                   all         65        195          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10      1.47G      1.006      3.781      1.061         33        480: 100%|██████████| 23/23 [00:05<00:00,  4.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  3.66it/s]

                   all         65        195      0.134       0.41      0.291      0.267






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      1.47G     0.9941      2.719      1.057         34        480: 100%|██████████| 23/23 [00:05<00:00,  4.44it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.36it/s]

                   all         65        195      0.827      0.281      0.552      0.456






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      1.47G     0.9885      2.141      1.055         36        480: 100%|██████████| 23/23 [00:04<00:00,  4.81it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.30it/s]

                   all         65        195      0.682      0.546       0.72       0.59






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10      1.47G     0.9109      1.849      1.032         31        480: 100%|██████████| 23/23 [00:04<00:00,  4.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  3.90it/s]

                   all         65        195      0.687      0.702       0.78       0.65






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10      1.47G     0.8677      1.701      1.026         34        480: 100%|██████████| 23/23 [00:04<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.21it/s]

                   all         65        195      0.683      0.756        0.8      0.667






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10      1.47G     0.8638      1.568      1.013         34        480: 100%|██████████| 23/23 [00:05<00:00,  4.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  3.60it/s]

                   all         65        195       0.77      0.795       0.82      0.689






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10      1.47G      0.797      1.498     0.9886         35        480: 100%|██████████| 23/23 [00:05<00:00,  3.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:01<00:00,  2.22it/s]

                   all         65        195      0.747      0.805      0.841      0.713






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10      1.47G     0.7528       1.39     0.9632         34        480: 100%|██████████| 23/23 [00:05<00:00,  4.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.62it/s]

                   all         65        195      0.771      0.835      0.855      0.731






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10      1.47G     0.7562      1.378     0.9777         35        480: 100%|██████████| 23/23 [00:04<00:00,  4.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  4.10it/s]

                   all         65        195       0.78      0.866      0.861      0.737






10 epochs completed in 0.020 hours.
Optimizer stripped from /home/rini-debian/git-stash/lvgl-ui-detector/runs/detect/train10/weights/last.pt, 6.2MB
Optimizer stripped from /home/rini-debian/git-stash/lvgl-ui-detector/runs/detect/train10/weights/best.pt, 6.2MB

Validating /home/rini-debian/git-stash/lvgl-ui-detector/runs/detect/train10/weights/best.pt...
Ultralytics YOLOv8.2.2 🚀 Python-3.11.2 torch-2.2.2+cu121 CUDA:0 (NVIDIA GeForce RTX 2060 with Max-Q Design, 6144MiB)
Model summary (fused): 168 layers, 3011303 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:01<00:00,  2.11it/s]


                   all         65        195      0.787      0.865      0.861      0.736
                   arc         65         11      0.873          1      0.988      0.957
                   bar         65         19      0.752        0.8      0.827      0.593
                button         65          8      0.566      0.875      0.751      0.645
              calendar         65          7      0.915          1      0.995      0.995
              checkbox         65         15      0.856       0.79       0.85      0.564
              dropdown         65         15      0.522          1      0.894       0.84
                 label         65         20      0.913      0.525      0.723      0.426
                   led         65          8      0.907          1      0.995      0.788
                roller         65         22      0.993          1      0.995      0.872
                 scale         65         16          1      0.973      0.995      0.856
                slide

In [15]:
print(f"Task to optimize: {id}")

Task to optimize: 423648e8ed394b7d897ffa395c503ada


# Hyperparameter Optimization

In [16]:
from clearml.automation import UniformParameterRange, UniformIntegerParameterRange, DiscreteParameterRange
from clearml.automation import HyperParameterOptimizer
from clearml.automation.optuna import OptimizerOptuna


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html



In [17]:
def job_complete_callback(
    job_id: str,
    objective_value: float,
    objective_iteration: int,
    job_parameters: dict,
    top_performance_job_id: str,
):
    """Print a summary of a finished optimization job.

    Always prints the job's ID, objective value, objective iteration and parameters.
    When the finished job is also the current top performer, a second line announcing
    the new record is printed.
    """
    print('Job completed!', job_id, objective_value, objective_iteration, job_parameters)
    if job_id != top_performance_job_id:
        return
    record_message = 'WOOT WOOT we broke the record! Objective reached {}'.format(objective_value)
    print(record_message)

In [18]:
# Register a ClearML task of type `optimizer` for the hyperparameter search
from clearml import Task

task = Task.init(
    task_type=Task.TaskTypes.optimizer,
    project_name='Hyper-Parameter Optimization (UI Detector)',
    task_name='Automatic Hyper-Parameter Optimization (UI Detector)',
    reuse_last_task_id=True,
)

ClearML Task: created new task id=4fb47fa004534e88bd9d94bc948adb1d
ClearML results page: https://app.clear.ml/projects/a22811de525b4f098c08fb5ccef9ade0/experiments/4fb47fa004534e88bd9d94bc948adb1d/output/log


In [None]:
from clearml.automation import ClearmlJob
# Job based on the trained task `id`, with its 'data' parameter overridden by an empty string
job = ClearmlJob(base_task_id=id, parameter_override={'data': ''})

In [19]:
# Settings of the optimization run, connected to the optimizer task:
#   template_task_id - the training task to optimize (empty string when `id` is falsy)
#   run_as_service   - kept at False
args = task.connect({
    'template_task_id': id or '',
    'run_as_service': False,
})

In [20]:
an_optimizer = HyperParameterOptimizer(
    # Template experiment whose parameters are varied
    base_task_id=args['template_task_id'],
    # Search strategy and the queue the jobs are sent to
    optimizer_class=OptimizerOptuna,
    execution_queue='training',
    # Metric to maximize
    objective_metric_title='val',
    objective_metric_series='metrics/mAP50-95(B)',
    objective_metric_sign='max',
    # Search space
    hyper_parameters=[
        DiscreteParameterRange('General/batch', values=[16, 32, 64, 128, 256]),
        DiscreteParameterRange('General/epochs', values=[25, 50, 75, 100, 125, 150]),
        UniformParameterRange('General/lr0', min_value=0.001, max_value=0.1),
        UniformParameterRange('General/momentum', min_value=0.85, max_value=0.95),
        UniformParameterRange('General/weight_decay', min_value=0.0001, max_value=0.001),
        DiscreteParameterRange('General/imgsz', values=[320, 480, 640]),
        DiscreteParameterRange('General/warmup_epochs', values=[1, 3, 5]),
    ],
    # Budget and polling settings
    total_max_jobs=1,
    max_iteration_per_job=30,
    pool_period_min=5,
)

In [21]:
#task.execute_remotely(queue_name='training', exit_process=True)

In [22]:
# Set the report period to 2; the logged output of this cell shows the optimizer
# then sleeping 2.0 minutes between progress reports.
an_optimizer.set_report_period(2)
# start the optimization process, callback function to be called every time an experiment is completed
# this function returns immediately
an_optimizer.start(job_complete_callback=job_complete_callback)
# NOTE(review): no time limit is configured for the optimization in this cell — add one here if a cap is intended

[I 2024-04-27 18:22:44,406] A new study created in memory with name: 4fb47fa004534e88bd9d94bc948adb1d


True

Progress report #0 completed, sleeping for 0.25 minutes
Progress report #1 completed, sleeping for 2.0 minutes
Progress report #2 completed, sleeping for 2.0 minutes
Progress report #3 completed, sleeping for 2.0 minutes
Progress report #4 completed, sleeping for 2.0 minutes
Progress report #5 completed, sleeping for 2.0 minutes


: 