In [None]:
from datetime import datetime
import os
import sys
# Resolve the project root (two directory levels above the kernel's working
# directory) and make it importable so the `custom_dataset` package can be found.
# The membership check keeps sys.path free of duplicate entries when this
# cell is executed more than once in the same kernel.
project_root = os.path.abspath(os.path.join(os.getcwd(), '../..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from ultralytics import YOLO, settings
import mlflow

from custom_dataset.data_preparation import YOLODataProcessor

In [2]:
# Point MLflow at the local tracking server, then activate the experiment
# that the cells below query and add runs to.
mlflow.set_tracking_uri("http://localhost:5000")

experiment_name = 'yolo_train'
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='mlflow-artifacts:/173000798021307826', creation_time=1745753018682, experiment_id='173000798021307826', last_update_time=1745753018682, lifecycle_stage='active', name='yolo_train', tags={}>

In [4]:
# Look up the experiment by name and pull all of its runs into df_runs.
# The trailing expression displays whether the experiment has no runs yet.
experiment = mlflow.get_experiment_by_name(experiment_name)
experiment_ids = [experiment.experiment_id]
df_runs = mlflow.search_runs(experiment_ids=experiment_ids)
df_runs.empty

True

# 1. Dataset preparation

In [5]:
# Locate the dataset folder inside the project tree and derive the name of
# its YAML config file.
dataset_name = 'yolo-ocr-dataset_yolo'
dataset_config = f'{dataset_name}.yaml'
dataset_path = os.path.join(project_root, 'custom_dataset', 'data', dataset_name)

# Options handed to the project's data processor.
# NOTE(review): train_frac + val_frac = 0.975; presumably the remaining 2.5%
# becomes a test split — confirm in YOLODataProcessor.
split_options = {
    'config_path': dataset_config,
    'train_frac': 0.95,
    'val_frac': 0.025,
    'restructure': False,
}

data_processor = YOLODataProcessor(dataset_path)
data_processor(**split_options)

# 2. Train

In [6]:
# Give the current run (model) a name that is unique for today and create it
# in MLflow.
#
# search_runs() exposes a run's name in the 'tags.mlflow.runName' column, while
# 'params.run_name' exists only if some run logged a param with that name.
# Indexing 'params.run_name' unconditionally raises KeyError as soon as the
# experiment holds runs without that param, so collect the names from
# whichever of the two columns are actually present.
taken_names = set()
for name_column in ('tags.mlflow.runName', 'params.run_name'):
    if name_column in df_runs.columns:
        taken_names.update(df_runs[name_column].dropna())

# Bump the version suffix until the name is unused. An empty df_runs yields
# no taken names, so the first candidate (v1) is kept.
v = 1
date = datetime.now().strftime("%d.%m.%y")
model_name = f'{date}_{dataset_name}_v{v}'
while model_name in taken_names:
    v += 1
    model_name = f'{date}_{dataset_name}_v{v}'
print(model_name)

# Open the run only long enough to obtain its id; leaving the `with` block
# ends the run again.
with mlflow.start_run(run_name=model_name) as run:
    run_id = run.info.run_id

# Export the run id through the environment.
# NOTE(review): presumably this lets the Ultralytics MLflow callback resume
# this run during model.train() — confirm. That callback is also documented to
# read MLFLOW_TRACKING_URI from the environment, which is not set in the
# visible cells; verify it is set elsewhere.
os.environ['MLFLOW_RUN_ID'] = run_id
print(run_id)

27.04.25_yolo-ocr-dataset_yolo_v1
🏃 View run 27.04.25_yolo-ocr-dataset_yolo_v1 at: http://localhost:5000/#/experiments/173000798021307826/runs/f7996ca49b43452cb8f87e5318557645
🧪 View experiment at: http://localhost:5000/#/experiments/173000798021307826
f7996ca49b43452cb8f87e5318557645


In [7]:
# Persist run_id and model_name with IPython's %store magic so that
# downstream notebooks can restore them (e.g. with `%store -r`)

%store run_id
%store model_name

Stored 'run_id' (str)
Stored 'model_name' (str)


In [None]:
# Load the yolo11n.pt weights as the starting point for training.
model = YOLO("yolo11n.pt")

# Reference for every training argument:
# https://docs.ultralytics.com/modes/train/#train-settings
model.train(
    # --- data and schedule ---
    data=f'{dataset_name}.yaml',  # same file name as dataset_config from the dataset cell
    imgsz=224,  # all images are resized to this value before training
    epochs=3,
    batch=2,  # a fractional value would mean a fraction of device memory
    patience=10,  # early stopping
    # --- task ---
    single_cls=True,  # True when the goal is to find an object, not classify it
    # --- optimisation ---
    optimizer='Adam',
    lr0=1e-3,  # initial learning rate
    lrf=1e-5,  # final learning rate; regulates the slope of the lr scheduler
    cos_lr=True,  # cosine learning rate scheduler
    warmup_epochs=1,  # number of warmup epochs
    weight_decay=1e-4,  # L2 regularization
    amp=True,  # mixed precision training
    freeze=None,  # number of first layers to freeze for transfer learning
    # --- loss weights ---
    box=7.5,  # weight of box loss in the total loss; emphasises accurate box prediction
    cls=0,  # weight of classification loss in the total loss; emphasises correct class prediction
    dfl=0,
    pose=0,
    kobj=0,
    # --- validation and outputs ---
    val=True,  # validate during training
    plots=True,  # generate and save plots of train and val metrics
    save=True,  # save checkpoints
    project='checkpoints',  # folder that receives the training outputs
    name=model_name,  # training run name; becomes a subdir of the project dir
    # --- hardware ---
    device=0,  # 0 is cuda
)

New https://pypi.org/project/ultralytics/8.3.118 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.116  Python-3.12.0 torch-2.7.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=yolo-ocr-dataset_yolo.yaml, epochs=3, time=None, patience=10, batch=2, imgsz=224, save=True, save_period=-1, cache=False, device=0, workers=8, project=checkpoints, name=27.04.25_yolo-ocr-dataset_yolo_v1, exist_ok=False, pretrained=True, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=True, rect=False, cos_lr=True, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_mask

[34m[1mtrain: [0mScanning C:\Users\Stas\Desktop\Practice\OCR\custom_dataset\data\yolo-ocr-dataset_yolo\labels\train.cache... 941 images, 0 backgrounds, 0 corrupt: 100%|██████████| 941/941 [00:00<?, ?it/s]


[34m[1mval: [0mFast image access  (ping: 0.60.3 ms, read: 327.975.9 MB/s, size: 1594.2 KB)


[34m[1mval: [0mScanning C:\Users\Stas\Desktop\Practice\OCR\custom_dataset\data\yolo-ocr-dataset_yolo\labels\val.cache... 25 images, 0 backgrounds, 0 corrupt: 100%|██████████| 25/25 [00:00<?, ?it/s]


In [None]:
# Run validation on the model and keep the returned metrics object.
# NOTE(review): no arguments are passed, so presumably the dataset and settings
# from training are reused — confirm.
metrics = model.val()

In [None]:
# Export the model to ONNX format.
model.export(format="onnx")