In [None]:
def sliced_wbf_predict(im, model, conf, overlap, wbf_iou):
    """
    Run sliced inference on one image and merge the detections with
    Weighted Boxes Fusion (WBF).

    Args:
        im (np.ndarray): Input image; must be 3-dimensional (height, width, channels).
        model: Detection model handed to `get_sliced_prediction`.
        conf (float): Confidence threshold forwarded to the sliced prediction.
        overlap (float): Overlap ratio between neighbouring slices (height and width).
        wbf_iou (float): IoU threshold used by WBF to cluster boxes.

    Returns:
        list[dict]: One dict per fused box with keys `xc`, `yc`, `w`, `h`
        (center/size normalized by the image size), `label` (always 0) and
        `score`. An empty list when nothing was detected.
    """
    h, w, _ = im.shape
    # Sliced prediction: square IMGZ x IMGZ tiles with the requested overlap.
    r = get_sliced_prediction(
        im,
        model,
        slice_height=IMGZ,
        slice_width=IMGZ,
        overlap_height_ratio=overlap,
        overlap_width_ratio=overlap,
        conf=conf,
        device=DEVICE,
        verbose=False
    )

    bxs, scs = [], []
    # Collect boxes (normalized to the image size) and their scores.
    for o in r.object_prediction_list:
        x1, y1, x2, y2 = o.bbox.to_xyxy()
        bxs.append([x1 / w, y1 / h, x2 / w, y2 / h])
        scs.append(o.score.value)

    if not bxs:
        return []

    # WBF takes one list per model; the labels list must hold one label per
    # box, so the single class id 0 is repeated for every collected box.
    bxs, scs, _ = weighted_boxes_fusion(
        [bxs], [scs], [[0] * len(bxs)],
        iou_thr=wbf_iou, skip_box_thr=SKIP_BOX_T
    )

    # Convert fused corner boxes to center/size records.
    return [
        dict(
            xc=(x1 + x2) / 2,
            yc=(y1 + y2) / 2,
            w=x2 - x1,
            h=y2 - y1,
            label=0,
            score=s
        )
        for (x1, y1, x2, y2), s
        in zip(bxs, scs)
    ]

def run_validation(model, conf, overlap, wbf_iou):
    """
    Run sliced + WBF inference on every file in VAL_IMGDIR and collect the
    predictions in one table.

    Args:
        model: Detection model passed through to `sliced_wbf_predict`.
        conf (float): Confidence threshold.
        overlap (float): Overlap ratio for slicing.
        wbf_iou (float): IoU threshold for WBF.

    Returns:
        pd.DataFrame: One row per predicted box (or one row with empty box
        fields for an image without predictions), with columns `image_id`,
        `label`, `xc`, `yc`, `w`, `h`, `w_img`, `h_img`, `score`, `time_spent`.
    """
    rows = []
    for p in VAL_IMGDIR.glob("*.*"):
        img_id = p.stem
        raw = cv2.imread(str(p), -1)
        if raw is None:
            # cv2.imread returns None instead of raising when a file cannot be
            # decoded; "*.*" may match such files, so skip them explicitly
            # rather than crash inside cvtColor.
            print(f"WARNING: skipping unreadable file {p}")
            continue
        # Convert from OpenCV's BGR channel order to RGB.
        im = cv2.cvtColor(raw, cv2.COLOR_BGR2RGB)

        # Only the prediction call is timed, not image loading.
        start = time.time()
        preds = sliced_wbf_predict(im, model, conf, overlap, wbf_iou)
        dt = round(time.time() - start, 5)

        if preds:
            # Attach per-image metadata to each predicted box.
            for d in preds:
                d.update(
                    image_id=img_id,
                    time_spent=dt,
                    w_img=im.shape[1],
                    h_img=im.shape[0]
                )
                rows.append(d)
        else:
            # Keep the image in the table with empty box fields.
            rows.append(
                dict(
                    image_id=img_id,
                    xc=None,
                    yc=None,
                    w=None,
                    h=None,
                    label=0,
                    score=None,
                    time_spent=dt,
                    w_img=im.shape[1],
                    h_img=im.shape[0]
                )
            )

    # Fixed column order for the resulting table.
    return pd.DataFrame(
        rows,
        columns=[
            "image_id",
            "label",
            "xc",
            "yc",
            "w",
            "h",
            "w_img",
            "h_img",
            "score",
            "time_spent"
        ]
    )

def fbeta(df_pred):
    """
    Score a prediction table against the validation ground truth.

    Args:
        df_pred (pd.DataFrame): Predictions to evaluate.

    Returns:
        The first value returned by `metric.evaluate` (the F-beta score).
    """
    encoded_predictions = metric.df_to_bytes(df_pred)
    encoded_ground_truth = metric.open_df_as_bytes(str(VAL_GTCSV))
    # Only the leading element of the evaluation result is the score;
    # any remaining values are ignored.
    f_score, *_unused = metric.evaluate(
        encoded_predictions,
        encoded_ground_truth,
        beta=BETA,
        parallelize=False,
    )
    return f_score

def objective(trial):
    """
    Optuna objective: sample inference hyperparameters, run validation with
    them and return the resulting F-beta score.

    Args:
        trial (optuna.trial.Trial): Trial used to sample `conf`, `overlap`
            and `wbf_iou`.

    Returns:
        The score returned by `fbeta` for the sampled hyperparameters.
    """
    # (name, low, high) in sampling order.
    search_space = (
        ("conf", 0.05, 0.40),
        ("overlap", 0.10, 0.35),
        ("wbf_iou", 0.45, 0.70),
    )
    sampled = {}
    for param_name, low, high in search_space:
        sampled[param_name] = trial.suggest_float(param_name, low, high)

    # `model` is read from the notebook's global namespace.
    predictions = run_validation(
        model, sampled["conf"], sampled["overlap"], sampled["wbf_iou"]
    )
    return fbeta(predictions)

In [1]:
pip install ultralytics clearml pyyaml sahi

Collecting ultralytics
  Downloading ultralytics-8.3.176-py3-none-any.whl.metadata (37 kB)
Collecting clearml
  Downloading clearml-2.0.2-py2.py3-none-any.whl.metadata (17 kB)
Collecting sahi
  Downloading sahi-0.11.32-py3-none-any.whl.metadata (18 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.15-py3-none-any.whl.metadata (14 kB)
Collecting furl>=2.0.0 (from clearml)
  Downloading furl-2.1.4-py2.py3-none-any.whl.metadata (25 kB)
Collecting pathlib2>=2.3.0 (from clearml)
  Downloading pathlib2-2.3.7.post1-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting fire (from sahi)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pybboxes==0.1.6 (from sahi)
  Downloading pybboxes-0.1.6-py3-none-any.whl.metadata (9.9 kB)
Collecting terminaltables (from sahi)
  Downloading te

In [2]:
import json
import os
import random
import shutil
import subprocess
import time
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Tuple

import albumentations as A
import cv2
import numpy as np
import pandas as pd
import torch
import yaml
from numba import jit
from tqdm import tqdm
from ultralytics import YOLO

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
print("✅ Custom metric code is embedded in the main script.")

# Column names of one prediction record (the same keys run_inference_for_metric emits).
# Not referenced by the other code visible in this notebook.
COLUMNS = ['image_id', 'label', 'xc', 'yc', 'w', 'h', 'w_img', 'h_img', 'score', 'time_spent']

def df_to_bytes(df: pd.DataFrame) -> bytes:
    """Serialize a DataFrame to UTF-8 encoded JSON bytes (pandas' default `to_json` layout)."""
    json_text = df.to_json()
    return json_text.encode(encoding="utf-8")
def bytes_to_df(df_byte: bytes) -> pd.DataFrame:
    """
    Decode UTF-8 JSON bytes (as produced by `df_to_bytes`) back into a DataFrame.

    Valid JSON is parsed as-is, so apostrophes inside string values are
    preserved. Only when that fails is the legacy fallback applied, which
    swaps single quotes for double quotes to accept dict-style payloads such
    as ``{'a': {'0': 1}}``.

    Raises:
        json.JSONDecodeError: If the payload is not parseable even after the
            quote replacement.
    """
    text = df_byte.decode("utf-8")
    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        # Legacy path: payload written with single-quoted keys/strings.
        payload = json.loads(text.replace("'", '"'))
    return pd.DataFrame(payload)
def open_df_as_bytes(csv_path: str) -> bytes:
    """Read a comma-separated CSV (image_id kept as str, time_spent parsed as float) and serialize it with `df_to_bytes`."""
    column_converters = {'image_id': str, 'time_spent': float}
    table = pd.read_csv(csv_path, sep=",", decimal=".", converters=column_converters)
    return df_to_bytes(table)
def set_types(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of `df` with the id, label, box and image-size columns cast to their expected dtypes."""
    target_dtypes = {
        'image_id': str,
        'label': int,
        'xc': float,
        'yc': float,
        'w': float,
        'h': float,
        'w_img': int,
        'h_img': int,
    }
    # errors='ignore' leaves the data unchanged when a cast fails.
    return df.astype(target_dtypes, errors='ignore')

def get_time_spent(df: pd.DataFrame, all_image_ids: list) -> np.ndarray:
    """
    Look up the inference time recorded for each requested image.

    The first `time_spent` value of every `image_id` group is used; images
    that do not appear in `df` get 0.
    """
    first_time_by_image = df.groupby('image_id')['time_spent'].first()
    timings = []
    for image_id in all_image_ids:
        timings.append(first_time_by_image.get(image_id, 0))
    return np.array(timings)

def preprocess_predicted_df(predicted_file: bytes, gt_file: bytes, all_image_ids: list):
    """
    Decode prediction and ground-truth payloads into indexed DataFrames.

    Rows whose `score` equals -1 are "no detection" sentinels (the format
    `run_inference_for_metric` writes for an image without boxes). They are
    removed wherever they occur, so a sentinel in the first row no longer
    discards every prediction and sentinels in later rows are no longer
    scored as boxes.

    Args:
        predicted_file: Predictions serialized with `df_to_bytes`.
        gt_file: Ground truth serialized with `df_to_bytes`.
        all_image_ids: Image ids for which timing values are returned.

    Returns:
        tuple: `(gt_df, predicted_df, time_spent)` where both frames are
        indexed by `image_id` and sorted, and `time_spent` is an array aligned
        with `all_image_ids`. `predicted_df` is an empty frame with the
        ground-truth columns when there is no usable prediction.
    """
    predicted_df = bytes_to_df(predicted_file)
    gt_df = bytes_to_df(gt_file)
    # Timing is read before filtering so that images represented only by a
    # sentinel row still contribute their inference time.
    time_spent = get_time_spent(predicted_df, all_image_ids)

    gt_df = set_types(gt_df).set_index('image_id').sort_index()

    has_predictions = False
    if not predicted_df.empty and 'score' in predicted_df.columns:
        predicted_df = predicted_df[predicted_df['score'] != -1]
        has_predictions = not predicted_df.empty

    if has_predictions:
        predicted_df = predicted_df.drop(columns=['time_spent'])
        predicted_df = set_types(predicted_df).set_index('image_id').sort_index()
    else:
        predicted_df = pd.DataFrame(columns=gt_df.columns).set_index(pd.Index([], name='image_id'))

    return gt_df, predicted_df, time_spent

def get_box_coordinates(row):
    """
    Convert a normalized center/size box into integer pixel corners.

    `row` must provide `xc`, `yc`, `w`, `h`, `w_img` and `h_img`; the result
    is `(x1, y1, x2, y2)` with each coordinate truncated by `int()`.
    """
    image_width = int(row['w_img'])
    image_height = int(row['h_img'])
    half_width = row['w'] / 2
    half_height = row['h'] / 2

    left = int((row['xc'] - half_width) * image_width)
    top = int((row['yc'] - half_height) * image_height)
    right = int((row['xc'] + half_width) * image_width)
    bottom = int((row['yc'] + half_height) * image_height)
    return (left, top, right, bottom)

@jit(nopython=True)
def compute_iou_from_coords(pred_box, gt_box):
    """
    Intersection-over-union of two boxes given as (x1, y1, x2, y2).

    Returns 0.0 when the boxes do not overlap or when the union area is not
    positive.
    """
    x1_p, y1_p, x2_p, y2_p = pred_box
    x1_g, y1_g, x2_g, y2_g = gt_box
    x_left, y_top = max(x1_p, x1_g), max(y1_p, y1_g)
    x_right, y_bottom = min(x2_p, x2_g), min(y2_p, y2_g)
    if x_right < x_left or y_bottom < y_top:
        return 0.0
    intersection_area = (x_right - x_left) * (y_bottom - y_top)
    box1_area = (x2_p - x1_p) * (y2_p - y1_p)
    # Height of the ground-truth box is y2 - y1 (it was y2 - y2, i.e. always 0,
    # which made the union too small and distorted every IoU).
    box2_area = (x2_g - x1_g) * (y2_g - y1_g)
    union_area = box1_area + box2_area - intersection_area
    return intersection_area / union_area if union_area > 0 else 0.0

def process_image(pred_df, gt_df, thresholds):
    """
    Count true positives, false positives and false negatives for one image
    at every IoU threshold.

    Predictions and ground-truth boxes are matched greedily: the pair with
    the highest remaining IoU (at or above the threshold) is matched, then
    both boxes are removed from further matching.

    Args:
        pred_df: Predicted boxes for the image (rows readable by `get_box_coordinates`).
        gt_df: Ground-truth boxes for the image.
        thresholds: Iterable of IoU thresholds.

    Returns:
        dict: `{threshold: {'tp': int, 'fp': int, 'fn': int}}`.
    """
    pred_boxes = [get_box_coordinates(row) for _, row in pred_df.iterrows()]
    gt_boxes = [get_box_coordinates(row) for _, row in gt_df.iterrows()]
    iou_matrix = np.zeros((len(pred_boxes), len(gt_boxes)))
    for i, p_box in enumerate(pred_boxes):
        for j, g_box in enumerate(gt_boxes):
            iou_matrix[i, j] = compute_iou_from_coords(p_box, g_box)

    results = {}
    for t in thresholds:
        iou_mat = iou_matrix.copy()
        iou_mat[iou_mat < t] = 0
        tp = 0
        # The size check comes first because ndarray.max() raises on a
        # zero-size matrix (no predictions or no ground-truth boxes).
        while iou_mat.size and iou_mat.max() > 0:
            i, j = np.unravel_index(np.argmax(iou_mat), iou_mat.shape)
            tp += 1
            # Zeroing the row and column guarantees neither box is picked again.
            iou_mat[i, :] = 0
            iou_mat[:, j] = 0
        results[t] = {'tp': tp, 'fp': len(pred_boxes) - tp, 'fn': len(gt_boxes) - tp}
    return results

def metric_counter(time_spent, total_tp, total_fp, total_fn, thresholds, beta, m):
    """
    Average the F-beta score over all thresholds and report the mean time.

    `total_tp`, `total_fp` and `total_fn` map each threshold to its count.
    A threshold whose denominator is not positive contributes 0.0.
    The `m` argument is accepted but not used.

    Returns:
        tuple: (mean F-beta over thresholds, mean of `time_spent` rounded to 3 decimals).
    """
    beta_sq = beta ** 2
    per_threshold_scores = []
    for threshold in thresholds:
        hits = total_tp[threshold]
        false_alarms = total_fp[threshold]
        misses = total_fn[threshold]
        top = (1 + beta_sq) * hits
        bottom = (1 + beta_sq) * hits + beta_sq * misses + false_alarms
        if bottom > 0:
            per_threshold_scores.append(top / bottom)
        else:
            per_threshold_scores.append(0.0)
    mean_score = np.mean(per_threshold_scores)
    mean_time = round(float(np.mean(time_spent)), 3)
    return mean_score, mean_time

def compute_overall_metric(predicted_df, gt_df, time_spent, thresholds, beta, m, parallelize=True):
    """
    Aggregate per-image TP/FP/FN counts over all ground-truth images and
    turn them into the final metric.

    Args:
        predicted_df: Predictions indexed by `image_id`.
        gt_df: Ground truth indexed by `image_id`; its unique index values
            define which images are evaluated.
        time_spent: Per-image inference times, forwarded to `metric_counter`.
        thresholds: IoU thresholds.
        beta: F-beta weight.
        m: Forwarded unchanged to `metric_counter`.
        parallelize: When True (default) images are processed on a thread
            pool; when False they are processed sequentially in this thread.

    Returns:
        The value returned by `metric_counter`.
    """
    unique_image_ids = gt_df.index.unique()
    total_tp = {t: 0 for t in thresholds}
    total_fp = {t: 0 for t in thresholds}
    total_fn = {t: 0 for t in thresholds}

    def process_image_id(image_id):
        pred_df_img = predicted_df.loc[[image_id]] if image_id in predicted_df.index else pd.DataFrame()
        gt_df_img = gt_df.loc[[image_id]]
        if pred_df_img.empty:
            # No predictions for this image: every ground-truth box is a miss.
            return {t: {'tp': 0, 'fp': 0, 'fn': len(gt_df_img)} for t in thresholds}
        return process_image(pred_df_img, gt_df_img, thresholds)

    if parallelize:
        # The context manager shuts the pool down once all images are processed.
        with ThreadPoolExecutor() as executor:
            results = list(executor.map(process_image_id, unique_image_ids))
    else:
        results = [process_image_id(image_id) for image_id in unique_image_ids]

    for result in filter(None, results):
        for t in thresholds:
            total_tp[t] += result[t]['tp']
            total_fp[t] += result[t]['fp']
            total_fn[t] += result[t]['fn']
    return metric_counter(time_spent, total_tp, total_fp, total_fn, thresholds, beta, m)

def evaluate(predicted_file, gt_file, all_image_ids, thresholds=np.round(np.arange(0.3, 1.0, 0.07), 2), beta=1.0):
    """
    Entry point of the custom metric.

    Args:
        predicted_file: Predictions as bytes (see `df_to_bytes`).
        gt_file: Ground truth as bytes.
        all_image_ids: Ids of all evaluated images; its length is passed on as `m`.
        thresholds: IoU thresholds; by default values from 0.3 upward in steps
            of 0.07 (rounded to 2 decimals), staying below 1.0.
        beta: F-beta weight.

    Returns:
        The result of `compute_overall_metric`, or `(0.0, 0.0)` after printing
        the error if anything raises during evaluation.
    """
    try:
        prepared = preprocess_predicted_df(predicted_file, gt_file, all_image_ids)
        gt_df, predicted_df, time_spent = prepared
        image_total = len(all_image_ids)
        outcome = compute_overall_metric(predicted_df, gt_df, time_spent, thresholds, beta, image_total)
    except Exception as e:
        print(f"Ошибка в evaluate: {e}")
        return 0.0, 0.0
    return outcome

✅ Custom metric code is embedded in the main script.


In [4]:
def run_inference_for_metric(model, image_paths):
    """
    Predict on every image and build the prediction table expected by the
    custom metric.

    Each detected box becomes one row with normalized center/size values
    taken from `box.xywhn`. An image without boxes produces a single sentinel
    row whose label, box fields and score are all -1. `image_id` is the file
    name including its extension, and `time_spent` is the wall-clock time of
    the `model.predict` call for that image.

    Returns:
        pd.DataFrame: One row per record as described above.
    """
    print("INFO: Starting inference run for custom metric...")
    records = []

    for image_path in tqdm(image_paths, desc="Custom Validation"):
        started_at = time.time()
        outputs = model.predict(source=str(image_path), verbose=False)
        elapsed = time.time() - started_at

        first_result = outputs[0]
        image_height, image_width = first_result.orig_shape

        def make_record(label, xc, yc, box_w, box_h, score):
            # Key order determines the column order of the resulting DataFrame.
            return {
                'image_id': image_path.name,
                'label': label,
                'xc': xc,
                'yc': yc,
                'w': box_w,
                'h': box_h,
                'w_img': image_width,
                'h_img': image_height,
                'score': score,
                'time_spent': elapsed,
            }

        if len(first_result.boxes) == 0:
            records.append(make_record(-1, -1, -1, -1, -1, -1))
            continue

        for detection in first_result.boxes:
            center_x, center_y, norm_w, norm_h = detection.xywhn[0]
            records.append(
                make_record(
                    int(detection.cls),
                    float(center_x),
                    float(center_y),
                    float(norm_w),
                    float(norm_h),
                    float(detection.conf),
                )
            )

    return pd.DataFrame(records)

In [5]:
# Best custom F-beta seen so far; custom_metric_callback compares new scores against it and updates it.
best_custom_score = -1.0
# Ground-truth table as bytes for evaluate(); while this is None the callback returns immediately.
ground_truth_bytes = None

def custom_metric_callback(trainer):
    """
    Trainer callback: score the current model with the custom F-beta metric
    and keep (and optionally upload) the best checkpoint.

    Does nothing while the global `ground_truth_bytes` is None. Otherwise it
    runs inference on every `*.jpg` in `VAL_IMAGE_DIR`, evaluates the result,
    logs `custom_F_beta`, and when the score beats `best_custom_score` saves
    the model as `best_by_metric.pt` in the trainer's save directory. If
    `API_configured` is truthy, the checkpoint is also pushed as a new Kaggle
    dataset version; upload failures are printed, not raised.

    Args:
        trainer: Trainer object supplied by the training loop; `model`,
            `epoch`, `save_dir` and `logger` are read from it.
    """
    global best_custom_score, ground_truth_bytes, VAL_IMAGE_DIR, API_configured, MODEL_DATASET_SLUG
    if ground_truth_bytes is None:
        return

    print("\n" + "="*20 + " Custom Metric Validation " + "="*20)

    val_image_paths = sorted(list(Path(VAL_IMAGE_DIR).glob("*.jpg")))
    all_val_image_names = [p.name for p in val_image_paths]

    predictions_df = run_inference_for_metric(trainer.model, val_image_paths)
    predictions_bytes = df_to_bytes(predictions_df)

    metric, avg_time = evaluate(predicted_file=predictions_bytes, gt_file=ground_truth_bytes, all_image_ids=all_val_image_names)
    print(f"📈 Custom Metric Score (F-beta): {metric:.5f}")

    trainer.logger.log_metrics({"custom_F_beta": metric}, step=trainer.epoch)
    if metric > best_custom_score:
        best_custom_score = metric
        print(f"🚀 New best model found! F-beta={best_custom_score:.5f}. Saving and uploading...")

        local_save_dir = Path(trainer.save_dir)
        local_save_path = local_save_dir / "best_by_metric.pt"
        trainer.model.save(local_save_path)

        if API_configured:
            try:
                # NOTE(review): `current_block_idx` is not defined in the visible cells;
                # if it is missing the NameError is reported by the except below.
                version_notes = f"Block {current_block_idx}, Epoch {trainer.epoch}, F-beta: {metric:.5f}"
                upload_dir = Path("/kaggle/working/upload_temp")
                if upload_dir.exists():
                    shutil.rmtree(upload_dir)
                upload_dir.mkdir()
                shutil.copy(local_save_path, upload_dir / f"best_model_b{current_block_idx}_e{trainer.epoch}.pt")

                # Argument list instead of a shell string: no quoting issues
                # with the path or the version message.
                subprocess.run(
                    ["kaggle", "datasets", "version", "-p", str(upload_dir), "-m", version_notes, "-r", "zip"],
                    check=True,
                    capture_output=True,
                )
                print("✅ Successfully uploaded new best model to Kaggle Datasets.")
            except Exception as e:
                print(f"❌ ERROR uploading model to Kaggle: {e}")
        else:
            print("SKIPPED UPLOAD: Kaggle API not configured.")

    print("="*66 + "\n")

In [6]:
def is_truly_positive(label_path):
    """
    Tell whether a label file holds at least one non-blank line.

    Returns False when the file does not exist or contains only whitespace.
    """
    if not label_path.exists():
        return False
    with open(label_path, 'r') as label_file:
        # any() stops at the first line that still has content after stripping.
        return any(text_line.strip() for text_line in label_file)

def prepare_aggregated_dataset(source_paths, dest_path, neg_pos_ratio):
    """
    Merge several source datasets into one symlinked YOLO dataset with a
    balanced training split.

    Pass 1 scans `<source>/train` and `<source>/val` of every source for
    images (.jpg, .jpeg, .png) and classifies each one as positive (its
    sibling `.txt` label file has content) or negative. Pass 2 links all
    training positives plus a random sample of training negatives, and all
    validation images, into `dest_path` under `images/{train,val}` and
    `labels/{train,val}`. Label files are linked only when they exist.

    Args:
        source_paths (list[Path]): Root directories of the input datasets;
            a missing `train` or `val` subdirectory is skipped.
        dest_path (Path): Output directory. It is removed first if it exists.
        neg_pos_ratio (float): Negatives per positive in the training split,
            e.g. 1.0 for 1:1, 0.25 for 1:4, 0.0 for no negatives. The sample
            size is capped at the number of available negatives.

    Returns:
        Path: Location of the generated `dataset.yaml` inside `dest_path`.

    Raises:
        FileExistsError: From `os.symlink` if two linked files share a name.
        ValueError: From `random.sample` if the computed sample size is negative.
    """
    print(f"\nINFO: Setting up final dataset directory at {dest_path}")
    if dest_path.exists():
        shutil.rmtree(dest_path)

    images_train_dir = dest_path / "images/train"
    labels_train_dir = dest_path / "labels/train"
    images_val_dir = dest_path / "images/val"
    labels_val_dir = dest_path / "labels/val"
    for target_dir in (images_train_dir, labels_train_dir, images_val_dir, labels_val_dir):
        target_dir.mkdir(parents=True)

    VALID_IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png"}

    def link_split(image_paths, images_dir, labels_dir, progress_label):
        # Symlink every image and, when present, its label file.
        for image_path in tqdm(image_paths, desc=progress_label):
            label_path = image_path.with_suffix('.txt')
            os.symlink(image_path, images_dir / image_path.name)
            if label_path.exists():
                os.symlink(label_path, labels_dir / label_path.name)

    print("--- PASS 1: Aggregating all data files ---")
    # Images grouped by (split, has_objects).
    buckets = {
        ("train", True): [],
        ("train", False): [],
        ("val", True): [],
        ("val", False): [],
    }

    for source_path in source_paths:
        print(f"--> Analyzing {source_path.name}...")
        for split in ["train", "val"]:
            source_split_dir = source_path / split
            if not source_split_dir.exists():
                continue

            image_files = []
            for candidate in source_split_dir.glob("*"):
                if candidate.suffix.lower() in VALID_IMAGE_EXTENSIONS:
                    image_files.append(candidate)

            for img_file in tqdm(image_files, desc=f"  - Reading {split} manifest"):
                has_objects = bool(is_truly_positive(img_file.with_suffix('.txt')))
                buckets[(split, has_objects)].append(img_file)

    all_train_pos = buckets[("train", True)]
    all_train_neg = buckets[("train", False)]
    all_val_pos = buckets[("val", True)]
    all_val_neg = buckets[("val", False)]

    print("\n--- Aggregation Complete ---")
    print(f"Total Train Positives: {len(all_train_pos)}")
    print(f"Total Train Negatives: {len(all_train_neg)}")

    print("\n--- PASS 2: Sampling and Linking ---")
    # Never request more negatives than are available.
    num_neg_to_add = min(int(len(all_train_pos) * neg_pos_ratio), len(all_train_neg))
    print(f"Linking all {len(all_train_pos)} positives and {num_neg_to_add} sampled negatives for TRAIN...")
    final_train_paths = all_train_pos + random.sample(all_train_neg, num_neg_to_add)
    link_split(final_train_paths, images_train_dir, labels_train_dir, "  - Linking train set")

    # Validation keeps every image so its distribution stays unchanged.
    final_val_paths = all_val_pos + all_val_neg
    print(f"Linking all {len(final_val_paths)} images for VAL...")
    link_split(final_val_paths, images_val_dir, labels_val_dir, "  - Linking val set")

    yaml_path = dest_path / "dataset.yaml"
    dataset_config = {
        'path': str(dest_path.resolve()),
        'train': 'images/train',
        'val': 'images/val',
        'nc': 1,
        'names': ['person'],
    }
    with open(yaml_path, 'w') as yaml_file:
        yaml.dump(dataset_config, yaml_file)
    return yaml_path

In [7]:
# Choose the training device string: every visible CUDA device, or None to fall back to CPU.
device_ids = None
if not torch.cuda.is_available():
    print("WARNING: Видеокарты NVIDIA не найдены. Используется CPU.")
else:
    device_count = torch.cuda.device_count()
    print(f"INFO: Найдено {device_count} видеокарт NVIDIA.")

    if device_count > 1:
        # Comma-separated indices, e.g. "0,1".
        device_ids = ','.join(str(gpu_index) for gpu_index in range(device_count))
        print(f"INFO: Будут использоваться все видеокарты: {device_ids}")
    elif device_count == 1:
        device_ids = '0'
        print("INFO: Будет использоваться одна видеокарта: 0")
    else:
        print("WARNING: Видеокарты найдены, но их количество равно 0. Используется CPU.")

INFO: Найдено 2 видеокарт NVIDIA.
INFO: Будут использоваться все видеокарты: 0,1


In [None]:
# Upper bound on training epochs (early stopping via `patience` below may end the run sooner).
EPOCHS = 100
# Passed as neg_pos_ratio to prepare_aggregated_dataset: 1.25 background images per positive image in TRAIN.
RATIO = 1.25

# Kaggle input datasets to aggregate; each is scanned for `train`/`val` subfolders.
INPUT_DATASET_PATHS = [
    Path("/kaggle/input/uav-people-sliced-1536px-slices-20-of-data"),
    Path("/kaggle/input/uav-people-sliced-1536px-slices-80-of-data"),
]
# Output directory for the symlinked dataset; prepare_aggregated_dataset deletes and recreates it.
FINAL_DATASET_DIR = Path("/kaggle/working/final_prepared_dataset")

final_yaml_path = prepare_aggregated_dataset(INPUT_DATASET_PATHS, FINAL_DATASET_DIR, RATIO)

model = YOLO("yolo12s.pt") 
# NOTE(review): confirm that "on_epoch_end" is an event the installed Ultralytics trainer actually
# fires (its documented trainer events include on_train_epoch_end / on_fit_epoch_end); otherwise
# custom_metric_callback is never invoked.
model.add_callback("on_epoch_end", custom_metric_callback)

model.train(
    data=str(final_yaml_path),
    epochs=EPOCHS,
    imgsz=640,
    # NOTE(review): early-stopping patience of a single epoch looks very aggressive for a 100-epoch budget — confirm intended.
    patience=1,
    batch=20,
    device=device_ids,
    project="uav_sliced_on_the_fly",
    name="run_continuous",
    val=True,
    save=True,
    exist_ok=True,
    pretrained=True, # Use pretrained weights if available
    optimizer="AdamW",
    single_cls=True, # We are only detecting people
    rect=True, # Use rectangular training to speed up training
    cos_lr=True, # Use cosine learning rate scheduler
    close_mosaic=10, # Disable mosaic augmentation on last 10 epochs
    resume=False, # Do not resume from an earlier checkpoint; this is a new run starting from the yolo12s.pt weights
    amp=False, # Automatic mixed precision is turned OFF (full-precision training)
    fraction=1.0, # Use 100% of the dataset
    lr0=0.01,
    weight_decay=0.0005,

    hsv_h=0.015, # Hue augmentation percentage range
    hsv_s=0.70, # Saturation augmentation percentage range
    hsv_v=0.40, # Brightness augmentation percentage range
    degrees=0.0, # No rotation augmentation
    translate=0.05, # Translation augmentation percentage range (keeping it low to avoid losing small objects)
    scale=0.60, # Scale augmentation percentage range
    shear=0.0, # No shear augmentation
    perspective=0.0, # No perspective augmentation
    flipud=0.0, # No vertical flip augmentation
    fliplr=0.50, # Horizontal flip augmentation probability
    bgr=0.0, # No RGB->BGR channel swapping
    mosaic=0.8, # Mosaic augmentation probability (combines 4 images into one)
    mixup=0.0, # Mixup augmentation probability (combines 2 images into one)
    cutmix=0.0, # CutMix augmentation probability (cuts and pastes patches from one image to another)
    copy_paste=0.2, # Copy-paste augmentation probability. NOTE(review): documented for segmentation tasks — confirm it has any effect for detection
    erasing=0.2 # Random-erasing probability. NOTE(review): documented for classification training — confirm it has any effect here
)


INFO: Setting up final, BALANCED dataset directory at /kaggle/working/final_prepared_dataset
--- PASS 1: Aggregating all data files ---
--> Analyzing uav-people-sliced-1536px-slices-20-of-data...


  - Reading train manifest: 100%|██████████| 109846/109846 [17:00<00:00, 107.64it/s]
  - Reading val manifest: 100%|██████████| 5396/5396 [00:48<00:00, 112.31it/s]


--> Analyzing uav-people-sliced-1536px-slices-80-of-data...


  - Reading train manifest: 100%|██████████| 439388/439388 [1:17:25<00:00, 94.57it/s]  
  - Reading val manifest: 100%|██████████| 21584/21584 [03:43<00:00, 96.69it/s] 



--- Aggregation Complete ---
Total Train Positives: 76547
Total Train Negatives: 472687

--- PASS 2: Sampling and Linking ---
Linking all 76547 positives and 95683 sampled negatives for TRAIN...


  - Linking train set: 100%|██████████| 172230/172230 [06:57<00:00, 412.43it/s]


Linking all 26980 images for VAL...


  - Linking val set: 100%|██████████| 26980/26980 [00:34<00:00, 771.39it/s] 
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo12s.pt to 'yolo12s.pt': 100%|██████████| 18.1M/18.1M [00:00<00:00, 86.6MB/s]


Ultralytics 8.3.176 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
                                                        CUDA:1 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=False, augment=False, auto_augment=randaugment, batch=20, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.2, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/kaggle/working/final_prepared_dataset/dataset.yaml, degrees=0.0, deterministic=True, device=0,1, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.2, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo12s.pt, momentum=0.937, mosaic=0.8, multi_scale=False, name=run_continuous, nbs=64, nms=False,

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf': 100%|██████████| 755k/755k [00:00<00:00, 18.8MB/s]

Overriding class names with single class.





Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     26080  ultralytics.nn.modules.block.C3k2            [64, 128, 1, False, 0.25]     
  3                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
  4                  -1  1    103360  ultralytics.nn.modules.block.C3k2            [128, 256, 1, False, 0.25]    
  5                  -1  1    590336  ultralytics.nn.modules.conv.Conv             [256, 256, 3, 2]              
  6                  -1  2    689408  ultralytics.nn.modules.block.A2C2f           [256, 256, 2, True, 4]        
  7                  -1  1   1180672  ultralytics

[34m[1mtrain: [0mScanning /kaggle/working/final_prepared_dataset/labels/train... 172230 images, 95683 backgrounds, 0 corrupt: 100%|██████████| 172230/172230 [20:51<00:00, 137.59it/s]


[34m[1mtrain: [0mNew cache created: /kaggle/working/final_prepared_dataset/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 2.8±3.2 ms, read: 38.3±16.2 MB/s, size: 482.9 KB)


[34m[1mval: [0mScanning /kaggle/working/final_prepared_dataset/labels/val... 26980 images, 24388 backgrounds, 0 corrupt: 100%|██████████| 26980/26980 [02:41<00:00, 166.67it/s]


[34m[1mval: [0mNew cache created: /kaggle/working/final_prepared_dataset/labels/val.cache
Plotting labels to uav_sliced_on_the_fly/run_continuous/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.01, momentum=0.937) with parameter groups 113 weight(decay=0.0), 120 weight(decay=0.00046875), 119 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1muav_sliced_on_the_fly/run_continuous[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      6.14G      1.902       1.75      1.339          9        640:  31%|███       | 2630/8612 [32:02<1:05:30,  1.52it/s]