In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.200-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.17-py3-none-any.whl.metadata (14 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  D

In [2]:
!pip install ultralytics

import os
import cv2
import json
import math
import shutil
import tempfile
import warnings
import random
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
from ultralytics import YOLO
from ultralytics.utils.downloads import attempt_download_asset

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error


class YOLODetectionPipeline:
    """
    YOLO pipeline driven by pandas DataFrames, with an optional hyper-parameter search for
    object counting and an optional linear (Ridge) calibration model.

    Data:
      train_df/val_df — DataFrames with the columns:
        - image_path (str): path to the image (jpg/png)
        - boxes_col (list[list[float]]): list of boxes [[x,y,w,h], ...] in YOLO normalisation [0..1]

    Main scenario:
      - fit(): trains YOLO; (opt.) tunes conf/iou/max_det; (opt.) fits Ridge on detector features;
               (opt.) prints RMSE/MAE for the plain count and the calibrated count.
      - predict(): returns detections (boxes_json + count). Calibration is NOT applied here.
      - predict_counts(): returns a numeric count. If calibration is enabled and fitted, Ridge is applied;
                          otherwise the plain len(detections) is returned, using the best thresholds found
                          (when tuning is enabled).

    Device:
      - device in fit/predict/predict_counts may be omitted — auto-selection is used:
            "0,1,...,N-1" for training (if N GPUs are available) → inside the pipeline
            inference/tuning are meant to run on the first card "0" (more stable),
            otherwise "cpu".
      - device="0" or "cpu" may be passed explicitly — it takes priority over auto.

    Tuning time:
      - tune_val_subsample: validation subsample (int — number of images, float — fraction 0..1).
      - tune_max_combinations: cap on the number of grid combinations (conf×iou×max_det).
    """

    def __init__(self,
                 model_ckpt: str = "yolov8n.pt",
                 data_root: str | None = None,
                 image_col: str = "image_path",
                 boxes_col: str = "boxes",
                 class_names: list[str] | None = None,
                 use_symlinks: bool = True,
                 verbose: bool = True,
                 # переключатели
                 enable_tuning: bool = True,
                 enable_ridge: bool = True,
                 validate_count: bool = True,
                 # управление временем тюнинга
                 tune_val_subsample: int | float | None = None,  # int=кол-во картинок; float=доля [0..1]
                 tune_max_combinations: int | None = None,       # ограничение числа комбо (случайно из сетки)
                 random_state: int = 42,
                 # сетки для тюнинга
                 tune_conf_grid = np.linspace(0.05, 0.6, 12),
                 tune_iou_grid  = (0.4, 0.5, 0.6),
                 tune_max_det_grid = (300, 1000, 2000),
                 ridge_alpha_grid = (0.1, 0.3, 1.0, 2.0, 3.0, 5.0),
                 # пороги нормированной площади для фич:
                 small_thr: float = 0.005,
                 big_thr: float   = 0.05):
        self.model_ckpt = model_ckpt
        self.image_col = image_col
        self.boxes_col = boxes_col
        self.class_names = class_names or ["obj"]
        self.use_symlinks = use_symlinks
        self.verbose = verbose

        self.enable_tuning = enable_tuning
        self.enable_ridge = enable_ridge
        self.validate_count = validate_count
        self.tune_val_subsample = tune_val_subsample
        self.tune_max_combinations = tune_max_combinations
        self.random_state = random_state

        random.seed(random_state)
        np.random.seed(random_state)

        # рабочая папка
        self._tmpdir_owned = False
        if data_root is None:
            self.data_root = tempfile.mkdtemp(prefix="yolo_ds_")
            self._tmpdir_owned = True
        else:
            self.data_root = os.path.abspath(data_root)
            os.makedirs(self.data_root, exist_ok=True)

        self.dataset_yaml = os.path.join(self.data_root, "dataset.yaml")
        self.model_path = None
        self._model = None
        self._device = None  # устройство, использованное при fit()

        # сетки и пороги фич
        self.tune_conf_grid = np.array(tune_conf_grid, dtype=float)
        self.tune_iou_grid  = tuple(tune_iou_grid)
        self.tune_max_det_grid = tuple(int(x) for x in tune_max_det_grid)
        self.ridge_alpha_grid = tuple(float(x) for x in ridge_alpha_grid)
        self.small_thr = float(small_thr)
        self.big_thr   = float(big_thr)

        # сохранённые результаты тюнинга/калибровки
        self.calib_ = dict(
            best_conf=None, best_iou=None, best_max_det=None,
            ridge_alpha=None, ridge_model=None,
            imgsz=None
        )

    # -------------------- device helpers --------------------
    @staticmethod
    def _resolve_device(device: str | int | None) -> str:
        """
        Преобразует device для Ultralytics:
          - None или "auto": если есть CUDA → "0,1,...,N-1" для train; иначе "cpu"
          - иначе возвращает str(device)
        Используется в fit() для обучения.
        """
        if device is None or str(device).lower() == "auto":
            if torch.cuda.is_available() and torch.cuda.device_count() > 0:
                n = torch.cuda.device_count()
                return ",".join(str(i) for i in range(n))  # напр. "0" или "0,1"
            return "cpu"
        return str(device)

    def _infer_device(self) -> str:
        """
        Устройство для инференса/тюнинга:
          - если тренировались на '0,1,...' → берём первую карту '0'
          - если тренировались на одной карте 'k' → её же
          - иначе авто: '0' при наличии CUDA, 'cpu' без GPU
        """
        if getattr(self, "_device", None):
            if isinstance(self._device, str) and "," in self._device:
                return self._device.split(",")[0]
            return self._device
        return "0" if (torch.cuda.is_available() and torch.cuda.device_count() > 0) else "cpu"

    # -------------------- helpers: разметка → YOLO-тxt --------------------
    @staticmethod
    def _is_nan_like(x):
        if x is None: return True
        if isinstance(x, float) and np.isnan(x): return True
        if isinstance(x, str) and x.strip()=="": return True
        return False

    def _parse_boxes(self, row):
        boxes_raw = row[self.boxes_col] if self.boxes_col in row else None
        if self._is_nan_like(boxes_raw): return []
        out = []
        if isinstance(boxes_raw, (list, tuple, np.ndarray)):
            for it in boxes_raw:
                vals = list(map(float, it))
                if len(vals) >= 4:
                    x,y,w,h = vals[:4]
                    if 0 <= x <= 1 and 0 <= y <= 1 and 0 < w <= 1 and 0 < h <= 1:
                        out.append((0, x,y,w,h))
        elif isinstance(boxes_raw, str):
            lines = [ln.strip() for ln in boxes_raw.strip().splitlines() if ln.strip()]
            for ln in lines:
                parts = ln.split()
                vals = list(map(float, parts))
                if len(vals) == 4:
                    x,y,w,h = vals
                    out.append((0, x,y,w,h))
                elif len(vals) >= 5:
                    cls,x,y,w,h = int(vals[0]), *vals[1:5]
                    out.append((cls, float(x),float(y),float(w),float(h)))
        return out

    def _link_or_copy(self, src, dst):
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        if self.use_symlinks:
            try:
                if os.path.lexists(dst): os.remove(dst)
                os.symlink(os.path.abspath(src), dst)
                return
            except Exception:
                pass
        shutil.copy2(src, dst)

    def _write_label_file(self, label_path, boxes):
        os.makedirs(os.path.dirname(label_path), exist_ok=True)
        with open(label_path, "w", encoding="utf-8") as f:
            for cls, x,y,w,h in boxes:
                f.write(f"{int(cls)} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

    def _materialize(self, train_df, val_df, train_split="train", val_split="val"):
        for split_name, df in [(train_split, train_df), (val_split, val_df)]:
            img_dir = os.path.join(self.data_root, "images", split_name)
            lbl_dir = os.path.join(self.data_root, "labels", split_name)
            os.makedirs(img_dir, exist_ok=True); os.makedirs(lbl_dir, exist_ok=True)
            it = df.iterrows()
            if self.verbose: it = tqdm(it, total=len(df), desc=f"[build] {split_name}")
            for _, row in it:
                src = row[self.image_col]
                if not os.path.exists(src):
                    raise FileNotFoundError(f"Image not found: {src}")
                fname = os.path.basename(src)
                stem, _ = os.path.splitext(fname)
                self._link_or_copy(src, os.path.join(img_dir, fname))
                self._write_label_file(os.path.join(lbl_dir, stem + ".txt"), self._parse_boxes(row))

        with open(self.dataset_yaml, "w", encoding="utf-8") as f:
            f.write(f"path: {self.data_root}\ntrain: images/{train_split}\nval: images/{val_split}\nnames:\n")
            for i, name in enumerate(self.class_names):
                f.write(f"  {i}: {name}\n")

    # -------------------- fit: train + (tune/ridge/validate) --------------------
    def fit(self,
            train_df: pd.DataFrame,
            val_df: pd.DataFrame | None = None,
            test_size: float = 0.2,
            epochs: int = 50,
            imgsz: int = 640,
            batch: int = 16,
            device: str | int | None = "auto",
            workers: int = 4,
            patience: int = 50,
            optimizer: str = "auto",
            augment: bool = True,
            seed: int = 42,
            close_mosaic: int | None = 10,
            cos_lr: bool = True,
            rect: bool = False,
            iou: float = 0.7,
            **extra_train_kwargs):

        # выбрать устройство для тренировки и сохранить
        self._device = self._resolve_device(device)
        if self.verbose:
            print(f"[device] training device='{self._device}'")

        np.random.seed(seed); random.seed(seed)

        # если val_df не задан — сделаем стратифицированный сплит по бинам count
        if val_df is None:
            tmp = train_df.copy()
            counts = [len(self._parse_boxes(r)) for _, r in tmp.iterrows()]
            tmp["_bins"] = np.clip((np.array(counts)//5).astype(int), 0, 50)
            val_mask = tmp.groupby("_bins", group_keys=False).apply(
                lambda g: g.sample(frac=test_size, random_state=seed)).index
            val_df = train_df.loc[val_mask]
            train_df = train_df.drop(index=val_mask)
            train_df = train_df.reset_index(drop=True); val_df = val_df.reset_index(drop=True)

        # собрать датасет на диск
        self._materialize(train_df, val_df)

        # загрузить/скачать чекпоинт при необходимости
        if not os.path.exists(self.model_ckpt) and self.model_ckpt.endswith(".pt"):
            try:
                if self.verbose: print(f"Checkpoint '{self.model_ckpt}' not found. Attempting to download...")
                attempt_download_asset(self.model_ckpt)
            except Exception as e:
                raise FileNotFoundError(f"Failed to download '{self.model_ckpt}'. Error: {e}")

        # train args
        train_args = {
            'data': self.dataset_yaml, 'epochs': epochs, 'imgsz': imgsz, 'batch': batch,
            'device': self._device, 'workers': workers, 'patience': patience, 'optimizer': optimizer,
            'augment': augment, 'seed': seed, 'close_mosaic': close_mosaic, 'cos_lr': cos_lr,
            'rect': rect, 'iou': iou, 'verbose': self.verbose
        }
        train_args.update(extra_train_kwargs)

        # обучение
        model = YOLO(self.model_ckpt)
        model.train(**train_args)
        
        # NEW: корректно выбираем путь к лучшим весам
        best_path = None
        # Ultralytics v8: после train лучший чекпоинт лежит в model.trainer.best
        if hasattr(model, "trainer") and getattr(model.trainer, "best", None):
            best_path = str(model.trainer.best)  # .../runs/detect/exp/weights/best.pt
        # запасной вариант (иногда ckpt_path есть, но в новых релизах — нет)
        elif getattr(model, "ckpt_path", None):
            best_path = str(model.ckpt_path)
        # если по какой-то причине не нашли best/last — используем исходный чекпоинт
        else:
            best_path = self.model_ckpt
        
        self.model_path = best_path
        if self.verbose:
            print(f"[fit] best model: {self.model_path}")

        # тюнинг/калибровка/валидация
        try:
            self._tune_and_or_calibrate(val_df, imgsz=imgsz)
            if self.validate_count:
                self._validate_counting(val_df)
        except Exception as e:
            if self.verbose:
                print(f"[post-fit] skipped tuning/calibration/validation due to: {e}")

        return self.model_path

    # -------------------- инференс-хелперы --------------------
    def _ensure_model(self):
        if self._model is None:
            path = self.model_path or self.model_ckpt
            self._model = YOLO(path)

    @torch.no_grad()
    def _raw_counts(self, paths, imgsz, conf, iou, max_det):
        self._ensure_model()
        out = []
        dev = self._infer_device()  # ВСЕГДА одна карта при инференсе/тюнинге
        for i in tqdm(range(0, len(paths), 64), disable=not self.verbose, desc="[counts]"):
            batch = paths[i:i+64]
            res = self._model.predict(batch, imgsz=imgsz, conf=conf, iou=iou,
                                      max_det=max_det, device=dev, verbose=False)
            for r in res:
                out.append(int(len(r.boxes) if (r.boxes is not None) else 0))
        return np.array(out, dtype=float)

    @torch.no_grad()
    def _yolo_feats(self, paths, imgsz, conf, iou, max_det):
        self._ensure_model()
        rows = []
        dev = self._infer_device()
        for i in tqdm(range(0, len(paths), 64), disable=not self.verbose, desc="[feats]"):
            batch = paths[i:i+64]
            res = self._model.predict(batch, imgsz=imgsz, conf=conf, iou=iou,
                                      max_det=max_det, device=dev, verbose=False)
            for r in res:
                if r.boxes is None or len(r.boxes) == 0:
                    rows.append(dict(n=0, conf_sum=0, conf_mean=0, conf_max=0,
                                     area_mean=0, frac_small=0, frac_mid=0, frac_big=0))
                    continue
                confs = r.boxes.conf.cpu().numpy()
                xywhn = r.boxes.xywhn.cpu().numpy()
                areas = (xywhn[:,2] * xywhn[:,3]).clip(0, 1)
                rows.append(dict(
                    n=len(confs),
                    conf_sum=float(confs.sum()),
                    conf_mean=float(confs.mean()),
                    conf_max=float(confs.max()),
                    area_mean=float(areas.mean()),
                    frac_small=float((areas < self.small_thr).mean()),
                    frac_mid=float(((areas >= self.small_thr) & (areas <= self.big_thr)).mean()),
                    frac_big=float((areas > self.big_thr).mean())
                ))
        return pd.DataFrame(rows).to_numpy()

    # -------------------- подвыборка валидации --------------------
    def _subset_val(self, val_df: pd.DataFrame) -> pd.DataFrame:
        if self.tune_val_subsample is None:
            return val_df
        n = len(val_df)
        if isinstance(self.tune_val_subsample, float):
            k = max(1, int(round(n * self.tune_val_subsample)))
        else:
            k = int(self.tune_val_subsample)
        k = min(k, n)
        return val_df.sample(n=k, random_state=self.random_state).reset_index(drop=True)

    # -------------------- тюнинг и/или калибровка --------------------
    def _tune_and_or_calibrate(self, val_df: pd.DataFrame, imgsz: int):
        val_sub = self._subset_val(val_df)
        paths = val_sub[self.image_col].tolist()
        y_true = np.array([len(self._parse_boxes(r)) for _, r in val_sub.iterrows()], dtype=float)

        best_conf, best_iou, best_max_det = None, None, None

        # 1) тюнинг plain count (если включён)
        if self.enable_tuning:
            combos = [(float(c), float(i), int(m))
                      for i in self.tune_iou_grid
                      for m in self.tune_max_det_grid
                      for c in self.tune_conf_grid]
            random.shuffle(combos)
            if self.tune_max_combinations is not None:
                combos = combos[:int(self.tune_max_combinations)]

            best_rmse = 1e9
            for conf, iou, max_det in combos:
                y_pred = self._raw_counts(paths, imgsz=imgsz, conf=conf, iou=iou, max_det=max_det)
                rmse = mean_squared_error(y_true, y_pred, squared=False)
                if rmse < best_rmse:
                    best_rmse, best_conf, best_iou, best_max_det = rmse, conf, iou, max_det
            if self.verbose:
                print(f"[tune] best plain count: conf={best_conf}, iou={best_iou}, max_det={best_max_det}  RMSE={best_rmse:.3f}")
        else:
            best_conf, best_iou, best_max_det = 0.25, 0.5, 1000
            if self.verbose:
                print("[tune] disabled → using defaults conf=0.25, iou=0.5, max_det=1000")

        # 2) калибровка Ridge (если включена)
        ridge_model, ridge_alpha = None, None
        if self.enable_ridge:
            X = self._yolo_feats(paths, imgsz=imgsz, conf=best_conf, iou=best_iou, max_det=best_max_det)
            best_rmse = 1e9
            for a in self.ridge_alpha_grid:
                m = Ridge(alpha=float(a)).fit(X, y_true)
                y_hat = m.predict(X)
                rmse = mean_squared_error(y_true, y_hat, squared=False)
                if rmse < best_rmse:
                    best_rmse, ridge_alpha, ridge_model = rmse, float(a), m
            if self.verbose:
                print(f"[calib] Ridge alpha={ridge_alpha}  RMSE(calib)={best_rmse:.3f}")
        else:
            if self.verbose:
                print("[calib] Ridge disabled")

        # сохранить найденное
        self.calib_.update(dict(
            best_conf=best_conf if self.enable_tuning else None,
            best_iou=best_iou if self.enable_tuning else None,
            best_max_det=best_max_det if self.enable_tuning else None,
            ridge_alpha=ridge_alpha if self.enable_ridge else None,
            ridge_model=ridge_model if self.enable_ridge else None,
            imgsz=imgsz
        ))

    # -------------------- финальная валидация подсчёта --------------------
    def _validate_counting(self, val_df: pd.DataFrame):
        paths = val_df[self.image_col].tolist()
        y_true = np.array([len(self._parse_boxes(r)) for _, r in val_df.iterrows()], dtype=float)

        imgsz = self.calib_['imgsz'] or 640
        conf  = self.calib_['best_conf'] if self.enable_tuning else 0.25
        iou   = self.calib_['best_iou']  if self.enable_tuning else 0.5
        max_det = self.calib_['best_max_det'] if self.enable_tuning else 1000

        y_plain = self._raw_counts(paths, imgsz, conf, iou, max_det)
        rmse_plain = mean_squared_error(y_true, y_plain, squared=False)
        mae_plain  = mean_absolute_error(y_true, y_plain)

        if self.enable_ridge and self.calib_['ridge_model'] is not None:
            X = self._yolo_feats(paths, imgsz, conf, iou, max_det)
            y_cal = np.clip(self.calib_['ridge_model'].predict(X), 0, None)
            rmse_cal = mean_squared_error(y_true, y_cal, squared=False)
            mae_cal  = mean_absolute_error(y_true, y_cal)
            print(f"[val-count] plain: RMSE={rmse_plain:.3f}, MAE={mae_plain:.3f}  |  calibrated: RMSE={rmse_cal:.3f}, MAE={mae_cal:.3f}")
        else:
            print(f"[val-count] plain: RMSE={rmse_plain:.3f}, MAE={mae_plain:.3f}  |  calibrated: (disabled)")

    # -------------------- публичный инференс: детекции --------------------
    @torch.no_grad()
    def predict(self, df: pd.DataFrame,
                conf: float = 0.25, iou: float = 0.6,
                imgsz: int = 640, device: str | int | None = "auto",
                max_det: int = 300, agnostic_nms: bool = False) -> pd.DataFrame:
        """Детекции (калибровка НЕ используется)."""
        assert self.image_col in df.columns
        if self._model is None:
            self._model = YOLO(self.model_path or self.model_ckpt)

        # для инференса в ноутбуках мульти-GPU может быть нестабилен — оставляем одну карту
        dev = self._resolve_device(device if device is not None else self._infer_device())

        paths = df[self.image_col].tolist()
        preds = []
        for i in tqdm(range(0, len(paths), 64), disable=not self.verbose, desc="[predict]"):
            batch = paths[i:i+64]
            res = self._model(batch, conf=conf, iou=iou, imgsz=imgsz,
                              device=dev, verbose=False, max_det=max_det,
                              agnostic_nms=agnostic_nms)
            for r in res:
                boxes = []
                if r.boxes is not None and len(r.boxes):
                    xywhn = r.boxes.xywhn.cpu().numpy()
                    confv = r.boxes.conf.cpu().numpy()
                    clsv  = r.boxes.cls.cpu().numpy().astype(int)
                    for (x,y,w,h), c, k in zip(xywhn, confv, clsv):
                        boxes.append({"cls": int(k), "conf": float(c),
                                      "x": float(x), "y": float(y), "w": float(w), "h": float(h)})
                preds.append({
                    self.image_col: r.path,
                    "count": len(boxes),
                    "boxes_json": json.dumps(boxes, ensure_ascii=False)
                })
        return pd.DataFrame(preds)

    # -------------------- публичный инференс: подсчёт --------------------
    @torch.no_grad()
    def predict_counts(self, df: pd.DataFrame,
                       imgsz: int | None = None,
                       conf: float | None = None,
                       iou: float | None = None,
                       max_det: int | None = None,
                       device: str | int | None = "auto",
                       clamp_nonneg: bool = True,
                       do_round: bool = False) -> pd.DataFrame:
        """
        Подсчёт объектов.
          - Если enable_ridge=True и калибратор обучен → применяет Ridge (на лучших conf/iou/max_det при enable_tuning).
          - Иначе → plain len(detections) с conf/iou/max_det:
              * если enable_tuning=True → используем лучшие найденные;
              * если enable_tuning=False → дефолты conf=0.25, iou=0.5, max_det=1000.
        """
        assert self.image_col in df.columns
        if self._model is None:
            self._model = YOLO(self.model_path or self.model_ckpt)

        dev = self._resolve_device(device if device is not None else self._infer_device())

        imgsz = imgsz or self.calib_['imgsz'] or 640
        if self.enable_tuning and self.calib_['best_conf'] is not None:
            conf_def, iou_def, max_det_def = self.calib_['best_conf'], self.calib_['best_iou'], self.calib_['best_max_det']
        else:
            conf_def, iou_def, max_det_def = 0.25, 0.5, 1000

        use_conf  = conf    if conf    is not None else conf_def
        use_iou   = iou     if iou     is not None else iou_def
        use_maxdet= max_det if max_det is not None else max_det_def

        paths = df[self.image_col].tolist()

        # калиброванный вариант
        if self.enable_ridge and self.calib_['ridge_model'] is not None:
            # считаем фичи на одной карте
            X = []
            for i in tqdm(range(0, len(paths), 64), disable=not self.verbose, desc="[feats]"):
                batch = paths[i:i+64]
                res = self._model.predict(batch, imgsz=imgsz, conf=use_conf, iou=use_iou,
                                          max_det=use_maxdet, device=dev, verbose=False)
                for r in res:
                    if r.boxes is None or len(r.boxes) == 0:
                        X.append([0,0,0,0,0,0,0,0]); continue
                    confs = r.boxes.conf.cpu().numpy()
                    xywhn = r.boxes.xywhn.cpu().numpy()
                    areas = (xywhn[:,2]*xywhn[:,3]).clip(0,1)
                    X.append([
                        len(confs), float(confs.sum()), float(confs.mean()), float(confs.max()),
                        float(areas.mean()),
                        float((areas < self.small_thr).mean()),
                        float(((areas >= self.small_thr) & (areas <= self.big_thr)).mean()),
                        float((areas > self.big_thr).mean())
                    ])
            y = self.calib_['ridge_model'].predict(np.asarray(X, dtype=float))
        else:
            # plain len(det) на одной карте
            y = []
            for i in tqdm(range(0, len(paths), 64), disable=not self.verbose, desc="[counts]"):
                batch = paths[i:i+64]
                res = self._model.predict(batch, imgsz=imgsz, conf=use_conf, iou=use_iou,
                                          max_det=use_maxdet, device=dev, verbose=False)
                for r in res:
                    y.append(int(len(r.boxes) if (r.boxes is not None) else 0))
            y = np.asarray(y, dtype=float)

        if clamp_nonneg: y = np.clip(y, 0, None)
        if do_round:     y = np.rint(y)

        out = df[[self.image_col]].copy()
        out["label"] = y
        return out

    # -------------------- housekeeping --------------------
    def cleanup(self):
        if self._tmpdir_owned and os.path.isdir(self.data_root):
            shutil.rmtree(self.data_root, ignore_errors=True)

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
# yolo_detection_pipeline.py
# Обновлённый пайплайн:
# - авто-выбор устройства (GPU '0' / мульти-GPU '0,1,...' / CPU),
# - тюнинг подсчёта: plain (conf/iou/max_det) ИЛИ area-gated (cs/cb/area_thr + iou/max_det),
# - Ridge-калибровка по резидуалу (K-fold CV, стандартизация),
# - устойчивый инференс при мульти-GPU (тюнинг/инференс на первой карте),
# - финальная валидация RMSE/MAE для задачи подсчёта.
#
# Требования:
# pip install -U ultralytics opencv-python pandas numpy scikit-learn tqdm

import os
import cv2
import json
import shutil
import tempfile
import warnings
import random
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
from ultralytics import YOLO
from ultralytics.utils.downloads import attempt_download_asset

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import KFold


class YOLODetectionPipeline:
    """
    YOLO-пайплайн из pandas DataFrame с опциональным подбором гиперпараметров подсчёта
    (plain или area-gated) и калибровкой линейной моделью (Ridge по резидуалу).

    Входные данные (DataFrame):
      - image_path (str): путь к изображению (jpg/png)
      - boxes_col: GT-боксы в YOLO-нормировке [0..1] (списки/массивы/строка)

    Основной сценарий:
      - fit(): обучает YOLO; (опц.) тюнинг порогов (plain/area-gated + iou/max_det);
               (опц.) калибрует Ridge; (опц.) валидирует RMSE/MAE.
      - predict(): возвращает детекции (boxes_json + count). Калибровка НЕ применяется.
      - predict_counts(): возвращает числовой подсчёт; применяет лучшие пороги/калибровку, если включены.

    Управление временем тюнинга:
      - tune_val_subsample: подвыборка валидации (int — кол-во, float — доля 0..1).
      - tune_max_combinations: ограничивает число проверяемых комбо (случайно из сетки).
    """

    def __init__(self,
                 model_ckpt: str = "yolov8n.pt",
                 data_root: str | None = None,
                 image_col: str = "image_path",
                 boxes_col: str = "boxes",
                 class_names: list[str] | None = None,
                 use_symlinks: bool = True,
                 verbose: bool = True,
                 # переключатели
                 enable_tuning: bool = True,
                 enable_ridge: bool = True,
                 validate_count: bool = True,
                 # режим тюнинга plain vs area-gated
                 enable_area_gate: bool = True,        # True → тюним (conf_small, conf_big, area_thr) + (iou, max_det)
                 enable_tta_flip: bool = False,        # True → TTA flip при plain-подсчёте
                 # управление временем тюнинга
                 tune_val_subsample: int | float | None = None,  # int=кол-во; float=доля [0..1]
                 tune_max_combinations: int | None = 100,
                 random_state: int = 42,
                 # сетки для plain-тюнинга
                 tune_conf_grid = (0.20, 0.25, 0.30, 0.35),
                 tune_iou_grid  = (0.55, 0.60),
                 tune_max_det_grid = (300, 600),
                 # сетки для area-gated (под мелкие объекты из ваших стат)
                 tune_conf_small_grid = (0.10, 0.12, 0.14, 0.18),
                 tune_conf_big_grid   = (0.30, 0.40, 0.50),
                 tune_area_thr_grid   = (0.0008, 0.0010, 0.0012, 0.0015),
                 gate_conf_base: float = 0.07,  # базовый conf для извлечения кандидатов при area-gated
                 # сетка для Ridge
                 ridge_alpha_grid = (0.3, 1.0, 3.0),
                 # пороги нормированной площади для фич Ridge/plain
                 small_thr: float = 0.0010,
                 big_thr: float   = 0.003):
        self.model_ckpt = model_ckpt
        self.image_col = image_col
        self.boxes_col = boxes_col
        self.class_names = class_names or ["obj"]
        self.use_symlinks = use_symlinks
        self.verbose = verbose

        self.enable_tuning = enable_tuning
        self.enable_ridge = enable_ridge
        self.validate_count = validate_count
        self.enable_area_gate = enable_area_gate
        self.enable_tta_flip = enable_tta_flip

        self.tune_val_subsample = tune_val_subsample
        self.tune_max_combinations = tune_max_combinations
        self.random_state = random_state
        random.seed(random_state)
        np.random.seed(random_state)

        # рабочая папка
        self._tmpdir_owned = False
        if data_root is None:
            self.data_root = tempfile.mkdtemp(prefix="yolo_ds_")
            self._tmpdir_owned = True
        else:
            self.data_root = os.path.abspath(data_root)
            os.makedirs(self.data_root, exist_ok=True)

        self.dataset_yaml = os.path.join(self.data_root, "dataset.yaml")
        self.model_path = None
        self._model = None
        self._device = None  # строка устройства, использованная при fit()

        # сетки и пороги
        self.tune_conf_grid = tuple(float(x) for x in tune_conf_grid)
        self.tune_iou_grid  = tuple(float(x) for x in tune_iou_grid)
        self.tune_max_det_grid = tuple(int(x) for x in tune_max_det_grid)

        self.tune_conf_small_grid = tuple(float(x) for x in tune_conf_small_grid)
        self.tune_conf_big_grid   = tuple(float(x) for x in tune_conf_big_grid)
        self.tune_area_thr_grid   = tuple(float(x) for x in tune_area_thr_grid)
        self.gate_conf_base = float(gate_conf_base)

        self.ridge_alpha_grid = tuple(float(x) for x in ridge_alpha_grid)
        self.small_thr = float(small_thr)
        self.big_thr   = float(big_thr)

        # сохранённые результаты тюнинга/калибровки
        self.calib_ = dict(
            # plain режим:
            best_conf=None, best_iou=None, best_max_det=None,
            # area-gated:
            gate_conf_small=None, gate_conf_big=None, gate_area_thr=None, gate_conf_base=None,
            # Ridge:
            ridge_alpha=None, ridge_model=None, ridge_mu=None, ridge_sd=None,
            # общий:
            imgsz=None
        )

    # -------------------- device helpers --------------------
    @staticmethod
    def _resolve_device(device: str | int | None) -> str:
        """
        Выбор устройства для обучения:
          - None / "auto": "0,1,...,N-1" при наличии CUDA, иначе "cpu"
          - иначе вернуть строку как есть (например, "0" или "cpu")
        """
        if device is None or str(device).lower() == "auto":
            if torch.cuda.is_available() and torch.cuda.device_count() > 0:
                n = torch.cuda.device_count()
                return ",".join(str(i) for i in range(n))
            return "cpu"
        return str(device)

    def _infer_device(self) -> str:
        """
        Устройство для инференса/тюнинга:
          - если тренировались на '0,1,...' → берём первую карту '0'
          - если тренировались на 'k' → её же
          - иначе авто: '0' при наличии CUDA, 'cpu' без GPU
        """
        if getattr(self, "_device", None):
            if isinstance(self._device, str) and "," in self._device:
                return self._device.split(",")[0]
            return self._device
        return "0" if (torch.cuda.is_available() and torch.cuda.device_count() > 0) else "cpu"

    # -------------------- helpers: разметка → YOLO-тxt --------------------
    @staticmethod
    def _is_nan_like(x):
        if x is None: return True
        if isinstance(x, float) and np.isnan(x): return True
        if isinstance(x, str) and x.strip()=="": return True
        return False

    def _parse_boxes(self, row):
        boxes_raw = row[self.boxes_col] if self.boxes_col in row else None
        if self._is_nan_like(boxes_raw): return []
        out = []
        if isinstance(boxes_raw, (list, tuple, np.ndarray)):
            for it in boxes_raw:
                vals = list(map(float, it))
                if len(vals) >= 4:
                    x,y,w,h = vals[:4]
                    if 0 <= x <= 1 and 0 <= y <= 1 and 0 < w <= 1 and 0 < h <= 1:
                        out.append((0, x,y,w,h))
        elif isinstance(boxes_raw, str):
            lines = [ln.strip() for ln in boxes_raw.strip().splitlines() if ln.strip()]
            for ln in lines:
                parts = ln.split()
                vals = list(map(float, parts))
                if len(vals) == 4:
                    x,y,w,h = vals
                    out.append((0, x,y,w,h))
                elif len(vals) >= 5:
                    cls,x,y,w,h = int(vals[0]), *vals[1:5]
                    out.append((cls, float(x),float(y),float(w),float(h)))
        return out

    def _link_or_copy(self, src, dst):
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        if self.use_symlinks:
            try:
                if os.path.lexists(dst): os.remove(dst)
                os.symlink(os.path.abspath(src), dst)
                return
            except Exception:
                pass
        shutil.copy2(src, dst)

    def _write_label_file(self, label_path, boxes):
        os.makedirs(os.path.dirname(label_path), exist_ok=True)
        with open(label_path, "w", encoding="utf-8") as f:
            for cls, x,y,w,h in boxes:
                f.write(f"{int(cls)} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

    def _materialize(self, train_df, val_df, train_split="train", val_split="val"):
        for split_name, df in [(train_split, train_df), (val_split, val_df)]:
            img_dir = os.path.join(self.data_root, "images", split_name)
            lbl_dir = os.path.join(self.data_root, "labels", split_name)
            os.makedirs(img_dir, exist_ok=True); os.makedirs(lbl_dir, exist_ok=True)
            it = df.iterrows()
            if self.verbose: it = tqdm(it, total=len(df), desc=f"[build] {split_name}")
            for _, row in it:
                src = row[self.image_col]
                if not os.path.exists(src):
                    raise FileNotFoundError(f"Image not found: {src}")
                fname = os.path.basename(src)
                stem, _ = os.path.splitext(fname)
                self._link_or_copy(src, os.path.join(img_dir, fname))
                self._write_label_file(os.path.join(lbl_dir, stem + ".txt"), self._parse_boxes(row))

        with open(self.dataset_yaml, "w", encoding="utf-8") as f:
            f.write(f"path: {self.data_root}\ntrain: images/{train_split}\nval: images/{val_split}\nnames:\n")
            for i, name in enumerate(self.class_names):
                f.write(f"  {i}: {name}\n")

    # -------------------- fit: train + (tune/ridge/validate) --------------------
    def fit(self,
            train_df: pd.DataFrame,
            val_df: pd.DataFrame | None = None,
            test_size: float = 0.2,
            epochs: int = 50,
            imgsz: int = 640,
            batch: int = 16,
            device: str | int | None = "auto",
            workers: int = 4,
            patience: int = 50,
            optimizer: str = "auto",
            augment: bool = True,
            seed: int = 42,
            close_mosaic: int | None = 10,
            cos_lr: bool = True,
            rect: bool = False,
            iou: float = 0.7,
            **extra_train_kwargs):
        """
        Train YOLO on `train_df`, then optionally tune counting thresholds, calibrate
        Ridge and print counting metrics on the validation set.

        Args:
            train_df: rows with `image_col` / `boxes_col`.
            val_df: validation rows; when None, a fraction `test_size` of `train_df`
                is split off, stratified by bins of the per-image box count.
            device: passed through `_resolve_device` ("auto" -> all GPUs or "cpu").
            extra_train_kwargs: forwarded to `YOLO.train`, overriding the named args.
            (remaining arguments are forwarded to `YOLO.train` unchanged)

        Returns:
            Path of the checkpoint selected after training (stored in `model_path`).

        Notes:
            Post-fit tuning/calibration/validation is best-effort: a failure there does
            not fail `fit`; the exception is kept in `post_fit_error_` (None on success).

        Raises:
            FileNotFoundError: if the checkpoint is missing and cannot be downloaded,
                or an image path does not exist.
        """
        # Pick the training device and remember it for later inference.
        self._device = self._resolve_device(device)
        if self.verbose:
            print(f"[device] training device='{self._device}'")

        np.random.seed(seed); random.seed(seed)

        # No validation set given: stratified split by bins of the box count.
        if val_df is None:
            # Positional index first: the split below selects rows by index label, which
            # is only correct when labels are unique.
            train_df = train_df.reset_index(drop=True)
            tmp = train_df.copy()
            counts = [len(self._parse_boxes(r)) for _, r in tmp.iterrows()]
            tmp["_bins"] = np.clip((np.array(counts)//5).astype(int), 0, 50)
            val_mask = tmp.groupby("_bins", group_keys=False).apply(
                lambda g: g.sample(frac=test_size, random_state=seed)).index
            if len(val_mask) == 0 and len(train_df) > 1:
                # Every bin was too small to yield a sample; fall back to a plain random
                # split so the validation set is never empty.
                n_val = min(len(train_df) - 1, max(1, int(round(len(train_df) * test_size))))
                val_mask = train_df.sample(n=n_val, random_state=seed).index
            val_df = train_df.loc[val_mask]
            train_df = train_df.drop(index=val_mask)
            train_df = train_df.reset_index(drop=True); val_df = val_df.reset_index(drop=True)

        self._materialize(train_df, val_df)

        # Load / download the starting checkpoint.
        if not os.path.exists(self.model_ckpt) and self.model_ckpt.endswith(".pt"):
            try:
                if self.verbose: print(f"Checkpoint '{self.model_ckpt}' not found. Attempting to download...")
                attempt_download_asset(self.model_ckpt)
            except Exception as e:
                raise FileNotFoundError(f"Failed to download '{self.model_ckpt}'. Error: {e}")

        # Training arguments; explicit extra kwargs win.
        train_args = {
            'data': self.dataset_yaml, 'epochs': epochs, 'imgsz': imgsz, 'batch': batch,
            'device': self._device, 'workers': workers, 'patience': patience, 'optimizer': optimizer,
            'augment': augment, 'seed': seed, 'close_mosaic': close_mosaic, 'cos_lr': cos_lr,
            'rect': rect, 'iou': iou, 'verbose': self.verbose
        }
        train_args.update(extra_train_kwargs)

        model = YOLO(self.model_ckpt)
        model.train(**train_args)

        # Prefer the trainer's best checkpoint, then the model's own path, then the input.
        best_path = None
        if hasattr(model, "trainer") and getattr(model.trainer, "best", None):
            best_path = str(model.trainer.best)     # .../runs/detect/exp/weights/best.pt
        elif getattr(model, "ckpt_path", None):
            best_path = str(model.ckpt_path)
        else:
            best_path = self.model_ckpt
        self.model_path = best_path
        # Drop any model cached before/without this training run so that tuning and later
        # predictions load the freshly trained weights instead of stale ones.
        self._model = None
        if self.verbose:
            print(f"[fit] best model: {self.model_path}")

        # Tuning / calibration / validation (best-effort).
        self.post_fit_error_ = None
        try:
            self._tune_and_or_calibrate(val_df, imgsz=imgsz)
            if self.validate_count:
                self._validate_counting(val_df)
        except Exception as e:
            self.post_fit_error_ = e  # keep the cause inspectable even when not verbose
            if self.verbose:
                print(f"[post-fit] skipped tuning/calibration/validation due to: {e}")

        return self.model_path

    # -------------------- инференс-хелперы --------------------
    def _ensure_model(self):
        if self._model is None:
            path = self.model_path or self.model_ckpt
            self._model = YOLO(path)

    @torch.no_grad()
    def _raw_counts(self, paths, imgsz, conf, iou, max_det):
        """Plain len(detections). Если enable_tta_flip=True — усреднение с augment=True."""
        self._ensure_model()
        out = []
        dev = self._infer_device()
        if not self.enable_tta_flip:
            for i in tqdm(range(0, len(paths), 64), disable=not self.verbose, desc="[counts]"):
                batch = paths[i:i+64]
                res = self._model.predict(batch, imgsz=imgsz, conf=conf, iou=iou,
                                          max_det=max_det, device=dev, verbose=False)
                for r in res:
                    out.append(int(len(r.boxes) if (r.boxes is not None) else 0))
        else:
            for i in tqdm(range(0, len(paths), 64), disable=not self.verbose, desc="[counts-tta]"):
                batch = paths[i:i+64]
                r1 = self._model.predict(batch, imgsz=imgsz, conf=conf, iou=iou,
                                         max_det=max_det, device=dev, verbose=False)
                r2 = self._model.predict(batch, imgsz=imgsz, conf=conf, iou=iou,
                                         max_det=max_det, device=dev, verbose=False, augment=True)
                for a, b in zip(r1, r2):
                    n1 = int(len(a.boxes) if (a.boxes is not None) else 0)
                    n2 = int(len(b.boxes) if (b.boxes is not None) else 0)
                    out.append(0.5 * (n1 + n2))
        return np.array(out, dtype=float)

    @torch.no_grad()
    def _yolo_feats(self, paths, imgsz, conf, iou, max_det):
        """Фичи для Ridge: [n, conf_sum, conf_mean, conf_max, area_mean, frac_small, frac_mid, frac_big]."""
        self._ensure_model()
        rows = []
        dev = self._infer_device()
        for i in tqdm(range(0, len(paths), 64), disable=not self.verbose, desc="[feats]"):
            batch = paths[i:i+64]
            res = self._model.predict(batch, imgsz=imgsz, conf=conf, iou=iou,
                                      max_det=max_det, device=dev, verbose=False)
            for r in res:
                if r.boxes is None or len(r.boxes) == 0:
                    rows.append(dict(n=0, conf_sum=0, conf_mean=0, conf_max=0,
                                     area_mean=0, frac_small=0, frac_mid=0, frac_big=0))
                    continue
                confs = r.boxes.conf.cpu().numpy()
                xywhn = r.boxes.xywhn.cpu().numpy()
                areas = (xywhn[:, 2] * xywhn[:, 3]).clip(0, 1)
                rows.append(dict(
                    n=len(confs),
                    conf_sum=float(confs.sum()),
                    conf_mean=float(confs.mean()),
                    conf_max=float(confs.max()),
                    area_mean=float(areas.mean()),
                    frac_small=float((areas < self.small_thr).mean()),
                    frac_mid=float(((areas >= self.small_thr) & (areas <= self.big_thr)).mean()),
                    frac_big=float((areas > self.big_thr).mean())
                ))
        return pd.DataFrame(rows).to_numpy()

    @torch.no_grad()
    def _detect_conf_area(self, paths, imgsz, conf_base, iou, max_det):
        """Возвращает список массивов Nx2 [conf, area] при базовом пороге conf_base (для area-gated)."""
        self._ensure_model()
        dev = self._infer_device()
        out = []
        for i in tqdm(range(0, len(paths), 64), disable=not self.verbose, desc="[boxes]"):
            batch = paths[i:i+64]
            res = self._model.predict(batch, imgsz=imgsz, conf=conf_base, iou=iou,
                                      max_det=max_det, device=dev, verbose=False)
            for r in res:
                if r.boxes is None or len(r.boxes) == 0:
                    out.append(np.empty((0, 2), dtype=np.float32))
                    continue
                confs = r.boxes.conf.cpu().numpy()
                xywhn = r.boxes.xywhn.cpu().numpy()
                areas = (xywhn[:, 2] * xywhn[:, 3]).clip(0, 1)
                out.append(np.stack([confs, areas], axis=1))
        return out

    @staticmethod
    def _count_with_area_gate(conf_area_list, conf_small, conf_big, area_thr):
        """Подсчёт с двупороговой фильтрацией по площади."""
        counts = []
        for ca in conf_area_list:
            if ca.size == 0:
                counts.append(0); continue
            conf = ca[:, 0]; area = ca[:, 1]
            small_mask = (area < area_thr)  & (conf >= conf_small)
            big_mask   = (area >= area_thr) & (conf >= conf_big)
            counts.append(int(small_mask.sum() + big_mask.sum()))
        return np.array(counts, dtype=float)

    # -------------------- подвыборка валидации --------------------
    def _subset_val(self, val_df: pd.DataFrame) -> pd.DataFrame:
        if self.tune_val_subsample is None:
            return val_df
        n = len(val_df)
        if isinstance(self.tune_val_subsample, float):
            k = max(1, int(round(n * self.tune_val_subsample)))
        else:
            k = int(self.tune_val_subsample)
        k = min(k, n)
        return val_df.sample(n=k, random_state=self.random_state).reset_index(drop=True)

    # -------------------- Ridge по резидуалу с K-fold CV --------------------
    def _fit_ridge_cv_on_residual(self, X: np.ndarray, y_true: np.ndarray, y_plain: np.ndarray):
        """
        Fit a Ridge regressor on the counting residual `y_true - y_plain`.

        Features are standardised with the mean/std of `X` (zero std replaced by 1).
        `alpha` is chosen from `ridge_alpha_grid` by K-fold CV on the residual RMSE
        (K = min(5, n) for n >= 3, else 2), then the model is refit on all rows.

        Returns:
            dict with keys `model`, `alpha`, `mu`, `sd`, `cv_rmse`.
        """
        alphas = self.ridge_alpha_grid
        k = min(5, len(y_true)) if len(y_true) >= 3 else 2
        kf = KFold(n_splits=k, shuffle=True, random_state=self.random_state)

        # feature standardisation
        mu = X.mean(axis=0)
        sd = X.std(axis=0); sd[sd == 0] = 1.0
        Xs = (X - mu) / sd
        r = y_true - y_plain

        best_alpha, best_cv = None, float("inf")
        for a in alphas:
            cv_scores = []
            for tr, va in kf.split(Xs):
                m = Ridge(alpha=float(a)).fit(Xs[tr], r[tr])
                pr = m.predict(Xs[va])
                # RMSE as sqrt(MSE): the `squared=False` keyword is no longer accepted
                # by recent scikit-learn releases, sqrt(MSE) works on every version.
                cv_scores.append(float(np.sqrt(mean_squared_error(r[va], pr))))
            cv_rmse = float(np.mean(cv_scores))
            if cv_rmse < best_cv:
                best_alpha, best_cv = float(a), cv_rmse

        # final fit on all rows with the selected alpha
        model = Ridge(alpha=best_alpha).fit(Xs, r)
        return dict(model=model, alpha=best_alpha, mu=mu, sd=sd, cv_rmse=best_cv)

    # -------------------- тюнинг и/или калибровка --------------------
    def _tune_and_or_calibrate(self, val_df: pd.DataFrame, imgsz: int):
        val_sub = self._subset_val(val_df)
        paths = val_sub[self.image_col].tolist()
        y_true = np.array([len(self._parse_boxes(r)) for _, r in val_sub.iterrows()], dtype=float)

        # 1) Тюнинг plain или area-gated
        if self.enable_tuning:
            if self.enable_area_gate:
                # Подготовим базовый порог для сбора кандидатов, согласованный с минимальным cs
                min_cs = min(self.tune_conf_small_grid) if len(self.tune_conf_small_grid) else 0.10
                base_collect = max(0.03, min(self.gate_conf_base, min_cs - 0.02))
                if self.verbose:
                    print(f"[tune-gate] base_collect={base_collect:.3f} (min_cs={min_cs:.3f})")

                # 1) Предрасчёт списков [conf, area] для всех (iou, max_det)
                iou_grid = tuple(self.tune_iou_grid)
                md_grid  = tuple(self.tune_max_det_grid)
                conf_area_by_key = {}
                total_prepasses = len(iou_grid) * len(md_grid)
                if self.verbose:
                    print(f"[tune-gate] precomputing boxes for {total_prepasses} (iou,max_det) pairs...")
                for iou_ in iou_grid:
                    for md_ in md_grid:
                        conf_area_by_key[(iou_, md_)] = self._detect_conf_area(
                            paths, imgsz=imgsz,
                            conf_base=float(base_collect),
                            iou=float(iou_), max_det=int(md_)
                        )

                # 2) Подбор cs/cb/area_thr + iou/max_det
                all_combos = []
                for iou_ in iou_grid:
                    for md_ in md_grid:
                        for cs in self.tune_conf_small_grid:
                            # эффективная база (ниже cs)
                            base_eff = max(0.03, min(float(self.gate_conf_base), float(cs) - 0.02))
                            for cb in self.tune_conf_big_grid:
                                for at in self.tune_area_thr_grid:
                                    all_combos.append((float(iou_), int(md_), float(cs), float(cb), float(at), float(base_eff)))

                random.shuffle(all_combos)
                full_space = len(all_combos)
                if self.tune_max_combinations is not None:
                    all_combos = all_combos[:int(self.tune_max_combinations)]
                if self.verbose:
                    print(f"[tune-gate] search combos: {len(all_combos)} (cap), full={full_space}")

                best = dict(rmse=1e9, iou=None, max_det=None, cs=None, cb=None, at=None, base=None)
                for (iou_, md_, cs, cb, at, base_eff) in all_combos:
                    conf_area = conf_area_by_key[(iou_, md_)]
                    y_pred = self._count_with_area_gate(conf_area, cs, cb, at)
                    rmse = mean_squared_error(y_true, y_pred, squared=False)
                    if rmse < best["rmse"]:
                        best.update(dict(rmse=rmse, iou=iou_, max_det=md_,
                                         cs=cs, cb=cb, at=at, base=base_eff))

                if self.verbose:
                    print(f"[tune-gate] best: cs={best['cs']:.3f}, cb={best['cb']:.3f}, "
                          f"area_thr={best['at']:.4f}, iou={best['iou']}, max_det={best['max_det']}  RMSE={best['rmse']:.3f}")

                self.calib_.update(dict(
                    best_conf=None, best_iou=float(best['iou']), best_max_det=int(best['max_det']),
                    gate_conf_small=float(best['cs']), gate_conf_big=float(best['cb']),
                    gate_area_thr=float(best['at']), gate_conf_base=float(best['base'])
                ))
            else:
                combos = [(float(c), float(i), int(m))
                          for i in self.tune_iou_grid
                          for m in self.tune_max_det_grid
                          for c in self.tune_conf_grid]
                random.shuffle(combos)
                full_space = len(combos)
                if self.tune_max_combinations is not None:
                    combos = combos[:int(self.tune_max_combinations)]
                if self.verbose:
                    print(f"[tune-plain] search combos: {len(combos)} (cap), full={full_space}")

                best = dict(rmse=1e9, conf=None, iou=None, max_det=None)
                for conf, iou_v, max_det in combos:
                    y_pred = self._raw_counts(paths, imgsz=imgsz, conf=conf, iou=iou_v, max_det=max_det)
                    rmse = mean_squared_error(y_true, y_pred, squared=False)
                    if rmse < best["rmse"]:
                        best.update(dict(rmse=rmse, conf=conf, iou=iou_v, max_det=max_det))
                if self.verbose:
                    print(f"[tune] best plain count: conf={best['conf']}, iou={best['iou']}, max_det={best['max_det']}  RMSE={best['rmse']:.3f}")

                self.calib_.update(dict(
                    best_conf=best['conf'], best_iou=best['iou'], best_max_det=best['max_det'],
                    gate_conf_small=None, gate_conf_big=None, gate_area_thr=None, gate_conf_base=None
                ))
        else:
            # без тюнинга — дефолты для plain
            self.calib_.update(dict(
                best_conf=0.25, best_iou=0.5, best_max_det=1000,
                gate_conf_small=None, gate_conf_big=None, gate_area_thr=None, gate_conf_base=None
            ))

        # 2) Калибровка Ridge (по резидуалу, с CV)
        ridge_model, ridge_alpha = None, None
        if self.enable_ridge:
            # строим y_plain на том же сабсете val_sub
            if self.enable_tuning and self.enable_area_gate and self.calib_.get("gate_conf_small") is not None:
                iou_use = float(self.calib_["best_iou"])
                md_use  = int(self.calib_["best_max_det"])
                conf_area = self._detect_conf_area(paths, imgsz=imgsz,
                                                   conf_base=float(self.calib_["gate_conf_base"]),
                                                   iou=iou_use, max_det=md_use)
                y_plain = self._count_with_area_gate(conf_area,
                                                     float(self.calib_["gate_conf_small"]),
                                                     float(self.calib_["gate_conf_big"]),
                                                     float(self.calib_["gate_area_thr"]))
                X = self._yolo_feats(paths, imgsz=imgsz,
                                     conf=float(self.calib_["gate_conf_base"]),
                                     iou=iou_use, max_det=md_use)
            else:
                conf_use = float(self.calib_["best_conf"] if self.calib_.get("best_conf") is not None else 0.25)
                iou_use  = float(self.calib_["best_iou"]  if self.calib_.get("best_iou")  is not None else 0.5)
                md_use   = int(self.calib_["best_max_det"] if self.calib_.get("best_max_det") is not None else 1000)
                y_plain  = self._raw_counts(paths, imgsz=imgsz, conf=conf_use, iou=iou_use, max_det=md_use)
                X = self._yolo_feats(paths, imgsz=imgsz, conf=conf_use, iou=iou_use, max_det=md_use)

            pack = self._fit_ridge_cv_on_residual(X, y_true, y_plain)
            ridge_model, ridge_alpha = pack["model"], pack["alpha"]
            if self.verbose:
                print(f"[calib] Ridge(residual) alpha={ridge_alpha}  CV-RMSE(resid)={pack['cv_rmse']:.3f}")

            # сохраним стандартизацию
            self.calib_.update(dict(
                ridge_alpha=ridge_alpha, ridge_model=ridge_model,
                ridge_mu=pack["mu"], ridge_sd=pack["sd"], imgsz=imgsz
            ))
        else:
            self.calib_.update(dict(ridge_alpha=None, ridge_model=None, imgsz=imgsz))

    # -------------------- финальная валидация подсчёта --------------------
    def _validate_counting(self, val_df: pd.DataFrame):
        paths = val_df[self.image_col].tolist()
        y_true = np.array([len(self._parse_boxes(r)) for _, r in val_df.iterrows()], dtype=float)

        imgsz = self.calib_['imgsz'] or 640

        # plain/gate
        if self.enable_tuning and self.enable_area_gate and self.calib_.get("gate_conf_small") is not None:
            iou_use = float(self.calib_["best_iou"])
            md_use  = int(self.calib_["best_max_det"])
            conf_area = self._detect_conf_area(paths, imgsz=imgsz, conf_base=self.calib_["gate_conf_base"],
                                               iou=iou_use, max_det=md_use)
            y_plain = self._count_with_area_gate(conf_area,
                                                 self.calib_["gate_conf_small"],
                                                 self.calib_["gate_conf_big"],
                                                 self.calib_["gate_area_thr"])
        else:
            conf  = self.calib_['best_conf'] if self.enable_tuning else 0.25
            iou_v   = self.calib_['best_iou']  if self.enable_tuning else 0.5
            max_det = self.calib_['best_max_det'] if self.enable_tuning else 1000
            y_plain = self._raw_counts(paths, imgsz, conf, iou_v, max_det)

        rmse_plain = mean_squared_error(y_true, y_plain, squared=False)
        mae_plain  = mean_absolute_error(y_true, y_plain)

        # calibrated
        if self.enable_ridge and self.calib_['ridge_model'] is not None:
            if self.enable_tuning and self.enable_area_gate and self.calib_.get("gate_conf_small") is not None:
                X = self._yolo_feats(paths, imgsz=imgsz, conf=float(self.calib_["gate_conf_base"]),
                                     iou=float(self.calib_["best_iou"]), max_det=int(self.calib_["best_max_det"]))
            else:
                X = self._yolo_feats(paths, imgsz=imgsz,
                                     conf=(self.calib_["best_conf"] if self.calib_["best_conf"] is not None else 0.25),
                                     iou=(self.calib_["best_iou"] if self.calib_["best_iou"] is not None else 0.5),
                                     max_det=(self.calib_["best_max_det"] if self.calib_["best_max_det"] is not None else 1000))
            mu = self.calib_.get("ridge_mu"); sd = self.calib_.get("ridge_sd")
            Xs = (X - mu) / sd
            resid = self.calib_['ridge_model'].predict(Xs)
            y_cal = np.clip(y_plain + resid, 0, None)
            rmse_cal = mean_squared_error(y_true, y_cal, squared=False)
            mae_cal  = mean_absolute_error(y_true, y_cal)
            print(f"[val-count] plain: RMSE={rmse_plain:.3f}, MAE={mae_plain:.3f}  |  calibrated: RMSE={rmse_cal:.3f}, MAE={mae_cal:.3f}")
        else:
            print(f"[val-count] plain: RMSE={rmse_plain:.3f}, MAE={mae_plain:.3f}  |  calibrated: (disabled)")

    # -------------------- публичный инференс: детекции --------------------
    @torch.no_grad()
    def predict(self, df: pd.DataFrame,
                conf: float = 0.25, iou: float = 0.6,
                imgsz: int = 640, device: str | int | None = "auto",
                max_det: int = 300, agnostic_nms: bool = False,
                batch_size: int = 64) -> pd.DataFrame:
        """Run plain detection on every image listed in ``df`` (count calibration is NOT applied).

        Args:
            df: frame that contains the ``self.image_col`` column with image paths.
            conf: confidence threshold forwarded to the model call.
            iou: IoU threshold forwarded to the model call.
            imgsz: inference image size forwarded to the model call.
            device: device spec passed through ``self._resolve_device``;
                ``None`` falls back to ``self._infer_device()``.
            max_det: maximum number of detections forwarded to the model call.
            agnostic_nms: forwarded to the model call unchanged.
            batch_size: how many image paths are handed to the model per call
                (defaults to 64, the previously hard-coded value).

        Returns:
            DataFrame with one row per result object returned by the model and the
            columns ``self.image_col`` (the path reported by the result), ``"count"``
            (number of boxes) and ``"boxes_json"`` (JSON list of
            ``{cls, conf, x, y, w, h}`` dicts built from ``boxes.xywhn``).
            The three columns are present even when ``df`` is empty.

        Raises:
            AssertionError: if ``self.image_col`` is missing from ``df``.
            ValueError: if ``batch_size`` is smaller than 1.
        """
        assert self.image_col in df.columns, f"column {self.image_col!r} not found in df"
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        if self._model is None:
            # Lazily load weights: prefer the trained model path, else the initial checkpoint.
            self._model = YOLO(self.model_path or self.model_ckpt)

        dev = self._resolve_device(device if device is not None else self._infer_device())

        paths = df[self.image_col].tolist()
        out_columns = [self.image_col, "count", "boxes_json"]
        preds = []
        for i in tqdm(range(0, len(paths), batch_size), disable=not self.verbose, desc="[predict]"):
            batch = paths[i:i + batch_size]
            res = self._model(batch, conf=conf, iou=iou, imgsz=imgsz,
                              device=dev, verbose=False, max_det=max_det,
                              agnostic_nms=agnostic_nms)
            for r in res:
                boxes = []
                if r.boxes is not None and len(r.boxes):
                    xywhn = r.boxes.xywhn.cpu().numpy()
                    confv = r.boxes.conf.cpu().numpy()
                    clsv  = r.boxes.cls.cpu().numpy().astype(int)
                    for (x, y, w, h), c, k in zip(xywhn, confv, clsv):
                        boxes.append({"cls": int(k), "conf": float(c),
                                      "x": float(x), "y": float(y), "w": float(w), "h": float(h)})
                preds.append({
                    # The path comes from the result object, not from the input list.
                    self.image_col: r.path,
                    "count": len(boxes),
                    "boxes_json": json.dumps(boxes, ensure_ascii=False)
                })
        # Explicit columns keep the schema stable when there are no predictions at all.
        return pd.DataFrame(preds, columns=out_columns)

    # -------------------- публичный инференс: подсчёт --------------------
    @torch.no_grad()
    def predict_counts(self, df: pd.DataFrame,
                       imgsz: int | None = None,
                       conf: float | None = None,
                       iou: float | None = None,
                       max_det: int | None = None,
                       device: str | int | None = "auto",
                       clamp_nonneg: bool = True,
                       do_round: bool = False) -> pd.DataFrame:
        """
        Подсчёт объектов.
          - Если enable_ridge=True и калибратор обучен → y = y_plain + Ridge(residual).
          - Иначе → plain len(dets).
          - Если enable_area_gate=True и тюнинг выполнен → двупороговая фильтрация (conf_small/conf_big) по area.
        """
        assert self.image_col in df.columns
        if self._model is None:
            self._model = YOLO(self.model_path or self.model_ckpt)

        dev = self._resolve_device(device if device is not None else self._infer_device())
        imgsz = imgsz or self.calib_['imgsz'] or 640
        paths = df[self.image_col].tolist()

        # area-gated путь
        if self.enable_tuning and self.enable_area_gate and self.calib_.get("gate_conf_small") is not None:
            conf_base = float(self.calib_["gate_conf_base"])
            iou_use   = float(self.calib_["best_iou"])
            max_det_use = int(self.calib_["best_max_det"])
            conf_area = self._detect_conf_area(paths, imgsz=imgsz, conf_base=conf_base, iou=iou_use, max_det=max_det_use)
            cs, cb, at = float(self.calib_["gate_conf_small"]), float(self.calib_["gate_conf_big"]), float(self.calib_["gate_area_thr"])
            y_plain = self._count_with_area_gate(conf_area, cs, cb, at)

            if self.enable_ridge and self.calib_.get("ridge_model") is not None:
                X = self._yolo_feats(paths, imgsz=imgsz, conf=conf_base, iou=iou_use, max_det=max_det_use)
                # стандартизация и предсказание резидуала
                mu = self.calib_.get("ridge_mu"); sd = self.calib_.get("ridge_sd")
                Xs = (X - mu) / sd
                resid = self.calib_['ridge_model'].predict(Xs)
                y = y_plain + resid
            else:
                y = y_plain

        else:
            # обычный plain путь
            if self.enable_tuning and self.calib_.get("best_conf") is not None:
                conf_def, iou_def, max_det_def = self.calib_['best_conf'], self.calib_['best_iou'], self.calib_['best_max_det']
            else:
                conf_def, iou_def, max_det_def = 0.25, 0.5, 1000

            use_conf  = conf    if conf    is not None else conf_def
            use_iou   = iou     if iou     is not None else iou_def
            use_maxdet= max_det if max_det is not None else max_det_def

            if self.enable_ridge and self.calib_.get("ridge_model") is not None:
                X = self._yolo_feats(paths, imgsz=imgsz, conf=use_conf, iou=use_iou, max_det=use_maxdet)
                mu = self.calib_.get("ridge_mu"); sd = self.calib_.get("ridge_sd")
                Xs = (X - mu) / sd
                resid = self.calib_['ridge_model'].predict(Xs)
                # базовый plain-счёт для сложения с резидуалом:
                y_plain = self._raw_counts(paths, imgsz=imgsz, conf=use_conf, iou=use_iou, max_det=use_maxdet)
                y = y_plain + resid
            else:
                y = self._raw_counts(paths, imgsz=imgsz, conf=use_conf, iou=use_iou, max_det=use_maxdet)

        if clamp_nonneg: y = np.clip(y, 0, None)
        if do_round:     y = np.rint(y)

        out = df[[self.image_col]].copy()
        out["label"] = y
        return out

    # -------------------- housekeeping --------------------
    def cleanup(self):
        if self._tmpdir_owned and os.path.isdir(self.data_root):
            shutil.rmtree(self.data_root, ignore_errors=True)

In [4]:
from typing import *

In [5]:
from glob import glob
from pathlib import Path

def read_yolo_txt(txt_path: str) -> list[list[float]]:
    """Load boxes from a YOLO-style label file.

    Each usable line looks like ``cls x y w h``; the class id is dropped and only
    the four coordinates are kept. Lines with fewer than five tokens are skipped,
    as are boxes whose centre lies outside [0, 1] or whose width/height lies
    outside (0, 1].

    Returns:
        List of ``[x, y, w, h]`` lists; empty when the file does not exist.
    """
    if not os.path.exists(txt_path):
        return []

    with open(txt_path, "r", encoding="utf-8") as handle:
        token_rows = [line.split() for line in handle]

    kept = []
    for tokens in token_rows:
        if len(tokens) < 5:
            continue
        # Tokens are: cls x y w h -> keep the coordinates only.
        cx, cy, bw, bh = (float(tok) for tok in tokens[1:5])
        centre_ok = 0 <= cx <= 1 and 0 <= cy <= 1
        size_ok = 0 < bw <= 1 and 0 < bh <= 1
        if centre_ok and size_ok:
            kept.append([cx, cy, bw, bh])
    return kept

def df_from_dirs(images_dir: str, labels_dir: str) -> pd.DataFrame:
    """Pair every file in ``images_dir`` with its YOLO label file from ``labels_dir``.

    Args:
        images_dir: directory whose regular files are treated as images
            (sub-directories are skipped; entries are processed in sorted order).
        labels_dir: directory searched for ``<image stem>.txt`` label files.

    Returns:
        DataFrame with columns ``image_path`` (str) and ``label`` (list of
        ``[x, y, w, h]`` boxes as returned by ``read_yolo_txt``; empty list when the
        label file is missing). Both columns are present even if no file was found.
    """
    columns = ["image_path", "label"]
    rows = []
    for img_path in sorted(glob(os.path.join(images_dir, "*"))):
        if not os.path.isfile(img_path):
            continue
        label_path = os.path.join(labels_dir, Path(img_path).stem + ".txt")
        rows.append({"image_path": img_path, "label": read_yolo_txt(label_path)})
    # Explicit columns keep the schema stable for an empty directory.
    return pd.DataFrame(rows, columns=columns)

# Replace these with your actual dataset locations.
train_img_dir = "/kaggle/input/all-cups-workout-seagulls/train/train/images/"
train_lbl_dir = "/kaggle/input/all-cups-workout-seagulls/train/train/labels/"
valid_img_dir = "/kaggle/input/all-cups-workout-seagulls/train/valid/images/"
valid_lbl_dir = "/kaggle/input/all-cups-workout-seagulls/train/valid/labels/"
test_img = "/kaggle/input/all-cups-workout-seagulls/test/images/"

# Build the train and validation frames from their (images, labels) directory pairs.
train_df, val_df = (
    df_from_dirs(img_dir, lbl_dir)
    for img_dir, lbl_dir in (
        (train_img_dir, train_lbl_dir),
        (valid_img_dir, valid_lbl_dir),
    )
)

# Test images have no labels: only the sorted list of paths is needed.
test_image_paths = sorted(glob(os.path.join(test_img, "*")))
submission_data = pd.DataFrame({"image_path": test_image_paths})

In [6]:
# pipeline = YOLODetectionPipeline(
#     model_ckpt="yolov10n.pt",
#     image_col="image_path",
#     boxes_col="label",
#     enable_tuning=True,
#     enable_ridge=True,
#     validate_count=True,
#     tune_val_subsample=None,  # вся валидация для подбора параметров для подсчёта объектов
#     tune_max_combinations=100,  ######################### 50
#     random_state=42,
#     verbose=True
# )
# pipeline.fit(
#     train_df=train_df,
#     val_df=val_df,
#     lr0=0.01,
#     epochs=70,  ######################### 40
#     imgsz=640,
#     batch=32,
#     iou=0.5,
#     rect=True,
#     device='0,1'
# )

# import random

# # 1) Сетки, основанные на ваших перцентилях
# AREA_THR_GRID = [0.0008, 0.0010, 0.0012, 0.0015]
# CS_GRID = [0.10, 0.12, 0.14, 0.18]
# CB_GRID = [0.30, 0.40, 0.50]
# IOU_GRID = [0.55, 0.60]           # жёстче на разреженной вал
# MAXDET_GRID = [300, 600]          # хватает при p95=6
# RIDGE_ALPHA_GRID = [0.3, 1.0, 3.0]

# def make_gate_candidates():
#     cands = []
#     for area_thr in AREA_THR_GRID:
#         for cs in CS_GRID:
#             base = max(0.03, cs - 0.03)
#             for cb in CB_GRID:
#                 for iou in IOU_GRID:
#                     for md in MAXDET_GRID:
#                         cands.append(dict(
#                             area_thr=area_thr, cs=cs, cb=cb,
#                             gate_conf_base=base,
#                             iou=iou, max_det=md
#                         ))
#     random.Random(42).shuffle(cands)
#     return cands

# gate_cands = make_gate_candidates()[:96]  # ограничили до ~100

# pipe = YOLODetectionPipeline(
#     model_ckpt="yolov8n.pt",
#     image_col="image_path",
#     boxes_col="label",
#     class_names=["obj"],
#     verbose=True,
#     use_symlinks=True,

#     # режимы
#     enable_tuning=True,
#     enable_ridge=True,
#     validate_count=True,
#     enable_area_gate=True,
#     enable_tta_flip=False,      # начнём без TTA

#     # лимит на поиск
#     tune_val_subsample=None,
#     tune_max_combinations=len(gate_cands),

#     # сетки для gate (plain отключаем на этом проходе)
#     tune_conf_small_grid = sorted(set([c["cs"] for c in gate_cands])),
#     tune_conf_big_grid   = sorted(set([c["cb"] for c in gate_cands])),
#     tune_area_thr_grid   = sorted(set([c["area_thr"] for c in gate_cands])),

#     # фикс для plain (чтобы не участвовали в поиске)
#     tune_conf_grid = (0.30,),   # заглушки
#     tune_iou_grid  = tuple(sorted(set([c["iou"] for c in gate_cands]))),
#     tune_max_det_grid = tuple(sorted(set([c["max_det"] for c in gate_cands]))),

#     # базовый порог — внутри пайпа сделайте зависимость base от cs, если поддерживается;
#     # если нет — установите один: близко к cs-0.03, например 0.07
#     gate_conf_base = 0.07,

#     # Ridge
#     ridge_alpha_grid = RIDGE_ALPHA_GRID,

#     # фич-пороги
#     small_thr=0.0010,
#     big_thr=0.003,

#     random_state=42
# )

# Constructor options for the pipeline, grouped by purpose.
pipeline_config = dict(
    # model / data columns
    model_ckpt="yolov8n.pt",
    image_col="image_path",
    boxes_col="label",
    class_names=["obj"],
    verbose=True,
    use_symlinks=True,
    # counting modes
    enable_tuning=True,
    enable_ridge=True,
    validate_count=True,
    enable_area_gate=True,
    enable_tta_flip=False,
    # search budget
    tune_val_subsample=None,
    tune_max_combinations=500,
    # search grids
    tune_conf_grid=(0.18, 0.21, 0.24, 0.27, 0.30, 0.33, 0.36, 0.39, 0.42),
    tune_iou_grid=(0.4, 0.45, 0.50, 0.55, 0.60, 0.65),
    tune_max_det_grid=(300, 600, 1000),
    tune_conf_small_grid=(0.08, 0.10, 0.12, 0.14, 0.16, 0.18, 0.20),
    tune_conf_big_grid=(0.28, 0.32, 0.36, 0.40, 0.45, 0.50, 0.55),
    tune_area_thr_grid=(0.0006, 0.0008, 0.0010, 0.0012, 0.0015, 0.0018),
    ridge_alpha_grid=(0.1, 0.3, 0.6, 1.0, 2.0, 3.0, 5.0, 7.5, 10.0),
    # gate base threshold and feature area thresholds
    gate_conf_base=0.07,
    small_thr=0.0010,
    big_thr=0.003,
    random_state=42,
)

# Training options handed to fit().
# NOTE(review): Ultralytics appears to disable rect=True under multi-GPU training — confirm.
fit_config = dict(
    epochs=60,
    imgsz=640,
    batch=32,
    device='0,1',
    workers=4,
    lr0=0.01,
    cos_lr=True,
    rect=True,
    iou=0.5,
)

pipe = YOLODetectionPipeline(**pipeline_config)

# Kept as the last expression so the cell still displays fit()'s return value.
pipe.fit(train_df=train_df, val_df=val_df, **fit_config)

[device] training device='0,1'


[build] train: 100%|██████████| 500/500 [00:00<00:00, 1303.19it/s]
[build] val: 100%|██████████| 99/99 [00:00<00:00, 3325.13it/s]


Checkpoint 'yolov8n.pt' not found. Attempting to download...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 66.0MB/s 0.1s
Ultralytics 8.3.200 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
                                                       CUDA:1 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=True, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/tmp/yolo_ds_mafoppgg/dataset.yaml, degrees=0.0, deterministic=True, device=0,1, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=60, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.5, keras=False, k

  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all         99        110      0.885      0.839      0.878      0.452
Speed: 0.3ms preprocess, 14.3ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1m/kaggle/working/runs/detect/train[0m
[fit] best model: /kaggle/working/runs/detect/train/weights/best.pt
[tune-gate] base_collect=0.060 (min_cs=0.080)
[tune-gate] precomputing boxes for 18 (iou,max_det) pairs...


[boxes]: 100%|██████████| 2/2 [00:02<00:00,  1.08s/it]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.60it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.65it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.63it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.63it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.59it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.59it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.62it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.59it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.60it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.60it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.59it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.62it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.58it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.60it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.58it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.62it/s]
[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.61it/s]


[tune-gate] search combos: 500 (cap), full=5292
[tune-gate] best: cs=0.160, cb=0.360, area_thr=0.0018, iou=0.45, max_det=300  RMSE=0.348


[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.57it/s]
[feats]: 100%|██████████| 2/2 [00:01<00:00,  1.59it/s]


[calib] Ridge(residual) alpha=10.0  CV-RMSE(resid)=0.354


[boxes]: 100%|██████████| 2/2 [00:01<00:00,  1.63it/s]
[feats]: 100%|██████████| 2/2 [00:01<00:00,  1.64it/s]

[val-count] plain: RMSE=0.348, MAE=0.121  |  calibrated: RMSE=0.334, MAE=0.151





'/kaggle/working/runs/detect/train/weights/best.pt'

In [7]:
# Predict per-image counts for the test set and write the submission file.
preds = pipe.predict_counts(submission_data)['label']
submission = pd.DataFrame({
    # basename() does not depend on the exact spelling or trailing slash of test_img.
    'filename': [os.path.basename(path) for path in submission_data['image_path']],
    # Counts may be fractional after calibration; the submission stores integers.
    'num': [round(x) for x in preds]
})
submission.to_csv("submission.csv", index=False)
submission.head(20)

[boxes]: 100%|██████████| 7/7 [00:09<00:00,  1.32s/it]
[feats]: 100%|██████████| 7/7 [00:06<00:00,  1.02it/s]


Unnamed: 0,filename,num
0,20200520_194614_01_JPG.rf.608a8b0fb1ef6a00bc4d...,0
1,20200520_194657_01_JPG.rf.9d2f98d60b7d94055523...,1
2,20200520_214839_01_JPG.rf.9b38fd8cf74453b18370...,30
3,20200521_065125_01_JPG.rf.94e220a6ec114bcc6dca...,32
4,20200521_085200_01_JPG.rf.bc31e4d44d1925899c4b...,32
5,20200521_165417_01_JPG.rf.8f9738ea14df1bf74a3f...,25
6,20200521_185451_01_JPG.rf.ce8bb474653d4a11503c...,22
7,20200521_195509_01_JPG.rf.6ee8bf041b3178e2174d...,28
8,20200522_055812_01_JPG.rf.792e8156e38c8c35ad3b...,23
9,20200522_075847_01_JPG.rf.ff82846fbe8a7d7fcfa5...,24
