In [3]:
import sys
sys.path.append('../utils')
sys.path.append('..')

from interpolate.markup_utils import load_markup, yolo_dataset_info
from src.metrics import compute_border_metrics, compute_precision_recall, compute_non_border_metrics

### Загрузка датасета и модели

In [4]:
CONFIG_PATH = '../config.json'
SPLIT = 'test'
IOU_THRESHOLD = 0.7

In [5]:
# Load config
import json
import numpy as np
from pathlib import Path

with open(CONFIG_PATH, 'r') as f:
    config = json.load(f)

# Load labels
dataset_info = yolo_dataset_info(Path(config['data']))
gt_labels_dir = Path(dataset_info[SPLIT]) / 'labels'

In [6]:
from ultralytics import YOLO
model = YOLO(config['model'])

Error decoding JSON from /home/kondrashov/.config/Ultralytics/persistent_cache.json. Starting with an empty dictionary.


### Предсказание с лучшим по F1 confidence

In [7]:

import subprocess
import shutil
shutil.rmtree('runs/segment', ignore_errors=True)
# Run YOLO validation to get the best confidence score

# Run validation to get best confidence threshold
val_results = model.val(data=config['data'], split=SPLIT)

best_f1_idx = np.argmax(val_results.seg.curves_results[1][1].mean(axis=0))
best_f1 = val_results.seg.curves_results[1][1][..., best_f1_idx].mean()
best_conf = val_results.seg.curves_results[1][0][best_f1_idx]
print(f"Best F1: {best_f1:.4f} at confidence {best_conf:.4f}")

# Create temporary directory for predictions
pred_labels_dir = Path('runs/segment/predict/labels')

# Run prediction with best confidence
model.predict(
    source=str(Path(dataset_info[SPLIT]) / 'images'),
    conf=best_conf,
    save_txt=True,
)


Ultralytics 8.3.48 🚀 Python-3.10.12 torch-2.6.0+cu124 CUDA:0 (NVIDIA A100 80GB PCIe, 81154MiB)
YOLOv8m-seg summary (fused): 263 layers, 24,586,614 parameters, 0 gradients, 98.7 GFLOPs


[34m[1mval: [0mScanning /alpha/projects/wastie/datasets/26_12_2_classes/test/labels.cache... 731 images, 145 backgrounds, 0 corrupt: 100%|██████████| 731/731 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 46/46 [00:09<00:00,  5.10it/s]


                   all        731       4528      0.904      0.884      0.947      0.827      0.901       0.88       0.94      0.765
                   bot        546       3431      0.917      0.846      0.938      0.818      0.919      0.846      0.931      0.758
                  alum        434       1097      0.891      0.923      0.957      0.835      0.884      0.914      0.948      0.772
Speed: 0.7ms preprocess, 4.1ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1mruns/segment/val[0m
Best F1: 0.8904 at confidence 0.3183

image 1/731 /alpha/projects/wastie/datasets/26_12_2_classes/test/images/tula_sep_0002_2024_07_16_14_17_15_000.jpg: 800x800 9 bots, 10.0ms
image 2/731 /alpha/projects/wastie/datasets/26_12_2_classes/test/images/tula_sep_0002_2024_07_16_14_17_18_000.jpg: 800x800 39 bots, 3 alums, 10.0ms
image 3/731 /alpha/projects/wastie/datasets/26_12_2_classes/test/images/tula_sep_0002_2024_07_16_14_17_21_000.jpg: 800x800 38 bots, 4 alums, 7.6ms
image 

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: ultralytics.engine.results.Masks object
 names: {0: 'bot', 1: 'alum'}
 obb: None
 orig_img: array([[[ 2,  2,  2],
         [ 2,  2,  2],
         [ 2,  2,  2],
         ...,
         [18, 16, 16],
         [18, 16, 16],
         [18, 16, 16]],
 
        [[ 2,  2,  2],
         [ 2,  2,  2],
         [ 2,  2,  2],
         ...,
         [19, 17, 17],
         [19, 17, 17],
         [19, 17, 17]],
 
        [[ 2,  2,  2],
         [ 2,  2,  2],
         [ 2,  2,  2],
         ...,
         [20, 18, 18],
         [20, 18, 18],
         [20, 18, 18]],
 
        ...,
 
        [[ 6,  8,  5],
         [ 6,  8,  5],
         [ 6,  8,  5],
         ...,
         [ 5,  5,  5],
         [ 5,  5,  5],
         [ 6,  6,  6]],
 
        [[ 6,  8,  5],
         [ 6,  8,  5],
         [ 6,  8,  5],
         ...,
         [ 5,  5,  5],
         [ 5,  5,  5],
         [

### Подготавливаем данные

In [8]:
gt_paths = []
pred_paths = []
for gt_path in gt_labels_dir.glob("*.txt"):
    pred_path = pred_labels_dir / gt_path.name
    if not pred_path.exists():
        pred_path.touch()
    gt_paths.append(gt_path)
    pred_paths.append(pred_path)

### Считаем метрики

In [9]:
image_shape = (config['imgsz'], config['imgsz'])

In [10]:
both_metrics = compute_precision_recall(gt_paths, pred_paths, image_shape, IOU_THRESHOLD)
print(f"Metrics:\nPrecision: {both_metrics['precision']:.4f}\nRecall: {both_metrics['recall']:.4f}")

Masks processed: 100%|██████████| 1/1 [00:00<00:00, 318.86it/s]
Masks processed: 0it [00:00, ?it/s]
  precision = pred_matched.mean()
  ret = ret.dtype.type(ret / rcount)
  recall = gt_matched.mean()
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 20/20 [00:00<00:00, 235.19it/s]
Masks processed: 100%|██████████| 10/10 [00:00<00:00, 430.21it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 1/1 [00:00<00:00, 1058.37it/s]
Masks processed: 100%|██████████| 6/6 [00:00<00:00, 468.06it/s]
Masks processed: 100%|██████████| 3/3 [00:00<00:00, 1407.80it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 17/17 [00:00<00:00, 264.53it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 3/3 [00:00<00:00, 1078.60it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 13/13 [00:00<00:00, 675.95it/s]
Masks processed: 100%|███

Metrics:
Precision: 0.8819
Recall: 0.8673





In [11]:
border_metrics = compute_border_metrics(gt_paths, pred_paths, image_shape, IOU_THRESHOLD)
print(f"Border metrics:\nPrecision: {border_metrics['precision']:.4f}\nRecall: {border_metrics['recall']:.4f}")

Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 6/6 [00:00<00:00, 354.24it/s]
Masks processed: 100%|██████████| 4/4 [00:00<00:00, 1016.00it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 1/1 [00:00<00:00, 763.85it/s]
Masks processed: 100%|██████████| 2/2 [00:00<00:00, 589.29it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 5/5 [00:00<00:00, 297.60it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 3/3 [00:00<00:00, 464.30it/s]
Masks processed: 100%|██████████| 1/1 [00:00<00:00, 359.81it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 1/1 [00:00<00:00, 9619.96it/s]
Masks processed: 0

Border metrics:
Precision: 0.7216
Recall: 0.8594


In [12]:
non_border_metrics = compute_non_border_metrics(gt_paths, pred_paths, image_shape, IOU_THRESHOLD)
print(f"Non border metrics:\nPrecision: {non_border_metrics['precision']:.4f}\nRecall: {non_border_metrics['recall']:.4f}")

Masks processed: 100%|██████████| 1/1 [00:00<00:00, 363.77it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 14/14 [00:00<00:00, 240.04it/s]
Masks processed: 100%|██████████| 6/6 [00:00<00:00, 285.26it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 4/4 [00:00<00:00, 350.52it/s]
Masks processed: 100%|██████████| 3/3 [00:00<00:00, 1164.65it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 12/12 [00:00<00:00, 220.48it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 3/3 [00:00<00:00, 1027.76it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 10/10 [00:00<00:00, 583.44it/s]
Masks processed: 100%|██████████| 3/3 [00:00<00:00, 734.68it/s]
Masks processed: 0it [00:00, ?it/s]
Masks processed: 100%|██████████| 4/4 [00:00<00:00, 654.

Non border metrics:
Precision: 0.9170
Recall: 0.8638





In [15]:
# Create a dictionary with all metrics for easy comparison
metrics_comparison = {
    'All objects': both_metrics,
    'Border objects': border_metrics,
    'Non-border objects': non_border_metrics
}

# Print comparison table
print("Metrics comparison:")
print("-" * 60)
print(f"{'Type':<20} {'Precision':>12} {'Recall':>12} {'F1-score':>12}")
print("-" * 60)

for metric_type, metrics in metrics_comparison.items():
    precision = metrics['precision']
    recall = metrics['recall']
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    print(f"{metric_type:<20} {precision:>12.4f} {recall:>12.4f} {f1:>12.4f}")

print("\nAnalysis:")
# Find best performing filter based on F1 score
best_f1 = 0
best_type = None

for metric_type, metrics in metrics_comparison.items():
    precision = metrics['precision']
    recall = metrics['recall']
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    if f1 > best_f1:
        best_f1 = f1
        best_type = metric_type

print(f"The best performing filter is '{best_type}' with F1-score of {best_f1:.4f}")

# Calculate F1 enhancement percentage
baseline_f1 = 2 * (metrics_comparison['All objects']['precision'] * metrics_comparison['All objects']['recall']) / (metrics_comparison['All objects']['precision'] + metrics_comparison['All objects']['recall'])
best_f1_score = 2 * (metrics_comparison[best_type]['precision'] * metrics_comparison[best_type]['recall']) / (metrics_comparison[best_type]['precision'] + metrics_comparison[best_type]['recall'])

f1_enhancement = ((best_f1_score - baseline_f1) / baseline_f1) * 100
print(f"\nF1 score enhancement: {f1_enhancement:.2f}%")
# Calculate error reduction coefficient
error_reduction = (1 - baseline_f1) / (1 - best_f1_score)
print(f"\nError reduction coefficient: {error_reduction:.3f}x ({(error_reduction - 1)*100:.1f}%)")

Metrics comparison:
------------------------------------------------------------
Type                    Precision       Recall     F1-score
------------------------------------------------------------
All objects                0.8819       0.8673       0.8745
Border objects             0.7216       0.8594       0.7845
Non-border objects         0.9170       0.8638       0.8896

Analysis:
The best performing filter is 'Non-border objects' with F1-score of 0.8896

F1 score enhancement: 1.7%

Error reduction coefficient: 1.14x (13.7%)
