In [3]:
from ultralytics.models.yolo.detect import DetectionValidator
from ultralytics.utils.ops import xywh2xyxy
from ultralytics import YOLO
import torch
import numpy as np
from ensemble_boxes_wbf import weighted_boxes_fusion
import time

class WBFValidator(DetectionValidator):
    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        super().__init__(dataloader, save_dir, pbar, args, _callbacks)
        
        self.classes = None
        self.iou_thres = 0.5
        self.skip_box_thr = 0.0001
        self.max_time_img = 0.05
        self.in_place = True
        self.rotated = False
        self.end2end = False
        self.labels = ()
        self.multi_label = False

    
    def postprocess(self, preds):
        super().postprocess(preds)
        return self.post_process_with_wbf(
            preds,
            iou_thres=self.iou_thres,
            labels=self.lb,
            nc=self.nc,
            multi_label=True,
            agnostic=self.args.single_cls or self.args.agnostic_nms,
            max_det=self.args.max_det,
            end2end=self.end2end,
            rotated=self.args.task == "obb",
            skip_box_thr=self.skip_box_thr,
        )
    
    def post_process_with_wbf(self, prediction,
        iou_thres=0.5,
        skip_box_thr=0.0001,
        classes=None,
        agnostic=False,
        multi_label=False,
        labels=(),
        max_det=300,
        nc=0,  # number of classes (optional)
        max_time_img=0.05,
        max_nms=30000,
        max_wh=7680,
        in_place=True,
        rotated=False,
        end2end=False,
    ):
        # Handle list/tuple case (YOLOv8 model in validation mode)
        if isinstance(prediction, (list, tuple)):
            prediction = prediction[0]  # select only inference output

        """# Early return for end-to-end model case
        if preds.shape[-1] == 6 or self.end2end:
            output = [pred[pred[:, 4] > self.args.conf][:self.args.max_det] for pred in preds]
            if self.classes is not None:
                output = [pred[(pred[:, 5:6] == self.classes).any(1)] for pred in output]
            return output"""

        # Get batch size and number of classes
        bs = prediction.shape[0]  # batch size
        nc = nc or (prediction.shape[1] - 4) # number of classes
        nm = prediction.shape[1] - nc - 4  # number of masks
        mi = 4 + nc  # mask start index

        # I think it's done in WBF prefiltering already
        #xc = prediction[:, 4:mi].amax(1) > 0 # candidates

        # Settings
        time_limit = 2.0 + max_time_img * bs  # seconds to quit after
        
        prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
        if not rotated:
            if in_place:
                prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy
            else:
                prediction = torch.cat((xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1)  # xywh to xyxy
        
        t = time.time()
        output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
        
        for xi, x in enumerate(prediction):
            if labels and len(labels[xi]) and not rotated:
                lb = labels[xi]
                v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
                v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
                v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
                x = torch.cat((x, v), 0)

            # Split into components
            box, cls, mask = x.split((4, nc, nm), 1)

            img_size = 512
            
            if multi_label:
                i, j = torch.where(cls > 0)
                conf = cls[i, j]
                pred_boxes = box[i]
                pred_labels = j
            else:
                conf, j = cls.max(1)
                pred_boxes = box
                pred_labels = j

            # Convert to numpy and normalize
            boxes_np = pred_boxes.cpu().numpy()
            
            # Clip to ensure no negative values
            boxes_np = np.clip(boxes_np, 0, img_size)
            
            # Normalize to [0, 1]
            boxes_np = boxes_np / img_size
            
            # Ensure boxes are properly formatted
            boxes_np = np.clip(boxes_np, 0, 1)
            
            scores_np = conf.cpu().numpy()
            labels_np = pred_labels.cpu().numpy()

            # Apply WBF (note the list wrapping for single model case)
            boxes_wbf, scores_wbf, labels_wbf = weighted_boxes_fusion(
                [boxes_np],
                [scores_np],
                [labels_np],
                weights=None,
                iou_thr=iou_thres,
                skip_box_thr=skip_box_thr
            )
            
            # Convert back to absolute coordinates
            boxes_wbf = boxes_wbf * img_size
            
            # Create output tensor
            if len(boxes_wbf):
                out = torch.zeros((len(boxes_wbf), 6 + nm), device=x.device)
                out[:, :4] = torch.from_numpy(boxes_wbf).to(x.device)
                out[:, 4] = torch.from_numpy(scores_wbf).to(x.device)
                out[:, 5] = torch.from_numpy(labels_wbf).to(x.device)
                
                # Limit detections
                out = out[:max_det]
                output[xi] = out

        return output
    
class YOLOWithWBF(YOLO):
    @property
    def task_map(self):
        """Override task_map to use WBFValidator"""
        base_task_map = super().task_map
        base_task_map["detect"]["validator"] = WBFValidator
        return base_task_map

def validate_with_wbf(model_path, data_yaml):
    # Create custom YOLO instance with WBF validator
    model = YOLOWithWBF(model_path)
    
    # Run validation
    metrics = model.val(data=data_yaml)
    return metrics

if __name__ == "__main__":
    model_path = './runs/detect/yolov11s-imgz512-webiswebseg20/weights/best.pt'
    data_yaml = './data/webis-webseg-20-yolo-full/dataset.yaml'
    
    metrics = validate_with_wbf(model_path, data_yaml)
    print("\nValidation Metrics:", metrics)

Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)
YOLO11s summary (fused): 238 layers, 9,413,187 parameters, 0 gradients, 21.3 GFLOPs


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/webis-webseg-20-yolo-full/labels/val... 1698 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1698/1698 [00:37<00:00, 45.37it/s]

[34m[1mval: [0mNew cache created: /home/bruno/vt2-visual-webseg/data/webis-webseg-20-yolo-full/labels/val.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 107/107 [05:34<00:00,  3.13s/it]


                   all       1698      26334       0.04      0.063     0.0219    0.00636
Speed: 0.6ms preprocess, 13.0ms inference, 0.0ms loss, 177.7ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val[0m

Validation Metrics: ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f7eadf1b3d0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,  

In [4]:
from ultralytics.models.yolo.detect import DetectionValidator
from ultralytics.utils.ops import xywh2xyxy
from ultralytics import YOLO
import torch
import numpy as np
from ensemble_boxes_wbf import weighted_boxes_fusion
import time

class WBFValidator(DetectionValidator):
    def __init__(self, dataloader=None, save_dir=None, pbar=None, args=None, _callbacks=None):
        super().__init__(dataloader, save_dir, pbar, args, _callbacks)
        
        self.classes = None
        self.iou_thres = 0.5
        self.skip_box_thr = 0.0001
        self.max_time_img = 0.05
        self.in_place = True
        self.rotated = False
        self.end2end = False
        self.labels = ()
        self.multi_label = False

    
    def postprocess(self, preds):
        super().postprocess(preds)
        return self.post_process_with_wbf(
            preds,
            iou_thres=self.iou_thres,
            labels=self.lb,
            nc=self.nc,
            multi_label=True,
            agnostic=self.args.single_cls or self.args.agnostic_nms,
            max_det=self.args.max_det,
            end2end=self.end2end,
            rotated=self.args.task == "obb",
            skip_box_thr=self.skip_box_thr,
        )
    
    def post_process_with_wbf(self, prediction,
        iou_thres=0.5,
        skip_box_thr=0.0001,
        classes=None,
        agnostic=False,
        multi_label=False,
        labels=(),
        max_det=300,
        nc=0,  # number of classes (optional)
        max_time_img=0.05,
        max_nms=30000,
        max_wh=7680,
        in_place=True,
        rotated=False,
        end2end=False,
    ):
        # Handle list/tuple case (YOLOv8 model in validation mode)
        if isinstance(prediction, (list, tuple)):
            prediction = prediction[0]  # select only inference output

        """# Early return for end-to-end model case
        if preds.shape[-1] == 6 or self.end2end:
            output = [pred[pred[:, 4] > self.args.conf][:self.args.max_det] for pred in preds]
            if self.classes is not None:
                output = [pred[(pred[:, 5:6] == self.classes).any(1)] for pred in output]
            return output"""

        # Get batch size and number of classes
        bs = prediction.shape[0]  # batch size
        nc = nc or (prediction.shape[1] - 4) # number of classes
        nm = prediction.shape[1] - nc - 4  # number of masks
        mi = 4 + nc  # mask start index

        # I think it's done in WBF prefiltering already
        #xc = prediction[:, 4:mi].amax(1) > 0 # candidates

        # Settings
        time_limit = 2.0 + max_time_img * bs  # seconds to quit after
        
        prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
        if not rotated:
            if in_place:
                prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy
            else:
                prediction = torch.cat((xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1)  # xywh to xyxy
        
        t = time.time()
        output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
        
        for xi, x in enumerate(prediction):
            if labels and len(labels[xi]) and not rotated:
                lb = labels[xi]
                v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
                v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
                v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
                x = torch.cat((x, v), 0)

            # Split into components
            box, cls, mask = x.split((4, nc, nm), 1)

            img_size = 512
            
            if multi_label:
                i, j = torch.where(cls > 0)
                conf = cls[i, j]
                pred_boxes = box[i]
                pred_labels = j
            else:
                conf, j = cls.max(1)
                pred_boxes = box
                pred_labels = j

            # Convert to numpy and normalize
            boxes_np = pred_boxes.cpu().numpy()
            
            # Clip to ensure no negative values
            boxes_np = np.clip(boxes_np, 0, img_size)
            
            # Normalize to [0, 1]
            boxes_np = boxes_np / img_size
            
            # Ensure boxes are properly formatted
            boxes_np = np.clip(boxes_np, 0, 1)
            
            scores_np = conf.cpu().numpy()
            labels_np = pred_labels.cpu().numpy()

            # Apply WBF (note the list wrapping for single model case)
            boxes_wbf, scores_wbf, labels_wbf = weighted_boxes_fusion(
                [boxes_np],
                [scores_np],
                [labels_np],
                weights=None,
                iou_thr=iou_thres,
                skip_box_thr=skip_box_thr
            )
            
            # Convert back to absolute coordinates
            boxes_wbf = boxes_wbf * img_size
            
            # Create output tensor
            if len(boxes_wbf):
                out = torch.zeros((len(boxes_wbf), 6 + nm), device=x.device)
                out[:, :4] = torch.from_numpy(boxes_wbf).to(x.device)
                out[:, 4] = torch.from_numpy(scores_wbf).to(x.device)
                out[:, 5] = torch.from_numpy(labels_wbf).to(x.device)
                
                # Limit detections
                out = out[:max_det]
                output[xi] = out

        return output
    
class YOLOWithWBF(YOLO):
    @property
    def task_map(self):
        """Override task_map to use WBFValidator"""
        base_task_map = super().task_map
        base_task_map["detect"]["validator"] = WBFValidator
        return base_task_map

def validate_with_wbf(model_path, data_yaml):
    # Create custom YOLO instance with WBF validator
    model = YOLOWithWBF(model_path)
    
    # Run validation
    metrics = model.val(data=data_yaml)
    return metrics

if __name__ == "__main__":
    model_path = './runs/detect/yolov11s-imgsz512-NANO/weights/best.pt'
    data_yaml = './data/IIS_data_yolo_annotator_2_nano/dataset.yaml'
    
    metrics = validate_with_wbf(model_path, data_yaml)
    print("\nValidation Metrics:", metrics)

Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)
YOLO11s summary (fused): 238 layers, 9,414,735 parameters, 0 gradients, 21.3 GFLOPs


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/IIS_data_yolo_annotator_2_nano/labels/val... 20 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [00:00<00:00, 55.88it/s]

[34m[1mval: [0mNew cache created: /home/bruno/vt2-visual-webseg/data/IIS_data_yolo_annotator_2_nano/labels/val.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:03<00:00,  1.76s/it]


                   all         20         78     0.0547     0.0526     0.0295    0.00604
                header         19         19      0.274      0.263      0.126     0.0235
                footer         19         19          0          0          0          0
                   nav         11         12          0          0          0          0
           maincontent         20         20          0          0      0.022    0.00671
                 title          8          8          0          0          0          0
Speed: 4.5ms preprocess, 37.8ms inference, 0.0ms loss, 77.0ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val2[0m

Validation Metrics: ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3, 4])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f7e809f5d90>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Pre

## NMS

In [7]:
from ultralytics import YOLO

# Load model
model = YOLO('./runs/detect/yolov11s-imgsz512-NANO/weights/best.pt')

# Validation with NMS (default)
metrics_nms = model.val(
    data='./data/IIS_data_yolo_annotator_2_nano/dataset.yaml',
    conf=0.2,
    iou=0.5
)

# Validation without NMS
metrics_no_nms = model.val(
    data='./data/IIS_data_yolo_annotator_2_nano/dataset.yaml',
    conf=0.2,
    iou=0.5,
    augment=True,    # Enable TTA
    nms=False        # Disable NMS
)

Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)
YOLO11s summary (fused): 238 layers, 9,414,735 parameters, 0 gradients, 21.3 GFLOPs


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/IIS_data_yolo_annotator_2_nano/labels/val.cache... 20 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:07<00:00,  3.75s/it]


                   all         20         78      0.896       0.87      0.905      0.857
                header         19         19      0.947      0.942      0.962      0.898
                footer         19         19       0.95          1      0.993      0.973
                   nav         11         12      0.909      0.833      0.901      0.851
           maincontent         20         20      0.958       0.95      0.974      0.883
                 title          8          8      0.714      0.625      0.698      0.679
Speed: 4.8ms preprocess, 108.8ms inference, 0.0ms loss, 3.4ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val4[0m
Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/IIS_data_yolo_annotator_2_nano/labels/val.cache... 20 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:11<00:00,  5.69s/it]


                   all         20         78       0.88       0.86      0.907      0.851
                header         19         19      0.892      0.947      0.963      0.912
                footer         19         19        0.9      0.947      0.971      0.955
                   nav         11         12      0.909      0.833      0.909      0.813
           maincontent         20         20      0.986       0.95      0.993      0.893
                 title          8          8      0.712       0.62      0.698      0.679
Speed: 2.4ms preprocess, 294.5ms inference, 0.0ms loss, 4.3ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val5[0m


In [2]:
from ultralytics import YOLO

# Load model
model = YOLO('/home/bruno/vt2-visual-webseg/runs/detect/yolov11s-imgz512-webiswebseg20-old-tiny-segments/weights/best.pt')

# Validation with NMS (default)
metrics_nms = model.val(
    data='./data/webis-webseg-20-yolo-full/dataset.yaml',
    conf=0.2,
    iou=0.5
)

metrics_no_nms = model.val(
    data='./data/webis-webseg-20-yolo-full/dataset.yaml',
    conf=0.2,
    iou=0.5,
    augment=True,    # Enable TTA
    nms=False        # Disable NMS
)

Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)
YOLO11s summary (fused): 238 layers, 9,413,187 parameters, 0 gradients, 21.3 GFLOPs


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/webis-webseg-20-yolo-full/labels/val... 1698 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1698/1698 [00:34<00:00, 49.19it/s]

[34m[1mval: [0mNew cache created: /home/bruno/vt2-visual-webseg/data/webis-webseg-20-yolo-full/labels/val.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 107/107 [00:24<00:00,  4.28it/s]


                   all       1698      26334      0.548      0.431      0.484      0.354
Speed: 0.1ms preprocess, 4.0ms inference, 0.0ms loss, 1.2ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val24[0m
Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/webis-webseg-20-yolo-full/labels/val.cache... 1698 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1698/1698 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 107/107 [00:31<00:00,  3.45it/s]


                   all       1698      26334      0.554      0.433      0.479      0.343
Speed: 0.1ms preprocess, 8.5ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val25[0m


In [3]:
from ultralytics import YOLO

# Load model
model = YOLO('./runs/detect/yolov11s-imgsz1024-rect-webiswebseg20/weights/best.pt')

# Validation with NMS (default)
metrics_nms = model.val(
    data='./data/webis-webseg-20-yolo-full/dataset.yaml',
    conf=0.2,
    iou=0.5
)

metrics_no_nms = model.val(
    data='./data/webis-webseg-20-yolo-full/dataset.yaml',
    conf=0.2,
    iou=0.5,
    augment=True,    # Enable TTA
    nms=False        # Disable NMS
)

Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)
YOLO11s summary (fused): 238 layers, 9,413,187 parameters, 0 gradients, 21.3 GFLOPs


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/webis-webseg-20-yolo-full/labels/val.cache... 1698 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1698/1698 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 107/107 [01:36<00:00,  1.11it/s]


                   all       1698      26334      0.573      0.386      0.467      0.345
Speed: 0.4ms preprocess, 38.9ms inference, 0.0ms loss, 2.0ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val26[0m
Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/webis-webseg-20-yolo-full/labels/val.cache... 1698 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1698/1698 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 107/107 [01:59<00:00,  1.12s/it]


                   all       1698      26334      0.544       0.41      0.465      0.346
Speed: 0.7ms preprocess, 49.0ms inference, 0.0ms loss, 2.1ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val27[0m


In [1]:
from ultralytics import YOLO

# Load model
model = YOLO('./runs/detect/yolov11s-imgsz512-NANO/weights/best.pt')

# Validation with NMS (default)
metrics_nms = model.val(
    data='./data/IIS_data_yolo_annotator_2_nano/dataset.yaml',
    conf=0.2,
    iou=0.5
)

metrics_no_nms = model.val(
    data='./data/IIS_data_yolo_annotator_2_nano/dataset.yaml',
    conf=0.2,
    iou=0.5,
    augment=True,    # Enable TTA
    nms=False        # Disable NMS
)

Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)
YOLO11s summary (fused): 238 layers, 9,414,735 parameters, 0 gradients, 21.3 GFLOPs


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/IIS_data_yolo_annotator_2_nano/labels/val.cache... 20 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:01<00:00,  2.00it/s]


                   all         20         78      0.896       0.87      0.905      0.857
                header         19         19      0.947      0.942      0.962      0.898
                footer         19         19       0.95          1      0.993      0.973
                   nav         11         12      0.909      0.833      0.901      0.851
           maincontent         20         20      0.958       0.95      0.974      0.883
                 title          8          8      0.714      0.625      0.698      0.679
Speed: 5.0ms preprocess, 19.1ms inference, 0.0ms loss, 10.1ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val33[0m
Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/IIS_data_yolo_annotator_2_nano/labels/val.cache... 20 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:02<00:00,  1.08s/it]


                   all         20         78       0.88       0.86      0.907      0.851
                header         19         19      0.892      0.947      0.963      0.912
                footer         19         19        0.9      0.947      0.971      0.955
                   nav         11         12      0.909      0.833      0.909      0.813
           maincontent         20         20      0.986       0.95      0.993      0.893
                 title          8          8      0.712       0.62      0.698      0.679
Speed: 3.5ms preprocess, 41.8ms inference, 0.0ms loss, 1.1ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val34[0m


In [4]:
from ultralytics import YOLO

# Load model
model = YOLO('./runs/detect/yolov11s-imgsz640-NANO/weights/best.pt')

# Validation with NMS (default)
metrics_nms = model.val(
    data='./data/IIS_data_yolo_annotator_2_nano/dataset.yaml',
    conf=0.2,
    iou=0.5
)

metrics_no_nms = model.val(
    data='./data/IIS_data_yolo_annotator_2_nano/dataset.yaml',
    conf=0.2,
    iou=0.5,
    augment=True,    # Enable TTA
    nms=False        # Disable NMS
)

Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)
YOLO11s summary (fused): 238 layers, 9,414,735 parameters, 0 gradients, 21.3 GFLOPs


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/IIS_data_yolo_annotator_2_nano/labels/val.cache... 20 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:07<00:00,  3.66s/it]


                   all         20         78      0.866      0.817      0.874      0.837
                header         19         19      0.922      0.947      0.962       0.91
                footer         19         19      0.947      0.947      0.972      0.968
                   nav         11         12          1      0.833      0.917      0.861
           maincontent         20         20      0.786      0.735       0.84       0.77
                 title          8          8      0.674      0.625      0.677      0.677
Speed: 4.1ms preprocess, 262.8ms inference, 0.0ms loss, 2.3ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val28[0m
Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/IIS_data_yolo_annotator_2_nano/labels/val.cache... 20 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:03<00:00,  1.81s/it]


                   all         20         78      0.881       0.84        0.9      0.863
                header         19         19      0.894      0.895      0.948        0.9
                footer         19         19      0.947      0.947      0.972      0.964
                   nav         11         12          1      0.833      0.917      0.886
           maincontent         20         20      0.889        0.9      0.965      0.873
                 title          8          8      0.675      0.625        0.7       0.69
Speed: 3.1ms preprocess, 64.9ms inference, 0.0ms loss, 3.1ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val29[0m


In [2]:
from ultralytics import YOLO

# Load model
model = YOLO('./runs/detect/yolov11s-imgsz512-webis-webseg-full/weights/best.pt')

# Validation with NMS (default)
metrics_nms = model.val(
    data='./data/webis-webseg-20-yolo-no-tiny-segments-full/dataset.yaml',
    conf=0.2,
    iou=0.5
)

metrics_no_nms = model.val(
    data='./data/webis-webseg-20-yolo-no-tiny-segments-full/dataset.yaml',
    conf=0.2,
    iou=0.5,
    augment=True,    # Enable TTA
    nms=False        # Disable NMS
)

Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)
YOLO11s summary (fused): 238 layers, 9,413,187 parameters, 0 gradients, 21.3 GFLOPs


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/webis-webseg-20-yolo-no-tiny-segments-full/labels/val... 1698 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1698/1698 [00:36<00:00, 46.57it/s]

[34m[1mval: [0mNew cache created: /home/bruno/vt2-visual-webseg/data/webis-webseg-20-yolo-no-tiny-segments-full/labels/val.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 107/107 [00:33<00:00,  3.18it/s]


                   all       1698      15155      0.638      0.624      0.638      0.456
Speed: 0.2ms preprocess, 4.4ms inference, 0.0ms loss, 1.4ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val35[0m
Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/webis-webseg-20-yolo-no-tiny-segments-full/labels/val.cache... 1698 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1698/1698 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 107/107 [00:38<00:00,  2.76it/s]


                   all       1698      15155      0.649      0.618      0.642      0.452
Speed: 0.1ms preprocess, 9.6ms inference, 0.0ms loss, 1.2ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val36[0m


In [5]:
from ultralytics import YOLO

# Load model
model = YOLO('./runs/detect/yolov11s-imgsz1024-rect-NANO/weights/best.pt')

# Validation with NMS (default)
metrics_nms = model.val(
    data='./data/IIS_data_yolo_annotator_2_nano/dataset.yaml',
    conf=0.2,
    iou=0.5
)

metrics_no_nms = model.val(
    data='./data/IIS_data_yolo_annotator_2_nano/dataset.yaml',
    conf=0.2,
    iou=0.5,
    augment=True,    # Enable TTA
    nms=False        # Disable NMS
)

Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)
YOLO11s summary (fused): 238 layers, 9,414,735 parameters, 0 gradients, 21.3 GFLOPs


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/IIS_data_yolo_annotator_2_nano/labels/val.cache... 20 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:20<00:00, 10.38s/it]


                   all         20         78      0.903      0.869      0.905      0.864
                header         19         19      0.947      0.947      0.967      0.927
                footer         19         19          1          1      0.995      0.969
                   nav         11         12          1       0.75      0.875      0.841
           maincontent         20         20        0.9        0.9      0.928      0.832
                 title          8          8      0.667       0.75       0.76      0.751
Speed: 130.7ms preprocess, 796.4ms inference, 0.0ms loss, 1.4ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val30[0m
Ultralytics 8.3.74 ðŸš€ Python-3.11.5 torch-2.6.0+cu124 CUDA:0 (NVIDIA RTX A2000 8GB Laptop GPU, 8192MiB)


[34m[1mval: [0mScanning /home/bruno/vt2-visual-webseg/data/IIS_data_yolo_annotator_2_nano/labels/val.cache... 20 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 20/20 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:08<00:00,  4.42s/it]


                   all         20         78      0.938      0.833       0.88      0.852
                header         19         19      0.941      0.947      0.964      0.931
                footer         19         19          1          1      0.995      0.983
                   nav         11         12          1      0.691      0.875      0.858
           maincontent         20         20      0.941        0.9      0.926       0.85
                 title          8          8      0.806      0.625      0.638      0.638
Speed: 72.9ms preprocess, 161.6ms inference, 0.0ms loss, 3.3ms postprocess per image
Results saved to [1m/home/bruno/vt2-visual-webseg/runs/detect/val31[0m


In [None]:
def calculate_f1(precision, recall):
    if precision + recall == 0:  # Avoid division by zero
        return 0
    return 2 * (precision * recall)/(precision + recall)

# Example usage:
precision = 0.574
recall = 0.385
f1 = calculate_f1(precision, recall)
print(f"F1 Score: {f1:.3f}")

F1 Score: 0.461


: 