In [1]:
from ultralytics import YOLO
from pycocotools.coco import COCO
import os
from typing import Dict, List, Tuple, Set
import json 
import numpy as np

In [2]:
model_v8 = YOLO("models/yolov8n.pt")
model_v11 = YOLO("models/yolo11n.pt" )

In [3]:
coco_val_path = "./data/val2017" 
coco_ann_path = "./data/annotations/instances_val2017.json" 

coco = COCO(coco_ann_path)
image_ids = coco.getImgIds()

loading annotations into memory...
Done (t=0.15s)
creating index...
index created!


In [4]:
iou_threshold = 0.5

In [5]:
def compute_iou(box1: List[float], box2: List[float]) -> float:
    if len(box1) != 4 or len(box2) != 4:
        raise ValueError("Box format not supported")

    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    
    w = max(0, x2 - x1)
    h = max(0, y2 - y1)
    intersection = w * h

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    union = box1_area + box2_area - intersection

    iou = intersection / union if union > 0 else 0
    return iou

In [6]:
def run_inference(model: YOLO, image_ids: list[int]) -> Dict:
    results = {}
    for idx, img_id in enumerate(image_ids):

        if idx % 100 == 0:
            print(f"Processing image {idx}/{len(image_ids)}")
        
        img_info = coco.loadImgs(img_id)[0]
        img_path = os.path.join(f"{coco_val_path}/{img_info['file_name']}")
    
        try:
            prediction = model(img_path, verbose=False)[0]
            detections = []
            if prediction.boxes is not None: 
                boxes = prediction.boxes.xyxy.cpu().numpy()
                confidences = prediction.boxes.conf.cpu().numpy()
                classes = prediction.boxes.cls.cpu().numpy()
    
                for i in range(len(boxes)):
                    detections.append({
                        'bbox': boxes[i].tolist(), 
                        'confidence': float(confidences[i]),
                        'class': int(classes[i])
                    })
    
            results[img_id] = detections
                
        except Exception as e: 
            print(f"Error processing image {img_id}: {e}")

    return results

In [7]:
def get_ground_truth_objects(img_id: int) -> List[Dict]:
    ann_ids = coco.getAnnIds(imgIds=img_id)
    anns = coco.loadAnns(ann_ids)

    gt_objects = []

    for ann in anns: 
        if ann['iscrowd'] == 0: # Only consider non-crowd annotations
            # Convert from [x,y,w,h] to [x1,y1,x2,y2]
            x, y, w, h = ann['bbox']
            bbox = [x, y, x + w , y + h]

            gt_objects.append({
                'bbox': bbox, 
                'class': ann['category_id'] - 1,  # COCO classes are 1-indexed, convert to 0-indexed
                'area': ann['area']
            })
    return gt_objects
        

In [8]:
def find_matching_detections(detections: List[Dict], gt_objects: List[Dict], iou_threshold: float = 0.5) -> Dict: 
    matches = {}

    for gt_idx, gt_obj in enumerate(gt_objects):
        best_match = None 
        best_confidence = -1 

        for det in detections:
            iou = compute_iou(det['bbox'], gt_obj['bbox'])

            if iou >= iou_threshold and det['confidence'] > best_confidence:
                best_match = det
                best_confidence = det['confidence']

        matches[gt_idx] = best_match
                
    return matches

In [9]:
results_v8 = run_inference(model_v8, image_ids)
results_v11 = run_inference(model_v11, image_ids)

Processing image 0/5000
Processing image 100/5000
Processing image 200/5000
Processing image 300/5000
Processing image 400/5000
Processing image 500/5000
Processing image 600/5000
Processing image 700/5000
Processing image 800/5000
Processing image 900/5000
Processing image 1000/5000
Processing image 1100/5000
Processing image 1200/5000
Processing image 1300/5000
Processing image 1400/5000
Processing image 1500/5000
Processing image 1600/5000
Processing image 1700/5000
Processing image 1800/5000
Processing image 1900/5000
Processing image 2000/5000
Processing image 2100/5000
Processing image 2200/5000
Processing image 2300/5000
Processing image 2400/5000
Processing image 2500/5000
Processing image 2600/5000
Processing image 2700/5000
Processing image 2800/5000
Processing image 2900/5000
Processing image 3000/5000
Processing image 3100/5000
Processing image 3200/5000
Processing image 3300/5000
Processing image 3400/5000
Processing image 3500/5000
Processing image 3600/5000
Processing im

In [10]:
N_total = 0   # Total number of ground truth objects
N_loc = 0     # Objects localized by both models

LNF_total = 0  # Location negative flips
CNF_total = 0  # Classification negative flips 
BNF_total = 0  # Both negative flips 
TNF_total = 0  # Total Negative Flips (either location or classification )

flip_details = []

for img_id in (list(results_v8.keys())):
    gt_objects = get_ground_truth_objects(img_id)
    N_total += len(gt_objects)

    detections_v8 = results_v8[img_id]
    detections_v11 = results_v11[img_id]

    matches_v8 = find_matching_detections(detections_v8, gt_objects, iou_threshold)
    matches_v11 = find_matching_detections(detections_v11, gt_objects, iou_threshold)

    for gt_idx, gt_obj in enumerate(gt_objects):
        d1 = matches_v8[gt_idx]
        d2 = matches_v11[gt_idx]
        
        # Location Negative Flip: LNF_{i,g} = 1 if d1 ≠ ∅ and d2 = ∅
        LNF_i_g = 1 if (d1 is not None and d2 is None) else 0 

        # Classification Negative Flip: CNF_{i,g} = 1 if both detected but v8 correct, v11 wrong
        CNF_i_g = 0 
        if d1 is not None and d2 is not None: 
            N_loc += 1 # Count objects localized by both models 
            if d1['class'] == gt_obj['class'] and d2['class'] != gt_obj['class']:
                CNF_i_g = 1

        BNF_i_g = LNF_i_g * CNF_i_g  
        TNF_i_g = 1 if (LNF_i_g == 1 or CNF_i_g == 1) else 0

        LNF_total += LNF_i_g
        CNF_total += CNF_i_g
        BNF_total += BNF_i_g
        TNF_total += TNF_i_g

        if LNF_i_g == 1 or CNF_i_g == 1:
            flip_details.append({
                'image_id': img_id,
                'gt_class': gt_obj['class'],
                'gt_bbox': gt_obj['bbox'],
                'LNF': LNF_i_g,
                'CNF': CNF_i_g,
                'TNF': TNF_i_g,
                'v8_detected': d1 is not None,
                'v11_detected': d2 is not None,
                'v8_class': d1['class'] if d1 else None,
                'v11_class': d2['class'] if d2 else None,
                'v8_confidence': d1['confidence'] if d1 else None,
                'v11_confidence': d2['confidence'] if d2 else None
            })


In [12]:
LNF_rate = LNF_total / N_total if N_total > 0 else 0
CNF_rate_standard = CNF_total / N_loc if N_loc > 0 else 0  # Standard: denominator = N_loc
CNF_rate_common_denom = CNF_total / N_total if N_total > 0 else 0  # Common denominator for subtraction
TNF_rate = TNF_total / N_total if N_total > 0 else 0

# Compute the difference (using common denominator)
flip_difference = CNF_rate_common_denom - LNF_rate

results = {
    'summary': {
        'N_total': N_total,
        'N_loc': N_loc,
        'LNF_total': LNF_total,
        'CNF_total': CNF_total,
        'BNF_total': BNF_total,
        'TNF_total': TNF_total,
        'LNF_rate': LNF_rate,
        'CNF_rate_standard': CNF_rate_standard,
        'CNF_rate_common_denom': CNF_rate_common_denom,
        'TNF_rate': TNF_rate,
        'flip_difference': flip_difference,
        'iou_threshold': iou_threshold
    },
    'flip_details': flip_details
}



with open('negative_flip_analysis_results.json', 'w') as f:
    # Convert numpy types to native Python types for JSON serialization
    json_results = {
        'summary': {k: (int(v) if isinstance(v, np.integer) else float(v) if isinstance(v, np.floating) else v) 
                   for k, v in results['summary'].items()},
        'flip_details': results['flip_details']
    }
    json.dump(json_results, f, indent=2)
    

In [14]:
json_results['summary']

{'N_total': 36335,
 'N_loc': 17474,
 'LNF_total': 1853,
 'CNF_total': 92,
 'BNF_total': 0,
 'TNF_total': 1945,
 'LNF_rate': 0.05099766065776799,
 'CNF_rate_standard': 0.005264965090992332,
 'CNF_rate_common_denom': 0.00253199394523187,
 'TNF_rate': 0.05352965460299986,
 'flip_difference': -0.048465666712536125,
 'iou_threshold': 0.5}