In [2]:
from ultralytics import YOLO
from loguru import logger
from tidecv import TIDE, datasets
from dataclasses import dataclass, asdict
from typing import List, Tuple, Dict, Set
from tqdm import tqdm
from collections import defaultdict
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import json

# Problem 1: COCO Dataset Evaluation

In [None]:
model = YOLO('yolov8x.pt')
logger.info('Loaded model: YOLOv8x')
metrics = model.val(data='coco_val2017.yaml', save_json=True, device='0')
map = metrics.box.map
logger.info(f'mean Avg Precision (mAP): {map}')

[32m2025-02-17 19:14:18.028[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mLoaded model: YOLOv8x[0m


Ultralytics 8.3.75 🚀 Python-3.10.16 torch-2.6.0+cu124 CUDA:0 (NVIDIA A100-PCIE-40GB, 40444MiB)
YOLOv8x summary (fused): 268 layers, 68,200,608 parameters, 0 gradients, 257.8 GFLOPs


[34m[1mval: [0mScanning /home/ip_arul/daksh21036/CV/HW1/2021036_HW1/data/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 313/313 [03:37<00:00,  1.44it/s]


                   all       5000      36335      0.737      0.647      0.707      0.541
                person       2693      10777      0.827      0.769      0.849      0.646
               bicycle        149        314      0.732      0.583      0.667      0.431
                   car        535       1918      0.759      0.683      0.754       0.54
            motorcycle        159        367      0.764      0.714      0.789      0.559
              airplane         97        143      0.833      0.874      0.928      0.791
                   bus        189        283      0.881      0.813      0.886      0.777
                 train        157        190      0.915      0.911      0.956      0.794
                 truck        250        414      0.686      0.572      0.652      0.491
                  boat        121        424      0.704      0.521      0.597       0.36
         traffic light        191        634      0.711      0.509      0.591      0.338
          fire hydran

[32m2025-02-17 19:18:05.937[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mmean Avg Precision (mAP): 0.5413895196527954[0m


# Problem 2: TIDE Statistics

In [4]:
tide = TIDE()
tide.evaluate(datasets.COCO(path='instances_val2017.json'), datasets.COCOResult(path='coco_predictions.json'), 
              mode=tide.BOX)
tide.summarize()

-- coco_predictions --

bbox AP @ 50: 9.79

                         Main Errors
  Type      Cls      Loc     Both     Dupe      Bkg     Miss  
-------------------------------------------------------------
   dAP    56.72     0.65     0.12     0.08     0.43     0.36  

        Special Error
  Type   FalsePos   FalseNeg  
-----------------------------
   dAP       4.85       2.51  



# Problem 3: Expected Calibration Error (ECE) 

ECE is a metric to evaluate the calibration of a model. It is defined as the expected value of the absolute difference between the accuracy and the confidence of the model. The confidence of the model is the probability assigned to the predicted class. The accuracy is 1 if the prediction is correct and 0 otherwise. The ECE is computed by dividing the confidence interval [0, 1] into M equally spaced bins and computing the weighted average of the absolute difference between the accuracy and the confidence in each bin. The weights are the proportion of samples in each bin. The ECE is defined as:

\begin{equation}
ECE = \sum_{m=1}^{M} \frac{B_m}{N} \left| \text{acc}(B_m) - \text{conf}(B_m) \right|
\end{equation}

where $B_m$ is the set of samples in bin $m$, $N$ is the total number of samples, $\text{acc}(B_m)$ is the accuracy of the model in bin $m$, and $\text{conf}(B_m)$ is the confidence of the model in bin $m$. The accuracy and confidence in bin $m$ are defined as:

\begin{equation}
\text{acc}(B_m) = \frac{1}{|B_m|} \sum_{i \in B_m} \mathbb{1} \left( y_i = \hat{y}_i \right)
\end{equation}

\begin{equation}
\text{conf}(B_m) = \frac{1}{|B_m|} \sum_{i \in B_m} p_i
\end{equation}

where $y_i$ is the true label of sample $i$, $\hat{y}_i$ is the predicted label of sample $i$, $p_i$ is the confidence of the model for sample $i$, and $\mathbb{1}(\cdot)$ is the indicator function.


## Starter Code

The following section contains some starter code to help you prepare the data using the COCO API. You can use this code to preprocess the data, or you can write your own code.

In [3]:
@dataclass
class Detection:
    bbox: np.ndarray  # [x1, y1, x2, y2]
    class_id: int
    confidence: float

@dataclass
class GroundTruth:
    bbox: np.ndarray  # [x1, y1, x2, y2]
    class_id: int

In [4]:
def get_image_annotations(coco_gt: COCO, coco_dt: COCO, img_id, max_dets=None):
    """Get list of ground truth and detection annotations across all images"""
    gt_ids = coco_gt.getAnnIds(imgIds=img_id)
    gt_anns = coco_gt.loadAnns(gt_ids)
    gts = [
        GroundTruth(
            bbox=ann['bbox'],
            class_id=ann['category_id']
        )
        for ann in gt_anns
    ]

    # Get detections
    dt_ids = coco_dt.getAnnIds(imgIds=img_id)
    dt_anns = coco_dt.loadAnns(dt_ids)
    if max_dets is not None:
        dt_anns = sorted(dt_anns, key=lambda x: x['score'], reverse=True)[:max_dets]
    dets = [
        Detection(
            bbox=ann['bbox'],
            class_id=ann['category_id'],
            confidence=ann['score']
        )
        for ann in dt_anns
    ]

    return gts, dets

def prepare_inputs(coco_gt: COCO, coco_dt: COCO, max_dets=100) -> dict:
    """Convert COCO format to TIDE format for all images"""
    gts_dets = dict()
    
    for img_id in tqdm(coco_gt.getImgIds(), desc="Processing images"):
        gts, dets = get_image_annotations(coco_gt, coco_dt, img_id, max_dets)
        if gts and dets:
            gts_dets[img_id] = (gts, dets)
    
    return gts_dets

In [11]:
predictions_json = "coco_predictions.json"   # Path to the predictions JSON file
ground_truth_json = "instances_val2017.json"  # Path to the val annotations JSON file

coco_gt = COCO(ground_truth_json)       # Load ground truth annotations
coco_dt = coco_gt.loadRes(predictions_json) # Load predictions

loading annotations into memory...
Done (t=0.38s)
creating index...
index created!
Loading and preparing results...
DONE (t=2.31s)
creating index...
index created!


In [12]:
gts_dets = prepare_inputs(coco_gt, coco_dt, max_dets=100)
len(gts_dets.keys())

Processing images: 100%|██████████| 5000/5000 [00:01<00:00, 4642.09it/s]


4952

In [7]:
def calculate_iou(bbox1: list, bbox2: list):
    x1, y1, w1, h1 = bbox1
    x2, y2, w2, h2 = bbox2
    x = (x1 + w1 - x2) if (x1 <= x2) else (x2 + w2 - x1)
    y = (y1 + h1 - y2) if (y1 <= y2) else (y2 + h2 - y1)
    intersection = x*y
    union = w1*h1 + w2*h2 - intersection
    return max(intersection / union, 0)

def search_closest_min_idx(confidence: float, m: int):
    intervals = np.linspace(0, 1, num=m, endpoint=False)
    n = len(intervals)
    beg, end = 0, n-1
    mid = (beg + end) // 2
    while beg <= end:
        if (mid == n-1 or intervals[mid] <= confidence < intervals[mid+1]):
            break
        if confidence <= intervals[mid]:
            end = mid
        else:
            beg = mid + 1
        mid = (beg + end) // 2
    return mid

def calculate_ece(gts_dets: Dict[str, Tuple[List[GroundTruth], List[Detection]]], m: int):
    N = len(gts_dets)
    proportions = np.zeros(m)
    confidences = np.zeros(m)
    accuracies = np.zeros(m)
    
    for gts, dets in gts_dets.values():
        for i in range(len(gts)):
            max_iou_idx = None
            iou = 0
            for j in range(len(dets)):
                gt_bbox, det_bbox = gts[i].bbox, dets[j].bbox
                this_iou = calculate_iou(gt_bbox, det_bbox)
                if this_iou > iou:
                    max_iou_idx = j
            if max_iou_idx is None:
                continue
            gt, det = gts[i], dets[max_iou_idx]
            idx = search_closest_min_idx(det.confidence, m)
            confidences[idx] = det.confidence
            if gt.class_id == det.class_id:
                accuracies[idx] += 1
            proportions[idx] += 1
    
    confidences = np.divide(confidences, proportions, out=np.zeros_like(confidences), where=proportions!=0)
    accuracies = np.divide(accuracies, proportions, out=np.zeros_like(accuracies), where=proportions!=0)
    ece = 0.0
    for i in range(m):
        ece += abs(accuracies[i] - confidences[i]) * proportions[i] / N
        
    return ece

In [13]:
logger.info(f'ECE: {calculate_ece(gts_dets, 10)}')

[32m2025-02-20 23:17:39.771[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mECE: 1.0606147960420031[0m


# Problem 3: size-based analysis.

In [27]:
SMALL_THRESH = 32 * 32
MEDIUM_THRESH = 96 * 96
SMALL_AREA_FILE = 'coco_predictions_small.json'
MEDIUM_AREA_FILE = 'coco_predictions_medium.json'
LARGE_AREA_FILE = 'coco_predictions_large.json'

In [42]:
def classify_bboxes_on_scale(gts_dets: Dict[str, Tuple[List[GroundTruth], List[Detection]]]):
    small = []
    medium = []
    large = []
    
    for img_id, (gts, dets) in gts_dets.items():
        for i in range(len(dets)):
            det = dets[i]
            max_iou_idx = None
            iou = 0
            for j in range(len(gts)):
                det_bbox, gt_bbox = dets[i].bbox, gts[j].bbox
                this_iou = calculate_iou(gt_bbox, det_bbox)
                if this_iou > iou:
                    max_iou_idx = j
            if max_iou_idx is None:
                continue
            gt = gts[max_iou_idx]
            area = gt.bbox[2] * gt.bbox[3]
            det = asdict(det)
            det['image_id'] = img_id
            det['category_id'] = det.pop('class_id')
            det['bbox'] = list(det['bbox'])
            det['score'] = det.pop('confidence')    # ensure proper COCO format
            if area <= SMALL_THRESH:
                small.append(det)
            elif area <= MEDIUM_THRESH:
                medium.append(det)
            else:
                large.append(det)
    
    json.dump(small, open(SMALL_AREA_FILE, 'w'))
    json.dump(medium, open(MEDIUM_AREA_FILE, 'w'))
    json.dump(large, open(LARGE_AREA_FILE, 'w'))

    logger.info('Saved predictions to JSON files')

In [43]:
classify_bboxes_on_scale(gts_dets)

[32m2025-02-18 23:50:32.561[0m | [1mINFO    [0m | [36m__main__[0m:[36mclassify_bboxes_on_scale[0m:[36m36[0m - [1mSaved predictions to JSON files[0m


In [44]:
tide = TIDE()
logger.info('Evaluating small area predictions')
tide.evaluate(datasets.COCO(path='instances_val2017.json'), datasets.COCOResult(path=SMALL_AREA_FILE), 
              mode=tide.BOX)
tide.summarize()

[32m2025-02-18 23:53:16.533[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mEvaluating small area predictions[0m


-- coco_predictions_small --

bbox AP @ 50: 0.02

                         Main Errors
  Type      Cls      Loc     Both     Dupe      Bkg     Miss  
-------------------------------------------------------------
   dAP     0.02     0.02     0.00     0.00     0.00     1.69  

        Special Error
  Type   FalsePos   FalseNeg  
-----------------------------
   dAP       0.01       3.06  



In [45]:
logger.info('Evaluating medium area predictions')
tide.evaluate(datasets.COCO(path='instances_val2017.json'), datasets.COCOResult(path=MEDIUM_AREA_FILE), 
              mode=tide.BOX)
tide.summarize()

[32m2025-02-18 23:53:24.132[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mEvaluating medium area predictions[0m


-- coco_predictions_small --

bbox AP @ 50: 0.02

                         Main Errors
  Type      Cls      Loc     Both     Dupe      Bkg     Miss  
-------------------------------------------------------------
   dAP     0.02     0.02     0.00     0.00     0.00     1.69  

        Special Error
  Type   FalsePos   FalseNeg  
-----------------------------
   dAP       0.01       3.06  

-- coco_predictions_medium --

bbox AP @ 50: 0.10

                         Main Errors
  Type      Cls      Loc     Both     Dupe      Bkg     Miss  
-------------------------------------------------------------
   dAP     0.26     0.13     0.00     0.00     0.00     2.76  

        Special Error
  Type   FalsePos   FalseNeg  
-----------------------------
   dAP       0.06       5.92  



In [46]:
logger.info('Evaluating large area predictions')
tide.evaluate(datasets.COCO(path='instances_val2017.json'), datasets.COCOResult(path=LARGE_AREA_FILE), 
              mode=tide.BOX)
tide.summarize()

[32m2025-02-18 23:53:32.864[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mEvaluating large area predictions[0m


-- coco_predictions_small --

bbox AP @ 50: 0.02

                         Main Errors
  Type      Cls      Loc     Both     Dupe      Bkg     Miss  
-------------------------------------------------------------
   dAP     0.02     0.02     0.00     0.00     0.00     1.69  

        Special Error
  Type   FalsePos   FalseNeg  
-----------------------------
   dAP       0.01       3.06  

-- coco_predictions_medium --

bbox AP @ 50: 0.10

                         Main Errors
  Type      Cls      Loc     Both     Dupe      Bkg     Miss  
-------------------------------------------------------------
   dAP     0.26     0.13     0.00     0.00     0.00     2.76  

        Special Error
  Type   FalsePos   FalseNeg  
-----------------------------
   dAP       0.06       5.92  

-- coco_predictions_large --

bbox AP @ 50: 1.08

                         Main Errors
  Type      Cls      Loc     Both     Dupe      Bkg     Miss  
-------------------------------------------------------------
   dA

In [8]:
predictions_small_json = "coco_predictions_small.json"   # Path to the predictions JSON file
ground_truth_json = "instances_val2017.json"  # Path to the val annotations JSON file

coco_gt = COCO(ground_truth_json)       # Load ground truth annotations
coco_dt = coco_gt.loadRes(predictions_small_json) # Load predictions

gts_dets = prepare_inputs(coco_gt, coco_dt)
logger.info(f'ECE: {calculate_ece(gts_dets, 10)}')

loading annotations into memory...
Done (t=0.39s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!


Processing images: 100%|██████████| 5000/5000 [00:00<00:00, 82789.94it/s]
[32m2025-02-20 23:14:57.867[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mECE: 0.49465499999999996[0m


In [9]:
predictions_mid_json = "coco_predictions_medium.json"   # Path to the predictions JSON file
ground_truth_json = "instances_val2017.json"  # Path to the val annotations JSON file

coco_gt = COCO(ground_truth_json)       # Load ground truth annotations
coco_dt = coco_gt.loadRes(predictions_small_json) # Load predictions

gts_dets = prepare_inputs(coco_gt, coco_dt)
logger.info(f'ECE: {calculate_ece(gts_dets, 10)}')

loading annotations into memory...
Done (t=0.55s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!


Processing images: 100%|██████████| 5000/5000 [00:00<00:00, 83851.53it/s]
[32m2025-02-20 23:15:30.585[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mECE: 0.49465499999999996[0m


In [10]:
predictions_large_json = "coco_predictions_large.json"   # Path to the predictions JSON file
ground_truth_json = "instances_val2017.json"  # Path to the val annotations JSON file

coco_gt = COCO(ground_truth_json)       # Load ground truth annotations
coco_dt = coco_gt.loadRes(predictions_large_json) # Load predictions

gts_dets = prepare_inputs(coco_gt, coco_dt)
logger.info(f'ECE: {calculate_ece(gts_dets, 10)}')

loading annotations into memory...
Done (t=0.39s)
creating index...
index created!
Loading and preparing results...
DONE (t=1.90s)
creating index...
index created!


Processing images: 100%|██████████| 5000/5000 [00:01<00:00, 4379.18it/s]
[32m2025-02-20 23:16:00.290[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mECE: 1.002436501616815[0m


## Starter Code: Spatial Grid data structure.


## Spatial Grid Implementation

The `SpatialGrid` class implements a spatial indexing data structure that helps efficiently find nearby objects. Using this data structure is useful for our size-based analysis as it significantly reduces the time complexity of finding overlapping boxes.

### How it works:
1. The image space is divided into a grid of cells. The size of the cells is defined by the `cell_size` parameter.
2. Each bounding box is mapped to the cells it overlaps.
3. When searching for nearby objects, we only need to check objects in the relevant grid cells.

### Usage Example:
```python
grid = SpatialGrid(cell_size=100)
grid.add_box(0, [100, 100, 200, 200])  # Add a box with index 0, and bbox coordinates [100, 100, 200, 200]
nearby = grid.get_nearby_indices([150, 150, 250, 250])  # Find boxes near the bbox [150, 150, 250, 250]
```

In [26]:
class SpatialGrid:
    def __init__(self, cell_size: int = 100):
        """A spatial indexing structure for efficient nearby object queries.
        
        This data structure divides 2D space into a grid and maintains a mapping of
        which objects overlap with each grid cell, enabling efficient spatial queries.
        """
        self.cell_size: int = cell_size
        # self.grid = defaultdict(list)
        self.grid: Dict[Tuple[int, int], List[int]] = defaultdict(list)
    
    def get_cell_coords(self, bbox: np.ndarray) -> Set[Tuple[int, int]]:
        """Get all grid cells that a bounding box overlaps with.
        
        Args:
            bbox: Bounding box coordinates [x1, y1, x2, y2]
                where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner

        Returns:
            Set of (x, y) grid cell coordinates
        """
        x1, y1, x2, y2 = bbox
        start_x = int(x1 // self.cell_size)
        end_x = int(x2 // self.cell_size)
        start_y = int(y1 // self.cell_size)
        end_y = int(y2 // self.cell_size)
        return {(i, j) for i in range(start_x, end_x + 1) 
                       for j in range(start_y, end_y + 1)}
    
    def add_box(self, idx: int, bbox: np.ndarray):
        """Add a bounding box to the spatial grid.
        
        Args:
            idx: Index of the bounding box
            bbox: Bounding box coordinates [x1, y1, x2, y2]
        """
        cells = self.get_cell_coords(bbox)
        for cell in cells:
            self.grid[cell].append(idx)
    
    def get_nearby_indices(self, bbox: np.ndarray) -> Set[int]:
        """Get indices of bounding boxes that are near the given bounding box.
        
        Args:
            bbox: Bounding box coordinates [x1, y1, x2, y2]
            
        Returns:
            Set of indices of nearby bounding boxes
        """
        cells = self.get_cell_coords(bbox)
        nearby = set()
        for cell in cells:
            nearby.update(self.grid[cell])
        return nearby

In [27]:
# Size thresholds as per COCO
SMALL_THRESH = 32 * 32
MEDIUM_THRESH = 96 * 96

## Your task: Implement the size-based ECE computation.

## Solution:

# Size-based TIDE computation:

## Solution: