In [1]:
import torch

from utils.dataloader import VOCDataLoaderPerson
# Instantiate the project's VOCDataLoaderPerson with train=False and one sample per batch.
# NOTE(review): presumably train=False selects the non-training split of the
# person-class VOC data -- confirm in utils.dataloader.
loader = VOCDataLoaderPerson(train=False, batch_size=1)

  Referenced from: <0B7EB158-53DC-3403-8A49-22178CAB4612> /Users/yezeling/miniforge3/envs/myenv/lib/python3.10/site-packages/torchvision/image.so
  warn(


In [2]:
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F
import tqdm

# Gather the (w, h) pair of every valid ground-truth box yielded by the loader.
all_wh = []
for img_id, (img, target) in tqdm.tqdm(enumerate(loader), total=len(loader)):
    # target shape: [batch_size, num_box, 6], e.g. (x, y, w, h, obj_confidence, class_id)
    for sample in target:
        # A box is valid when its class_id (last column) is >= 0.
        kept = sample[sample[:, -1] >= 0]

        # Nothing valid in this sample => move on
        if kept.shape[0] == 0:
            continue

        # Columns 2 and 3 hold w and h; store them as plain Python float pairs.
        all_wh.extend(zip(kept[:, 2].tolist(), kept[:, 3].tolist()))
print(f"Collected {len(all_wh)} boxes in total")




100%|████████████████████████████████████████| 2232/2232 [00:07<00:00, 302.59it/s]

Collected 5045 boxes in total





In [3]:
import numpy as np

def iou(box, cluster):
    """
    IoU between one box and every cluster, using widths and heights only.

    All rectangles are treated as sharing the same upper-left corner (0,0) with
    their lower-right corner at (w,h), so the overlap is min(w) * min(h).

    box: shape (2,) => (w, h)
    cluster: shape (k,2) => [[w1,h1],
                             [w2,h2],
                             ...
                             [wk,hk]]
    return: shape (k,), the IoU between the box and each cluster.
    """
    box_w, box_h = box[0], box[1]
    cluster_w = cluster[:, 0]
    cluster_h = cluster[:, 1]

    # Overlap of two corner-aligned rectangles
    overlap = np.minimum(box_w, cluster_w) * np.minimum(box_h, cluster_h)

    # Union = both areas minus the overlap counted twice
    union = box_w * box_h + cluster_w * cluster_h - overlap

    # The 1e-9 term keeps the division defined when the union is zero
    return overlap / (union + 1e-9)

In [4]:
def iou_distance(boxes, clusters):
    """
    Build the distance matrix between boxes and clusters, where dist = 1 - IoU.

    boxes: shape (N,2)
    clusters: shape (k,2)
    return: float32 array of shape (N,k); entry (i,j) is 1 - IoU between the
            i-th box and the j-th cluster
    """
    dist = np.zeros((boxes.shape[0], clusters.shape[0]), dtype=np.float32)
    # One row per box: iou() compares a single box against all clusters at once
    for row, box in enumerate(boxes):
        dist[row] = 1 - iou(box, clusters)
    return dist

In [5]:
def kmeans_iou(boxes, k=5, max_iter=100, seed=666):
    """
    Use IoU distance (1 - IoU) to do k-means clustering and get k anchors (w,h).

    boxes: array-like, shape (N,2), each row is [w,h]. Non-floating input is
           converted to float64 so the cluster means are not truncated.
    k: Number of clusters; must satisfy 1 <= k <= N
    max_iter: Upper bound on the number of assign/update rounds
    seed: Seed for picking the initial centers. The draw uses a private
          generator, so the global NumPy random state is left untouched.

    return: The cluster centers with shape (k,2)
    raise: ValueError if k is outside [1, N]
    """
    boxes = np.asarray(boxes)
    if not np.issubdtype(boxes.dtype, np.floating):
        # Centers are stored in an array derived from `boxes`; with an integer
        # dtype every mean written into it would be silently truncated.
        boxes = boxes.astype(np.float64)

    N = boxes.shape[0]
    if not 1 <= k <= N:
        raise ValueError(f"k must be between 1 and the number of boxes ({N}), got {k}")

    # A private RandomState yields the same draws as np.random.seed(seed)
    # followed by np.random.choice, without reseeding the global generator.
    rng = np.random.RandomState(seed)

    # Randomly initialize k centers (randomly select k distinct boxes)
    init_indices = rng.choice(N, k, replace=False)
    clusters = boxes[init_indices].copy()

    last_nearest = None

    for _ in range(max_iter):
        # Distance of every box to every center => shape (N,k)
        dist = iou_distance(boxes, clusters)

        # For each box, the index of the closest (smallest distance) center
        current_nearest = np.argmin(dist, axis=1)

        # Same assignment as in the previous round means convergence => stop
        if last_nearest is not None and np.array_equal(current_nearest, last_nearest):
            break
        last_nearest = current_nearest

        # Move each center to the mean (w,h) of the boxes assigned to it
        for c in range(k):
            mask = (current_nearest == c)
            if not mask.any():
                continue  # An empty cluster keeps its previous center
            clusters[c] = np.mean(boxes[mask], axis=0)

    return clusters

In [16]:
def avg_iou(boxes, clusters):
    """
    Mean IoU between each box and its best-matching (closest) cluster.

    boxes: shape (N,2)
    clusters: shape (k,2)
    return: sum of the per-box best IoU divided by N
    """
    dist = iou_distance(boxes, clusters)  # shape (N,k), entries are 1 - IoU
    best = np.argmin(dist, axis=1)

    total = 0.0
    for row, col in enumerate(best):
        # Convert the smallest distance of this row back into an IoU
        total += 1 - dist[row, col]
    return total / boxes.shape[0]

In [32]:
# Sanity-check the collected list: number of boxes, then values per box.
print(len(all_wh))
print(len(all_wh[0]))

# Pack the (w, h) pairs into an (N, 2) float32 array for clustering.
boxes = np.asarray(all_wh, dtype=np.float32)
print(boxes.shape)

5045
2
(5045, 2)


In [44]:
# Cluster the collected boxes into 5 anchors and report how well they fit.
k = 5
anchors = kmeans_iou(boxes, k=k, max_iter=100, seed=666)
print("K-means anchors:", anchors, sep="\n")

score = avg_iou(boxes, anchors)
print("Average IoU with these anchors:", score)

K-means anchors:
[[0.39765027 0.5792138 ]
 [0.04936575 0.09369813]
 [0.12444213 0.25251988]
 [0.6931115  0.72179604]
 [0.22943892 0.44076216]]
Average IoU with these anchors: 0.632892970960842


In [43]:
# Same experiment with 9 anchors; note that this overwrites `k`, `anchors` and `score`.
k = 9
anchors = kmeans_iou(boxes, k=k, max_iter=100, seed=666)
print("K-means anchors:", anchors, sep="\n")

score = avg_iou(boxes, anchors)
print("Average IoU with these anchors:", score)

K-means anchors:
[[0.48963717 0.7219764 ]
 [0.12734953 0.2878153 ]
 [0.08295543 0.15192717]
 [0.7726421  0.73322475]
 [0.18190207 0.48115298]
 [0.46931577 0.45483983]
 [0.03759469 0.07182477]
 [0.30879277 0.5884212 ]
 [0.26364553 0.29661068]]
Average IoU with these anchors: 0.6937220038105877


In [35]:
# Anchor values referred to below as the "original anchors in loss.py",
# scored against the collected boxes for comparison with the k-means anchors.
original_anchors = (
    (1.08, 1.19),
    (3.42, 4.41),
    (6.63, 11.38),
    (9.42, 5.11),
    (16.62, 10.52),
)
print(type(original_anchors))

# Convert the tuple defined above. This line previously converted `anchors`
# (the most recent k-means result), so the score printed below did not
# describe the loss.py anchors when the notebook was run top to bottom.
original_anchors = np.array(original_anchors, dtype=np.float32)
print(original_anchors.shape)

# NOTE(review): these values are far larger than the k-means anchors printed in
# this notebook (all below 1), so they may be expressed in a different unit
# than `boxes` -- confirm before reading much into the score.
score = avg_iou(boxes, original_anchors)
print("Average IoU with original anchors in loss.py:", score)

<class 'tuple'>
(5, 2)
Average IoU with original anchors in loss.py: 0.09935919823802498


In [29]:
# Hard-coded (w, h) anchors; the values match the k=5 k-means output printed in this notebook.
anchors = (
    (0.39765027, 0.5792138),
    (0.04936575, 0.09369813),
    (0.12444213, 0.25251988),
    (0.6931115, 0.72179604),
    (0.22943892, 0.44076216),
)
print(type(anchors))

<class 'tuple'>
