Using K-means to find the anchor boxes in COCO
The clusters are used to find the most common bounding-box shapes in the dataset in an unsupervised manner

source: https://gist.github.com/WillieMaddox/3b1159baecb809b5fcb3a6154bc3cb0b

In [1]:
import numpy as np
from pycocotools.coco import COCO

In [2]:
def convert_coco_bbox(size, box):
    '''
    Convert a COCO bounding box to YOLO format.

    size: (width, height) used to normalize the box
    box:  COCO box, indexed as (x, y, width, height) where (x, y) is the
          corner the width/height extend from

    Returns (center_x, center_y, width, height), each divided by the
    matching entry of size.
    '''
    inv_w, inv_h = 1. / size[0], 1. / size[1]

    box_w, box_h = box[2], box[3]
    # shift the corner by half the extent to get the box center
    center_x = box[0] + box_w / 2.0
    center_y = box[1] + box_h / 2.0

    return center_x * inv_w, center_y * inv_h, box_w * inv_w, box_h * inv_h

def area(x):
    '''
    Area (first entry times second entry) of one box given as a 1-D array,
    or of every row when x has more than one dimension.
    '''
    single_box = len(x.shape) == 1
    if single_box:
        first, second = x[0], x[1]
    else:
        first, second = x[:, 0], x[:, 1]
    return first * second

In [5]:
!pwd

/notebooks


In [3]:
def load_coco_dataset(annotation_dir='/notebooks/data/annotations',
                      datasets=('train2017', 'val2017')):
    '''
    Load COCO dataset

    Reads the instances_<dataset>.json annotation file of every requested
    dataset and collects the normalized (width, height) of each annotated
    bounding box.

    annotation_dir: directory holding the instances_*.json files
    datasets:       names of the splits to read

    Returns a float array of shape (n_boxes, 2); the shape is (0, 2) when no
    box is found. Each row is a box width and height divided by the width
    and height of the image it belongs to.
    '''
    import os

    data = []

    for dataset in datasets:
        annfile = os.path.join(annotation_dir, 'instances_%s.json' % dataset)
        coco = COCO(annfile)

        # Gather the images returned for each category id, without duplicates.
        img_id_set = set()
        for cat_id in coco.getCatIds():
            img_id_set.update(coco.getImgIds(catIds=cat_id))

        # Sorted so the row order of the result does not depend on set ordering.
        for image_id in sorted(img_id_set):
            img = coco.loadImgs(image_id)[0]
            size = (img['width'], img['height'])

            for ann in coco.loadAnns(coco.getAnnIds(imgIds=image_id)):
                # keep only the normalized width and height
                data.append(convert_coco_bbox(size, ann['bbox'])[2:])

    return np.asarray(data, dtype=float).reshape(-1, 2)

In [4]:
def kmeans_iou(k, centroids, points, iter_count=0, iteration_cutoff=25, feature_size=13):
    '''
    k-means clustering with IOU as distance metric

    Boxes are treated as (width, height) pairs sharing a common corner, so the
    intersection of two boxes is min(w1, w2) * min(h1, h2) and the distance
    between them is 1 - IoU.

    k:                number of clusters
    centroids:        (k, 2) array with the initial centroids
    points:           (npoi, 2) array with the boxes to cluster
    iter_count:       number given to the first iteration
    iteration_cutoff: stop once this many iterations have passed since the
                      best average IoU was reached
    feature_size:     factor applied to the anchors in the final printed
                      report only; the returned anchors are not scaled

    Progress is printed at every iteration. Iteration also stops as soon as
    the centroids no longer move.

    Returns a (k, 2) array with the centroids of the iteration that had the
    highest average IoU, sorted by their first column. A cluster that receives
    no points keeps its previous centroid (averaging an empty cluster would
    produce NaN and poison every later assignment).
    '''
    points = np.asarray(points, dtype=float)
    centroids = np.asarray(centroids, dtype=float)

    # Fallback result in case no iteration reaches a positive average IoU.
    best_anchors = centroids.copy()
    best_avg_iou = 0
    best_avg_iou_iteration = 0

    area_p = np.prod(points, axis=1)  # (npoi, 2) -> (npoi,)

    while True:
        # element-wise min of every centroid/point pair: (k, npoi, 2)
        overlap = np.minimum(centroids[:, np.newaxis, :], points[np.newaxis, :, :])
        wh = overlap.prod(axis=2).T  # (k, npoi, 2) -> (npoi, k)
        area_c = np.prod(centroids, axis=1)  # (k, 2) -> (k,)

        dist = 1. - (wh / (area_p[:, np.newaxis] + area_c - wh))  # -> (npoi, k)
        belongs_to_cluster = np.argmin(dist, axis=1)  # (npoi, k) -> (npoi,)
        clusters_niou = np.min(dist, axis=1)  # (npoi, k) -> (npoi,)
        clusters = [points[belongs_to_cluster == i] for i in range(k)]
        avg_iou = np.mean(1. - clusters_niou)

        # An empty cluster has no mean; leave its centroid where it was.
        new_centroids = np.array([
            np.mean(c, axis=0) if len(c) else centroids[i]
            for i, c in enumerate(clusters)
        ])

        if avg_iou > best_avg_iou:
            best_avg_iou = avg_iou
            best_anchors = new_centroids
            best_avg_iou_iteration = iter_count

        print("\nIteration {}".format(iter_count))
        print("Average iou to closest centroid = {}".format(avg_iou))
        print("Sum of all distances (cost) = {}".format(np.sum(clusters_niou)))

        # 1 - IoU between each centroid and its updated position
        isect = np.prod(np.minimum(centroids, new_centroids), axis=1)
        aa1 = np.prod(centroids, axis=1)
        aa2 = np.prod(new_centroids, axis=1)
        shifts = 1 - isect / (aa1 + aa2 - isect)

        if np.sum(shifts) == 0 or iter_count >= best_avg_iou_iteration + iteration_cutoff:
            break

        centroids = new_centroids
        iter_count += 1

    # Get anchor boxes from the best iteration
    anchors = best_anchors[best_anchors[:, 0].argsort()]
    print("k-means clustering anchor points (original coordinates) \
    \nFound at iteration {} with best average IoU: {} \
    \n{}".format(best_avg_iou_iteration, best_avg_iou, anchors*feature_size))

    return anchors

In [5]:
# settings for the k-means run
k = 9  # number of clusters
img_size = 608  # image size the normalized anchors are scaled to


In [6]:

# Disabled alternative: run the clustering on uniform random data instead of COCO.
# random_data = np.random.random((1000, 2))
# centroids = np.random.random((k, 2))
# random_anchors = kmeans_iou(k, centroids, random_data)
# Array with one normalized (width, height) row per annotated COCO box.
coco_data = load_coco_dataset()


loading annotations into memory...
Done (t=12.66s)
creating index...
index created!
loading annotations into memory...
Done (t=0.41s)
creating index...
index created!


In [13]:
# finding the anchors in the dataset
# The initial centroids are k distinct boxes drawn from the data. The draw is
# seeded so that repeated runs start from the same centroids and therefore
# produce the same anchors; change the seed to explore other starting points.
rng = np.random.RandomState(0)
centroids = coco_data[rng.choice(len(coco_data), k, replace=False)]
# centroids = coco_data[:k]
coco_anchors = kmeans_iou(k, centroids, coco_data, feature_size=img_size)


Iteration 0
Average iou to closest centroid = 0.5197335232280779
Sum of all distances (cost) = 430694.3315724778

Iteration 1
Average iou to closest centroid = 0.5795460898541414
Sum of all distances (cost) = 377055.4984484234

Iteration 2
Average iou to closest centroid = 0.5885787900517612
Sum of all distances (cost) = 368955.1354998017

Iteration 3
Average iou to closest centroid = 0.5932260724081821
Sum of all distances (cost) = 364787.5363336456

Iteration 4
Average iou to closest centroid = 0.5958037313001489
Sum of all distances (cost) = 362475.9382371899

Iteration 5
Average iou to closest centroid = 0.596751236006035
Sum of all distances (cost) = 361626.23307203606

Iteration 6
Average iou to closest centroid = 0.5968110369526753
Sum of all distances (cost) = 361572.6046595058

Iteration 7
Average iou to closest centroid = 0.5964072418694105
Sum of all distances (cost) = 361934.720821866

Iteration 8
Average iou to closest centroid = 0.5957371407779404
Sum of all distances (c

In [14]:
# anchors in normalized units, and the same anchors scaled to the image size
scaled_coco_anchors = img_size * coco_anchors
print("COCO anchors: {}".format(coco_anchors))
print("Scaled COCO anchors: {}".format(scaled_coco_anchors))

COCO anchors: [[0.02196365 0.02763756]
 [0.03266837 0.07623702]
 [0.06801535 0.17382282]
 [0.09472818 0.06342981]
 [0.14755442 0.31924604]
 [0.21997163 0.13340219]
 [0.27987354 0.59023727]
 [0.49882851 0.30312032]
 [0.74084004 0.75263764]]
Scaled COCO anchors: [[ 13.35389981  16.80363535]
 [ 19.86237173  46.3521068 ]
 [ 41.35333578 105.68427486]
 [ 57.5947337   38.56532303]
 [ 89.71308806 194.10159485]
 [133.74275322  81.10853279]
 [170.16311387 358.8642621 ]
 [303.28773233 184.29715545]
 [450.43074487 457.60368498]]


These anchors are used as priors for the detections. I couldn't reproduce the exact values from the YOLOv3 paper, so I will just use the anchor values from the paper. The k-means output depends on the randomly chosen initial centroids, so it can differ from run to run.