<a href="https://colab.research.google.com/github/DhruvMakwana/Global-Wheat-Detection/blob/master/Generate_Anchors_for_YOLO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# importing libraries
from os import listdir
from os.path import isfile, join
import argparse
import numpy as np
import sys
import os
import shutil
import random
import math

In [0]:
# Network input width and height, as set in the YOLO cfg file.
# write_anchors_to_file multiplies each centroid's width/height by (value / 32.0)
# before writing the anchors.
width_in_cfg_file = 1024.0
height_in_cfg_file = 1024.0

In [0]:
def IOU(x, centroids):
  """Compute the IoU of one box against every centroid, comparing sizes only.

  Only widths and heights are used, so the overlap is what two boxes would
  have if they were aligned at the same corner.

  Args:
    x: a (width, height) pair for a single box.
    centroids: iterable of (width, height) pairs, one per cluster centre.

  Returns:
    A 1-D numpy array with one IoU value per centroid, in input order.
  """
  def _overlap_ratio(anchor):
    a_w, a_h = anchor
    b_w, b_h = x
    anchor_wider = a_w >= b_w
    anchor_taller = a_h >= b_h
    if anchor_wider and anchor_taller:
      # the box fits entirely inside the centroid
      return b_w * b_h / (a_w * a_h)
    if anchor_wider and a_h <= b_h:
      # centroid is wider but shorter: overlap is b_w x a_h
      return b_w * a_h / (b_w * b_h + (a_w - b_w) * a_h)
    if a_w <= b_w and anchor_taller:
      # centroid is narrower but taller: overlap is a_w x b_h
      return a_w * b_h / (b_w * b_h + a_w * (a_h - b_h))
    # the centroid fits entirely inside the box
    return (a_w * a_h) / (b_w * b_h)

  return np.array([_overlap_ratio(anchor) for anchor in centroids])

In [0]:
def avg_IOU(X, centroids):
  """Return the mean, over all boxes in X, of each box's best IoU with any centroid.

  Args:
    X: 2-D array with one (width, height) row per box.
    centroids: (width, height) pairs of the cluster centres.

  Returns:
    The average of the per-box maximum IoU.
  """
  num_boxes, _ = X.shape
  total = 0.0
  for box in X:
    # IOU() yields one score per centroid; only the best match counts.
    total += max(IOU(box, centroids))
  return total / num_boxes

In [0]:
def write_anchors_to_file(centroids, X, anchor_file):
  """Scale the centroids to anchor units and write them, plus the average IoU, to a file.

  The file gets two lines: the anchors as ``w,h`` pairs separated by ``, ``
  and sorted by ascending width, followed by the average best-match IoU of
  ``X`` against the unscaled ``centroids``.

  Args:
    centroids: array of shape (k, 2) with one (width, height) row per cluster.
      It is not modified; scaling is applied to a copy.
    X: array of shape (n, 2) with the (width, height) of every annotated box.
    anchor_file: path of the text file to create or overwrite.
  """
  anchors = centroids.copy()
  print(anchors.shape)

  # Scale by the cfg input size divided by 32.0
  # (presumably the network's downsampling stride -- confirm against the cfg).
  for anchor in anchors:
    anchor[0] *= width_in_cfg_file / 32.0
    anchor[1] *= height_in_cfg_file / 32.0

  sorted_indices = np.argsort(anchors[:, 0])
  print('Anchors = ', anchors[sorted_indices])

  # Joining with ', ' leaves no trailing comma after the last anchor, and
  # formatting scalar elements avoids passing one-element arrays to '%f'.
  anchor_line = ', '.join(
      '%0.2f,%0.2f' % (anchors[i, 0], anchors[i, 1]) for i in sorted_indices)
  # Computed before the file is opened so a failure here leaves no partial file.
  mean_iou = avg_IOU(X, centroids)

  # The context manager guarantees the file is flushed and closed.
  with open(anchor_file, 'w') as f:
    f.write(anchor_line + '\n')
    f.write('%f\n' % mean_iou)
  print()

In [0]:
def kmeans(X,centroids,eps,anchor_file):
  """Cluster box sizes with k-means using (1 - IoU) as the distance.

  Iterates until the cluster assignment of every box stops changing, then
  prints the centroids and writes them with write_anchors_to_file().

  Args:
    X: array of shape (N, 2) with one (width, height) row per box.
    centroids: array of shape (k, 2) with the initial cluster centres.
      It is updated in place on every iteration.
    eps: currently unused; kept so existing callers keep working.
    anchor_file: path passed through to write_anchors_to_file().

  Returns:
    None. The result is the in-place updated ``centroids`` and the file written.
  """
  N = X.shape[0]
  k, dim = centroids.shape
  prev_assignments = np.ones(N) * (-1)  # -1 never matches a real cluster index
  iteration = 0
  old_D = np.zeros((N, k))

  while True:
    iteration += 1
    # D[i, j] is the distance from box i to centroid j; D.shape = (N, k)
    D = np.array([1 - IOU(X[i], centroids) for i in range(N)])

    print("iter {}: dists = {}".format(iteration, np.sum(np.abs(old_D - D))))

    # assign samples to centroids
    assignments = np.argmin(D, axis=1)

    if (assignments == prev_assignments).all():
      print("Centroids = ",centroids)
      write_anchors_to_file(centroids, X, anchor_file)
      return

    # calculate new centroids (builtin float: the np.float alias no longer exists)
    centroid_sums = np.zeros((k, dim), float)
    for i in range(N):
      centroid_sums[assignments[i]] += X[i]
    for j in range(k):
      members = np.sum(assignments == j)
      # An empty cluster keeps its previous centroid; dividing by zero would
      # turn it into NaN and poison every later IoU computation.
      if members > 0:
        centroids[j] = centroid_sums[j] / members

    prev_assignments = assignments.copy()
    old_D = D.copy()

In [0]:
# Text file listing one image path per line; each path is rewritten to its label file when read.
filelist = "/content/drive/My Drive/Dataset/train.txt"
# Directory that receives the generated anchors<k>.txt files.
output_dir = "/content/drive/My Drive/Dataset/Anchors"
# Number of clusters (anchors) to generate; 0 runs a sweep over 1 through 10 clusters.
clusters = 9

In [8]:
# Create the output directory on first run. makedirs also creates any missing
# parent directories, and the message is printed only after creation succeeded.
if not os.path.exists(output_dir):
  os.makedirs(output_dir)
  print("Created")

Created


In [9]:
%%time
# Read the list of training images, one path per line; blank lines are skipped.
with open(filelist) as f:
  lines = [line.rstrip('\n') for line in f if line.strip()]

# Collect the (width, height) of every annotated box across all label files.
annotation_dims = []
for image_path in lines:
  # Map the image path to its label file.
  label_path = image_path.replace('JPEGImages','labels')
  label_path = label_path.replace('.jpg','.txt')
  label_path = label_path.replace('.jpeg','.txt')
  print(label_path)

  with open(label_path) as label_file:
    for row in label_file:
      fields = row.split()
      if not fields:
        continue  # tolerate blank lines in label files
      # Everything from the 4th field on must be exactly (width, height);
      # presumably rows are "class x y w h" -- confirm against the label format.
      w, h = fields[3:]
      annotation_dims.append((float(w), float(h)))
annotation_dims = np.array(annotation_dims)
eps = 0.005

if annotation_dims.shape[0] == 0:
  raise ValueError('no annotations found in the label files listed in %s' % filelist)

# clusters == 0 means "sweep": generate anchor files for 1 through 10 clusters.
cluster_counts = range(1, 11) if clusters == 0 else [clusters]
for num_clusters in cluster_counts:
  anchor_file = join(output_dir,'anchors%d.txt'%(num_clusters))
  # Sample distinct boxes so no two initial centroids come from the same annotation.
  indices = random.sample(range(annotation_dims.shape[0]), num_clusters)
  # Indexing with a list copies the rows, so kmeans' in-place centroid updates
  # leave annotation_dims untouched.
  centroids = annotation_dims[indices]
  kmeans(annotation_dims, centroids, eps, anchor_file)
  print('centroids.shape', centroids.shape)

/content/drive/My Drive/Dataset/Data/eddff557c.txt
/content/drive/My Drive/Dataset/Data/d63350a3d.txt
/content/drive/My Drive/Dataset/Data/f9b92c9e9.txt
/content/drive/My Drive/Dataset/Data/49c3e4f6e.txt
/content/drive/My Drive/Dataset/Data/2cae223a8.txt
/content/drive/My Drive/Dataset/Data/f016b9731.txt
/content/drive/My Drive/Dataset/Data/c49ed83ce.txt
/content/drive/My Drive/Dataset/Data/c83d339fe.txt
/content/drive/My Drive/Dataset/Data/383817085.txt
/content/drive/My Drive/Dataset/Data/d781eb0c1.txt
/content/drive/My Drive/Dataset/Data/229d8a499.txt
/content/drive/My Drive/Dataset/Data/45aabed56.txt
/content/drive/My Drive/Dataset/Data/91f19054e.txt
/content/drive/My Drive/Dataset/Data/00b70a919.txt
/content/drive/My Drive/Dataset/Data/dc7f9cee6.txt
/content/drive/My Drive/Dataset/Data/e290bc809.txt
/content/drive/My Drive/Dataset/Data/233cb8750.txt
/content/drive/My Drive/Dataset/Data/c0fa19bac.txt
/content/drive/My Drive/Dataset/Data/509a7a9b5.txt
/content/drive/My Drive/Dataset