# Analysis of object size

The purpose of this notebook is to analyze the object sizes in the training data and divide them into three size classes (small, medium and large). These clusters can later be used to assess the quality of the predictions for different coin/object sizes.

In [None]:
# Mount Google Drive at /content/drive; the paths used further down in this
# notebook all live below this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


We have forked the package [rafaelpadilla/review_object_detection_metrics](https://github.com/rafaelpadilla/review_object_detection_metrics) and customized it for this use case. If you would like to run this notebook, contact us and we will provide the customized files. (Note: this notebook does not need to be executed to train the final model.)

In [None]:
# Make the customized metrics fork importable (see the note above): create a
# `metrics` directory under the Python 3.7 library path and copy the fork's
# `src` directory from Google Drive into it, so that `metrics.src...` resolves.
# NOTE(review): the target path is tied to Python 3.7 - confirm it matches the
# interpreter version of the runtime before executing.
!mkdir /usr/lib/python3.7/metrics
!cp -R /content/drive/MyDrive/review_object_detection_metrics-main/src /usr/lib/python3.7/metrics/src

In [None]:
!pip install PyQt5
!pip install -qU torch_snippets

# Imports

In [None]:
import copy
import glob
import torch
import time
import statistics
import pandas as pd
import numpy as np

from IPython import display
from torch_snippets import *
from os.path import join
from PIL import Image
from metrics.src.bounding_box import BoundingBox
from metrics.src.evaluators import coco_evaluator, pascal_voc_evaluator
from metrics.src.bounding_box import BoundingBox
from metrics.src.utils.enumerators import BBFormat, BBType, CoordinatesType, MethodAveragePrecision

In [None]:
# Target resolution (in pixels) that images are resized to.
width = height = 1000

# Root directory of the euro coin image dataset on the mounted drive.
IMAGE_ROOT = '/content/drive/MyDrive/data/euro-coin-dataset-master'

# Number of samples per batch handed to the data loader.
batch_size = 1

In [None]:
# Directory on the mounted drive that holds the annotation CSV files.
rootdir = '/content/drive/MyDrive/data'

# Load the training annotations. `join` is the explicitly imported
# `os.path.join`; using it avoids depending on an `os` name that is never
# imported directly in this notebook.
df_train = pd.read_csv(join(rootdir, 'train.csv'))

In [None]:
# Coin classes in the order of their numeric target ids (ids start at 1;
# id 0 is reserved for the background class).
_COIN_CLASSES = [1, 10, 100, 2, 20, 200, 5, 50]

# Class label -> integer target id.
label2target = {coin: idx for idx, coin in enumerate(_COIN_CLASSES, start=1)}
label2target['background'] = 0

# Integer target id -> class label (inverse of `label2target`).
target2label = {0: 'background'}
target2label.update({idx: coin for idx, coin in enumerate(_COIN_CLASSES, start=1)})

# Total number of classes, background included.
num_classes = len(label2target)

In [None]:
def preprocess_image(img):
  """Turn an image array into a float tensor with its last axis moved first.

  Args:
    img: three-dimensional array-like accepted by `torch.tensor`
      (presumably height x width x channels - confirm against the caller).

  Returns:
    A float tensor whose axes are reordered as (2, 0, 1), placed on `device`
    (a name that is not defined in this function and must be in scope).
  """
  as_tensor = torch.tensor(img)
  channels_first = as_tensor.permute(2, 0, 1)
  on_device = channels_first.to(device)
  return on_device.float()

def preprocess_traindata(img, data, target_width, target_height):
  """Resize an image to the target size and rescale its boxes accordingly.

  Args:
    img: PIL image; its `size` and `resize` are used.
    data: 2-D NumPy array with one row per box. Columns 0 and 2 are scaled
      by the width ratio and columns 1 and 3 by the height ratio, i.e. the
      expected column order is (xmin, ymin, xmax, ymax).
      NOTE: the array is modified in place.
    target_width: width in pixels of the resized image.
    target_height: height in pixels of the resized image.

  Returns:
    A tuple `(img, data)`: the resized image as a NumPy array with its pixel
    values divided by 255, and the rescaled box array (values truncated to
    integers).
  """
  current_width, current_height = img.size
  data[:,[0,2]] = (data[:,[0,2]] / current_width * target_width).astype(int)
  data[:,[1,3]] = (data[:,[1,3]] / current_height * target_height).astype(int)
  # Resize to the requested target size; using the module-level width/height
  # here would desynchronise the image from the boxes scaled above whenever
  # the arguments differ from those globals.
  img = np.array(img.resize((target_width, target_height), resample=Image.BILINEAR))/255.
  return img, data

In [None]:
class CoinDataset(torch.utils.data.Dataset):
  """Dataset that yields one resized image and its ground-truth boxes per file.

  Each item corresponds to one unique value of the `filename` column of the
  annotation frame and is returned as `(img, target)`, where `target` is a
  dictionary with the keys `boxes` and `labels`.
  """
  # Resolution every sample is resized to; taken from the module-level
  # `width`/`height` at the time the class is defined.
  w, h = width, height

  def __init__(self, df, image_dir=IMAGE_ROOT, transformer = None, threshold = 0.15):
    """Index the image files and the annotated file names.

    Args:
      df: annotation frame with the columns filename, pose, xmin, ymin,
        xmax and ymax.
      image_dir: directory whose entries two levels down are the image files.
      transformer: stored on the instance; currently has no effect on the
        returned samples.
      threshold: stored on the instance; not used inside this class.
    """
    self.image_dir = image_dir
    # Search the directory that was passed in. The global IMAGE_ROOT used to
    # be hard-coded here, which silently ignored a custom `image_dir`.
    self.files = glob.glob(image_dir + '/*/*')
    self.df = df
    self.image_infos = df['filename'].unique()
    self.transformer = transformer
    self.threshold = threshold

  def __getitem__(self, ix):
    """Return the `ix`-th image and its target dictionary.

    Returns:
      A tuple `(img, target)`: `img` is the output of `preprocess_image`,
      `target['boxes']` a float tensor of the rescaled box coordinates and
      `target['labels']` a long tensor of the target ids of the boxes.
    """
    # Look up the image file that belongs to the ix-th unique file name.
    image_id = self.image_infos[ix]
    img_path = find(image_id, self.files)

    # `convert` returns a new, fully loaded image, so the underlying file
    # can be closed right away instead of staying open until garbage collection.
    with Image.open(img_path) as image_file:
      img = image_file.convert('RGB')

    # All annotation rows of this image: class labels and box corners.
    rows = self.df[self.df['filename'] == image_id]
    labels = rows['pose'].values.tolist()
    data = rows[['xmin','ymin','xmax','ymax']].values

    if self.transformer:
      pass # not relevant since for testing data augmentation is not needed

    img, data = preprocess_traindata(img, data, self.w, self.h)

    boxes = data.astype(np.uint32).tolist() # absolute pixel coordinates
    # torch FRCNN expects ground truths as a dictionary of tensors
    target = {
      'boxes': torch.tensor(boxes, dtype=torch.float32),
      'labels': torch.tensor([label2target[i] for i in labels], dtype=torch.long),
    }
    img = preprocess_image(img)

    return img, target

  def collate_fn(self, batch):
    """Regroup a list of `(img, target)` pairs into `(images, targets)` tuples."""
    return tuple(zip(*batch))

  def __len__(self):
    """Number of unique file names in the annotation frame."""
    return len(self.image_infos)

In [None]:
# Dataset over the training annotations (default image directory, no augmentation).
train_ds = CoinDataset(df=df_train)

In [None]:
# Iterate over the training set in its stored order, `batch_size` samples at a
# time; incomplete final batches are dropped.
train_loader = DataLoader(
  train_ds,
  batch_size=batch_size,
  collate_fn=train_ds.collate_fn,
  drop_last=True,
)

# Analysis

## Create Bounding Boxes

In [None]:
# Collect one ground-truth BoundingBox per annotated object of the training set.
bounding_boxes_train = []
n_batches = len(train_loader)

for ix, (images, targets) in enumerate(train_loader):
  # Only the first sample of every batch is read.
  target = targets[0]
  # Ground truth
  for box, label in zip(target['boxes'], target['labels']):
    bounding_boxes_train.append(
      BoundingBox(
        image_name        = str(ix),
        class_id          = target2label[int(label)],
        coordinates       = list(box),
        type_coordinates  = CoordinatesType.ABSOLUTE,
        bb_type           = BBType.GROUND_TRUTH,
        confidence        = None,
        format            = BBFormat.XYX2Y2
      )
    )
  # Report the processed fraction every 20 batches, overwriting the same line.
  if ix % 20 == 0:
    print(ix / n_batches, end = '\r')



In [None]:
# Area of every ground-truth bounding box, in the order the boxes were collected.
areas = [bounding_box.get_area() for bounding_box in bounding_boxes_train]

## Cluster object sizes

In [None]:
from sklearn.cluster import KMeans
import numpy as np
import math

# Cluster the one-dimensional box areas into three size groups.
# `n_init` is set explicitly because its default differs between scikit-learn
# releases; together with the fixed `random_state` this keeps the clusters
# (and the boundaries derived from them) reproducible across versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0).fit(np.array(areas).reshape(-1, 1))

Determine the size thresholds at which an object moves from one cluster to the next.

In [None]:
# Sample 100000 evenly spaced areas between the smallest and the largest
# observed box area and assign each sample to its cluster.
smallest_area, largest_area = min(areas), max(areas)
x = np.linspace(smallest_area, largest_area, num=100000)
preds = kmeans.predict(x[:, None])

In [5]:
# Indices i at which the cluster of sample i differs from that of sample i + 1.
changes = np.flatnonzero(preds[1:] != preds[:-1])
# The square root turns each boundary area into the side length of a square
# with that area.
boundary_side_lengths = np.sqrt(x[changes])
print('Object boundaries for S and M sized objects', boundary_side_lengths)

Object boundaries for S and M sized objects [98.50850764 201.88434696]
