In [1]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"]="-1"   

# from keras.optimizers import Adam
import tensorflow as tf
from tensorflow import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from collections import namedtuple, OrderedDict
import glob
import xml.etree.ElementTree as ET
import cv2
import io
from PIL import Image
import dataset_util
import numpy as np

In [2]:
# Anchor configuration for the notebook.
# NOTE(review): the module-level `aspect_ratios` and `scales` below are not read
# directly by the functions visible in this notebook (they receive these values
# as arguments); `img_height`, `img_width` and `predictor_sizes` are read as
# globals.

# Width-to-height ratios of the anchors placed at every feature-map cell
# (AnchorBox computes anchor width as ratio * anchor height).
aspect_ratios = [0.4,0.6,0.8]
# Anchor height as a fraction of img_height; AnchorBox pairs scales[i] with
# predictor_sizes[i].
scales = [0.1,0.2,0.4]
# Image size, in pixels, in which anchor boxes and rescaled ground-truth boxes
# are expressed.
img_height = 288
img_width = 512
# [height, width] of each feature map that receives anchors; with the image size
# above these correspond to cell strides of 8, 16 and 32 pixels.
predictor_sizes = [[36,64], [18,32], [9,16]]


In [3]:
class AnchorBox:
    """Generates anchor boxes for a set of feature maps.

    One group of anchors is generated per feature map: `scales[i]` is paired
    with `predictor_sizes[i]`. Every cell of a feature map receives
    `len(aspect_ratios)` anchors centred on that cell. All anchors of one
    feature map share the height `scales[i] * img_height`; the width of each
    anchor is `aspect_ratio * height`. Each anchor box is of the format
    `[x, y, width, height]`, where `(x, y)` is the box centre, expressed in
    pixels of the `img_width` x `img_height` image.

    Attributes:
      aspect_ratios: A list of float values, the width / height ratios of the
        anchor boxes at each location on a feature map.
      scales: A list of float values, the anchor height of each feature map
        as a fraction of the image height (one entry per feature map).
      predictor_sizes: A list of `[feature_height, feature_width]` pairs, one
        per feature map.
      img_height: Image height in pixels, or `None` to use the module-level
        `img_height` at the time anchors are generated.
      img_width: Image width in pixels, or `None` to use the module-level
        `img_width` at the time anchors are generated.
    """

    def __init__(self, aspect_ratios, scales, predictor_sizes,
                 img_height=None, img_width=None):
        self.aspect_ratios = aspect_ratios
        self.scales = scales
        self.predictor_sizes = predictor_sizes
        # Optional explicit image size; None keeps the original behaviour of
        # reading the notebook-level globals.
        self.img_height = img_height
        self.img_width = img_width

    def _image_size(self):
        """Returns `(height, width)` of the image, falling back to the globals."""
        height = img_height if self.img_height is None else self.img_height
        width = img_width if self.img_width is None else self.img_width
        return height, width

    def _get_anchors(self, feature_height, feature_width, scale):
        """Generates anchor boxes for a single feature map.

        Arguments:
          feature_height: An integer representing the height of the feature map.
          feature_width: An integer representing the width of the feature map.
          scale: A float, the anchor height as a fraction of the image height.

        Returns:
          anchor boxes with the shape
          `(feature_height * feature_width * len(aspect_ratios), 4)`, ordered
          row by row, then column by column, then by aspect ratio.
        """
        image_height, image_width = self._image_size()
        num_ratios = len(self.aspect_ratios)

        # Cell centres are built from an integer index range instead of a
        # float-step range: a float step can produce one element too many when
        # the stride is not exactly representable, which would break the
        # shape agreement with the tiled sizes below.
        rx = (tf.range(feature_width, dtype=tf.float32) + 0.5) * (image_width / feature_width)
        ry = (tf.range(feature_height, dtype=tf.float32) + 0.5) * (image_height / feature_height)

        # meshgrid uses 'xy' indexing, so centers has shape (H, W, 2) as (x, y).
        centers = tf.stack(tf.meshgrid(rx, ry), axis=-1)
        centers = tf.expand_dims(centers, axis=-2)
        centers = tf.tile(centers, [1, 1, num_ratios, 1])

        # Explicit float32 so that all-integer aspect ratios do not create an
        # int32 tensor that cannot be multiplied with the float heights.
        h = tf.constant([scale * image_height] * num_ratios, dtype=tf.float32)
        w = tf.constant(self.aspect_ratios, dtype=tf.float32) * h
        wh = tf.stack((w, h), axis=-1)[None, None, :]
        wh = tf.tile(wh, [feature_height, feature_width, 1, 1])

        anchors = tf.concat([centers, wh], axis=-1)
        return tf.reshape(anchors, [-1, 4])

    def get_anchors(self):
        """Generates anchor boxes for all configured feature maps.

        Returns:
          anchor boxes for all the feature maps, concatenated in the order of
            `scales` as a single tensor with shape `(total_anchors, 4)`
        """
        anchors = []
        for i, scale in enumerate(self.scales):
            feature_height, feature_width = self.predictor_sizes[i][0], self.predictor_sizes[i][1]
            anchors.append(self._get_anchors(feature_height, feature_width, scale))
        return tf.concat(anchors, axis=0)

In [4]:
def compute_iou(boxes1, boxes2):
    """Builds the pairwise intersection-over-union matrix of two box sets.

    Arguments:
      boxes1: `(N, 4)` boxes, each in the format `[x, y, width, height]`.
      boxes2: `(M, 4)` boxes, each in the format `[x, y, width, height]`.

    Returns:
      `(N, M)` tensor whose entry `[i, j]` is the IOU of `boxes1[i]` and
        `boxes2[j]`, clipped to `[0, 1]`.
    """
    # Box areas are taken from the width/height columns of the inputs as given.
    area1 = boxes1[:, 2] * boxes1[:, 3]
    area2 = boxes2[:, 2] * boxes2[:, 3]

    # Corner form `[xmin, ymin, xmax, ymax]`; the helper also clips negative
    # coordinates to zero.
    corners1 = convert_wh_to_xy(boxes1)
    corners2 = convert_wh_to_xy(boxes2)

    # Broadcasting (N, 1, 2) against (M, 2) yields every pair of boxes.
    top_left = tf.maximum(corners1[:, None, :2], corners2[:, :2])
    bottom_right = tf.minimum(corners1[:, None, 2:], corners2[:, 2:])
    overlap_wh = tf.maximum(0.0, bottom_right - top_left)
    overlap_area = overlap_wh[:, :, 0] * overlap_wh[:, :, 1]

    # The 1e-8 floor guards the division for degenerate (zero-area) pairs.
    union = tf.maximum(area1[:, None] + area2 - overlap_area, 1e-8)
    return tf.clip_by_value(overlap_area / union, 0.0, 1.0)

def match_iou(gt_boxes, pred_boxes, iou_threshold=0.4):
    """Finds the best IOU of every ground-truth box against `pred_boxes`.

    Arguments:
      gt_boxes: `(N, 4)` boxes in the format `[x, y, width, height]`.
      pred_boxes: `(M, 4)` boxes in the format `[x, y, width, height]`.
      iou_threshold: Minimum best IOU (inclusive) for a ground-truth box to
        count as matched. Defaults to 0.4.

    Returns:
      A tuple `(max_iou, positive_mask)`: `max_iou` has shape `(N,)` and holds
        the highest IOU of each ground-truth box over all `pred_boxes`;
        `positive_mask` is the boolean tensor `max_iou >= iou_threshold`.
    """
    iou_matrix = compute_iou(gt_boxes, pred_boxes)
    max_iou = tf.reduce_max(iou_matrix, axis=1)
    positive_mask = tf.greater_equal(max_iou, iou_threshold)
    return max_iou, positive_mask

def convert_wh_to_xy(boxes):
    """Converts boxes from centre form to corner form.

    Arguments:
      boxes: Array-like with shape `(N, 4)` in the format
        `[x, y, width, height]`, where `(x, y)` is the box centre.

    Returns:
      A new NumPy array of the same shape in the format
        `[xmin, ymin, xmax, ymax]`, with negative coordinates clipped to 0.
        Floating-point inputs keep their dtype; any other dtype is promoted
        to float so that half-extents are not truncated.
    """
    boxes = np.asarray(boxes)
    # An integer result array would silently drop the `.5` of odd sizes.
    if not np.issubdtype(boxes.dtype, np.floating):
        boxes = boxes.astype(float)

    half_size = boxes[:, 2:4] / 2
    corners = np.empty_like(boxes)
    corners[:, 0:2] = boxes[:, 0:2] - half_size
    corners[:, 2:4] = boxes[:, 0:2] + half_size
    corners[corners < 0] = 0.0
    return corners

def convert_xy_to_wh(boxes):
    """Converts boxes from corner form to centre form.

    Arguments:
      boxes: Array-like with shape `(N, 4)` in the format
        `[xmin, ymin, xmax, ymax]`.

    Returns:
      A new NumPy array of the same shape in the format
        `[x, y, width, height]`, where `(x, y)` is the box centre.
        Floating-point inputs keep their dtype; any other dtype is promoted
        to float so that half-pixel centres are not truncated.
    """
    boxes = np.asarray(boxes)
    # Integer corners (e.g. parsed from annotations) can have `.5` centres.
    if not np.issubdtype(boxes.dtype, np.floating):
        boxes = boxes.astype(float)

    centre_size = np.empty_like(boxes)
    centre_size[:, 0:2] = (boxes[:, 0:2] + boxes[:, 2:4]) / 2
    centre_size[:, 2:4] = boxes[:, 2:4] - boxes[:, 0:2]
    return centre_size

In [17]:
def get_gt_boxes(xml_path):
    """Reads the bounding boxes of an annotation XML file.

    Arguments:
      xml_path: Path of the XML file to parse.

    Returns:
      A list with one `[xmin, ymin, xmax, ymax]` list of integers for each
        `object` element directly below the document root, in document order.
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    root = ET.parse(xml_path).getroot()
    return [
        [int(obj.find('bndbox').find(tag).text) for tag in corner_tags]
        for obj in root.findall('object')
    ]

def compute_anchor_recall_from_xml(scales, aspect_ratios, images_folder, iou_threshold=0.1,
                                   sample_stride=20):
    """Measures how well a set of anchors covers annotated ground-truth boxes.

    Anchors are generated for the module-level `predictor_sizes`. For every
    sampled annotation file the ground-truth boxes are rescaled from the size
    of the matching `.jpg` image to `img_width` x `img_height`, and the best
    IOU of each ground-truth box against all anchors is recorded.

    Arguments:
      scales: Anchor scales, passed to `AnchorBox`.
      aspect_ratios: Anchor aspect ratios, passed to `AnchorBox`.
      images_folder: Folder holding `*.xml` annotations, each next to a
        `.jpg` image with the same base name.
      iou_threshold: A ground-truth box counts as recalled when its best IOU
        is strictly greater than this value.
      sample_stride: Only every `sample_stride`-th annotation file (in sorted
        path order) is evaluated. Defaults to 20.

    Returns:
      The fraction of ground-truth boxes whose best anchor IOU exceeds
        `iou_threshold`; NaN when no ground-truth box was found.

    Raises:
      FileNotFoundError: If the image belonging to an annotation cannot be read.
    """
    anchor_boxes_wh = AnchorBox(aspect_ratios, scales, predictor_sizes).get_anchors().numpy()

    # glob returns paths in an unspecified order; sorting makes the subsample
    # (and therefore the reported recall) reproducible.
    xml_paths = sorted(glob.glob(f'{images_folder}/*.xml'))[::sample_stride]

    per_image_ious = []
    for xml_path in xml_paths:
        boxes = get_gt_boxes(xml_path)
        if not boxes:
            # Annotation without objects: nothing to recall.
            continue

        # splitext only strips the final extension, so dots elsewhere in the
        # path (folder names, './relative' paths) are left intact.
        img_path = os.path.splitext(xml_path)[0] + '.jpg'
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f'Could not read image for annotation: {img_path}')
        src_height, src_width = img.shape[:2]

        # Float array: rescaled coordinates written back into an integer array
        # would be truncated.
        gt_boxes = np.array(boxes, dtype=float)
        gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] / src_width * img_width
        gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] / src_height * img_height

        gt_boxes_wh = convert_xy_to_wh(gt_boxes)
        max_iou, _ = match_iou(gt_boxes_wh.astype(float), anchor_boxes_wh)
        per_image_ious.append(max_iou.numpy())

    if not per_image_ious:
        return float('nan')
    all_ious = np.concatenate(per_image_ious)
    return np.mean(all_ious > iou_threshold)





In [22]:
# Fraction of sampled ground-truth boxes whose best anchor IOU exceeds 0.3.
# NOTE(review): the aspect ratios passed here ([0.2,0.4,0.8]) differ from the
# module-level `aspect_ratios` ([0.4,0.6,0.8]) - confirm which set is intended.
# NOTE(review): `images_folder` is an absolute, machine-specific path; the cell
# only runs where that drive/folder exists.
compute_anchor_recall_from_xml(scales=[0.1,0.2,0.4],
                               aspect_ratios=[0.2,0.4,0.8],
                               images_folder='W:/Work/data/DATASETS/PERSON/PIVO_pivo/09_08/Images_xml',
                              iou_threshold=0.3)

1.0