In [24]:
import os
import cv2
import tensorflow as tf
from tensorflow import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt
import glob
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from models.ssd_model import build_model
from keras_loss_function.keras_ssd_loss import SSDLoss
import xml.etree.ElementTree as ET
import pandas as pd

In [25]:
img_height = 32 # Height of the input images
img_width = 32 # Width of the input images
img_channels = 3 # Number of color channels of the input images
n_classes = 76 # Number of positive classes

intensity_mean = 127.5 # Set this to your preference (maybe `None`). The current settings transform the input pixel values to the interval `[-1,1]`.
intensity_range = 127.5 # Set this to your preference (maybe `None`). The current settings transform the input pixel values to the interval `[-1,1]`.
# scales = [1.5/52, 1.5/26, 1.5/18, 1.5/15]
scales = [0.05, 0.08, 0.12, 0.2] # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`.
# aspect_ratios = [0.5, 1.0, 2.0] # The list of aspect ratios for the anchor boxes
aspect_ratios = [0.5, 1.0, 2]
two_boxes_for_ar1 = True # Whether or not you want to generate two anchor boxes for aspect ratio 1
steps = None # In case you'd like to set the step sizes for the anchor box grids manually; not recommended
offsets = None # In case you'd like to set the offsets for the anchor box grids manually; not recommended
clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
variances = [1.0, 1.0, 1.0, 1.0] # The list of variances by which the encoded target coordinates are scaled
normalize_coords = True # Whether or not the model is supposed to use coordinates relative to the image size
predicted_layers = [0,1,2]

In [26]:
# 1: Build the Keras model

K.clear_session() # Clear previous models from memory.

model = build_model(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_global=aspect_ratios,
                    aspect_ratios_per_layer=None,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=intensity_mean,
                    divide_by_stddev=intensity_range)

# 2: Optional: Load some weights

# model.load_weights('ssd_mobnet\ssd3_epoch-02_loss-1.4484.h5', by_name=True)

# 3: Instantiate an Adam optimizer and the SSD loss function and compile the model

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 32, 32, 3)]  0           []                               
                                                                                                  
 identity_layer (Lambda)        (None, 32, 32, 3)    0           ['input_2[0][0]']                
                                                                                                  
 input_mean_normalization (Lamb  (None, 32, 32, 3)   0           ['identity_layer[0][0]']         
 da)                                                                                              
                                                                                                  
 input_stddev_normalization (La  (None, 32, 32, 3)   0           ['input_mean_normalization[

                                                                                                  
 anchors_concat (Concatenate)   (None, 84, 8)        0           ['anchors1_reshape[0][0]',       
                                                                  'anchors2_reshape[0][0]',       
                                                                  'anchors3_reshape[0][0]']       
                                                                                                  
 predictions (Concatenate)      (None, 84, 89)       0           ['classes_softmax[0][0]',        
                                                                  'boxes_concat[0][0]',           
                                                                  'anchors_concat[0][0]']         
                                                                                                  
Total params: 2,454,140
Trainable params: 2,442,028
Non-trainable params: 12,112
____________________________

In [27]:
# # TODO: Set the path to the `.h5` file of the model to be loaded.
# model_path = 'ssd_for_object/3stola288_[0.05, 0.08, 0.12, 0.2]_[0.7, 1.0, 1.3].h5'

# # We need to create an SSDLoss object in order to pass that to the model loader.
# ssd_loss = SSDLoss(neg_pos_ratio=1, alpha=0.1)

# K.clear_session() # Clear previous models from memory.

# model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
#                                                'compute_loss': ssd_loss.compute_loss})
# model.summary()

In [28]:
def compute_iou(boxes1, boxes2):
    """Computes pairwise IOU matrix for given two sets of boxes

    Arguments:
      boxes1: A tensor with shape `(N, 4)` representing bounding boxes
        where each box is of the format `[x, y, width, height]`.
        boxes2: A tensor with shape `(M, 4)` representing bounding boxes
        where each box is of the format `[x, y, width, height]`.

    Returns:
      pairwise IOU matrix with shape `(N, M)`, where the value at ith row
        jth column holds the IOU between ith box and jth box from
        boxes1 and boxes2 respectively.
    """
    boxes1_corners = convert_wh_to_xy(boxes1)
    boxes2_corners = convert_wh_to_xy(boxes2)
    lu = tf.maximum(boxes1_corners[:, None, :2], boxes2_corners[:, :2])
    rd = tf.minimum(boxes1_corners[:, None, 2:], boxes2_corners[:, 2:])
    intersection = tf.maximum(0.0, rd - lu)
    intersection_area = intersection[:, :, 0] * intersection[:, :, 1]
    boxes1_area = boxes1[:, 2] * boxes1[:, 3]
    boxes2_area = boxes2[:, 2] * boxes2[:, 3]
    union_area = tf.maximum(
        boxes1_area[:, None] + boxes2_area - intersection_area, 1e-8
    )
    return tf.clip_by_value(intersection_area / union_area, 0.0, 1.0)

def match_iou(gt_boxes, pred_boxes):
    iou_matrix = compute_iou(gt_boxes, pred_boxes)
#     print(iou_matrix)
    max_iou = tf.reduce_max(iou_matrix, axis=1)
    positive_mask = tf.greater_equal(max_iou, 0.4)

    return max_iou, positive_mask

def convert_wh_to_xy(boxes):
    convert_boxes = np.empty_like(boxes)
    convert_boxes[:,[0,1]] = boxes[:,[0,1]] - boxes[:,[2,3]]/2
    convert_boxes[:,[2,3]] = boxes[:,[0,1]] + boxes[:,[2,3]]/2
    convert_boxes[convert_boxes<0] = 0.0
    return convert_boxes

def convert_xy_to_wh(boxes):
    convert_boxes = np.empty_like(boxes)
    convert_boxes[:,[2,3]] = boxes[:,[2,3]] - boxes[:,[0,1]]
    convert_boxes[:,[0,1]] = (boxes[:,[0,1]] + boxes[:,[2,3]])/2
    return convert_boxes

In [29]:
predictor_sizes = [model.get_layer('classes1').output_shape[1:3],
                   model.get_layer('classes2').output_shape[1:3],
                  model.get_layer('classes3').output_shape[1:3]]

def get_anchors(scales, aspect_ratios, n_predictor_layers = 3, coords='centroids'):
    anchor_boxes_wh = []
    
    steps = [None] * n_predictor_layers
    offsets = [None] * n_predictor_layers

#     print(scales)
    for i, size in enumerate(predictor_sizes):
        
        x = np.zeros((1,) + size + (3,))
        anchors = AnchorBoxes(img_height,
                          img_width,
                          this_scale=scales[i],
                          next_scale=scales[i+1],
                          aspect_ratios=aspect_ratios[i],
                          two_boxes_for_ar1=two_boxes_for_ar1,
                          this_steps=steps[i],
                          this_offsets=offsets[i],
                          clip_boxes=clip_boxes,
                          variances=variances,
                          coords=coords,
                          normalize_coords=normalize_coords,
                          name='anchors6')
        layer_anchors_wh = anchors.call(x)
        layer_anchors_wh = np.reshape(layer_anchors_wh[0], (-1,8))[:,:4]
        anchor_boxes_wh.append(layer_anchors_wh)
    anchor_boxes_wh = np.vstack([boxes for boxes in anchor_boxes_wh])
    return anchor_boxes_wh

def get_gt_boxes(xml_path):
    gt_boxes = []
    tree = ET.parse(xml_path)
    root = tree.getroot()

    for member in root.findall('object'):
        bndbox = member.find('bndbox')
        box = [int(bndbox.find('xmin').text),
               int(bndbox.find('ymin').text),
               int(bndbox.find('xmax').text),
               int(bndbox.find('ymax').text),]
        gt_boxes.append(box)
    return gt_boxes

# TEST ANCHORS

In [30]:
def compute_anchor_recall_from_xml(images_folder, scales, aspect_ratios=[0.5,1.0,2.0], aspect_ratios_per_layer=None, iou_threshold=0.3):
    all_ious = np.array([])
    if aspect_ratios_per_layer is None:
        aspect_ratios = [aspect_ratios]*3
    else:
        aspect_ratios = aspect_ratios_per_layer
    anchor_boxes_wh = get_anchors(scales= scales,
                                 aspect_ratios=aspect_ratios)

    anchor_boxes_wh[:,[0,2]] *= img_width
    anchor_boxes_wh[:,[1,3]] *= img_height
    for xml_path in glob.glob(f'{images_folder}/*.xml')[::20]:
        img_path = xml_path.split('.')[0] + '.jpg'
        img = cv2.imread(img_path)


        gt_boxes = np.array(get_gt_boxes(xml_path))
        gt_boxes[:,[0,2]] = gt_boxes[:,[0,2]]/img.shape[1]*img_width
        gt_boxes[:,[1,3]] = gt_boxes[:,[1,3]]/img.shape[0]*img_height

        gt_boxes_wh = convert_xy_to_wh(gt_boxes)


        # anchors_xy = convert_wh_to_xy(anchors_wh)
        # preds = convert_wh_to_xy(preds)
        max_iou, positive_mask = match_iou(gt_boxes_wh.astype(float), anchor_boxes_wh.astype(float))
        all_ious = np.append(all_ious, max_iou.numpy())
    return np.mean(all_ious>iou_threshold)

def compute_anchor_recall_from_csv(df, scales, aspect_ratios=[0.5,1.0,2.0], aspect_ratios_per_layer=None, iou_threshold=0.3):
    all_ious = np.array([])
    if aspect_ratios_per_layer is None:
        aspect_ratios = [aspect_ratios]*3
    else:
        aspect_ratios = aspect_ratios_per_layer
    anchor_boxes_wh = get_anchors(scales= scales,
                                 aspect_ratios=aspect_ratios)

    anchor_boxes_wh[:,[0,2]] *= img_width
    anchor_boxes_wh[:,[1,3]] *= img_height

    for _ in range(100):
        gt_boxes = []
        for _ in range(10):
            i = np.random.randint(0, df.shape[0])
            box = df.iloc[i][['xmin','ymin','xmax','ymax']]
            gt_boxes.append(box)


        gt_boxes = np.array(gt_boxes)

        gt_boxes_wh = convert_xy_to_wh(gt_boxes)


        max_iou, positive_mask = match_iou(gt_boxes_wh.astype(float), anchor_boxes_wh.astype(float))
        all_ious = np.append(all_ious, max_iou.numpy())
    return np.mean(all_ious>iou_threshold)
    

In [32]:
scales = [1.0, 1.0, 1.0, 1.0]
aspect_ratios = [ 1.0, 1.0, 1.0]
df = pd.read_csv("!img_classes.csv")

compute_anchor_recall_from_csv(df,
                               scales,
                               aspect_ratios,
                               iou_threshold=0.3)

ValueError: could not broadcast input array from shape (6,) into shape (4,4,4)

In [9]:
df

Unnamed: 0,image_name,xmin,xmax,ymin,ymax,class_id
0,D:/Program Files/ML-NN/traind_ssd/DATASET\ZAVO...,212,236,116,141,1
1,D:/Program Files/ML-NN/traind_ssd/DATASET\ZAVO...,426,512,11,61,2
2,D:/Program Files/ML-NN/traind_ssd/DATASET\ZAVO...,213,237,116,141,1
3,D:/Program Files/ML-NN/traind_ssd/DATASET\ZAVO...,426,512,11,61,2
4,D:/Program Files/ML-NN/traind_ssd/DATASET\ZAVO...,213,236,116,141,1
...,...,...,...,...,...,...
12417,D:/Program Files/ML-NN/traind_ssd/DATASET\ZAVO...,424,512,12,68,2
12418,D:/Program Files/ML-NN/traind_ssd/DATASET\ZAVO...,293,314,115,164,1
12419,D:/Program Files/ML-NN/traind_ssd/DATASET\ZAVO...,281,290,67,91,1
12420,D:/Program Files/ML-NN/traind_ssd/DATASET\ZAVO...,461,491,230,272,1


In [21]:
compute_anchor_recall_from_xml(scales=[0.025, 0.05, 0.1, 0.2],
                      aspect_ratios=[ 0.5, 1.0, 2.0,],
                      images_folder='W:Work/data/DATASETS/CAR_PERSON/Stroika_ulitca/1/Images_xml',
                      iou_threshold=0.3)

0.8656821378340366

In [106]:
predictor_sizes

[(36, 64), (18, 32), (9, 16)]

In [26]:
cur_scales = [0.00001, 0.00001, 0.00001,0.0001]
best_scales = cur_scales
for ind in range(4):
    best_recall = 0
    cur_scales = [0.00001, 0.00001, 0.00001,0.0001]
    for n in range(1,10):
        cur_scales[ind] = 0.005*n*(1+ind)
        recall = compute_anchor_recall_from_xml(scales=cur_scales,
                      aspect_ratios=[0.3,0.8,1.5],
                      images_folder='E:/Desktop/Work/data/PERSON_CAR/Stroika_ulitca/1/Images_xml')
        if recall>best_recall:
            best_scales[ind] = cur_scales[ind]
            best_recall = recall
        print(cur_scales, recall)
best_scales

[0.005, 1e-05, 1e-05, 0.0001] 0.0008051529790660225
[0.01, 1e-05, 1e-05, 0.0001] 0.008856682769726247
[0.015, 1e-05, 1e-05, 0.0001] 0.037037037037037035
[0.02, 1e-05, 1e-05, 0.0001] 0.12077294685990338
[0.025, 1e-05, 1e-05, 0.0001] 0.29307568438003223
[0.03, 1e-05, 1e-05, 0.0001] 0.46980676328502413
[0.035, 1e-05, 1e-05, 0.0001] 0.532608695652174
[0.04, 1e-05, 1e-05, 0.0001] 0.5615942028985508
[0.045, 1e-05, 1e-05, 0.0001] 0.5462962962962963
[1e-05, 0.01, 1e-05, 0.0001] 0.009259259259259259
[1e-05, 0.02, 1e-05, 0.0001] 0.0499194847020934
[1e-05, 0.03, 1e-05, 0.0001] 0.06602254428341385
[1e-05, 0.04, 1e-05, 0.0001] 0.10185185185185185
[1e-05, 0.05, 1e-05, 0.0001] 0.11553945249597423
[1e-05, 0.06, 1e-05, 0.0001] 0.13526570048309178
[1e-05, 0.07, 1e-05, 0.0001] 0.1356682769726248
[1e-05, 0.08, 1e-05, 0.0001] 0.13365539452495975
[1e-05, 0.09, 1e-05, 0.0001] 0.12962962962962962
[1e-05, 1e-05, 0.015, 0.0001] 0.004025764895330112
[1e-05, 1e-05, 0.03, 0.0001] 0.016908212560386472
[1e-05, 1e-05

[0.04, 0.07, 0.10500000000000001, 0.0001]

In [124]:
compute_anchor_recall(scales=[0.02, 0.06, 0.1, 0.2],
                      aspect_ratios=[0.5,1.0,2.0],
                      images_folder='E:/Downloads/stroika_ulitca/1/Images_xml')

0.9807897545357525

In [107]:
from tensorflow import keras


In [108]:
img = np.zeros((1,128,128,3))


In [109]:
np.concatenate([img]*3).shape

(3, 128, 128, 3)