In [1]:
import sys
sys.path.append("/home/jacopo.gasparetto/Workspaces/keras-yolo2")
import numpy as np
import os, cv2

from keras.models import Model
from keras.layers import Input, Activation, Concatenate
from keras.layers import Flatten, Dropout
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import GlobalAveragePooling2D, Lambda
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.preprocessing.image import ImageDataGenerator, load_img

from keras.optimizers import Adam, SGD

import tensorflow as tf
from tqdm import tqdm_notebook as tqdm
import pickle
import matplotlib.pyplot as plt

from utils import *

Using TensorFlow backend.


In [2]:
# Symbol vocabulary: the ten digits first, then operators, brackets and punctuation.
labels = list(map(str, range(10))) + [
    "+", "-", "times", "div",
    "(", ")", "[", "]", "{", "}",
    "!", ",",
]

# Settings read by the model builder, the loss function and the batch generators.
config = dict(
    labels=labels,
    # input image and output grid geometry
    image_h=416,
    image_w=416,
    grid_h=13,
    grid_w=13,
    nb_max_box=5,
    # one weight per label, all equal
    class_weights=np.ones(len(labels), dtype='float32'),
    obj_threshold=0.3,
    nms_threshold=0.3,
    # flat list that the loss reshapes into nb_max_box pairs
    anchors=[0.48, 0.97, 0.65, 1.21, 0.89, 1.01, 1.14, 0.39, 1.20, 1.31],
    # weights of the individual loss terms
    no_object_scale=1.0,
    object_scale=5.0,
    coord_scale=1.0,
    class_scale=1.0,
    # training schedule
    epochs=100,
    batch_size=32,
    warm_up_batches=3,
    true_box_buffer=100,
)

# Create Model

In [3]:
# Name fragments that fire_module combines into per-layer names.
sq1x1, exp1x1, exp3x3 = "squeeze1x1", "expand1x1", "expand3x3"
relu = "relu_"


def fire_module(x, fire_id, squeeze=16, expand=64):
    """Build one fire block: a 1x1 squeeze convolution feeding parallel 1x1 and 3x3 expand convolutions.

    Args:
        x: input tensor.
        fire_id: identifier used to prefix every layer name as ``fire<fire_id>/``.
        squeeze: number of filters of the squeeze convolution.
        expand: number of filters of each of the two expand convolutions.

    Returns:
        The two ReLU-activated expand branches concatenated along axis 3.
    """
    prefix = 'fire{}/'.format(fire_id)

    def conv_relu(tensor, filters, kernel, padding, tag):
        # Convolution followed by a separately named ReLU activation layer.
        conv = Convolution2D(filters, kernel, padding=padding, name=prefix + tag,
                             kernel_initializer='glorot_uniform')(tensor)
        return Activation('relu', name=prefix + relu + tag)(conv)

    squeezed = conv_relu(x, squeeze, (1, 1), 'valid', sq1x1)

    branch_1x1 = conv_relu(squeezed, expand, (1, 1), 'valid', exp1x1)
    # 'same' padding keeps the 3x3 branch the same spatial size as the 1x1 branch.
    branch_3x3 = conv_relu(squeezed, expand, (3, 3), 'same', exp3x3)

    return concatenate([branch_1x1, branch_3x3], axis=3, name=prefix + 'concat')


def SqueezeNet(nb_classes, inputs=(416, 416, 3)):
    """Assemble the SqueezeNet classifier with an additional pass-through input.

    Args:
        nb_classes: number of filters of the final 1x1 convolution, i.e. the
            size of the softmax output.
        inputs: shape of the image input, without the batch axis.

    Returns:
        A Keras ``Model`` taking ``[image, true_boxes]`` and returning the
        softmax output. The second input is sized from
        ``config["true_box_buffer"]`` and is only routed through a Lambda that
        discards it.
    """
    # The image input is created before the true-boxes input, so their auto-generated names keep that order.
    input_img = Input(shape=inputs)
    true_boxes = Input(shape=(1, 1, 1, config["true_box_buffer"], 4))

    net = Convolution2D(96, (7, 7), activation='relu', kernel_initializer='glorot_uniform',
                        strides=(2, 2), padding='same', name='conv1')(input_img)
    net = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='maxpool1')(net)

    # (fire_id, squeeze filters, expand filters, name of the max-pool that follows or None)
    fire_stages = (
        (2, 16, 64, None),
        (3, 16, 64, 'pool3'),
        (4, 32, 128, None),
        (5, 32, 128, 'pool5'),
        (6, 48, 192, None),
        (7, 48, 192, None),
        (8, 64, 256, None),
        (9, 64, 256, None),
    )
    for fire_id, squeeze, expand, pool_name in fire_stages:
        net = fire_module(net, fire_id=fire_id, squeeze=squeeze, expand=expand)
        if pool_name is not None:
            net = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name=pool_name)(net)

    net = Dropout(0.5, name='fire9/dropout')(net)
    net = Convolution2D(nb_classes, (1, 1), activation='relu', kernel_initializer='glorot_uniform',
                        padding='valid', name='conv10')(net)

    pooled = GlobalAveragePooling2D()(net)
    class_probs = Activation("softmax", name='softmax')(pooled)

    # Returns the first argument unchanged; its only effect is to connect true_boxes to the graph.
    class_probs = Lambda(lambda args: args[0])([class_probs, true_boxes])

    return Model(inputs=[input_img, true_boxes], outputs=class_probs)

In [4]:
# Single-channel network; class count and image size come from the shared config.
input_shape = (config["image_h"], config["image_w"], 1)
model = SqueezeNet(nb_classes=len(config["labels"]), inputs=input_shape)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 416, 416, 1)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 208, 208, 96) 4800        input_1[0][0]                    
__________________________________________________________________________________________________
maxpool1 (MaxPooling2D)         (None, 103, 103, 96) 0           conv1[0][0]                      
__________________________________________________________________________________________________
fire2/squeeze1x1 (Conv2D)       (None, 103, 103, 16) 1552        maxpool1[0][0]                   
__________________________________________________________________________________________________
fire2/relu

# Training

In [5]:
# NOTE(review): `Loss` is not defined in any visible cell; presumably it comes from `from utils import *` (confirm).
# The object created here is not referenced by the later cells shown: model.compile() is given `custom_loss`.
loss = Loss(config)

In [6]:
def normalize(self, image):
    """Reverse the last axis of ``image`` and subtract a fixed offset from its first three channels.

    Args:
        self: not used by the body.
        image: array whose last axis has at least three entries.

    Returns:
        A new float array; the array passed in is left untouched.
    """
    channel_offsets = (103.939, 116.779, 123.68)

    # astype() returns a copy, so the in-place subtraction below cannot alter the caller's array.
    shifted = image[..., ::-1].astype('float')
    for channel, offset in enumerate(channel_offsets):
        shifted[..., channel] -= offset

    return shifted

In [7]:
# Project-local module that supplies parse_annotation() and BatchGenerator used below.
import pre_processing
# Dataset root under the user's home directory, with separate feature and label folders per split.
data_path = os.path.expanduser("~/Workspaces/DeepCalculatorBot/ObjectDetection/data/")
train_imgs_path = os.path.join(data_path, "xml_dataset/train/features/")
train_labels_path = os.path.join(data_path, "xml_dataset/train/labels/")
valid_imgs_path = os.path.join(data_path, "xml_dataset/valid/features/")
valid_labels_path = os.path.join(data_path, "xml_dataset/valid/labels/")

# parse_annotation() takes the label folder first, then the image folder, and returns a pair.
# NOTE(review): `normalize` is declared as (self, image) and offsets three channels, while the model
# is built with a single-channel input; confirm how BatchGenerator invokes `norm` and on what shape.
train_imgs, seen_train_labels = pre_processing.parse_annotation(train_labels_path, train_imgs_path, labels=config["labels"])
train_batch = pre_processing.BatchGenerator(train_imgs, config, norm=normalize)

# Same pipeline for the validation split.
valid_imgs, seen_valid_labels = pre_processing.parse_annotation(valid_labels_path, valid_imgs_path, labels=config["labels"])
valid_batch = pre_processing.BatchGenerator(valid_imgs, config, norm=normalize)

**Set up a few callbacks and start the training**

In [8]:
# Stop training once val_loss has not improved by at least 0.001 for 3 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss',
                           min_delta=0.001,
                           patience=3,
                           mode='min',
                           verbose=1)

# The checkpoint lives in a sub-folder; make sure it exists so the first save
# cannot fail on a missing directory after a full epoch of training.
checkpoint_path = 'weights/squeeze_weights.h5'
checkpoint_dir = os.path.dirname(checkpoint_path)
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

# Keep only the weights of the epoch with the lowest val_loss seen so far.
checkpoint = ModelCheckpoint(checkpoint_path,
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='min',
                             period=1)

In [9]:
# custom_loss reads this module-level tensor. It must be the model's own second input
# (the true-boxes tensor created inside SqueezeNet), because only the model's inputs are
# fed from the generator. A freshly created Input here would be a separate placeholder
# that never receives a value during training.
true_boxes = model.inputs[1]


def custom_loss(y_true, y_pred):
    """Detection loss: sum of box-centre, box-size, confidence and class terms.

    Reads the module-level ``config`` dict and the module-level ``true_boxes``
    tensor in addition to its arguments.

    Args:
        y_true: ground-truth tensor. Its last axis is read as
            ``[0:2]`` box centre, ``[2:4]`` box size, ``[4]`` object indicator,
            ``[5:]`` class scores (arg-maxed to a class index).
        y_pred: raw network output with the same last-axis layout. Its leading
            axes must broadcast against a grid of shape
            ``(batch_size, grid_h, grid_w, nb_max_box, 2)``.

    Returns:
        A scalar loss tensor wrapped in ``tf.Print`` ops that log each loss
        component and the current/average recall when the tensor is evaluated.
    """
    mask_shape = tf.shape(y_true)[:4]

    # cell_x holds the column index of every grid cell, shaped (1, grid_h, grid_w, 1, 1);
    # cell_y is obtained by swapping the two grid axes.
    # NOTE(review): the transpose only yields a (1, grid_h, grid_w, 1, 1) tensor when grid_h == grid_w.
    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(config["grid_w"]), [config["grid_h"]]), (1, config["grid_h"], config["grid_w"], 1, 1)))
    cell_y = tf.transpose(cell_x, (0,2,1,3,4))

    # NOTE(review): the grid is tiled with the configured batch size, so a batch of a different
    # size would presumably not broadcast against it; confirm the generator always yields full batches.
    cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [config["batch_size"], 1, 1, config["nb_max_box"], 1])

    # coord_mask and class_mask are reassigned further down; only conf_mask is accumulated onto.
    coord_mask = tf.zeros(mask_shape)
    conf_mask  = tf.zeros(mask_shape)
    class_mask = tf.zeros(mask_shape)

    # Counters created when the loss graph is built: evaluations so far and summed recall.
    seen = tf.Variable(0.)
    total_recall = tf.Variable(0.)

    """
    Adjust prediction
    """
    ### adjust x and y: sigmoid offset inside the cell plus the cell's grid position
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid

    ### adjust w and h: exponential of the raw output scaled by the anchor pairs
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(config["anchors"], [1,1,1,config["nb_max_box"],2])

    ### adjust confidence
    pred_box_conf = tf.sigmoid(y_pred[..., 4])

    ### adjust class probabilities (kept as logits; softmax is applied inside the cross-entropy op)
    pred_box_class = y_pred[..., 5:]

    """
    Adjust ground truth
    """
    ### adjust x and y
    true_box_xy = y_true[..., 0:2] # relative position to the containing cell

    ### adjust w and h
    true_box_wh = y_true[..., 2:4] # number of cells across, horizontally and vertically

    ### adjust confidence: IOU between each predicted box and the ground-truth box at the same position
    true_wh_half = true_box_wh / 2.
    true_mins    = true_box_xy - true_wh_half
    true_maxes   = true_box_xy + true_wh_half

    pred_wh_half = pred_box_wh / 2.
    pred_mins    = pred_box_xy - pred_wh_half
    pred_maxes   = pred_box_xy + pred_wh_half

    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
    pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)

    # The confidence target is the IOU itself, zeroed wherever the object indicator is 0.
    true_box_conf = iou_scores * y_true[..., 4]

    ### adjust class probabilities
    true_box_class = tf.argmax(y_true[..., 5:], -1)

    """
    Determine the masks
    """
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * config["coord_scale"]

    ### confidence mask: penalize predictors + penalize boxes with low IOU
    # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
    # (the IOU is recomputed here against the module-level `true_boxes` tensor; the
    #  true_*/pred_*/intersect_* names from the first IOU computation are reused)
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    true_wh_half = true_wh / 2.
    true_mins    = true_xy - true_wh_half
    true_maxes   = true_xy + true_wh_half

    # Extra axis 4 so each prediction broadcasts against every entry of true_boxes.
    pred_xy = tf.expand_dims(pred_box_xy, 4)
    pred_wh = tf.expand_dims(pred_box_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins    = pred_xy - pred_wh_half
    pred_maxes   = pred_xy + pred_wh_half

    intersect_mins  = tf.maximum(pred_mins,  true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_wh[..., 0] * true_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores  = tf.truediv(intersect_areas, union_areas)

    # Best overlap of each prediction with any entry of true_boxes.
    best_ious = tf.reduce_max(iou_scores, axis=4)
    conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * config["no_object_scale"]

    # penalize the confidence of the boxes, which are responsible for corresponding ground truth box
    conf_mask = conf_mask + y_true[..., 4] * config["object_scale"]

    ### class mask: simply the position of the ground truth boxes (the predictors)
    class_mask = y_true[..., 4] * tf.gather(config["class_weights"], true_box_class) * config["class_scale"]

    """
    Warm-up training
    """
    # Positions without a ground-truth box (coord_mask below half of coord_scale).
    no_boxes_mask = tf.to_float(coord_mask < config["coord_scale"]/2.)
    seen = tf.assign_add(seen, 1.)

    # While fewer than warm_up_batches evaluations have been seen, empty positions get the
    # cell centre and the anchor size as targets and every position contributes to the
    # coordinate loss; afterwards the targets and the mask are used unchanged.
    true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, config["warm_up_batches"]), 
                          lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask, 
                                   true_box_wh + tf.ones_like(true_box_wh) * np.reshape(config["anchors"], [1,1,1,config["nb_max_box"],2]) * no_boxes_mask, 
                                   tf.ones_like(coord_mask)],
                          lambda: [true_box_xy, 
                                   true_box_wh,
                                   coord_mask])

    """
    Finalize the loss
    """
    # Number of active entries in each mask, used to normalise the corresponding term.
    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
    nb_conf_box  = tf.reduce_sum(tf.to_float(conf_mask  > 0.0))
    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))

    # The 1e-6 terms guard against division by zero when a mask has no active entry.
    loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh    = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_conf  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask)  / (nb_conf_box  + 1e-6) / 2.
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)

    loss = loss_xy + loss_wh + loss_conf + loss_class

    # Recall bookkeeping: a ground-truth box counts as found when its IOU target exceeds 0.5
    # and the predicted confidence exceeds 0.3.
    nb_true_box = tf.reduce_sum(y_true[..., 4])
    nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))

    """
    Debugging code
    """    
    current_recall = nb_pred_box/(nb_true_box + 1e-6)
    total_recall = tf.assign_add(total_recall, current_recall) 

    # Each tf.Print passes `loss` through unchanged and logs its message when the tensor is evaluated.
    loss = tf.Print(loss, [tf.zeros((1))], message='Dummy Line \t', summarize=1000)
    loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
    loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
    loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
    loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
    loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
    loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
    loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)

    return loss

In [10]:
log_dir = os.path.expanduser('~/Workspaces/DeepCalculatorBot/ObjectDetection/logs/')
# os.listdir() raises when the folder is missing (e.g. on a fresh checkout), so create it first.
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)

# Number this run after the existing 'squeeze_' entries so every training gets its own log folder.
# log_dir is already expanded above, so it is used directly.
tb_counter  = len([log for log in os.listdir(log_dir) if 'squeeze_' in log]) + 1
tensorboard = TensorBoard(log_dir=log_dir + 'squeeze_' + '_' + str(tb_counter),
                          histogram_freq=0,
                          write_graph=True,
                          write_images=True)

optimizer = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)

model.compile(loss=custom_loss, optimizer=optimizer)

# The epoch count comes from the shared config (same value as the previous literal)
# so the schedule is defined in a single place.
model.fit_generator(generator        = train_batch,
                    steps_per_epoch  = len(train_batch),
                    epochs           = config["epochs"],
                    verbose          = 1,
                    validation_data  = valid_batch,
                    validation_steps = len(valid_batch),
                    callbacks        = [early_stop, checkpoint, tensorboard],
                    max_queue_size   = 3)

Epoch 1/100
0
0
0
0
0


ValueError: Error when checking input: expected input_2 to have 6 dimensions, but got array with shape (0, 1, 1, 1, 100)

# Perform detection on image

In [None]:
# NOTE(review): the training checkpoint is written to 'weights/squeeze_weights.h5', while this
# loads "weights_coco.h5" from the working directory; confirm this is the intended weights file.
model.load_weights("weights_coco.h5")

In [None]:
# Read the test picture as a single-channel image, matching the one-channel input the model was built with.
image_path = '/home/jacopo.gasparetto/Workspaces/DeepCalculatorBot/ObjectDetection/test_img.jpg'
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None rather than raising.
    raise IOError("Could not read image: " + image_path)
print(image.shape)

# Placeholder for the model's second (true-boxes) input; sized from the shared config
# because no TRUE_BOX_BUFFER constant is defined in this notebook.
dummy_array = np.zeros((1, 1, 1, 1, config["true_box_buffer"], 4))

# cv2.resize expects (width, height).
input_image = cv2.resize(image, (config["image_w"], config["image_h"]))
input_image = input_image / 255.
# A grayscale array is 2-D, so there is no colour axis to reverse: add the batch and
# channel axes to obtain the (1, H, W, 1) shape the model expects.
input_image = input_image[np.newaxis, :, :, np.newaxis]

netout = model.predict([input_image, dummy_array])
boxes = decode_netout(netout[0],
                      obj_threshold=config["obj_threshold"],
                      nms_threshold=config["nms_threshold"],
                      anchors=config["anchors"],
                      nb_class=len(config["labels"]))

# Convert to three channels so the boxes are drawn on a colour image and the
# BGR -> RGB flip used for display is valid.
image = draw_boxes(cv2.cvtColor(image, cv2.COLOR_GRAY2BGR), boxes, labels=config["labels"])

plt.figure(figsize=(10, 10))
plt.imshow(image[:, :, ::-1]); plt.show()