In [1]:
from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD, Adam, RMSprop
from keras.layers.merge import concatenate
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf
import imgaug as ia
from tqdm import tqdm
from imgaug import augmenters as iaa
from keras.utils import Sequence
import numpy as np
import pickle
import os, cv2,copy
from phone_pre import BatchGenerator
from utils import WeightReader, decode_netout, draw_boxes

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
LABELS = ['phone']
IMAGE_H, IMAGE_W = 490, 326
#IMAGE_H, IMAGE_W = 416, 416
GRID_H,  GRID_W  = 5 ,5
BOX              = 5
CLASS            = len(LABELS)
CLASS_WEIGHTS    = np.ones(CLASS, dtype='float32')
OBJ_THRESHOLD    = 0.3#0.5
NMS_THRESHOLD    = 0.3#0.45

NO_OBJECT_SCALE  = 1.0
OBJECT_SCALE     = 5.0
COORD_SCALE      = 1.0
CLASS_SCALE      = 1.0

BATCH_SIZE       = 2
WARM_UP_BATCHES  = 0
TRUE_BOX_BUFFER  = 3


In [3]:
wt_path = 'yolov2.weights'                      
train_image_folder = '/Users/preet/findphone/train_img/'
train_annot_folder = '/Users/preet/findphone/train_anno/'
valid_image_folder = '/Users/preet/findphone/test_img/'
valid_annot_folder = '/Users/preet/findphone/test_anno/'

In [4]:
# the function to implement the orgnization layer (thanks to github.com/allanzelener/YAD2K)
def space_to_depth_x2(x):
    return tf.space_to_depth(x, block_size=2)

In [5]:
input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
true_boxes  = Input(shape=(1, TRUE_BOX_BUFFER,4 ))
print(np.shape(true_boxes))

# Layer 1
x = Conv2D(32, (3,3), strides=(3,2), padding='same', name='conv_1', use_bias=False)(input_image)
x = BatchNormalization(name='norm_1')(x)
x = LeakyReLU(alpha=0.1)(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Layer 2
x = Conv2D(64, (3,3), strides=(1,1), padding='same', name='conv_2', use_bias=False)(x)
x = BatchNormalization(name='norm_2')(x)
x = LeakyReLU(alpha=0.1)(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Layer 3
x = Conv2D(128, (3,3), strides=(1,1), padding='same', name='conv_3', use_bias=False)(x)
x = BatchNormalization(name='norm_3')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 4
x = Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_4', use_bias=False)(x)
x = BatchNormalization(name='norm_4')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 5
x = Conv2D(128, (3,3), strides=(1,1), padding='same', name='conv_5', use_bias=False)(x)
x = BatchNormalization(name='norm_5')(x)
x = LeakyReLU(alpha=0.1)(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Layer 6
x = Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_6', use_bias=False)(x)
x = BatchNormalization(name='norm_6')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 7
x = Conv2D(128, (1,1), strides=(1,1), padding='same', name='conv_7', use_bias=False)(x)
x = BatchNormalization(name='norm_7')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 8
x = Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_8', use_bias=False)(x)
x = BatchNormalization(name='norm_8')(x)
x = LeakyReLU(alpha=0.1)(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Layer 9
x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_9', use_bias=False)(x)
x = BatchNormalization(name='norm_9')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 10
x = Conv2D(256, (1,1), strides=(1,1), padding='same', name='conv_10', use_bias=False)(x)
x = BatchNormalization(name='norm_10')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 11
x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_11', use_bias=False)(x)
x = BatchNormalization(name='norm_11')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 12
x = Conv2D(256, (1,1), strides=(1,1), padding='same', name='conv_12', use_bias=False)(x)
x = BatchNormalization(name='norm_12')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 13
x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_13', use_bias=False)(x)
x = BatchNormalization(name='norm_13')(x)
x = LeakyReLU(alpha=0.1)(x)

skip_connection = x

x = MaxPooling2D(pool_size=(2, 2))(x)

# Layer 14
x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_14', use_bias=False)(x)
x = BatchNormalization(name='norm_14')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 15
x = Conv2D(512, (1,1), strides=(1,1), padding='same', name='conv_15', use_bias=False)(x)
x = BatchNormalization(name='norm_15')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 16
x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_16', use_bias=False)(x)
x = BatchNormalization(name='norm_16')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 17
x = Conv2D(512, (1,1), strides=(1,1), padding='same', name='conv_17', use_bias=False)(x)
x = BatchNormalization(name='norm_17')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 18
x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_18', use_bias=False)(x)
x = BatchNormalization(name='norm_18')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 19
x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_19', use_bias=False)(x)
x = BatchNormalization(name='norm_19')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 20
x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_20', use_bias=False)(x)
x = BatchNormalization(name='norm_20')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 21
skip_connection = Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_21', use_bias=False)(skip_connection)
skip_connection = BatchNormalization(name='norm_21')(skip_connection)
skip_connection = LeakyReLU(alpha=0.1)(skip_connection)
skip_connection = Lambda(space_to_depth_x2)(skip_connection)

x = concatenate([skip_connection, x])

# Layer 22
x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_22', use_bias=False)(x)
x = BatchNormalization(name='norm_22')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 23
x = Conv2D((2+ CLASS), (1,1), strides=(1,1), padding='same', name='conv_23')(x)
output = Reshape((GRID_H, GRID_W, 2 + CLASS))(x)

# small hack to allow true_boxes to be registered when Keras build the model 
# for more information: https://github.com/fchollet/keras/issues/2790
output = Lambda(lambda args:args[0])([output])

model = Model([input_image], output)

(?, 1, 3, 4)(?, 1, 3, 4)



In [6]:
model.summary()

____________________________________________________________________________________________________________________________________________________________________________________________________

Layer (type)                    Output Shape         Param #     Connected to                     Layer (type)                    Output Shape         Param #     Connected to                     


input_1 (InputLayer)            (None, 490, 326, 3)  0                                            input_1 (InputLayer)            (None, 490, 326, 3)  0                                            

____________________________________________________________________________________________________________________________________________________________________________________________________

conv_1 (Conv2D)                 (None, 164, 163, 32) 864         input_1[0][0]                    conv_1 (Conv2D)                 (None, 164, 163, 32) 864         input_1[0][0]                    

_________

conv_6 (Conv2D)                 (None, 20, 20, 256)  294912      max_pooling2d_3[0][0]            __________________________________________________________________________________________________

__________________________________________________________________________________________________conv_6 (Conv2D)                 (None, 20, 20, 256)  294912      max_pooling2d_3[0][0]            

norm_6 (BatchNormalization)     (None, 20, 20, 256)  1024        conv_6[0][0]                     __________________________________________________________________________________________________

__________________________________________________________________________________________________norm_6 (BatchNormalization)     (None, 20, 20, 256)  1024        conv_6[0][0]                     

leaky_re_lu_6 (LeakyReLU)       (None, 20, 20, 256)  0           norm_6[0][0]                     __________________________________________________________________________________________________

__________

leaky_re_lu_12 (LeakyReLU)      (None, 10, 10, 256)  0           norm_12[0][0]                    __________________________________________________________________________________________________

__________________________________________________________________________________________________norm_12 (BatchNormalization)    (None, 10, 10, 256)  1024        conv_12[0][0]                    

conv_13 (Conv2D)                (None, 10, 10, 512)  1179648     leaky_re_lu_12[0][0]             __________________________________________________________________________________________________

__________________________________________________________________________________________________leaky_re_lu_12 (LeakyReLU)      (None, 10, 10, 256)  0           norm_12[0][0]                    

norm_13 (BatchNormalization)    (None, 10, 10, 512)  2048        conv_13[0][0]                    __________________________________________________________________________________________________

__________

____________________________________________________________________________________________________________________________________________________________________________________________________

leaky_re_lu_18 (LeakyReLU)      (None, 5, 5, 1024)   0           norm_18[0][0]                    norm_19 (BatchNormalization)    (None, 5, 5, 1024)   4096        conv_19[0][0]                    

____________________________________________________________________________________________________________________________________________________________________________________________________

conv_19 (Conv2D)                (None, 5, 5, 1024)   9437184     leaky_re_lu_18[0][0]             conv_21 (Conv2D)                (None, 10, 10, 64)   32768       leaky_re_lu_13[0][0]             

____________________________________________________________________________________________________________________________________________________________________________________________________

norm_19 (B

In [7]:
weight_reader  = WeightReader(wt_path)

In [8]:
weight_reader.reset()
nb_conv = 23

for i in range(1, nb_conv+1):
    conv_layer = model.get_layer('conv_' + str(i))
    
    if i < nb_conv:
        norm_layer = model.get_layer('norm_' + str(i))
        
        size = np.prod(norm_layer.get_weights()[0].shape)

        beta  = weight_reader.read_bytes(size)
        gamma = weight_reader.read_bytes(size)
        mean  = weight_reader.read_bytes(size)
        var   = weight_reader.read_bytes(size)

        weights = norm_layer.set_weights([gamma, beta, mean, var])       
        
    if len(conv_layer.get_weights()) > 1:
        bias   = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[1].shape))
        kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
        kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
        kernel = kernel.transpose([2,3,1,0])
        conv_layer.set_weights([kernel, bias])
    else:
        kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
        kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
        kernel = kernel.transpose([2,3,1,0])
        conv_layer.set_weights([kernel])

In [9]:
layer   = model.layers[-3] # the last convolutional layer
weights = layer.get_weights()

new_kernel = np.random.normal(size=weights[0].shape)/(GRID_H*GRID_W)
new_bias   = np.random.normal(size=weights[1].shape)/(GRID_H*GRID_W)

layer.set_weights([new_kernel, new_bias])

In [10]:
def custom_loss(y_true, y_pred):
    mask_shape = tf.shape(y_true)[:2]
    
    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1)))
    cell_y = tf.transpose(cell_x, (0,2,1,3))

    cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [BATCH_SIZE, 1, 1, 1])
    
    coord_mask = tf.zeros(mask_shape)
    class_mask = tf.zeros(mask_shape)
    
    seen = tf.Variable(0.)
    total_recall = tf.Variable(0.)
    
    """
    Adjust prediction
    """

    ### adjust x and y      
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
    
    ### adjust class probabilities
    pred_box_class = y_pred[..., 2:]
    print(np.shape(y_pred))
    
    """
    Adjust ground truth
    """
    # adjust x and y
    true_box_xy = y_true[..., 0:2] # relative position to the containing cell

    # adjust class probabilities
    true_box_class = tf.argmax(y_true[..., 2:], -1)
    print(np.shape(y_true))
    
    """
    Determine the masks
    """
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * COORD_SCALE
    
    
    ### confidence mask: penelize predictors + penalize boxes with low IOU
    # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
    true_xy = true_boxes[..., 0:2]
    
    pred_xy = tf.expand_dims(pred_box_xy, 4)
    
    ### class mask: simply the position of the ground truth boxes (the predictors)
    class_mask = y_true[..., 4] * tf.gather(CLASS_WEIGHTS, true_box_class) * CLASS_SCALE       
    
    """
    Warm-up training
    """
    no_boxes_mask = tf.to_float(coord_mask < COORD_SCALE/2.)
    seen = tf.assign_add(seen, 1.)
    
    true_box_xy, coord_mask = tf.cond(tf.less(seen, WARM_UP_BATCHES), 
                          lambda: [true_box_xy + (0.5 + cell_grid),
                                  tf.ones_like(coord_mask)],
                          lambda: [true_box_xy,
                                  coord_mask])
    
    """
    Finalize the loss
    """
    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))

    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
    
    loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
    
    loss = loss_xy + loss_class

    loss = tf.Print(loss, [tf.zeros((1))], message='Dummy Line \t', summarize=1000)
    loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
    loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
    loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)

    
    return loss

In [11]:
generator_config = {
    'IMAGE_H'         : IMAGE_H, 
    'IMAGE_W'         : IMAGE_W,
    'GRID_H'          : GRID_H,  
    'GRID_W'          : GRID_W,
    'LABELS'          : LABELS,
    'CLASS'           : len(LABELS),
    'BATCH_SIZE'      : BATCH_SIZE,
    'TRUE_BOX_BUFFER' : 50,
}

In [12]:
def normalize(image):
    return image / 255.

In [13]:
file = 'labels.txt'
f=open(file,"r")
lines = [line.rstrip('\n') for line in open(file)]
all_imgs = []
seen_labels = {}

j=1
for x in lines:
    img={}
    img['name'] = (x.split(' ')[0])
    img['x'] = (x.split(' ')[1])
    img['y'] = (x.split(' ')[2])
    img['filename']= train_image_folder+(x.split(' ')[0])
    all_imgs.append(img)
    
f.close()

In [14]:
from phone_pre import BatchGenerator

In [16]:
train_imgs = (all_imgs[0:99])
train_batch = BatchGenerator(train_imgs, generator_config, norm=normalize,jitter=False)


valid_imgs = (all_imgs[100:129])
valid_batch = BatchGenerator(valid_imgs, generator_config, norm=normalize, jitter=False)


In [17]:
early_stop = EarlyStopping(monitor='val_loss', 
                           min_delta=0.001, 
                           patience=3, 
                           mode='min', 
                           verbose=1)

checkpoint = ModelCheckpoint('phone_weights.h5', 
                             monitor='val_loss', 
                             verbose=1, 
                             save_best_only=True, 
                             mode='min', 
                             period=1)

In [18]:
tb_counter  = len([log for log in os.listdir(os.path.expanduser('~/logs/')) if 'phone_' in log]) + 1
tensorboard = TensorBoard(log_dir=os.path.expanduser('~/logs/') + 'phone_' + '_' + str(tb_counter), 
                          histogram_freq=0, 
                          write_graph=True, 
                          write_images=False)

optimizer = Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)

model.compile(loss=custom_loss, optimizer=optimizer)

model.fit_generator(generator        = train_batch, 
                    steps_per_epoch  = len(train_batch), 
                    epochs           = 10, 
                    verbose          = 1,
                    validation_data  = valid_batch,
                    validation_steps = len(valid_batch),
                    callbacks        = [early_stop, checkpoint, tensorboard], 
                    max_queue_size   = 3)

(?, 5, 5, 3)(?, 5, 5, 3)

(?, ?, ?)(?, ?, ?)



ValueError: Rank mismatch: Rank of labels (received 2) should equal rank of logits minus 1 (received 4).

ValueError: Rank mismatch: Rank of labels (received 2) should equal rank of logits minus 1 (received 4).