In [1]:
%matplotlib inline
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import os
from setting import *
import cv2
import pickle
#from sklearn.model_selection import train_test_split
import math
import matplotlib.pyplot as plt
from functions import *


In [2]:
IMG_H, IMG_W = 300, 300

In [3]:
def SSDHook(feature_map, hook_id):
    """
    Attach an SSD prediction head to one feature map.

    Two 3x3 convolutions with no activation function are applied to
    `feature_map`: one with NUM_PRED_CONF output channels (confidence /
    classification predictions) and one with NUM_PRED_LOC output channels
    (localization predictions). Each result is flattened per example.

    Arguments:
        * feature_map: tensor the predictions are computed from
        * hook_id: string appended to 'ssd_hook_' to form a unique variable scope

    Returns a (conf, loc) tuple of flattened prediction tensors.
    """
    with tf.variable_scope('ssd_hook_' + hook_id):
        # Linear activation (activation_fn=None): the heads emit raw values.
        conf_flat = tf.contrib.layers.flatten(
            slim.conv2d(feature_map, NUM_PRED_CONF, [3, 3], activation_fn=None, scope='conv_conf'))
        loc_flat = tf.contrib.layers.flatten(
            slim.conv2d(feature_map, NUM_PRED_LOC, [3, 3], activation_fn=None, scope='conv_loc'))

    return conf_flat, loc_flat
def AlexNet():
    """
    Build the AlexNet-style backbone with SSD prediction hooks.

    Creates the image placeholder 'x' of shape [None, IMG_H, IMG_W, NUM_CHANNELS]
    and a boolean 'is_training' placeholder, stacks the convolution layers, and
    attaches an SSDHook to the outputs of conv2, conv7, conv8_2, conv9_2 and
    conv10_2. The per-hook predictions are concatenated along axis 1.

    Every slim.conv2d inside the arg_scope uses batch normalization and an L2
    weights regularizer with scale REG_SCALE.

    FIXME (carried over): batch norm is hard-wired to is_training=True, so the
    'is_training' placeholder is returned (and fed by callers) but does not
    influence the graph built here.

    Returns a dictionary of {tensor_name: tensor_reference} with keys
    'x', 'y_pred_conf', 'y_pred_loc' and 'is_training'.
    """
    # Image batch tensor and training-flag placeholders
    x = tf.placeholder(tf.float32, [None, IMG_H, IMG_W, NUM_CHANNELS], name='x')
    is_training = tf.placeholder(tf.bool, name='is_training')

    # Classification (conf) and localization predictions, one entry per hook
    preds_conf = []
    preds_loc = []

    def _attach_hook(feature_map, hook_id):
        # Run the SSD head on `feature_map` and record both of its outputs.
        hook_conf, hook_loc = SSDHook(feature_map, hook_id)
        preds_conf.append(hook_conf)
        preds_loc.append(hook_loc)

    conv_defaults = dict(
        normalizer_fn=slim.batch_norm,
        normalizer_params={'is_training': True},
        weights_regularizer=slim.l2_regularizer(scale=REG_SCALE),
    )
    with slim.arg_scope([slim.conv2d], **conv_defaults):
        net = slim.conv2d(x, 64, [11, 11], 3, padding='VALID', scope='conv1')
        net = slim.max_pool2d(net, [3, 3], 2, padding='SAME', scope='pool1')
        net = slim.conv2d(net, 192, [5, 5], 2, scope='conv2')
        _attach_hook(net, 'conv2')

        net = slim.max_pool2d(net, [3, 3], 2, padding='SAME', scope='pool2')
        net = slim.conv2d(net, 384, [3, 3], scope='conv3')
        net = slim.conv2d(net, 384, [3, 3], scope='conv4')
        net = slim.conv2d(net, 256, [3, 3], scope='conv5')

        # The following layers are added for SSD
        net = slim.conv2d(net, 1024, [3, 3], scope='conv6')
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        _attach_hook(net, 'conv7')

        # Extra feature layers: a 1x1 convolution followed by a stride-2 3x3
        # convolution, with a hook on the stride-2 output.
        for layer_no, squeeze_depth, out_depth in ((8, 256, 512), (9, 128, 256), (10, 128, 256)):
            net = slim.conv2d(net, squeeze_depth, [1, 1], scope='conv%d' % layer_no)
            net = slim.conv2d(net, out_depth, [3, 3], 2, scope='conv%d_2' % layer_no)
            _attach_hook(net, 'conv%d_2' % layer_no)

    # Concatenate all preds together into 1 vector per example
    final_pred_conf = tf.concat(preds_conf, 1)
    final_pred_loc = tf.concat(preds_loc, 1)

    return {
        'x': x,
        'y_pred_conf': final_pred_conf,
        'y_pred_loc': final_pred_loc,
        'is_training': is_training,
    }
def SSDModel():
    """
    Build the network and its training helper, and merge their tensors.

    Calls AlexNet() for the prediction tensors, passes 'y_pred_conf' and
    'y_pred_loc' to ModelHelper(), and returns a single dict containing the
    entries of both result dicts.
    """
    base = AlexNet()
    helper = ModelHelper(base['y_pred_conf'], base['y_pred_loc'])

    # Helper entries are merged last, so they win on any key collision.
    ssd_model = dict(base)
    ssd_model.update(helper)
    return ssd_model
def ModelHelper(y_pred_conf, y_pred_loc):
    """
    Define the ground-truth placeholders, loss, optimizer op and predictions.

    The total loss is confidence loss + LOC_LOSS_WEIGHT * localization loss
    + 0.001 * sum of the collected regularization losses.

    conf_loss_mask is fed at batch time to mask the per-box confidence losses:
    it has 1 at positive locations and at the selected negative locations
    (chosen so that the negative-to-positive ratio NEG_POS_RATIO is satisfied).

    Arguments:
        * y_pred_conf: Class predictions from model,
            a tensor of shape [batch_size, num_feature_map_cells * num_default_boxes * num_classes]
        * y_pred_loc: Localization predictions from model,
            a tensor of shape [batch_size, num_feature_map_cells * num_default_boxes * 4]

    Returns a dictionary of {tensor_name: tensor_reference}.
    """
    # One prediction per default box per feature-map cell, over all feature maps
    num_total_preds = sum(fm_size[0] * fm_size[1] * NUM_DEFAULT_BOXES for fm_size in FM_SIZES)
    num_total_preds_loc = num_total_preds * 4

    # Input tensors
    y_true_conf = tf.placeholder(tf.int32, [None, num_total_preds], name='y_true_conf')  # class labels
    y_true_loc = tf.placeholder(tf.float32, [None, num_total_preds_loc], name='y_true_loc')  # box targets
    conf_loss_mask = tf.placeholder(tf.float32, [None, num_total_preds], name='conf_loss_mask')  # 1 per def. box

    # Confidence loss: per-box cross entropy, masked, then summed to a scalar
    logits = tf.reshape(y_pred_conf, [-1, num_total_preds, NUM_CLASSES])
    print("pred shape:")
    print(logits.shape)
    print("true shape:")
    print(y_true_conf.shape)
    per_box_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_true_conf)
    conf_loss = tf.reduce_sum(conf_loss_mask * per_box_xent)  # zero out the don't-care negatives

    # Localization loss (smooth L1): quadratic where |diff| < 1, linear elsewhere
    print(y_true_loc)
    print(y_pred_loc)
    diff = y_true_loc - y_pred_loc
    quadratic_part = 0.5 * (diff ** 2.0)
    linear_part = tf.abs(diff) - 0.5
    use_quadratic = tf.less(tf.abs(diff), 1.0)
    per_coord_loss = tf.where(use_quadratic, quadratic_part, linear_part)

    # Localization mask: min(label, 1), repeated for each of the 4 coordinates,
    # so the loss is non-zero only where a ground-truth box was matched.
    box_matched = tf.to_float(tf.minimum(y_true_conf, 1))
    loc_loss_mask = tf.reshape(tf.stack([box_matched] * 4, axis=2), [-1, num_total_preds_loc])
    loc_loss = tf.reduce_sum(loc_loss_mask * per_coord_loss)

    # Weighted sum of both losses plus the scaled regularization term
    loss = conf_loss + LOC_LOSS_WEIGHT * loc_loss + 0.001 * tf.reduce_sum(tf.losses.get_regularization_losses())
    optimizer = OPT.minimize(loss)

    # Class probabilities; top-1 probability and its index (the predicted class)
    probs_all = tf.nn.softmax(logits)
    top_prob, top_class = tf.nn.top_k(probs_all)
    probs = tf.reshape(top_prob, [-1, num_total_preds])
    preds_conf = tf.reshape(top_class, [-1, num_total_preds])

    return {
        'y_true_conf': y_true_conf,
        'y_true_loc': y_true_loc,
        'conf_loss_mask': conf_loss_mask,
        'optimizer': optimizer,
        'conf_loss': conf_loss,
        'loc_loss': loc_loss,
        'loss': loss,
        'probs': probs,
        'probs_all': probs_all,
        'preds_conf': preds_conf,
        'preds_loc': y_pred_loc,
    }


In [None]:
# Shape-inspection scratch cell: rebuilds the layer stack step by step and
# prints each hooked feature map together with its flattened localization head.
#
# Everything is built inside a throwaway graph. Built in the default graph,
# these layers would create variables under the same scopes AlexNet() uses
# ('conv7', 'conv8_2', ...), making a later SSDModel() call fail on duplicate
# variables, and the extra variables would also be collected by tf.train.Saver().
with tf.Graph().as_default():
    x = tf.placeholder(tf.float32, [None, IMG_H, IMG_W, NUM_CHANNELS], name='x')
    net = slim.conv2d(x, 64, [11, 11], 3, padding='VALID', scope='conv1232')
    net = slim.max_pool2d(net, [3, 3], 2,padding='SAME',scope='pool1233')
    net = slim.conv2d(net, 192, [5, 5],2, scope='conv233')
    net_conf, net_loc = SSDHook(net, 'conv2323')
    print(net)
    print(net_loc)
    net = slim.max_pool2d(net, [3, 3], 2,padding='SAME', scope='pool2')
    net = slim.conv2d(net, 384, [3, 3], scope='conv3')
    net = slim.conv2d(net, 384, [3, 3], scope='conv4')
    net = slim.conv2d(net, 256, [3, 3], scope='conv5')
    net = slim.conv2d(net, 1024, [3, 3], scope='conv6')
    net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
    net_conf, net_loc = SSDHook(net, 'conv7')
    print(net)
    print(net_loc)
    net = slim.conv2d(net, 256, [1, 1], scope='conv8')
    net = slim.conv2d(net, 512, [3, 3], 2, scope='conv8_2')
    net_conf, net_loc = SSDHook(net, 'conv8_2')
    print(net)
    print(net_loc)
    net = slim.conv2d(net, 128, [1, 1], scope='conv9')
    net = slim.conv2d(net, 256, [3, 3], 2, scope='conv9_2')
    net_conf, net_loc = SSDHook(net, 'conv9_2')
    print(net)
    print(net_loc)
    net = slim.conv2d(net, 128, [1, 1], scope='conv10')
    net = slim.conv2d(net, 256, [3, 3], 2, scope='conv10_2')
    net_conf, net_loc = SSDHook(net, 'conv10_2')
    print(net)
    print(net_loc)

Tensor("conv233/Relu:0", shape=(?, 25, 25, 192), dtype=float32)
Tensor("ssd_hook_conv2323/Flatten_1/flatten/Reshape:0", shape=(?, 30000), dtype=float32)
Tensor("conv7/Relu:0", shape=(?, 13, 13, 1024), dtype=float32)
Tensor("ssd_hook_conv7/Flatten_1/flatten/Reshape:0", shape=(?, 8112), dtype=float32)
Tensor("conv8_2/Relu:0", shape=(?, 7, 7, 512), dtype=float32)
Tensor("ssd_hook_conv8_2/Flatten_1/flatten/Reshape:0", shape=(?, 2352), dtype=float32)
Tensor("conv9_2/Relu:0", shape=(?, 4, 4, 256), dtype=float32)
Tensor("ssd_hook_conv9_2/Flatten_1/flatten/Reshape:0", shape=(?, 768), dtype=float32)


In [4]:
# Work from the dataset root; every relative path below resolves against it.
# Raw string: '\D' and '\V' are not valid escape sequences, so the plain
# literal only produced the intended path by accident (and newer Python
# versions warn about it). The resulting path is unchanged.
os.chdir(r'D:\Data\VOC2007')

In [5]:
# Load the prepared labels: a pickled dict stored in a 0-d object array,
# mapping image file name -> {'y_true_conf': ..., 'y_true_loc': ...}.
# allow_pickle=True is required by NumPy >= 1.16.3 to read object arrays.
# Only load files you produced yourself: unpickling can execute arbitrary code.
train = np.load('./3000_data_prep.npy', allow_pickle=True)
train = train[()]  # unwrap the 0-d array to get the dict back
for key in train.keys():
    train[key]['y_true_conf'] = train[key]['y_true_conf'].astype(int)

X_train = []
y_train_conf = []
y_train_loc = []
for img_name in train.keys():
    img_path = os.path.join('./ResizedImage//', img_name)
    img_file = cv2.imread(img_path)
    if img_file is None:
        # cv2.imread signals failure by returning None; stop here instead of
        # silently putting None into the training array.
        raise IOError('Could not read image: ' + img_path)
    X_train.append(img_file)
    y_train_conf.append(train[img_name]['y_true_conf'])
    y_train_loc.append(train[img_name]['y_true_loc'])
X_train = np.array(X_train)
y_train_conf = np.array(y_train_conf)
y_train_loc = np.array(y_train_loc)

In [6]:
# Open the session and build the full SSD graph (network, losses, optimizer op).
sess = tf.Session()
model = SSDModel()

# Placeholders fed on sess.run calls
x, y_true_conf, y_true_loc, conf_loss_mask, is_training = (
    model[key] for key in ('x', 'y_true_conf', 'y_true_loc', 'conf_loss_mask', 'is_training'))

# Loss tensors and the training op
conf_loss, loc_loss, reported_loss, optimizer = (
    model[key] for key in ('conf_loss', 'loc_loss', 'loss', 'optimizer'))

# Prediction tensors
probs_all, preds_conf, preds_loc, probs = (
    model[key] for key in ('probs_all', 'preds_conf', 'preds_loc', 'probs'))

saver = tf.train.Saver()

pred shape:
(?, 10356, 24)
true shape:
(?, 10356)
Tensor("y_true_loc:0", shape=(?, 41424), dtype=float32)
Tensor("concat_1:0", shape=(?, 41424), dtype=float32)


In [7]:
# Checkpoint path passed to saver.restore() in the next cell.
model_path='./12-31_model.ckpt'

In [8]:
# Load variable values from the checkpoint at `model_path` into the session.
# The fresh-initialization call below is left disabled on purpose.
saver.restore(sess,model_path)

#sess.run(tf.global_variables_initializer())

INFO:tensorflow:Restoring parameters from ./12-31_model.ckpt


In [9]:
# Rebind `optimizer` to a plain gradient-descent step (learning rate 1e-3) on
# the model's total loss, replacing the op taken from model['optimizer'].
optimizer=tf.train.GradientDescentOptimizer(learning_rate=1e-3).minimize(model['loss'])

In [15]:
# Train for 100 epochs on the in-memory training set. A new batch generator is
# created for every epoch; the mean batch loss is printed after each epoch.
for epoch in range(100):
    train_gen = next_batch(X_train, y_train_conf, y_train_loc, BATCH_SIZE)
    num_batches_train = int(math.ceil(X_train.shape[0] / BATCH_SIZE))

    losses = []
    for batch_idx in range(num_batches_train):
        images, y_true_conf_gen, y_true_loc_gen, conf_loss_mask_gen = next(train_gen)
        batch_feed = {
            x: images,
            y_true_conf: y_true_conf_gen,
            y_true_loc: y_true_loc_gen,
            conf_loss_mask: conf_loss_mask_gen,
            is_training: True,
        }
        _, loss = sess.run([optimizer, reported_loss], feed_dict=batch_feed)
        losses.append(loss)

    print("new epoch")
    losses = np.array(losses)
    print(np.mean(losses))

new epoch
231291.0
new epoch
93544.8
new epoch
60028.4
new epoch
44351.0
new epoch
35415.6
new epoch
29648.7
new epoch
25556.0
new epoch
22458.3
new epoch
20019.7
new epoch
18040.2
new epoch
16421.6
new epoch
15065.7
new epoch
13922.9
new epoch
12946.7
new epoch
12094.4
new epoch
11358.5
new epoch
10707.5
new epoch
10125.6
new epoch
9607.87
new epoch
9135.7
new epoch
8713.51
new epoch
8325.94
new epoch
7968.91
new epoch
7641.91
new epoch
7340.0
new epoch
7060.29
new epoch
6802.22
new epoch
6557.9
new epoch
6335.03
new epoch
6122.0
new epoch
5924.52
new epoch
5743.78
new epoch
5569.12
new epoch
5407.46
new epoch
5253.32
new epoch
5110.8
new epoch
4975.37
new epoch
4844.54
new epoch
4724.5
new epoch
4608.5
new epoch
4500.32
new epoch
4398.03
new epoch
4301.45
new epoch
4203.72
new epoch
4119.23
new epoch
4032.86
new epoch
3952.14
new epoch
3874.97
new epoch
3799.84
new epoch
3731.94
new epoch
3661.08
new epoch
3594.22
new epoch
3534.23
new epoch
3475.27
new epoch
3414.55
new epoch
3360.4

In [10]:
# Create a gradient-descent training op (learning rate 1e-3) and train for 180
# epochs; the mean batch loss is printed for every 10th epoch.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3).minimize(model['loss'])
for epoch in range(180):
    train_gen = next_batch(X_train, y_train_conf, y_train_loc, BATCH_SIZE)
    num_batches_train = int(math.ceil(X_train.shape[0] / BATCH_SIZE))

    losses = []
    for batch_idx in range(num_batches_train):
        images, y_true_conf_gen, y_true_loc_gen, conf_loss_mask_gen = next(train_gen)
        batch_feed = {
            x: images,
            y_true_conf: y_true_conf_gen,
            y_true_loc: y_true_loc_gen,
            conf_loss_mask: conf_loss_mask_gen,
            is_training: True,
        }
        _, loss = sess.run([optimizer, reported_loss], feed_dict=batch_feed)
        losses.append(loss)

    if epoch % 10 == 0:
        losses = np.array(losses)
        print(np.mean(losses))

1006.85
33.935
19.02
16.07
14.6203
13.8769
13.0323
12.7585
12.1973
11.8259
11.5699
11.3492
11.1341
10.9567
10.816
10.626
10.471
10.3767


In [28]:
# Multi-shard training: 20 passes over every file in `npy_list`, 15 epochs per
# shard. The mean batch loss is printed for epochs 0 and 10 of each shard, and
# the shard's arrays are deleted before the next one is loaded.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-3).minimize(model['loss'])
for period in range(20):
    for npy_dir in npy_list:
        train, X_train, y_train_conf, y_train_loc = change_trainset(npy_dir)
        for epoch in range(15):
            train_gen = next_batch(X_train, y_train_conf, y_train_loc, BATCH_SIZE)
            num_batches_train = int(math.ceil(X_train.shape[0] / BATCH_SIZE))

            losses = []
            for batch_idx in range(num_batches_train):
                images, y_true_conf_gen, y_true_loc_gen, conf_loss_mask_gen = next(train_gen)
                batch_feed = {
                    x: images,
                    y_true_conf: y_true_conf_gen,
                    y_true_loc: y_true_loc_gen,
                    conf_loss_mask: conf_loss_mask_gen,
                    is_training: True,
                }
                _, loss = sess.run([optimizer, reported_loss], feed_dict=batch_feed)
                losses.append(loss)

            if epoch % 10 == 0:
                losses = np.array(losses)
                print(np.mean(losses))

        # Drop this shard's data so it can be freed before the next load
        del train, X_train, y_train_conf, y_train_loc

45.0864
13.9568
28.1451
12.3073
25.7974
10.351
110.542
12.2519
39.2978
13.8176
27.6842
12.3249
24.6696
10.2453
33.3856
11.3851
25.1858
13.29
20.1644
11.9381
18.5034
9.84533
21.6579
10.7594
20.3436
12.8856
17.1574
11.6079
15.4908
9.54682
17.1413
10.3696
17.9348
12.6066
15.5831
11.3614
13.9004
9.29078
15.033
10.0765
16.6161
12.3613
14.5796
11.185
12.8064
9.12253
13.6669
9.86864
15.682
12.1869
13.8791
11.0223
12.0157
8.99449
12.8267
9.69599
14.9512
12.0471
13.3559
10.8847
11.5139
8.86366
12.208
9.52433
14.4769
11.9102
12.9463
10.7772
11.0322
8.76855
11.7434
9.42166
14.0496
11.8033
12.5936
10.6601
10.731
8.683
11.376
9.32
13.7403
11.6972
12.3291
10.5767
10.4068
8.59221
11.1028
9.22478
13.4619
11.6253
12.097
10.5164
10.2023
8.53227
10.8608
9.15996
13.2483
11.536
11.9004
10.4471
9.9984
8.46695
10.6631
9.07038
13.0544
11.4818
11.7321
10.388
9.8159
8.43822
10.4747
9.00622
12.8947
11.4275
11.5778
10.3272
9.68382
8.37327
10.3243
8.93338
12.7473
11.3558
11.4566
10.2834
9.55056
8.33294
10.1664
8.8

KeyboardInterrupt: 

In [54]:
# Release the previously loaded shard before reading the next one. Rebinding
# to None drops the references just like `del`, but does not raise NameError
# when the names are already gone (the multi-shard training loop deletes them
# itself at the end of every shard).
train = X_train = y_train_conf = y_train_loc = None

# Load the prepared labels: a pickled dict stored in a 0-d object array,
# mapping image file name -> {'y_true_conf': ..., 'y_true_loc': ...}.
# allow_pickle=True is required by NumPy >= 1.16.3 to read object arrays.
# Only load files you produced yourself: unpickling can execute arbitrary code.
train = np.load('./4305_data_prep.npy', allow_pickle=True)
train = train[()]  # unwrap the 0-d array to get the dict back
for key in train.keys():
    train[key]['y_true_conf'] = train[key]['y_true_conf'].astype(int)

X_train = []
y_train_conf = []
y_train_loc = []
for img_name in train.keys():
    img_path = os.path.join('./ResizedImage//', img_name)
    img_file = cv2.imread(img_path)
    if img_file is None:
        # cv2.imread signals failure by returning None; stop here instead of
        # silently putting None into the training array.
        raise IOError('Could not read image: ' + img_path)
    X_train.append(img_file)
    y_train_conf.append(train[img_name]['y_true_conf'])
    y_train_loc.append(train[img_name]['y_true_loc'])
X_train = np.array(X_train)
y_train_conf = np.array(y_train_conf)
y_train_loc = np.array(y_train_loc)

In [26]:
# Prepared-label shard files iterated by the multi-shard training loop.
npy_list=['./1000_data_prep.npy','./2000_data_prep.npy','./3000_data_prep.npy','./4000_data_prep.npy']

In [24]:
# Prepared-label shard files; each entry is a valid argument for change_trainset().
npy_list=['./1000_data_prep.npy','./2000_data_prep.npy','./3000_data_prep.npy','./4000_data_prep.npy']
def change_trainset(npy_dir):
    """
    Load one prepared training shard together with the images it refers to.

    Arguments:
        * npy_dir: path to a .npy file holding a pickled dict of
            {image file name: {'y_true_conf': array, 'y_true_loc': array}}

    Images are read with cv2.imread from './ResizedImage//' (relative to the
    current working directory). Each 'y_true_conf' array in the dict is
    replaced by its integer-typed version.

    Returns (train, X_train, y_train_conf, y_train_loc): the label dict and
    three arrays built in the dict's iteration order.

    Raises IOError if an image listed in the shard cannot be read.
    """
    # allow_pickle=True is required by NumPy >= 1.16.3 to read object arrays.
    # Only load files you produced yourself: unpickling can run arbitrary code.
    train = np.load(npy_dir, allow_pickle=True)
    train = train[()]  # unwrap the 0-d object array to get the dict back

    X_train = []
    y_train_conf = []
    y_train_loc = []
    for img_name, labels in train.items():
        labels['y_true_conf'] = labels['y_true_conf'].astype(int)

        img_path = os.path.join('./ResizedImage//', img_name)
        img_file = cv2.imread(img_path)
        if img_file is None:
            # cv2.imread signals failure by returning None; stop here instead
            # of silently putting None into the training array.
            raise IOError('Could not read image: ' + img_path)

        X_train.append(img_file)
        y_train_conf.append(labels['y_true_conf'])
        y_train_loc.append(labels['y_true_loc'])

    return train, np.array(X_train), np.array(y_train_conf), np.array(y_train_loc)

In [51]:

# Evaluate the confidence and localization losses on one more batch, without
# running the training op. Uses `train_gen` left over from the most recent
# training cell.
images, y_true_conf_gen, y_true_loc_gen, conf_loss_mask_gen = next(train_gen)
closs, lloss = sess.run([conf_loss, loc_loss], feed_dict={
    x: images,
    y_true_conf: y_true_conf_gen,
    y_true_loc: y_true_loc_gen,
    conf_loss_mask: conf_loss_mask_gen,
    is_training: False,
})
# Label each value so the two numbers cannot be confused in the output.
print("conf_loss: {}".format(closs))
print("loc_loss: {}".format(lloss))
   

8.3915
new epoch
0.120379


In [None]:
# NOTE(review): stray expression in a never-executed cell. No `loc` is defined
# in this notebook's own code, so this presumably raises NameError unless one
# of the star imports provides the name — TODO confirm and remove the cell.
loc

In [65]:
# Pull the next batch from the leftover generator and run the prediction
# tensors on its images only (no labels are fed).
images, y_true_conf_gen, y_true_loc_gen, conf_loss_mask_gen = next(train_gen)
inference_fetches = [preds_conf, preds_loc, probs, probs_all]
preds_conf_val, preds_loc_val, probs_val, probs_all_val = sess.run(
    inference_fetches, feed_dict={x: images, is_training: False})

In [30]:
# Run NMS on the predictions for the first 7 images of the batch, draw the
# surviving boxes with their class labels, and write each result to ./Test/.
for k in range(7):
    y_pred_conf = preds_conf_val[k]
    y_pred_conf = y_pred_conf.astype('float32')
    prob = probs_val[k]
    y_pred_loc = preds_loc_val[k]
    boxes = nms(y_pred_conf, y_pred_loc, prob)
    image = images[k]
    # Draw on a copy: the cv2 drawing calls modify the array they are given,
    # and images[k] shares its data with the batch, so drawing on it directly
    # would stamp boxes onto `images` (and re-running would pile them up).
    rectangled = image.copy()
    for box in boxes:
        box_coords = [int(round(ku)) for ku in box[0]]
        box_cls = box[1]
        cv2.rectangle(rectangled, tuple(box_coords[:2]), tuple(box_coords[2:]), (0,255,0))
        cv2.putText(rectangled, str(box_cls), tuple(box_coords[:2]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    cv2.imwrite('./Test/1-02-2_newt'+str(k)+'.jpg',rectangled)

In [66]:
# Run NMS on the predictions for the first 7 images of the batch, draw the
# surviving boxes with their class labels, and write each result to ./Test/.
for k in range(7):
    y_pred_conf = preds_conf_val[k]
    y_pred_conf = y_pred_conf.astype('float32')
    prob = probs_val[k]
    y_pred_loc = preds_loc_val[k]
    boxes = nms(y_pred_conf, y_pred_loc, prob)
    image = images[k]
    # Draw on a copy: the cv2 drawing calls modify the array they are given,
    # and images[k] shares its data with the batch, so drawing on it directly
    # would stamp boxes onto `images` (and re-running would pile them up).
    rectangled = image.copy()
    for box in boxes:
        box_coords = [int(round(ku)) for ku in box[0]]
        box_cls = box[1]
        cv2.rectangle(rectangled, tuple(box_coords[:2]), tuple(box_coords[2:]), (0,255,0))
        cv2.putText(rectangled, str(box_cls), tuple(box_coords[:2]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    cv2.imwrite('./Test/1-02-2_test6_'+str(k)+'.jpg',rectangled)

In [34]:
# Re-run NMS on the globals left by the drawing loop (its last processed image)
# to inspect the returned boxes directly.
nms(y_pred_conf, y_pred_loc, prob)

array([], dtype=float64)

In [40]:
# Peek at the predicted class values (cast to float32 by the drawing loop)
# for entries 40..69.
y_pred_conf[40:70]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.], dtype=float32)

In [57]:
# NOTE(review): this rebinds CONF_THRESH in the notebook namespace only, and
# another cell sets it to 0.85, so the effective value depends on run order.
# Presumably nms() (star-imported from `functions`) reads its own module-level
# threshold, in which case this assignment does not affect it — TODO confirm.
CONF_THRESH=0.9

In [65]:
# One independent, initially empty box list per class (3 classes):
# class -> [(x1, y1, x2, y2, prob), (...), ...]
class_boxes = [[] for h in range(3)]

In [41]:
# NOTE(review): this rebinds CONF_THRESH in the notebook namespace only, and
# another cell sets it to 0.9, so the effective value depends on run order.
# Presumably nms() (star-imported from `functions`) reads its own module-level
# threshold, in which case this assignment does not affect it — TODO confirm.
CONF_THRESH = 0.85

In [67]:
#We've Done 2000 Images For one time
model_path='./1-02_model.ckpt'
saver.save(sess,model_path)

'./1-02_model.ckpt'