In [1]:
import numpy as np
import tensorflow as tf
import config as cfg

import os
import xml.etree.ElementTree as ET
import numpy as np
import cv2
import pickle
import copy
import tensorflow as tf
import datetime
#from __future__ import print_function
slim = tf.contrib.slim


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [None]:
DATA_PATH ='data'
PASCAL_PATH = os.path.join(DATA_PATH, 'pascal_voc')
CACHE_PATH = os.path.join(PASCAL_PATH, 'cache')

In [None]:
class dataset_pascal_voc(object):
    def __init__(self, phase, rebuild=False):
        self.devkil_path = os.path.join(cfg.PASCAL_PATH, 'VOCdevkit')
        self.data_path = os.path.join(self.devkil_path, 'VOC2007')
        self.cache_path = cfg.CACHE_PATH
        self.batch_size = cfg.BATCH_SIZE
        self.image_size = cfg.IMAGE_SIZE
        self.cell_size = cfg.CELL_SIZE
        self.classes = cfg.CLASSES
        self.class_to_ind = dict(zip(self.classes, range(len(self.classes)))) #create dict where keys = labels
        self.flipped = True
        self.phase = phase
        self.rebuild = rebuild
        self.cursor = 0
        self.epoch = 1
        self.labels_got = None
        self.prepare()
        
    def get(self):
        X_img = np.zeros((self.batch_size, self.image_size, self.image_size, 3))
        Y_labels = np.zeros((self.batch_size, self.cell_size, self.cell_size, 25))
        count_batch = 0
        while count_batch < self.batch_size:
            img_name = self.labels_got[self.cursor]['imname']
            flipped = self.labels_got[self.cursor]['flipped']
            X_img[count_batch, :,:,:] = self.read_image(img_name,flipped)
            Y_labels[count_batch, :,:,:] = self.labels_got[self.cursor]['label']
            count_batch +=1
        
        return X_img, Y_labels
        
            
    def read_image(self, img_name, flipped=False):
        image = cv2.imread(img_name)
        # resize image to 448 , 448
        image = cv2.resize(image, (self.image_size, self.image_size))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image = (image /255.0) *2 -1.0
        if flipped:
            image = image[:,::-1,:]        
        return image
    
    def prepare(self):
        print("In prepare")
        # contains list of dict where each dicts where each dict contain file path, 3D labels & if flipped or not
        labels_got = self.get_labels()
        #print(labels_got[0:10])
        #print(labels_got[0]['label'])
        if self.flipped:
            print('Appending horizontally-flipped training examples ...')
            labels_copy = copy.deepcopy(labels_got)
            added_labels = self.data_augment(labels_got,labels_copy)
        np.random.shuffle(added_labels)
        self.labels_got = added_labels
        print("labels len", len(added_labels))
        return labels_got
        
    
    def data_augment(self, orig_labels, labels_copy):
        print("Create flipped data")
        for index in range(len(labels_copy)):
            labels_copy[index]['flipped'] = True
            labels_copy[index]['label'] = labels_copy[index]['label'][:,::-1,:]
            
            for i in range(self.cell_size):
                for j in range(self.cell_size):
                    if labels_copy[index]['label'][i,j,0] == 1:
                        #print(labels_copy[index]['label'][i,j,2])
                        labels_copy[index]['label'][i,j,1] = self.image_size-1-labels_copy[index]['label'][i,j,1]
                        #print(labels_copy[index]['label'][i,j,2])
        orig_labels+= labels_copy
        return orig_labels
        
        
    def get_labels(self):
        # getting file containing data
        cache_file = os.path.join(self.cache_path,'pascal_'+self.phase+'_gt_labels.pkl')
        print(self.cache_path)
        if(os.path.isfile(cache_file) and not self.rebuild):
            print("Getting labels from "+ cache_file)
            with open(cache_file, 'rb') as labels_file:
                labels_got = pickle.load(labels_file)
                
            print("original labels length :", len(labels_got))
            return labels_got

In [2]:
class YoloNet(object):
    
    def __init__(self, is_training=True):
        self.classes = cfg.CLASSES
        self.num_of_classes = len(self.classes)
        self.image_size = cfg.IMAGE_SIZE
        self.cell_size = cfg.CELL_SIZE
        self.boxes_per_cell = cfg.BOXES_PER_CELL
        # output size = S*S * B*5 + Cls
        self.output_size = (self.cell_size * self.cell_size) * (self.num_of_classes + self.boxes_per_cell * 5)
        #self.predict_boxes_print = self.placeholder(tf.float32, [20, 7, 7, 2, 4])
        self.object_scale = cfg.OBJECT_SCALE
        self.no_object_scale = cfg.NOOBJECT_SCALE
        self.class_scale = cfg.CLASS_SCALE
        self.coordi_scale = cfg.COORD_SCALE
        
        self.learning_rate = cfg.LEARNING_RATE
        self.batch_size = cfg.BATCH_SIZE
        self.alpha = cfg.ALPHA
        self.boundary1 = self.cell_size * self.cell_size * self.num_of_classes # 7*7*20 = 980
        self.boundary2 = self.boundary1 +\
            self.cell_size * self.cell_size * self.boxes_per_cell # 980 + 7*7*2(98) = 1078

        
        #(7, 7, 2)
        self.offset = np.transpose(np.reshape(np.array(
            [np.arange(self.cell_size)] * self.cell_size * self.boxes_per_cell),
            (self.boxes_per_cell, self.cell_size, self.cell_size)), (1, 2, 0))
        self.images = tf.placeholder(
            tf.float32, [None, self.image_size, self.image_size, 3],
            name='images')
        self.logits = self.build_network(self.images, num_outputs=self.output_size, alpha=self.alpha,is_training=True)
        
        if is_training:
            self.labels = tf.placeholder(
                tf.float32,
                [None, self.cell_size, self.cell_size, 5 + self.num_of_classes])
            self.loss_layer(self.logits, self.labels)
            self.total_loss = tf.losses.get_total_loss()
            tf.summary.scalar('total_loss', self.total_loss)

    def build_network(self, images, num_outputs, alpha, keep_prob=0.5, is_training=True, scope='yolo'):
        with tf.variable_scope(scope):
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                activation_fn=leaky_relu(alpha),
                weights_regularizer=slim.l2_regularizer(0.0005),
                # truncated help in convergance
                weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)
            ):
                net = tf.pad(images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),name='pad_1')
                #print("Start padd", net)
                # 64 filters of 7 *7 with stride of 2 padding = no padding
                net = slim.conv2d(
                    net, 64, 7, 2, padding='VALID', scope='conv_2')
                #print("1st conv", net)
                # Max Pooling layer padding = same as input dimension. Stride = 2
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')
                
                net = slim.conv2d(net, 192, 3, scope='conv_4')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_5')
                
                net = slim.conv2d(net, 128, 1, scope='conv_6')
                net = slim.conv2d(net, 256, 3, scope='conv_7')
                net = slim.conv2d(net, 256, 1, scope='conv_8')
                net = slim.conv2d(net, 512, 3, scope='conv_9')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_10')
                
                net = slim.conv2d(net, 256, 1, scope='conv_11')
                net = slim.conv2d(net, 512, 3, scope='conv_12')
                net = slim.conv2d(net, 256, 1, scope='conv_13')
                net = slim.conv2d(net, 512, 3, scope='conv_14')
                net = slim.conv2d(net, 256, 1, scope='conv_15')
                net = slim.conv2d(net, 512, 3, scope='conv_16')
                net = slim.conv2d(net, 256, 1, scope='conv_17')
                net = slim.conv2d(net, 512, 3, scope='conv_18')
                net = slim.conv2d(net, 512, 1, scope='conv_19')
                net = slim.conv2d(net, 1024, 3, scope='conv_20')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_21')
                
                net = slim.conv2d(net, 512, 1, scope='conv_22')
                net = slim.conv2d(net, 1024, 3, scope='conv_23')
                net = slim.conv2d(net, 512, 1, scope='conv_24')
                net = slim.conv2d(net, 1024, 3, scope='conv_25')
                net = slim.conv2d(net, 1024, 3, scope='conv_26')
                net = tf.pad(net, np.array([[0, 0], [1, 1], [1, 1], [0, 0]]),name='pad_27')
                net = slim.conv2d(
                    net, 1024, 3, 2, padding='VALID', scope='conv_28')
                net = slim.conv2d(net, 1024, 3, scope='conv_29')
                net = slim.conv2d(net, 1024, 3, scope='conv_30')
                # transpose to change NHWC([batch_size, height, width, channels]) to NCHW
                net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
                net = slim.flatten(net, scope='flat_32')
                net = slim.fully_connected(net, 512, scope='fc_33')
                #=4096
                net = slim.fully_connected(net, 4096, scope='fc_34')
                #print(net)
                net = slim.dropout(
                    net, keep_prob=1-keep_prob, is_training=True,
                    scope='dropout_35')
                #7 x 7 x (2 x 5 + 20) = 7 x 7 x 30 tensor  = 1470
                net = slim.fully_connected(
                    net, num_outputs, activation_fn=None, scope='fc_36')        
                print(net)
        return net

    def calculate_iou(self, boxes1, boxes2, scope='iou'):
        with tf.variable_scope(scope):
            # covert (x_center,y_center,w,h) to (x1,y1,x2,y2)

            boxes1_temp = tf.stack([boxes1[..., 0] - boxes1[..., 2] / 2.0,  # x_c - w/2 => x_1
                                 boxes1[..., 1] - boxes1[..., 3] / 2.0,  # y_c - h/2 => y_1
                                 boxes1[..., 0] + boxes1[..., 2] / 2.0,  # x_c + w/2 => x_2
                                 boxes1[..., 1] + boxes1[..., 3] / 2.0], # y_c + h/2 => y_2
                                axis=-1)

            boxes2_temp = tf.stack([boxes2[..., 0] - boxes2[..., 2] / 2.0,
                                 boxes2[..., 1] - boxes2[..., 3] / 2.0,
                                 boxes2[..., 0] + boxes2[..., 2] / 2.0,
                                 boxes2[..., 1] + boxes2[..., 3] / 2.0],
                                axis=-1)
            
            lu = tf.maximum(boxes1_temp[...,:2],boxes2_temp[...,:2])
            ru = tf.minimum(boxes1_temp[...,2:],boxes2_temp[...,2:])
            
            # intersection| calculating width and height of intersectioned regionregioni.e x2-x1, y2-y1
            intersection = tf.maximum(0.0, lu-ru)
            # calculate area of overlapped region by width*height
            intersection_area = intersection[...,0] * intersection[...,1]
            
            box1_area = boxes1[...,2] * boxes1[...,3]
            box2_area = boxes2[...,2] * boxes2[...,3]
            
            union = tf.maximum(box1_area + box2_area - intersection_area, 1e-10)
            
        return tf.clip_by_value(intersection_area / union, 0.0, 1.0) # value <0 : value =0 | value>1: value=1
    
    
    def loss_layer(self, predicts, labels,  scope = "loss_layer"):
        with tf.variable_scope(scope):

            predict_classes = tf.reshape(predicts[:,:self.boundary1],
                                        [self.batch_size, self.cell_size, self.cell_size, self.num_of_classes])
            
            predict_scales = tf.reshape(predicts[:,self.boundary1:self.boundary2],
                                       [self.batch_size, self.cell_size, self.cell_size, self.boxes_per_cell])
            
            predict_boxes = tf.reshape(predicts[:,self.boundary2:],
                                      [self.batch_size, self.cell_size, self.cell_size,self.boxes_per_cell, 4])
            
            
            classes = labels[...,5:]
            
            response = tf.reshape(labels[...,0],
                                 [self.batch_size, self.cell_size, self.cell_size, 1])
            
            boxes = tf.reshape(labels[...,1:5],
                              [self.batch_size, self.cell_size, self.cell_size, 1, 4])
            
            boxes = tf.tile(boxes, [1,1,1, self.boxes_per_cell, 1]) / self.image_size
            
            #shape=(1, 7, 7, 2)
            offset = tf.reshape(
                tf.constant(self.offset, dtype=tf.float32),
                [1, self.cell_size, self.cell_size, self.boxes_per_cell])
            # Tile operation creates a new tensor by replicating input multiples times
            # [[[0. 0.]
            #   [1. 1.]  X 7) X 7
            #   ...
            #   [6. 6.]
            # offest[0,:,:,1] =>1st row => [0. 1. 2. 3. 4. 5. 6.]
            offset = tf.tile(offset, [self.batch_size, 1, 1, 1]) 
            
            # [[[0. 0.]                  [[[1. 1.]                  
            #   [0. 0.]  X 7) X 7          [1. 1.]
            #   ...                        ...
            #   [0. 0.]                    [1.  1.]
            # offest[0,:,:,1] =>1st row => [0. 0. 0. 0. 0. 0. 0.]
            offset_tran = tf.transpose(offset, (0, 2, 1, 3))
            
            # here sqaure is for calculating width and height of predicted bbox for IOU calculation
            
            predict_boxes_tran = tf.stack(
                [(predict_boxes[..., 0] + offset)/self.cell_size, # position of x_c within cell containing obj   
                 (predict_boxes[..., 1] + offset_tran)/ self.cell_size,  # position of y_c within cell containing obj 
                 tf.square(predict_boxes[..., 2]),                        # take sqaure of (𝑤^1/2) => w
                 tf.square(predict_boxes[..., 3])], axis=-1)              # take sqaure of (h^1/2) => h
            
            #self.predict_boxes_print = predict_boxes_tran
            #print_classes = tf.Print(self.predict_boxes_print,[self.predict_boxes_print],"predict")
            
            #tf.print(predict_boxes_tran,[predict_boxes_tran],"predicted")
            
            iou_predict_truth = self.calculate_iou(predict_boxes_tran, boxes)

             # calculate I tensor [BATCH_SIZE, CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
            object_mask = tf.reduce_max(iou_predict_truth, 3, keep_dims=True)
            object_mask = tf.cast(
                (iou_predict_truth >= object_mask), tf.float32) * response
#             # get the box
#             object_mask = tf.reduce_max(iou_predict_truth, 3, keep_dims=True)
#             object_mask = tf.cast(
#                 (iou_predict_truth >= object_mask), tf.float32) * response

            # calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
            
            noobject_mask = tf.ones_like(
                object_mask, dtype=tf.float32) - object_mask
            # (7,7,4)
            boxes_tran = tf.stack(
                [boxes[..., 0] * self.cell_size - offset,
                 boxes[..., 1] * self.cell_size - offset_tran,
                 tf.sqrt(boxes[..., 2]),
                 tf.sqrt(boxes[..., 3])], axis=-1)

            # class_loss
            # ∑𝑆2𝑖=0 * 𝟙i𝑜𝑏𝑗  * ∑𝑐∈𝑐𝑙𝑎𝑠𝑠𝑒𝑠(𝑝𝑖(𝑐)−𝑝̂ 𝑖(𝑐))2 | 𝟙i𝑜𝑏𝑗 is 1 when there is a particular class is predicted, else 0
            class_delta = response * (predict_classes - classes)
            class_loss = tf.reduce_mean(
                tf.reduce_sum(tf.square(class_delta), axis=[1, 2, 3]),
                name='class_loss') * self.class_scale 


            # object_loss
            # ∑𝑆^2𝑖=0 ∑Bj=0 𝟙𝑖𝑗𝑜𝑏𝑗 * (𝐶𝑖−𝐶̂ 𝑖)2 | 𝟙𝑖𝑗𝑜𝑏𝑗 "denotes that the 𝑗th bounding box predictor in cell 𝑖 is responsible for that prediction". 
            # In other words, it is equal to 1 if there is an object in cell 𝑖 and confidence of the 𝑗th predictors of this cell is the highest 
            # among all the predictors of this cell.
            object_delta = object_mask * (predict_scales - iou_predict_truth) # object_mask = 1_ij
            object_loss = tf.reduce_mean(
                tf.reduce_sum(tf.square(object_delta), axis=[1, 2, 3]),
                name='object_loss') * self.object_scale # (obj_scale =1)


            # noobject_loss
            # 𝜆𝑛𝑜𝑜𝑏𝑗 * ∑𝑆2𝑖=0 ∑Bj=0 𝟙𝑖𝑗𝑛𝑜𝑜𝑏𝑗 * (𝐶𝑖−𝐶̂ 𝑖)2  | 𝟙𝑖𝑗𝑛𝑜𝑜𝑏𝑗 is almost the same except it values 1 when there are NO objects in cell 𝑖
            #If there are no objects in cell, then truth confidence should be zero. so we get: noobject_delta = noobject_mask * (predict_scales - 0)
            noobject_delta = noobject_mask * predict_scales # 
            noobject_loss = tf.reduce_mean(
                tf.reduce_sum(tf.square(noobject_delta), axis=[1, 2, 3]),
                name='noobject_loss') * self.no_object_scale # (no_obj_scale =1)

            # coord_loss
            # 𝜆𝑐𝑜𝑜𝑟𝑑∑𝑆2𝑖=0[(𝑥𝑖−𝑥̂ 𝑖)2+(𝑦𝑖−𝑦𝑖^)2]+ 𝜆𝑐𝑜𝑜𝑟𝑑∑𝑆2𝑖=0[(𝑤𝑖‾‾‾√−𝑤̂ 𝑖‾‾‾√)2+(ℎ𝑖‾‾√−ℎ̂ 𝑖‾‾√)2]
            coord_mask = tf.expand_dims(object_mask, 4)
            boxes_delta = coord_mask * (predict_boxes - boxes_tran)
            coord_loss = tf.reduce_mean(
                tf.reduce_sum(tf.square(boxes_delta), axis=[1, 2, 3, 4]),
                name='coord_loss') * self.coordi_scale # (coordi_scale = 5)
            
            
            tf.losses.add_loss(class_loss)
            tf.losses.add_loss(object_loss)
            tf.losses.add_loss(noobject_loss)
            tf.losses.add_loss(coord_loss)

            tf.summary.scalar('class_loss', class_loss)
            tf.summary.scalar('object_loss', object_loss)
            tf.summary.scalar('noobject_loss', noobject_loss)
            tf.summary.scalar('coord_loss', coord_loss)

            tf.summary.histogram('boxes_delta_x', boxes_delta[..., 0])
            tf.summary.histogram('boxes_delta_y', boxes_delta[..., 1])
            tf.summary.histogram('boxes_delta_w', boxes_delta[..., 2])
            tf.summary.histogram('boxes_delta_h', boxes_delta[..., 3])
            tf.summary.histogram('iou', iou_predict_truth)

            
            
def leaky_relu(alpha):
    def op(inputs):
        return tf.nn.leaky_relu(inputs, alpha=alpha, name='leaky_relu')
    return op

In [None]:
def print_inputfn(data):
    tensor_vval = tf.constant(data)
    tensor_xray = tf.Print(tensor_vval, [tensor_vval], "vvvvvvvvvvvvvvvvalue")
    return tensor_xray

In [None]:
class TrainYolo(object):

    def __init__(self, net, data):
        self.net = net
        self.data = data
        self.weights_file = cfg.WEIGHTS_FILE
        self.max_iter = cfg.MAX_ITER
        self.initial_learning_rate = cfg.LEARNING_RATE
        self.decay_steps = cfg.DECAY_STEPS
        self.decay_rate = cfg.DECAY_RATE
        self.staircase = cfg.STAIRCASE
        self.summary_iter = cfg.SUMMARY_ITER
        self.save_iter = cfg.SAVE_ITER
        self.output_dir = os.path.join(
            cfg.OUTPUT_DIR, datetime.datetime.now().strftime('%Y_%m_%d_%H_%M'))
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        self.save_cfg()

        self.variable_to_restore = tf.global_variables()
        self.saver = tf.train.Saver(self.variable_to_restore, max_to_keep=None)
        self.ckpt_file = os.path.join(self.output_dir, 'yolo')
        self.summary_op = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter(self.output_dir, flush_secs=60)

        self.global_step = tf.train.create_global_step()
        self.learning_rate = tf.train.exponential_decay(
            self.initial_learning_rate, self.global_step, self.decay_steps,
            self.decay_rate, self.staircase, name='learning_rate')
        self.optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=self.learning_rate)
        self.train_op = slim.learning.create_train_op(
            self.net.total_loss, self.optimizer, global_step=self.global_step)

        gpu_options = tf.GPUOptions()
        config = tf.ConfigProto(gpu_options=gpu_options)
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())

        if self.weights_file is not None:
            print('Restoring weights from: ' + self.weights_file)
            self.saver.restore(self.sess, self.weights_file)

        self.writer.add_graph(self.sess.graph)

    def train(self):


        for step in range(1, self.max_iter + 1):
            print('step',step)

            images, labels = self.data.get()
            feed_dict = {self.net.images: images,
                         self.net.labels: labels}

            if step % self.summary_iter == 0:
                if step % (self.summary_iter * 10) == 0:
                    print("within if")

                    summary_str, loss, _ = self.sess.run(
                        [self.summary_op, self.net.total_loss, self.train_op],
                        feed_dict=feed_dict)

                    log_str = '''Date:{}, Epoch: {}, Step: {}, Learning rate: {},'''
                    ''' Loss: {:5.3f}\n'''.format(
                        datetime.datetime.now().strftime('%m-%d %H:%M:%S'),
                        self.data.epoch,
                        int(step),
                        round(self.learning_rate.eval(session=self.sess), 6),
                        loss)
                    print(log_str)

                else:
                    print("1st else")
                    summary_str, _ = self.sess.run(
                        [self.summary_op, self.train_op],
                        feed_dict=feed_dict)

                self.writer.add_summary(summary_str, step)

            else:
                print("2nd else")
                self.sess.run(self.train_op, feed_dict=feed_dict)

                # save model weights when step % save_iter ==0
            if step % self.save_iter == 0:
                print('{} Saving checkpoint file to: {}'.format(
                    datetime.datetime.now().strftime('%m-%d %H:%M:%S'),
                    self.output_dir))
                self.saver.save(
                    self.sess, self.ckpt_file, global_step=self.global_step)

    def save_cfg(self):

        with open(os.path.join(self.output_dir, 'config.txt'), 'w') as f:
            cfg_dict = cfg.__dict__
            for key in sorted(cfg_dict.keys()):
                if key[0].isupper():
                    cfg_str = '{}: {}\n'.format(key, cfg_dict[key])
                    f.write(cfg_str)




In [None]:



os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU

pascal_dataset = dataset_pascal_voc('train')
yolo = YoloNet()
train_yolo_obj = TrainYolo(yolo, pascal_dataset)

In [None]:

print('Start training ...')
train_yolo_obj.train()
print('Done training.')


In [None]:
MAX_ITER = 100

SUMMARY_ITER = 5

SAVE_ITER = 10
for step in range(1, MAX_ITER + 1):
        print('step',step)

        if step % SUMMARY_ITER == 0:
            if step % (SUMMARY_ITER * 10) == 0:
                print("within 2nd if")

            else:
                print("1st else")

        else:
            print("2nd else")

        if step % SAVE_ITER== 0:
            print("3rd if")


In [None]:
#print(os.path.realpath('yolo-2.meta'))
print(os.path.abspath("yolo-2.meta"))

In [None]:
saver = tf.train.import_meta_graph('/home/hawk_pc/yolo_from_scratch/data/pascal_voc/output/2019_06_14_02_02/yolo-2.meta')
saver.restore(sess, '/home/hawk_pc/yolo_from_scratch/data/pascal_voc/output/2019_06_14_02_02/yolo-2')
#graph = sess.graph
# for node in graph.as_graph_def().node:
#     if "fc_36" in node.name:
#         print(node.name)

In [None]:
saver.restore(sess,tf.train.latest_checkpoint("2019_06_14_02_02")) 
#saver = tf.train.import_meta_graph("/home/hawk_pc/yolo_from_scratch/data/pascal_voc/output/2019_06_14_02_02/yolo-2.meta")
sess = tf.Session()
saver.restore(sess, meta_path+"/data/pascal_voc/output/2019_06_14_02_02/checkpoint")
graph = sess.graph


In [3]:
class Detector(object):

    def __init__(self, net, weight_file):
        self.net = net
        self.weights_file = weight_file

        self.classes = cfg.CLASSES
        self.num_class = len(self.classes)
        self.image_size = cfg.IMAGE_SIZE
        self.cell_size = cfg.CELL_SIZE
        self.boxes_per_cell = cfg.BOXES_PER_CELL
        self.threshold = cfg.THRESHOLD
        self.iou_threshold = cfg.IOU_THRESHOLD
        self.boundary1 = self.cell_size * self.cell_size * self.num_class
        self.boundary2 = self.boundary1 +\
            self.cell_size * self.cell_size * self.boxes_per_cell

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        print('Restoring weights from: ' + self.weights_file)
        #self.saver = tf.train.import_meta_graph('/home/hawk_pc/yolo_from_scratch/data/pascal_voc/output/2019_06_14_02_02/yolo-2.meta')
        #self.saver.restore(self.sess, '/home/hawk_pc/yolo_from_scratch/data/pascal_voc/output/2019_06_14_02_02/yolo-2')
        self.saver = tf.train.Saver()
        self.saver.restore(self.sess, self.weights_file)


    def detect(self, img):
        img_h, img_w, _ = img.shape
        inputs = cv2.resize(img, (self.image_size, self.image_size))
        inputs = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB).astype(np.float32)
        inputs = (inputs / 255.0) * 2.0 - 1.0
        inputs = np.reshape(inputs, (1, self.image_size, self.image_size, 3))
        print("Input shape",inputs.shape)

        self.look_only_once(inputs)


    def look_only_once(self, inputs):
        # (1,1470)
        net_output = self.sess.run(self.net.logits,
                                   feed_dict={self.net.images: inputs})
        
        print("Net Output shape :", net_output.shape)
        print("Net Output Value :", net_output)

 
    def image_detector(self, imname, wait=0):
        image = cv2.imread(imname)

        self.detect(image)
        

In [None]:
a = np.array([[1, 2], [4, 5], [8, 9]])
filter_mat_test = np.array(a >= 3, dtype='bool')
print(filter_mat_test)
filter_mat_boxes_test = np.nonzero(filter_mat_test)
print(a[filter_mat_boxes_test])

In [4]:
yolo = YoloNet(False)
weight_file = cfg.WEIGHTS_FILE


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Tensor("yolo/fc_36/BiasAdd:0", shape=(?, 1470), dtype=float32)


In [5]:
detector = Detector(yolo, weight_file)

imname = 'test/jhaaz.jpeg'
detector.image_detector(imname)

Restoring weights from: data/weights/YOLO_small.ckpt
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from data/weights/YOLO_small.ckpt
Input shape (1, 448, 448, 3)
Net Output shape : (1, 1470)
Net Output Value : [[0.447892   0.02232639 0.0464194  ... 0.46708572 0.32219887 0.28959635]]
range(0, 1)
Scales... [[ 1.9242056e-05  4.3281508e-03  9.1489498e-04  7.8444388e-03
   9.2050550e-04  2.9578642e-03  2.5506269e-03]
 [ 3.1755036e-03  5.9064664e-03  1.3049910e-02  9.0718316e-04
  -2.1513016e-03  2.1220699e-03  5.0066668e-03]
 [-1.9240798e-03  2.3736809e-03 -9.6800327e-03 -4.8317863e-03
  -2.7239621e-02  2.9419707e-03  3.4683871e-03]
 [-5.2653216e-03 -3.2920182e-02 -2.4007775e-02  3.8994023e-01
   3.0162860e-02 -6.8838941e-04 -9.0657698e-04]
 [ 8.2501676e-03  7.2493879e-03  4.3935932e-02 -2.3313984e-02
  -3.0640556e-02 -8.1528391e-04 -8.3611365e-03]
 [-3.7863616e-02 -1.0677131e-02 -4.1858880e-03  6.6747367e-03
  -2

In [None]:
offset = np.array(
    [np.arange(7)] * 7 * 2)
offset = np.transpose(
    np.reshape(
        offset,
        [2, 7 ,7]),
    (1, 2, 0))
print(offset.shape)
print(offset)

In [None]:
offset_trans=np.transpose(offset,(1,0, 2))
print(offset.shape)
print(offset)
