In [1]:
import tensorflow as tf
import numpy as np
# import cv2
import random
from PIL import Image

In [5]:
def leaky_relu(x, alpha=0.1, dtype=tf.float32):
    x = tf.cast(x, dtype=dtype)
    bool_mask = (x > 0)
    mask = tf.cast(bool_mask, dtype=dtype)
    return 1.0 * mask * x + alpha * (1 - mask) * x

In [6]:
def conv2d(scope, input, kernel_size, stride=1, pretrain=True, train=True):
    """convolutional layer

    Args:
      input: 4-D tensor [batch_size, height, width, depth]
      scope: variable_scope name
      kernel_size: [k_height, k_width, in_channel, out_channel]
      stride: int32
    Return:
      output: 4-D tensor [batch_size, height/stride, width/stride, out_channels]
    """
    with tf.variable_scope(scope) as scope:
#         kernel = _variable_with_weight_decay('weights', shape = kernel_size, stddev = 5e-2)
#         var = self._variable_on_cpu(name, shape, tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32))
        kernel = tf.get_variable('weights', kernel_size, initializer=tf.truncated_normal_initializer(stddev=5e-2, dtype=tf.float32), dtype=tf.float32)
        conv = tf.nn.conv2d(input, kernel, [1, stride, stride, 1], padding='SAME')
        biases = tf.get_variable('biases', kernel_size[3:], initializer = tf.constant_initializer(0.0), dtype=tf.float32)
#         biases = self._variable_on_cpu('biases', kernel_size[3:], tf.constant_initializer(0.0), pretrain, train)
        conv_plus_biases = tf.nn.bias_add(conv, biases)
        conv = leaky_relu(conv_plus_biases)
    return conv

In [7]:
def max_pool(input, kernel_size, stride):
    """max_pool layer

    Args:
      input: 4-D tensor [batch_zie, height, width, depth]
      kernel_size: [k_height, k_width]
      stride: int32
    Return:
      output: 4-D tensor [batch_size, height/stride, width/stride, depth]
    """
    return tf.nn.max_pool(input, ksize=[1, kernel_size[0], kernel_size[1], 1], strides=[1, stride, stride, 1], padding='SAME')

In [8]:
def local(scope, input, in_dimension, out_dimension, leaky=True):
    """Fully connection layer

    Args:
      scope: variable_scope name
      input: [batch_size, ???]
      out_dimension: int32
    Return:
      output: 2-D tensor [batch_size, out_dimension]
    """
    with tf.variable_scope(scope) as scope:
        reshape = tf.reshape(input, [tf.shape(input)[0], -1])

#         weights = self._variable_with_weight_decay('weights', shape=[in_dimension, out_dimension],
#                                                  stddev=0.04, wd=self.weight_decay, pretrain=pretrain, train=train)
#         var = self._variable_on_cpu(name, shape,
#       tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32), pretrain, train)
        weights = tf.get_variable('weights', [in_dimension, out_dimension], initializer=tf.truncated_normal_initializer(stddev=0.04, dtype=tf.float32), dtype=tf.float32)
        
        
#         biases = self._variable_on_cpu('biases', [out_dimension], tf.constant_initializer(0.0), pretrain, train)
        biases = tf.get_variable('biases', [out_dimension], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
        
        local = tf.matmul(reshape, weights) + biases

        if leaky:
            local = leaky_relu(local)
        else:
            local = tf.identity(local, name=scope.name)

    return local

In [9]:
def cond1(num, object_num, loss, predict, label, nilboy):
    """
    if num < object_num
    """
    return num < object_num

In [10]:
def iou(boxes1, boxes2):
    """calculate ious
    Args:
      boxes1: 4-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL, 4]  ====> (x_center, y_center, w, h)
      boxes2: 1-D tensor [4] ===> (x_center, y_center, w, h)
    Return:
      iou: 3-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    """
    boxes1 = tf.stack([boxes1[:, :, :, 0] - boxes1[:, :, :, 2] / 2, boxes1[:, :, :, 1] - boxes1[:, :, :, 3] / 2,
                      boxes1[:, :, :, 0] + boxes1[:, :, :, 2] / 2, boxes1[:, :, :, 1] + boxes1[:, :, :, 3] / 2])
    boxes1 = tf.transpose(boxes1, [1, 2, 3, 0])
    boxes2 =  tf.stack([boxes2[0] - boxes2[2] / 2, boxes2[1] - boxes2[3] / 2,
                      boxes2[0] + boxes2[2] / 2, boxes2[1] + boxes2[3] / 2])

    #calculate the left up point
    lu = tf.maximum(boxes1[:, :, :, 0:2], boxes2[0:2])
    rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:])

    #intersection
    intersection = rd - lu 

    inter_square = intersection[:, :, :, 0] * intersection[:, :, :, 1]

    mask = tf.cast(intersection[:, :, :, 0] > 0, tf.float32) * tf.cast(intersection[:, :, :, 1] > 0, tf.float32)
    
    inter_square = mask * inter_square
    
    #calculate the boxs1 square and boxs2 square
    square1 = (boxes1[:, :, :, 2] - boxes1[:, :, :, 0]) * (boxes1[:, :, :, 3] - boxes1[:, :, :, 1])
    square2 = (boxes2[2] - boxes2[0]) * (boxes2[3] - boxes2[1])
    
    return inter_square/(square1 + square2 - inter_square + 1e-6)

In [11]:
def body1(num, object_num, loss, predict, labels, nilboy):
    """
    calculate loss
    Args:
      predict: 3-D tensor [cell_size, cell_size, 5 * boxes_per_cell]
      labels : [max_objects, 5]  (x_center, y_center, w, h, class)
    """
    global image_size
    global cell_size
    
    label = labels[num:num+1, :]
    label = tf.reshape(label, [-1])

    #calculate objects  tensor [CELL_SIZE, CELL_SIZE]
    min_x = (label[0] - label[2] / 2) / (image_size / cell_size)
    max_x = (label[0] + label[2] / 2) / (image_size / cell_size)

    min_y = (label[1] - label[3] / 2) / (image_size / cell_size)
    max_y = (label[1] + label[3] / 2) / (image_size / cell_size)

    min_x = tf.floor(min_x)
    min_y = tf.floor(min_y)

    max_x = tf.ceil(max_x)
    max_y = tf.ceil(max_y)

    temp = tf.cast(tf.stack([max_y - min_y, max_x - min_x]), dtype=tf.int32)
    objects = tf.ones(temp, tf.float32)

    temp = tf.cast(tf.stack([min_y, cell_size - max_y, min_x, cell_size - max_x]), tf.int32)
    temp = tf.reshape(temp, (2, 2))
    objects = tf.pad(objects, temp, "CONSTANT")

    #calculate objects  tensor [CELL_SIZE, CELL_SIZE]
    #calculate responsible tensor [CELL_SIZE, CELL_SIZE]
    center_x = label[0] / (image_size / cell_size)
    center_x = tf.floor(center_x)

    center_y = label[1] / (image_size / cell_size)
    center_y = tf.floor(center_y)

    response = tf.ones([1, 1], tf.float32)

    temp = tf.cast(tf.stack([center_y, cell_size - center_y - 1, center_x, cell_size -center_x - 1]), tf.int32)
    temp = tf.reshape(temp, (2, 2))
    response = tf.pad(response, temp, "CONSTANT")
    #objects = response

    #calculate iou_predict_truth [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    predict_boxes = predict[:, :, num_classes + boxes_per_cell:]
    

    predict_boxes = tf.reshape(predict_boxes, [cell_size, cell_size, boxes_per_cell, 4])

    predict_boxes = predict_boxes * [image_size / cell_size, image_size / cell_size, image_size, image_size]

    base_boxes = np.zeros([cell_size, cell_size, 4])

    for y in range(cell_size):
        for x in range(cell_size):
            #nilboy
            base_boxes[y, x, :] = [image_size / cell_size * x, image_size / cell_size * y, 0, 0]
    base_boxes = np.tile(np.resize(base_boxes, [cell_size, cell_size, 1, 4]), [1, 1, boxes_per_cell, 1])

    predict_boxes = base_boxes + predict_boxes

    iou_predict_truth = iou(predict_boxes, label[0:4])
    #calculate C [cell_size, cell_size, boxes_per_cell]
    C = iou_predict_truth * tf.reshape(response, [cell_size, cell_size, 1])

    #calculate I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    I = iou_predict_truth * tf.reshape(response, (cell_size, cell_size, 1))
    
    max_I = tf.reduce_max(I, 2, keep_dims=True)

    I = tf.cast((I >= max_I), tf.float32) * tf.reshape(response, (cell_size, cell_size, 1))

    #calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    no_I = tf.ones_like(I, dtype=tf.float32) - I 


    p_C = predict[:, :, num_classes:num_classes + boxes_per_cell]

    #calculate truth x,y,sqrt_w,sqrt_h 0-D
    x = label[0]
    y = label[1]

    sqrt_w = tf.sqrt(tf.abs(label[2]))
    sqrt_h = tf.sqrt(tf.abs(label[3]))
    #sqrt_w = tf.abs(label[2])
    #sqrt_h = tf.abs(label[3])

    #calculate predict p_x, p_y, p_sqrt_w, p_sqrt_h 3-D [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    p_x = predict_boxes[:, :, :, 0]
    p_y = predict_boxes[:, :, :, 1]

    #p_sqrt_w = tf.sqrt(tf.abs(predict_boxes[:, :, :, 2])) * ((tf.cast(predict_boxes[:, :, :, 2] > 0, tf.float32) * 2) - 1)
    #p_sqrt_h = tf.sqrt(tf.abs(predict_boxes[:, :, :, 3])) * ((tf.cast(predict_boxes[:, :, :, 3] > 0, tf.float32) * 2) - 1)
    #p_sqrt_w = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 2]))
    #p_sqrt_h = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 3]))
    #p_sqrt_w = predict_boxes[:, :, :, 2]
    #p_sqrt_h = predict_boxes[:, :, :, 3]
    p_sqrt_w = tf.sqrt(tf.minimum(image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 2])))
    p_sqrt_h = tf.sqrt(tf.minimum(image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 3])))
    #calculate truth p 1-D tensor [NUM_CLASSES]
    P = tf.one_hot(tf.cast(label[4], tf.int32), num_classes, dtype=tf.float32)

    #calculate predict p_P 3-D tensor [CELL_SIZE, CELL_SIZE, NUM_CLASSES]
    p_P = predict[:, :, 0:num_classes]

    #class_loss
    class_loss = tf.nn.l2_loss(tf.reshape(objects, (cell_size, cell_size, 1)) * (p_P - P)) * class_scale
    #class_loss = tf.nn.l2_loss(tf.reshape(response, (cell_size, cell_size, 1)) * (p_P - P)) * class_scale

    #object_loss
    object_loss = tf.nn.l2_loss(I * (p_C - C)) * object_scale
    #object_loss = tf.nn.l2_loss(I * (p_C - (C + 1.0)/2.0)) * object_scale

    #noobject_loss
    #noobject_loss = tf.nn.l2_loss(no_I * (p_C - C)) * noobject_scale
    noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * noobject_scale

    #coord_loss
    coord_loss = (tf.nn.l2_loss(I * (p_x - x)/(image_size/cell_size)) +
                 tf.nn.l2_loss(I * (p_y - y)/(image_size/cell_size)) +
                 tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w))/ image_size +
                 tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h))/image_size) * coord_scale

    nilboy = I

    return num + 1, object_num, [loss[0] + class_loss, loss[1] + object_loss, loss[2] + noobject_loss, loss[3] + coord_loss], predict, labels, nilboy

In [12]:
def loss(predicts, labels, objects_num):
    """Add Loss to all the trainable variables

    Args:
      predicts: 4-D tensor [batch_size, cell_size, cell_size, 5 * boxes_per_cell]
      ===> (num_classes, boxes_per_cell, 4 * boxes_per_cell)
      labels  : 3-D tensor of [batch_size, max_objects, 5]
      objects_num: 1-D tensor [batch_size]
    """
    class_loss = tf.constant(0, tf.float32)
    object_loss = tf.constant(0, tf.float32)
    noobject_loss = tf.constant(0, tf.float32)
    coord_loss = tf.constant(0, tf.float32)
    loss = [0, 0, 0, 0]
    for i in range(batch_size):
        predict = predicts[i, :, :, :]
        label = labels[i, :, :]
        object_num = objects_num[i]
        nilboy = tf.ones([7,7,2])
        tuple_results = tf.while_loop(cond1, body1, [tf.constant(0), object_num, [class_loss, object_loss, noobject_loss, coord_loss], predict, label, nilboy])
        for j in range(4):
            loss[j] = loss[j] + tuple_results[2][j]
        nilboy = tuple_results[5]

    tf.add_to_collection('losses', (loss[0] + loss[1] + loss[2] + loss[3]) / batch_size)

#     tf.summary.scalar('class_loss', loss[0]/self.batch_size)
#     tf.summary.scalar('object_loss', loss[1]/self.batch_size)
#     tf.summary.scalar('noobject_loss', loss[2]/self.batch_size)
#     tf.summary.scalar('coord_loss', loss[3]/self.batch_size)
#     tf.summary.scalar('weight_loss', tf.add_n(tf.get_collection('losses')) - (loss[0] + loss[1] + loss[2] + loss[3])/self.batch_size )

    return tf.add_n(tf.get_collection('losses'), name='total_loss'), nilboy

In [13]:
image_size = 448
batch_size = 16
num_classes = 20
max_objects_per_image = 20
width = image_size
height = image_size
max_objects = max_objects_per_image
cell_size = 7
boxes_per_cell = 2
object_scale = 1
noobject_scale = 0.5
class_scale = 1
coord_scale = 5
learning_rate = 0.000001
max_iterators = 10000

images = tf.placeholder(tf.float32, (None, height, width, 3))
labels = tf.placeholder(tf.float32, (None, max_objects, 5))
objects_num = tf.placeholder(tf.int32, (None))

conv_num = 1

temp_conv = conv2d('conv' + str(conv_num), images, [3, 3, 3, 16], stride=1)
conv_num += 1

temp_pool = max_pool(temp_conv, [2, 2], 2)

temp_conv = conv2d('conv' + str(conv_num), temp_pool, [3, 3, 16, 32], stride=1)
conv_num += 1

temp_pool = max_pool(temp_conv, [2, 2], 2)

temp_conv = conv2d('conv' + str(conv_num), temp_pool, [3, 3, 32, 64], stride=1)
conv_num += 1

temp_conv = max_pool(temp_conv, [2, 2], 2)

temp_conv = conv2d('conv' + str(conv_num), temp_conv, [3, 3, 64, 128], stride=1)
conv_num += 1

temp_conv = max_pool(temp_conv, [2, 2], 2)

temp_conv = conv2d('conv' + str(conv_num), temp_conv, [3, 3, 128, 256], stride=1)
conv_num += 1

temp_conv = max_pool(temp_conv, [2, 2], 2)

temp_conv = conv2d('conv' + str(conv_num), temp_conv, [3, 3, 256, 512], stride=1)
conv_num += 1

temp_conv = max_pool(temp_conv, [2, 2], 2)

temp_conv = conv2d('conv' + str(conv_num), temp_conv, [3, 3, 512, 1024], stride=1)
conv_num += 1     

temp_conv = conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1)
conv_num += 1 

temp_conv = conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1)
conv_num += 1 

temp_conv = tf.transpose(temp_conv, (0, 3, 1, 2))

#Fully connected layer
local1 = local('local1', temp_conv, cell_size * cell_size * 1024, 256)

local2 = local('local2', local1, 256, 4096)

local3 = local('local3', local2, 4096, cell_size * cell_size * (num_classes + boxes_per_cell * 5), leaky=False)

n1 = cell_size * cell_size * num_classes

n2 = n1 + cell_size * cell_size * boxes_per_cell

class_probs = tf.reshape(local3[:, 0:n1], (-1, cell_size, cell_size, num_classes))
scales = tf.reshape(local3[:, n1:n2], (-1, cell_size, cell_size, boxes_per_cell))
boxes = tf.reshape(local3[:, n2:], (-1, cell_size, cell_size, boxes_per_cell * 4))

# local3 = tf.concat([class_probs, scales, boxes], 3)

# n1 = cell_size * cell_size * num_classes

# n2 = n1 + cell_size * cell_size * boxes_per_cell

# class_probs = tf.reshape(local3[:, 0:n1], (-1, cell_size, cell_size, num_classes))
# scales = tf.reshape(local3[:, n1:n2], (-1, cell_size, cell_size, boxes_per_cell))
# boxes = tf.reshape(local3[:, n2:], (-1, cell_size, cell_size, boxes_per_cell * 4))

predicts = tf.concat([class_probs, scales, boxes], 3)

total_loss, nilboy = loss(predicts, labels, objects_num)
opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss)



In [14]:
def cvtColor(image):
    arr = np.array(image)
    tmp = arr[:, :, 0].copy()
    arr[:, :, 0] = arr[:, :, 2]
    arr[:, :, 2] = tmp
    return arr

In [15]:
def record_process(record):
    """record process 
    Args: record 
    Returns:
      image: 3-D ndarray
      labels: 2-D list [self.max_objects, 5] (xcenter, ycenter, w, h, class_num)
      object_num:  total object number  int 
    """
    global width
    global height
    global max_objects
    
    image = Image.open(record[0])
    image = cvtColor(image)
#     image = cv2.imread(record[0])
#     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    h = image.shape[0]
    w = image.shape[1]

    width_rate = width * 1.0 / w 
    height_rate = height * 1.0 / h 
    
    image = Image.fromarray(image)
    image = image.resize((height, width))
#     image = cv2.resize(image, (height, width))

    labels = [[0, 0, 0, 0, 0]] * max_objects
    i = 1
    object_num = 0
    while i < len(record):
        xmin = record[i]
        ymin = record[i + 1]
        xmax = record[i + 2]
        ymax = record[i + 3]
        class_num = record[i + 4]

        xcenter = (xmin + xmax) * 1.0 / 2 * width_rate
        ycenter = (ymin + ymax) * 1.0 / 2 * height_rate

        box_w = (xmax - xmin) * width_rate
        box_h = (ymax - ymin) * height_rate

        labels[object_num] = [xcenter, ycenter, box_w, box_h, class_num]
        object_num += 1
        i += 5
        if object_num >= max_objects:
            break
    return image, labels, object_num

In [16]:
data_path = 'yolo2d_data/pascal_voc.txt'
record_list = []  

# filling the record_list
input_file = open(data_path, 'r')

for line in input_file:
    line = line.strip()
    ss = line.split(' ')
    ss[1:] = [float(num) for num in ss[1:]]
    record_list.append(ss)

record_point = 0
record_number = len(record_list)
num_batch_per_epoch = int(np.ceil(record_number / batch_size))

In [17]:
sess = tf.Session()

In [19]:
saveDir = 'yolo2d_model'
saver = tf.train.Saver()
saver.restore(sess, saveDir + '/model.ckpt')

INFO:tensorflow:Restoring parameters from yolo2d_model/model.ckpt


In [20]:
# init =  tf.global_variables_initializer()
# sess.run(init)

In [None]:
import time
from datetime import datetime

for step in range(20):
    start_time = time.time()
    
    random.shuffle(record_list)
    for i_batch in range(num_batch_per_epoch):
        if i_batch + 1 == num_batch_per_epoch:
            break
#             l = record_list[i_batch * batch_size:]
        else:
            l = record_list[i_batch * batch_size:i_batch * batch_size + batch_size]
        np_images = []
        np_labels = []
        np_objects_num = []
        for item in l:
            image, label, object_num = record_process(item)
            image = np.array(image)
            image = image.astype(np.float32)
            np_images.append(image)
            np_labels.append(label)
            np_objects_num.append(object_num)
        np_images = np.asarray(np_images, dtype=np.float32)
        np_images = np_images/255 * 2 - 1
        np_labels = np.asarray(np_labels, dtype=np.float32)
        np_objects_num = np.asarray(np_objects_num, dtype=np.int32)
        _, loss_value = sess.run([opt, total_loss], feed_dict={images: np_images, labels: np_labels, objects_num: np_objects_num})
        duration = time.time() - start_time
        if i_batch % 10 == 0:
            num_examples_per_step = batch_size
            examples_per_sec = num_examples_per_step / duration
            sec_per_batch = float(duration)

            format_str = ('%s: step %d batch %d, loss = %.2f (%.1f examples/sec; %.3f '
                          'sec/batch)')
            print (format_str % (datetime.now(), step, i_batch, loss_value,
                                 examples_per_sec, sec_per_batch))

2017-06-19 09:26:03.798773: step 0 batch 0, loss = 13.53 (27.4 examples/sec; 0.583 sec/batch)
2017-06-19 09:26:09.166622: step 0 batch 10, loss = 10.61 (2.7 examples/sec; 5.951 sec/batch)
2017-06-19 09:26:14.470197: step 0 batch 20, loss = 14.24 (1.4 examples/sec; 11.255 sec/batch)
2017-06-19 09:26:19.789655: step 0 batch 30, loss = 19.08 (1.0 examples/sec; 16.574 sec/batch)
2017-06-19 09:26:25.034771: step 0 batch 40, loss = 12.76 (0.7 examples/sec; 21.819 sec/batch)
2017-06-19 09:26:30.447939: step 0 batch 50, loss = 17.76 (0.6 examples/sec; 27.233 sec/batch)
2017-06-19 09:26:35.845638: step 0 batch 60, loss = 19.15 (0.5 examples/sec; 32.630 sec/batch)
2017-06-19 09:26:41.085794: step 0 batch 70, loss = 11.49 (0.4 examples/sec; 37.870 sec/batch)
2017-06-19 09:26:46.373851: step 0 batch 80, loss = 13.87 (0.4 examples/sec; 43.158 sec/batch)
2017-06-19 09:26:51.656256: step 0 batch 90, loss = 14.95 (0.3 examples/sec; 48.441 sec/batch)
2017-06-19 09:26:56.766537: step 0 batch 100, loss =

In [21]:
# saver = tf.train.Saver()
saveDir = 'yolo2d_model'
saver.save(sess, saveDir + '/model.ckpt')

'yolo2d_model/model.ckpt'

In [23]:
##test
def process_predicts(predicts):
    p_classes = predicts[0, :, :, 0:20]
    C = predicts[0, :, :, 20:22]
    coordinate = predicts[0, :, :, 22:]

    p_classes = np.reshape(p_classes, (7, 7, 1, 20))
    C = np.reshape(C, (7, 7, 2, 1))

    P = C * p_classes

    #print P[5,1, 0, :]

    index = np.argmax(P)

    index = np.unravel_index(index, P.shape)

    class_num = index[3]

    coordinate = np.reshape(coordinate, (7, 7, 2, 4))

    max_coordinate = coordinate[index[0], index[1], index[2], :]

    xcenter = max_coordinate[0]
    ycenter = max_coordinate[1]
    w = max_coordinate[2]
    h = max_coordinate[3]

    xcenter = (index[1] + xcenter) * (448/7.0)
    ycenter = (index[0] + ycenter) * (448/7.0)

    w = w * 448
    h = h * 448

    xmin = xcenter - w/2.0
    ymin = ycenter - h/2.0

    xmax = xmin + w
    ymax = ymin + h

    return xmin, ymin, xmax, ymax, class_num

classes_name =  ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]

np_images = []
# np_img = cv2.imread('cat.jpg')
np_img = Image.open('cat.jpg')
# resized_img = cv2.resize(np_img, (height, width))
resized_img = np_img.resize((height, width))
# np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
np_img = cvtColor(resized_img)
np_img = np.array(np_img)
np_img = np_img.astype(np.float32)
np_images.append(np_img)
np_images = np.asarray(np_images, dtype=np.float32)
np_images = np_images/255 * 2 - 1
np_predict = sess.run(predicts, feed_dict={images: np_images})

xmin, ymin, xmax, ymax, class_num = process_predicts(np_predict)
class_name = classes_name[class_num]
print(xmin, ymin, xmax, ymax, class_name)
# cv2.rectangle(resized_img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255))
# cv2.putText(resized_img, class_name, (int(xmin), int(ymin)), 2, 1.5, (0, 0, 255))
# cv2.imwrite('cat_out.jpg', resized_img)

(70.958498001098633, 84.109132766723633, 379.56708335876465, 363.02048301696777, 'cat')
