@@ -0,0 +1,110 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 23 02:25:45 2018
@author: ubuntu
"""

import random
import cv2
import numpy as np

class DatasetFetch():
    '''
    Fetch images and labels from a plain-text annotation file.

    Each line of the file has the format:
        imagePath xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2 ...
    '''

    def __init__(self, commonParams, datasetParams):
        '''
        Args:
            commonParams: dict with 'image_size', 'batch_size', 'max_objects_per_image'
            datasetParams: dict with 'path', 'thread_num'
        '''
        self.dataPath = str(datasetParams['path'])
        self.width = int(commonParams['image_size'])
        self.height = int(commonParams['image_size'])
        self.batchSize = int(commonParams['batch_size'])
        self.threadNum = int(datasetParams['thread_num'])
        self.maxObjects = int(commonParams['max_objects_per_image'])

        self.recordList = []

        # 'with' guarantees the annotation file is closed even on error
        # (the original leaked the file handle).
        with open(self.dataPath, 'r') as inputFile:
            for line in inputFile:
                ss = line.strip().split(' ')
                # Everything after the image path is numeric (coords + class).
                ss[1:] = [float(num) for num in ss[1:]]
                self.recordList.append(ss)

    def recordProcess(self, record):
        '''
        Turn one annotation record into a resized image and padded label list.

        Args:
            record: [imagePath, xmin1, ymin1, xmax1, ymax1, class1, ...]
        Returns:
            image: 3-D ndarray (RGB, resized to self.height x self.width)
            labels: 2-D list [self.maxObjects, 5] --> [xCenter, yCenter, w, h, classNum]
            objectNum: int of total object number (capped at self.maxObjects)
        '''
        image = cv2.imread(record[0])
        # Bug fix: cv2.imread returns None on failure, and '== None' on an
        # ndarray is ambiguous (elementwise) -- test identity instead.
        if image is None:
            raise Exception("Couldn't load image!")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        h = image.shape[0]
        w = image.shape[1]

        widthRate = self.width * 1.0 / w
        heightRate = self.height * 1.0 / h

        # NOTE(review): cv2.resize expects (width, height); passing
        # (self.height, self.width) is harmless here only because both come
        # from the same 'image_size' value.
        image = cv2.resize(image, (self.height, self.width))

        labels = [[0, 0, 0, 0, 0]] * self.maxObjects
        i = 1
        objectNum = 0

        while i < len(record):
            xmin = record[i]
            ymin = record[i + 1]
            xmax = record[i + 2]
            ymax = record[i + 3]
            classNum = record[i + 4]
            # Scale the box to the resized image and convert to
            # center/size form.
            xcenter = (xmin + xmax) * 1.0 / 2 * widthRate
            ycenter = (ymin + ymax) * 1.0 / 2 * heightRate

            boxW = (xmax - xmin) * widthRate
            boxH = (ymax - ymin) * heightRate

            labels[objectNum] = [xcenter, ycenter, boxW, boxH, classNum]
            objectNum += 1

            i += 5
            if objectNum >= self.maxObjects:
                break

        return [image, labels, objectNum]

    def batch(self):
        '''
        Assemble one batch from the image/label queue.

        Returns:
            images: 4-D ndarray [batch size, h, w, 3], scaled to [-1, 1]
            labels: 3-D ndarray [batch size, max objects, 5]
            objectsNum: 1-D ndarray [batch size]
        '''
        # NOTE(review): self.imageLabelQueue is never created in __init__ of
        # this class (compare TextDataSet, which sets up the queues and
        # worker threads) -- calling batch() as-is raises AttributeError
        # unless the attribute is assigned externally. Confirm whether this
        # class is dead code or should mirror TextDataSet.
        images = []
        labels = []
        objectsNum = []

        for i in range(self.batchSize):
            image, label, objectNum = self.imageLabelQueue.get()
            images.append(image)
            labels.append(label)
            objectsNum.append(objectNum)

        images = np.asarray(images, dtype=np.float32)
        # Normalize pixel values from [0, 255] to [-1, 1].
        images = images / 255 * 2 - 1
        labels = np.asarray(labels, dtype=np.float32)
        objectsNum = np.asarray(objectsNum, dtype=np.float32)
        return images, labels, objectsNum
@@ -0,0 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 19 16:44:03 2018
@author: root
"""
import yolo.net.net
import yolo.net.yolo_net
import yolo.net.yolo_tiny_net
@@ -0,0 +1,170 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 19 16:44:36 2018
@author: zsc
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
import re

class Net(object):
    '''
    Base class with the building blocks shared by the yolo nets:
    conv / max-pool / fully-connected layers and leaky relu.
    '''

    def __init__(self, commonParams, netParams):
        '''
        Init the object.

        Args:
            commonParams: a dict of pretrained parameters
            netParams: a dict of trainable parameters
        '''
        # Variables restorable from a pretrained checkpoint.
        self.pretrainedCollection = []
        # Variables updated during training.
        self.trainableCollection = []

    def _variableInit(self, name, shape, initializer, pretrain=True, train=True):
        '''
        Create a variable and register it in the pretrain/train collections.
        '''
        var = tf.get_variable(name, shape, initializer=initializer, dtype=tf.float32)
        if pretrain:
            self.pretrainedCollection.append(var)
        if train:
            self.trainableCollection.append(var)
        return var

    def _variableInitDecay(self, name, shape, stddev, wd, pretrain=True, train=True):
        '''
        Create a truncated-normal variable with optional L2 weight decay.

        Args:
            name: variable name
            shape: variable shape
            stddev: standard deviation of the initializer
            wd: L2 loss lambda, or None for no weight decay
        Returns:
            a tensor of variables
        '''
        initializer = tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32)
        var = self._variableInit(name,
                                 shape,
                                 initializer=initializer,
                                 pretrain=pretrain,
                                 train=train)
        if wd is not None:
            # Add the decay term to the shared 'losses' collection so it is
            # picked up by tf.add_n in the loss() methods.
            weightDecay = tf.multiply(tf.nn.l2_loss(var), wd, name='weightLoss')
            tf.add_to_collection('losses', weightDecay)
        return var

    def conv2d(self, scope, input, kernel_size, stride=1, pretrain=True, train=True):
        '''
        Convolutional layer with leaky-relu activation.

        Args:
            scope: tensorflow scope name
            input: a 4-D tensor [batch size, height, width, channels]
            kernel_size: [height, width, #input channel, #output channel]
            stride: a stride value, int
            pretrain: True or False
            train: True or False
        Return:
            a 4-D tensor [batch size, height, width, #output channel]
        '''
        with tf.variable_scope(scope) as scope:
            # NOTE(review): self.wd is only defined by the subclasses
            # (YoloNet / YoloTinyNet); Net itself never sets it.
            kernel = self._variableInitDecay('weights',
                                             kernel_size,
                                             stddev=5e-2,
                                             wd=self.wd,
                                             pretrain=pretrain,
                                             train=train)

            conv = tf.nn.conv2d(input,
                                kernel,
                                strides=[1, stride, stride, 1],
                                padding='SAME')

            biases = self._variableInit('biases',
                                        kernel_size[3:],
                                        tf.constant_initializer(0.0),
                                        pretrain=pretrain,
                                        train=train)

            convout = self.leakyRelu(tf.nn.bias_add(conv, biases))
        return convout

    def maxPool(self, input, kernel_size, stride):
        '''
        Max pooling layer.

        Args:
            input: a 4-D tensor
            kernel_size: [height, width]
            stride: an int32 number
        Return:
            output: 4-D tensor
        '''
        return tf.nn.max_pool(input,
                              ksize=[1, kernel_size[0], kernel_size[1], 1],
                              strides=[1, stride, stride, 1],
                              padding='SAME')

    def fullyConnectLayer(self, scope, input, inDim, outDim, leaky=True, pretrain=True, train=True):
        '''
        Fully connected layer.

        Args:
            scope: variable scope name
            input: input tensor (flattened to [batch size, -1])
            inDim: int32, input dimension after flattening
            outDim: int32, output dimension
            leaky: apply leaky relu to the output when True
        Return:
            output: a 2-D tensor [batch size, outDim]
        '''
        with tf.variable_scope(scope) as scope:
            reshape = tf.reshape(input, [tf.shape(input)[0], -1])
            weights = self._variableInitDecay('weights',
                                              [inDim, outDim],
                                              stddev=0.04,
                                              wd=self.wd,
                                              pretrain=pretrain,
                                              train=train)

            biases = self._variableInit('biases',
                                        [outDim],
                                        tf.constant_initializer(0.0),
                                        pretrain=pretrain,
                                        train=train)

            fcnOut = tf.matmul(reshape, weights) + biases

            # Bug fix: the 'leaky' flag was accepted but never used, so every
            # FC layer came out linear. Apply the activation on request
            # (callers pass leaky=False only for the final prediction layer).
            if leaky:
                fcnOut = self.leakyRelu(fcnOut)

        return fcnOut

    def leakyRelu(self, x, alpha=0.1, dtype=tf.float32):
        '''
        Leaky relu: x if x > 0 else alpha * x.

        Args:
            x: a tensor
            alpha: the slope used for negative inputs
        Return:
            y: a tensor
        '''
        x = tf.cast(x, dtype=dtype)
        boolMask = (x > 0)
        mask = tf.cast(boolMask, dtype=dtype)
        return 1.0 * mask * x + alpha * (1 - mask) * x
@@ -0,0 +1,348 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 22 19:12:15 2018
@author: root
"""
import tensorflow as tf
import numpy as np
import re

from yolo.net.net import Net

class YoloNet(Net):
'''
Yolo net implementation.
'''
def __init__(self, commonParams, netParams, test=False):
    '''
    Initialize the net from parameter dictionaries.

    Args:
        commonParams: a dict of pretrained parameters
        netParams: a dict of trainable parameters
        test: when True, the loss-scale parameters are skipped
              (inference-only use)
    '''
    super(YoloNet, self).__init__(commonParams, netParams)

    # Geometry and batching configuration.
    self.imageSize = int(commonParams['image_size'])
    self.numClasses = int(commonParams['num_classes'])
    self.cellSize = int(netParams['cell_size'])
    self.boxesPerCell = int(netParams['boxes_per_cell'])
    self.batchSize = int(commonParams['batch_size'])
    self.wd = float(netParams['weight_decay'])

    if test:
        return

    # Loss weighting factors -- only required for training.
    self.objectScale = float(netParams['object_scale'])
    self.noobjectScale = float(netParams['noobject_scale'])
    self.classScale = float(netParams['class_scale'])
    self.coordScale = float(netParams['coord_scale'])

def yoloModel(self, images):
    '''
    Build the full yolo convolutional model.

    Args:
        images: 4-D tensor [batch size, height, width, channels]
    Return:
        predicts: [batch size, cell size, cell size, #class + 5 * boxes per cell]
    '''
    convNum = 1
    temp = self.conv2d("conv" + str(convNum), images, [7, 7, 3, 64], stride=2)
    convNum += 1

    temp = self.maxPool(temp, [2, 2], 2)

    temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 64, 192], stride=1)
    convNum += 1

    temp = self.maxPool(temp, [2, 2], 2)

    temp = self.conv2d('conv' + str(convNum), temp, [1, 1, 192, 128], stride=1)
    convNum += 1

    temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 128, 256], stride=1)
    convNum += 1

    temp = self.conv2d('conv' + str(convNum), temp, [1, 1, 256, 256], stride=1)
    convNum += 1

    temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 256, 512], stride=1)
    convNum += 1

    temp = self.maxPool(temp, [2, 2], 2)

    # Four 1x1 -> 3x3 bottleneck pairs at 512 channels.
    for i in range(4):
        temp = self.conv2d('conv' + str(convNum), temp, [1, 1, 512, 256], stride=1)
        convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 256, 512], stride=1)
        convNum += 1

    temp = self.conv2d('conv' + str(convNum), temp, [1, 1, 512, 512], stride=1)
    convNum += 1

    temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 512, 1024], stride=1)
    convNum += 1

    temp = self.maxPool(temp, [2, 2], 2)

    # Two 1x1 -> 3x3 bottleneck pairs at 1024 channels.
    for i in range(2):
        temp = self.conv2d('conv' + str(convNum), temp, [1, 1, 1024, 512], stride=1)
        convNum += 1

        temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 512, 1024], stride=1)
        convNum += 1

    temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 1024, 1024], stride=1)
    convNum += 1

    # Downsample to the final cellSize x cellSize grid.
    temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 1024, 1024], stride=2)
    convNum += 1

    temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 1024, 1024], stride=1)
    convNum += 1

    temp = self.conv2d('conv' + str(convNum), temp, [3, 3, 1024, 1024], stride=1)
    convNum += 1

    f1 = self.fullyConnectLayer('f1', temp, 49 * 1024, 4096)
    f1 = tf.nn.dropout(f1, keep_prob=0.5)

    # Bug fix: the output width must be numClasses + 5 * boxesPerCell
    # (class probs plus [confidence, x, y, w, h] per box); the original used
    # numClasses * 5 * boxesPerCell, which contradicts the reshape below and
    # would fail at runtime.
    f2 = self.fullyConnectLayer('f2',
                                f1,
                                4096,
                                self.cellSize * self.cellSize * (self.numClasses + 5 * self.boxesPerCell),
                                leaky=False)

    f2 = tf.reshape(f2, [tf.shape(f2)[0], self.cellSize, self.cellSize, self.numClasses + 5 * self.boxesPerCell])

    return f2

def iou(self, boxes1, boxes2):
    '''
    Compute IOU between each predicted box and one ground-truth box.

    Args:
        boxes1: 4-D tensor [cell size, cell size, boxes per cell, 4]
                4 means [x center, y center, width, height]
        boxes2: 1-D tensor [4] -- [x center, y center, width, height]
    Return:
        iou: 3-D tensor [cell size, cell size, boxes per cell]
    '''
    # Convert both box sets from center/size form to corner form
    # [xmin, ymin, xmax, ymax].
    boxes1 = tf.stack([boxes1[:, :, :, 0] - boxes1[:, :, :, 2]/2,
                       boxes1[:, :, :, 1] - boxes1[:, :, :, 3]/2,
                       boxes1[:, :, :, 0] + boxes1[:, :, :, 2]/2,
                       boxes1[:, :, :, 1] + boxes1[:, :, :, 3]/2])
    boxes1 = tf.transpose(boxes1, [1, 2, 3, 0])

    boxes2 = tf.stack([boxes2[0] - boxes2[2]/2,
                       boxes2[1] - boxes2[3]/2,
                       boxes2[0] + boxes2[2]/2,
                       boxes2[1] + boxes2[3]/2])

    # Corners of the intersection rectangle.
    lu = tf.maximum(boxes1[:, :, :, 0:2], boxes2[0:2])
    rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:])

    # Bug fix: the overlap extent is rd - lu. The original computed lu - rd,
    # which is positive exactly when the boxes do NOT overlap, yielding
    # non-zero IOU for disjoint boxes and zero for overlapping ones.
    intersection = rd - lu

    interSquare = intersection[:, :, :, 0] * intersection[:, :, :, 1]

    # Zero out entries with no real overlap in either dimension.
    mask = tf.cast(intersection[:, :, :, 0] > 0, tf.float32) * tf.cast(intersection[:, :, :, 1] > 0, tf.float32)

    interSquare = interSquare * mask

    s1 = (boxes1[:, :, :, 2] - boxes1[:, :, :, 0]) * (boxes1[:, :, :, 3] - boxes1[:, :, :, 1])
    s2 = (boxes2[2] - boxes2[0]) * (boxes2[3] - boxes2[1])

    # The epsilon avoids division by zero for degenerate boxes.
    return interSquare/(s1 + s2 - interSquare + 1e-6)


def cond1(self, num, objectNum, loss, predict, labels, nilboy):
    '''
    Loop condition for tf.while_loop: keep iterating while the object
    index is below the number of objects in the image. The remaining
    arguments are loop variables that are only threaded through.
    '''
    return num < objectNum

def body1(self, num, objectNum, loss, predict, labels, nilboy):
    '''
    Accumulate the loss contributed by one labelled object
    (tf.while_loop body).

    Args:
        num: index of the object to process in this image
        objectNum: #objects in the image
        loss: running sums [class loss, object loss, no object loss, coord loss]
        predict: 3-D tensor [cell_size, cell_size, num_classes + 5 * boxes_per_cell]
        labels: [max_objects, 5] (x_center, y_center, w, h, class);
                centers/sizes are in resized-image pixel coordinates
        nilboy: debug tensor, replaced by the responsibility mask I
    Returns:
        the updated loop variables
        (num + 1, objectNum, loss, predict, labels, nilboy)
    '''
    # Pick out this object's label row.
    label = labels[num]
    label = tf.reshape(label, [-1])

    # Object extent expressed in grid-cell units.
    minX = (label[0] - label[2] / 2) / (self.imageSize / self.cellSize)
    maxX = (label[0] + label[2] / 2) / (self.imageSize / self.cellSize)
    minY = (label[1] - label[3] / 2) / (self.imageSize / self.cellSize)
    maxY = (label[1] + label[3] / 2) / (self.imageSize / self.cellSize)

    # Snap to whole cells: every cell the box touches counts.
    minX = tf.floor(minX)
    minY = tf.floor(minY)
    maxX = tf.ceil(maxX)
    maxY = tf.ceil(maxY)

    # objects: 1 for every cell covered by the box ...
    temp = tf.cast(tf.stack([maxY - minY, maxX - minX]), dtype=tf.int32)
    objects = tf.ones(temp, tf.float32)

    # ... padded with zeros out to the S x S grid.
    temp = tf.cast(tf.stack([minY, self.cellSize - maxY, minX, self.cellSize - maxX]),
                   dtype=tf.int32)
    temp = tf.reshape(temp, (2, 2))
    objects = tf.pad(objects, temp, 'CONSTANT')

    # response: 1 only for the single cell containing the box center.
    centerX = label[0] / (self.imageSize / self.cellSize)
    centerX = tf.floor(centerX)
    centerY = label[1] / (self.imageSize / self.cellSize)
    centerY = tf.floor(centerY)
    response = tf.ones([1, 1], tf.float32)

    # Pad the single responsible cell out to the S x S grid.
    temp = tf.cast(tf.stack([centerY, self.cellSize - centerY - 1, centerX, self.cellSize - centerX - 1]),
                   dtype=tf.int32)
    temp = tf.reshape(temp, (2, 2))
    response = tf.pad(response, temp, 'CONSTANT')

    # Predicted boxes: the last 4 * boxesPerCell channels of the prediction.
    predictBoxes = predict[:, :, self.numClasses + self.boxesPerCell:]
    predictBoxes = tf.reshape(predictBoxes,
                              [self.cellSize, self.cellSize, self.boxesPerCell, 4])

    # Centers are predicted relative to a cell, sizes relative to the image;
    # scale both back to pixels.
    predictBoxes = predictBoxes * [self.imageSize / self.cellSize,
                                   self.imageSize / self.cellSize,
                                   self.imageSize,
                                   self.imageSize]

    # Top-left pixel offset of every grid cell.
    baseBoxes = np.zeros([self.cellSize, self.cellSize, 4])
    for y in range(self.cellSize):
        for x in range(self.cellSize):
            baseBoxes[y, x, :] = [self.imageSize / self.cellSize * x,
                                  self.imageSize / self.cellSize * y,
                                  0, 0]

    # Broadcast the cell offsets over the boxesPerCell axis.
    baseBoxes = np.tile(np.resize(baseBoxes,
                                  [self.cellSize, self.cellSize, 1, 4]), [1, 1, self.boxesPerCell, 1])

    # Shift cell-relative predictions into whole-image coordinates.
    predictBoxes = baseBoxes + predictBoxes

    iouPredictTruth = self.iou(predictBoxes, label[0:4])

    # C: IOU restricted to the responsible (center) cell.
    C = iouPredictTruth * tf.reshape(response,
                                     [self.cellSize, self.cellSize, 1])

    I = iouPredictTruth * tf.reshape(response, [self.cellSize, self.cellSize, 1])

    # Per-cell best box. Fix: 'keep_dims' is the deprecated spelling -- the
    # tiny net already uses 'keepdims'; made consistent here.
    maxI = tf.reduce_max(I, 2, keepdims=True)

    # Only the box with the max IOU in the responsible cell is "on".
    I = tf.cast((I >= maxI), tf.float32) * tf.reshape(response, (self.cellSize, self.cellSize, 1))

    # noI: complement of I -- boxes NOT responsible for this object.
    noI = tf.ones_like(I, dtype=tf.float32) - I

    # Predicted per-box confidences.
    pC = predict[:, :, self.numClasses:self.numClasses + self.boxesPerCell]

    # Ground-truth center and sqrt of width/height.
    x = label[0]
    y = label[1]

    sqrtW = tf.sqrt(tf.abs(label[2]))
    sqrtH = tf.sqrt(tf.abs(label[3]))

    # Predicted centers and clamped sqrt of width/height.
    pX = predictBoxes[:, :, :, 0]
    pY = predictBoxes[:, :, :, 1]

    pSqrtW = tf.sqrt(tf.minimum(self.imageSize * 1.0, tf.maximum(0.0, predictBoxes[:, :, :, 2])))
    pSqrtH = tf.sqrt(tf.minimum(self.imageSize * 1.0, tf.maximum(0.0, predictBoxes[:, :, :, 3])))

    # One-hot ground-truth class vs predicted class distribution.
    P = tf.one_hot(tf.cast(label[4], tf.int32), self.numClasses, dtype=tf.float32)

    pP = predict[:, :, 0:self.numClasses]

    # Class loss: only cells covered by an object contribute.
    classLoss = tf.nn.l2_loss(tf.reshape(objects, (self.cellSize, self.cellSize, 1)) * (pP - P)) * self.classScale

    # Object loss: confidence of the responsible box should match the IOU.
    objectLoss = tf.nn.l2_loss(I * (pC - C)) * self.objectScale

    # No-object loss: push the other boxes' confidences towards 0.
    noObjectLoss = tf.nn.l2_loss(noI * (pC)) * self.noobjectScale

    # Coordinate loss. Bug fix: the scale factor must MULTIPLY the loss;
    # the original added self.coordScale, which injected a constant into the
    # total loss and left the coordinate term effectively unweighted.
    coordLoss = (tf.nn.l2_loss(I * (pX - x) / (self.imageSize / self.cellSize)) +
                 tf.nn.l2_loss(I * (pY - y) / (self.imageSize / self.cellSize)) +
                 tf.nn.l2_loss(I * (pSqrtW - sqrtW)) / self.imageSize +
                 tf.nn.l2_loss(I * (pSqrtH - sqrtH)) / self.imageSize) * self.coordScale
    nilboy = I

    return num + 1, objectNum, [loss[0] + classLoss, loss[1] + objectLoss, loss[2] + noObjectLoss, loss[3] + coordLoss], predict, labels, nilboy

def loss(self, predicts, labels, objectsNum):
    '''
    Build the total loss over a batch and register it in the 'losses'
    collection.

    Args:
        predicts: 4-D tensor [batch size, cell size, cell size,
                  num_classes + 5 * boxes per cell]
        labels: 3-D tensor [batch size, max objects, 5]
        objectsNum: 1-D tensor [batch size]
    Returns:
        (total loss tensor summed from the 'losses' collection,
         nilboy debug tensor from the last image)
    '''
    classLoss = tf.constant(0, tf.float32)
    objectLoss = tf.constant(0, tf.float32)
    noObjectLoss = tf.constant(0, tf.float32)
    coordLoss = tf.constant(0, tf.float32)

    loss = [0, 0, 0, 0]

    for i in range(self.batchSize):
        predict = predicts[i, :, :, :]
        # Bug fix: labels is 3-D, so one image's labels are labels[i, :, :];
        # the original indexed with four axes, which fails at graph
        # construction time (the tiny net already indexes correctly).
        label = labels[i, :, :]
        objectNum = objectsNum[i]
        nilboy = tf.ones([7, 7, 2])
        # Accumulate the per-object losses for image i.
        tupleResults = tf.while_loop(self.cond1, self.body1,
                                     [tf.constant(0), objectNum, [classLoss, objectLoss, noObjectLoss, coordLoss], predict, label, nilboy])
        for j in range(4):
            loss[j] = loss[j] + tupleResults[2][j]
        nilboy = tupleResults[5]

    tf.add_to_collection('losses', (loss[0] + loss[1] + loss[2] + loss[3]) / self.batchSize)

    tf.summary.scalar('class_loss', loss[0] / self.batchSize)
    tf.summary.scalar('object_loss', loss[1] / self.batchSize)
    tf.summary.scalar('noobject_loss', loss[2] / self.batchSize)
    # Bug fixes: 'scalsr' typo, and the coord summary must report loss[3],
    # not loss[2] (which duplicated the no-object loss).
    tf.summary.scalar('coord_loss', loss[3] / self.batchSize)
    tf.summary.scalar('weight_loss', tf.add_n(tf.get_collection('losses')) - (loss[0] + loss[1] + loss[2] + loss[3]) / self.batchSize)

    return tf.add_n(tf.get_collection('losses'), name='total_loss'), nilboy
@@ -0,0 +1,346 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 22 19:12:15 2018
@author: root
"""
import tensorflow as tf
import numpy as np
import re

from yolo.net.net import Net

class YoloTinyNet(Net):
'''
Yolo net implementation.
'''
def __init__(self, commonParams, netParams, test=False):
    '''
    Initialize the tiny net from parameter dictionaries.

    Args:
        commonParams: a dict of pretrained parameters
        netParams: a dict of trainable parameters
        test: when True, the loss-scale parameters are skipped
              (inference-only use)
    '''
    super(YoloTinyNet, self).__init__(commonParams, netParams)

    # Geometry and batching configuration.
    self.imageSize = int(commonParams['image_size'])
    self.numClasses = int(commonParams['num_classes'])
    self.cellSize = int(netParams['cell_size'])
    self.boxesPerCell = int(netParams['boxes_per_cell'])
    self.batchSize = int(commonParams['batch_size'])
    self.wd = float(netParams['weight_decay'])

    if test:
        return

    # Loss weighting factors -- only required for training.
    self.objectScale = float(netParams['object_scale'])
    self.noobjectScale = float(netParams['noobject_scale'])
    self.classScale = float(netParams['class_scale'])
    self.coordScale = float(netParams['coord_scale'])

def yoloTinyModel(self, images):
    '''
    Build the tiny yolo model.

    Args:
        images: 4-D tensor [batch size, height, width, channels]
    Return:
        predicts: [batch size, cell size, cell size, #class + 5 * boxes per cell]
    '''
    # Convolution trunk described as (kernel, pool-after?) pairs. All convs
    # are 3x3 with stride 1; the first six are each followed by a 2x2
    # max-pool with stride 2. Scope names conv1..conv9 match the checkpoint
    # layout of the original hand-unrolled code.
    convSpecs = [
        ([3, 3, 3, 16], True),
        ([3, 3, 16, 32], True),
        ([3, 3, 32, 64], True),
        ([3, 3, 64, 128], True),
        ([3, 3, 128, 256], True),
        ([3, 3, 256, 512], True),
        ([3, 3, 512, 1024], False),
        ([3, 3, 1024, 1024], False),
        ([3, 3, 1024, 1024], False),
    ]

    net = images
    for layerIdx, (kernel, poolAfter) in enumerate(convSpecs, start=1):
        net = self.conv2d('conv' + str(layerIdx), net, kernel, stride=1)
        if poolAfter:
            net = self.maxPool(net, [2, 2], 2)

    # Match the channel ordering the fully connected weights expect.
    net = tf.transpose(net, (0, 3, 1, 2))

    # Fully connected head.
    local1 = self.fullyConnectLayer('local1', net, self.cellSize * self.cellSize * 1024, 256)
    local2 = self.fullyConnectLayer('local2', local1, 256, 4096)
    local3 = self.fullyConnectLayer('local3',
                                    local2,
                                    4096,
                                    self.cellSize * self.cellSize * (self.numClasses + self.boxesPerCell * 5),
                                    leaky=False,
                                    pretrain=False,
                                    train=True)

    # Split the flat prediction into class probabilities, box confidences
    # and box coordinates, then lay each out on the S x S grid.
    n1 = self.cellSize * self.cellSize * self.numClasses
    n2 = n1 + self.cellSize * self.cellSize * self.boxesPerCell

    class_probs = tf.reshape(local3[:, 0:n1], (-1, self.cellSize, self.cellSize, self.numClasses))
    scales = tf.reshape(local3[:, n1:n2], (-1, self.cellSize, self.cellSize, self.boxesPerCell))
    boxes = tf.reshape(local3[:, n2:], (-1, self.cellSize, self.cellSize, self.boxesPerCell * 4))

    predicts = tf.concat([class_probs, scales, boxes], 3)

    return predicts

def iou(self, boxes1, boxes2):
    '''
    Compute IOU between each predicted box and one ground-truth box.

    Args:
        boxes1: 4-D tensor [cell size, cell size, boxes per cell, 4]
                4 means [x center, y center, width, height]
        boxes2: 1-D tensor [4] -- [x center, y center, width, height]
    Return:
        iou: 3-D tensor [cell size, cell size, boxes per cell]
    '''
    # Convert both box sets from center/size form to corner form
    # [xmin, ymin, xmax, ymax].
    boxes1 = tf.stack([boxes1[:, :, :, 0] - boxes1[:, :, :, 2]/2,
                       boxes1[:, :, :, 1] - boxes1[:, :, :, 3]/2,
                       boxes1[:, :, :, 0] + boxes1[:, :, :, 2]/2,
                       boxes1[:, :, :, 1] + boxes1[:, :, :, 3]/2])
    boxes1 = tf.transpose(boxes1, [1, 2, 3, 0])

    boxes2 = tf.stack([boxes2[0] - boxes2[2]/2,
                       boxes2[1] - boxes2[3]/2,
                       boxes2[0] + boxes2[2]/2,
                       boxes2[1] + boxes2[3]/2])

    # Corners of the intersection rectangle.
    lu = tf.maximum(boxes1[:, :, :, 0:2], boxes2[0:2])
    rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:])

    # Bug fix: the overlap extent is rd - lu. The original computed lu - rd,
    # which is positive exactly when the boxes do NOT overlap, yielding
    # non-zero IOU for disjoint boxes and zero for overlapping ones.
    intersection = rd - lu

    interSquare = intersection[:, :, :, 0] * intersection[:, :, :, 1]

    # Zero out entries with no real overlap in either dimension.
    mask = tf.cast(intersection[:, :, :, 0] > 0, tf.float32) * tf.cast(intersection[:, :, :, 1] > 0, tf.float32)

    interSquare = interSquare * mask

    s1 = (boxes1[:, :, :, 2] - boxes1[:, :, :, 0]) * (boxes1[:, :, :, 3] - boxes1[:, :, :, 1])
    s2 = (boxes2[2] - boxes2[0]) * (boxes2[3] - boxes2[1])

    # The epsilon avoids division by zero for degenerate boxes.
    return interSquare/(s1 + s2 - interSquare + 1e-6)


def cond1(self, num, objectNum, loss, predict, labels, nilboy):
    '''
    Loop condition for tf.while_loop: keep iterating while the object
    index is below the number of objects in the image. The remaining
    arguments are loop variables that are only threaded through.
    '''
    return num < objectNum

def body1(self, num, objectNum, loss, predict, labels, nilboy):
    '''
    Accumulate the loss contributed by one labelled object
    (tf.while_loop body).

    Args:
        num: index of the object to process in this image
        objectNum: #objects in the image
        loss: running sums [class loss, object loss, no object loss, coord loss]
        predict: 3-D tensor [cell_size, cell_size, num_classes + 5 * boxes_per_cell]
        labels: [max_objects, 5] (x_center, y_center, w, h, class);
                centers/sizes are in resized-image pixel coordinates
        nilboy: debug tensor, replaced by the responsibility mask I
    Returns:
        the updated loop variables
        (num + 1, objectNum, loss, predict, labels, nilboy)
    '''
    # Pick out this object's label row.
    label = labels[num]
    label = tf.reshape(label, [-1])

    # Object extent expressed in grid-cell units.
    minX = (label[0] - label[2] / 2) / (self.imageSize / self.cellSize)
    maxX = (label[0] + label[2] / 2) / (self.imageSize / self.cellSize)
    minY = (label[1] - label[3] / 2) / (self.imageSize / self.cellSize)
    maxY = (label[1] + label[3] / 2) / (self.imageSize / self.cellSize)

    # Snap to whole cells: every cell the box touches counts.
    minX = tf.floor(minX)
    minY = tf.floor(minY)
    maxX = tf.ceil(maxX)
    maxY = tf.ceil(maxY)

    # objects: 1 for every cell covered by the box ...
    temp = tf.cast(tf.stack([maxY - minY, maxX - minX]), dtype=tf.int32)
    objects = tf.ones(temp, tf.float32)

    # ... padded with zeros out to the S x S grid.
    temp = tf.cast(tf.stack([minY, self.cellSize - maxY, minX, self.cellSize - maxX]),
                   dtype=tf.int32)
    temp = tf.reshape(temp, (2, 2))
    objects = tf.pad(objects, temp, 'CONSTANT')

    # response: 1 only for the single cell containing the box center.
    centerX = label[0] / (self.imageSize / self.cellSize)
    centerX = tf.floor(centerX)
    centerY = label[1] / (self.imageSize / self.cellSize)
    centerY = tf.floor(centerY)
    response = tf.ones([1, 1], tf.float32)

    # Pad the single responsible cell out to the S x S grid.
    temp = tf.cast(tf.stack([centerY, self.cellSize - centerY - 1, centerX, self.cellSize - centerX - 1]),
                   dtype=tf.int32)
    temp = tf.reshape(temp, (2, 2))
    response = tf.pad(response, temp, 'CONSTANT')

    # Predicted boxes: the last 4 * boxesPerCell channels of the prediction.
    predictBoxes = predict[:, :, self.numClasses + self.boxesPerCell:]
    predictBoxes = tf.reshape(predictBoxes,
                              [self.cellSize, self.cellSize, self.boxesPerCell, 4])

    # Centers are predicted relative to a cell, sizes relative to the image;
    # scale both back to pixels.
    predictBoxes = predictBoxes * [self.imageSize / self.cellSize,
                                   self.imageSize / self.cellSize,
                                   self.imageSize,
                                   self.imageSize]

    # Top-left pixel offset of every grid cell.
    baseBoxes = np.zeros([self.cellSize, self.cellSize, 4])
    for y in range(self.cellSize):
        for x in range(self.cellSize):
            baseBoxes[y, x, :] = [self.imageSize / self.cellSize * x,
                                  self.imageSize / self.cellSize * y,
                                  0, 0]

    # Broadcast the cell offsets over the boxesPerCell axis.
    baseBoxes = np.tile(np.resize(baseBoxes,
                                  [self.cellSize, self.cellSize, 1, 4]), [1, 1, self.boxesPerCell, 1])

    # Shift cell-relative predictions into whole-image coordinates.
    predictBoxes = baseBoxes + predictBoxes

    iouPredictTruth = self.iou(predictBoxes, label[0:4])

    # C: IOU restricted to the responsible (center) cell.
    C = iouPredictTruth * tf.reshape(response,
                                     [self.cellSize, self.cellSize, 1])

    I = iouPredictTruth * tf.reshape(response, [self.cellSize, self.cellSize, 1])

    # Per-cell best box.
    maxI = tf.reduce_max(I, 2, keepdims=True)

    # Only the box with the max IOU in the responsible cell is "on".
    I = tf.cast((I >= maxI), tf.float32) * tf.reshape(response, (self.cellSize, self.cellSize, 1))

    # noI: complement of I -- boxes NOT responsible for this object.
    noI = tf.ones_like(I, dtype=tf.float32) - I

    # Predicted per-box confidences.
    pC = predict[:, :, self.numClasses:self.numClasses + self.boxesPerCell]

    # Ground-truth center and sqrt of width/height.
    x = label[0]
    y = label[1]

    sqrtW = tf.sqrt(tf.abs(label[2]))
    sqrtH = tf.sqrt(tf.abs(label[3]))

    # Predicted centers and clamped sqrt of width/height.
    pX = predictBoxes[:, :, :, 0]
    pY = predictBoxes[:, :, :, 1]

    pSqrtW = tf.sqrt(tf.minimum(self.imageSize * 1.0, tf.maximum(0.0, predictBoxes[:, :, :, 2])))
    pSqrtH = tf.sqrt(tf.minimum(self.imageSize * 1.0, tf.maximum(0.0, predictBoxes[:, :, :, 3])))

    # One-hot ground-truth class vs predicted class distribution.
    P = tf.one_hot(tf.cast(label[4], tf.int32), self.numClasses, dtype=tf.float32)

    pP = predict[:, :, 0:self.numClasses]

    # Class loss: only cells covered by an object contribute.
    classLoss = tf.nn.l2_loss(tf.reshape(objects, (self.cellSize, self.cellSize, 1)) * (pP - P)) * self.classScale

    # Object loss: confidence of the responsible box should match the IOU.
    objectLoss = tf.nn.l2_loss(I * (pC - C)) * self.objectScale

    # No-object loss: push the other boxes' confidences towards 0.
    noObjectLoss = tf.nn.l2_loss(noI * (pC)) * self.noobjectScale

    # Coordinate loss. Bug fix: the scale factor must MULTIPLY the loss;
    # the original added self.coordScale, which injected a constant into the
    # total loss and left the coordinate term effectively unweighted.
    coordLoss = (tf.nn.l2_loss(I * (pX - x) / (self.imageSize / self.cellSize)) +
                 tf.nn.l2_loss(I * (pY - y) / (self.imageSize / self.cellSize)) +
                 tf.nn.l2_loss(I * (pSqrtW - sqrtW)) / self.imageSize +
                 tf.nn.l2_loss(I * (pSqrtH - sqrtH)) / self.imageSize) * self.coordScale
    nilboy = I

    return num + 1, objectNum, [loss[0] + classLoss, loss[1] + objectLoss, loss[2] + noObjectLoss, loss[3] + coordLoss], predict, labels, nilboy

def loss(self, predicts, labels, objectsNum):
    '''
    Build the total loss over a batch and register it in the 'losses'
    collection.

    Args:
        predicts: 4-D tensor [batch size, cell size, cell size,
                  num_classes + 5 * boxes per cell]
        labels: 3-D tensor [batch size, max objects, 5]
        objectsNum: 1-D tensor [batch size]
    Returns:
        (total loss tensor summed from the 'losses' collection,
         nilboy debug tensor from the last image)
    '''
    classLoss = tf.constant(0, tf.float32)
    objectLoss = tf.constant(0, tf.float32)
    noObjectLoss = tf.constant(0, tf.float32)
    coordLoss = tf.constant(0, tf.float32)

    loss = [0, 0, 0, 0]

    for i in range(self.batchSize):
        predict = predicts[i, :, :, :]
        label = labels[i, :, :]
        objectNum = objectsNum[i]
        nilboy = tf.ones([7, 7, 2])
        # Accumulate the per-object losses for image i.
        tupleResults = tf.while_loop(self.cond1, self.body1,
                                     [tf.constant(0), objectNum, [classLoss, objectLoss, noObjectLoss, coordLoss], predict, label, nilboy])
        for j in range(4):
            loss[j] = loss[j] + tupleResults[2][j]
        nilboy = tupleResults[5]

    tf.add_to_collection('losses', (loss[0] + loss[1] + loss[2] + loss[3]) / self.batchSize)

    tf.summary.scalar('class_loss', loss[0] / self.batchSize)
    tf.summary.scalar('object_loss', loss[1] / self.batchSize)
    tf.summary.scalar('noobject_loss', loss[2] / self.batchSize)
    # Bug fix: the coord summary must report loss[3], not loss[2]
    # (which duplicated the no-object loss).
    tf.summary.scalar('coord_loss', loss[3] / self.batchSize)
    tf.summary.scalar('weight_loss', tf.add_n(tf.get_collection('losses')) - (loss[0] + loss[1] + loss[2] + loss[3]) / self.batchSize)

    return tf.add_n(tf.get_collection('losses'), name='total_loss'), nilboy
@@ -0,0 +1 @@
import yolo.text_dataset.text_dataset
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 7 21:09:49 2018
@author: root
"""

import random
import cv2
import numpy as np
import queue
from threading import Thread

class TextDataSet():
    '''
    Threaded dataset reader for a plain-text annotation file.

    File format (one image per line):
        imagePath xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2 ...

    A producer thread cycles (and reshuffles) the record list into
    recordQueue; threadNum consumer threads decode/resize images into
    imageLabelQueue, from which batch() assembles training batches.
    '''

    def __init__(self, commonParams, datasetParams):
        '''
        Args:
            commonParams: dict with 'image_size', 'batch_size',
                'max_objects_per_image'
            datasetParams: dict with 'path', 'thread_num'
        '''
        self.dataPath = str(datasetParams['path'])
        self.width = int(commonParams['image_size'])
        self.height = int(commonParams['image_size'])
        self.batchSize = int(commonParams['batch_size'])
        self.threadNum = int(datasetParams['thread_num'])
        self.maxObjects = int(commonParams['max_objects_per_image'])

        # Bounded queues keep memory in check if consumers fall behind.
        self.recordQueue = queue.Queue(10000)
        self.imageLabelQueue = queue.Queue(512)

        self.recordList = []

        # 'with' guarantees the annotation file is closed even on error
        # (the original leaked the file handle).
        with open(self.dataPath, 'r') as inputFile:
            for line in inputFile:
                ss = line.strip().split(' ')
                # Everything after the image path is numeric (coords + class).
                ss[1:] = [float(num) for num in ss[1:]]
                self.recordList.append(ss)

        self.recordPoint = 0
        self.recordNumber = len(self.recordList)

        self.numBatchPerEpoch = int(self.recordNumber / self.batchSize)

        tRecordProducer = Thread(target=self.recordProducer)
        tRecordProducer.daemon = True
        tRecordProducer.start()

        for i in range(self.threadNum):
            t = Thread(target=self.recordCustomer)
            t.daemon = True
            t.start()

    def recordProducer(self):
        '''
        Endlessly feed records into recordQueue, reshuffling the record
        list at each epoch boundary.
        '''
        while True:
            if self.recordPoint % self.recordNumber == 0:
                random.shuffle(self.recordList)
                self.recordPoint = 0
            self.recordQueue.put(self.recordList[self.recordPoint])
            self.recordPoint += 1

    def recordProcess(self, record):
        '''
        Turn one annotation record into a resized image and padded labels.

        Args:
            record: [imagePath, xmin1, ymin1, xmax1, ymax1, class1, ...]
        Returns:
            image: 3-D ndarray (RGB, resized to self.height x self.width)
            labels: 2-D list [self.maxObjects, 5] --> [xCenter, yCenter, w, h, classNum]
            objectNum: int of total object number (capped at self.maxObjects)
        '''
        image = cv2.imread(record[0])
        # Bug fix: cv2.imread returns None for unreadable paths; without
        # this check cvtColor fails with an opaque OpenCV error
        # (DatasetFetch already guards the same way).
        if image is None:
            raise Exception("Couldn't load image!")

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        h = image.shape[0]
        w = image.shape[1]

        widthRate = self.width * 1.0 / w
        heightRate = self.height * 1.0 / h

        # NOTE(review): cv2.resize expects (width, height); passing
        # (self.height, self.width) is harmless here only because both come
        # from the same 'image_size' value.
        image = cv2.resize(image, (self.height, self.width))

        labels = [[0, 0, 0, 0, 0]] * self.maxObjects
        i = 1
        objectNum = 0

        while i < len(record):
            xmin = record[i]
            ymin = record[i + 1]
            xmax = record[i + 2]
            ymax = record[i + 3]
            classNum = record[i + 4]
            # Scale the box to the resized image and convert to
            # center/size form.
            xcenter = (xmin + xmax) * 1.0 / 2 * widthRate
            ycenter = (ymin + ymax) * 1.0 / 2 * heightRate

            boxW = (xmax - xmin) * widthRate
            boxH = (ymax - ymin) * heightRate

            labels[objectNum] = [xcenter, ycenter, boxW, boxH, classNum]
            objectNum += 1

            i += 5
            if objectNum >= self.maxObjects:
                break

        return [image, labels, objectNum]

    def recordCustomer(self):
        '''
        Consumer thread: decode queued records into
        (image, labels, objectNum) tuples for batch().
        '''
        while True:
            item = self.recordQueue.get()
            out = self.recordProcess(item)
            self.imageLabelQueue.put(out)

    def batch(self):
        '''
        Assemble one batch from the image/label queue.

        Returns:
            images: 4-D ndarray [batch size, h, w, 3], scaled to [-1, 1]
            labels: 3-D ndarray [batch size, max objects, 5]
            objectsNum: 1-D ndarray [batch size]
        '''
        images = []
        labels = []
        objectsNum = []

        for i in range(self.batchSize):
            image, label, objectNum = self.imageLabelQueue.get()
            images.append(image)
            labels.append(label)
            objectsNum.append(objectNum)

        images = np.asarray(images, dtype=np.float32)
        # Normalize pixel values from [0, 255] to [-1, 1].
        images = images / 255 * 2 - 1
        labels = np.asarray(labels, dtype=np.float32)
        objectsNum = np.asarray(objectsNum, dtype=np.float32)
        return images, labels, objectsNum
@@ -0,0 +1,45 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 8 20:09:41 2018
@author: root
"""

import configparser

def processConfig(configFile):
    '''
    Process configure file.
    Args:
        configFile: path to the configure file
    Return:
        commonParams, datasetParams, netParams, solverParams — one dict of
        option name -> string value per recognized section; unrecognized
        sections are ignored.
    '''
    config = configparser.ConfigParser()
    config.read(configFile)

    # Collect options section-by-section into the matching bucket.
    buckets = {'Common': {}, 'DataSet': {}, 'Net': {}, 'Solver': {}}
    for section in config.sections():
        if section in buckets:
            target = buckets[section]
            for option in config.options(section):
                target[option] = config.get(section, option)

    return (buckets['Common'], buckets['DataSet'],
            buckets['Net'], buckets['Solver'])
@@ -0,0 +1 @@
import yolo.yolo_solver.yolo_solver
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 9 09:05:37 2018
@author: root
"""
import tensorflow as tf
import sys
import time
import numpy as np
import os
from datetime import datetime


class YoloSolver:
    """Training driver for the YOLO network.

    Builds the TensorFlow (1.x) graph — input placeholders, forward pass,
    loss, momentum optimizer — then runs the optimization loop with
    periodic logging, summary writing, and checkpointing.
    """

    def __init__(self, dataset, net, commonParams, solverParams):
        '''
        Parse hyper-parameters and construct the computation graph.

        Args:
            dataset: data source exposing batch() -> (images, labels, objectsNum)
            net: network object exposing yoloTinyModel(), loss(), and the
                pretrainedCollection / trainableCollection variable lists
            commonParams: dict with 'batch_size', 'image_size',
                'max_objects_per_image'
            solverParams: dict with 'moment', 'learning_rate',
                'pretrain_model_path', 'train_dir', 'max_iterators'
        '''
        self.moment = float(solverParams['moment'])
        self.learningRate = float(solverParams['learning_rate'])
        self.batchSize = int(commonParams['batch_size'])
        self.height = int(commonParams['image_size'])
        self.width = int(commonParams['image_size'])
        self.maxObjects = int(commonParams['max_objects_per_image'])
        self.pretrainPath = str(solverParams['pretrain_model_path'])
        self.trainDir = str(solverParams['train_dir'])
        self.maxIterators = int(solverParams['max_iterators'])

        self.dataset = dataset
        self.net = net
        self.constructGraph()

    def _train(self):
        '''
        Build the training op: momentum SGD on the total loss.

        Returns:
            applyGradientOp: op that applies the gradients and increments
                self.globalStep (second positional arg of apply_gradients).
        '''
        opt = tf.train.MomentumOptimizer(self.learningRate, self.moment)
        grads = opt.compute_gradients(self.totalLoss)
        applyGradientOp = opt.apply_gradients(grads, self.globalStep)

        return applyGradientOp

    def constructGraph(self):
        '''
        Construct the graph: placeholders for one batch, the network's
        forward pass, the loss (plus a scalar summary), and the train op.
        '''
        self.globalStep = tf.Variable(0, trainable=False)
        self.images = tf.placeholder(tf.float32, (self.batchSize, self.height, self.width, 3))
        self.labels = tf.placeholder(tf.float32, (self.batchSize, self.maxObjects, 5))
        self.objectsNum = tf.placeholder(tf.int32, (self.batchSize))

        self.predicts = self.net.yoloTinyModel(self.images)
        self.totalLoss, self.nilboy = self.net.loss(self.predicts, self.labels, self.objectsNum)
        tf.summary.scalar('loss', self.totalLoss)
        self.trainOp = self._train()

    def solve(self):
        '''
        Run training: restore pretrained weights, then iterate
        self.maxIterators steps. Logs every 10 steps, writes summaries
        every 100 steps, checkpoints trainable variables every 5000 steps.
        '''
        # Two savers: saver1 restores the pretrained layers; saver2
        # checkpoints the trainable (newly learned) layers.
        saver1 = tf.train.Saver(self.net.pretrainedCollection, write_version=1)
        saver2 = tf.train.Saver(self.net.trainableCollection, write_version=1)

        init = tf.global_variables_initializer()
        summaryOp = tf.summary.merge_all()

        # Pin training to GPU 1 and let TF grow memory usage on demand.
        os.environ["CUDA_VISIBLE_DEVICES"] = '1'
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        sess = tf.Session(config = config)
        sess.run(init)

        saver1.restore(sess, self.pretrainPath)
        summaryWriter = tf.summary.FileWriter(self.trainDir, sess.graph)

        for step in range(self.maxIterators):
            startTime = time.time()
            npImages, npLabels, npObjectsNum = self.dataset.batch()

            _, lossValue, nilboy = sess.run([self.trainOp, self.totalLoss, self.nilboy],
                feed_dict={self.images: npImages, self.labels: npLabels, self.objectsNum: npObjectsNum})

            duration = time.time() - startTime
            assert not np.isnan(lossValue), 'Model deverged with loss = NaN'
            if step%10 == 0:
                numExamplesPerStep = self.dataset.batchSize
                examplesPerSec = numExamplesPerStep / duration
                secPerBatch = float(duration)

                print('time: '+str(datetime.now())+', step: '+str(step)+', loss: '+str(lossValue)+', examplePerSec: '+str(examplesPerSec)+', secPerBatch: '+str(secPerBatch))
                sys.stdout.flush()

            if step % 100 == 0:
                # NOTE(review): this re-runs the graph on the same feed just
                # to fetch summaries; summaryOp could be fetched in the main
                # sess.run call above instead.
                summaryStr = sess.run(summaryOp,
                feed_dict={self.images: npImages, self.labels:npLabels, self.objectsNum: npObjectsNum})

                summaryWriter.add_summary(summaryStr, step)

            if step % 5000 == 0:
                # NOTE(review): trainDir is concatenated without a path
                # separator — assumes it ends with '/'; TODO confirm.
                saver2.save(sess, self.trainDir + 'model.ckpt', global_step=step)
        sess.close()
@@ -0,0 +1,203 @@
import sys

sys.path.append('./')

import time
from yolo.net.yolo_tiny_net import YoloTinyNet
import tensorflow as tf
import cv2
import numpy as np
import os

classes_name = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]


class Timer(object):
    """A simple wall-clock timer tracking per-call and average durations."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        """Start (or restart) the timer."""
        # time.time instead of time.clock: the latter does not normalize
        # for multi-threading.
        self.start_time = time.time()

    def toc(self, average=True):
        """Stop timing and return the average elapsed time per call,
        or the last interval when average=False."""
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff



def process_predicts(resized_img, predicts, thresh=0.12):
    """
    Decode the raw network output for a single image into bounding boxes.

    Args:
        resized_img: resized source image; its shape clips boxes to the border.
        predicts: model output of shape (1, 7, 7, 30):
            20 class probabilities + 2 box confidences + 2*4 box coordinates.
        thresh: minimum class-specific confidence to keep a box.
    Return:
        predicts_dict: {"cat": [[xmin, ymin, xmax, ymax, score], ...], ...}
    """
    p_classes = predicts[0, :, :, 0:20]  # 20 class probabilities per cell.
    C = predicts[0, :, :, 20:22]  # two bounding-box confidences per cell.
    coordinate = predicts[0, :, :, 22:]  # all bounding-box positions.

    p_classes = np.reshape(p_classes, (7, 7, 1, 20))
    C = np.reshape(C, (7, 7, 2, 1))

    # Class-specific confidence: (cell_row, cell_col, box_num, class_num).
    P = C * p_classes
    # Reshape once, outside the loop: (row, col, box, [xcenter, ycenter, w, h]).
    coordinate = np.reshape(coordinate, (7, 7, 2, 4))

    img_h = resized_img.shape[0]
    img_w = resized_img.shape[1]

    predicts_dict = {}
    for i in range(7):
        for j in range(7):
            # Best (box, class) pair for cell (i, j) by masking all others.
            temp_data = np.zeros_like(P, np.float32)
            temp_data[i, j, :, :] = P[i, j, :, :]
            position = np.argmax(temp_data)
            index = np.unravel_index(position, P.shape)

            if P[index] > thresh:
                class_num = index[-1]
                max_coordinate = coordinate[index[0], index[1], index[2], :]

                xcenter = max_coordinate[0]
                ycenter = max_coordinate[1]
                w = max_coordinate[2]
                h = max_coordinate[3]

                # Centers are predicted relative to their cell; convert to
                # pixel coordinates of the fixed 448x448 input.
                xcenter = (index[1] + xcenter) * (448/7.0)
                ycenter = (index[0] + ycenter) * (448/7.0)
                w = w * 448
                h = h * 448

                # Clip to the image: x against width, y against height.
                # (Fixes the original's copy-paste bug that derived ymin from
                # xcenter/w and clipped x/y against the swapped dimensions.)
                xmin = max(0, xcenter - w/2.0)
                ymin = max(0, ycenter - h/2.0)
                xmax = min(img_w, xmin + w)
                ymax = min(img_h, ymin + h)

                class_name = classes_name[class_num]
                predicts_dict.setdefault(class_name, [])
                predicts_dict[class_name].append([int(xmin), int(ymin), int(xmax), int(ymax), P[index]])

    return predicts_dict


def non_max_suppress(predicts_dict, threshold=0.25):
    """
    Apply per-class non-maximum suppression to predicted bounding boxes.

    Args:
        predicts_dict: {"cat": [[x1, y1, x2, y2, score], ...], ...}
        threshold: IoU above which a lower-scored overlapping box is dropped.
    Return:
        predicts_dict with each class's boxes filtered by NMS, ordered by
        descending score (mutated in place and also returned).
    """
    for object_name, bbox in predicts_dict.items():
        # np.float was removed in NumPy 1.24; the builtin float is equivalent.
        bbox_array = np.array(bbox, dtype=float)
        x1, y1, x2, y2, scores = (bbox_array[:, 0], bbox_array[:, 1],
                                  bbox_array[:, 2], bbox_array[:, 3],
                                  bbox_array[:, 4])
        # +1 treats coordinates as inclusive pixel indices.
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []

        while order.size > 0:
            i = order[0]
            keep.append(i)
            # Intersection of the current best box with all remaining boxes.
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
            iou = inter / (areas[i] + areas[order[1:]] - inter)
            # Keep only boxes overlapping the current box by <= threshold.
            indexs = np.where(iou <= threshold)[0]
            order = order[indexs + 1]

        # Assign once after the loop (the original rebuilt this every
        # iteration and also had a no-op `predicts_dict = predicts_dict`).
        predicts_dict[object_name] = bbox_array[keep].tolist()
    return predicts_dict


def plot_result(src_img, predicts_dict):
    """
    Draw predicted bounding boxes and class labels on the source image and
    save the annotated image to "result.jpg".

    Args:
        src_img: source image at its original size.
        predicts_dict: {"cat": [[x1, y1, x2, y2, score], ...], ...} with
            coordinates in the 448x448 detection space.
    """
    # Ratios mapping 448x448 detection coordinates back to the source size.
    height_ratio = src_img.shape[0]/448.0
    width_ratio = src_img.shape[1]/448.0

    for object_name, boxes in predicts_dict.items():
        for xmin, ymin, xmax, ymax, raw_score in boxes:
            left = xmin * width_ratio
            top = ymin * height_ratio
            right = xmax * width_ratio
            bottom = ymax * height_ratio
            score = float("%.3f" %raw_score)

            cv2.rectangle(src_img, (int(left), int(top)), (int(right), int(bottom)), (0, 0, 255))
            cv2.putText(src_img, object_name + str(score), (int(left), int(top)), 1, 2, (0, 0, 255))

    cv2.imwrite("result.jpg", src_img)


if __name__ == '__main__':
    # Demo entry point: run YOLO-tiny detection on a single image and save
    # the annotated result. batch_size 1 because a single image is processed.
    common_params = {'image_size': 448, 'num_classes': 20, 'batch_size': 1}
    net_params = {'cell_size': 7, 'boxes_per_cell': 2, 'weight_decay': 0.0005}

    net = YoloTinyNet(common_params, net_params, test=True)

    # Input placeholder and forward pass for one 448x448 RGB image.
    image = tf.placeholder(tf.float32, (1, 448, 448, 3))
    predicts = net.yoloTinyModel(image)

    # Pin to GPU 1 and let TF grow GPU memory on demand.
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    sess = tf.Session(config=config)
    # NOTE(review): no check that imread succeeded — a missing test2.jpg
    # yields None and crashes at resize; TODO confirm intended.
    src_img = cv2.imread("./test2.jpg")
    #src_img = cv2.imread("./data/VOCdevkit2007/VOC2007/JPEGImages/000058.jpg")
    resized_img = cv2.resize(src_img, (448, 448))
    #height_ratio = src_img.shape[0]/448.0
    #width_ratio = src_img.shape[1]/448.0

    # convert to rgb image
    np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
    # convert data type used in tf
    np_img = np_img.astype(np.float32)
    # data normalization and reshape to input tensor
    np_img = np_img / 255.0 * 2 - 1
    np_img = np.reshape(np_img, (1, 448, 448, 3))

    # Restore the pretrained YOLO-tiny weights before inference.
    saver = tf.train.Saver(net.trainableCollection)
    saver.restore(sess, 'models/pretrain/yolo_tiny.ckpt')

    timer = Timer()
    timer.tic()

    print('Procession detection...')
    np_predict = sess.run(predicts, feed_dict={image: np_img})
    timer.toc()
    print('One detection took {:.3f}s in average'.format(timer.total_time))
    # Decode raw output into boxes, then prune overlaps per class.
    predicts_dict = process_predicts(resized_img, np_predict)
    print ("predict dict: ", predicts_dict)
    predicts_dict = non_max_suppress(predicts_dict)
    print ("predict dict after non-maximum suppression: ", predicts_dict)

    # Draw the surviving boxes on the original-size image -> result.jpg.
    plot_result(src_img, predicts_dict)

    #cv2.waitKey(0)
    sess.close()
@@ -0,0 +1,38 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>9a2d606a-3c97-4945-a5fc-6a8c5bfc2a85</ProjectGuid>
<ProjectHome>.</ProjectHome>
<ProjectTypeGuids>{D22814C2-A430-4A53-8052-A3A64BFB2240};{888888A0-9F3D-457C-B088-3A5042F75D52}</ProjectTypeGuids>
<StartupFile>yoloTF.py</StartupFile>
<SearchPath>
</SearchPath>
<WorkingDirectory>.</WorkingDirectory>
<OutputPath>.</OutputPath>
<CommandLineArguments>--input_dir . --output_dir .</CommandLineArguments>
<Name>yoloTF</Name>
<RootNamespace>yoloTF</RootNamespace>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
<DebugSymbols>true</DebugSymbols>
<EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)' == 'Release' ">
<DebugSymbols>true</DebugSymbols>
<EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
</PropertyGroup>
<ItemGroup>
<Compile Include="yoloTF.py" />
</ItemGroup>
<Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets" />
<!-- Uncomment the CoreCompile target to enable the Build command in
Visual Studio and specify your pre- and post-build commands in
the BeforeBuild and AfterBuild targets below. -->
<!--<Target Name="CoreCompile" />-->
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
</Project>