217 yolo.py
@@ -12,14 +12,15 @@ def __init__(self, cls_name):
self.cls_name = cls_name


# Dataset / class configuration. `cfg` is defined earlier in this file
# (not fully visible here); from the visible __init__ it just records a
# class-name list on the instance.
plate_cfg = cfg(['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','plate'])
#plate_cfg = cfg(['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','plate'])

# Training hyper-parameters.
batch_size = 5
n_input = 448 * 448          # pixels per channel; the placeholder below feeds n_input * 3, presumably RGB -- TODO confirm
B = 2                        # bounding boxes predicted per grid cell (matches the b * 5 slicing in the loss code)
S = 7                        # grid is S x S cells (S*S = 49 appears in tensor shapes later in the file)
cls_name = ['plate','dog']
n_class = len(cls_name)      # number of object classes

learning_rate = 0.01
training_iters = 1000        # training loop runs while step * batch_size < training_iters

@@ -74,16 +75,40 @@ def conv_net(x, weights, biases, dropout):
conv19 = conv2d(conv18, weights['conv19'], biases['conv19'])
conv20 = conv2d(conv19, weights['conv20'], biases['conv20'])
conv21 = conv2d(conv20, weights['conv21'], biases['conv21'])
conv22 = conv2d(conv21, weights['conv22'], biases['conv22'])
conv22 = conv2d(conv21, weights['conv22'], biases['conv22'], strides = 2)

conv23 = conv2d(conv22, weights['conv23'], biases['conv23'])
conv24 = conv2d(conv23, weights['conv24'], biases['conv24'])

print 'conv1 : ', conv1.get_shape()
print 'conv2 : ', conv2.get_shape()
print 'conv3 : ', conv3.get_shape()
print 'conv4 : ', conv4.get_shape()
print 'conv5 : ', conv5.get_shape()
print 'conv6 : ', conv6.get_shape()
print 'conv7 : ', conv7.get_shape()
print 'conv8 : ', conv8.get_shape()
print 'conv9 : ', conv9.get_shape()
print 'conv10 : ', conv10.get_shape()
print 'conv11 : ', conv11.get_shape()
print 'conv12 : ', conv12.get_shape()
print 'conv13 : ', conv13.get_shape()
print 'conv14 : ', conv14.get_shape()
print 'conv15 : ', conv15.get_shape()
print 'conv16 : ', conv16.get_shape()
print 'conv17 : ', conv17.get_shape()
print 'conv18 : ', conv18.get_shape()
print 'conv19 : ', conv19.get_shape()
print 'conv20 : ', conv20.get_shape()
print 'conv21 : ', conv21.get_shape()
print 'conv22 : ', conv22.get_shape()
print 'conv23 : ', conv23.get_shape()
print 'conv24 : ', conv24.get_shape()
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv24, [-1, weights['fc1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['fc1']), biases['fc1'])
fc1 = tf.nn.relu(fc1)
print 'fc1 : ' , fc1.get_shape()
# fc1 = tf.nn.dropout(fc1, dropout)

fc2 = tf.add(tf.matmul(fc1, weights['fc2']), biases['fc2'])
@@ -174,7 +199,7 @@ def conv_net(x, weights, biases, dropout):
}


x = tf.placeholder(tf.float32, [None, n_input * 3]) # feed_dict (unknown batch , features)
x = tf.placeholder(tf.float32, [5, n_input * 3]) # feed_dict (unknown batch , features)
y = tf.placeholder(tf.float32, [None, n_class + 4]) # feed_dict (unknown batch, prob for each classes)


@@ -193,14 +218,14 @@ def conv_net(x, weights, biases, dropout):
# return tf.constant(2)
#return tf.concat(1,(confidence,IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4])))
def get_confidence(pred, y, B):

#confidence = tf.Variable(np.array([]), tf.float32)

#zero = tf.constant(np.array([]))

#print 'pred_shape : ', pred.get_shape()[1]
shape = (-1,int(pred.get_shape()[1]),B)
shape = (5,int(pred.get_shape()[1]),B)

"""
#print shape
# for b in xrange(B):
@@ -232,27 +257,27 @@ def get_confidence(pred, y, B):
# tf.concat(1,(confidence,IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4]))))
"""
if confidence == 0:
print 'iou xxx ', IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4]).get_shape()
print 'iou xxx ', IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4]).get_shape()
confidence = IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4])
else:
# print confidence.dtype
print IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4]).get_shape()
print IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4]).get_shape()
print 'pred : ',pred[ : , : , b * 5 : b * 5 + 4]
print 'y :', y[ : , b * 5 : b * 5 + 4]
print confidence.get_shape
print confidence.get_shape
confidence = tf.concat(1,(confidence,IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4])))
print confidence.get_shape()
"""
#print 'confidence shape : ', confidence.get_shape
"""
confidence shape = [batch, cell, B]
"""
confidence = tf.reshape(confidence,shape)

assert confidence.dtype == tf.float32

#print confidence.get_shape()
print 'in confidence : ', confidence.get_shape()
return confidence

def is_responsible(confidence):
@@ -265,18 +290,20 @@ def is_responsible(confidence):
"""

_, cells, B = list(confidence.get_shape())
print 'confidence : ', confidence.get_shape()
max_iou = tf.reduce_max(confidence, 2)
print 'batch_size :',batch_size
print 'cells :', cells
print 'B : ', B
print 'max_iou : ', max_iou.get_shape()

for b in xrange(B-1):
max_iou = tf.concat(1,[max_iou,max_iou])

print 'max_iou : ', max_iou.get_shape()
max_iou = tf.reshape(max_iou,[batch_size, int(cells), int(B)])
is_res = tf.greater_equal(confidence, max_iou)

#print 'is_res : ', is_res.get_shape()
#print 'is_res : ', is_res.dtype
#print 'conf : ', confidence.dtype
@@ -285,21 +312,21 @@ def is_responsible(confidence):
#print 'confidence : ',confidence.get_shape()
assert is_res.dtype == bool

assert confidence.dtype == tf.float32
assert confidence.dtype == tf.float32
#assert is_res.get_shape() == confidence.get_shape()

return is_res



def is_appear_in_cell(confidence):
    """Per-cell object-presence mask.

    Args:
        confidence: float32 tensor of shape [batch, S*S, B] holding the
            per-box IoU confidences produced by get_confidence.

    Returns:
        bool tensor of shape [batch, S*S]: True for cells whose B box
        confidences sum to a value greater than zero, i.e. at least one
        predicted box in that cell overlaps a ground-truth object.
    """
    # Fix: use S * S instead of the magic constant 49 so the comparison
    # shape stays consistent with the module-level grid size S (is_res /
    # the loss code elsewhere derive cell counts from S as well).
    per_cell_total = tf.reduce_sum(confidence, 2)
    return tf.greater(per_cell_total, tf.zeros((batch_size, S * S)))
    #return tf.reduce_all(confidence,2)

"""
training
training
"""

@@ -314,6 +341,7 @@ def is_appear_in_cell(confidence):
# Build the forward graph; the fourth argument is conv_net's dropout
# parameter, fixed to 1 here (the fc-layer dropout call is commented out
# inside conv_net anyway).
pred = conv_net(x, weights, biases, 1)
display_step = 20                       # report loss/accuracy every 20 steps

print 'prediction first : ' , pred.get_shape()
# YOLO-style masks derived from per-(cell, box) IoU confidences:
# is_res marks the box with the max IoU within each cell (greater_equal
# against the per-cell max), is_appear marks cells whose summed box
# confidence is > 0, i.e. cells containing an object.
confidence = get_confidence(pred, y, B)
is_res = is_responsible(confidence)
is_appear = is_appear_in_cell(confidence)
@@ -323,39 +351,78 @@ def is_appear_in_cell(confidence):
not_res = tf.cast(is_res, tf.float32)
is_appear = tf.cast(is_appear, tf.float32)

images, objects = load_imdb('plate', cls_name)
images, objects = load_imdb('plate', cls_name)

images =np.array(images)
loss = None

B = 2
#b = tf.Variable(0)
for b in xrange(B):

#tf.while_loop(b < B, ):

"""
B = [(SxS) x B]
x, y => relative to cell
w, h => relative to image
pred = [batch, SxS, 5B+C]
dx = (pred[:,:,b*5+0] - y[:,0]) ** 2
dy = (pred[:,:,b*5+1] - y[:,1]) ** 2
dw = (pred[:,:,b*5+2]**0.5 - y[:,2]**0.5) ** 2
dh = (pred[:,:,b*5+3]**0.5 - y[:,3]**0.5) ** 2
dc = (pred[:,:,b*5+4] - y[:,4]) ** 2
"""
print 'dx :', tf.slice(y,[0,0],[-1,1]).get_shape()

pred_x = tf.slice(pred, [0,0,b * 5 + 0], [-1,-1,1])

print 'slice x : ', tf.slice(y, [0,0], [-1,1]).get_shape()
# print 'slice x 2 : ', , [-1, S*S, b+1]).
gt_x = tf.reshape(tf.slice(y, [0,0], [-1,1]), [batch_size, 1, 1])

pred_y = tf.slice(pred, [0,0,b * 5 + 1], [-1,-1,1])
gt_y = tf.reshape(tf.slice(y, [0,1], [-1,1]), [batch_size, 1, 1])

pred_w = tf.slice(pred, [0,0,b * 5 + 2], [-1,-1,1])
gt_w = tf.reshape(tf.slice(y, [0,2], [-1,1]), [batch_size, 1, 1])

pred_h = tf.slice(pred, [0,0,b * 5 + 3], [-1,-1,1])
gt_h = tf.reshape(tf.slice(y, [0,3], [-1,1]), [batch_size, 1, 1])


pred_c = tf.slice(pred, [0,0,b * 5 + 4], [-1,-1,1])

# gt_c = tf.ones([-1,S*S,b+1])
print 'gt_x : ', gt_x.get_shape()
print 'pred_x : ', pred_x.get_shape()

dx = tf.pow(tf.sub(pred_x, gt_x), 2)
dy = tf.pow(tf.sub(pred_y, gt_y), 2)

dw = tf.pow(tf.sub(tf.pow(pred_w,0.5), tf.pow(gt_w,0.5)), 2)
dh = tf.pow(tf.sub(tf.pow(pred_h,0.5), tf.pow(gt_h,0.5)), 2)

dc = tf.pow(tf.sub(pred_c, 1), 2)



"""
print 'dx predict : ', tf.slice(pred,[0,0,b*5+0],[-1,-1,1]).get_shape()
print 'dx y :', tf.slice(y,[0,0],[-1,1]).get_shape()
dx = tf.pow(tf.sub(tf.slice(pred,[0,0,b*5+0],[-1,-1,1]),tf.slice(y,[0,0],[-1,1])),2)
dy = tf.pow(tf.sub(tf.slice(pred,[0,0,b*5+1],[-1,-1,1]),tf.slice(y,[0,1],[-1,1])),2)
dw = tf.pow(tf.sub(tf.pow(tf.slice(pred,[0,0,b*5+2],[-1,-1,1]),0.5),tf.pow(tf.slice(y,[0,2],[-1,1]),0.5)),2)
dh = tf.pow(tf.sub(tf.pow(tf.slice(pred,[0,0,b*5+3],[-1,-1,1]),0.5),tf.pow(tf.slice(y,[0,3],[-1,1]),0.5)),2)
dc = tf.pow(tf.sub(tf.slice(pred,[0,0,b*5+4],[-1,-1,1]),1),2) #tf.slice(y,[0,4],[-1,1])),2)
"""
"""
if loss == None:
loss = lcoord * is_res[:,:,b] * (dx+dy) + \
lcoord * is_res[:,:,b] * (dw+dh) + \
is_res[:,:,b] * dc + \
@@ -366,35 +433,36 @@ def is_appear_in_cell(confidence):
lcoord * is_res[:,:,b] * (dw+dh) + \
is_res[:,:,b] * dc + \
lnoobj * not_res[:,:,b] * dc
index = b + 1
"""

if loss == None:
print tf.cast(tf.slice(is_res,[0,0,b],[-1,-1,1]),tf.int32).dtype
print tf.add(dx,dy).dtype

#print tf.cast(tf.slice(is_res,[0,0,b],[-1,-1,1]),tf.int32).dtype
#print tf.add(dx,dy).dtype
#print lcoord.dtype
test1 = tf.cast(tf.slice(is_res,[0,0,b],[-1,-1,1]),tf.float32)
#test1 = tf.cast(tf.slice(is_res,[0,0,b],[-1,-1,1]),tf.float32)
#test = tf.mul(lcoord, tf.cast(tf.slice(is_res,[0,0,b],[-1,-1,1])))

loss_coord_xy = tf.mul(tf.mul(lcoord, tf.slice(is_res,[0,0,b],[-1,-1,1])), tf.add(dx,dy))
loss_coord_wh = tf.mul(tf.mul(lcoord, tf.slice(is_res,[0,0,b],[-1,-1,1])), tf.add(dw,dh))
loss_is_obj = tf.mul(tf.slice(is_res,[0,0,b],[-1,-1,1]),dc)
loss_no_obj = tf.mul(tf.slice(not_res,[0,0,b],[-1,-1,1]),dc)

loss = tf.add(tf.add(loss_coord_xy,loss_coord_wh), tf.add(loss_is_obj,loss_no_obj))

else:

loss_coord_xy = tf.mul(tf.mul(lcoord, tf.slice(is_res,[0,0,b],[-1,-1,1])), tf.add(dx,dy))
loss_coord_wh = tf.mul(tf.mul(lcoord, tf.slice(is_res,[0,0,b],[-1,-1,1])), tf.add(dw,dh))
loss_is_obj = tf.mul(tf.slice(is_res,[0,0,b],[-1,-1,1]),dc)
loss_no_obj = tf.mul(tf.slice(not_res,[0,0,b],[-1,-1,1]),dc)

loss = tf.add(loss, tf.add(tf.add(loss_coord_xy,loss_coord_wh), tf.add(loss_is_obj,loss_no_obj)))

index = b + 1

"""
loss += is_appear * sum((y[:,:,b:] - pred[:,:,b:]) ** 2)
"""
@@ -403,34 +471,51 @@ def is_appear_in_cell(confidence):
#print tmp1.get_shape()
#tmp2 = tf.slice(y,[0,5],[-1,-1])
#print tmp2.get_shape(
print 'is_appear : ', is_appear.dtype
print 'pred : ',pred.get_shape()
print 'tmp 1 : ',tf.slice(pred,[0,0,5 * index],[-1,-1,-1]).get_shape()
tmp = tf.mul(is_appear, tf.pow(tf.reduce_sum(tf.sub(tf.slice(y,[0,4],[-1,-1]), tf.slice(pred,[0,0,5 * index],[-1,-1,-1]))),2))
print 'tmp shape ', tmp.get_shape()
print loss.dtype

loss = tf.reshape(loss,[int(loss.get_shape()[0]),int(loss.get_shape()[1])])
print 'loss shape ', loss.get_shape()
loss = tf.add(loss, tf.mul(is_appear, tf.pow(tf.reduce_sum(tf.sub(tf.slice(y,[0,4],[-1,-1]), tf.slice(pred,[0,0,5 * index],[-1,-1,-1]))),2)))
print int(tf.slice(y,[0,4],[-1,-1]).get_shape()[1])
print tf.slice(y,[0,4],[-1,-1]).get_shape()
#print 'is_appear : ', is_appear.dtype
#print 'pred : ',pred.get_shape()
#print 'tmp 1 : ',tf.slice(pred,[0,0,5 * index],[-1,-1,-1]).get_shape()
#tmp = tf.mul(is_appear, tf.pow(tf.reduce_sum(tf.sub(tf.slice(y,[0,4],[-1,-1]), tf.slice(pred,[0,0,5 * index],[-1,-1,-1]))),2))
#print 'tmp shape ', tmp.get_shape()
#print loss.dtype
print 'loss shape : ', loss.get_shape()

"""
reshape loss [batch, cell, bbox] to [batch, bbox], so we can sum over all bbox
"""

gt_cls = tf.pow(tf.sub(tf.slice(y, [0,4], [-1,-1]), tf.slice(pred, [0,0,5 * index], [-1,-1,-1])),2)

print 'gt_cls 1 :', gt_cls.get_shape()

is_appear = tf.reshape(is_appear, [-1, S*S, 1])
gt_cls = tf.mul(is_appear, gt_cls)

print 'gt_cls 2 :', gt_cls.get_shape()
print 'y : ', tf.slice(y,[0,4],[-1,-1]).get_shape()
#loss = tf.add(loss,tf.reduce_sum(tf.mul(is_appear, tf.pow(tf.sub(tf.slice(y,[0,4],[-1,-1]), tf.slice(pred,[0,0,5 * index],[-1,-1,-1])),2))))
print ' b : ', b
#loss = tf.reshape(loss,[int(loss.get_shape()[0]),int(loss.get_shape()[1] * (index))])
#print 'loss shape ', loss.get_shape()
#loss = tf.add(loss, tf.mul(is_appear, tf.pow(tf.reduce_sum(tf.sub(tf.slice(y,[0,4],[-1,-1]), tf.slice(pred,[0,0,5 * index],[-1,-1,-1]))),2)))
#print int(tf.slice(y,[0,4],[-1,-1]).get_shape()[1])
#print tf.slice(y,[0,4],[-1,-1]).get_shape()
assert int(tf.slice(y,[0,4],[-1,-1]).get_shape()[1]) == n_class
loss = tf.reduce_mean(loss)

optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
#optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
#print 'image : ', len(images)
print 'images : ', images.dtype
#print 'images : ', images.dtype

#print images.get_shape()

init = tf.initialize_all_variables()
with tf.Session() as sess:


sess.run(init)
step = 0

print 'realy start training ... '
while step * batch_size < training_iters:
#print 'image : ', len(images)
@@ -442,26 +527,26 @@ def is_appear_in_cell(confidence):
batch_x = images[step * batch_size : (step+1) * batch_size]
batch_y = objects[step * batch_size : (step+1) * batch_size]
print 'batch_y:',batch_y.shape
print 'batch_x:',batch_x.shape
#print 'batch_y : ', type(batch_y)
#print 'batch_x : ', type(batch_x)

print 'batch_y : ', type(batch_y)
print 'batch_x : ', type(batch_x)


#print 'batch x : ', batch_x.get_shape()
#print 'batch_y : ', batch_y.get_shape()


sess.run(optimizer, feed_dict =

sess.run(optimizer, feed_dict =
{
x:batch_x,
y:batch_y
})

print 'step {} '.format(step)
if step % display_step == 0:
loss, acc = sess([cost, accuracy],
feed_dict = {

loss, acc = sess([cost, accuracy],
feed_dict = {
x:batch_x,
y:batch_y})