@@ -12,14 +12,15 @@ def __init__(self, cls_name):
self .cls_name = cls_name
plate_cfg = cfg (['1' ,'2' ,'3' ,'4' ,'5' ,'6' ,'7' ,'8' ,'9' ,'10' ,'11' ,'12' ,'13' ,'14' ,'15' ,'16' ,'17' ,'18' ,'19' ,'plate' ])
# plate_cfg = cfg(['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','plate'])
batch_size = 5
n_input = 448 * 448
B = 2
S = 7
cls_name = ['plate' ,'dog' ]
n_class = len (cls_name )
learning_rate = 0.01
training_iters = 1000
@@ -74,16 +75,40 @@ def conv_net(x, weights, biases, dropout):
conv19 = conv2d (conv18 , weights ['conv19' ], biases ['conv19' ])
conv20 = conv2d (conv19 , weights ['conv20' ], biases ['conv20' ])
conv21 = conv2d (conv20 , weights ['conv21' ], biases ['conv21' ])
conv22 = conv2d (conv21 , weights ['conv22' ], biases ['conv22' ])
conv22 = conv2d (conv21 , weights ['conv22' ], biases ['conv22' ], strides = 2 )
conv23 = conv2d (conv22 , weights ['conv23' ], biases ['conv23' ])
conv24 = conv2d (conv23 , weights ['conv24' ], biases ['conv24' ])
print 'conv1 : ' , conv1 .get_shape ()
print 'conv2 : ' , conv2 .get_shape ()
print 'conv3 : ' , conv3 .get_shape ()
print 'conv4 : ' , conv4 .get_shape ()
print 'conv5 : ' , conv5 .get_shape ()
print 'conv6 : ' , conv6 .get_shape ()
print 'conv7 : ' , conv7 .get_shape ()
print 'conv8 : ' , conv8 .get_shape ()
print 'conv9 : ' , conv9 .get_shape ()
print 'conv10 : ' , conv10 .get_shape ()
print 'conv11 : ' , conv11 .get_shape ()
print 'conv12 : ' , conv12 .get_shape ()
print 'conv13 : ' , conv13 .get_shape ()
print 'conv14 : ' , conv14 .get_shape ()
print 'conv15 : ' , conv15 .get_shape ()
print 'conv16 : ' , conv16 .get_shape ()
print 'conv17 : ' , conv17 .get_shape ()
print 'conv18 : ' , conv18 .get_shape ()
print 'conv19 : ' , conv19 .get_shape ()
print 'conv20 : ' , conv20 .get_shape ()
print 'conv21 : ' , conv21 .get_shape ()
print 'conv22 : ' , conv22 .get_shape ()
print 'conv23 : ' , conv23 .get_shape ()
print 'conv24 : ' , conv24 .get_shape ()
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf .reshape (conv24 , [- 1 , weights ['fc1' ].get_shape ().as_list ()[0 ]])
fc1 = tf .add (tf .matmul (fc1 , weights ['fc1' ]), biases ['fc1' ])
fc1 = tf .nn .relu (fc1 )
print 'fc1 : ' , fc1 .get_shape ()
# fc1 = tf.nn.dropout(fc1, dropout)
fc2 = tf .add (tf .matmul (fc1 , weights ['fc2' ]), biases ['fc2' ])
@@ -174,7 +199,7 @@ def conv_net(x, weights, biases, dropout):
}
x = tf .placeholder (tf .float32 , [None , n_input * 3 ]) # feed_dict (unknown batch , features)
x = tf .placeholder (tf .float32 , [5 , n_input * 3 ]) # feed_dict (unknown batch , features)
y = tf .placeholder (tf .float32 , [None , n_class + 4 ]) # feed_dict (unknown batch, prob for each classes)
@@ -193,14 +218,14 @@ def conv_net(x, weights, biases, dropout):
# return tf.constant(2)
#return tf.concat(1,(confidence,IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4])))
def get_confidence (pred , y , B ):
#confidence = tf.Variable(np.array([]), tf.float32)
#zero = tf.constant(np.array([]))
#print 'pred_shape : ', pred.get_shape()[1]
shape = (- 1 ,int (pred .get_shape ()[1 ]),B )
shape = (5 ,int (pred .get_shape ()[1 ]),B )
"""
#print shape
# for b in xrange(B):
@@ -232,27 +257,27 @@ def get_confidence(pred, y, B):
# tf.concat(1,(confidence,IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4]))))
"""
if confidence == 0:
print 'iou xxx ', IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4]).get_shape()
print 'iou xxx ', IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4]).get_shape()
confidence = IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4])
else:
# print confidence.dtype
print IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4]).get_shape()
print IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4]).get_shape()
print 'pred : ',pred[ : , : , b * 5 : b * 5 + 4]
print 'y :', y[ : , b * 5 : b * 5 + 4]
print confidence.get_shape
print confidence.get_shape
confidence = tf.concat(1,(confidence,IoU(pred[ : , : , b * 5 : b * 5 + 4], y[ : , b * 5 : b * 5 + 4])))
print confidence.get_shape()
"""
#print 'confidence shape : ', confidence.get_shape
"""
confidence shape = [batch, cell, B]
"""
confidence = tf .reshape (confidence ,shape )
assert confidence .dtype == tf .float32
# print confidence.get_shape()
print 'in confidence : ' , confidence .get_shape ()
return confidence
def is_responsible (confidence ):
@@ -265,18 +290,20 @@ def is_responsible(confidence):
"""
_ , cells , B = list (confidence .get_shape ())
print 'confidence : ' , confidence .get_shape ()
max_iou = tf .reduce_max (confidence , 2 )
print 'batch_size :' ,batch_size
print 'cells :' , cells
print 'B : ' , B
print 'max_iou : ' , max_iou .get_shape ()
for b in xrange (B - 1 ):
max_iou = tf .concat (1 ,[max_iou ,max_iou ])
print 'max_iou : ' , max_iou .get_shape ()
max_iou = tf .reshape (max_iou ,[batch_size , int (cells ), int (B )])
is_res = tf .greater_equal (confidence , max_iou )
#print 'is_res : ', is_res.get_shape()
#print 'is_res : ', is_res.dtype
#print 'conf : ', confidence.dtype
@@ -285,21 +312,21 @@ def is_responsible(confidence):
#print 'confidence : ',confidence.get_shape()
assert is_res .dtype == bool
assert confidence .dtype == tf .float32
assert confidence .dtype == tf .float32
#assert is_res.get_shape() == confidence.get_shape()
return is_res
def is_appear_in_cell (confidence ):
    """Return a boolean mask of grid cells that contain at least one object.

    Args:
        confidence: float32 tensor, shape [batch_size, S*S, B] — per-box IoU
            confidences for each grid cell (assumes values are non-negative,
            since the any-object test below sums over boxes — TODO confirm).

    Returns:
        Bool tensor of shape [batch_size, S*S]: True for cells where the
        summed box confidence is strictly positive, i.e. some box responded.
    """
    # Sum over the B boxes (axis 2) of each cell; a cell "contains" an object
    # iff that sum is > 0.
    # Fix: the cell count was hard-coded as 49; derive it from the module-level
    # grid size S (S*S == 49 for S == 7) so changing S cannot silently break
    # the shape of this mask.
    return tf .greater (tf .reduce_sum (confidence ,2 ),tf .zeros ((batch_size ,S * S )))
"""
training
training
"""
@@ -314,6 +341,7 @@ def is_appear_in_cell(confidence):
pred = conv_net (x , weights , biases , 1 )
display_step = 20
print 'prediction first : ' , pred .get_shape ()
confidence = get_confidence (pred , y , B )
is_res = is_responsible (confidence )
is_appear = is_appear_in_cell (confidence )
@@ -323,39 +351,78 @@ def is_appear_in_cell(confidence):
not_res = tf .cast (is_res , tf .float32 )
is_appear = tf .cast (is_appear , tf .float32 )
images , objects = load_imdb ('plate' , cls_name )
images , objects = load_imdb ('plate' , cls_name )
images = np .array (images )
loss = None
B = 2
#b = tf.Variable(0)
for b in xrange (B ):
#tf.while_loop(b < B, ):
"""
B = [(SxS) x B]
x, y => relative to cell
w, h => relative to image
pred = [batch, SxS, 5B+C]
dx = (pred[:,:,b*5+0] - y[:,0]) ** 2
dy = (pred[:,:,b*5+1] - y[:,1]) ** 2
dw = (pred[:,:,b*5+2]**0.5 - y[:,2]**0.5) ** 2
dh = (pred[:,:,b*5+3]**0.5 - y[:,3]**0.5) ** 2
dc = (pred[:,:,b*5+4] - y[:,4]) ** 2
"""
print 'dx :' , tf .slice (y ,[0 ,0 ],[- 1 ,1 ]).get_shape ()
pred_x = tf .slice (pred , [0 ,0 ,b * 5 + 0 ], [- 1 ,- 1 ,1 ])
print 'slice x : ' , tf .slice (y , [0 ,0 ], [- 1 ,1 ]).get_shape ()
# print 'slice x 2 : ', , [-1, S*S, b+1]).
gt_x = tf .reshape (tf .slice (y , [0 ,0 ], [- 1 ,1 ]), [batch_size , 1 , 1 ])
pred_y = tf .slice (pred , [0 ,0 ,b * 5 + 1 ], [- 1 ,- 1 ,1 ])
gt_y = tf .reshape (tf .slice (y , [0 ,1 ], [- 1 ,1 ]), [batch_size , 1 , 1 ])
pred_w = tf .slice (pred , [0 ,0 ,b * 5 + 2 ], [- 1 ,- 1 ,1 ])
gt_w = tf .reshape (tf .slice (y , [0 ,2 ], [- 1 ,1 ]), [batch_size , 1 , 1 ])
pred_h = tf .slice (pred , [0 ,0 ,b * 5 + 3 ], [- 1 ,- 1 ,1 ])
gt_h = tf .reshape (tf .slice (y , [0 ,3 ], [- 1 ,1 ]), [batch_size , 1 , 1 ])
pred_c = tf .slice (pred , [0 ,0 ,b * 5 + 4 ], [- 1 ,- 1 ,1 ])
# gt_c = tf.ones([-1,S*S,b+1])
print 'gt_x : ' , gt_x .get_shape ()
print 'pred_x : ' , pred_x .get_shape ()
dx = tf .pow (tf .sub (pred_x , gt_x ), 2 )
dy = tf .pow (tf .sub (pred_y , gt_y ), 2 )
dw = tf .pow (tf .sub (tf .pow (pred_w ,0.5 ), tf .pow (gt_w ,0.5 )), 2 )
dh = tf .pow (tf .sub (tf .pow (pred_h ,0.5 ), tf .pow (gt_h ,0.5 )), 2 )
dc = tf .pow (tf .sub (pred_c , 1 ), 2 )
"""
print 'dx predict : ', tf.slice(pred,[0,0,b*5+0],[-1,-1,1]).get_shape()
print 'dx y :', tf.slice(y,[0,0],[-1,1]).get_shape()
dx = tf.pow(tf.sub(tf.slice(pred,[0,0,b*5+0],[-1,-1,1]),tf.slice(y,[0,0],[-1,1])),2)
dy = tf.pow(tf.sub(tf.slice(pred,[0,0,b*5+1],[-1,-1,1]),tf.slice(y,[0,1],[-1,1])),2)
dw = tf.pow(tf.sub(tf.pow(tf.slice(pred,[0,0,b*5+2],[-1,-1,1]),0.5),tf.pow(tf.slice(y,[0,2],[-1,1]),0.5)),2)
dh = tf.pow(tf.sub(tf.pow(tf.slice(pred,[0,0,b*5+3],[-1,-1,1]),0.5),tf.pow(tf.slice(y,[0,3],[-1,1]),0.5)),2)
dc = tf.pow(tf.sub(tf.slice(pred,[0,0,b*5+4],[-1,-1,1]),1),2) #tf.slice(y,[0,4],[-1,1])),2)
"""
"""
if loss == None:
loss = lcoord * is_res[:,:,b] * (dx+dy) + \
lcoord * is_res[:,:,b] * (dw+dh) + \
is_res[:,:,b] * dc + \
@@ -366,35 +433,36 @@ def is_appear_in_cell(confidence):
lcoord * is_res[:,:,b] * (dw+dh) + \
is_res[:,:,b] * dc + \
lnoobj * not_res[:,:,b] * dc
index = b + 1
"""
if loss == None :
print tf .cast (tf .slice (is_res ,[0 ,0 ,b ],[- 1 ,- 1 ,1 ]),tf .int32 ).dtype
print tf .add (dx ,dy ).dtype
# print tf.cast(tf.slice(is_res,[0,0,b],[-1,-1,1]),tf.int32).dtype
# print tf.add(dx,dy).dtype
#print lcoord.dtype
test1 = tf .cast (tf .slice (is_res ,[0 ,0 ,b ],[- 1 ,- 1 ,1 ]),tf .float32 )
# test1 = tf.cast(tf.slice(is_res,[0,0,b],[-1,-1,1]),tf.float32)
#test = tf.mul(lcoord, tf.cast(tf.slice(is_res,[0,0,b],[-1,-1,1])))
loss_coord_xy = tf .mul (tf .mul (lcoord , tf .slice (is_res ,[0 ,0 ,b ],[- 1 ,- 1 ,1 ])), tf .add (dx ,dy ))
loss_coord_wh = tf .mul (tf .mul (lcoord , tf .slice (is_res ,[0 ,0 ,b ],[- 1 ,- 1 ,1 ])), tf .add (dw ,dh ))
loss_is_obj = tf .mul (tf .slice (is_res ,[0 ,0 ,b ],[- 1 ,- 1 ,1 ]),dc )
loss_no_obj = tf .mul (tf .slice (not_res ,[0 ,0 ,b ],[- 1 ,- 1 ,1 ]),dc )
loss = tf .add (tf .add (loss_coord_xy ,loss_coord_wh ), tf .add (loss_is_obj ,loss_no_obj ))
else :
loss_coord_xy = tf .mul (tf .mul (lcoord , tf .slice (is_res ,[0 ,0 ,b ],[- 1 ,- 1 ,1 ])), tf .add (dx ,dy ))
loss_coord_wh = tf .mul (tf .mul (lcoord , tf .slice (is_res ,[0 ,0 ,b ],[- 1 ,- 1 ,1 ])), tf .add (dw ,dh ))
loss_is_obj = tf .mul (tf .slice (is_res ,[0 ,0 ,b ],[- 1 ,- 1 ,1 ]),dc )
loss_no_obj = tf .mul (tf .slice (not_res ,[0 ,0 ,b ],[- 1 ,- 1 ,1 ]),dc )
loss = tf .add (loss , tf .add (tf .add (loss_coord_xy ,loss_coord_wh ), tf .add (loss_is_obj ,loss_no_obj )))
index = b + 1
"""
loss += is_appear * sum((y[:,:,b:] - pred[:,:,b:]) ** 2)
"""
@@ -403,34 +471,51 @@ def is_appear_in_cell(confidence):
#print tmp1.get_shape()
#tmp2 = tf.slice(y,[0,5],[-1,-1])
#print tmp2.get_shape(
print 'is_appear : ' , is_appear .dtype
print 'pred : ' ,pred .get_shape ()
print 'tmp 1 : ' ,tf .slice (pred ,[0 ,0 ,5 * index ],[- 1 ,- 1 ,- 1 ]).get_shape ()
tmp = tf .mul (is_appear , tf .pow (tf .reduce_sum (tf .sub (tf .slice (y ,[0 ,4 ],[- 1 ,- 1 ]), tf .slice (pred ,[0 ,0 ,5 * index ],[- 1 ,- 1 ,- 1 ]))),2 ))
print 'tmp shape ' , tmp .get_shape ()
print loss .dtype
loss = tf .reshape (loss ,[int (loss .get_shape ()[0 ]),int (loss .get_shape ()[1 ])])
print 'loss shape ' , loss .get_shape ()
loss = tf .add (loss , tf .mul (is_appear , tf .pow (tf .reduce_sum (tf .sub (tf .slice (y ,[0 ,4 ],[- 1 ,- 1 ]), tf .slice (pred ,[0 ,0 ,5 * index ],[- 1 ,- 1 ,- 1 ]))),2 )))
print int (tf .slice (y ,[0 ,4 ],[- 1 ,- 1 ]).get_shape ()[1 ])
print tf .slice (y ,[0 ,4 ],[- 1 ,- 1 ]).get_shape ()
#print 'is_appear : ', is_appear.dtype
#print 'pred : ',pred.get_shape()
#print 'tmp 1 : ',tf.slice(pred,[0,0,5 * index],[-1,-1,-1]).get_shape()
#tmp = tf.mul(is_appear, tf.pow(tf.reduce_sum(tf.sub(tf.slice(y,[0,4],[-1,-1]), tf.slice(pred,[0,0,5 * index],[-1,-1,-1]))),2))
#print 'tmp shape ', tmp.get_shape()
#print loss.dtype
print 'loss shape : ' , loss .get_shape ()
"""
reshape loss [batch, cell, bbox] to [batch, bbox], so we can sum over all bbox
"""
gt_cls = tf .pow (tf .sub (tf .slice (y , [0 ,4 ], [- 1 ,- 1 ]), tf .slice (pred , [0 ,0 ,5 * index ], [- 1 ,- 1 ,- 1 ])),2 )
print 'gt_cls 1 :' , gt_cls .get_shape ()
is_appear = tf .reshape (is_appear , [- 1 , S * S , 1 ])
gt_cls = tf .mul (is_appear , gt_cls )
print 'gt_cls 2 :' , gt_cls .get_shape ()
print 'y : ' , tf .slice (y ,[0 ,4 ],[- 1 ,- 1 ]).get_shape ()
#loss = tf.add(loss,tf.reduce_sum(tf.mul(is_appear, tf.pow(tf.sub(tf.slice(y,[0,4],[-1,-1]), tf.slice(pred,[0,0,5 * index],[-1,-1,-1])),2))))
print ' b : ' , b
#loss = tf.reshape(loss,[int(loss.get_shape()[0]),int(loss.get_shape()[1] * (index))])
#print 'loss shape ', loss.get_shape()
#loss = tf.add(loss, tf.mul(is_appear, tf.pow(tf.reduce_sum(tf.sub(tf.slice(y,[0,4],[-1,-1]), tf.slice(pred,[0,0,5 * index],[-1,-1,-1]))),2)))
#print int(tf.slice(y,[0,4],[-1,-1]).get_shape()[1])
#print tf.slice(y,[0,4],[-1,-1]).get_shape()
assert int (tf .slice (y ,[0 ,4 ],[- 1 ,- 1 ]).get_shape ()[1 ]) == n_class
loss = tf .reduce_mean (loss )
optimizer = tf .train .GradientDescentOptimizer (learning_rate ).minimize (loss )
optimizer = tf . train . AdamOptimizer ( learning_rate ). minimize ( loss )
# optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
#print 'image : ', len(images)
print 'images : ' , images .dtype
# print 'images : ', images.dtype
#print images.get_shape()
init = tf .initialize_all_variables ()
with tf .Session () as sess :
sess .run (init )
step = 0
print 'realy start training ... '
while step * batch_size < training_iters :
#print 'image : ', len(images)
@@ -442,26 +527,26 @@ def is_appear_in_cell(confidence):
batch_x = images [step * batch_size : (step + 1 ) * batch_size ]
batch_y = objects [step * batch_size : (step + 1 ) * batch_size ]
print 'batch_y:' ,batch_y .shape
print 'batch_x:' ,batch_x .shape
#print 'batch_y : ', type(batch_y)
#print 'batch_x : ', type(batch_x)
print 'batch_y : ' , type (batch_y )
print 'batch_x : ' , type (batch_x )
#print 'batch x : ', batch_x.get_shape()
#print 'batch_y : ', batch_y.get_shape()
sess .run (optimizer , feed_dict =
sess .run (optimizer , feed_dict =
{
x :batch_x ,
y :batch_y
})
print 'step {} ' .format (step )
if step % display_step == 0 :
loss , acc = sess ([cost , accuracy ],
feed_dict = {
loss , acc = sess ([cost , accuracy ],
feed_dict = {
x :batch_x ,
y :batch_y })