In [0]:
import numpy as np
from skimage.draw import polygon


def compute_classification_acc(pred, gt):
  """Return the fraction of entries in `pred` that equal the matching entry in `gt`.

  Both arguments must be NumPy arrays of identical shape (checked with an
  assertion). Every element is scored independently: the number of matching
  positions is divided by the total number of elements in `gt`.
  """
  assert pred.shape == gt.shape
  matches = pred == gt
  # Summing a boolean mask counts its True entries.
  num_correct = matches.sum()
  return num_correct / gt.size
    
    
def compute_iou(b_pred, b_gt, image_shape=(64, 64)):
  """Mean pixel-wise intersection-over-union of predicted vs. ground-truth boxes.

  Every box is rasterized as a filled rectangle on an `image_shape` canvas
  with `skimage.draw.polygon` (the canvas shape is passed to `polygon`, which
  clips to it). Box components 0 and 2 are used as the row coordinates and
  components 1 and 3 as the column coordinates of the rectangle corners.

  Args:
    b_pred: predicted bounding boxes, array of shape (n, 2, 4).
    b_gt: ground-truth bounding boxes, array of shape (n, 2, 4).
    image_shape: (rows, cols) of the rasterization canvas. Defaults to the
      64x64 canvas that was previously hard-coded.

  Returns:
    The IoU averaged over all 2*n (prediction, ground truth) pairs. Returns
    0.0 when n == 0. A pair whose two rectangles both rasterize to zero
    pixels contributes 0 instead of turning the whole mean into NaN.
  """
  n = np.shape(b_gt)[0]
  pred_mask = np.zeros(image_shape, dtype=bool)
  gt_mask = np.zeros(image_shape, dtype=bool)
  iou = 0.0
  for i in range(n):
    for b in range(2):
      # Redraw both masks from scratch for this pair of boxes.
      for mask, box in ((pred_mask, b_pred[i, b]), (gt_mask, b_gt[i, b])):
        mask[:] = False
        rr, cc = polygon([box[0], box[0], box[2], box[2]],
                         [box[1], box[3], box[3], box[1]], image_shape)
        mask[rr, cc] = True

      union = np.sum(pred_mask | gt_mask)
      if union == 0:
        # Both rectangles are empty: 0/0 would yield NaN and poison the
        # running mean, so count this pair as IoU 0.
        continue
      intersection = np.sum(pred_mask & gt_mask)
      iou += (1.0 / (2 * n)) * (intersection / union)

  return iou


def evaluation(pred_class, pred_bboxes, prefix="valid"):
  """Score predictions against the ground truth stored on disk and print the result.

  Ground truth is read from `<prefix>_Y.npy` (class labels) and
  `<prefix>_bboxes.npy` (bounding boxes) in the current working directory.

  Args:
    pred_class: predicted classes for the 2 digits, shape [N, 2].
    pred_bboxes: predicted bounding boxes for the 2 digits, shape [N, 2, 4].
    prefix: file-name prefix selecting which split to score against.

  Returns:
    A tuple `(acc, iou)` with the classification accuracy and the mean IoU.
    Both values are also printed.
  """
  gt_class = np.load(prefix + "_Y.npy")
  gt_bboxes = np.load(prefix + "_bboxes.npy")
  acc = compute_classification_acc(pred_class, gt_class)
  iou = compute_iou(pred_bboxes, gt_bboxes)
  print(f"Classification Acc: {acc}")
  print(f"BBoxes IOU: {iou}")
  # Hand the metrics back so callers can use them programmatically instead of
  # only reading them from stdout.
  return acc, iou
  
 

In [0]:
import numpy as np
import tensorflow as tf
class MNIST(object):
    """In-memory dataset with a cyclic mini-batch iterator.

    Loads `<subset>_X.npy`, `<subset>_Y.npy` and `<subset>_bboxes.npy` from the
    current working directory. Images are reshaped to (N, 64, 64, 1).

    Attributes:
        _images, _labels, _bboxes: the arrays exactly as loaded (never shuffled).
        images, labels, bboxes: the arrays in the current (possibly shuffled) order.
        batch_size: number of samples returned by `get_next_batch`.
        num_samples: number of samples in the subset.
        shuffle: whether the order is re-drawn at construction and at every
            epoch boundary.
        next_batch_pointer: index of the next unread sample in the current order.
    """

    # Label printed in front of the array shapes after loading each subset.
    _SUBSET_TITLES = {'train': "training set:", 'valid': "validation set:"}

    def __init__(self, subset='train', batch_size=100, shuffle=True):
        """Load `subset` ('train' or 'valid') and prepare the batch iterator.

        Raises:
            NotImplementedError: if `subset` is not one of the supported names.
        """
        if subset not in self._SUBSET_TITLES:
            raise NotImplementedError(
                "unknown subset %r; expected one of %s"
                % (subset, sorted(self._SUBSET_TITLES)))

        images = np.load(subset + '_X.npy')
        images = np.reshape(images, (np.shape(images)[0], 64, 64, 1))
        labels = np.load(subset + '_Y.npy')
        bboxes = np.load(subset + '_bboxes.npy')
        print(self._SUBSET_TITLES[subset], images.shape, labels.shape, bboxes.shape)

        self._images = images
        self.images = self._images
        self._labels = labels
        self.labels = self._labels
        self._bboxes = bboxes
        self.bboxes = self._bboxes

        self.batch_size = batch_size
        self.num_samples = len(self.images)
        self.shuffle = shuffle
        if self.shuffle:
            self.shuffle_samples()
        self.next_batch_pointer = 0

    def shuffle_samples(self):
        """Draw a new random order, applied consistently to images, labels and bboxes."""
        image_indices = np.random.permutation(np.arange(self.num_samples))
        self.images = self._images[image_indices]
        self.labels = self._labels[image_indices]
        self.bboxes = self._bboxes[image_indices]

    def get_next_batch(self):
        """Return the next `(x_batch, y_batch, b_batch)` of `batch_size` samples.

        The iterator is cyclic: when the current pass runs out of samples the
        batch is completed from the start of the next pass (after reshuffling
        if `shuffle` is set). This also holds when `batch_size` is larger than
        the dataset, in which case the data is cycled as often as needed.

        Raises:
            ValueError: if a non-empty batch is requested from an empty dataset.
        """
        num_samples_left = self.num_samples - self.next_batch_pointer
        if num_samples_left >= self.batch_size:
            # Fast path: the whole batch fits in the remainder of this pass.
            start = self.next_batch_pointer
            stop = start + self.batch_size
            self.next_batch_pointer = stop
            return self.images[start:stop], self.labels[start:stop], self.bboxes[start:stop]

        if self.num_samples == 0:
            raise ValueError("cannot draw a batch from an empty dataset")

        # Slow path: stitch the batch together across one or more epoch
        # boundaries. Looping (instead of a single wrap) keeps the batch at
        # full size and the pointer in range even if batch_size > num_samples.
        x_parts, y_parts, b_parts = [], [], []
        needed = self.batch_size
        while needed > 0:
            if self.next_batch_pointer >= self.num_samples:
                if self.shuffle:
                    self.shuffle_samples()
                self.next_batch_pointer = 0
            start = self.next_batch_pointer
            stop = min(start + needed, self.num_samples)
            x_parts.append(self.images[start:stop])
            y_parts.append(self.labels[start:stop])
            b_parts.append(self.bboxes[start:stop])
            needed -= stop - start
            self.next_batch_pointer = stop

        x_batch = np.concatenate(x_parts, axis=0)
        y_batch = np.concatenate(y_parts, axis=0)
        b_batch = np.concatenate(b_parts, axis=0)
        return x_batch, y_batch, b_batch
        



In [0]:
from tensorflow.contrib.layers import flatten

def net(input, is_training, keep_prob):
    """Build the convolutional feature extractor and return its 840-wide output.

    Layer stack:
        conv 5x5x6  (SAME)  -> ReLU -> 2x2 max-pool
        conv 5x5x16 (VALID) -> ReLU -> 2x2 max-pool
        conv 5x5x32 (SAME)  -> ReLU -> 2x2 max-pool
        flatten -> dropout -> dense 1568->1300 + ReLU -> dense 1300->840 + ReLU

    The first dense layer expects 1568 flattened features, which is what a
    64x64x1 input produces (7 * 7 * 32). Every call creates its own new
    `tf.Variable` objects, so calling this twice builds two independent towers.

    Args:
        input: image batch tensor fed to the first convolution.
        is_training: forwarded to `tf.layers.dropout` as `training`.
        keep_prob: forwarded to `tf.layers.dropout` as `rate`.
            NOTE(review): in the TF API `rate` is the fraction of units that is
            dropped, not kept -- confirm the values passed by callers mean that.

    Returns:
        Tensor of shape (batch, 840).
    """

    def _trunc_normal_var(shape, name):
        # Weights start from a truncated normal with stddev 0.1.
        return tf.Variable(tf.truncated_normal(shape=shape, mean=0, stddev=0.1), name=name)

    def _zeros_var(width, name):
        # Biases start at zero.
        return tf.Variable(tf.zeros(width), name=name)

    def _conv_relu_pool(features, kernel_shape, padding, prefix):
        # One stride-1 convolution, ReLU, then a 2x2/stride-2 max-pool.
        kernel = _trunc_normal_var(kernel_shape, prefix + '_W')
        bias = _zeros_var(kernel_shape[-1], prefix + '_b')
        pre_activation = tf.nn.conv2d(features, kernel, strides=[1, 1, 1, 1], padding=padding) + bias
        activated = tf.nn.relu(pre_activation)
        return tf.nn.max_pool(activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    def _dense_relu(features, n_in, n_out, prefix):
        # Fully connected layer followed by ReLU.
        weights = _trunc_normal_var((n_in, n_out), prefix + '_W')
        bias = _zeros_var(n_out, prefix + '_b')
        return tf.nn.relu(tf.matmul(features, weights) + bias)

    conv_specs = (
        ('conv1', (5, 5, 1, 6), 'SAME'),
        ('conv2', (5, 5, 6, 16), 'VALID'),
        ('conv3', (5, 5, 16, 32), 'SAME'),
    )
    feature_map = input
    for prefix, kernel_shape, padding in conv_specs:
        feature_map = _conv_relu_pool(feature_map, kernel_shape, padding, prefix)

    fc0 = tf.layers.dropout(flatten(feature_map), rate=keep_prob, training=is_training)
    fc1 = _dense_relu(fc0, 1568, 1300, 'fc1')
    fc2 = _dense_relu(fc1, 1300, 840, 'fc2')
    return fc2

def find_class_net(fc2):
    """Classification head: project 840-wide features to logits of shape (batch, 2, 10).

    A single linear layer produces 20 values per sample, which are reshaped
    into 2 rows of 10 logits. The variables are created with
    `tf.get_variable` under the fixed names 'W1' and 'b1'.
    """
    num_slots, num_classes = 2, 10
    out_width = num_slots * num_classes

    weights = tf.get_variable(
        'W1', [840, out_width],
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    bias = tf.get_variable(
        'b1', [out_width],
        initializer=tf.constant_initializer(0.1))

    flat_logits = tf.matmul(fc2, weights) + bias
    return tf.reshape(flat_logits, [-1, num_slots, num_classes])


def find_b_net(fc2):
    """Bounding-box head: project 840-wide features to boxes of shape (batch, 2, 4).

    A single linear layer produces 8 values per sample, which are reshaped
    into 2 rows of 4 box components. The variables are created with
    `tf.get_variable` under the fixed names 'W2' and 'b2'.
    """
    num_boxes, box_width = 2, 4
    out_width = box_width * num_boxes

    weights = tf.get_variable(
        'W2', [840, out_width],
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    bias = tf.get_variable(
        'b2', [out_width],
        initializer=tf.constant_initializer(0.1))

    flat_boxes = tf.matmul(fc2, weights) + bias
    return tf.reshape(flat_boxes, [-1, num_boxes, box_width])


In [0]:

def train():
    """Train the classification and bounding-box networks and save a checkpoint.

    Builds a fresh graph with two independent `net` towers (one feeding the
    class head, one feeding the box head), optimizes the sum of the two
    per-digit cross-entropies and the box regression loss with Adam, prints
    training-batch metrics every 1000 iterations, and finally writes the
    variables to `ckpt/lenet-<last iteration>`.
    """
    import os  # local import: only needed here, to create the checkpoint directory

    BATCH_SIZE = 80
    NUM_ITERS = 50000
    LOG_EVERY = 1000

    train_set = MNIST('train', batch_size=BATCH_SIZE)

    tf.reset_default_graph()
    x = tf.placeholder(tf.float32, (None, 64, 64, 1))
    y = tf.placeholder(tf.int32, (None, 2))
    b = tf.placeholder(tf.float32, (None, 2, 4))
    # Fed as True for optimizer steps and False when measuring metrics, so
    # dropout is only active while actually training.
    is_training = tf.placeholder(tf.bool, ())

    rate = 0.0008
    fc_y = net(x, is_training, 0.5)
    fc_b = net(x, is_training, 0.2)

    fc_logits = find_class_net(fc_y)
    bboxes = find_b_net(fc_b)

    prediction = tf.argmax(fc_logits, axis=2)
    cross_entropy1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=fc_logits[:, 0], labels=y[:, 0]))
    cross_entropy2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=fc_logits[:, 1], labels=y[:, 1]))
    cross_entropy = cross_entropy1 + cross_entropy2

    # reduce_sum without an axis already collapses to a scalar (the L2 norm of
    # the whole batch's box error); the outer reduce_mean therefore leaves the
    # value unchanged. Kept as-is so the optimized objective is the same.
    loss_bboxes = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(b, bboxes)))))
    loss_operation = cross_entropy + loss_bboxes
    optimizer = tf.train.AdamOptimizer(learning_rate=rate).minimize(loss_operation)

    pred_y = prediction
    pred_b = bboxes
    saver = tf.train.Saver(max_to_keep=0)

    # Make sure the target directory exists before spending the whole training
    # run, rather than discovering a save problem at the very end.
    os.makedirs('ckpt', exist_ok=True)

    print("Training for classification and detection...")
    print("let's start!!! ")
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(NUM_ITERS):
            batch_x, batch_y, batch_b = train_set.get_next_batch()
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, b: batch_b, is_training: True})

            if i % LOG_EVERY == 0:
                # Predictions are only needed for logging, so run the forward
                # pass once, only on logging iterations, with dropout disabled.
                predd1, predd2 = sess.run([pred_y, pred_b], feed_dict={x: batch_x, is_training: False})
                acc1 = compute_classification_acc(predd1, batch_y)
                acc2 = compute_iou(predd2, batch_b)

                print("item: ", i, "current classification accuracy:", acc1, "current iou accuracy:", acc2)

        saver.save(sess, 'ckpt/lenet', global_step=i)

        print("Model saved")

In [0]:
def test(imgg):
    """Run the trained model on `imgg` and return its predictions.

    Rebuilds the inference graph (dropout disabled), restores the most recent
    checkpoint found in the `ckpt` directory and evaluates both heads in a
    single session run.

    Args:
        imgg: image batch of shape (N, 64, 64, 1). If None, the validation
            images are loaded and used, which matches what this function
            previously did regardless of its argument.

    Returns:
        `(yp_test, yb_test)`: predicted classes of shape (N, 2) and predicted
        bounding boxes of shape (N, 2, 4).
    """
    if imgg is None:
        imgg = MNIST('valid', shuffle=False)._images

    tf.reset_default_graph()

    x = tf.placeholder(tf.float32, (None, 64, 64, 1))
    fc_y = net(x, False, 1)
    fc_b = net(x, False, 1)

    fc_logits = find_class_net(fc_y)
    bboxes = find_b_net(fc_b)

    pred_y = tf.argmax(fc_logits, axis=2)
    pred_b = bboxes
    saver = tf.train.Saver(max_to_keep=0)

    with tf.Session() as sess:
        print("Testing...")

        saver.restore(sess, tf.train.latest_checkpoint('ckpt'))
        # One forward pass yields both outputs; the caller's images are used
        # instead of being replaced by a freshly loaded validation set.
        yp_test, yb_test = sess.run([pred_y, pred_b], feed_dict={x: imgg})

    return yp_test, yb_test

  
  

In [25]:
if __name__ == '__main__':
    # Set to True to run training first; otherwise the latest checkpoint
    # already present under ckpt/ is used for inference.
    TRAIN = False
    if TRAIN:
        train()

    # Load the validation images in their stored order so predictions line up
    # with the ground-truth files read by evaluation().
    valid_set = MNIST('valid', shuffle=False)
    imgg = valid_set._images

    predicted_y, predicted_b = test(imgg)

    # Prints classification accuracy and bounding-box IoU.
    acc = evaluation(predicted_y, predicted_b, prefix="valid")
    print("Done! Have a nice day : ) ")



validation set: (5000, 64, 64, 1) (5000, 2) (5000, 2, 4)
validation set: (5000, 64, 64, 1) (5000, 2) (5000, 2, 4)
Testing...
INFO:tensorflow:Restoring parameters from ckpt/lenet-49999
Classification Acc: 0.9576
BBoxes IOU: 0.8054558046382353
Done! Have a nice day : ) 
