In [1]:
import caffe
import numpy as np
import sys, getopt
import cv2

In [2]:
def interpret_output(output, img_width, img_height):
    classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"]
    w_img = img_width
    h_img = img_height
    # print w_img, h_img
    threshold = 0.02
    iou_threshold = 0.5
    num_class = 20
    num_box = 2
    grid_size = 7
    probs = np.zeros((7,7,2,20))

    class_probs = np.reshape(output[0:980],(7,7,20))

#   print class_probs
    scales = np.reshape(output[980:1078],(7,7,2))
#   print scales
    boxes = np.reshape(output[1078:],(7,7,2,4))
    offset = np.transpose(np.reshape(np.array([np.arange(7)]*14),(2,7,7)),(1,2,0))

    boxes[:,:,:,0] += offset
    boxes[:,:,:,1] += np.transpose(offset,(1,0,2))
    boxes[:,:,:,0:2] = boxes[:,:,:,0:2] / 7.0
    boxes[:,:,:,2] = np.multiply(boxes[:,:,:,2],boxes[:,:,:,2])
    boxes[:,:,:,3] = np.multiply(boxes[:,:,:,3],boxes[:,:,:,3])
        
    boxes[:,:,:,0] *= w_img
    boxes[:,:,:,1] *= h_img
    boxes[:,:,:,2] *= w_img
    boxes[:,:,:,3] *= h_img

    for i in range(2):
        for j in range(20):
            probs[:,:,i,j] = np.multiply(class_probs[:,:,j],scales[:,:,i])

    filter_mat_probs = np.array(probs>=threshold,dtype='bool')
    filter_mat_boxes = np.nonzero(filter_mat_probs)

    boxes_filtered = boxes[filter_mat_boxes[0],filter_mat_boxes[1],filter_mat_boxes[2]]
    probs_filtered = probs[filter_mat_probs]
    classes_num_filtered = np.argmax(probs,axis=3)[filter_mat_boxes[0],filter_mat_boxes[1],filter_mat_boxes[2]]
    argsort = np.array(np.argsort(probs_filtered))[::-1]
    boxes_filtered = boxes_filtered[argsort]
    probs_filtered = probs_filtered[argsort]
    classes_num_filtered = classes_num_filtered[argsort]
        
    for i in range(len(boxes_filtered)):
        if probs_filtered[i] == 0 : continue
        for j in range(i+1,len(boxes_filtered)):
            if iou(boxes_filtered[i],boxes_filtered[j]) > iou_threshold : 
                probs_filtered[j] = 0.0
        
    filter_iou = np.array(probs_filtered>0.0,dtype='bool')
    boxes_filtered = boxes_filtered[filter_iou]
    probs_filtered = probs_filtered[filter_iou]
    classes_num_filtered = classes_num_filtered[filter_iou]

    result = []
    for i in range(len(boxes_filtered)):
        result.append([classes[classes_num_filtered[i]],boxes_filtered[i][0],boxes_filtered[i][1],boxes_filtered[i][2],boxes_filtered[i][3],probs_filtered[i]])

    return result

In [3]:
def read_log_file(file):  
    with open(file, 'r') as f:
        raw_data = f.readlines()
    raw_data_split = raw_data[0].split()

    np_raw_data = np.asarray(raw_data_split)

    fp_raw_data = np_raw_data.astype(np.float)

    return fp_raw_data

In [4]:
def iou(box1,box2):
    tb = min(box1[0]+0.5*box1[2],box2[0]+0.5*box2[2])-max(box1[0]-0.5*box1[2],box2[0]-0.5*box2[2])
    lr = min(box1[1]+0.5*box1[3],box2[1]+0.5*box2[3])-max(box1[1]-0.5*box1[3],box2[1]-0.5*box2[3])
    if tb < 0 or lr < 0 : intersection = 0
    else : intersection =  tb*lr
    return intersection / (box1[2]*box1[3] + box2[2]*box2[3] - intersection)

In [5]:
def show_results(img,results, img_width, img_height):
    img_cp = img.copy()
    disp_console = True
    imshow = True
    for i in range(len(results)):
        x = int(results[i][1])
        y = int(results[i][2])
        w = int(results[i][3])//2
        h = int(results[i][4])//2
        print '    class : ' + results[i][0] + ' , [x,y,w,h]=[' + str(x) + ',' + str(y) + ',' + str(int(results[i][3])) + ',' + str(int(results[i][4]))+'], Confidence = ' + str(results[i][5]) 
        xmin = x-w
        xmax = x+w
        ymin = y-h
        ymax = y+h
        if xmin<0:
            xmin = 0
        if ymin<0:  
            ymin = 0
        if xmax>img_width:
            xmax = img_width
        if ymax>img_height:
            ymax = img_height
        if  imshow:
            cv2.rectangle(img_cp,(xmin,ymin),(xmax,ymax),(0,255,0),2)
            print xmin, ymin, xmax, ymax
            cv2.rectangle(img_cp,(xmin,ymin-20),(xmax,ymin),(125,125,125),-1)
            cv2.putText(img_cp,results[i][0] + ' : %.2f' % results[i][5],(xmin+5,ymin-7),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,0),1)
    if imshow :
        cv2.imshow('YOLO detection',img_cp)
        cv2.waitKey(1000)

In [6]:
rawdump = "/home/caslab/workspace/caffe/caffe_python/yolo_caffe/caffe-yolo/output.dimg"
img_filename = "/home/caslab/workspace/caffe/caffe_python/yolo_caffe/caffe-yolo/224x224.jpg"

np_raw_data = read_log_file(rawdump)
img = caffe.io.load_image(img_filename)
img_cv = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

results = interpret_output(np_raw_data, img.shape[1] , img.shape[0])

show_results(img_cv,results, 224, 224)

    class : sofa , [x,y,w,h]=[102,92,309,295], Confidence = 0.48109533224499995
0 0 224 224
    class : pottedplant , [x,y,w,h]=[15,-145,14,77], Confidence = 0.4364443602
8 0 22 -107
    class : cow , [x,y,w,h]=[34,38,812,829], Confidence = 0.435499607544
0 0 224 224
    class : train , [x,y,w,h]=[97,77,5,62], Confidence = 0.400093594101
95 46 99 108
    class : sofa , [x,y,w,h]=[115,92,114,106], Confidence = 0.344943159134
58 39 172 145
    class : motorbike , [x,y,w,h]=[113,125,145,8], Confidence = 0.336756833313
41 121 185 129
    class : pottedplant , [x,y,w,h]=[5,-86,35,20], Confidence = 0.28726173207
0 0 22 -76
    class : tvmonitor , [x,y,w,h]=[97,-300,1874,205], Confidence = 0.27815239711
0 0 224 -198
    class : boat , [x,y,w,h]=[101,148,1,0], Confidence = 0.24687415612799998
101 148 101 148
    class : dog , [x,y,w,h]=[11,74,21,68], Confidence = 0.237049958992
1 40 21 108
    class : boat , [x,y,w,h]=[127,98,22,33], Confidence = 0.22898667366800002
116 82 138 114
    class : 