In [1]:
import os
import numpy as np
from scipy import misc
import argparse
import json
import cv2

from tensorpack import *
from train import Model
from reader import Data

from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
with open("params.json", 'r') as f:
    params = json.load(f)

sess_init = SaverRestore("model-debug")
model = Model(params)
predict_config = PredictConfig(session_init=sess_init,
                               model=model,
                               input_names=["input", "tx", "ty", "tw", "th", "tprob", "spec_mask", "truth_box"],
                               output_names=["pred_x", "pred_y", "pred_w", "pred_h", "pred_conf", "pred_prob", "x_loss", "y_loss", "w_loss", "h_loss", "p_loss", "c_loss"])

In [3]:
predict_func = OfflinePredictor(predict_config)

[32m[0411 23:51:21 @common.py:94][0m conv1_1 input: [None, 3, None, None]
[32m[0411 23:51:21 @common.py:102][0m conv1_1 output: [None, 32, None, None]
[32m[0411 23:51:22 @common.py:94][0m pool1 input: [None, 32, None, None]
[32m[0411 23:51:22 @common.py:102][0m pool1 output: [None, 32, None, None]
[32m[0411 23:51:22 @common.py:94][0m conv2_1 input: [None, 32, None, None]
[32m[0411 23:51:22 @common.py:102][0m conv2_1 output: [None, 64, None, None]
[32m[0411 23:51:22 @common.py:94][0m pool2 input: [None, 64, None, None]
[32m[0411 23:51:22 @common.py:102][0m pool2 output: [None, 64, None, None]
[32m[0411 23:51:22 @common.py:94][0m conv3_1 input: [None, 64, None, None]
[32m[0411 23:51:22 @common.py:102][0m conv3_1 output: [None, 128, None, None]
[32m[0411 23:51:22 @common.py:94][0m conv3_2 input: [None, 128, None, None]
[32m[0411 23:51:22 @common.py:102][0m conv3_2 output: [None, 64, None, None]
[32m[0411 23:51:22 @common.py:94][0m conv3_3 input: [None, 64, None, 

In [4]:
def non_maximum_suppression(boxes, overlapThresh):
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []
    boxes = np.asarray(boxes).astype("float")
 
    # initialize the list of picked indexes 
    pick = []
 
    # grab the coordinates of the bounding boxes
    x1 = boxes[:,0]
    y1 = boxes[:,1]
    x2 = boxes[:,2]
    y2 = boxes[:,3]
 
    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
 
    # keep looping while some indexes still remain in the indexes
    # list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the
        # index value to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
 
        # find the largest (x, y) coordinates for the start of
        # the bounding box and the smallest (x, y) coordinates
        # for the end of the bounding box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
 
        # compute the width and height of the bounding box
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
 
        # compute the ratio of overlap
        overlap = (w * h) / area[idxs[:last]]
 
        # delete all indexes from the index list that have
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))
 
    # return only the bounding boxes that were picked using the
    # integer data type
    return boxes[pick].astype("int")

In [5]:
def predict_image(image, params, predict_func, anchors, threshold, colors, overlapThresh):
    image = np.expand_dims(image, axis=0)
    spec_mask = np.zeros((1, 5, params["image_height"] // 32, params["image_width"] // 32), dtype=float) == 0
    
    predictions = predict_func([image, spec_mask, tx, ty, tw, th, tprob, spec_mask, truth_box])
    
    [pred_x, pred_y, pred_w, pred_h, pred_conf, pred_prob, x_loss, y_loss, w_loss, h_loss, p_loss, c_loss] = predictions
    
    idxes = np.where(pred_conf > threshold)
    
    anchor_idxes = idxes[1]
    height_idxes = idxes[3]
    width_idxes = idxes[4]
    pred_box_num = len(anchor_idxes)
    
#     print(pred_box_num)
    
    image_result = np.copy(image[0])
    boxes = {}
    for i in range(pred_box_num):
        anchor = anchors[anchor_idxes[i]]
        w = pred_w[0, anchor_idxes[i], 0, height_idxes[i], width_idxes[i]]
        h = pred_h[0, anchor_idxes[i], 0, height_idxes[i], width_idxes[i]]
        x = pred_x[0, anchor_idxes[i], 0, height_idxes[i], width_idxes[i]]
        y = pred_y[0, anchor_idxes[i], 0, height_idxes[i], width_idxes[i]]
        p = np.argmax(pred_prob[0, anchor_idxes[i], :, height_idxes[i], width_idxes[i]])
        conf = pred_conf[0, anchor_idxes[i], 0, height_idxes[i], width_idxes[i]]

        center_w_cell = width_idxes[i] + x
        center_h_cell = height_idxes[i] + y
        box_w_cell = np.exp(w) * anchor[0]
        box_h_cell = np.exp(h) * anchor[1]

        center_w_pixel = center_w_cell * 32
        center_h_pixel = center_h_cell * 32
        box_w_pixel = box_w_cell * 32
        box_h_pixel = box_h_cell * 32

        top_left_x = (int)(center_w_pixel - box_w_pixel // 2)
        top_left_y = (int)(center_h_pixel - box_h_pixel // 2)
        bottom_right_x = (int)(center_w_pixel + box_w_pixel // 2)
        bottom_right_y = (int)(center_h_pixel + box_h_pixel // 2)
        
        box = [top_left_x, top_left_y, bottom_right_x, bottom_right_y]
        if p not in boxes.keys():
            boxes[p] = []
        boxes[p].append(box)
    
    for p, k_boxes in boxes.items():
        
        # non-maximum suppression
        k_boxes = non_maximum_suppression(k_boxes, overlapThresh)

        for k_box in k_boxes:
            
            [top_left_x, top_left_y, bottom_right_x, bottom_right_y] = k_box
        
            label_height = 14
            label_width = len(klasses[p]) * 10

            cv2.rectangle(image_result,
                          (top_left_x, top_left_y),
                          (bottom_right_x, bottom_right_y),
                          colors[p],
                          3)
            cv2.rectangle(image_result,
                          (top_left_x - 2, top_left_y - label_height),
                          (top_left_x + label_width, top_left_y),
                          colors[p],
                          -1)
    #         print(klasses[p] + "(" + str(conf) + "): " +
    #               "[" + str(top_left_x) + ", " + str(top_left_y) + "]; " +
    #               "[" + str(bottom_right_x) + ", " + str(bottom_right_y) + "]")
            cv2.putText(image_result,
                        klasses[p],
                        (top_left_x, top_left_y - 3),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (255, 255, 255))
#     plt.imshow(image_result)
    return image_result

In [6]:
with open("params.json", 'r') as f:
    params = json.load(f)
ds = Data("voc_2007_train.txt", params=params, shuffle=False)

In [43]:
ds.reset_state()
x_loss_list = []
y_loss_list = []
w_loss_list = []
h_loss_list = []
p_loss_list = []
c_loss_list = []
bad_data = []
bad_list = []
data_idx = 0
for data in ds.get_data():
    if data_idx % 100 == 0:
        print(data_idx)
    for idx, ele in enumerate(data):
        data[idx] = np.expand_dims(ele, axis=0)
    [pred_x, pred_y, pred_w, pred_h, pred_conf, pred_prob, x_loss, y_loss, w_loss, h_loss, p_loss, c_loss] = predict_func(data)
    x_loss_list.append(x_loss)
    y_loss_list.append(y_loss)
    w_loss_list.append(w_loss)
    h_loss_list.append(h_loss)
    p_loss_list.append(p_loss)
    c_loss_list.append(c_loss)
    if c_loss > 0.05:
        bad_data.append(data)
        bad_list.append(ds.imglist[data_idx])
        break
    data_idx += 1

0


In [44]:
bad_list

['/home/jesse/tensorflow_workspace/tensorpack/examples/YOLOv2/VOC2007/JPEGImages/000032.jpg 104 78 375 183 0 133 88 197 123 0 195 180 213 229 14 26 189 44 238 14']

In [45]:
bad_data

[[array([[[[131, 193, 208],
           [131, 193, 208],
           [132, 194, 209],
           ..., 
           [182, 232, 241],
           [182, 232, 241],
           [182, 232, 241]],
  
          [[131, 193, 209],
           [132, 193, 209],
           [133, 195, 210],
           ..., 
           [182, 232, 241],
           [182, 232, 241],
           [182, 232, 241]],
  
          [[132, 194, 209],
           [132, 194, 209],
           [134, 195, 211],
           ..., 
           [183, 233, 242],
           [183, 233, 242],
           [182, 232, 241]],
  
          ..., 
          [[  5,  14,  13],
           [  9,  17,  17],
           [ 11,  16,  19],
           ..., 
           [ 15,  33,  37],
           [ 17,  34,  38],
           [ 17,  35,  39]],
  
          [[  5,  15,  14],
           [  9,  17,  17],
           [  9,  16,  19],
           ..., 
           [ 15,  33,  38],
           [ 15,  32,  38],
           [ 17,  34,  40]],
  
          [[  5,  15,  14],
           

In [11]:
x_loss_list[0]

9.6956355e-05

In [14]:
np.max(p_loss_list)

1.0000964

In [30]:
p_idx = sorted_p_loss = np.sort(p_loss_list)
c_idx = sorted_c_loss = np.sort(c_loss_list)

In [31]:
p_idx

array([  7.39363681e-10,   1.51797386e-09,   2.62584066e-09, ...,
         5.29251527e-03,   1.24613848e-02,   1.00009644e+00], dtype=float32)

In [27]:
sorted_p_loss[2495:]

array([ 0.00445849,  0.00492434,  0.00513255,  0.00529252,  0.01246138,
        1.00009644], dtype=float32)

In [29]:
sorted_c_loss[2400:]

array([ 0.03447028,  0.03472802,  0.03525252,  0.03540563,  0.03545736,
        0.03566142,  0.03586451,  0.03614511,  0.03631044,  0.03631919,
        0.03633744,  0.03635303,  0.03654425,  0.03678494,  0.03703486,
        0.0371916 ,  0.03746598,  0.03763179,  0.03802134,  0.03838405,
        0.03852731,  0.03871458,  0.03879872,  0.03883488,  0.03924682,
        0.03929206,  0.03935926,  0.04086077,  0.04101177,  0.04117264,
        0.04212134,  0.04342242,  0.04352136,  0.04412831,  0.0452386 ,
        0.04543266,  0.04564498,  0.04564526,  0.04634779,  0.04747336,
        0.04873401,  0.04909523,  0.04942206,  0.05025457,  0.05256902,
        0.05275305,  0.05487084,  0.05523297,  0.05527964,  0.05582826,
        0.05745417,  0.0583499 ,  0.06120657,  0.06409312,  0.06454826,
        0.0664325 ,  0.0670618 ,  0.06729587,  0.07054543,  0.09072813,
        0.09275127,  0.09563409,  0.10731436,  0.12692092,  0.1364727 ,
        0.35784426,  0.40061739,  0.5198102 ,  0.66446805,  0.67

In [5]:
with open("params.json", 'r') as f:
    params = json.load(f)
    
anchors = [[1.06593733, 1.03880763], [2.08908397, 4.90636738], [2.35326204, 1.43357071], [4.52926972, 2.49737608], [8.66448722, 4.9158313]]
threshold = 0.2
overlapThresh = 0.4
colors = [(255,0,0), (0,255,0)]
klasses = ['Car', 'Pedestrian']

In [6]:
image = cv2.imread("kitti/data_object_image_2/training/image_2/004533.png")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (params["image_width"], params["image_height"]))
# image = image[:,640:,:]
print(image.shape)
# params["image_width"] = int(params["image_width"] // 2)
image_result = predict_image(image, params, predict_func, anchors, threshold, colors, overlapThresh)
plt.imshow(image_result)

write_image = cv2.cvtColor(image_result, cv2.COLOR_BGR2RGB)
cv2.imwrite("output.png", write_image)

(416, 416, 3)


NameError: name 'predict_func' is not defined

In [66]:
video_filename = 'test_videos/2.mov'

cap = cv2.VideoCapture(video_filename)

frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

print(frame_count)

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter("2_del.avi", fourcc, fps, (params["image_width"], params["image_height"]))

626


In [67]:
while cap.isOpened():
    frame_idx = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
    ret, frame = cap.read()
    if ret != True:
        break
    frame = frame[252:252 + 576,:,:]
    frame = cv2.resize(frame, (params["image_width"], params["image_height"]), interpolation = cv2.INTER_AREA)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image_result = predict_image(frame, params, predict_func, anchors, threshold, colors, overlapThresh)
    image_result = cv2.cvtColor(image_result, cv2.COLOR_BGR2RGB)
    if frame_idx % 100 == 0:
        print(frame_idx)
        cv2.imwrite(str(frame_idx) + ".png", frame)
        cv2.imwrite(str(frame_idx) + "_output.png", image_result)
    if frame_idx > 0:
        out.write(image_result)
#     if frame_idx >= 300:
#         break

0
100
200
300
400
500
600


In [11]:
out.release()