/
test_gallery.py
228 lines (184 loc) · 8.05 KB
/
test_gallery.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
import numpy as np
import cv2
from fast_rcnn.config import cfg
from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv
from fast_rcnn.nms_wrapper import nms
from fast_rcnn.test_utils import get_image_blob, get_gt_boxes_blob
from fast_rcnn.test_probe import _im_exfeat
from utils.timer import Timer
def _im_detect(net, im, roidb, blob_names=None):
"""Detect object classes in an image given object proposals.
Arguments:
net (caffe.Net): Fast R-CNN network to use
im (ndarray): color image to test (in BGR order)
roidb (an roidb item): to provide gt_boxes if necessary
blob_names (list of str): list of feature blob names to be extracted
Returns:
boxes (ndarray): R x (4*K) array of predicted bounding boxes
scores (ndarray): R x K array of object class scores (K includes
background as object category 0)
features (dict of ndarray): {blob name: R x D array of features}
"""
im_blob, im_scales = get_image_blob(im)
assert len(im_scales) == 1, "Only single-image batch implemented"
blobs = {
'data': im_blob,
'im_info': np.array(
[[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
dtype=np.float32),
}
if 'gt_boxes' in net.blobs:
# Supply gt_boxes as input. Used to get pid_labels for proposals.
blobs['gt_boxes'] = get_gt_boxes_blob(
roidb['boxes'], roidb['gt_classes'], roidb['gt_pids'], im_scales)
# reshape network inputs
for k, v in blobs.iteritems():
net.blobs[k].reshape(*(v.shape))
# do forward
forward_kwargs = {k: v.astype(np.float32, copy=False)
for k, v in blobs.iteritems()}
blobs_out = net.forward(**forward_kwargs)
# unscale rois back to raw image space
rois = net.blobs['rois'].data.copy()
boxes = rois[:, 1:5] / im_scales[0]
if cfg.TEST.SVM:
# use the raw scores before softmax under the assumption they
# were trained as linear SVMs
scores = net.blobs['cls_score'].data
else:
# the first column of the pid_prob is the non-person box score
scores = blobs_out['pid_prob'][:, 0]
scores = scores[:, np.newaxis]
scores = np.hstack([scores, 1. - scores])
if cfg.TEST.BBOX_REG:
# Apply bounding-box regression deltas
box_deltas = blobs_out['bbox_pred']
# As we no longer scale and shift the bbox_pred weights when snapshot,
# we need to manually do this during test.
if cfg.TRAIN.BBOX_NORMALIZE_TARGETS and \
cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
num_classes = box_deltas.shape[1] // 4
stds = np.tile(cfg.TRAIN.BBOX_NORMALIZE_STDS, num_classes)
means = np.tile(cfg.TRAIN.BBOX_NORMALIZE_MEANS, num_classes)
box_deltas = box_deltas * stds + means
boxes = bbox_transform_inv(boxes, box_deltas)
boxes = clip_boxes(boxes, im.shape)
else:
# Simply repeat the boxes, once for each class
boxes = np.tile(boxes, (1, scores.shape[1]))
features = {blob: net.blobs[blob].data.copy() for blob in blob_names} \
if blob_names is not None else {}
return boxes, scores, features
def _vis_detections(im, class_name, dets, thresh=0.3):
"""Visual debugging of detections."""
import matplotlib.pyplot as plt
im = im[:, :, (2, 1, 0)]
for i in xrange(np.minimum(10, dets.shape[0])):
bbox = dets[i, :4]
score = dets[i, -1]
if score > thresh:
plt.cla()
plt.imshow(im)
plt.gca().add_patch(
plt.Rectangle((bbox[0], bbox[1]),
bbox[2] - bbox[0],
bbox[3] - bbox[1], fill=False,
edgecolor='g', linewidth=3)
)
plt.title('{} {:.3f}'.format(class_name, score))
plt.show()
def detect_and_exfeat(net, imdb,
start=None, end=None,
blob_names=None,
thresh=0.05, vis=False):
assert imdb.num_classes == 2, "Only support two-class detection"
assert cfg.TEST.HAS_RPN, "Only support RPN as proposal"
start = start or 0
end = end or imdb.num_images
num_images = end - start
# all detections are collected into:
# all_boxes[image] = N x 5 array of detections in
# (x1, y1, x2, y2, score)
# all_features[blob][image] = N x D array of features
all_boxes = [0 for _ in xrange(num_images)]
all_features = {} if blob_names is None else \
{blob: [0 for _ in xrange(num_images)]
for blob in blob_names}
# timers
_t = {'im_detect' : Timer(), 'misc' : Timer()}
for i in xrange(num_images):
im = cv2.imread(imdb.image_path_at(start + i))
roidb = imdb.roidb[start + i]
_t['im_detect'].tic()
boxes, scores, feat_dic = _im_detect(net, im, roidb, blob_names)
_t['im_detect'].toc()
_t['misc'].tic()
j = 1 # only consider j = 1 (foreground class)
inds = np.where(scores[:, j] > thresh)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
keep = nms(cls_dets, cfg.TEST.NMS)
all_boxes[i] = cls_dets[keep]
for blob, feat in feat_dic.iteritems():
all_features[blob][i] = feat[inds][keep]
_t['misc'].toc()
print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(i + 1, num_images,
_t['im_detect'].average_time, _t['misc'].average_time)
if vis:
_vis_detections(im, imdb.classes[j], all_boxes[i])
return all_boxes, all_features
def usegt_and_exfeat(net, imdb,
start=None, end=None, blob_names=None):
start = start or 0
end = end or imdb.num_images
num_images = end - start
# all detections are collected into:
# all_boxes[image] = N x 5 array of detections (gt) in
# (x1, y1, x2, y2, score)
# all_features[blob][image] = N x D array of features
all_boxes = [0 for _ in xrange(num_images)]
all_features = {} if blob_names is None else \
{blob: [0 for _ in xrange(num_images)]
for blob in blob_names}
# timers
_t = {'gt_exfeat' : Timer(), 'misc' : Timer()}
for i in xrange(num_images):
im = cv2.imread(imdb.image_path_at(start + i))
gt = imdb.roidb[start + i]['boxes']
_t['gt_exfeat'].tic()
feat_dic = _im_exfeat(net, im, gt, blob_names)
_t['gt_exfeat'].toc()
all_boxes[i] = np.hstack((gt, np.ones((gt.shape[0], 1)))) \
.astype(np.float32)
for blob, feat in feat_dic.iteritems():
all_features[blob][i] = feat
print 'gt_exfeat: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
_t['gt_exfeat'].average_time)
return all_boxes, all_features
def demo_detect(net, filename, blob_name='feat', threshold=0.5):
"""Detect persons in a gallery image and extract their features
Arguments:
net (caffe.Net): trained network
filename (str): path to a gallery image file (jpg or png)
blob_name (str): feature blob name. Default 'feat'
threshold (float): detection score threshold. Default 0.5
Returns:
boxes (ndarray): N x 5 detected boxes in format [x1, y1, x2, y2, score]
features (ndarray): N x D features matrix
"""
im = cv2.imread(filename)
boxes, scores, feat_dic = _im_detect(net, im, None, [blob_name])
j = 1 # only consider j = 1 (foreground class)
inds = np.where(scores[:, j] > threshold)[0]
cls_scores = scores[inds, j]
cls_boxes = boxes[inds, j*4:(j+1)*4]
boxes = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
keep = nms(boxes, cfg.TEST.NMS)
boxes = boxes[keep]
features = feat_dic[blob_name][inds][keep]
if boxes.shape[0] == 0:
return None, None
features = features.reshape(features.shape[0], -1)
return boxes, features