-
Notifications
You must be signed in to change notification settings - Fork 4
/
ApproxDetection.py
138 lines (124 loc) · 6.38 KB
/
ApproxDetection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
'''
Evaluate the accuracy and latency of an object detection model on a dataset.
Usage:
python3 ApproxDetection.py --imagefiles=test/VID_testimg_00106000.txt \
--repeat=1 --preheat=1 --dataset_prefix=/home/nvidia/ILSVRC2015/ \
--shape=576 --nprop=100 --weight=models/ApproxDet.pb \
--output=test/VID_tmp.txt
'''
import numpy as np
import argparse, os, time, tqdm
from PIL import Image
import tensorflow as tf
from utils_approxdet.detection_helper import load_graph_from_file
from utils_approxdet.detection_helper import output_dict_to_bboxes_single_img
# Network configurations swept while preheating when no fixed
# (shape, nprop) pair is given on the command line.
SHAPES = [224, 320, 448, 576]
NPROPS = [100, 50, 20, 10, 5, 3, 1]

# Hard-coded directory (relative to --dataset_prefix) of the preheating frames.
PREHEAT_IMG_DIR = "Data/VID/train/ILSVRC2015_VID_train_0002/ILSVRC2015_train_00703000"


def parse_bool_flag(value):
    '''
    Convert a command-line string into a bool.

    argparse hands options over as strings, and every non-empty string is
    truthy, so "--preheat=0" used to enable preheating. Here "0", "false",
    "no", "off" and the empty string disable the flag; "1", "true", "yes",
    "on" and any other non-zero integer enable it.

    Raises:
      argparse.ArgumentTypeError: if the text is not a recognised boolean.
    '''
    text = str(value).strip().lower()
    if text in ("", "0", "false", "f", "no", "n", "off"):
        return False
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    try:
        return bool(int(text))
    except ValueError:
        raise argparse.ArgumentTypeError(
            "expected a boolean such as 0/1 or true/false, "
            "got {!r}".format(value)) from None


def read_image_configs(lines, shape=None, nprop=None):
    '''
    Build the list of (path, shape, nprop, preheat_flag) test tuples.

    Args:
      lines: iterable of text lines; the first whitespace-separated field of
        each line is the image path.
      shape, nprop: when both are set they are applied to every image;
        otherwise the 2nd and 3rd fields of each line are read as the
        per-image shape and nprop.

    Returns:
      A list of tuples whose last element (the preheat flag) is False.

    Raises:
      ValueError: if a per-image config is needed but a line has fewer than
        three fields.
    '''
    fixed_config = bool(shape and nprop)
    configs = []
    for lineno, line in enumerate(lines, start=1):
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        if fixed_config:
            configs.append((fields[0], shape, nprop, False))
            continue
        if len(fields) < 3:
            raise ValueError(
                "line {}: expected '<image> <shape> <nprop>', "
                "got {!r}".format(lineno, line.strip()))
        configs.append((fields[0], int(fields[1]), int(fields[2]), False))
    return configs


def build_preheat_configs(shape=None, nprop=None, runs=3):
    '''
    Build the (path, shape, nprop, preheat_flag) tuples used for preheating.

    With a fixed shape and nprop, that single config is run `runs` times.
    Otherwise every combination of SHAPES x NPROPS is run `runs` times each
    (28 configs * 3 images with the defaults). Consecutive frames of
    PREHEAT_IMG_DIR are used, one per run, and the flag is always True.
    '''
    if shape and nprop:
        settings = [(shape, nprop)] * runs
    else:
        settings = [(s, p) for s in SHAPES for p in NPROPS
                    for _ in range(runs)]
    return [("{}/{:06d}.JPEG".format(PREHEAT_IMG_DIR, idx), s, p, True)
            for idx, (s, p) in enumerate(settings)]


def output_filenames(output, shape=None, nprop=None):
    '''
    Derive the detection and latency log filenames from --output.

    The extension of `output` is dropped; when both shape and nprop are set,
    "_nprop{nprop}_shape{shape}" is inserted before the "_det.txt" /
    "_lat.txt" suffixes.

    Returns:
      (detoutput_filename, latoutput_filename)
    '''
    # os.path.splitext only looks at the last path component, so a dot in a
    # directory name (e.g. "../out/result") does not truncate the path.
    stem = os.path.splitext(output)[0]
    if shape and nprop:
        stem += "_nprop{}_shape{}".format(nprop, shape)
    return stem + "_det.txt", stem + "_lat.txt"


def main():
    '''Run the detection model over the listed frames and log the results.'''
    # Argument parsing
    parser = argparse.ArgumentParser(description=('Evaluate the latency and '
        'accuracy of the detection model on a dataset.'))
    parser.add_argument('--imagefiles', dest='imagefiles', required=True,
        help='The path to the video frames.')
    parser.add_argument('--repeat', dest='repeat', type=int, default=1,
        help='How many times do we repeatly run on imagefiles.')
    parser.add_argument('--preheat', dest='preheat', type=parse_bool_flag,
        default=False,
        help='Whether to preheat the branches in detection DNN.')
    parser.add_argument('--nprop', dest='nprop', type=int,
        help='Number of proposals in the RPN of the detection DNN.')
    parser.add_argument('--shape', dest='shape', type=int,
        help='The resized shape of the video frames. (smaller of height and width)')
    # Default to "" so os.path.join() leaves the listed paths unchanged when
    # no prefix is given (a None prefix would raise TypeError).
    parser.add_argument('--dataset_prefix', dest='dataset_prefix', default='',
        help='The path to the dataset.')
    parser.add_argument('--weight', dest='weight',
        help='The path to the weight file.')
    parser.add_argument('--output', dest='output', required=True,
        help='The filename of the detection and latency output, suffix will add.')
    args = parser.parse_args()

    # Load the detection DNN
    detection_graph = load_graph_from_file(args.weight)

    # Load the list of the test video frames
    with open(args.imagefiles) as f:
        test_img_configs = read_image_configs(f, args.shape, args.nprop)
    test_img_configs = test_img_configs * args.repeat

    # Preheating frames run first and are excluded from the logs
    if args.preheat:
        test_img_configs = (build_preheat_configs(args.shape, args.nprop)
                            + test_img_configs)

    # Output log files
    detoutput_filename, latoutput_filename = output_filenames(
        args.output, args.shape, args.nprop)

    # Set TensorFlow config
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # Run object detection; the `with` closes both logs even if a frame fails
    with open(detoutput_filename, "w") as fout_det, \
            open(latoutput_filename, "w") as fout_lat, \
            detection_graph.as_default():
        graph = tf.compat.v1.get_default_graph()
        tensor_frame = graph.get_tensor_by_name('image_tensor:0')
        tensor_nprop = graph.get_tensor_by_name('ApproxDet_num_proposals:0')
        tensor_shape = graph.get_tensor_by_name('ApproxDet_min_dim:0')
        output_tensor_dict = {
            key: graph.get_tensor_by_name(key + ':0')
            for key in ['num_detections', 'detection_boxes',
                        'detection_scores', 'detection_classes']}

        with tf.compat.v1.Session(config=config) as sess:
            for path, shape, nprop, preheat_flag in tqdm.tqdm(test_img_configs):
                # 1. Load a frame from the storage
                # NOTE(review): PIL defers decoding until the pixel data is
                # accessed, so the decode cost presumably lands in the
                # inference latency below -- confirm this split is intended.
                time1 = time.time()
                full_path = os.path.join(args.dataset_prefix, path)
                image_pil = Image.open(full_path)

                # 2. Run the detection DNN
                time2 = time.time()
                image_np = np.array(image_pil).astype(np.uint8)
                image_4D = np.expand_dims(image_np, axis=0)
                feed_dict = {tensor_frame: image_4D, tensor_nprop: nprop,
                             tensor_shape: shape}
                output_dict = sess.run(output_tensor_dict, feed_dict=feed_dict)
                time3 = time.time()

                # 3. Post processing
                bboxes = output_dict_to_bboxes_single_img(output_dict)

                # 4. Print the detection bounding boxes, latency results
                if preheat_flag:
                    continue
                for cls, conf, ymin, xmin, ymax, xmax in bboxes:
                    print("{} {} {} {} {} {} {}".format(path, cls,
                          conf, ymin, xmin, ymax, xmax), file=fout_det)

                height, width = image_np.shape[:2]
                loading_lat, inf_lat = (time2-time1)*1e3, (time3-time2)*1e3
                # Latency line: frame info, both latencies in ms, the number
                # of boxes, then the area of every box.
                fields = [path, height, width, shape, nprop, loading_lat,
                          inf_lat, len(bboxes)]
                fields.extend((ymax-ymin)*(xmax-xmin)
                              for _, _, ymin, xmin, ymax, xmax in bboxes)
                print(" ".join("{}".format(v) for v in fields), file=fout_lat)


if __name__ == "__main__":
    main()