-
Notifications
You must be signed in to change notification settings - Fork 4
/
ApproxDetectionProfilerSw.py
116 lines (100 loc) · 4.93 KB
/
ApproxDetectionProfilerSw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
'''
Evaluate the switching overhead of an object detection model along the "shape"
and "number of proposals" tuning knobs, on an 800+ frame video snippet.
All 28*27=756 ordered pairs of distinct branches (4 shapes x 7 nprops = 28 branches) are studied.
shape(4) in [224, 320, 448, 576]
nprop(7) in [1, 3, 5, 10, 20, 50, 100]
Preheating and test images are both from a validation snippet
"Data/VID/train/ILSVRC2015_VID_train_0002/ILSVRC2015_train_00703000"
Usage:
python3 ApproxDetectionProfilerSw.py --dataset_prefix=/home/nvidia/ILSVRC2015/ \
--weight=models/ApproxDet.pb --output=test/VID_switchingoverhead_run0.txt
'''
import numpy as np
import argparse, os, time, random, tqdm
from PIL import Image
import tensorflow as tf
from utils_approxdet.detection_helper import load_graph_from_file
from utils_approxdet.detection_helper import output_dict_to_bboxes_single_img
if __name__== "__main__":
# Argument parsing
parser = argparse.ArgumentParser(description=('Evaluate the switching '
'overhead of an object detection model along the "shape" and "number of '
'proposals" tuning knobs'))
parser.add_argument('--dataset_prefix', dest='dataset_prefix',
help='The path to the dataset.')
parser.add_argument('--weight', dest='weight',
help='The path to the weight file.')
parser.add_argument('--output', dest='output', required=True,
help='The filename of the latency logs.')
args = parser.parse_args()
# Output log file
fout = open(args.output, "w")
# Set TensorFlow config
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
# Load detection DNN
detection_graph = load_graph_from_file(args.weight)
# Load the list of test video frames:
# 28 branches * 3 images + 28*27 switching pairs * 20 images = 15204
img_dir = "Data/VID/train/ILSVRC2015_VID_train_0002/ILSVRC2015_train_00703000"
image_paths = ["{}/{:06d}.JPEG".format(img_dir, x%16+800) for x in range(28*3)] + \
["{}/{:06d}.JPEG".format(img_dir, x%800) for x in range(28*27*20)]
# Construct the experiments
all_shapes = [224, 320, 448, 576]
all_nprops = [100, 50, 20, 10, 5, 3, 1]
configs = [(nprop, shape) for nprop in all_nprops for shape in all_shapes]
all_pairs = []
for from_branch in range(28):
for to_branch in range(28):
if from_branch != to_branch:
all_pairs.append((configs[from_branch][0], configs[from_branch][1],
configs[to_branch][0], configs[to_branch][1]))
random.shuffle(all_pairs)
nprops, shapes = [], []
for nprop, shape in configs:
nprops += [nprop] * 3
shapes += [shape] * 3
for from_nprop, from_shape, to_nprop, to_shape in all_pairs:
nprops += [from_nprop] * 10
nprops += [to_nprop] * 10
shapes += [from_shape] * 10
shapes += [to_shape] * 10
# Run object detection
with detection_graph.as_default():
graph = tf.compat.v1.get_default_graph()
tensor_frame = graph.get_tensor_by_name('image_tensor:0')
tensor_nprop = graph.get_tensor_by_name('ApproxDet_num_proposals:0')
tensor_shape = graph.get_tensor_by_name('ApproxDet_min_dim:0')
output_tensor_dict = {}
for key in ['num_detections', 'detection_boxes', 'detection_scores',
'detection_classes']:
output_tensor_dict[key] = graph.get_tensor_by_name(key + ':0')
with tf.compat.v1.Session(config=config) as sess:
for idx in tqdm.tqdm(range(len(image_paths))):
path, nprop, shape = image_paths[idx], nprops[idx], shapes[idx]
# 1. Load a frame from storage
time1 = time.time()
full_path = os.path.join(args.dataset_prefix, path)
image_pil = Image.open(full_path)
# 2. detection DNN
time2 = time.time()
image_np = np.array(image_pil).astype(np.uint8)
image_4D = np.expand_dims(image_np, axis=0)
feed_dict = {tensor_frame: image_4D, tensor_nprop: nprop,
tensor_shape: shape}
output_dict = sess.run(output_tensor_dict, feed_dict = feed_dict)
time3 = time.time()
# 3. Post processing
bboxes = output_dict_to_bboxes_single_img(output_dict)
# 4. Print latency results to file
if idx >= 28*3: # after pre-heating
nobj = len(bboxes)
height, width = image_np.shape[:2]
loading_lat, inf_lat = (time2-time1)*1e3, (time3-time2)*1e3
sizes = [(ymax-ymin)*(xmax-xmin)*height*width \
for _, _, ymin, xmin, ymax, xmax in bboxes]
avgsize = np.sqrt(np.sum(sizes)) if sizes else 0
print("{} {} {} {} {} {} {}".format(path, nprop, shape,
nobj, avgsize, loading_lat, inf_lat), file = fout)
fout.close()