-
Notifications
You must be signed in to change notification settings - Fork 11
/
model.py
255 lines (206 loc) · 9.43 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 30 16:54:02 2018
@author: shirhe-lyh
"""
import tensorflow as tf
from abc import ABCMeta
from abc import abstractmethod
slim = tf.contrib.slim
class BaseModel(object):
"""Abstract base class for any model."""
__metaclass__ = ABCMeta
def __init__(self, num_classes_dict):
"""Constructor.
Args:
num_classes: Number of classes.
"""
self._num_classes_dict = num_classes_dict
@property
def num_classes_dict(self):
return self._num_classes_dict
@abstractmethod
def preprocess(self, inputs):
"""Input preprocessing. To be override by implementations.
Args:
inputs: A float32 tensor with shape [batch_size, height, width,
num_channels] representing a batch of images.
Returns:
preprocessed_inputs: A float32 tensor with shape [batch_size,
height, widht, num_channels] representing a batch of images.
"""
pass
@abstractmethod
def predict(self, preprocessed_inputs):
"""Predict prediction tensors from inputs tensor.
Outputs of this function can be passed to loss or postprocess functions.
Args:
preprocessed_inputs: A float32 tensor with shape [batch_size,
height, width, num_channels] representing a batch of images.
Returns:
prediction_dict: A dictionary holding prediction tensors to be
passed to the Loss or Postprocess functions.
"""
pass
@abstractmethod
def postprocess(self, prediction_dict, **params):
"""Convert predicted output tensors to final forms.
Args:
prediction_dict: A dictionary holding prediction tensors.
**params: Additional keyword arguments for specific implementations
of specified models.
Returns:
A dictionary containing the postprocessed results.
"""
pass
@abstractmethod
def loss(self, prediction_dict, groundtruth_lists):
"""Compute scalar loss tensors with respect to provided groundtruth.
Args:
prediction_dict: A dictionary holding prediction tensors.
groundtruth_lists: A list of tensors holding groundtruth
information, with one entry for each image in the batch.
Returns:
A dictionary mapping strings (loss names) to scalar tensors
representing loss values.
"""
pass
class Model(BaseModel):
"""xxx definition."""
def __init__(self,
is_training,
num_classes_dict={'digits': 10, 'letters': 24}):
"""Constructor.
Args:
is_training: A boolean indicating whether the training version of
computation graph should be constructed.
num_classes: Number of classes.
"""
super(Model, self).__init__(num_classes_dict=num_classes_dict)
self._is_training = is_training
self._class_order = ['digits', 'letters']
def preprocess(self, inputs):
"""Predict prediction tensors from inputs tensor.
Outputs of this function can be passed to loss or postprocess functions.
Args:
preprocessed_inputs: A float32 tensor with shape [batch_size,
height, width, num_channels] representing a batch of images.
Returns:
prediction_dict: A dictionary holding prediction tensors to be
passed to the Loss or Postprocess functions.
"""
preprocessed_inputs = tf.to_float(inputs)
preprocessed_inputs = tf.subtract(preprocessed_inputs, 128.0)
preprocessed_inputs = tf.div(preprocessed_inputs, 128.0)
return preprocessed_inputs
def predict(self, preprocessed_inputs):
"""Predict prediction tensors from inputs tensor.
Outputs of this function can be passed to loss or postprocess functions.
Args:
preprocessed_inputs: A float32 tensor with shape [batch_size,
height, width, num_channels] representing a batch of images.
Returns:
prediction_dict: A dictionary holding prediction tensors to be
passed to the Loss or Postprocess functions.
"""
net = preprocessed_inputs
net = slim.repeat(net, 2, slim.conv2d, 32, [3, 3], scope='conv1')
net = slim.max_pool2d(net, [2, 2], scope='pool1')
net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3], scope='conv2')
net = slim.max_pool2d(net, [2, 2], scope='pool2')
net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv3')
net = slim.flatten(net, scope='flatten')
net = slim.dropout(net, keep_prob=0.5,
is_training=self._is_training)
net = slim.fully_connected(net, 512, scope='fc1')
net = slim.fully_connected(net, 512, scope='fc2')
prediction_dict = {}
for class_name, num_classes in self.num_classes_dict.items():
logits = slim.fully_connected(net, num_outputs=num_classes,
activation_fn=None,
scope='Predict/' + class_name)
prediction_dict[class_name] = logits
return prediction_dict
def postprocess(self, prediction_dict):
"""Convert predicted output tensors to final forms.
Args:
prediction_dict: A dictionary holding prediction tensors.
**params: Additional keyword arguments for specific implementations
of specified models.
Returns:
A dictionary containing the postprocessed results.
"""
postprecessed_dict = {}
for class_name in self.num_classes_dict:
logits = prediction_dict[class_name]
# logits = tf.nn.softmax(logits, name=class_name)
postprecessed_dict[class_name] = logits
return postprecessed_dict
def loss(self, prediction_dict, groundtruth_lists):
"""Compute scalar loss tensors with respect to provided groundtruth.
Args:
prediction_dict: A dictionary holding prediction tensors.
groundtruth_lists: A list of tensors holding groundtruth
information, with one entry for each task.
Returns:
A dictionary mapping strings (loss names) to scalar tensors
representing loss values.
"""
onehot_labels_dict = self._onehot_groundtruth_dict(groundtruth_lists)
for class_name in self.num_classes_dict:
weights = tf.cast(tf.greater(
tf.reduce_sum(onehot_labels_dict[class_name], axis=1), 0),
dtype=tf.float32)
slim.losses.softmax_cross_entropy(
logits=prediction_dict[class_name],
onehot_labels=onehot_labels_dict[class_name],
weights=weights,
scope='Loss/' + class_name)
loss = slim.losses.get_total_loss()
loss_dict = {'loss': loss}
return loss_dict
def _onehot_groundtruth_dict(self, groundtruth_lists):
"""Transform groundtruth lables to one-hot formats.
Args:
groundtruth_lists: A dict of tensors holding groundtruth
information, with one entry for task.
Returns:
onehot_labels_dict: A dictionary mapping strings (class names)
to one-hot lable tensors.
"""
one_hot = tf.one_hot(
groundtruth_lists, depth=sum(self.num_classes_dict.values()))
onehot_labels_dict = {}
start_index = 0
for class_name in self._class_order:
onehot_labels_dict[class_name] = tf.slice(
one_hot, [0, start_index],
[-1, self.num_classes_dict[class_name]])
start_index += self.num_classes_dict[class_name]
return onehot_labels_dict
def accuracy(self, postprocessed_dict, groundtruth_lists):
"""Calculate accuracy.
Args:
postprocessed_dict: A dictionary containing the postprocessed
results
groundtruth_lists: A dict of tensors holding groundtruth
information, with one entry for each image in the batch.
Returns:
accuracy: The scalar accuracy.
"""
onehot_labels_dict = self._onehot_groundtruth_dict(groundtruth_lists)
num_corrections = 0.
for class_name in self.num_classes_dict:
predicted_argmax = tf.argmax(tf.nn.softmax(
postprocessed_dict[class_name]), axis=1)
onehot_predicted = tf.one_hot(
predicted_argmax, depth=self.num_classes_dict[class_name])
onehot_sum = tf.add(onehot_labels_dict[class_name],
onehot_predicted)
correct = tf.greater(onehot_sum, 1)
num = tf.reduce_sum(tf.cast(correct, tf.float32))
num_corrections += num
total_nums = tf.cast(tf.shape(groundtruth_lists)[0], dtype=tf.float32)
accuracy = num_corrections / total_nums
return accuracy