/
model.py
158 lines (134 loc) · 6.64 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import tensorflow as tf
# Command-line flags holding the model hyperparameters. Model.__init__ below
# reads attributes with these same names from the `config` object it is given.
# NOTE(review): Model.__init__ also reads `config.optimizer` and
# `config.dropout` when training; those are not defined in this section and are
# presumably declared elsewhere -- confirm.
tf.app.flags.DEFINE_integer('batch_size', 50, 'Training batch size')
tf.app.flags.DEFINE_integer('emb_size', 300, 'Size of word embeddings')
tf.app.flags.DEFINE_integer('num_kernel', 100, 'Number of filters for each window size')
# One group of convolution kernels is created for every window size in
# [min_window, max_window] (inclusive on both ends).
tf.app.flags.DEFINE_integer('min_window', 3, 'Minimum size of filter window')
tf.app.flags.DEFINE_integer('max_window', 5, 'Maximum size of filter window')
tf.app.flags.DEFINE_integer('vocab_size', 15000, 'Vocabulary size')
tf.app.flags.DEFINE_integer('num_class', 2, 'Number of class to consider')
tf.app.flags.DEFINE_integer('sent_len', 56, 'Input sentence length. This is after the padding is performed.')
# A value of 0 makes _variable_with_weight_decay skip the L2 penalty entirely.
tf.app.flags.DEFINE_float('l2_reg', 0, 'l2 regularization weight')
def _variable_on_cpu(name, shape, initializer):
    """Fetch or create a variable under the '/cpu:0' device scope.

    Args:
        name: Variable name, passed to `tf.get_variable`.
        shape: Variable shape, passed to `tf.get_variable`.
        initializer: Initializer used when the variable is created.

    Returns:
        The variable returned by `tf.get_variable`.
    """
    with tf.device('/cpu:0'):
        return tf.get_variable(name, shape, initializer=initializer)
def _variable_with_weight_decay(name, shape, initializer, wd):
    """Create a variable together with its L2 weight-decay loss term.

    Args:
        name: Variable name.
        shape: Variable shape.
        initializer: Initializer used when the variable is created.
        wd: L2 regularization coefficient. `None` or 0 disables the penalty.

    Returns:
        A `(var, weight_decay)` tuple. `weight_decay` is
        `wd * tf.nn.l2_loss(var)` when `wd` is non-zero, otherwise a constant
        0.0 tensor, so callers can always add it to their list of losses.
    """
    var = _variable_on_cpu(name, shape, initializer)
    if wd is not None and wd != 0.:
        # tf.mul was removed in TensorFlow 1.0; tf.multiply is its replacement
        # and matches the 1.x-style calls used in the rest of this file.
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    else:
        weight_decay = tf.constant(0.0, dtype=tf.float32)
    return var, weight_decay
class Model(object):
    """Convolutional sentence classifier expressed as a TensorFlow graph.

    The graph looks up word embeddings for a batch of fixed-length sentences,
    applies one group of convolution kernels per window size in
    [min_window, max_window], max-pools each feature map over the whole
    sentence, optionally applies dropout, and feeds the concatenated features
    to a fully-connected layer producing per-class logits.

    Args:
        config: Object exposing `emb_size`, `batch_size`, `num_kernel`,
            `min_window`, `max_window`, `vocab_size`, `num_class`, `sent_len`
            and `l2_reg`; when `is_train` is true it must also expose
            `optimizer` and `dropout`.
        is_train: Build the training ops (dropout, optimizer, summaries) when
            true; otherwise `train_op` is a no-op.

    Raises:
        ValueError: If `is_train` is true and `config.optimizer` is not one of
            'adadelta', 'adagrad', 'adam' or 'sgd'.
    """

    def __init__(self, config, is_train=True):
        self.is_train = is_train
        self.emb_size = config.emb_size
        self.batch_size = config.batch_size
        self.num_kernel = config.num_kernel
        self.min_window = config.min_window
        self.max_window = config.max_window
        self.vocab_size = config.vocab_size
        self.num_class = config.num_class
        self.sent_len = config.sent_len
        self.l2_reg = config.l2_reg
        if is_train:
            # Only needed (and only read) when building the training graph.
            self.optimizer = config.optimizer
            self.dropout = config.dropout
        self.build_graph()

    def build_graph(self):
        """Build the computation graph."""
        self._inputs = tf.placeholder(dtype=tf.int64, shape=[self.batch_size, self.sent_len], name='input_x')
        self._labels = tf.placeholder(dtype=tf.int64, shape=[self.batch_size], name='input_y')
        losses = []

        # lookup layer
        with tf.variable_scope('lookup'):
            self._W_emb = _variable_on_cpu(name='embedding', shape=[self.vocab_size, self.emb_size],
                                           initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0))
            # sent_batch is of shape: (batch_size, sent_len, emb_size, 1), in order to use conv2d
            sent_batch = tf.nn.embedding_lookup(params=self._W_emb, ids=self._inputs)
            sent_batch = tf.expand_dims(sent_batch, -1)

        # conv + pooling layer
        with tf.variable_scope('conv') as scope:
            pool_tensors = []
            for k_size in range(self.min_window, self.max_window + 1):
                # Each kernel spans the full embedding width, so the
                # convolution only slides along the sentence dimension.
                kernel, wd = _variable_with_weight_decay(
                    name='kernel_' + str(k_size),
                    shape=[k_size, self.emb_size, 1, self.num_kernel],
                    initializer=tf.truncated_normal_initializer(stddev=0.01),
                    wd=self.l2_reg)
                losses.append(wd)
                conv = tf.nn.conv2d(input=sent_batch, filter=kernel, strides=[1, 1, 1, 1], padding='VALID')
                biases = _variable_on_cpu('biases_' + str(k_size), [self.num_kernel], tf.constant_initializer(0.0))
                bias = tf.nn.bias_add(conv, biases)
                relu = tf.nn.relu(bias, name=scope.name)
                # shape of relu: [batch_size, conv_len, 1, num_kernel]
                conv_len = relu.get_shape()[1]
                # Pool over the entire conv_len axis: one value per kernel.
                pool = tf.nn.max_pool(relu, ksize=[1, conv_len, 1, 1], strides=[1, 1, 1, 1], padding='VALID')
                # shape of pool: [batch_size, 1, 1, num_kernel]
                pool = tf.squeeze(pool, squeeze_dims=[1, 2])  # size: [batch_size, num_kernel]
                pool_tensors.append(pool)
            pool_layer = tf.concat(values=pool_tensors, axis=1, name='pool')

        # drop out layer (training only; self.dropout is the drop probability)
        if self.is_train and self.dropout > 0:
            pool_dropout = tf.nn.dropout(pool_layer, 1 - self.dropout)
        else:
            pool_dropout = pool_layer

        # fully-connected layer
        pool_size = (self.max_window - self.min_window + 1) * self.num_kernel
        with tf.variable_scope('fc'):
            W, wd = _variable_with_weight_decay('W', shape=[pool_size, self.num_class],
                                                initializer=tf.truncated_normal_initializer(stddev=0.05),
                                                wd=self.l2_reg)
            losses.append(wd)
            biases = _variable_on_cpu('biases', [self.num_class], tf.constant_initializer(0.01))
            logits = tf.nn.bias_add(tf.matmul(pool_dropout, W), biases)

        # loss: mean cross entropy plus the weight-decay terms collected above
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self._labels, logits=logits, name='cross_entropy_per_example')
        cross_entropy_loss = tf.reduce_mean(cross_entropy, name='cross_entropy_loss')
        losses.append(cross_entropy_loss)
        self._total_loss = tf.add_n(losses, name='total_loss')

        # correct prediction count
        correct_prediction = tf.to_int32(tf.nn.in_top_k(logits, self._labels, 1))
        self._true_count_op = tf.reduce_sum(correct_prediction)

        # Learning rate lives in a non-trainable variable so it can be changed
        # between steps through assign_lr().
        self._lr = tf.Variable(0.0, trainable=False)

        # Build the assignment ops once and feed new values via placeholders.
        # Calling tf.assign() inside assign_lr()/assign_embedding() would add a
        # new op (and, for the embedding, a full-size constant) to the graph on
        # every call.
        self._new_lr = tf.placeholder(dtype=tf.float32, shape=[], name='new_lr')
        self._lr_update = tf.assign(self._lr, self._new_lr)
        self._new_emb = tf.placeholder(dtype=tf.float32, shape=[self.vocab_size, self.emb_size],
                                       name='new_embedding')
        self._emb_update = tf.assign(self._W_emb, self._new_emb)

        # train on a batch
        if self.is_train:
            if self.optimizer == 'adadelta':
                opt = tf.train.AdadeltaOptimizer(self._lr)
            elif self.optimizer == 'adagrad':
                opt = tf.train.AdagradOptimizer(self._lr)
            elif self.optimizer == 'adam':
                opt = tf.train.AdamOptimizer(self._lr)
            elif self.optimizer == 'sgd':
                opt = tf.train.GradientDescentOptimizer(self._lr)
            else:
                raise ValueError("Optimizer not supported.")
            grads = opt.compute_gradients(self._total_loss)
            self._train_op = opt.apply_gradients(grads)

            for var in tf.trainable_variables():
                tf.summary.histogram(var.op.name, var)
        else:
            self._train_op = tf.no_op()

    @property
    def inputs(self):
        """Placeholder for token ids, shape [batch_size, sent_len]."""
        return self._inputs

    @property
    def labels(self):
        """Placeholder for class labels, shape [batch_size]."""
        return self._labels

    @property
    def lr(self):
        """Non-trainable variable holding the learning rate."""
        return self._lr

    @property
    def train_op(self):
        """Gradient-update op when training, a no-op otherwise."""
        return self._train_op

    @property
    def total_loss(self):
        """Sum of the mean cross-entropy loss and all weight-decay terms."""
        return self._total_loss

    @property
    def true_count_op(self):
        """Number of examples in the batch whose top-1 prediction is correct."""
        return self._true_count_op

    @property
    def W_emb(self):
        """Embedding matrix variable, shape [vocab_size, emb_size]."""
        return self._W_emb

    def assign_lr(self, session, lr_value):
        """Set the learning rate variable to `lr_value` (a scalar number)."""
        session.run(self._lr_update, feed_dict={self._new_lr: lr_value})

    def assign_embedding(self, session, pretrained):
        """Overwrite the embedding matrix with `pretrained`.

        `pretrained` must be array-like with shape [vocab_size, emb_size].
        """
        session.run(self._emb_update, feed_dict={self._new_emb: pretrained})