Skip to content

Commit

Permalink
implement bilstm-cnn-crf model
Browse files Browse the repository at this point in the history
  • Loading branch information
Hironsan committed Jul 25, 2017
1 parent 1b032e5 commit d660e41
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 15 deletions.
1 change: 1 addition & 0 deletions anago/models/bilstm_cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def _build_model(self):
recurrent_dropout=self.config.dropout))(x)
x = Dropout(self.config.dropout)(x)
preds = TimeDistributed(Dense(units=self.ntags, activation='softmax'))(x)

model = Model(inputs=[word_input, char_input], outputs=preds)
model.compile(loss='categorical_crossentropy',
optimizer=RMSprop(lr=self.config.learning_rate,
Expand Down
33 changes: 18 additions & 15 deletions anago/models/bilstm-crf.py → anago/models/bilstm_cnn_crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"""
import tensorflow as tf
import keras.backend as K
from keras.layers import Dense, LSTM, Bidirectional, Embedding, Input, Dropout
from keras.layers import Dense, LSTM, Bidirectional, Embedding, Input, Dropout, Conv2D, MaxPool2D, Reshape
from keras.layers.merge import Concatenate
from keras.layers.wrappers import TimeDistributed
from keras.models import Model
Expand All @@ -18,25 +18,27 @@
from anago.models.base_model import BaseModel


class BiLSTMCrf(BaseModel):
class BiLSTMCNNCrf(BaseModel):

def loss(self, y_true, y_pred):
y_t = K.argmax(y_true, -1)
y_t = tf.cast(y_t, tf.int32)
sequence_length = tf.constant(shape=(self.config.batch_size,), value=self.config.num_steps, dtype=tf.int32)
log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(y_pred, y_t, sequence_length)
self.transition_matrix = transition_params
loss = tf.reduce_mean(-log_likelihood)
self.transition_matrix = transition_params

return loss

def _build_model(self):
# build character based word embedding
char_input = Input(shape=(self.config.num_steps, self.max_word_len), dtype='int32')
x1 = Embedding(input_dim=self.char_vocab_size,
output_dim=self.char_embedding_size)(char_input)
x1 = TimeDistributed(Bidirectional(LSTM(self.char_lstm_dim,
dropout=self.config.dropout,
recurrent_dropout=self.config.dropout)))(x1)
char_input = Input(shape=(self.config.num_steps, self.config.max_word_len), dtype='int32')
x1 = TimeDistributed(Embedding(input_dim=self.config.char_vocab_size,
output_dim=self.config.char_embedding_size)
)(char_input)
x1 = Conv2D(self.config.nb_filters, (1, self.config.nb_kernels), activation='tanh')(x1)
x1 = MaxPool2D((1, self.config.max_word_len - self.config.nb_kernels + 1))(x1)
x1 = Reshape((self.config.num_steps, self.config.nb_filters))(x1)

# build word embedding
word_input = Input(shape=(self.config.num_steps,), dtype='int32')
Expand All @@ -57,9 +59,10 @@ def _build_model(self):
x = TimeDistributed(Dense(self.ntags))(x)
# pred = TimeDistributed(Dense(self.num_classes, activation='softmax'))(x)

self.model = Model(inputs=[word_input, char_input], outputs=[x])
self.model.compile(loss=self.loss,
optimizer=RMSprop(lr=self.config.learning_rate,
clipvalue=self.config.max_grad_norm),
metrics=['acc'])
self.model.summary()
model = Model(inputs=[word_input, char_input], outputs=[x])
model.compile(loss=self.loss,
optimizer=RMSprop(lr=self.config.learning_rate,
clipvalue=self.config.max_grad_norm),
metrics=['acc'])
model.summary()
self.model = model

0 comments on commit d660e41

Please sign in to comment.