This repository has been archived by the owner on Nov 8, 2022. It is now read-only.

Fixed NER with TF1.12 (#356)
peteriz committed Apr 2, 2019
Parent: 1b9d133 · Commit: 2acd47b
Showing 7 changed files with 46 additions and 82 deletions.
1 change: 1 addition & 0 deletions examples/ner/interactive.py
@@ -82,6 +82,7 @@ def vectorize(doc, w_vocab, c_vocab):
doc_vec = vectorize(text_arr, word_vocab, char_vocab)
seq_len = np.array([len(text_arr)]).reshape(-1, 1)
inputs = list(doc_vec)
# pylint: disable=no-member
if model.crf_mode == 'pad':
inputs = list(doc_vec) + [seq_len]
doc_ner = model.predict(inputs, batch_size=1).argmax(2).flatten()
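A minimal sketch of the sequence-length input that the guarded 'pad' branch above appends, assuming a hypothetical pre-tokenized document (the real script builds text_arr upstream):

    import numpy as np

    text_arr = ['John', 'lives', 'in', 'New', 'York']   # hypothetical tokens
    seq_len = np.array([len(text_arr)]).reshape(-1, 1)  # shape (1, 1)
    print(seq_len)  # -> [[5]]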
8 changes: 4 additions & 4 deletions examples/ner/train.py
@@ -108,13 +108,13 @@ def validate_input_args(input_args):
y_train = keras.utils.to_categorical(y_train, num_y_labels)

ner_model = NERCRF(use_cudnn=args.use_cudnn)
# pylint: disable=unexpected-keyword-arg
ner_model.build(args.word_length,
num_y_labels,
vocabulary_size,
char_vocabulary_size,
word_embedding_dims=args.word_embedding_dims,
char_embedding_dims=args.character_embedding_dims,
word_lstm_dims=args.char_features_lstm_dims,
tagger_lstm_dims=args.entity_tagger_lstm_dims,
dropout=args.dropout)

@@ -126,9 +126,9 @@ def validate_input_args(input_args):

train_inputs = [x_train, x_char_train]
test_inputs = [x_test, x_char_test]
if not args.use_cudnn:
train_inputs.append(np.sum(np.not_equal(x_train, 0), axis=-1).reshape((-1, 1)))
test_inputs.append(np.sum(np.not_equal(x_test, 0), axis=-1).reshape((-1, 1)))

train_inputs.append(np.sum(np.not_equal(x_train, 0), axis=-1).reshape((-1, 1)))
test_inputs.append(np.sum(np.not_equal(x_test, 0), axis=-1).reshape((-1, 1)))

conll_cb = ConllCallback(test_inputs, y_test, dataset.y_labels.vocab,
batch_size=args.b)
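As a hedged illustration of the length computation that now runs unconditionally (the use_cudnn guard above was dropped): each row's non-zero token ids are counted, yielding the (batch, 1) int array the CRF layer consumes. This assumes id 0 is the padding symbol, as in the surrounding code:

    import numpy as np

    x_train = np.array([[4, 7, 9, 0, 0],
                        [3, 2, 0, 0, 0]])  # hypothetical padded id batch
    lengths = np.sum(np.not_equal(x_train, 0), axis=-1).reshape((-1, 1))
    print(lengths)  # -> [[3]
                    #     [2]]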
12 changes: 6 additions & 6 deletions nlp_architect/api/ner_api.py
@@ -31,8 +31,8 @@ class NerApi(AbstractApi):
NER model API
"""
model_dir = str(LIBRARY_OUT / 'ner-pretrained')
pretrained_model = path.join(model_dir, 'model.h5')
pretrained_model_info = path.join(model_dir, 'model_info.dat')
pretrained_model = path.join(model_dir, 'model_v4.h5')
pretrained_model_info = path.join(model_dir, 'model_info_v4.dat')

def __init__(self, prompt=True):
self.model = None
@@ -71,10 +71,10 @@ def _download_pretrained_model(self, prompt=True):
sys.exit(0)
download_unlicensed_file('https://s3-us-west-2.amazonaws.com/nlp-architect-data'
'/models/ner/',
'model.h5', self.pretrained_model)
'model_v4.h5', self.pretrained_model)
download_unlicensed_file('https://s3-us-west-2.amazonaws.com/nlp-architect-data'
'/models/ner/',
'model_info.dat', self.pretrained_model_info)
'model_info_v4.dat', self.pretrained_model_info)
print('Done.')

def load_model(self):
@@ -129,8 +129,8 @@ def inference(self, doc):
doc_vec = self.vectorize(text_arr, self.word_vocab, self.char_vocab)
seq_len = np.array([len(text_arr)]).reshape(-1, 1)
inputs = list(doc_vec)
if self.model.crf_mode == 'pad':
inputs = list(doc_vec) + [seq_len]
# pylint: disable=no-member
inputs = list(doc_vec) + [seq_len]
doc_ner = self.model.predict(inputs, batch_size=1).argmax(2).flatten()
tags = [self.y_vocab.get(n, None) for n in doc_ner]
return self.pretty_print(text_arr, tags)
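A hedged sketch of the simplified input assembly above: with the crf_mode check gone, the sequence-length array is always appended. Shapes are illustrative stand-ins; the real doc_vec comes from self.vectorize:

    import numpy as np

    doc_vec = (np.zeros((1, 5), dtype=np.int32),      # word ids (illustrative)
               np.zeros((1, 5, 20), dtype=np.int32))  # per-word char ids
    seq_len = np.array([5]).reshape(-1, 1)
    inputs = list(doc_vec) + [seq_len]
    print([a.shape for a in inputs])  # -> [(1, 5), (1, 5, 20), (1, 1)]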
73 changes: 27 additions & 46 deletions nlp_architect/contrib/tensorflow/python/keras/layers/crf.py
@@ -14,10 +14,9 @@
# limitations under the License.
# ******************************************************************************
import tensorflow as tf
from tensorflow import convert_to_tensor, keras


class CRF(keras.layers.Layer):
class CRF(tf.keras.layers.Layer):
"""
Conditional Random Field layer (tf.keras)
`CRF` can be used as the last layer in a network (as a classifier). Input shape (features)
@@ -29,55 +28,36 @@ class CRF(keras.layers.Layer):
Args:
num_labels (int): the number of labels to tag each temporal input.
mode (string, optional): operation mode, 'reg' for regular full sequence learning (all
sequences have equal length), or 'pad' for using with supplied sequence lengths (useful
for padded sequences)
Input shape:
'reg' mode - nD tensor with shape `(batch_size, sentence length, num_classes)`.
'pad' mode - tuple of `(batch_size, sentence length, num_classes)`, `(batch_size, 1)`
nD tensor with shape `(batch_size, sentence length, num_classes)`.
Output shape:
nD tensor with shape: `(batch_size, sentence length, num_classes)`.
"""
def __init__(self, num_classes, mode='reg', **kwargs):

def __init__(self, num_classes, **kwargs):
self.transitions = None
super(CRF, self).__init__(**kwargs)
# num of output labels
self.output_dim = int(num_classes)
self.mode = mode
if self.mode == 'pad':
self.input_spec = [keras.layers.InputSpec(min_ndim=3),
keras.layers.InputSpec(min_ndim=2)]
elif self.mode == 'reg':
self.input_spec = keras.layers.InputSpec(min_ndim=3)
else:
raise ValueError
self.supports_masking = True
self.input_spec = tf.keras.layers.InputSpec(min_ndim=3)
self.supports_masking = False
self.sequence_lengths = None

def get_config(self):
config = {
'output_dim': self.output_dim,
'mode': self.mode,
'supports_masking': self.supports_masking,
'transitions': tf.keras.backend.eval(self.transitions)
}
base_config = super(CRF, self).get_config()
return dict(list(base_config.items()) + list(config.items()))

def build(self, input_shape):
if self.mode == 'pad':
assert len(input_shape) == 2
assert len(input_shape[0]) == 3
assert len(input_shape[1]) == 2
f_shape = tf.TensorShape(input_shape[0])
input_spec = [keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]}),
keras.layers.InputSpec(min_ndim=2, axes={-1: 1}, dtype=tf.int32)]
else:
assert len(input_shape) == 3
f_shape = tf.TensorShape(input_shape)
input_spec = keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})
assert len(input_shape) == 3
f_shape = tf.TensorShape(input_shape)
input_spec = tf.keras.layers.InputSpec(min_ndim=3, axes={-1: f_shape[-1]})

if f_shape[-1] is None:
raise ValueError('The last dimension of the inputs to `CRF` '
@@ -92,21 +72,26 @@ def build(self, input_shape):
trainable=True)
self.built = True

def call(self, inputs, **kwargs):
if self.mode == 'pad':
sequences = convert_to_tensor(inputs[0], dtype=self.dtype)
self.sequence_lengths = tf.keras.backend.flatten(inputs[-1])
# pylint: disable=arguments-differ
def call(self, inputs, sequence_lengths=None, **kwargs):
sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)
if sequence_lengths is not None:
assert len(sequence_lengths.shape) == 2
assert tf.convert_to_tensor(sequence_lengths).dtype == 'int32'
seq_len_shape = tf.convert_to_tensor(sequence_lengths).get_shape().as_list()
assert seq_len_shape[1] == 1
self.sequence_lengths = tf.keras.backend.flatten(sequence_lengths)
else:
sequences = convert_to_tensor(inputs, dtype=self.dtype)
shape = tf.shape(inputs)
self.sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
self.sequence_lengths = tf.ones(tf.shape(inputs)[0], dtype=tf.int32) * \
(tf.shape(inputs)[1])

viterbi_sequence, _ = tf.contrib.crf.crf_decode(sequences, self.transitions,
self.sequence_lengths)
output = keras.backend.one_hot(viterbi_sequence, self.output_dim)
return keras.backend.in_train_phase(sequences, output)
output = tf.keras.backend.one_hot(viterbi_sequence, self.output_dim)
return tf.keras.backend.in_train_phase(sequences, output)

def loss(self, y_true, y_pred):
y_pred = convert_to_tensor(y_pred, dtype=self.dtype)
y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
log_likelihood, self.transitions = \
tf.contrib.crf.crf_log_likelihood(y_pred,
tf.cast(tf.keras.backend.argmax(y_true),
@@ -116,12 +101,8 @@ def loss(self, y_true, y_pred):
return tf.reduce_mean(-log_likelihood)

def compute_output_shape(self, input_shape):
if self.mode == 'pad':
data_shape = input_shape[0]
else:
data_shape = input_shape
tf.TensorShape(data_shape).assert_has_rank(3)
return data_shape[:2] + (self.output_dim,)
tf.TensorShape(input_shape).assert_has_rank(3)
return input_shape[:2] + (self.output_dim,)

@property
def viterbi_accuracy(self):
@@ -130,7 +111,7 @@ def accuracy(y_true, y_pred):
sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
viterbi_sequence, _ = tf.contrib.crf.crf_decode(y_pred, self.transitions,
sequence_lengths)
output = keras.backend.one_hot(viterbi_sequence, self.output_dim)
output = tf.keras.backend.one_hot(viterbi_sequence, self.output_dim)
return tf.keras.metrics.categorical_accuracy(y_true, output)
accuracy.func_name = 'viterbi_accuracy'
return accuracy
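Pulling the changes together, a hedged usage sketch of the reworked layer under TF 1.12 (where tf.contrib.crf still exists): the 'pad'/'reg' mode flag is gone, sequence_lengths is an optional keyword to call, and full-length sequences are assumed when it is omitted. The import path follows this file; the wiring below is illustrative, not the library's own example:

    import tensorflow as tf
    from nlp_architect.contrib.tensorflow.python.keras.layers.crf import CRF

    num_classes = 9
    feats = tf.keras.layers.Input(shape=(None, num_classes))     # emissions
    seq_lens = tf.keras.layers.Input(shape=(1,), dtype='int32')  # true lengths
    crf = CRF(num_classes, name='crf')
    preds = crf(inputs=feats, sequence_lengths=seq_lens)  # new keyword path
    model = tf.keras.Model([feats, seq_lens], preds)
    model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                  loss={'crf': crf.loss})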
1 change: 0 additions & 1 deletion nlp_architect/data/sequential_tagging.py
@@ -119,7 +119,6 @@ def _read_file(self, filepath):
with open(filepath, encoding='utf-8') as fp:
data = fp.readlines()
data = [d.strip() for d in data]
data = [d for d in data if 'DOCSTART' not in d]
sentences = self._split_into_sentences(data)
parsed_sentences = [self._parse_sentence(s) for s in sentences if len(s) > 0]
return parsed_sentences
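The one-line deletion above stops filtering DOCSTART document markers at read time. A hedged sketch of the effect on CoNLL-style input, with a naive blank-line split standing in for the real _split_into_sentences:

    raw = ['-DOCSTART- -X- -X- O', '',
           'John NNP B-NP B-PER', 'lives VBZ B-VP O']  # hypothetical lines
    data = [d.strip() for d in raw]
    sentences, current = [], []
    for line in data:
        if line:
            current.append(line)
        elif current:
            sentences.append(current)
            current = []
    if current:
        sentences.append(current)
    print(sentences[0][0])  # -> '-DOCSTART- -X- -X- O' is now retained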
2 changes: 1 addition & 1 deletion nlp_architect/models/chunker.py
@@ -134,7 +134,7 @@ def build(self,

model = tf.keras.Model(input_src, [pos_out, chunks_out])
if optimizer is None:
self.optimizer = tf.train.AdamOptimizer()
self.optimizer = tf.keras.optimizers.Adam(0.001, clipnorm=5.)
else:
self.optimizer = optimizer
model.compile(optimizer=self.optimizer,
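One plausible motivation for the swap above: tf.train.AdamOptimizer exposes no gradient-clipping argument, while the Keras-native optimizer accepts clipnorm, which caps each gradient's L2 norm (here at 5.0). A minimal sketch under TF 1.12:

    import tensorflow as tf

    # Keras-native Adam with per-gradient norm clipping, as used above.
    optimizer = tf.keras.optimizers.Adam(0.001, clipnorm=5.)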
31 changes: 7 additions & 24 deletions nlp_architect/models/ner_crf.py
@@ -38,10 +38,8 @@ def __init__(self, use_cudnn=False):
self.char_vocab_size = None
self.word_embedding_dims = None
self.char_embedding_dims = None
self.word_lstm_dims = None
self.tagger_lstm_dims = None
self.dropout = None
self.crf_mode = None
self.use_cudnn = use_cudnn

def build(self,
@@ -51,10 +49,8 @@ def build(self,
char_vocab_size,
word_embedding_dims=100,
char_embedding_dims=16,
word_lstm_dims=20,
tagger_lstm_dims=200,
dropout=0.5,
crf_mode='pad'):
dropout=0.5):
"""
Build a NERCRF model
@@ -65,24 +61,17 @@
char_vocab_size (int): character vocabulary size
word_embedding_dims (int): word embedding dimensions
char_embedding_dims (int): character embedding dimensions
word_lstm_dims (int): character LSTM feature extractor output dimensions
tagger_lstm_dims (int): word tagger LSTM output dimensions
dropout (float): dropout rate
crf_mode (string): CRF operation mode, select 'pad'/'reg' for supplied sequences in
input or full sequence tagging. ('reg' is forced when use_cudnn=True)
"""
self.word_length = word_length
self.target_label_dims = target_label_dims
self.word_vocab_size = word_vocab_size
self.char_vocab_size = char_vocab_size
self.word_embedding_dims = word_embedding_dims
self.char_embedding_dims = char_embedding_dims
self.word_lstm_dims = word_lstm_dims
self.tagger_lstm_dims = tagger_lstm_dims
self.dropout = dropout
self.crf_mode = crf_mode

assert crf_mode in ('pad', 'reg'), 'crf_mode is invalid'

# build word input
words_input = tf.keras.layers.Input(shape=(None,), name='words_input')
@@ -117,23 +106,17 @@ def build(self,

inputs = [words_input, word_chars_input]

if self.use_cudnn:
self.crf_mode = 'reg'
with tf.device('/cpu:0'):
crf = CRF(self.target_label_dims, mode=self.crf_mode, name='ner_crf')
if self.crf_mode == 'pad':
sequence_lengths = tf.keras.layers.Input(batch_shape=(None, 1), dtype='int32')
predictions = crf([bilstm, sequence_lengths])
inputs.append(sequence_lengths)
else:
predictions = crf(bilstm)
sequence_lengths = tf.keras.layers.Input(shape=(1,), dtype='int32', name='seq_lens')
inputs.append(sequence_lengths)
crf = CRF(self.target_label_dims, name='ner_crf')
predictions = crf(inputs=bilstm, sequence_lengths=sequence_lengths)

# compile the model
model = tf.keras.Model(inputs=inputs,
outputs=predictions)
model.compile(loss={'ner_crf': crf.loss},
optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.),
metrics=[crf.viterbi_accuracy])
optimizer=tf.keras.optimizers.Adam(0.001, clipnorm=5.))

self.model = model

def _rnn_cell(self, units, **kwargs):
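A hedged sketch of building the updated model end to end: word_lstm_dims and crf_mode are gone from build, and a (None, 1) int32 sequence-length input named seq_lens is now always part of the model. All dimensions below are illustrative, not tuned values:

    from nlp_architect.models.ner_crf import NERCRF

    ner = NERCRF(use_cudnn=False)
    ner.build(word_length=12,       # illustrative max word length, in chars
              target_label_dims=9,  # number of NER tag classes
              word_vocab_size=10000,
              char_vocab_size=64,
              word_embedding_dims=100,
              char_embedding_dims=16,
              tagger_lstm_dims=200,
              dropout=0.5)
    # ner.model now expects [word_ids, word_char_ids, seq_lens] as inputs.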
