
Optimized BERT feature extraction by concatenating the last 4 hidden layers
alexwwang committed Apr 24, 2019
1 parent 4ceda01 commit a5dacd7
Showing 1 changed file with 10 additions and 3 deletions.
kashgari/embeddings/embeddings.py (13 changes: 10 additions & 3 deletions)
@@ -301,16 +301,22 @@ def build(self):
         model = keras_bert.load_trained_model_from_checkpoint(config_path,
                                                                check_point_path,
                                                                seq_len=self.sequence_length)
-        output_layer = NonMaskingLayer()(model.output)
+        num_layers = len(model.layers)
+        features_layers = [model.get_layer(index=num_layers-1+idx*8).output\
+                           for idx in range(-3, 1)]

BrikerMan (Owner) commented on May 26, 2019:

    @alexwwang Have you compared the performance of model.output with this features_layers? I have received several reports saying that 0.2.1 performs better than 0.2.3.

alexwwang (Author, Collaborator) replied via email on Jun 2, 2019.

+        embedding_layer = concatenate(features_layers)
+        output_layer = NonMaskingLayer()(embedding_layer)
+        #output_layer = NonMaskingLayer()(model.output)
         self._model = Model(model.inputs, output_layer)

         self.embedding_size = self.model.output_shape[-1]
         dict_path = os.path.join(self.model_path, 'vocab.txt')
         word2idx = {}
         with open(dict_path, 'r', encoding='utf-8') as f:
             words = f.read().splitlines()
-        for word in words:
-            word2idx[word] = len(word2idx)
+        for idx, word in enumerate(words):
+            word2idx[word] = idx
+            #word2idx[word] = len(word2idx)
         for key, value in self.special_tokens.items():
             word2idx[key] = word2idx[value]
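
For context on what the new build() computes: instead of wrapping only the final transformer output (model.output), it walks backwards through the model's Keras layer list in strides of 8 (the number of layers each encoder block appears to contribute to the keras_bert graph, judging by the stride chosen here) and concatenates the outputs of the last four blocks, in the spirit of the feature-based extraction described in the BERT paper. Here is a minimal, self-contained sketch of the same indexing trick on a hypothetical toy model where each "block" adds exactly two layers, so the stride is 2 instead of 8:

    from tensorflow import keras
    from tensorflow.keras import layers

    # Toy stand-in: six "blocks", each contributing exactly two Keras layers,
    # mirroring keras_bert's fixed number of layers per encoder block.
    inputs = keras.Input(shape=(16, 32))
    x = inputs
    for _ in range(6):
        x = layers.Dense(32)(x)
        x = layers.LayerNormalization()(x)
    model = keras.Model(inputs, x)

    stride = 2                            # would be 8 for the keras_bert graph
    num_layers = len(model.layers)        # includes the InputLayer
    features_layers = [model.get_layer(index=num_layers - 1 + idx * stride).output
                       for idx in range(-3, 1)]   # final layer of the last 4 blocks
    embedding_layer = layers.concatenate(features_layers)
    extractor = keras.Model(model.inputs, embedding_layer)
    print(extractor.output_shape)         # (None, 16, 128): feature dim is 4 * 32

With BERT-base (hidden size 768), the concatenation makes self.embedding_size report 4 * 768 = 3072 instead of 768. keras_bert also exposes an output_layer_num argument on load_trained_model_from_checkpoint for last-N extraction; whether its layer selection matches this manual stride-of-8 indexing exactly should be checked against the installed version.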

@@ -393,6 +399,7 @@ def __init__(self,
         super(TwoHeadEmbedding, self).__init__(name_or_path, sequence_length, embedding_size, **kwargs)

     def build(self, **kwargs):
+        self.embedding_type = 'twohead'
         if self._token2idx is None:
             logging.debug('need to build after build_word2idx')
         else:
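
A side note on the vocabulary-loading change in the first hunk: for a vocab.txt with unique lines, enumerate and the old running len(word2idx) counter build the same mapping. They diverge only if a token appears twice, in which case the old pattern can hand two different tokens the same index, while enumerate keeps every stored index equal to an actual line number, which is what BERT's embedding rows are keyed on. A small sketch with a deliberately duplicated token:

    words = ['[PAD]', 'x', 'x', 'y']      # hypothetical vocab, 'x' duplicated

    old = {}
    for word in words:
        old[word] = len(old)              # re-assigning 'x' desynchronizes the counter
    new = {word: idx for idx, word in enumerate(words)}

    print(old)   # {'[PAD]': 0, 'x': 2, 'y': 2} -- 'x' and 'y' collide
    print(new)   # {'[PAD]': 0, 'x': 2, 'y': 3} -- every index is a real line number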
