Skip to content

Commit

Permalink
change code order
Browse files Browse the repository at this point in the history
  • Loading branch information
Hironsan committed Aug 30, 2017
1 parent f9b869c commit a26a010
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 21 deletions.
4 changes: 2 additions & 2 deletions anago/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ class Config(object):

# model settings
dropout = 0.5 # Dropout rate. NOTE(review): if this is passed to a Keras Dropout layer it is the fraction of units dropped, not kept - confirm
char_dim = 25 # Character embedding dimension
char_dim = 25 # Character embedding dimension
word_dim = 100 # Word embedding dimension
lstm_size = 100 # The number of hidden units in lstm
char_lstm_size = 25 # The number of hidden units in char lstm
char_lstm_size = 25 # The number of hidden units in char lstm
use_char = True # Use character feature
crf = True # Use CRF

Expand Down
7 changes: 4 additions & 3 deletions anago/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@ def __init__(self, config, weights):

def eval(self, x_test, y_test):
p = WordPreprocessor.load(os.path.join(self.config.save_path, 'preprocessor.pkl'))
train_steps, train_batches = batch_iter(
list(zip(x_test, y_test)), self.config.batch_size, preprocessor=p)

self.config.char_vocab_size = len(p.vocab_char)
self.config.vocab_size = len(p.vocab_word)

train_steps, train_batches = batch_iter(
list(zip(x_test, y_test)), self.config.batch_size, preprocessor=p)

model = SeqLabeling(self.config, ntags=len(p.vocab_tag))
model.load(filepath=os.path.join(self.config.save_path, self.weights))

f1score = F1score(train_steps, train_batches, p, model)
f1score.on_epoch_end(epoch=-1) # epoch=-1 is a placeholder; this is a one-off evaluation, not a training epoch
6 changes: 3 additions & 3 deletions anago/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __getattr__(self, name):


class SeqLabeling(BaseModel):
"""A Keras implementation of BiLSTM-CRF for named-entity recognition.
"""A Keras implementation of BiLSTM-CRF for sequence labeling.
References
--
Expand Down Expand Up @@ -81,6 +81,6 @@ def __init__(self, config, embeddings=None, ntags=None):
self.crf = ChainCRF()
pred = self.crf(x)

self.sequence_lengths = Input(batch_shape=(None, 1), dtype='int32')
self.model = Model(inputs=[word_ids, char_ids, self.sequence_lengths], outputs=[pred])
sequence_lengths = Input(batch_shape=(None, 1), dtype='int32')
self.model = Model(inputs=[word_ids, char_ids, sequence_lengths], outputs=[pred])
self.config = config
4 changes: 0 additions & 4 deletions anago/tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,10 @@ def __init__(self, config, weights, tokenizer=str.split):
self.config = config
self._tokenizer = tokenizer
self.p = WordPreprocessor.load(os.path.join(self.config.save_path, 'preprocessor.pkl'))

self.config.char_vocab_size = len(self.p.vocab_char)
self.config.vocab_size = len(self.p.vocab_word)

self.model = SeqLabeling(self.config, ntags=len(self.p.vocab_tag))
self.model.compile(loss=self.model.loss,
optimizer=Adam(lr=self.config.learning_rate),
)
self.model.load(filepath=os.path.join(self.config.save_path, weights))

def predict(self, words):
Expand Down
14 changes: 7 additions & 7 deletions anago/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,25 @@ class Trainer(object):
def __init__(self, config):
self.config = config

def train(self, x_train, y_train, x_valid=None, y_valid=None, x_test=None, y_test=None):
import numpy as np
p = prepare_preprocessor(np.r_[x_train, x_valid, x_test], y_train)
def train(self, x_train, y_train, x_valid=None, y_valid=None):
p = prepare_preprocessor(x_train, y_train)
embeddings = load_word_embeddings(p.vocab_word, self.config.glove_path, self.config.word_dim)
self.config.char_vocab_size = len(p.vocab_char)

train_steps, train_batches = batch_iter(
list(zip(x_train, y_train)), self.config.batch_size, preprocessor=p)
valid_steps, valid_batches = batch_iter(
list(zip(x_valid, y_valid)), self.config.batch_size, preprocessor=p)

embeddings = load_word_embeddings(p.vocab_word, self.config.glove_path, self.config.word_dim)
self.config.char_vocab_size = len(p.vocab_char)

model = SeqLabeling(self.config, embeddings, len(p.vocab_tag))
model.compile(loss=model.crf.loss,
optimizer=Adam(lr=self.config.learning_rate),
)
callbacks = get_callbacks(log_dir=self.config.log_dir,
save_dir=self.config.save_path,
valid=(valid_steps, valid_batches, p, model))
model.fit_generator(train_batches, train_steps, epochs=self.config.max_epoch,
model.fit_generator(generator=train_batches,
steps_per_epoch=train_steps,
epochs=self.config.max_epoch,
callbacks=callbacks)
p.save(os.path.join(self.config.save_path, 'preprocessor.pkl'))
3 changes: 1 addition & 2 deletions tests/train_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,4 @@ def test_train(self):
#x_train, y_train = x_train[:100], y_train[:100]
#x_valid, y_valid = x_train[:100], y_train[:100]
trainer = anago.Trainer(config)
trainer.train(x_train, y_train, x_test, y_test, x_valid, y_valid)
#trainer.train(x_train, y_train, x_valid, y_valid, x_test, y_test)
trainer.train(x_train, y_train, x_valid, y_valid, x_test, y_test)

0 comments on commit a26a010

Please sign in to comment.