change code order

Hironsan · Aug 30, 2017 · a26a010 · a26a010
1 parent f9b869c
commit a26a010
Show file tree

Hide file tree

Showing 6 changed files with 17 additions and 21 deletions.
diff --git a/anago/config.py b/anago/config.py
@@ -10,10 +10,10 @@ class Config(object):
 
     # model settings
     dropout = 0.5           # Dropout rate. NOTE(review): Keras Dropout takes the fraction to drop, not to keep -- confirm how this value is used
-    char_dim = 25          # Character embedding dimension
+    char_dim = 25           # Character embedding dimension
     word_dim = 100          # Word embedding dimension
     lstm_size = 100         # The number of hidden units in lstm
-    char_lstm_size = 25    # The number of hidden units in char lstm
+    char_lstm_size = 25     # The number of hidden units in char lstm
     use_char = True         # Use character feature
     crf = True              # Use CRF
 

diff --git a/anago/evaluator.py b/anago/evaluator.py
@@ -14,13 +14,14 @@ def __init__(self, config, weights):
 
     def eval(self, x_test, y_test):
         p = WordPreprocessor.load(os.path.join(self.config.save_path, 'preprocessor.pkl'))
-        train_steps, train_batches = batch_iter(
-            list(zip(x_test, y_test)), self.config.batch_size, preprocessor=p)
-
         self.config.char_vocab_size = len(p.vocab_char)
         self.config.vocab_size = len(p.vocab_word)
 
+        train_steps, train_batches = batch_iter(
+            list(zip(x_test, y_test)), self.config.batch_size, preprocessor=p)
+
         model = SeqLabeling(self.config, ntags=len(p.vocab_tag))
         model.load(filepath=os.path.join(self.config.save_path, self.weights))
+
         f1score = F1score(train_steps, train_batches, p, model)
         f1score.on_epoch_end(epoch=-1)  # the epoch value is arbitrary here; -1 is just a placeholder
diff --git a/anago/models.py b/anago/models.py
@@ -34,7 +34,7 @@ def __getattr__(self, name):
 
 
 class SeqLabeling(BaseModel):
-    """A Keras implementation of BiLSTM-CRF for named-entity recognition.
+    """A Keras implementation of BiLSTM-CRF for sequence labeling.
 
     References
     --
@@ -81,6 +81,6 @@ def __init__(self, config, embeddings=None, ntags=None):
         self.crf = ChainCRF()
         pred = self.crf(x)
 
-        self.sequence_lengths = Input(batch_shape=(None, 1), dtype='int32')
-        self.model = Model(inputs=[word_ids, char_ids, self.sequence_lengths], outputs=[pred])
+        sequence_lengths = Input(batch_shape=(None, 1), dtype='int32')
+        self.model = Model(inputs=[word_ids, char_ids, sequence_lengths], outputs=[pred])
         self.config = config
diff --git a/anago/tagger.py b/anago/tagger.py
@@ -14,14 +14,10 @@ def __init__(self, config, weights, tokenizer=str.split):
         self.config = config
         self._tokenizer = tokenizer
         self.p = WordPreprocessor.load(os.path.join(self.config.save_path, 'preprocessor.pkl'))
-
         self.config.char_vocab_size = len(self.p.vocab_char)
         self.config.vocab_size = len(self.p.vocab_word)
 
         self.model = SeqLabeling(self.config, ntags=len(self.p.vocab_tag))
-        self.model.compile(loss=self.model.loss,
-                           optimizer=Adam(lr=self.config.learning_rate),
-                           )
         self.model.load(filepath=os.path.join(self.config.save_path, weights))
 
     def predict(self, words):

diff --git a/anago/trainer.py b/anago/trainer.py
@@ -13,25 +13,25 @@ class Trainer(object):
     def __init__(self, config):
         self.config = config
 
-    def train(self, x_train, y_train, x_valid=None, y_valid=None, x_test=None, y_test=None):
-        import numpy as np
-        p = prepare_preprocessor(np.r_[x_train, x_valid, x_test], y_train)
+    def train(self, x_train, y_train, x_valid=None, y_valid=None):
+        p = prepare_preprocessor(x_train, y_train)
+        embeddings = load_word_embeddings(p.vocab_word, self.config.glove_path, self.config.word_dim)
+        self.config.char_vocab_size = len(p.vocab_char)
 
         train_steps, train_batches = batch_iter(
             list(zip(x_train, y_train)), self.config.batch_size, preprocessor=p)
         valid_steps, valid_batches = batch_iter(
             list(zip(x_valid, y_valid)), self.config.batch_size, preprocessor=p)
 
-        embeddings = load_word_embeddings(p.vocab_word, self.config.glove_path, self.config.word_dim)
-        self.config.char_vocab_size = len(p.vocab_char)
-
         model = SeqLabeling(self.config, embeddings, len(p.vocab_tag))
         model.compile(loss=model.crf.loss,
                       optimizer=Adam(lr=self.config.learning_rate),
                       )
         callbacks = get_callbacks(log_dir=self.config.log_dir,
                                   save_dir=self.config.save_path,
                                   valid=(valid_steps, valid_batches, p, model))
-        model.fit_generator(train_batches, train_steps, epochs=self.config.max_epoch,
+        model.fit_generator(generator=train_batches,
+                            steps_per_epoch=train_steps,
+                            epochs=self.config.max_epoch,
                             callbacks=callbacks)
         p.save(os.path.join(self.config.save_path, 'preprocessor.pkl'))
diff --git a/tests/train_test.py b/tests/train_test.py
@@ -20,5 +20,4 @@ def test_train(self):
         #x_train, y_train = x_train[:100], y_train[:100]
         #x_valid, y_valid = x_train[:100], y_train[:100]
         trainer = anago.Trainer(config)
-        trainer.train(x_train, y_train, x_test, y_test, x_valid, y_valid)
-        #trainer.train(x_train, y_train, x_valid, y_valid, x_test, y_test)
+        trainer.train(x_train, y_train, x_valid, y_valid, x_test, y_test)