♻️ Clean up the code.

BrikerMan · Jun 15, 2019 · e0e6b8a
1 parent 8afa170
commit e0e6b8a
Show file tree

Hide file tree

Showing 13 changed files with 102 additions and 326 deletions.
diff --git a/kashgari/layers/__init__.py b/kashgari/layers/__init__.py
@@ -10,7 +10,6 @@
 import tensorflow as tf
 from tensorflow.python import keras
 from kashgari.layers.non_masking_layer import NonMaskingLayer
-from kashgari.layers.crf import CRF
 
 L = keras.layers
 

diff --git a/kashgari/layers/crf.py b/kashgari/layers/crf.py
diff --git a/kashgari/loss.py b/kashgari/loss.py
diff --git a/kashgari/tasks/base_model.py b/kashgari/tasks/base_model.py
@@ -84,15 +84,29 @@ def task(self):
         return self.embedding.task
 
     def build_model(self,
-                    x_data: Union[Tuple[List[List[str]], ...], List[List[str]]],
-                    y_data: Union[List[List[str]], List[str]],
+                    x_train: Union[Tuple[List[List[str]], ...], List[List[str]]],
+                    y_train: Union[List[List[str]], List[str]],
                     x_validate: Union[Tuple[List[List[str]], ...], List[List[str]]] = None,
                     y_validate: Union[List[List[str]], List[str]] = None):
+        """
+        Build the model from the given corpus.
+
+        Args:
+            x_train: Array of training feature data (if the model has a single input),
+                or a tuple of training feature data arrays (if the model has multiple inputs)
+            y_train: Array of training label data
+            x_validate: Array of validation feature data (if the model has a single input),
+                or a tuple of validation feature data arrays (if the model has multiple inputs)
+            y_validate: Array of validation label data
+
+        Returns:
+
+        """
 
         if x_validate is not None and not isinstance(x_validate, tuple):
-            self.embedding.analyze_corpus(x_data + x_validate, y_data + y_validate)
+            self.embedding.analyze_corpus(x_train + x_validate, y_train + y_validate)
         else:
-            self.embedding.analyze_corpus(x_data, y_data)
+            self.embedding.analyze_corpus(x_train, y_train)
 
         if self.tf_model is None:
             self.build_model_arc()
@@ -187,45 +201,46 @@ def fit(self,
                                      batch_size=batch_size,
                                      **fit_kwargs)
 
-    def fit_with_generator(self,
-                           x_train: Union[Tuple[List[List[str]], ...], List[List[str]]],
-                           y_train: Union[List[List[str]], List[str]],
-                           x_validate: Union[Tuple[List[List[str]], ...], List[List[str]]] = None,
-                           y_validate: Union[List[List[str]], List[str]] = None,
-                           batch_size: int = 64,
-                           epochs: int = 5,
-                           callbacks: List[keras.callbacks.Callback] = None,
-                           fit_kwargs: Dict = None):
-        train_generator = self.get_data_generator(x_train,
-                                                  y_train,
-                                                  batch_size)
-        if fit_kwargs is None:
-            fit_kwargs = {}
-
-        validation_generator = None
-        validation_steps = None
-        if x_validate:
-            validation_generator = self.get_data_generator(x_validate,
-                                                           y_validate,
-                                                           batch_size)
-
-            if isinstance(x_validate, tuple):
-                validation_steps = len(x_validate[0]) // batch_size + 1
-            else:
-                validation_steps = len(x_validate) // batch_size + 1
-
-        if isinstance(x_train, tuple):
-            steps_per_epoch = len(x_train[0]) // batch_size + 1
-        else:
-            steps_per_epoch = len(x_train) // batch_size + 1
-        with utils.custom_object_scope():
-            return self.tf_model.fit_generator(train_generator,
-                                               steps_per_epoch=steps_per_epoch,
-                                               epochs=epochs,
-                                               validation_data=validation_generator,
-                                               validation_steps=validation_steps,
-                                               callbacks=callbacks,
-                                               **fit_kwargs)
+    # TODO: add a fit-with-generator function (previous implementation kept commented out below)
+    # def fit_with_generator(self,
+    #                        x_train: Union[Tuple[List[List[str]], ...], List[List[str]]],
+    #                        y_train: Union[List[List[str]], List[str]],
+    #                        x_validate: Union[Tuple[List[List[str]], ...], List[List[str]]] = None,
+    #                        y_validate: Union[List[List[str]], List[str]] = None,
+    #                        batch_size: int = 64,
+    #                        epochs: int = 5,
+    #                        callbacks: List[keras.callbacks.Callback] = None,
+    #                        fit_kwargs: Dict = None):
+    #     train_generator = self.get_data_generator(x_train,
+    #                                               y_train,
+    #                                               batch_size)
+    #     if fit_kwargs is None:
+    #         fit_kwargs = {}
+    #
+    #     validation_generator = None
+    #     validation_steps = None
+    #     if x_validate:
+    #         validation_generator = self.get_data_generator(x_validate,
+    #                                                        y_validate,
+    #                                                        batch_size)
+    #
+    #         if isinstance(x_validate, tuple):
+    #             validation_steps = len(x_validate[0]) // batch_size + 1
+    #         else:
+    #             validation_steps = len(x_validate) // batch_size + 1
+    #
+    #     if isinstance(x_train, tuple):
+    #         steps_per_epoch = len(x_train[0]) // batch_size + 1
+    #     else:
+    #         steps_per_epoch = len(x_train) // batch_size + 1
+    #     with utils.custom_object_scope():
+    #         return self.tf_model.fit_generator(train_generator,
+    #                                            steps_per_epoch=steps_per_epoch,
+    #                                            epochs=epochs,
+    #                                            validation_data=validation_generator,
+    #                                            validation_steps=validation_steps,
+    #                                            callbacks=callbacks,
+    #                                            **fit_kwargs)
 
     def compile_model(self, **kwargs):
         """Configures the model for training.
@@ -289,17 +304,33 @@ def evaluate(self,
                  batch_size=None,
                  digits=4,
                  debug_info=False) -> Tuple[float, float, Dict]:
+        """
+        Evaluate model
+        Args:
+            x_data:
+            y_data:
+            batch_size:
+            digits:
+            debug_info:
+
+        Returns:
+
+        """
         raise NotImplementedError
 
     def build_model_arc(self):
         raise NotImplementedError
 
     def save(self, model_path: str):
-        pathlib.Path(model_path).mkdir(exist_ok=True, parents=True)
+        """
+        Save model
+        Args:
+            model_path: Directory to save the model files into; created (with parents) if missing.
 
-        # with open(os.path.join(model_path, 'processor.pickle'), 'wb') as f:
-        #     f.write(pickle.dumps(self.embedding.processor))
-        #     f.close()
+        Returns:
+
+        """
+        pathlib.Path(model_path).mkdir(exist_ok=True, parents=True)
 
         with open(os.path.join(model_path, 'model_info.json'), 'w') as f:
             f.write(json.dumps(self.info(), indent=2, ensure_ascii=True))
@@ -309,6 +340,15 @@ def save(self, model_path: str):
         logging.info('model saved to {}'.format(os.path.abspath(model_path)))
 
     def export(self, export_path: str, inputs: Optional[Dict] = None, outputs: Optional[Dict] = None):
+        """
+        Export model for tensorflow serving
+        Args:
+            export_path: The path to which the SavedModel will be stored.
+            inputs: dict mapping string input names to tensors. These are added
+                to the SignatureDef as the inputs.
+            outputs: dict mapping string output names to tensors. These are added
+                to the SignatureDef as the outputs.
+        """
         pathlib.Path(export_path).mkdir(exist_ok=True, parents=True)
 
         ts = round(time.time())

diff --git a/kashgari/tasks/labeling/base_model.py b/kashgari/tasks/labeling/base_model.py
@@ -12,9 +12,6 @@
 
 import random
 import logging
-import json
-import numpy as np
-from kashgari.loss import weighted_categorical_crossentropy
 from seqeval.metrics import classification_report
 from seqeval.metrics.sequence_labeling import get_entities
 
@@ -83,23 +80,6 @@ def predict_entities(self,
             })
         return final_res
 
-    # Todo: Better way to do this, too
-    # def compile_model(self, **kwargs):
-    #     if kwargs.get('loss') is None:
-    #         idx2label = self.embedding.processor.idx2label
-    #         weight = np.full((len(idx2label),), 50)
-    #         for idx, label in idx2label.items():
-    #             if label == self.embedding.processor.token_pad:
-    #                 weight[idx] = 1
-    #             if label in ['O']:
-    #                 weight[idx] = 10
-    #         weight_dict = {}
-    #         for idx, label in idx2label.items():
-    #             weight_dict[label] = weight[idx]
-    #         logging.debug(f"label weights set to {weight_dict}")
-    #         kwargs['loss'] = weighted_categorical_crossentropy(weight)
-    #     super(BaseLabelingModel, self).compile_model(**kwargs)
-
     def evaluate(self,
                  x_data,
                  y_data,
@@ -151,7 +131,6 @@ def build_model_arc(self):
     # model.build_model(train_x[:100], train_y[:100])
 
     # model.fit(train_x[:1000], train_y[:1000], epochs=10)
-    r = model.predict_doccano(train_x[:1000], 'result.json', join_chunk='')
 
     import pprint