Skip to content

Commit

Permalink
♻️ Clean up the code.
Browse files Browse the repository at this point in the history
  • Loading branch information
BrikerMan committed Jun 15, 2019
1 parent 8afa170 commit e0e6b8a
Show file tree
Hide file tree
Showing 13 changed files with 102 additions and 326 deletions.
1 change: 0 additions & 1 deletion kashgari/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import tensorflow as tf
from tensorflow.python import keras
from kashgari.layers.non_masking_layer import NonMaskingLayer
from kashgari.layers.crf import CRF

L = keras.layers

Expand Down
118 changes: 0 additions & 118 deletions kashgari/layers/crf.py

This file was deleted.

42 changes: 0 additions & 42 deletions kashgari/loss.py

This file was deleted.

134 changes: 87 additions & 47 deletions kashgari/tasks/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,29 @@ def task(self):
return self.embedding.task

def build_model(self,
x_data: Union[Tuple[List[List[str]], ...], List[List[str]]],
y_data: Union[List[List[str]], List[str]],
x_train: Union[Tuple[List[List[str]], ...], List[List[str]]],
y_train: Union[List[List[str]], List[str]],
x_validate: Union[Tuple[List[List[str]], ...], List[List[str]]] = None,
y_validate: Union[List[List[str]], List[str]] = None):
"""
Build model with corpus
Args:
x_train: Array of train feature data (if the model has a single input),
or tuple of train feature data array (if the model has multiple inputs)
y_train: Array of train label data
x_validate: Array of validation feature data (if the model has a single input),
or tuple of validation feature data array (if the model has multiple inputs)
y_validate: Array of validation label data
Returns:
"""

if x_validate is not None and not isinstance(x_validate, tuple):
self.embedding.analyze_corpus(x_data + x_validate, y_data + y_validate)
self.embedding.analyze_corpus(x_train + x_validate, y_train + y_validate)
else:
self.embedding.analyze_corpus(x_data, y_data)
self.embedding.analyze_corpus(x_train, y_train)

if self.tf_model is None:
self.build_model_arc()
Expand Down Expand Up @@ -187,45 +201,46 @@ def fit(self,
batch_size=batch_size,
**fit_kwargs)

def fit_with_generator(self,
                       x_train: Union[Tuple[List[List[str]], ...], List[List[str]]],
                       y_train: Union[List[List[str]], List[str]],
                       x_validate: Union[Tuple[List[List[str]], ...], List[List[str]]] = None,
                       y_validate: Union[List[List[str]], List[str]] = None,
                       batch_size: int = 64,
                       epochs: int = 5,
                       callbacks: List[keras.callbacks.Callback] = None,
                       fit_kwargs: Dict = None):
    """
    Train the model from batch generators instead of in-memory arrays.

    Args:
        x_train: Array of train feature data (if the model has a single input),
            or tuple of train feature data array (if the model has multiple inputs)
        y_train: Array of train label data
        x_validate: Array of validation feature data (if the model has a single input),
            or tuple of validation feature data array (if the model has multiple inputs).
            Validation is skipped when this is ``None`` or empty.
        y_validate: Array of validation label data
        batch_size: Batch size handed to ``get_data_generator`` and used to
            derive the number of steps per epoch
        epochs: Number of epochs passed to ``fit_generator``
        callbacks: Callbacks passed to ``fit_generator``
        fit_kwargs: Extra keyword arguments forwarded to ``fit_generator``

    Returns:
        Whatever ``self.tf_model.fit_generator`` returns.
    """

    def steps_for(features):
        # Multi-input data arrives as a tuple of arrays; the first array
        # is used to count samples.
        samples = features[0] if isinstance(features, tuple) else features
        return len(samples) // batch_size + 1

    train_batches = self.get_data_generator(x_train, y_train, batch_size)
    extra_kwargs = {} if fit_kwargs is None else fit_kwargs

    valid_batches, valid_steps = None, None
    if x_validate:
        valid_batches = self.get_data_generator(x_validate, y_validate, batch_size)
        valid_steps = steps_for(x_validate)

    train_steps = steps_for(x_train)

    with utils.custom_object_scope():
        return self.tf_model.fit_generator(train_batches,
                                           steps_per_epoch=train_steps,
                                           epochs=epochs,
                                           validation_data=valid_batches,
                                           validation_steps=valid_steps,
                                           callbacks=callbacks,
                                           **extra_kwargs)
# TODO: restore fit_with_generator (generator-based training); the previous implementation is kept commented out below.
# def fit_with_generator(self,
# x_train: Union[Tuple[List[List[str]], ...], List[List[str]]],
# y_train: Union[List[List[str]], List[str]],
# x_validate: Union[Tuple[List[List[str]], ...], List[List[str]]] = None,
# y_validate: Union[List[List[str]], List[str]] = None,
# batch_size: int = 64,
# epochs: int = 5,
# callbacks: List[keras.callbacks.Callback] = None,
# fit_kwargs: Dict = None):
# train_generator = self.get_data_generator(x_train,
# y_train,
# batch_size)
# if fit_kwargs is None:
# fit_kwargs = {}
#
# validation_generator = None
# validation_steps = None
# if x_validate:
# validation_generator = self.get_data_generator(x_validate,
# y_validate,
# batch_size)
#
# if isinstance(x_validate, tuple):
# validation_steps = len(x_validate[0]) // batch_size + 1
# else:
# validation_steps = len(x_validate) // batch_size + 1
#
# if isinstance(x_train, tuple):
# steps_per_epoch = len(x_train[0]) // batch_size + 1
# else:
# steps_per_epoch = len(x_train) // batch_size + 1
# with utils.custom_object_scope():
# return self.tf_model.fit_generator(train_generator,
# steps_per_epoch=steps_per_epoch,
# epochs=epochs,
# validation_data=validation_generator,
# validation_steps=validation_steps,
# callbacks=callbacks,
# **fit_kwargs)

def compile_model(self, **kwargs):
"""Configures the model for training.
Expand Down Expand Up @@ -289,17 +304,33 @@ def evaluate(self,
batch_size=None,
digits=4,
debug_info=False) -> Tuple[float, float, Dict]:
"""
Evaluate model
Args:
x_data:
y_data:
batch_size:
digits:
debug_info:
Returns:
"""
raise NotImplementedError

def build_model_arc(self):
    """
    Build the model architecture.

    Abstract hook: this base implementation always raises. ``build_model``
    invokes it when ``self.tf_model`` is ``None``.
    NOTE(review): presumably subclasses are expected to assign
    ``self.tf_model`` here -- confirm against the concrete models.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError()

def save(self, model_path: str):
pathlib.Path(model_path).mkdir(exist_ok=True, parents=True)
"""
Save model
Args:
model_path:
# with open(os.path.join(model_path, 'processor.pickle'), 'wb') as f:
# f.write(pickle.dumps(self.embedding.processor))
# f.close()
Returns:
"""
pathlib.Path(model_path).mkdir(exist_ok=True, parents=True)

with open(os.path.join(model_path, 'model_info.json'), 'w') as f:
f.write(json.dumps(self.info(), indent=2, ensure_ascii=True))
Expand All @@ -309,6 +340,15 @@ def save(self, model_path: str):
logging.info('model saved to {}'.format(os.path.abspath(model_path)))

def export(self, export_path: str, inputs: Optional[Dict] = None, outputs: Optional[Dict] = None):
"""
Export model for tensorflow serving
Args:
export_path: The path to which the SavedModel will be stored.
inputs: dict mapping string input names to tensors. These are added
to the SignatureDef as the inputs.
outputs: dict mapping string output names to tensors. These are added
to the SignatureDef as the outputs.
"""
pathlib.Path(export_path).mkdir(exist_ok=True, parents=True)

ts = round(time.time())
Expand Down
21 changes: 0 additions & 21 deletions kashgari/tasks/labeling/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@

import random
import logging
import json
import numpy as np
from kashgari.loss import weighted_categorical_crossentropy
from seqeval.metrics import classification_report
from seqeval.metrics.sequence_labeling import get_entities

Expand Down Expand Up @@ -83,23 +80,6 @@ def predict_entities(self,
})
return final_res

# Todo: Better way to do this, too
# def compile_model(self, **kwargs):
# if kwargs.get('loss') is None:
# idx2label = self.embedding.processor.idx2label
# weight = np.full((len(idx2label),), 50)
# for idx, label in idx2label.items():
# if label == self.embedding.processor.token_pad:
# weight[idx] = 1
# if label in ['O']:
# weight[idx] = 10
# weight_dict = {}
# for idx, label in idx2label.items():
# weight_dict[label] = weight[idx]
# logging.debug(f"label weights set to {weight_dict}")
# kwargs['loss'] = weighted_categorical_crossentropy(weight)
# super(BaseLabelingModel, self).compile_model(**kwargs)

def evaluate(self,
x_data,
y_data,
Expand Down Expand Up @@ -151,7 +131,6 @@ def build_model_arc(self):
# model.build_model(train_x[:100], train_y[:100])

# model.fit(train_x[:1000], train_y[:1000], epochs=10)
r = model.predict_doccano(train_x[:1000], 'result.json', join_chunk='')

import pprint

Expand Down

0 comments on commit e0e6b8a

Please sign in to comment.