Skip to content

Commit

Permalink
Merge 4283d46 into 7eddb47
Browse files Browse the repository at this point in the history
  • Loading branch information
BrikerMan committed Sep 30, 2019
2 parents 7eddb47 + 4283d46 commit 412a281
Show file tree
Hide file tree
Showing 18 changed files with 374 additions and 58 deletions.
153 changes: 153 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
---
# CircleCI 2.0 pipeline:
#   lint -> tests on Python 3.6 and 3.7 (in parallel) -> docs build (master only)
version: 2

# Executor settings shared by every job; merged into each job with `<<:`.
default_executor: &default_executor
  machine:
    image: circleci/classic:201808-01

# Library of reusable steps. This key only exists to host the anchors that the
# jobs below pull in as `*alias` entries of their `steps` lists.
default_steps:
  - switch_to_3_7: &switch_to_3_7
      run:
        name: "Switch to Python v3.7"
        command: |
          pyenv versions
          pyenv global 3.7.0

  - create_env: &create_env
      run:
        name: create venv
        command: |
          python3 -m venv venv

  - save_3_7_cache: &save_3_7_cache
      save_cache:
        paths:
          - ./venv
        key: 'py3.7-{{ checksum "requirements.dev.txt" }}'

  - restore_3_7_cache: &restore_3_7_cache
      restore_cache:
        keys:
          - 'py3.7-{{ checksum "requirements.dev.txt" }}'
          # Fallback to the most recent 3.7 venv when the requirements changed,
          # mirroring the `py3.6-` fallback used by the test_3.6 job.
          - py3.7-

  - install_dependencies: &install_dependencies
      run:
        name: install dependencies
        command: |
          ls .
          source venv/bin/activate
          pip install -r requirements.dev.txt

  - run_tests: &run_tests
      run:
        name: run tests
        # Duration string with a unit suffix; 30m == 1800 seconds.
        no_output_timeout: 30m
        # Literal block scalar (`|`): every line must stay a separate shell
        # command. A folded scalar (`>`) would join adjacent lines with spaces
        # and turn the whole script into arguments of `. venv/bin/activate`.
        command: |
          . venv/bin/activate
          TESTFILES=$(circleci tests glob "tests/**/*.py" | circleci tests split --split-by=timings)
          echo $TESTFILES
          mkdir -pv test-reports
          # $TESTFILES is intentionally unquoted so it splits into one argument per file.
          pytest --doctest-modules \
            --junitxml=test-reports/junit.xml \
            --cov=kashgari --cov-report=xml --cov-report=html \
            $TESTFILES

jobs:
  lint:
    <<: *default_executor
    steps:
      - checkout
      - *switch_to_3_7
      - *create_env
      - *restore_3_7_cache
      - *install_dependencies
      - *save_3_7_cache
      - run:
          name: run lint
          command: |
            source venv/bin/activate
            flake8 kashgari

  test_3.6:
    <<: *default_executor
    # Test files are split across 4 containers by `circleci tests split`.
    parallelism: 4
    steps:
      - checkout
      - run:
          name: "Switch to Python v3.6"
          command: |
            pyenv versions
            pyenv global 3.6.5
      - *create_env
      - restore_cache:
          keys:
            - 'py3.6-{{ checksum "requirements.dev.txt" }}'
            - py3.6-
      - *install_dependencies
      - save_cache:
          paths:
            - ./venv
          key: 'py3.6-{{ checksum "requirements.dev.txt" }}'
      - *run_tests
      - store_test_results:
          path: test-reports
      - store_artifacts:
          path: test-reports

  test_3.7:
    <<: *default_executor
    # Test files are split across 4 containers by `circleci tests split`.
    parallelism: 4
    steps:
      - checkout
      - *switch_to_3_7
      - *create_env
      - *restore_3_7_cache
      - *install_dependencies
      - *save_3_7_cache
      - *run_tests
      - store_test_results:
          path: test-reports
      - store_artifacts:
          path: test-reports

  document:
    <<: *default_executor
    steps:
      - checkout
      - *switch_to_3_7
      - *create_env
      - *restore_3_7_cache
      - *install_dependencies
      - *save_3_7_cache
      - run:
          name: Update Documents
          command: |
            ls
            cd mkdocs
            ls
            mkdocs build

workflows:
  version: 2
  lint-test-and-deploy:
    jobs:
      - lint
      # Both test jobs wait for lint and then run side by side.
      - test_3.6:
          requires:
            - lint
      - test_3.7:
          requires:
            - lint
      # Docs are only built on master, after both test jobs pass.
      - document:
          filters:
            branches:
              only:
                - master
          requires:
            - test_3.6
            - test_3.7
4 changes: 4 additions & 0 deletions kashgari/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
"""
import os
os.environ['TF_KERAS'] = '1'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

import keras_bert
from kashgari.macros import TaskType, config
Expand Down
34 changes: 26 additions & 8 deletions kashgari/embeddings/stacked_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
# file: stacked_embedding.py
# time: 2019-05-23 09:18

import json
import pydoc
from typing import Union, Optional, Tuple, List, Dict

import numpy as np
Expand All @@ -15,11 +17,10 @@

import kashgari
from kashgari.embeddings.base_embedding import Embedding
from kashgari.processors.base_processor import BaseProcessor
from kashgari.layers import L
from kashgari.processors.base_processor import BaseProcessor


# Todo: A better name for this class
class StackedEmbedding(Embedding):
"""Embedding layer without pre-training, train embedding layer while training model"""

Expand All @@ -28,12 +29,30 @@ def _load_saved_instance(cls,
config_dict: Dict,
model_path: str,
tf_model: keras.Model):
pass
embeddings = []
for embed_info in config_dict['embeddings']:
embed_class = pydoc.locate(f"{embed_info['module']}.{embed_info['class_name']}")
embedding: Embedding = embed_class._load_saved_instance(embed_info,
model_path,
tf_model)
embeddings.append(embedding)
instance = cls(embeddings=embeddings,
from_saved_model=True)
print('----')
print(instance.embeddings)

embed_model_json_str = json.dumps(config_dict['embed_model'])
instance.embed_model = keras.models.model_from_json(embed_model_json_str,
custom_objects=kashgari.custom_objects)
# Load Weights from model
for layer in instance.embed_model.layers:
layer.set_weights(tf_model.get_layer(layer.name).get_weights())
return instance

def info(self):
info = super(StackedEmbedding, self).info()
info['embeddings'] = [embed.info() for embed in self.embeddings]
info['config'] = []
info['config'] = {}
return info

def __init__(self,
Expand All @@ -58,9 +77,10 @@ def __init__(self,
processor=processor,
from_saved_model=from_saved_model)

self.embeddings = embeddings
self.processor = embeddings[0].processor

if not from_saved_model:
self.embeddings = embeddings
self.processor = embeddings[0].processor
self._build_model()

def _build_model(self, **kwargs):
Expand All @@ -71,8 +91,6 @@ def _build_model(self, **kwargs):

for embed in self.embeddings:
inputs += embed.embed_model.inputs
print(embed.embed_model.input)
print(embed.embed_model.inputs)

# inputs = [embed.embed_model.inputs for embed in self.embeddings]
outputs = layer_concatenate([embed.embed_model.output for embed in self.embeddings])
Expand Down
20 changes: 16 additions & 4 deletions kashgari/tasks/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,19 @@
# file: base_model.py
# time: 2019-05-22 11:21


import os
import json
import logging
import os
import warnings
import pathlib
from typing import Dict, Any, List, Optional, Union, Tuple

import numpy as np
import tensorflow as tf
from tensorflow import keras

import kashgari

from kashgari import utils
from kashgari.embeddings import BareEmbedding
from kashgari.embeddings.base_embedding import Embedding
Expand All @@ -44,7 +46,7 @@ def info(self):
'class_name': self.__class__.__name__,
'module': self.__class__.__module__,
'tf_version': tf.__version__,
'kashgari_version': tf.__version__
'kashgari_version': kashgari.__version__
}

@property
Expand All @@ -59,6 +61,17 @@ def token2idx(self) -> Dict[str, int]:
def label2idx(self) -> Dict[str, int]:
return self.embedding.label2idx

@property
def pre_processor(self):
    """Deprecated. Use `self.processor` instead.

    Returns the same object as `processor` (``self.embedding.processor``)
    and emits a ``DeprecationWarning``.
    """
    # The docstring has to be the first statement of the function: placed
    # after the warn() call it is only a discarded string expression and
    # never becomes ``__doc__``.
    # stacklevel=2 (third positional argument) attributes the warning to the
    # code that accessed the property rather than to this getter.
    warnings.warn("The 'pre_processor' property is deprecated, "
                  "use 'processor' instead", DeprecationWarning, 2)
    return self.embedding.processor

@property
def processor(self):
    """Processor of the underlying embedding, i.e. ``self.embedding.processor``."""
    return self.embedding.processor

def __init__(self,
embedding: Optional[Embedding] = None,
hyper_parameters: Optional[Dict[str, Dict[str, Any]]] = None):
Expand Down Expand Up @@ -87,7 +100,6 @@ def __init__(self,
self.tf_model: keras.Model = None
self.hyper_parameters = self.get_default_hyper_parameters()
self.model_info = {}
self.pre_processor = self.embedding.processor

if hyper_parameters:
self.hyper_parameters.update(hyper_parameters)
Expand Down
8 changes: 4 additions & 4 deletions kashgari/tasks/classification/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,12 @@ def evaluate(self,
logging.debug('y : {}'.format(y_data[index]))
logging.debug('y_pred : {}'.format(y_pred[index]))

if self.pre_processor.multi_label:
y_pred_b = self.pre_processor.multi_label_binarizer.fit_transform(y_pred)
y_true_b = self.pre_processor.multi_label_binarizer.fit_transform(y_data)
if self.processor.multi_label:
y_pred_b = self.processor.multi_label_binarizer.fit_transform(y_pred)
y_true_b = self.processor.multi_label_binarizer.fit_transform(y_data)
report = metrics.classification_report(y_pred_b,
y_true_b,
target_names=self.pre_processor.multi_label_binarizer.classes_,
target_names=self.processor.multi_label_binarizer.classes_,
output_dict=output_dict,
digits=digits)
else:
Expand Down
2 changes: 1 addition & 1 deletion kashgari/tasks/classification/dpcnn_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def resnet_block(self, inputs, filters: int, kernel_size: int = 3,
return tensor_out

def build_model_arc(self):
output_dim = len(self.pre_processor.label2idx)
output_dim = len(self.processor.label2idx)
config = self.hyper_parameters
embed_model = self.embedding.embed_model

Expand Down

0 comments on commit 412a281

Please sign in to comment.