In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os, sys
import functools

os.chdir('..')
sys.path.append('..')
from data import dataset
from models import burn_in_lstm

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [24]:
# Input signature used by `SimpleSequentialLSTM_SavedModel.call`: one spec per
# element of the input tuple, in the order `call` unpacks it. The leading
# `None` leaves the first dimension unconstrained.
INPUT_SHAPE_SPEC = (
    # sequential features (`input_seq`, unstacked along axis 1 in `call`)
    tf.TensorSpec(shape=[None, 14, 20], dtype=tf.float32),
    # numeric basic features (`input_basic_num`)
    tf.TensorSpec(shape=[None, 19], dtype=tf.float32),
    # categorical basic features (`input_basic_cat`): industry, area, codenum
    tf.TensorSpec(shape=[None, 3], dtype=tf.int32),
)

class SimpleSequentialLSTM_SavedModel(burn_in_lstm.SimpleSequentialLSTM):
    """`SimpleSequentialLSTM` whose `call` is a `tf.function` with a fixed signature.

    All layers (`shared_dense`, `lstm`, the three embeddings, `basic_dense`,
    `global_dense`) are inherited from the parent class; only the forward pass
    is redefined here so it can be traced against `INPUT_SHAPE_SPEC`.
    """

    def __init__(self):
        super().__init__()

    @tf.function(input_signature=[INPUT_SHAPE_SPEC])
    def call(self, input_ops):
        """Run the forward pass.

        Args:
            input_ops: Tuple `(input_seq, input_basic_num, input_basic_cat)`
                matching `INPUT_SHAPE_SPEC`.

        Returns:
            The tensor produced by `self.global_dense` (the logits).
        """
        sequence_input, numeric_input, categorical_input = input_ops

        # Sequence branch: the same dense layer is applied to every slice
        # along axis 1, the results are re-stacked and fed to the LSTM.
        per_step_inputs = tf.unstack(sequence_input, axis=1)
        per_step_embedded = [self.shared_dense(step) for step in per_step_inputs]
        # Only the second of the LSTM's three return values is used below.
        _, final_hidden, _ = self.lstm(tf.stack(per_step_embedded, axis=1))

        # Basic-feature branch: split the categorical ids along the last axis
        # and embed each one with its own embedding layer.
        industry_id, area_id, code_id = tf.unstack(categorical_input, axis=-1)
        industry_vec = self.industry_embedding(industry_id)
        area_vec = self.area_embedding(area_id)
        code_vec = self.code_embedding(code_id)
        basic_hidden = self.basic_dense(
            tf.concat([numeric_input, industry_vec, area_vec, code_vec], axis=-1)
        )

        # Global head: LSTM state joined with the basic-feature embedding.
        merged = tf.concat([final_hidden, basic_hidden], axis=-1)
        return self.global_dense(merged)
    

# Build the train/test datasets from the TFRecord directory. `load_model`
# pulls one batch from `ds_train` to run the model once.
# NOTE(review): the meaning of the two trailing `1, 1` arguments is defined by
# `dataset.build_tfrecord_dataset`, which is not shown here — TODO confirm.
# NOTE(review): absolute, machine-specific path; consider a configurable
# data directory.
ds_train, ds_test = dataset.build_tfrecord_dataset('/home/yuki/Documents/ymx/stock_research/data/records', 1, 1)

In [27]:
def load_model():
    """Create, compile and (if available) restore the exportable LSTM model.

    One batch from the module-level `ds_train` is pushed through `model.call`
    before weights are loaded — presumably so the layers create their
    variables first (TODO confirm against the parent class).

    Returns:
        The compiled `SimpleSequentialLSTM_SavedModel`. If the checkpoint
        file is absent, the model keeps its initial weights and a notice is
        printed.
    """
    model = SimpleSequentialLSTM_SavedModel()
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    adam = tf.keras.optimizers.Adam(learning_rate=1e-3)
    metrics = [
        tf.keras.metrics.BinaryAccuracy(name='acc'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall')
    ]
    model.compile(loss=loss, optimizer=adam, metrics=metrics)

    # Only the features are needed for the warm-up call; labels are discarded.
    xs, _ = next(iter(ds_train))
    model.call(xs)

    # NOTE(review): absolute, machine-specific path.
    model_basedir = '/home/yuki/Documents/ymx/stock_research/train'
    weights_path = os.path.join(model_basedir, 'basic_lstm.v2.01.hdf5')
    # isfile() also copes with a missing directory, where os.listdir() raised.
    if os.path.isfile(weights_path):
        model.load_weights(weights_path)
        print(' [*] Loaded pretrained model basic_lstm.v2.01.hdf5')
    else:
        # Make the fallback visible instead of silently returning an
        # untrained model.
        print(f' [!] Pretrained weights not found at {weights_path}; model is untrained')
    return model

# Module-level model instance; `get_knn_mapping` and the SavedModel export
# cell below both use this global.
model = load_model()

 [*] Loaded pretrained model basic_lstm.v2.01.hdf5


## Compute KNN similarity matrices

In [9]:
import pprint

# Stock metadata table. The cells below read its 'codenum' and 'name' columns.
# The path is relative to the working directory set by `os.chdir('..')` in the
# first cell.
stock_basics = pd.read_csv('./data/stock_basics.csv')

def get_codenum_representation():
    """Assign an integer index to every distinct stock code.

    Reads the 'codenum' column of the module-level `stock_basics` frame,
    de-duplicates it through a `set`, and numbers the resulting codes in the
    set's iteration order.

    Returns:
        dict mapping each code, rendered as a string left-padded with zeros
        to six characters, to its index.
    """
    # NOTE(review): the index of each code depends on set iteration order;
    # presumably it must match the mapping used when the model was trained —
    # verify before changing the ordering.
    distinct_codes = list(set(stock_basics['codenum'].values.tolist()))
    return {
        str(code).zfill(6): index
        for index, code in enumerate(distinct_codes)
    }

# Forward map: zero-padded code string -> integer index.
stock_code_hash = get_codenum_representation()
# Reverse map: integer index -> zero-padded code string.
stock_code_hash_reversed = dict(zip(stock_code_hash.values(), stock_code_hash.keys()))
# pprint.pprint(stock_code_hash)
print('OK')

OK


In [17]:
import heapq

def from_code_to_name(codestr):
    """Translate a stock index into the stock's name.

    Args:
        codestr: Despite the name, this is the integer index used as a key of
            `stock_code_hash_reversed` (anything accepted by `int()`).

    Returns:
        The 'name' value of the single `stock_basics` row whose 'codenum'
        matches the index's code.

    Raises:
        KeyError: If the index is not in `stock_code_hash_reversed`.
        AssertionError: If the code does not match exactly one row.
    """
    padded_code = stock_code_hash_reversed[int(codestr)]
    # The frame is compared against an integer, so the zero-padded string is
    # converted back with int().
    row_mask = stock_basics['codenum'] == int(padded_code)
    matches = stock_basics[row_mask]
    assert len(matches) == 1, f'codestr={codestr} len(dfline)={len(matches)}'
    return matches['name'].values[0]

def get_knn_mapping(k=7):
    """Map every stock code to the names of its `k` highest-scoring neighbours.

    For each entry of the module-level `stock_code_hash`, the stock's index is
    scored against every other index with `model.stock_cosine_similarity`, and
    the top `k` candidates are converted to names via `from_code_to_name`.

    Args:
        k: Number of neighbours kept per stock.

    Returns:
        dict from each key of `stock_code_hash` to a list of at most `k`
        names, ordered by descending score.
    """
    neighbours = {code: [] for code in stock_code_hash.keys()}
    for step, (code, code_index) in enumerate(stock_code_hash.items()):
        # Candidate set: every index except the stock's own.
        candidates = list(stock_code_hash.values())
        candidates.remove(stock_code_hash[code])

        # Pair the query index with each candidate, element by element.
        query_arr = np.array([code_index] * len(candidates), dtype=np.int32)
        candidate_arr = np.array(candidates, dtype=np.int32)
        scores = model.stock_cosine_similarity(query_arr, candidate_arr)

        # Positions (into `candidates`) of the k largest scores.
        best_positions = tf.argsort(scores, direction='DESCENDING')[: k].numpy().tolist()
        neighbours[code].extend(
            from_code_to_name(candidates[pos]) for pos in best_positions
        )

        # Lightweight progress indicator, rewritten in place every 100 steps.
        if step % 100 == 1:
            print('\r', f'>>>>>>>>>> iter={step}', end='')
    print('')
    return neighbours


# Compute the neighbour table for every stock (default k=7), then spot-check
# the neighbours of code '600000'.
stock_knn_mapping = get_knn_mapping()
print(stock_knn_mapping['600000'])

 >>>>>>>>>> iter=3801
['国海证券', '东北证券', '永太科技', '国光电器', '吉药控股', '安阳钢铁', '鹏欣资源']


## Save the model as a SavedModel (pb)

Note: 使用本方法前需要满足以下要求:
- 可以拿的到模型的网络结构定义源码
- 网络结构里面的所有操作都是通过tf.keras完成的, 不能出现类似tf.nn 的tensorflow自己的操作符
- tf2.0下保存的模型是.h5格式的,并且仅保存了weights, 即通过model.save_weights保存的模型.

-------------------

Refer to: https://blog.csdn.net/Murdock_C/java/article/details/103204875

In [26]:
# Export the model with `model.call` registered under the 'predict' signature.
# Despite the `.pb` suffix the target is a directory (the recorded output
# writes to `/tmp/fucking-awesome.pb/assets`).
# NOTE(review): the recorded output of this cell contains
# `TypeError: wrapped_call() takes 1 positional argument but 2 were given`,
# and the reloaded model below reports an empty signature map — the export
# may not have registered 'predict'; TODO investigate.
tf.saved_model.save(model, '/tmp/fucking-awesome.pb', signatures={'predict': model.call})

    relative to /home/yuki/anaconda3/lib/python3.6/site-packages/tensorflow/python/keras:

    saving/saved_model.py:1143 call_and_return_conditional_losses
        return layer_call(inputs, training=training), layer.get_losses_for(inputs)
    layers/recurrent.py:743 call
        zero_output_for_mask=self.zero_output_for_mask)
    backend.py:3806 rnn
        input_time_zero, tuple(initial_states) + tuple(constants))
    layers/recurrent.py:728 step
        output, new_states = self.cell.call(inputs, states, **kwargs)

    TypeError: wrapped_call() takes 1 positional argument but 2 were given

INFO:tensorflow:Assets written to: /tmp/fucking-awesome.pb/assets


In [10]:
# Rebuild the dataset (only the first returned dataset is kept) and reload the
# exported SavedModel from disk.
tfrecord_ds, _ = dataset.build_tfrecord_dataset('/home/yuki/Documents/ymx/stock_research/data/records', 1, 1)
model_tmp = tf.saved_model.load('/tmp/fucking-awesome.pb/')

# Fetch one batch; `xs`/`ys` are not used further in this cell.
xs, ys = next(iter(tfrecord_ds))
# List the attributes exposed by the restored object.
dir(model_tmp)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_variable_with_custom_getter',
 '_checkpoint_dependencies',
 '_deferred_dependencies',
 '_gather_saveables_for_checkpoint',
 '_handle_deferred_dependencies',
 '_list_extra_dependencies_for_serialization',
 '_list_functions_for_serialization',
 '_lookup_dependency',
 '_maybe_initialize_trackable',
 '_name_based_attribute_restore',
 '_name_based_restores',
 '_no_dependency',
 '_object_identifier',
 '_preload_simple_restoration',
 '_restore_from_checkpoint_position',
 '_self_name_based_restores',
 '_self_setattr_tracking',
 '_self_unconditional_checkpoint_dependencies',
 '_self_unconditional_deferred_dependencies',
 '_s

In [11]:
# Inspect the signatures restored with the SavedModel. The recorded output is
# an empty `_SignatureMap({})`.
model_tmp.signatures

_SignatureMap({})