In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Dropout, Activation, Input
from tensorflow.python.keras.preprocessing.text import Tokenizer

In [2]:
max_words = 5000
batch_size = 128
epochs = 5

Getting data:

In [3]:
from sklearn.datasets import fetch_20newsgroups
categories = ['alt.atheism', 'talk.religion.misc','comp.graphics', 'sci.space']
#categories = None
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True, random_state=42)
from pprint import pprint
pprint(list(newsgroups_train.target_names))

['alt.atheism', 'comp.graphics', 'sci.space', 'talk.religion.misc']


In [4]:
newsgroups_train.filenames.shape

(2034,)

In [5]:
newsgroups_train.target.shape

(2034,)

In [6]:
newsgroups_train.target[:10]

array([1, 3, 2, 0, 2, 0, 2, 1, 2, 1], dtype=int64)

Converting text to vectors:

In [7]:
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(newsgroups_train["data"])
x_train = tokenizer.texts_to_matrix(newsgroups_train["data"], mode='binary')
x_test = tokenizer.texts_to_matrix(newsgroups_test["data"], mode='binary')
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

x_train shape: (2034, 5000)
x_test shape: (1353, 5000)


In [8]:
num_classes = np.max(newsgroups_train["target"]) + 1
print(num_classes, 'classes')

4 classes


Convert class vector to binary class matrix (for use with categorical_crossentropy):

In [9]:
y_train = tf.keras.utils.to_categorical(newsgroups_train["target"], num_classes)
y_test = tf.keras.utils.to_categorical(newsgroups_test["target"], num_classes)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

y_train shape: (2034, 4)
y_test shape: (1353, 4)


Building model functionally:

In [10]:
a = Input(shape=(max_words,))
b = Dense(512)(a)
b = Activation('relu')(b)
b = Dropout(0.5)(b)
b = Dense(num_classes)(b)
b = Activation('softmax')(b)
model = Model(inputs=a, outputs=b)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [11]:
model.layers

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x1610dcdcc50>,
 <tensorflow.python.keras.layers.core.Dense at 0x1610dcdc908>,
 <tensorflow.python.keras.layers.core.Activation at 0x1610dcbf5f8>,
 <tensorflow.python.keras.layers.core.Dropout at 0x1610d96eb70>,
 <tensorflow.python.keras.layers.core.Dense at 0x1610dcbf390>,
 <tensorflow.python.keras.layers.core.Activation at 0x1610c965cf8>]

In [12]:
print(model.to_yaml())

backend: tensorflow
class_name: Model
config:
  input_layers:
  - [input_1, 0, 0]
  layers:
  - class_name: InputLayer
    config:
      batch_input_shape: !!python/tuple [null, 5000]
      dtype: float32
      name: input_1
      sparse: false
    inbound_nodes: []
    name: input_1
  - class_name: Dense
    config:
      activation: linear
      activity_regularizer: null
      bias_constraint: null
      bias_initializer:
        class_name: Zeros
        config: {dtype: float32}
      bias_regularizer: null
      dtype: float32
      kernel_constraint: null
      kernel_initializer:
        class_name: GlorotUniform
        config: {dtype: float32, seed: null}
      kernel_regularizer: null
      name: dense
      trainable: true
      units: 512
      use_bias: true
    inbound_nodes:
    - - - input_1
        - 0
        - 0
        - {}
    name: dense
  - class_name: Activation
    config: {activation: relu, dtype: float32, name: activation, trainable: true}
    inbound_nodes:


In [13]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [14]:
estimator = tf.keras.estimator.model_to_estimator(keras_model=model)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using the Keras model provided.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\asergeev\\AppData\\Local\\Temp\\tmpd_78rs5i', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001610DCE2748>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [15]:
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    num_epochs=epochs,
    shuffle=False)
estimator.train(input_fn=train_input_fn)

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='C:\\Users\\asergeev\\AppData\\Local\\Temp\\tmpd_78rs5i\\keras\\keras_model.ckpt', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={})
INFO:tensorflow:Warm-starting from: ('C:\\Users\\asergeev\\AppData\\Local\\Temp\\tmpd_78rs5i\\keras\\keras_model.ckpt',)
INFO:tensorflow:Warm-starting variable: dense/kernel; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dense/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dense_1/kernel; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dense_1/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: Adam/iterations; prev_v

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x1610dcfb518>

In [16]:
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=x_test,
    y=y_test,
    batch_size=batch_size,
    shuffle=False)
score = estimator.evaluate(input_fn=test_input_fn)
print('\n')
print('Score:', score)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Starting evaluation at 2019-04-11T07:54:53Z
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from C:\Users\asergeev\AppData\Local\Temp\tmpd_78rs5i\model.ckpt-81
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-04-11-07:54:53
INFO:tensorflow:Saving dict for global step 81: categorical_accuracy = 0.88543975, global_step = 81, loss = 0.3659391
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 81: C:\Users\asergeev\AppData\Local\Temp\tmpd_78rs5i\model.ckpt-81


Score: {'categorical_accuracy': 0.88543975, 'loss': 0.3659391, 'global_step': 81}


In [31]:
# Fetch the Keras session and save the model
# The signature definition is defined by the input and output tensors, and stored with the default serving key
import os
#import tempfile
#MODEL_DIR = tempfile.gettempdir()
#version = 1
export_path = 'D:/PythonProjects/Machine-Learning-Distributed-Systems/tmp/news/1'
#print('export_path = {}\n'.format(export_path))
if os.path.isdir(export_path):
    print('\nAlready saved a model, cleaning up\n')
    !rm -r {export_path}

#tf.contrib.saved_model.save_keras_model(model, '/tmp/news')
tf.saved_model.simple_save(
    tf.keras.backend.get_session(),
    export_path,
    inputs={'document': model.input},
    outputs={t.name:t for t in model.outputs})

#print('\nSaved model:')
#!ls -l {export_path}

INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: D:/PythonProjects/Machine-Learning-Distributed-Systems/tmp/news/1\saved_model.pb

Saved model:


'ls' is not recognized as an internal or external command,
operable program or batch file.


In [32]:
!saved_model_cli show --dir {export_path} --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['document'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 5000)
        name: input_1:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['activation_1/Softmax:0'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 4)
        name: activation_1/Softmax:0
  Method name is: tensorflow/serving/predict


In [17]:
import json
data = json.dumps({"signature_name": "serving_default", "instances": x_test[0:3].tolist()})
print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))

Data: {"signature_name": "serving_default", "instances": ...  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]}


Первый экземпляр контейнера: localhost:32772

In [21]:
import requests
headers = {"content-type": "application/json"}
json_response = requests.post('http://localhost:32772/v1/models/news:predict', data=data, headers=headers)
predictions = json.loads(json_response.text)['predictions']
predictions

[[0.268494, 0.277034, 0.21562, 0.238852],
 [0.24658, 0.250593, 0.254366, 0.248461],
 [0.261858, 0.334612, 0.194442, 0.209088]]

In [22]:
for i in range(0,3):
  print("predicted: {}, real: {}".format(np.argmax(predictions[i]), newsgroups_test["target"][i]))

predicted: 1, real: 2
predicted: 2, real: 1
predicted: 1, real: 1


Второй экземпляр контейнера: localhost:32771

In [24]:
import requests
headers = {"content-type": "application/json"}
json_response = requests.post('http://localhost:32771/v1/models/news:predict', data=data, headers=headers)
predictions = json.loads(json_response.text)['predictions']
predictions

[[0.268494, 0.277034, 0.21562, 0.238852],
 [0.24658, 0.250593, 0.254366, 0.248461],
 [0.261858, 0.334612, 0.194442, 0.209088]]

In [25]:
for i in range(0,3):
  print("predicted: {}, real: {}".format(np.argmax(predictions[i]), newsgroups_test["target"][i]))

predicted: 1, real: 2
predicted: 2, real: 1
predicted: 1, real: 1
