# Train Model with CPU

In [45]:
import tensorflow as tf
from tensorflow.python.client import timeline
import pylab
import numpy as np

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

tf.logging.set_verbosity(tf.logging.INFO)

In [46]:
tf.reset_default_graph()

In [47]:
num_samples = 100000

In [48]:
from datetime import datetime 

version = int(datetime.now().strftime("%s"))
print(version)

1494219833


## Create Session

In [49]:
config = tf.ConfigProto(
  log_device_placement=True,
)
print(config)

sess = tf.Session(config=config)
print(sess)

log_device_placement: true

<tensorflow.python.client.session.Session object at 0x7f65d409ba58>


## Load and Shuffle Training Data 
`tf.train.shuffle_batch` uses `tf.RandomShuffleQueue` internally.

`min_after_dequeue` defines the buffer size when randomly sampling.  Larger buffers require more RAM, but provide better shuffling characteristics.

`capacity` must be larger than `min_after_dequeue`.  The difference in size becomes the prefetch maximum.

`capacity` = `batch_size` * (`num_threads` + `some_safety_margin`) + `min_after_dequeue`

In [50]:
training_queue = tf.train.string_input_producer([
  "hdfs://127.0.0.1:39000/linear/training.csv",
])

training_reader = tf.TextLineReader()
_, training_value = training_reader.read(training_queue)

x_training, y_training = tf.decode_csv(training_value, [[0.0],[0.0]])

x_training_batch, y_training_batch = \
    tf.train.shuffle_batch([x_training, y_training], 
                           batch_size=1000,
                           capacity=25000,
                           num_threads=24,
                           min_after_dequeue=10000)

#with tf.Session() as sess:
training_coord = tf.train.Coordinator()

training_enqueue_threads = tf.train.start_queue_runners(sess=sess, 
                                                        coord=training_coord)

print("Training Enqueue Thread Pool Size: %d" % len(training_enqueue_threads))

Training Enqueue Thread Pool Size: 27


In [51]:
# TODO:
# pylab.plot(x_train, y_train, '.')

In [52]:
# TODO:
#pylab.plot(x_test, y_test, '.')

In [53]:
# TODO:

with tf.device("/gpu:0"):
    W = tf.get_variable(shape=[], name='weights')
    print(W)

    b = tf.get_variable(shape=[], name='bias')
    print(b)
    

with tf.device("/gpu:0"):
    y_pred_batch = W * x_training_batch + b
    print(y_pred_batch)

    
with tf.device("/gpu:0"):
#    y_observed_batch = tf.placeholder(shape=[None], dtype=tf.float32, name='y_observed')
#    print(y_observed_batch)
    
    loss_op = tf.reduce_mean(tf.square(y_pred_batch - y_training_batch))
    optimizer_op = tf.train.GradientDescentOptimizer(0.025)
    training_op = optimizer_op.minimize(loss_op)  

    print("Loss Scalar: ", loss_op)
    print("Optimizer Op: ", optimizer_op)
    print("Training Op", training_op)

<tf.Variable 'weights:0' shape=() dtype=float32_ref>
<tf.Variable 'bias:0' shape=() dtype=float32_ref>
Tensor("add:0", shape=(1000,), dtype=float32, device=/device:GPU:0)
Loss Scalar:  Tensor("Mean:0", shape=(), dtype=float32, device=/device:GPU:0)
Optimizer Op:  <tensorflow.python.training.gradient_descent.GradientDescentOptimizer object at 0x7f658c06c550>
Training Op name: "GradientDescent"
op: "NoOp"
input: "^GradientDescent/update_weights/ApplyGradientDescent"
input: "^GradientDescent/update_bias/ApplyGradientDescent"
device: "/device:GPU:0"



## View Model Graph in Tensorboard

Navigate to the Graph tab at this URL:

http://[ip-address]:6006

## Randomly Initialize Variables (Weights and Bias)
The goal is to learn more accurate Weights and Bias during training.

In [54]:
with tf.device("/gpu:0"):
    init_op = tf.global_variables_initializer()
    print(init_op)

sess.run(init_op)
print("W: %f" % sess.run(W))
print("b: %f" % sess.run(b))    

name: "init"
op: "NoOp"
input: "^weights/Assign"
input: "^bias/Assign"
device: "/device:GPU:0"

W: 1.659906
b: -0.596357


## View Accuracy of Initial Random Variables (Pre-Training)
This should be relatively low because we have not trained the model.

In [55]:
%%time

loss = sess.run(loss_op)

print("Accuracy: %f" % (loss*100))

Accuracy: 21.479048
CPU times: user 48 ms, sys: 20 ms, total: 68 ms
Wall time: 23.9 ms


In [56]:
training_summary_writer = tf.summary.FileWriter('/root/tensorboard/linear/cpu/%s/training' % version, 
                                                 graph=tf.get_default_graph())

#validation_summary_writer = tf.summary.FileWriter('root/tensorboard/linear/cpu/%s/validation' % version, 
#                                                 graph=tf.get_default_graph())

## Setup Loss Summary Operations for Tensorboard

In [57]:
loss_summary_scalar_op = tf.summary.scalar('loss', loss_op)
loss_summary_merge_all_op = tf.summary.merge_all()

## Train the Model!

In [59]:
%%time

run_metadata = tf.RunMetadata()

max_steps = 401
for step in range(max_steps - 1):
    if (step < max_steps):
        training_summary_log, _ = sess.run([loss_summary_merge_all_op, training_op]) 
    else:  
        training_summary_log, _ = sess.run([loss_summary_merge_all_op, training_op],
                                            options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), 
                                            run_metadata=run_metadata)
    
    trace = timeline.Timeline(step_stats=run_metadata.step_stats)    
    with open('cpu-timeline.json', 'w') as trace_file:
        trace_file.write(trace.generate_chrome_trace_format(show_memory=True))

    if step % 10 == 0:
        training_summary_writer.add_summary(training_summary_log, step)
        training_summary_writer.flush()
        if step % 100 == 0:
            print(step, sess.run([W, b]))

0 [0.5244115, 0.073068172]
100 [0.40456215, 0.13596374]
200 [0.31920508, 0.1822772]
300 [0.25748399, 0.21509486]
CPU times: user 2min 36s, sys: 1min 5s, total: 3min 42s
Wall time: 1min 9s


In [38]:
#pylab.plot(x_train, y_train, '.', label="target")
#pylab.plot(x_train, sess.run(y_pred, feed_dict={x_observed: x_train, y_observed: y_train}), ".", label="predicted")
#pylab.legend()
#pylab.ylim(0, 1.0)

## View Loss Scalar Summary in Tensorboard

Navigate to the Scalars tab at this URL:

http://[ip-address]:6006

## Save Model Graph and Variables for Deployment

In [60]:
from tensorflow.python.saved_model import utils

tensor_info_x_observed = utils.build_tensor_info(x_training_batch)
print(tensor_info_x_observed)

tensor_info_y_pred = utils.build_tensor_info(y_pred_batch)
print(tensor_info_y_pred)

name: "shuffle_batch:0"
dtype: DT_FLOAT
tensor_shape {
  dim {
    size: 1000
  }
}

name: "add:0"
dtype: DT_FLOAT
tensor_shape {
  dim {
    size: 1000
  }
}



In [61]:
export_path = "/root/models/linear/cpu/%s" % version

print(export_path)

/root/models/linear/cpu/1494219833


In [62]:
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import signature_def_utils
from tensorflow.python.saved_model import tag_constants

with tf.device("/cpu:0"):
  builder = saved_model_builder.SavedModelBuilder(export_path)

prediction_signature =  signature_def_utils.build_signature_def(
    inputs = {'inputs': tensor_info_x_observed}, 
    outputs = {'outputs': tensor_info_y_pred}, 
    method_name = signature_constants.PREDICT_METHOD_NAME)            

legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')

builder.add_meta_graph_and_variables(sess, 
  [tag_constants.SERVING],
  signature_def_map={"predict_linear":prediction_signature,
                      signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:prediction_signature}, 
                      legacy_init_op=legacy_init_op)

builder.save()

INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.InvalidArgumentError'>, Cannot assign a device to node 'save/ShardedFilename': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
	 [[Node: save/ShardedFilename = ShardedFilename[_device="/device:GPU:0"](save/StringJoin, save/ShardedFilename/shard, save/num_shards)]]

Caused by op 'save/ShardedFilename', defined at:
  File "/opt/conda/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/opt/conda/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/opt/conda/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
 

NotFoundError: FeedInputs: unable to find feed output save/Const:0

## Save Graph for Optimization and Transformation
We will use this later.

In [None]:
from tensorflow.python.framework import graph_io

graph_io.write_graph(sess.graph, 
                     "/root/models/optimize_me/", 
                     "unoptimized_cpu.pb")

## View Saved Model on Disk

You must replace `[version]` with the version number from above ^^

In [None]:
%%bash

ls -l /root/models/linear/cpu/[version]

## Validate Model
This should be relatively high after training the model.

In [None]:
validation_queue = tf.train.string_input_producer([
  "hdfs://127.0.0.1:39000/linear/validation.csv",
])

validation_reader = tf.TextLineReader()
_, validation_value = validation_reader.read(validation_queue)

x_validation, y_validation = tf.decode_csv(validation_value, [[0.0],[0.0]])

x_validation_batch, y_validation_batch = \
    tf.train.batch([x_validation, y_validation], 
                    batch_size=100,
                    capacity=2000)

validation_coord = tf.train.Coordinator()

validation_enqueue_threads = tf.train.start_queue_runners(sess=sess,
                                                          coord=validation_coord)
print("Validation Enqueue Thread Pool Size: %d" % len(validation_enqueue_threads))

## Clean Up

In [57]:
training_coord.request_stop()
training_coord.join(training_enqueue_threads)

validation_coord.request_stop()
validation_coord.join(validation_enqueue_threads)

sess.close()