# Train Model with GPU (and CPU*)
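Note: the intent is to store the variables `W` and `b` on the CPU while the GPU stays focused on compute. The `tf.device(...)` lines in the code cells below are currently commented out, so device placement is left to TensorFlow.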

In [None]:
import tensorflow as tf
from tensorflow.python.client import timeline
import pylab
import numpy as np

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use the 'retina' figure format for inline figures.
%config InlineBackend.figure_format = 'retina'

# Set TensorFlow's logging verbosity to INFO.
tf.logging.set_verbosity(tf.logging.INFO)

In [None]:
# Reset TensorFlow's global default graph before any ops are created below.
tf.reset_default_graph()

In [None]:
# Sample count. Not referenced by any other cell visible in this notebook.
# NOTE(review): presumably the row count of the training CSV — confirm or remove.
num_samples = 100000

In [None]:
import time
from datetime import datetime

# Version identifier for this run: the current local time as whole seconds since
# the Unix epoch. time.mktime() is used instead of strftime("%s") because "%s" is
# a platform-specific extension that is not available on every OS.
# The value names the TensorBoard log directory and the model export directory.
version = int(time.mktime(datetime.now().timetuple()))
print(version)

## Create Session

In [None]:
# Session configuration: log the device each op is placed on, and enable the
# allow_growth GPU option.
config = tf.ConfigProto(log_device_placement=True)
config.gpu_options.allow_growth = True
print(config)

# Session shared by the later cells of this notebook (queue runners, training,
# export and clean-up all use it).
sess = tf.Session(config=config)
print(sess)

## Load and Shuffle Training Data 
`tf.train.shuffle_batch` uses `tf.RandomShuffleQueue` internally.

`min_after_dequeue` defines the buffer size when randomly sampling.  Larger buffers require more RAM, but provide better shuffling characteristics.

`capacity` must be larger than `min_after_dequeue`.  The difference in size becomes the prefetch maximum.

`capacity` = `batch_size` * (`num_threads` + `some_safety_margin`) + `min_after_dequeue`

In [None]:
#with tf.device("/cpu:0"):
# Input pipeline: filename queue -> text line reader -> CSV decode -> shuffled batches.
training_queue = tf.train.string_input_producer([
  "hdfs://127.0.0.1:39000/linear/training.csv",
], capacity=25000)
print(training_queue)

training_reader = tf.TextLineReader()
print(training_reader)

# Each line is decoded into two columns (x, y); the [[0.0],[0.0]] record defaults
# make both columns floats.
_, training_value = training_reader.read(training_queue)
x_training, y_training = tf.decode_csv(training_value, [[0.0],[0.0]])

# Shuffle-queue sizing. The capacity is derived from the sizing rule
#   capacity = batch_size * (num_threads + safety_margin) + min_after_dequeue
# so it stays consistent if the batch size or thread count is tuned.
training_batch_size = 1000
training_num_threads = 40
training_min_after_dequeue = 10000
training_safety_margin = 3
training_capacity = (training_batch_size * (training_num_threads + training_safety_margin)
                     + training_min_after_dequeue)

x_training_batch, y_training_batch = \
    tf.train.shuffle_batch([x_training, y_training],
                           batch_size=training_batch_size,
                           capacity=training_capacity,
                           num_threads=training_num_threads,
                           min_after_dequeue=training_min_after_dequeue)

# The coordinator is kept so the clean-up cell can stop and join the threads.
training_coord = tf.train.Coordinator()

# Start the queue-runner threads that fill the queues defined above.
training_enqueue_threads = tf.train.start_queue_runners(sess=sess,
                                                        coord=training_coord)

print(training_enqueue_threads)

print("Training Enqueue Thread Pool Size: %d" % len(training_enqueue_threads))



In [None]:
# TODO:
# pylab.plot(x_train, y_train, '.')

In [None]:
# TODO:
#pylab.plot(x_test, y_test, '.')

In [None]:
#with tf.device("/gpu:0"):
# Scalar (shape []) weight variable. No initializer is passed, so whatever
# default tf.get_variable applies is used.
W = tf.get_variable(shape=[], name='weights')
print(W)

# Scalar (shape []) bias variable, created the same way as W.
b = tf.get_variable(shape=[], name='bias')
print(b)

#with tf.device("/gpu:0"):
# Linear model applied to the current training batch.
y_pred_batch = W * x_training_batch + b
print(y_pred_batch)

#with tf.device("/gpu:0"):
# Mean squared error between the predictions and the labels of the batch.
training_loss_op = tf.reduce_mean(tf.square(y_pred_batch - y_training_batch))
# Gradient descent with a learning rate of 0.025; training_op minimizes the loss.
optimizer_op = tf.train.GradientDescentOptimizer(0.025)
training_op = optimizer_op.minimize(training_loss_op)  

print("Loss Scalar: ", training_loss_op)
print("Optimizer Op: ", optimizer_op)
print("Training Op", training_op)

## View Model Graph in Tensorboard

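Navigate to the Graph tab at this URL. The graph is written by the `tf.summary.FileWriter` created in the "Train the Model!" cell, so it appears in TensorBoard once that cell has run: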

http://[ip-address]:6006

## Randomly Initialize Variables (Weights and Bias)
The goal is to learn more accurate Weights and Bias during training.

In [None]:
#with tf.device("/cpu:0"):
# Build the op that initializes all global variables, then run it once.
init_op = tf.global_variables_initializer()
print(init_op)
sess.run(init_op)

# Show the starting values of the weight and the bias.
initial_weight = sess.run(W)
print("W: %f" % initial_weight)
initial_bias = sess.run(b)
print("b: %f" % initial_bias)

## View Loss of Initial Random Variables (Pre-Training)
The cell below evaluates the training loss (mean squared error) on one batch, not an accuracy. It should be relatively high because we have not trained the model.

In [None]:
%%time

#with tf.device("/gpu:0"):
loss = sess.run(training_loss_op)

print("Accuracy: %f" % (loss*100))

## Setup Loss Summary Operations for Tensorboard

In [None]:
# Scalar summary that records training_loss_op under the tag 'training_loss'.
training_loss_summary_scalar_op = tf.summary.scalar('training_loss', training_loss_op)
# Merged summary op; this is the op the training loop runs on every step.
training_loss_summary_merge_all_op = tf.summary.merge_all()
print(training_loss_summary_scalar_op)
print(training_loss_summary_merge_all_op)

## Train the Model!

In [None]:
%%time

run_metadata = tf.RunMetadata()

training_summary_writer = tf.summary.FileWriter('/root/tensorboard/linear/gpu/%s/training' % version, 
                                                 graph=tf.get_default_graph())

#with tf.device("/gpu:0"):
max_steps = 201
for step in range(max_steps - 1):
    if (step < max_steps):
        training_summary_log, _ = sess.run([training_loss_summary_merge_all_op, training_op]) 
    else:  
        training_summary_log, _ = sess.run([training_loss_summary_merge_all_op, training_op],
                                            options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), 
                                            run_metadata=run_metadata)

        trace = timeline.Timeline(step_stats=run_metadata.step_stats)    
        with open('gpu-timeline.json', 'w') as trace_file:
            trace_file.write(trace.generate_chrome_trace_format(show_memory=True))

    if step % 10 == 0:
        training_summary_writer.add_summary(training_summary_log, step)
        training_summary_writer.flush()

    if step % 100 == 0:
        print(step, sess.run([W, b]))

In [None]:
#pylab.plot(x_train, y_train, '.', label="target")
#pylab.plot(x_train, sess.run(y_pred, feed_dict={x_observed: x_train, y_observed: y_train}), ".", label="predicted")
#pylab.legend()
#pylab.ylim(0, 1.0)

## View Loss Scalar Summary in Tensorboard

Navigate to the Scalars tab at this URL:

http://[ip-address]:6006

## Save Model Graph and Variables for Deployment

In [None]:
from tensorflow.python.saved_model import utils

# TensorInfo for the model input; it is built from the shuffled training batch tensor.
tensor_info_x_observed = utils.build_tensor_info(x_training_batch)
print(tensor_info_x_observed)

# TensorInfo for the model output (the linear prediction for the batch).
tensor_info_y_pred = utils.build_tensor_info(y_pred_batch)
print(tensor_info_y_pred)

In [None]:
# Per-version directory that the SavedModel is exported to.
export_path = "/root/models/linear/gpu/%s" % (version,)
print(export_path)

In [None]:
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import signature_def_utils
from tensorflow.python.saved_model import tag_constants

#with tf.device("/cpu:0"):
# Builder that writes the SavedModel under export_path.
builder = saved_model_builder.SavedModelBuilder(export_path)

# Predict signature mapping 'inputs' -> x tensor info and 'outputs' -> prediction tensor info.
prediction_signature =  signature_def_utils.build_signature_def(
    inputs = {'inputs': tensor_info_x_observed}, 
    outputs = {'outputs': tensor_info_y_pred}, 
    method_name = signature_constants.PREDICT_METHOD_NAME)            

# Init op passed to the builder; it groups the tables initializer.
legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')

# Add the graph and current variable values from sess under the SERVING tag.
# The same prediction signature is registered under both "predict_linear" and
# the default serving signature key.
builder.add_meta_graph_and_variables(sess, 
   [tag_constants.SERVING], clear_devices=True,
   signature_def_map={"predict_linear" : prediction_signature,
                      signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY : prediction_signature}, 
                      legacy_init_op=legacy_init_op)

# Write the SavedModel to disk.
builder.save()


In [None]:
#!rm -rf /root/models/linear/gpu/

## Save Graph for Optimization and Transformation
We will use this later.

In [None]:
from tensorflow.python.framework import graph_io

# Write the session's graph to /root/models/optimize_me/unoptimized_gpu.pb.
# NOTE(review): as_text is not passed, so write_graph's default serialization
# format applies — confirm it matches what the later optimization step expects.
graph_io.write_graph(sess.graph, 
                     "/root/models/optimize_me/", 
                     "unoptimized_gpu.pb")

## View Saved Model on Disk

You must replace `[version]` with the version number from above ^^

In [None]:
%%bash

ls -l /root/models/linear/gpu/[version]

## Clean Up

In [None]:
# Stop the input pipeline first: the queue-runner threads were started with this
# session, so it stays open until they have been asked to stop and joined.
try:
    training_coord.request_stop()
    training_coord.join(training_enqueue_threads)
finally:
    # Release the session and the TensorBoard writer even if join() raises.
    sess.close()
    training_summary_writer.close()