In [None]:
# Issues: 1. checkpoint 2. how to control this random factor in our models

### tf.train.Saver()

A good practice is to periodically save the model’s parameters after a certain number of steps,
so that we can restore or retrain our model from that step if need be.

In [None]:
# NOTE(review): this reads as the API signature of Saver.save rather than a runnable call --
# `sess` and `save_path` are not defined anywhere in the visible part of this cell.
tf.train.Saver.save(sess, save_path, global_step=None, latest_filename=None, 
                    meta_graph_suffix='meta', write_meta_graph=True, write_state=True)

# For example, if we want to save the variables of the graph after every 1000 training steps, we
# do the following:

# define model

# How often (in training steps) a checkpoint is written. Kept as a single named value so the
# explanation above and the condition in the loop cannot drift apart.
SAVE_EVERY_N_STEPS = 1000

# create a saver object
saver = tf.train.Saver()

# launch a session to compute the graph
with tf.Session() as sess:
    # actual training loop
    for step in range(training_step):
        sess.run([optimizer])

        # `step` is 0-based, so (step + 1) is the number of steps completed so far.
        if (step + 1) % SAVE_EVERY_N_STEPS == 0:
            # Passing global_step makes the saver append the step number to the
            # checkpoint filename (e.g. model_name-1000).
            saver.save(sess, 'checkpoint_directory/model_name', global_step=model.global_step)
            
# global_step counts the training steps the model has gone through. We create it ourselves,
# initialize it to 0 and mark it as not trainable.
self.global_step = tf.Variable(
    0,
    dtype=tf.int32,
    trainable=False,
    name='global_step',
)
# Pass global_step to minimize() so the optimizer knows to increment it by one with each
# training step.
sgd = tf.train.GradientDescentOptimizer(self.lr)
self.optimizer = sgd.minimize(self.loss, global_step=self.global_step)

# To restore the variables, we use tf.train.Saver.restore(sess, save_path).
# The directory here is the same `checkpoints/` directory that is queried by
# get_checkpoint_state just below.
saver.restore(sess, 'checkpoints/skip-gram-10000')

# Before loading a checkpoint, we can check whether one exists.
# `ckpt` is falsy when no checkpoint state is found, so restore is only attempted
# when there is a recorded checkpoint path.
ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
# The file `checkpoint` is automatically updated with the path to the latest checkpoint.

# You can also choose which variables to store by handing them to the saver
# constructor, either as a dict or as a list.

v1 = tf.Variable(..., name='v1')
v2 = tf.Variable(..., name='v2')

# Option 1: a dict mapping checkpoint names to variables
by_name = {'v1': v1, 'v2': v2}
saver = tf.train.Saver(by_name)

# Option 2: a plain list of variables
as_list = [v1, v2]
saver = tf.train.Saver(as_list)

# Option 2 is equivalent to a dict keyed by each variable's op name
saver = tf.train.Saver(dict((v.op.name, v) for v in (v1, v2)))


#  Note that savers only save variables, not the entire graph, 
# so we still have to create the graph ourselves, and then load in variables. 
# The checkpoints specify the way to map from variable names to tensors.

### tf.summary

We use TensorBoard to show all statistics; a new name scope in our graph holds all the summary ops.

In [None]:
def _create_summaries(self):
    """Register the summary ops for TensorBoard and merge them into one op.

    Reads ``self.loss`` and ``self.accuracy`` and stores the merged summary
    op on ``self.summary_op``. All summary ops are created inside the
    ``"summaries"`` name scope.
    """
    with tf.name_scope("summaries"):
        tf.summary.scalar("loss", self.loss)
        tf.summary.scalar("accuracy", self.accuracy)
        # Summary names should not contain spaces: TensorFlow replaces illegal
        # characters with underscores and logs a message, so use the clean name directly.
        tf.summary.histogram("histogram_loss", self.loss)
        # Because there are several summaries, merge them all into one op
        # to make them easier to manage.
        self.summary_op = tf.summary.merge_all()

# Evaluate the merged summary op in the same run call as the loss and the training op.
# The attribute names match what the model defines: `loss`, `optimizer`, `summary_op`.
loss_batch, _, summary = sess.run([model.loss, model.optimizer, model.summary_op],
                                  feed_dict=feed_dict)

# NOTE(review): `writer` is not defined in this cell -- presumably a tf.summary.FileWriter
# created elsewhere; confirm.
writer.add_summary(summary, global_step=step)

# you can visualize the statistics as images using tf.summary.image
tf.summary.image(name, tensor, max_outputs=3, collections=None)

### Control randomization


In [None]:
# TensorFlow does not support a random state the way NumPy does, but randomization can be controlled in two ways

# 1. Set the random seed at the operation level. All random tensors allow you to pass in a
# seed value in their initialization. For example:
# (the first positional argument of tf.truncated_normal is the output shape, so it must be
# a list of integer dimensions; [2, 3] is just an example shape)
my_var = tf.Variable(tf.truncated_normal([2, 3], stddev=0.1, seed=0))
# Note that the session is the thing that keeps track of the random state, so each new
# session will start the random state all over again.
c = tf.random_uniform([], -10, 10, seed=2)

# Same session: the second run continues the random sequence.
with tf.Session() as sess:
    print(sess.run(c)) # >> 3.57493
    print(sess.run(c)) # >> -5.97319

c = tf.random_uniform([], -10, 10, seed=2)
# Two separate sessions: each one starts the sequence from the beginning.
with tf.Session() as sess:
    print(sess.run(c)) # >> 3.57493
with tf.Session() as sess:
    print(sess.run(c)) # >> 3.57493

# Each op keeps its own seed, so two ops created with the same seed produce the same values.
c = tf.random_uniform([], -10, 10, seed=2)
d = tf.random_uniform([], -10, 10, seed=2)

with tf.Session() as sess:
    print(sess.run(c)) # >> 3.57493
    print(sess.run(d)) # >> 3.57493

# 2. Set the random seed at the graph level (tf.Graph.seed) with tf.set_random_seed.
# NOTE(review): `seed` is not defined in this cell -- it stands for an integer of your choice.
tf.set_random_seed(seed) # just want to be able to replicate result on another graph

### Reading Data in Tensorflow
Two ways to load data into Tensorflow graph:
1. through feed_dict
2. through readers that allow us to read tensors directly from file

In [None]:
# How feed_dict works: feed_dict first sends data from the storage system to the client,
# and then from the client to the worker process. This slows down data loading, especially if the client is
# on a different machine from the worker process.
# TensorFlow readers
tf.TextLineReader
# outputs the lines of a file delimited by newlines, e.g. text files, CSV files

tf.FixedLengthRecordReader
# outputs fixed-length records from a file; use it when every record has the same fixed length,
# e.g. each MNIST image is 28 * 28 pixels, each CIFAR-10 image is 32 * 32 * 3

tf.WholeFileReader
# outputs the entire file content

tf.TFRecordReader
# reads samples from TensorFlow's own binary format (TFRecords)

tf.ReaderBase
# allows you to create your own readers

# Data can be read in as individual data examples or in batches of examples.

# Queue of input file names; the reader pulls the next file name from this queue.
filename_queue = tf.train.string_input_producer(["file0.csv", "file1.csv"])
reader = tf.TextLineReader()
# read() must be given the queue created above (`filename_queue`).
key, value = reader.read(filename_queue)

### tf.FIFOQueue

In [None]:
# A FIFO queue with capacity 3 holding floats, seeded with three zeros by `init`.
q = tf.FIFOQueue(3, "float")
init = q.enqueue_many(([0., 0., 0.],))

# One increment step: take the front element, add 1, and put it back at the end.
x = q.dequeue()
y = x + 1
q_inc = q.enqueue([y])

# NOTE(review): Operation.run() needs a default session -- assumed to be registered
# elsewhere in the notebook; confirm.
init.run()
# Run the increment step four times.
for _pass in range(4):
    q_inc.run()

# You can use tf.Coordinator and tf.QueueRunner to manage your queues
# Threads & Queues

with tf.Session() as sess:
    # start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)