In [2]:
import tensorflow as tf

In [37]:
# clean up default Graph() of tf
tf.reset_default_graph()

In [82]:
# to make this notebook's output stable across runs
def reset_graph(seed=1):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

## Section 1: Managing Computation Graphs in Tensorflow

In [39]:
# defining computation graph
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x * x * y + y + 2

#### Direct Way to Run a Simple Graph

In [40]:
# graph will not run until executed by session
sess = tf.Session()
# variables need to be initiated
sess.run(x.initializer)
sess.run(y.initializer)
# once all variables are initiated, run f
result = sess.run(f)

In [41]:
result

42

In [42]:
sess.close()

#### Use context manager with Session to run a Graph

In [43]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

In [44]:
result

42

#### Alternatively, define a "variable initializer" node in the graph

In [45]:
# prepare an init node
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # init node initializes all Variables
    init.run()
    result = f.eval()

In [46]:
result

42

#### Managing graphs when more than 1 is available

Notice that all of the 3 nodes' `.graph` attribute point to the exact same computation graph (default graph)

In [47]:
# graph of f created above is the default graph
print(f.graph is tf.get_default_graph())
print(x.graph is tf.get_default_graph())
print(y.graph is tf.get_default_graph())

True
True
True


In [48]:
# explicitly creating a new Graph() to store nodes
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
    # below line will raise exception since y is a node in
    # another graph
    #f2 = x2 * 3 * y
    f2 = x2 * x2 * 3
    # init will only initialize Variables **before** it
    # before to have init latest of the pack
    init = tf.global_variables_initializer()

In [49]:
# graph needs to be defined explicitly here
with tf.Session(graph=graph) as sess:
    init.run()
    result = f2.eval()

In [50]:
result

12

In [51]:
# the default graph was only teporarily set as graph
graph is tf.get_default_graph()

False

#### Lifecycle of Node Values

Node values are only computed when running a evaluation through the graph, and are discarded once the computation is done. Only `Variable` values are maintained.

In [52]:
graph1 = tf.Graph()
with graph1.as_default():
    w = tf.constant(10)
    x = w + 2
    y = x + 4
    z = x * 6

In [62]:
with tf.Session(graph=graph1) as sess:
    print(y.eval())
    print(z.eval())

16
72


In [65]:
w

<tf.Tensor 'Const:0' shape=() dtype=int32>

In [59]:
result

16

## Section 2: Running Linear Regression with TensorFlow

In [84]:
tf.reset_default_graph()

### Quick pull a sample dataset before running a TF Linear Regression

In [3]:
import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
# obtain m and n, m is number of records and n is number of columns
m, n = housing.data.shape
# add 1s to the matrix for each row, which is for model bias / intercept
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [4]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(housing_data_plus_bias)
scaled_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

In [5]:
print(m)
print(n)
print(scaled_housing_data_plus_bias[:2, :])

20640
8
[[ 0.          2.34476576  0.98214266  0.62855945 -0.15375759 -0.9744286
  -0.04959654  1.05254828 -1.32783522]
 [ 0.          2.33223796 -0.60701891  0.32704136 -0.26333577  0.86143887
  -0.09251223  1.04318455 -1.32284391]]


### Create TF Computation Graph using Normal Equation for Linear Regression

In [192]:
tf.reset_default_graph()
# load Variable as constant tensors
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# Calculate theta using the normal equation
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

In [193]:
with tf.Session() as sess:
    theta_value = theta.eval()

In [194]:
print(theta_value)

[[-3.7185181e+01]
 [ 4.3633747e-01]
 [ 9.3952334e-03]
 [-1.0711310e-01]
 [ 6.4479220e-01]
 [-4.0338000e-06]
 [-3.7813708e-03]
 [-4.2348403e-01]
 [-4.3721911e-01]]


## Create TF Computation Graph using Gradient Descent for Linear Regression

### Option 1: Manual Computation of Gradient Descent (Batch GD)

**IMPORTANT:**
**Rescaling input data by column (feature) such that features all have same scale. This is VITAL for any GD solution!**

Define the construction phase - build the `Graph()`

In [37]:
# reset default graph
tf.reset_default_graph()

# define hyper parameter
n_epochs = 1000
learning_rate = 0.01

# load training data
# note that X and y are computed directly, indicating this is a
# Batch GD
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# start a random theta for GB. It should have (n + 1) elements
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=1), name='theta')

# calculate y_pred
y_pred = tf.matmul(X, theta, name='predictions')

# calculate error
error = y_pred - y
# calculate mean square error
mse = tf.reduce_mean(tf.square(error), name='mse')

# calculate gradients of the mse (loss function)
gradients = 2 / m * tf.matmul(tf.transpose(X), error)

# define GD update formula
# note: tf.assign updates the Variable with new theta
training_op = tf.assign(theta, theta - learning_rate * gradients)

# last but not least, create a init node
init = tf.global_variables_initializer()

Define the execution phase - run the `Graph()`

In [38]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # note here when mse is evaluated, it does not call learning_op
            # which actually updates the theta
            print("Epoch {0}: MSE = {1}".format(epoch, mse.eval()))
        # note that training_op is run here which updates theta in ach epoch
        sess.run(training_op)
    # after all epochs are done, the theta has been assigned n_epoch times
    best_theta = theta.eval()
    print(best_theta)

Epoch 0: MSE = 8.274792671203613
Epoch 100: MSE = 4.990600109100342
Epoch 200: MSE = 4.881271839141846
Epoch 300: MSE = 4.860585689544678
Epoch 400: MSE = 4.848357200622559
Epoch 500: MSE = 4.839105129241943
Epoch 600: MSE = 4.831891059875488
Epoch 700: MSE = 4.826225757598877
Epoch 800: MSE = 4.821752548217773
Epoch 900: MSE = 4.818204402923584
[[-0.5219252 ]
 [ 0.9154371 ]
 [ 0.15288655]
 [-0.3939141 ]
 [ 0.3970172 ]
 [ 0.00679991]
 [-0.04404339]
 [-0.57045436]
 [-0.54946506]]


### Option 2: Using TF's `autodiff` features to compute gradients

Use the `tf.gradients()` to automatically calculate the gradient based on the computation graph

In [39]:
# reset default graph
tf.reset_default_graph()

# define hyper parameter
n_epochs = 1000
learning_rate = 0.01

# load training data
# note that X and y are computed directly, indicating this is a
# Batch GD
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# start a random theta for GB. It should have (n + 1) elements
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=1), name='theta')

# calculate y_pred
y_pred = tf.matmul(X, theta, name='predictions')

# calculate error
error = y_pred - y
# calculate mean square error
mse = tf.reduce_mean(tf.square(error), name='mse')

# replace hand-written gradient computation with
# tf.gradients() returns a list of gradients with regard
# to each x
gradients = tf.gradients(ys=[mse], xs=[theta])[0]

# define GD update formula
# note: tf.assign updates the Variable with new theta
training_op = tf.assign(theta, theta - learning_rate * gradients)

# last but not least, create a init node
init = tf.global_variables_initializer()

In [40]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # note here when mse is evaluated, it does not call learning_op
            # which actually updates the theta
            print("Epoch {0}: MSE = {1}".format(epoch, mse.eval()))
        # note that training_op is run here which updates theta in ach epoch
        sess.run(training_op)
    # after all epochs are done, the theta has been assigned n_epoch times
    best_theta = theta.eval()
    print(best_theta)

Epoch 0: MSE = 8.274792671203613
Epoch 100: MSE = 4.990600109100342
Epoch 200: MSE = 4.881271839141846
Epoch 300: MSE = 4.860585689544678
Epoch 400: MSE = 4.848357200622559
Epoch 500: MSE = 4.839105129241943
Epoch 600: MSE = 4.831891059875488
Epoch 700: MSE = 4.826225757598877
Epoch 800: MSE = 4.821752548217773
Epoch 900: MSE = 4.818204402923584
[[-0.5219252 ]
 [ 0.9154371 ]
 [ 0.15288658]
 [-0.3939141 ]
 [ 0.3970172 ]
 [ 0.00679992]
 [-0.0440434 ]
 [-0.57045406]
 [-0.5494649 ]]


### Option 3: Using TF's native optimizer

In [41]:
# reset default graph
tf.reset_default_graph()

# define hyper parameter
n_epochs = 1000
learning_rate = 0.01

# load training data
# note that X and y are computed directly, indicating this is a
# Batch GD
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# start a random theta for GB. It should have (n + 1) elements
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=1), name='theta')

# calculate y_pred
y_pred = tf.matmul(X, theta, name='predictions')

# calculate error
error = y_pred - y
# calculate mean square error
mse = tf.reduce_mean(tf.square(error), name='mse')

In [None]:
# replace hand-written gradient computation with
# tf native optimizer that calculates gradients automatically
#optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

In [None]:
# last but not least, create a init node
init = tf.global_variables_initializer()

In [42]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # note here when mse is evaluated, it does not call learning_op
            # which actually updates the theta
            print("Epoch {0}: MSE = {1}".format(epoch, mse.eval()))
        # note that training_op is run here which updates theta in ach epoch
        sess.run(training_op)
    # after all epochs are done, the theta has been assigned n_epoch times
    best_theta = theta.eval()
    print(best_theta)

Epoch 0: MSE = 8.274792671203613
Epoch 100: MSE = 4.815185546875
Epoch 200: MSE = 4.80463981628418
Epoch 300: MSE = 4.8034348487854
Epoch 400: MSE = 4.803277969360352
Epoch 500: MSE = 4.803256511688232
Epoch 600: MSE = 4.803253650665283
Epoch 700: MSE = 4.803253650665283
Epoch 800: MSE = 4.8032546043396
Epoch 900: MSE = 4.8032546043396
[[-0.5219252 ]
 [ 0.8296325 ]
 [ 0.11875413]
 [-0.26555166]
 [ 0.30571672]
 [-0.00450233]
 [-0.03932676]
 [-0.8998556 ]
 [-0.87051237]]


## Creating Mini-batch GD Using `tf.placeholder` ops to train Linear Regression

In [60]:
# reset default graph
tf.reset_default_graph()

# define hyper parameter
n_epochs = 1000
learning_rate = 0.01

In [61]:
# define mini batch size and number of batches
mini_batch_size = 1000
n_batches = int(np.ceil(m / mini_batch_size))

# load training data
# note that X and y are shaped placeholder ready to take
# mini batch at execution phase
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

In [62]:
# start a random theta for GB. It should have (n + 1) elements
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=1), name='theta')

# calculate y_pred
y_pred = tf.matmul(X, theta, name='predictions')

# calculate error
error = y_pred - y
# calculate mean square error
mse = tf.reduce_mean(tf.square(error), name='mse')

# replace hand-written gradient computation with
# tf native optimizer that calculates gradients automatically
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
#optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

# last but not least, create a init node
init = tf.global_variables_initializer()

In [63]:
def fetch_data(n_records, batch_index, batch_size):
    start_index = batch_index * batch_size
    end_index = min((batch_index + 1) * batch_size, n_records)
    X_batch = scaled_housing_data_plus_bias[start_index:end_index, :]
    y_batch = housing.target.reshape(-1, 1)[start_index:end_index, :]
    return X_batch, y_batch

**Note:**

`.eval()` method for any computation node will need a `feed_dict` if the note is depend on any `placeholder` ops along its graph path

In [64]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch {0} MSE: {1}".format(epoch, mse.eval(feed_dict={X:X_batch, y:y_batch})))
        for batch_index in range(n_batches):
            # obtain batch data from the source
            X_batch, y_batch = fetch_data(m, batch_index, batch_size=mini_batch_size)
            # feed the batch data to the placeholder ops
            sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
    # after all epochs are done, the theta has been assigned n_epoch times
    best_theta = theta.eval()
    print(best_theta)

Epoch 0 MSE: 5.323634147644043
Epoch 100 MSE: 4.386706352233887
Epoch 200 MSE: 4.339206218719482
Epoch 300 MSE: 4.332136154174805
Epoch 400 MSE: 4.331105709075928
Epoch 500 MSE: 4.33095645904541
Epoch 600 MSE: 4.330934047698975
Epoch 700 MSE: 4.330931186676025
Epoch 800 MSE: 4.330931186676025
Epoch 900 MSE: 4.330931186676025
[[-0.5219252 ]
 [ 0.86755925]
 [ 0.10342795]
 [-0.28372166]
 [ 0.33215836]
 [ 0.00840846]
 [-0.03587097]
 [-0.8639838 ]
 [-0.9650578 ]]


## TF's built in serialization tool

In [66]:
# reset default graph
tf.reset_default_graph()

# define hyper parameter
n_epochs = 1000
learning_rate = 0.01
mini_batch_size = 1000
n_batches = int(np.ceil(m / mini_batch_size))

# load training data
# note that X and y are shaped placeholder ready to take
# mini batch at execution phase
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

# start a random theta for GB. It should have (n + 1) elements
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=1), name='theta')

# calculate y_pred
y_pred = tf.matmul(X, theta, name='predictions')

# calculate error
error = y_pred - y
# calculate mean square error
mse = tf.reduce_mean(tf.square(error), name='mse')

# replace hand-written gradient computation with
# tf native optimizer that calculates gradients automatically
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
#optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

# last but not least, create a init node
init = tf.global_variables_initializer()

In [67]:
# add a Saver() node at the end of the graph construction phase
saver = tf.train.Saver()

In [68]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch {0} MSE: {1}".format(epoch, mse.eval(feed_dict={X:X_batch, y:y_batch})))
            # save the model to a path
            save_path = saver.save(sess, "/tmp/my_model.ckpt")
        for batch_index in range(n_batches):
            # obtain batch data from the source
            X_batch, y_batch = fetch_data(m, batch_index, batch_size=mini_batch_size)
            # feed the batch data to the placeholder ops
            sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
    # after all epochs are done, the theta has been assigned n_epoch times
    best_theta = theta.eval()
    print(best_theta)
    save_path = saver.save(sess, "/tmp/my_model.ckpt")

Epoch 0 MSE: 5.323634147644043
Epoch 100 MSE: 4.386706352233887
Epoch 200 MSE: 4.339206218719482
Epoch 300 MSE: 4.332136154174805
Epoch 400 MSE: 4.331105709075928
Epoch 500 MSE: 4.33095645904541
Epoch 600 MSE: 4.330934047698975
Epoch 700 MSE: 4.330931186676025
Epoch 800 MSE: 4.330931186676025
Epoch 900 MSE: 4.330931186676025
[[-0.5219252 ]
 [ 0.86755925]
 [ 0.10342795]
 [-0.28372166]
 [ 0.33215836]
 [ 0.00840846]
 [-0.03587097]
 [-0.8639838 ]
 [-0.9650578 ]]


**Note**

The `Saver` node saves the needed variables in the path (in this case, it is the `theta` Variable). When serialization is needed, call `Saver().restore()` to use the stored model values instead of the `init` node

In [69]:
with tf.Session() as sess:
    #sess.run(init)
    saver.restore(sess, '/tmp/my_model.ckpt')
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch {0} MSE: {1}".format(epoch, mse.eval(feed_dict={X:X_batch, y:y_batch})))
            # save the model to a path
            save_path = saver.save(sess, "/tmp/my_model.ckpt")
        for batch_index in range(n_batches):
            # obtain batch data from the source
            X_batch, y_batch = fetch_data(m, batch_index, batch_size=mini_batch_size)
            # feed the batch data to the placeholder ops
            sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
    # after all epochs are done, the theta has been assigned n_epoch times
    best_theta = theta.eval()
    print(best_theta)
    save_path = saver.save(sess, "/tmp/my_model.ckpt")

W0820 22:40:43.425536 16560 deprecation.py:323] From C:\Users\oycy\AppData\Roaming\Python\Python35\site-packages\tensorflow\python\training\saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


Epoch 0 MSE: 4.330931186676025
Epoch 100 MSE: 4.330931186676025
Epoch 200 MSE: 4.330931186676025
Epoch 300 MSE: 4.330931186676025
Epoch 400 MSE: 4.330931186676025
Epoch 500 MSE: 4.330931186676025
Epoch 600 MSE: 4.330931186676025
Epoch 700 MSE: 4.330931186676025
Epoch 800 MSE: 4.330931186676025
Epoch 900 MSE: 4.330931186676025
[[-0.5219252 ]
 [ 0.86755925]
 [ 0.10342795]
 [-0.28372166]
 [ 0.33215836]
 [ 0.00840846]
 [-0.03587097]
 [-0.8639838 ]
 [-0.9650578 ]]


Alternatively, **the whole graph structure** can be loaded instead, and the Variable states can be restored

In [87]:
reset_graph()

In [88]:
# loads the graph structure to default graph
saver = tf.train.import_meta_graph('/tmp/my_model.ckpt.meta')
# obtains the Variable
theta = tf.get_default_graph().get_tensor_by_name('theta:0')
graph = tf.get_default_graph()

In [91]:
with tf.Session(graph=graph) as sess:
    #sess.run(init)
    saver.restore(sess, '/tmp/my_model.ckpt')
    best_theta_restored = theta.eval()
    print(best_theta_restored)

[[-0.5219252 ]
 [ 0.86755925]
 [ 0.10342795]
 [-0.28372166]
 [ 0.33215836]
 [ 0.00840846]
 [-0.03587097]
 [-0.8639838 ]
 [-0.9650578 ]]


## Visualizing the graph

First create a dynamic directory creation system

In [119]:
reset_graph()

from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = 'tf_logs'
logdir = "{}/run-{}/".format(root_logdir, now)

Then define the computation graph as usual

In [120]:
# reset default graph
tf.reset_default_graph()
n_epochs = 1000
learning_rate = 0.01
mini_batch_size = 100
n_batches = int(np.ceil(m / mini_batch_size))
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=1), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

Then define logging system. The `summary` is a TensorBoard compatible format that logs information for Tensorboard. 

In [121]:
# the summary scalar records the mse to a summary object
mse_summary = tf.summary.scalar('MSE', mse)
# the FileWriter writes the information to a designated location,
# for the currently default graph
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

Add actions in the execution phase to record the mse along the way

In [122]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch {0} MSE: {1}".format(epoch, mse.eval(feed_dict={X:X_batch, y:y_batch})))
        for batch_index in range(n_batches):
            # obtain batch data from the source
            X_batch, y_batch = fetch_data(m, batch_index, batch_size=mini_batch_size)
            # create a logging system that writes performance while training
            if batch_index % 10 == 0:
                # the summary str is calculated. Notice that mse_summary
                # knows to look for mse to eval
                summary_str = mse_summary.eval(feed_dict={X:X_batch, y:y_batch})
                # step is total number of minibatches from beginning
                step = epoch * n_batches + batch_index
                # call the file_writer to add the above information
                file_writer.add_summary(summary_str, step)
            # feed the batch data to the placeholder ops
            sess.run(training_op, feed_dict={X:X_batch, y:y_batch})
    # after all epochs are done, the theta has been assigned n_epoch times
    best_theta = theta.eval()
    print(best_theta)

Epoch 0 MSE: 3.4942097663879395
Epoch 100 MSE: nan
Epoch 200 MSE: nan
Epoch 300 MSE: nan
Epoch 400 MSE: nan
Epoch 500 MSE: nan
Epoch 600 MSE: nan
Epoch 700 MSE: nan
Epoch 800 MSE: nan
Epoch 900 MSE: nan
[[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]


In [123]:
# close connection of the writer to the disk
file_writer.close()

**The records of MSE scalar is now traceable in Tensorboard**