In [1]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [37]:
# clean up default Graph() of tf
tf.reset_default_graph()

## Section 1: Managing Computation Graphs in Tensorflow

In [39]:
# defining computation graph
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x * x * y + y + 2

#### Direct Way to Run a Simple Graph

In [40]:
# graph will not run until executed by session
sess = tf.Session()
# variables need to be initiated
sess.run(x.initializer)
sess.run(y.initializer)
# once all variables are initiated, run f
result = sess.run(f)

In [41]:
result

42

In [42]:
sess.close()

#### Use context manager with Session to run a Graph

In [43]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

In [44]:
result

42

#### Alternatively, define a "variable initializer" node in the graph

In [45]:
# prepare an init node
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # init node initializes all Variables
    init.run()
    result = f.eval()

In [46]:
result

42

#### Managing graphs when more than 1 is available

Notice that all of the 3 nodes' `.graph` attribute point to the exact same computation graph (default graph)

In [47]:
# graph of f created above is the default graph
print(f.graph is tf.get_default_graph())
print(x.graph is tf.get_default_graph())
print(y.graph is tf.get_default_graph())

True
True
True


In [48]:
# explicitly creating a new Graph() to store nodes
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
    # below line will raise exception since y is a node in
    # another graph
    #f2 = x2 * 3 * y
    f2 = x2 * x2 * 3
    # init will only initialize Variables **before** it
    # before to have init latest of the pack
    init = tf.global_variables_initializer()

In [49]:
# graph needs to be defined explicitly here
with tf.Session(graph=graph) as sess:
    init.run()
    result = f2.eval()

In [50]:
result

12

In [51]:
# the default graph was only teporarily set as graph
graph is tf.get_default_graph()

False

#### Lifecycle of Node Values

Node values are only computed when running a evaluation through the graph, and are discarded once the computation is done. Only `Variable` values are maintained.

In [52]:
graph1 = tf.Graph()
with graph1.as_default():
    w = tf.constant(10)
    x = w + 2
    y = x + 4
    z = x * 6

In [62]:
with tf.Session(graph=graph1) as sess:
    print(y.eval())
    print(z.eval())

16
72


In [65]:
w

<tf.Tensor 'Const:0' shape=() dtype=int32>

In [59]:
result

16

## Section 2: Running Linear Regression with TensorFlow

In [84]:
tf.reset_default_graph()

### Quick pull a sample dataset before running a TF Linear Regression

In [183]:
import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
# obtain m and n, m is number of records and n is number of columns
m, n = housing.data.shape
# add 1s to the matrix for each row, which is for model bias / intercept
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [184]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(housing_data_plus_bias)
scaled_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

In [185]:
print(m)
print(n)
print(scaled_housing_data_plus_bias[:2, :])

20640
8
[[ 0.          2.34476576  0.98214266  0.62855945 -0.15375759 -0.9744286
  -0.04959654  1.05254828 -1.32783522]
 [ 0.          2.33223796 -0.60701891  0.32704136 -0.26333577  0.86143887
  -0.09251223  1.04318455 -1.32284391]]


### Create TF Computation Graph using Normal Equation for Linear Regression

In [192]:
tf.reset_default_graph()
# load Variable as constant tensors
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# Calculate theta using the normal equation
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

In [193]:
with tf.Session() as sess:
    theta_value = theta.eval()

In [194]:
print(theta_value)

[[-3.7185181e+01]
 [ 4.3633747e-01]
 [ 9.3952334e-03]
 [-1.0711310e-01]
 [ 6.4479220e-01]
 [-4.0338000e-06]
 [-3.7813708e-03]
 [-4.2348403e-01]
 [-4.3721911e-01]]


### Create TF Computation Graph using Gradient Descent for Linear Regression

#### Option 1: Manual Computation of Gradient Descent (Batch GD)

**IMPORTANT:**
**Rescaling input data by column (feature) such that features all have same scale. This is VITAL for any GD solution!**

Define the construction phase - build the `Graph()`

In [196]:
# reset default graph
tf.reset_default_graph()

# define hyper parameter
n_epochs = 1000
learning_rate = 0.01

# load training data
# note that X and y are computed directly, indicating this is a
# Batch GD
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# start a random theta for GB. It should have (n + 1) elements
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')

# calculate y_pred
y_pred = tf.matmul(X, theta, name='predictions')

# calculate error
error = y_pred - y
# calculate mean square error
mse = tf.reduce_mean(tf.square(error), name='mse')

# calculate gradients of the mse (loss function)
gradients = 2 / m * tf.matmul(tf.transpose(X), error)

# define GD update formula
# note: tf.assign updates the Variable with new theta
training_op = tf.assign(theta, theta - learning_rate * gradients)

# last but not least, create a init node
init = tf.global_variables_initializer()

Define the execution phase - run the `Graph()`

In [197]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # note here when mse is evaluated, it does not call learning_op
            # which actually updates the theta
            print("Epoch {0}: MSE = {1}".format(epoch, mse.eval()))
        # note that training_op is run here which updates theta in ach epoch
        sess.run(training_op)
    # after all epochs are done, the theta has been assigned n_epoch times
    best_theta = theta.eval()
    print(best_theta)

Epoch 0: MSE = 8.853888511657715
Epoch 100: MSE = 4.915576457977295
Epoch 200: MSE = 4.845273494720459
Epoch 300: MSE = 4.834809303283691
Epoch 400: MSE = 4.828482151031494
Epoch 500: MSE = 4.823597431182861
Epoch 600: MSE = 4.819718360900879
Epoch 700: MSE = 4.816625118255615
Epoch 800: MSE = 4.81414270401001
Epoch 900: MSE = 4.812143325805664
[[-0.19455743]
 [ 0.90342915]
 [ 0.14384116]
 [-0.3839345 ]
 [ 0.39435384]
 [ 0.0036655 ]
 [-0.04302227]
 [-0.64767283]
 [-0.62590075]]


### Option 2: Using TF's `autodiff` features to compute gradients

Use the `tf.gradients()` to automatically calculate the gradient based on the computation graph

In [198]:
# reset default graph
tf.reset_default_graph()

# define hyper parameter
n_epochs = 1000
learning_rate = 0.01

# load training data
# note that X and y are computed directly, indicating this is a
# Batch GD
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# start a random theta for GB. It should have (n + 1) elements
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')

# calculate y_pred
y_pred = tf.matmul(X, theta, name='predictions')

# calculate error
error = y_pred - y
# calculate mean square error
mse = tf.reduce_mean(tf.square(error), name='mse')

# replace hand-written gradient computation with
# tf.gradients() returns a list of gradients with regard
# to each x
gradients = tf.gradients(ys=[mse], xs=[theta])[0]

# define GD update formula
# note: tf.assign updates the Variable with new theta
training_op = tf.assign(theta, theta - learning_rate * gradients)

# last but not least, create a init node
init = tf.global_variables_initializer()

In [199]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # note here when mse is evaluated, it does not call learning_op
            # which actually updates the theta
            print("Epoch {0}: MSE = {1}".format(epoch, mse.eval()))
        # note that training_op is run here which updates theta in ach epoch
        sess.run(training_op)
    # after all epochs are done, the theta has been assigned n_epoch times
    best_theta = theta.eval()
    print(best_theta)

Epoch 0: MSE = 7.3176445960998535
Epoch 100: MSE = 5.145058631896973
Epoch 200: MSE = 5.035160064697266
Epoch 300: MSE = 4.9770097732543945
Epoch 400: MSE = 4.934591770172119
Epoch 500: MSE = 4.903024196624756
Epoch 600: MSE = 4.879417896270752
Epoch 700: MSE = 4.861688613891602
Epoch 800: MSE = 4.848317623138428
Epoch 900: MSE = 4.8381876945495605
[[-0.8194535 ]
 [ 0.90355426]
 [ 0.17581181]
 [-0.3244198 ]
 [ 0.3188639 ]
 [ 0.01529535]
 [-0.04572967]
 [-0.414511  ]
 [-0.38986284]]


### Option 3: Using TF's native optimizer

In [None]:
# reset default graph
tf.reset_default_graph()

# define hyper parameter
n_epochs = 1000
learning_rate = 0.01

# load training data
# note that X and y are computed directly, indicating this is a
# Batch GD
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# start a random theta for GB. It should have (n + 1) elements
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name='theta')

# calculate y_pred
y_pred = tf.matmul(X, theta, name='predictions')

# calculate error
error = y_pred - y
# calculate mean square error
mse = tf.reduce_mean(tf.square(error), name='mse')

# replace hand-written gradient computation with
# tf.gradients() returns a list of gradients with regard
# to each x
gradients = tf.gradients(ys=[mse], xs=[theta])[0]

# define GD update formula
# note: tf.assign updates the Variable with new theta
training_op = tf.assign(theta, theta - learning_rate * gradients)

# last but not least, create a init node
init = tf.global_variables_initializer()