In [1]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from tensorflow_graph_in_jupyter import show_graph
from datetime import datetime

In [2]:
# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [5]:
# crteating basic computational graph
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

In [6]:
# opening and running a tensorflow session - initialize the variables and evaluate
with tf.Session() as sess:
    x.initializer.run() #initializing variable x
    y.initializer.run() #initializing variable y
    result = f.eval() #evaluating f
    print(result)

42


In [7]:
# using a global_variable_initializer() to initialize all variables
# it does not perform the initialization immediately. It creates a node in the graph that will initialize all variables when it is run
init = tf.global_variables_initializer() #prepare an init node

In [8]:
with tf.Session() as sess:
    init.run() #actually initialize all variables
    result = f.eval()
    print(result)

42


In [9]:
###################### important points #########################
# 1. When we evaluate a node, TensorFlow automatically determines the set of nofes that it depends on and evaluates them first
# 2. All node values are dropped between graph runs, except variable values, which are maintained by the session across graph runs

In [10]:
# evaluating nodes in two different runs. All intermediate nodes are recomputed between runs
w = tf.constant(3)
x = w + 2
y = x + 5
z = x + 3
# both evaluations done in separate runs
with tf.Session() as sess:
    print(y.eval()) #10
    print(z.eval()) #8

10
8


In [11]:
# evaluating nodes in the same run. All intermediate nodes are shared
w = tf.constant(3)
x = w + 2
y = x + 5
z = x + 3
# both evaluations done in same run
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val) #10
    print(z_val) #8

10
8


In [12]:
########################### Linear Regression on TensorFlow ##########################

In [13]:
reset_graph()

In [14]:
# fetching the california housing dataset from sklearn
housing = fetch_california_housing()
print(housing.data.shape)
m, n = housing.data.shape

(20640, 8)


In [15]:
# adding the bias input feature
housing_bias = np.c_[np.ones((m,1)), housing.data]
print(housing_bias.shape)

(20640, 9)


In [16]:
# creating tensor flow nodes for the data observations and labels
# X->data tensor, a constant
X = tf.constant(housing_bias, dtype = tf.float32, name = "X")
# converting the labels from a 1D array to a column vector
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")

In [17]:
# transposing data matrix
Xt = tf.transpose(X)

In [18]:
# calculating theta hat (estimators) of the linear regression problem -> normal equation
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(Xt, X)),Xt),y)

In [19]:
# evaluating theta
with tf.Session() as sess:
    theta_value = theta.eval()

In [20]:
print(theta_value)

[[-3.6959320e+01]
 [ 4.3698898e-01]
 [ 9.4245886e-03]
 [-1.0791138e-01]
 [ 6.4842808e-01]
 [-3.9986235e-06]
 [-3.7866351e-03]
 [-4.2142656e-01]
 [-4.3467718e-01]]


In [21]:
########################### Linear Regression with pure numpy ##########################

In [22]:
# X -> data matrix
X = housing_bias
# y -> labels
y = housing.target.reshape(-1, 1)
# computing theta using numpy linear algebra package
# X.T -> X transpose
theta_numpy = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
print(theta_numpy)

[[-3.69419202e+01]
 [ 4.36693293e-01]
 [ 9.43577803e-03]
 [-1.07322041e-01]
 [ 6.45065694e-01]
 [-3.97638942e-06]
 [-3.78654265e-03]
 [-4.21314378e-01]
 [-4.34513755e-01]]


In [23]:
########################### Linear Regression with scikit-learn ##########################

In [24]:
#initializing linear regression model
lin_reg = LinearRegression()
#fitting linear regression model for the data in hand
lin_reg.fit(X, y)
#theta = intercept + coefficients
# np.r_ -> concatenate horizontally, 
theta_scikit = np.r_[lin_reg.intercept_.reshape(-1, 1), lin_reg.coef_.T]
print(theta_scikit)

[[-3.69419202e+01]
 [ 0.00000000e+00]
 [ 4.36693293e-01]
 [ 9.43577803e-03]
 [-1.07322041e-01]
 [ 6.45065694e-01]
 [-3.97638942e-06]
 [-3.78654265e-03]
 [-4.21314378e-01]
 [-4.34513755e-01]]


In [25]:
########################## Linear Regression using gradient descent - manual ######################

In [26]:
## scaling the data to make sure gradient descent converges
scaler = StandardScaler()
# adding the bias term after scaling
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_bias = np.c_[ np.ones((m,1)), scaled_housing_data ]

In [27]:
reset_graph()
n_epochs = 1000 #number of epochs
learning_rate = 0.01

In [28]:
# creating tensor flow nodes for the data observations and labels
# X->data tensor, a constant
X = tf.constant(scaled_housing_bias, dtype = tf.float32, name = "X")
# converting the labels from a 1D array to a column vector
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")

In [29]:
# random_function() creates a tensor (vector) containing randomly generated values
# dimension of theta = number of features + bias = 9 * 1
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0 , 1.0), name= "theta")

In [30]:
# tensor to hold the predicted regression values
y_pred = tf.matmul(X, theta, name = "predictions") # y_pred = X.theta, dimension = 20640 * 1
# initializing error term: predicted_values - actual_values
error = y_pred - y

In [31]:
# tensor to hold mean squared error
# reduce_mean -> computes mean of elements along the dimension
mse = tf.reduce_mean(tf.square(error), name = "mse")

In [32]:
# tensor to hold gradients, calculated as 2/m * XT.[y_pred - y]
gradients = 2/m * tf.matmul(tf.transpose(X), error) # dimension: 9 * 1

In [33]:
# Batch gradient descent: theta = theta - learning_rata * gradients. Dimension: 9 * 1
# assign() creates a node that will assign a new value to a variable
training_op = tf.assign(theta, theta - learning_rate * gradients)

In [34]:
# global initializer
init = tf.global_variables_initializer()

In [35]:
# executing the session
with tf.Session() as sess:
    sess.run(init) #initializing all the global variables
    # running the training over 1000 epochs
    for epoch in range(n_epochs):
        # printing the MSE for every 100th epoch
        if epoch%100 == 0:
            print("Epoch: ", epoch, "MSE:", mse.eval())
        #computing the gradient
        sess.run(training_op)
    best_theta = theta.eval()

Epoch:  0 MSE: 12.408014
Epoch:  100 MSE: 0.75519687
Epoch:  200 MSE: 0.5420874
Epoch:  300 MSE: 0.5331699
Epoch:  400 MSE: 0.53053814
Epoch:  500 MSE: 0.5287963
Epoch:  600 MSE: 0.527549
Epoch:  700 MSE: 0.52664965
Epoch:  800 MSE: 0.5260011
Epoch:  900 MSE: 0.52553326


In [36]:
########################## Linear Regression using gradient descent - autodiff ######################

In [37]:
reset_graph()
n_epochs = 1000
learning_rate = 0.01

In [38]:
# creating tensor flow nodes for the data observations and labels
X = tf.constant(scaled_housing_bias, dtype = tf.float32, name = "X")
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")

In [39]:
# initializing theta, y_pred, error and mse same as above
theta = tf.Variable(tf.random_uniform([n+1 , 1], -1, 1, seed = 42), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")

In [40]:
# instead of calculating gradients manually using autodiff() through gradients()
# It takes an op(mse) and a list of variables(theta) and creates a list of ops one for each variable to compute gradients of op with each variable
gradients = tf.gradients(mse, [theta])[0]

In [41]:
# creating a training operation
training_op = tf.assign(theta, theta - learning_rate * gradients)
# global initializer
init = tf.global_variables_initializer()

In [42]:
# executing the session - same as above
with tf.Session() as sess:
    sess.run(init) #initializing all the global variables
    # running the training over 1000 epochs
    for epoch in range(n_epochs):
        # printing the MSE for every 100th epoch
        if epoch%100 == 0:
            print("Epoch: ", epoch, "MSE:", mse.eval())
        #computing the gradient
        sess.run(training_op)
    best_theta = theta.eval()

Epoch:  0 MSE: 9.161542
Epoch:  100 MSE: 0.7145004
Epoch:  200 MSE: 0.56670487
Epoch:  300 MSE: 0.55557173
Epoch:  400 MSE: 0.5488112
Epoch:  500 MSE: 0.5436363
Epoch:  600 MSE: 0.53962904
Epoch:  700 MSE: 0.5365092
Epoch:  800 MSE: 0.53406775
Epoch:  900 MSE: 0.5321473


In [43]:
########################## Linear Regression using gradient descent optimizer ######################

In [44]:
## initializations same as above
# creating tensor flow nodes for the data observations and labels
X = tf.constant(scaled_housing_bias, dtype = tf.float32, name = "X")
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
# initializing theta, y_pred, error and mse same as above
theta = tf.Variable(tf.random_uniform([n+1 , 1], -1, 1, seed = 42), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")

In [45]:
# using tensorflow provided pre-defined optimizers (StochasticGradientDescent in this case)
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)

In [46]:
## instead of explicitly stating the training operation, optimizer object can be used
# Objective: minimizing mse
training_op = optimizer.minimize(mse)

In [47]:
# executing the session - same as above
# global initializer
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init) #initializing all the global variables
    # running the training over 1000 epochs
    for epoch in range(n_epochs):
        # printing the MSE for every 100th epoch
        if epoch%100 == 0:
            print("Epoch: ", epoch, "MSE:", mse.eval())
        #computing the gradient
        sess.run(training_op)
    best_theta = theta.eval()
print(best_theta)

Epoch:  0 MSE: 9.161542
Epoch:  100 MSE: 0.7145004
Epoch:  200 MSE: 0.56670487
Epoch:  300 MSE: 0.55557173
Epoch:  400 MSE: 0.5488112
Epoch:  500 MSE: 0.5436363
Epoch:  600 MSE: 0.53962904
Epoch:  700 MSE: 0.5365092
Epoch:  800 MSE: 0.53406775
Epoch:  900 MSE: 0.5321473
[[ 2.0685525 ]
 [ 0.8874027 ]
 [ 0.14401658]
 [-0.34770882]
 [ 0.36178368]
 [ 0.00393811]
 [-0.04269556]
 [-0.6614528 ]
 [-0.6375277 ]]


In [48]:
########################## Linear Regression using momentum optimizer ######################

In [49]:
## initializations same as above
# creating tensor flow nodes for the data observations and labels
X = tf.constant(scaled_housing_bias, dtype = tf.float32, name = "X")
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = "y")
# initializing theta, y_pred, error and mse same as above
theta = tf.Variable(tf.random_uniform([n+1 , 1], -1, 1, seed = 42), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")

In [50]:
# using tensorflow provided pre-defined optimizers (StochasticGradientDescent in this case)
optimizer = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.9)

In [51]:
## instead of explicitly stating the training operation, optimizer object can be used
# Objective: minimizing mse
training_op = optimizer.minimize(mse)

In [52]:
# executing the session - same as above
# global initializer
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init) #initializing all the global variables
    # running the training over 1000 epochs
    for epoch in range(n_epochs):
        # printing the MSE for every 100th epoch
        if epoch%100 == 0:
            print("Epoch: ", epoch, "MSE:", mse.eval())
        #computing the gradient
        sess.run(training_op)
    best_theta = theta.eval()
print(best_theta)

Epoch:  0 MSE: 9.161542
Epoch:  100 MSE: 0.53056407
Epoch:  200 MSE: 0.52501124
Epoch:  300 MSE: 0.52441067
Epoch:  400 MSE: 0.52433294
Epoch:  500 MSE: 0.5243226
Epoch:  600 MSE: 0.5243211
Epoch:  700 MSE: 0.524321
Epoch:  800 MSE: 0.524321
Epoch:  900 MSE: 0.524321
[[ 2.068558  ]
 [ 0.8296286 ]
 [ 0.11875337]
 [-0.26554456]
 [ 0.3057109 ]
 [-0.00450251]
 [-0.03932662]
 [-0.89986444]
 [-0.87052065]]


In [53]:
########################## Linear Regression using min-batch gradient descent ######################
# placeholder nodes don't actually perform any computation, they just output the data we tell them to output at runtime
# we pass a feed_dict to the eval() method that specify the values of placeholders

In [54]:
#this function created mini-batches for a given epoch
def fetch_data(epoch, batch_index, batch_size):
    # randomizing each batch and for every epoch
    np.random.seed(epoch * n_batches + batch_size)
    # generating random indices equivalent batch_size
    indices = np.random.randint(m, size = batch_size)
    # filtering observartions and labels to be used for the epoch
    X_batch = scaled_housing_bias[indices]
    y_batch = housing.target.reshape(-1,1)[indices]
    return X_batch, y_batch

In [55]:
# initializing (and placing) placeholders for data observations and labels instead of constants
# None = "any size"
X = tf.placeholder(tf.float32, shape = (None, n+1), name = "X") # X -> any number of rows, n+1(9) columns
y = tf.placeholder(tf.float32, shape = (None, 1), name = "y") # y -> any number of rows, 1 column

In [56]:
batch_size = 100
n_epochs = 10
n_batches = int(np.ceil(m/batch_size))

In [57]:
# executing the session - same as above
# global initializer
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init) #initializing all the global variables
    # running the training over 10 epochs over a batch size of 100
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch ,y_batch = fetch_data(epoch, batch_index, batch_size)
            #feeding the values to placeholders X and y
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
    best_theta = theta.eval()
print(best_theta)

[[ 2.068558  ]
 [ 0.82962054]
 [ 0.11875187]
 [-0.26552895]
 [ 0.30569792]
 [-0.00450293]
 [-0.03932633]
 [-0.8998828 ]
 [-0.8705383 ]]


In [58]:
############################ Saving and restoring models ########################
# Saver -> saves and restores all variables under their own name. 
# By default the saver also saves the graph structure itself in a second file with the extension .meta.

In [59]:
reset_graph()
n_epochs = 1000
learning_rate = 0.01

In [60]:
X = tf.constant(scaled_housing_bias, dtype = tf.float32, name = "X")
y = tf.constant(housing.target.reshape(-1,1), dtype = tf.float32, name = "y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1, 1, seed = 42), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()

In [61]:
#initializing Saver instance
saver = tf.train.Saver()

In [62]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch: ",epoch, "MSE: ", mse.eval())
            #saving the model after every 100th epoch
            saver.save(sess,"/tmp/my_model.ckpt")
        sess.run(training_op)
        best_theta = theta.eval()
        #saving the final model
        saver.save(sess, "/tmp/my_model_final.ckpt")

Epoch:  0 MSE:  9.161542
Epoch:  100 MSE:  0.7145004
Epoch:  200 MSE:  0.56670487
Epoch:  300 MSE:  0.55557173
Epoch:  400 MSE:  0.5488112
Epoch:  500 MSE:  0.5436363
Epoch:  600 MSE:  0.53962904
Epoch:  700 MSE:  0.5365092
Epoch:  800 MSE:  0.53406775
Epoch:  900 MSE:  0.5321473


In [63]:
best_theta

array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.6375277 ]], dtype=float32)

In [64]:
#restoring the saved session
with tf.Session() as sess:
    saver.restore(sess,"/tmp/my_model_final.ckpt")
    best_theta_saved = theta.eval()

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /tmp/my_model_final.ckpt


In [65]:
#checking if both saved and restores theta values are equal (element-wise)
np.allclose(best_theta,best_theta_saved)

True

In [66]:
# restoring the graph structure as well. Starting with an empty graph
reset_graph()
## empty graph
# importing meta graph
saver = tf.train.import_meta_graph('/tmp/my_model_final.ckpt.meta')
#accessing theta op from the graph structure
theta = tf.get_default_graph().get_tensor_by_name("theta:0")

with tf.Session() as sess:
    #loading the saved model i.e. restoring the graph's saved state
    saver.restore(sess, "/tmp/my_model_final.ckpt")
    best_theta_restored = theta.eval()

INFO:tensorflow:Restoring parameters from /tmp/my_model_final.ckpt


In [67]:
best_theta_restored

array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.6375277 ]], dtype=float32)

In [68]:
np.allclose(best_theta, best_theta_restored)

True

In [None]:
#visualizing the current default graph inside jupyter
show_graph(tf.get_default_graph())

In [1]:
############### Visualizing graphs using tensorboard ###################
# TensorBoard displays interactive visualizations of the provided training stats in a web browser
# It reads these training stats from a specified log folder
# tensorboard --logdir tf_logs/ (navigate to http://0.0.0.:6006)

In [71]:
reset_graph()
# retrieving the current time stamp
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
# adding timestamp to the name of the log_directory so that each run's stats will be distinct and don't get mixed up
root_logdir = "tf_logs"
logdir  = '{}/run-{}'.format(root_logdir, now)

In [72]:
# initializing model hyperparameters
batch_size = 100
n_epochs = 10
n_batches = int(np.ceil(m/batch_size))
learning_rate = 0.01

In [73]:
# initializing (and placing) placeholders for data observations and labels instead of constants
# None = "any size"
X = tf.placeholder(tf.float32, shape = (None, n+1), name = "X") # X -> any number of rows, n+1(9) columns
y = tf.placeholder(tf.float32, shape = (None, 1), name = "y") # y -> any number of rows, 1 column

In [74]:
# initializing theta, y_pred, error and mse same as above
theta = tf.Variable(tf.random_uniform([n+1 , 1], -1, 1, seed = 42), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")

In [75]:
# using tensorflow provided pre-defined optimizers (StochasticGradientDescent in this case)
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
## instead of explicitly stating the training operation, optimizer object can be used
# Objective: minimizing mse
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()

In [76]:
# creating a node in the graph that will evaluate MSE value and write it to a TensorBoard compatible binary log string called summary
mse_summary = tf.summary.scalar('MSE',mse)

In [77]:
# creating FileWriter that will be used to write the summaries to logfiles
# It also writes the graph definition (of the default graph provided) to a binary logfile called an events file
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [78]:
# running the graph execution phase, same as above- using BatchGradientDescent
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_data(epoch, batch_index, batch_size)
            # writing the mse summary to a log file after every 10th batch
            if batch_index % 10 ==0:
                # evaluating the step to compute MSE value over the given batch
                summary_str = mse_summary.eval(feed_dict = {X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            # actual training on the given batch
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
    best_theta = theta.eval()
file_writer.close()

In [79]:
best_theta

array([[ 2.1242497 ],
       [ 0.90131176],
       [ 0.2054878 ],
       [-0.1690098 ],
       [ 0.48434627],
       [-0.11807442],
       [-0.4183332 ],
       [-0.93492216],
       [-0.86516005]], dtype=float32)

In [80]:
###################### using Name Scopes to group related nodes ##################

In [81]:
reset_graph()
# retrieving the current time stamp
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
# adding timestamp to the name of the log_directory so that each run's stats will be distinct and don't get mixed up
root_logdir = "tf_logs"
logdir  = '{}/run-{}'.format(root_logdir, now)

# initializing model hyperparameters
batch_size = 100
n_epochs = 10
n_batches = int(np.ceil(m/batch_size))
learning_rate = 0.01

# initializing (and placing) placeholders for data observations and labels instead of constants
# None = "any size"
X = tf.placeholder(tf.float32, shape = (None, n+1), name = "X") # X -> any number of rows, n+1(9) columns
y = tf.placeholder(tf.float32, shape = (None, 1), name = "y") # y -> any number of rows, 1 column

# initializing theta, y_pred, error and mse same as above
theta = tf.Variable(tf.random_uniform([n+1 , 1], -1, 1, seed = 42), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")

In [82]:
# Assigning name scope of "loss" to related nodes. Name of each op defined within the scope is prefixed with "loss/"
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name = "mse")

In [83]:
# using tensorflow provided pre-defined optimizers (StochasticGradientDescent in this case)
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
## instead of explicitly stating the training operation, optimizer object can be used
# Objective: minimizing mse
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()

# creating a node in the graph that will evaluate MSE value and write it to a TensorBoard compatible binary log string called summary
mse_summary = tf.summary.scalar('MSE',mse)

# creating FileWriter that will be used to write the summaries to logfiles
# It also writes the graph definition (of the default graph provided) to a binary logfile called an events file
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [84]:
# running the graph execution phase, same as above- using BatchGradientDescent
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_data(epoch, batch_index, batch_size)
            # writing the mse summary to a log file after every 10th batch
            if batch_index % 10 ==0:
                # evaluating the step to compute MSE value over the given batch
                summary_str = mse_summary.eval(feed_dict = {X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            # actual training on the given batch
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
    best_theta = theta.eval()
file_writer.close()
print(best_theta)

[[ 2.1242497 ]
 [ 0.90131176]
 [ 0.2054878 ]
 [-0.1690098 ]
 [ 0.48434627]
 [-0.11807442]
 [-0.4183332 ]
 [-0.93492216]
 [-0.86516005]]


In [85]:
print(error.op.name, mse.op.name)

loss/sub loss/mse


In [86]:
################ Illustrating the modularity supported through tensorflow ############

In [87]:
# this function computes a ReLU operation on the given data X
def relu(X):
    # assigning name score to each node/op
    with tf.name_scope("relu"):
        w_shape = (int(X.get_shape()[1]),1)
        #intiating weights randomly
        w = tf.Variable(tf.random_normal(w_shape), name = "weights")
        #bias term
        b = tf.Variable(0.0, name = "bias")
        # computing a linear function of inputs
        z = tf.add(tf.matmul(X, w),b, name = "z")
        return tf.maximum(z, 0., name = "max")

In [88]:
## note that the session is not getting executed, we are just visualizing the graph structure
reset_graph()

n_features = 3
X = tf.placeholder(tf.float32, shape = (None, n_features), name = "X")
#adding 5 relus
relus = [relu(X) for i in range(5)]
# add_n() creates an operation that will compute the sum of a list of tensors
output = tf.add_n(relus, name = "output")

In [89]:
#writing the structure of the execution graph to a file
file_writer = tf.summary.FileWriter("logs/relu", tf.get_default_graph())
file_writer.close()

In [90]:
################### Different scenarios of sharing variables between components ################

In [91]:
# 1. classical way of defining it outside the function and sending it as a parameter

In [92]:
reset_graph()

# relu() function same as earlier
def relu(X, threshold):
    with tf.name_scope("relu"):
        w_shape = (int(X.get_shape()[1]),1)
        #intiating weights randomly
        w = tf.Variable(tf.random_normal(w_shape), name = "weights")
        #bias term
        b = tf.Variable(0.0, name = "bias")
        # computing a linear function of inputs
        z = tf.add(tf.matmul(X, w),b, name = "z")
        return tf.maximum(z, threshold, name = "max")

#creating the threshold variable
threshold = tf.Variable(0.0, name = "threshold")
X = tf.placeholder(tf.float32, shape = (None, n_features), name = "X")
relus = [ relu(X, threshold) for i in range(5) ]
output = tf.add_n(relus, name = "output")

In [93]:
# 2. Set the shared variable as an attribute of the function upon the first call

In [94]:
reset_graph()

# relu() function same as earlier
def relu(X):
    with tf.name_scope("relu"):
        if not hasattr(relu, "threshold"):
            #variable name is prefixed by the name scope
            relu.threshold = tf.Variable(0.0, name = "threshold")
        w_shape = (int(X.get_shape()[1]),1)
        #intiating weights randomly
        w = tf.Variable(tf.random_normal(w_shape), name = "weights")
        #bias term
        b = tf.Variable(0.0, name = "bias")
        # computing a linear function of inputs
        z = tf.add(tf.matmul(X, w),b, name = "z")
        return tf.maximum(z, relu.threshold, name = "max")

X = tf.placeholder(tf.float32, shape = (None, n_features), name = "X")
relus = [ relu(X) for i in range(5) ]
output = tf.add_n(relus, name = "output")

In [95]:
# 3. use get_variable() to create the shared variable if it does not exist yet or reuse it if it already exists
reset_graph()
# this code will create a variable names relu/threshold using 0.0 as initial value
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold",shape = (),
                               initializer = tf.constant_initializer(0.0))

In [96]:
# setting reuse attribute to reuse a variable already created (in this case no need to specify shape or initializer)
with tf.variable_scope("relu", reuse = True):
    threshold = tf.get_variable("threshold")
# an exception will be raised if it was not created using get_variable

# setting reuse using reuse_variables() method within the score
with tf.variable_scope("relu") as scope:
    scope.reuse_variables()
    threshold = tf.get_variable("threshold")

In [97]:
reset_graph()

# relu() function reusing the already created variable
def relu(X):
    # reusing variables with relu/ scope
    with tf.variable_scope("relu", reuse = True):
        threshold = tf.get_variable("threshold")
        w_shape = (int(X.get_shape()[1]),1)
        #intiating weights randomly
        w = tf.Variable(tf.random_normal(w_shape), name = "weights")
        #bias term
        b = tf.Variable(0.0, name = "bias")
        # computing a linear function of inputs
        z = tf.add(tf.matmul(X, w),b, name = "z")
        return tf.maximum(z, threshold, name = "max")

X = tf.placeholder(tf.float32, shape = (None, n_features), name = "X")
# initializing the threshold value
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(), initializer = tf.constant_initializer(0.0))
relus = [ relu(X) for i in range(5) ]
output = tf.add_n(relus, name = "output")

In [98]:
file_writer = tf.summary.FileWriter("logs/relu6", tf.get_default_graph())
file_writer.close()

In [99]:
# 4. use get_variable() to create the shared variable in the function itself

In [100]:
reset_graph()

def relu(X):
    # initializing the variable within the relu()
    threshold = tf.get_variable("threshold", shape = (), initializer = tf.constant_initializer(0.0))
    w_shape = (int(X.get_shape()[1]),1)
    #intiating weights randomly
    w = tf.Variable(tf.random_normal(w_shape), name = "weights")
    #bias term
    b = tf.Variable(0.0, name = "bias")
    # computing a linear function of inputs
    z = tf.add(tf.matmul(X, w),b, name = "z")
    return tf.maximum(z, threshold, name = "max")

X = tf.placeholder(tf.float32, shape = (None, n_features), name = "X")
relus = []
for relu_index in range(5):
    # setting reuse = None on the first call, and reuse = True for the other calls
    with tf.variable_scope("relu", reuse = (relu_index >= 1 or None)) as scope:
        relus.append(relu(X))
output = tf.add_n(relus, name = "output")

In [101]:
file_writer = tf.summary.FileWriter("logs/relu9", tf.get_default_graph())
file_writer.close()