# Set Up

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

## Import Tensorflow

In [2]:
import tensorflow as tf

In [3]:
tf.__version__

'1.12.0'

## Construction Phase: Build a computation graph

In [4]:
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')
f = x*x*y + y + 2

## Execution Phase: run computation graph

In [5]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
    
print(result)

42


In [6]:
init = tf.global_variables_initializer() # prepare an init node

with tf.Session() as sess:
    init.run() # initialzie all variables
    result1 = f.eval()
    
print(result1)

42


In [7]:
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
    
x2.graph is graph

True

In [8]:
x2.graph is tf.get_default_graph()

False

Evaluate y and z separately, where w and x are initialized twice

In [9]:
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

# evaluate x, w twice
with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15


Evaluate y and z in just one graph run, where w and x are initialized once

In [10]:
# evaluate x,w once
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15


## Calculate theta directly via normal equation

In [11]:
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m,1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')
XT = tf.transpose(X)
# theta = inverse(XT·X)·XT·Y
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT,X)), XT), y)

with tf.Session() as sess:
    theta_value = theta.eval()

In [12]:
theta_value


array([[-3.7185181e+01],
       [ 4.3633747e-01],
       [ 9.3952334e-03],
       [-1.0711310e-01],
       [ 6.4479220e-01],
       [-4.0338000e-06],
       [-3.7813708e-03],
       [-4.2348403e-01],
       [-4.3721911e-01]], dtype=float32)

## Calculate theta via Gradient Descent

**Scale data**

In [13]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_housing_data_plus_bias = scaler.fit_transform(housing_data_plus_bias)

**Add Log Directory**

In [14]:
from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

### Batch Gradient Descent

In [15]:
n_epochs = 1000
learning_rate = 0.01
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")
# manual calculate gradient
# gradients = 2/m * tf.matmul(tf.transpose(X), error)

# use autodiff feature of tensorflow
# gradients = tf.gradients(mse, [theta])[0]

# training_op = tf.assign(theta, theta - learning_rate * gradients)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch%100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
            save_path = saver.save(sess, "./tmp/my_model.ckpt")
            
#             summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
#             step = epoch * n_batches + batch_index
#             file_writer.add_summary(summary_str, step)
#         sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            
        sess.run(training_op)
    best_theta = theta.eval()
    save_path = saver.save(sess, "./tmp/my_model_final.ckpt")

best_theta

Epoch 0 MSE = 8.018914
Epoch 100 MSE = 5.068821
Epoch 200 MSE = 4.972994
Epoch 300 MSE = 4.928205
Epoch 400 MSE = 4.8959312
Epoch 500 MSE = 4.872247
Epoch 600 MSE = 4.854808
Epoch 700 MSE = 4.841929
Epoch 800 MSE = 4.8323956
Epoch 900 MSE = 4.8253064


array([[ 0.527457  ],
       [ 0.860218  ],
       [ 0.16443959],
       [-0.24861485],
       [ 0.25884965],
       [ 0.01172017],
       [-0.04384349],
       [-0.53809774],
       [-0.5086996 ]], dtype=float32)

### Mini-Batch Gradient Descent

In [None]:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

def fetch_batch(epoch, batch_index, batch_size):
    # generate randome indexes for this batch_index with constant size=100
    np.random.seed(epoch * n_batches + batch_index)
    mini_indexes = np.random.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[mini_indexes]
    y_batch = housing.target.reshape(-1, 1)[mini_indexes]
    return X_batch, y_batch

mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
#         if epoch%100 == 0:
            
        for batch_index in range(n_batches):
#             if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    file_writer.close()
    best_theta = theta.eval()

best_theta

### Moduliarity in tensorflow

**Code w/o modularity**

In [17]:
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")

w1 = tf.Variable(tf.random_normal((n_features, 1)), name="weights1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name="weights2")
b1 = tf.Variable(0.0, name="bias1")
b2 = tf.Variable(0.0, name="bias2")

z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z2")

relu1 = tf.maximum(z1, 0., name="relu1")
relu2 = tf.maximum(z2, 0., name="relu2")

output = tf.add(relu1, relu2, name="output")

**Code w/ modularity**

In [21]:
def relu(X):
    with tf.variable_scope("relu", reuse=True):
        threshold = tf.get_variable("threshold")
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, 0., name="relu")
    
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
with tf.variable_scope("relu"): # create the variable
    threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))
relus = [relu(X) for relu_index in range(5)]
output = tf.add_n(relus, name="output")
    

In [22]:
relus

[<tf.Tensor 'relu_4/relu:0' shape=(?, 1) dtype=float32>,
 <tf.Tensor 'relu_5/relu:0' shape=(?, 1) dtype=float32>,
 <tf.Tensor 'relu_6/relu:0' shape=(?, 1) dtype=float32>,
 <tf.Tensor 'relu_7/relu:0' shape=(?, 1) dtype=float32>,
 <tf.Tensor 'relu_8/relu:0' shape=(?, 1) dtype=float32>]