# Tensorflow Utilities Tutorial

***

In this tutorial, the programming exercises from Chapter 9, *Up and Running with TensorFlow*, of [Hands-On Machine Learning with Scikit-Learn and TensorFlow](https://www.safaribooksonline.com/library/view/hands-on-machine-learning/9781491962282/) are refactored and reproduced.

By the end of this tutorial, you will know the basics and paradigms of programming in TensorFlow.

## Set Up Session

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

## Basics of Tensorflow

In [2]:
# A node belongs to whichever graph was the default at the moment it was
# created. Here `x` is created inside a throw-away graph:
with tf.Graph().as_default():
    x = tf.Variable(42, name='x')

# Outside the `with` block the default graph is a different one again, so the
# identity check below is expected to be False:
tf.get_default_graph() is x.graph

False

In [3]:
# Build a small graph in a fresh tf.Graph, then evaluate it in a session:
with tf.Graph().as_default():
    # Construction phase -- only nodes are created here, nothing is computed:
    x = tf.Variable(3, name='x')
    y = tf.Variable(4, name='y')

    f = x**3 + x**2*y + x*y + 2

    init = tf.global_variables_initializer()

    # Execution phase:
    with tf.Session() as sess:
        # `sess` is the default session inside this block, so the shorthands
        # below are equivalent to:
        #   tf.get_default_session().run(init)
        #   tf.get_default_session().run(f)
        init.run()
        result = f.eval()

    print("[Result]: {}".format(result))

[Result]: 77


In [4]:
# Fetch several nodes with a single sess.run() call instead of evaluating
# them one by one:
with tf.Graph().as_default():
    # Construction phase:
    x = tf.Variable(3, name='x')

    y = (x + 1)**2
    z = (x + 1)**3

    init = tf.global_variables_initializer()

    # Execution phase:
    with tf.Session() as sess:
        sess.run(init)
        # Both values come out of the same graph run:
        y_val, z_val = sess.run([y, z])

    print("[Y]: {}, [Z]: {}".format(y_val, z_val))

[Y]: 16, [Z]: 64


## Linear Regression through Normal Equation

### Load Dataset

In [5]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

# Fetch the California housing dataset:
california_housing = fetch_california_housing()

# Targets are used as-is:
targets = california_housing.target
# Features are standardized with StandardScaler before any training:
features = StandardScaler().fit_transform(california_housing.data)

### Preprocessing

In [6]:
# Number of samples and number of raw features:
N, D = features.shape

# Follow Andrew Ng's conventions: one column per sample, with a row of ones
# appended as the last row for the bias term.
features = np.vstack([features.T, np.ones((1, N))])

# Targets become a single row with one entry per sample:
targets = targets.reshape((1, -1))

# Finally:
print(features.shape, targets.shape)

(9, 20640) (1, 20640)


### Get Optimal Weights using Normal Equation

In [7]:
# Closed-form least squares via the normal equation. With one sample per
# column of X, the weights are w = y X^T (X X^T)^-1.
with tf.Graph().as_default():
    X = tf.constant(features, dtype=tf.float32, name='X')
    y = tf.constant(targets, dtype=tf.float32, name='y')

    X_transposed = tf.transpose(X)
    gram_inverse = tf.matrix_inverse(tf.matmul(X, X_transposed))
    # Same association order as y @ (X^T @ (X X^T)^-1):
    pseudo_inverse = tf.matmul(X_transposed, gram_inverse)
    w = tf.matmul(y, pseudo_inverse)

    # Mean squared error of the fitted weights:
    residual = tf.matmul(w, X) - y
    loss = tf.reduce_mean(residual**2)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        init.run()
        w_val, loss_val = sess.run([w, loss])

    print("[Loss]: {}".format(loss_val))

    # Fraction of the target variance explained, reported as a percentage:
    total_variance = np.var(targets)
    r_squared = 100 * (1.0 - loss_val / total_variance)
    print("[R-Squared]: {}".format(r_squared))

[Loss]: 0.5243210196495056
[R-Squared]: 60.62326600675225


## Get Optimal Weights using Simple Batch Gradient Descent

In [8]:
# Plain gradient descent needs many update steps to converge ...
max_iters = 1000
# ... because only a small step size can be used:
learning_rate = 0.01

In [9]:
# Full-batch gradient descent with a hand-written gradient of the MSE loss.
with tf.Graph().as_default():
    X = tf.constant(features, dtype=tf.float64, name='X')
    y = tf.constant(targets, dtype=tf.float64, name='y')

    # Random initial weights scaled by sqrt(1 / (D+1)); D+1 accounts for the
    # bias row that was appended to the features.
    w_initial = np.sqrt(1/(D+1)) * np.random.randn(1, D+1)
    w = tf.Variable(w_initial, name='w')

    y_pred = tf.matmul(w, X, name='prediction')
    error = y_pred - y

    loss = tf.reduce_mean(error**2, name='loss')

    # Gradient of the mean squared error with respect to w:
    dw = (2.0 / N) * tf.matmul(error, tf.transpose(X))
    # One descent step, written back into w:
    optimization = tf.assign(w, w - learning_rate*dw)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        init.run()

        for step in range(1, max_iters + 1):
            _, loss_val = sess.run([optimization, loss])

            # Report progress every 250 steps:
            if step % 250 == 0:
                print("[Loss @ {}]: {}".format(step, loss_val))

        w_optimal = w.eval()
        loss_optimal = loss.eval()

        r_squared = 100 * (1.0 - loss_optimal / np.var(targets))
        print("[R-Squared]: {}".format(r_squared))

[Loss @ 250]: 0.5808319058927106
[Loss @ 500]: 0.5492083942066661
[Loss @ 750]: 0.5355157712908981
[Loss @ 1000]: 0.5293930842553165
[R-Squared]: 60.243549419289344


## Get Optimal Weights using Adam Optimizer

In [10]:
# Adam converges in far fewer iterations ...
max_iters = 200
# ... and tolerates a larger learning rate:
learning_rate = 0.1

In [11]:
# Full-batch training again, but the update rule is delegated to Adam
# instead of a hand-written gradient step.
with tf.Graph().as_default():
    X = tf.constant(features, dtype=tf.float64, name='X')
    y = tf.constant(targets, dtype=tf.float64, name='y')

    # Random initial weights scaled by sqrt(1 / (D+1)):
    w_initial = np.sqrt(1/(D+1)) * np.random.randn(1, D+1)
    w = tf.Variable(w_initial, name='w')
    y_pred = tf.matmul(w, X, name='prediction')

    loss = tf.reduce_mean((y_pred - y)**2, name='loss')
    # Note: `optimizer` holds the training op returned by minimize():
    adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
    optimizer = adam.minimize(loss)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        init.run()

        for step in range(1, max_iters + 1):
            _, loss_val = sess.run([optimizer, loss])

            # Report progress every 50 steps:
            if step % 50 == 0:
                print("[Loss @ {}]: {}".format(step, loss_val))

        w_optimal = w.eval()
        loss_optimal = loss.eval()

        r_squared = 100 * (1.0 - loss_optimal / np.var(targets))
        print("[R-Squared]: {}".format(r_squared))

[Loss @ 50]: 0.5300696547593745
[Loss @ 100]: 0.5244123209869164
[Loss @ 150]: 0.5243210139187467
[Loss @ 200]: 0.5243209868658081
[R-Squared]: 60.62326848676274


## Get Optimal Weights using Mini-Batch Gradient Descent

In [12]:
# `features` is laid out as (dimensions, samples) after preprocessing, so D
# here already counts the appended bias row.
# NOTE: this rebinds D with a different meaning than in the preprocessing
# cell; re-running the earlier cells that use D+1 after this one would build
# weights whose shape no longer matches the features.
D = features.shape[0]
N = features.shape[1]

batch_size = 256
epochs = 20

learning_rate = 0.1

In [13]:
# Mini-batch training with Adam: the data is fed through placeholders so that
# each training step only sees `batch_size` samples.
with tf.Graph().as_default():
    # The sample axis is left open (None) so the same graph accepts both
    # mini-batches and the full dataset.
    X = tf.placeholder(shape=(D, None), dtype=tf.float64, name='X')
    y = tf.placeholder(shape=(1, None), dtype=tf.float64, name='y')

    # Random initial weights scaled by sqrt(1 / D):
    w = tf.Variable(
        np.sqrt(1 / D) * np.random.randn(1, D),
        name = 'w'
    )

    y_pred = tf.matmul(w, X, name='prediction')

    loss = tf.reduce_mean(
        (y_pred - y)**2,
        name = 'loss'
    )
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        init.run()

        for e in range(epochs):
            # Visit the samples in a new random order every epoch:
            shuffled_indices = np.random.permutation(N)

            # Step through the permutation in strides of batch_size. The last
            # batch is the (possibly shorter) remainder and is never empty.
            # Iterating over `N // batch_size + 1` batches instead would feed
            # an empty batch whenever N is an exact multiple of batch_size;
            # the mean over an empty batch is NaN and the resulting NaN
            # gradient would corrupt the weights.
            for start in range(0, N, batch_size):
                batch_indices = shuffled_indices[start:start + batch_size]
                X_batch = features[:, batch_indices]
                y_batch = targets[:, batch_indices]
                _, loss_val = sess.run(
                    [optimizer, loss],
                    feed_dict = {
                        X: X_batch,
                        y: y_batch
                    }
                )

            # loss_val is the loss of the last mini-batch of the epoch only,
            # so it is a noisy progress indicator:
            if (e + 1) % 10 == 0:
                print(
                    "[Loss @ {}]: {}".format(
                        e + 1,
                        loss_val
                    )
                )

        # Finally, evaluate the loss on the full dataset:
        loss_optimal = sess.run(
            loss,
            feed_dict = {
                X: features,
                y: targets
            }
        )

    print(
        "[R-Squared]: {}".format(
            100 * (1.0 - loss_optimal / np.var(targets))
        )
    )

[Loss @ 10]: 0.3948565401263247
[Loss @ 20]: 0.5279167089556231
[R-Squared]: 59.462623860017814


## Use TensorBoard for Logging

In [14]:
# Configuration for the TensorBoard run.
# `features` is laid out as (dimensions, samples), bias row included:
D = features.shape[0]
N = features.shape[1]

batch_size = 256
epochs = 20

learning_rate = 0.1

In [15]:
# Every run logs into its own timestamped sub-directory of the log root, so
# that separate runs do not write into the same directory:
from datetime import datetime

log_root_dir = "tf-logs"
# UTC time of this cell's execution, e.g. 20180131235959:
timestamp = datetime.utcnow().strftime("%Y%m%d%H%M%S")
log_dir = "{}/run-{}/".format(log_root_dir, timestamp)

In [16]:
# Mini-batch training with Adam, with the graph and the full-dataset loss
# logged for TensorBoard under `log_dir`.
with tf.Graph().as_default():
    with tf.name_scope('input'):
        # The sample axis is left open (None) so the same graph accepts both
        # mini-batches and the full dataset.
        X = tf.placeholder(shape=(D, None), dtype=tf.float64, name='X')
        y = tf.placeholder(shape=(1, None), dtype=tf.float64, name='y')

    with tf.name_scope('params'):
        w = tf.Variable(
            np.sqrt(1 / D) * np.random.randn(1, D),
            name = 'w'
        )

    with tf.name_scope('output'):
        y_pred = tf.matmul(w, X, name='prediction')

    with tf.name_scope('metrics'):
        loss = tf.reduce_mean(
            (y_pred - y)**2,
            name = 'loss'
        )

    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    with tf.name_scope('logging'):
        loss_summary = tf.summary.scalar('Loss', loss)

    init = tf.global_variables_initializer()

    # Create the writer only after every op (including `init`) exists, so the
    # graph it records is complete.
    log_writer = tf.summary.FileWriter(
        log_dir,
        tf.get_default_graph()
    )

    # try/finally guarantees the event file is flushed and closed even if
    # training raises part-way through.
    try:
        with tf.Session() as sess:
            init.run()

            for e in range(epochs):
                # Visit the samples in a new random order every epoch:
                shuffled_indices = np.random.permutation(N)

                # Step through the permutation in strides of batch_size. The
                # last batch is the (possibly shorter) remainder and is never
                # empty; an empty batch would give a NaN loss and a NaN
                # gradient that corrupts the weights.
                for start in range(0, N, batch_size):
                    batch_indices = shuffled_indices[start:start + batch_size]
                    X_batch = features[:, batch_indices]
                    y_batch = targets[:, batch_indices]
                    sess.run(
                        optimizer,
                        feed_dict = {
                            X: X_batch,
                            y: y_batch
                        }
                    )

                # Every second epoch, log the loss over the full dataset.
                # Evaluating the summary op yields a serialized summary, not
                # a number, hence the name.
                if (e + 1) % 2 == 0:
                    serialized_summary = sess.run(
                        loss_summary,
                        feed_dict = {
                            X: features,
                            y: targets
                        }
                    )
                    log_writer.add_summary(
                        serialized_summary, e + 1
                    )

            # Finally, evaluate the loss on the full dataset:
            loss_optimal = sess.run(
                loss,
                feed_dict = {
                    X: features,
                    y: targets
                }
            )
    finally:
        log_writer.close()

    print(
        "[R-Squared]: {}".format(
            100 * (1.0 - loss_optimal / np.var(targets))
        )
    )

[R-Squared]: 53.3472802987882
