# Lesson Eight

### TensorBoard
According to the documentation:
The computations you'll use TensorFlow for - like training a massive deep neural network - can be complex and confusing. To make it easier to understand, debug, and optimize TensorFlow programs, we've included a suite of visualization tools called TensorBoard. You can use TensorBoard to visualize your TensorFlow graph, plot quantitative metrics about the execution of your graph, and show additional data like images that pass through it.

In [2]:
# Reuse the helpers written in lesson six
import lesson_six
# Reset the graph before building a new one
lesson_six.reset_graph()

In [3]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing

from sklearn.preprocessing import StandardScaler

# Load the California housing dataset
housing = fetch_california_housing()

# housing.data has shape (20640, 8): m is the number of rows, n the number of columns
m, n = housing.data.shape

# The input features have to be standardized before training
scaler = StandardScaler()

# Fit the scaler and transform the features; the result keeps the shape (m, 8)
scaled_housing_data = scaler.fit_transform(housing.data)

# Prepend a column of ones (the bias input) at index 0; the shape becomes (m, 8 + 1)
scaled_housing_data_and_bias = np.hstack((np.ones((m, 1)), scaled_housing_data))

In [4]:
def fetch_batch(epoch, batch_index, batch_size):
    '''
    Return a randomly sampled mini-batch as the pair (X_batch, y_batch).

    The global NumPy random generator is re-seeded with
    epoch * n_batches + batch_index, so a given (epoch, batch_index) pair
    always selects the same rows. batch_size row indices are drawn from
    [0, m) with np.random.randint, so a row may appear more than once.
    Relies on the module-level m, n_batches, housing and
    scaled_housing_data_and_bias.
    '''

    # Seed from the position in the training run, then draw the row indices
    np.random.seed(epoch * n_batches + batch_index)
    indices = np.random.randint(m, size=batch_size)

    # Targets as a column vector so that each batch has shape (batch_size, 1)
    targets = housing.target.reshape(-1, 1)

    # Select the same rows from the inputs and from the targets
    return scaled_housing_data_and_bias[indices], targets[indices]

In [5]:
# First we will create a log
from datetime import datetime


def getLogDir(root_logdir="/Volumes/MacAndroidStudio/ml-books/savedModel/tf_logs"):
    """Return a log directory path that is unique per second.

    The path has the form "<root_logdir>/run-<YYYYmmddHHMMSS>/", where the
    timestamp is the current UTC time. Giving every run its own directory
    keeps the logs of different runs separate.

    Args:
        root_logdir: Directory under which the per-run directory is placed.
            Defaults to the location this notebook has always used.

    Returns:
        The log directory path as a string, with a trailing slash.
    """
    # Local import: the cell-level import only brings in ``datetime``.
    from datetime import timezone

    # datetime.utcnow() is deprecated; an aware UTC "now" yields the same
    # formatted timestamp.
    now = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
    return "{}/run-{}/".format(root_logdir, now)

In [6]:

# Mini-batch gradient descent on the housing data, logging the MSE for TensorBoard.

n_epochs = 1000
learning_rate = 0.01

# X and y are placeholders so that a different mini-batch can be fed at every step.
# The first dimension is None so any batch size is accepted.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

# theta starts with random uniform values in [-1, 1); it has n + 1 rows to match
# the n feature columns plus the bias column of X.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")

# Predictions (y_hat) are the matrix product of the inputs and theta
y_pred = tf.matmul(X, theta, name="predictions")

# Difference between predictions and targets
error = y_pred - y

# Mean squared error: the mean of the squared differences
mse = tf.reduce_mean(tf.square(error), name="mse")

# Gradient descent with momentum
# (tf.train.GradientDescentOptimizer(learning_rate=learning_rate) is the plain alternative)
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.5)
training_op = optimizer.minimize(mse)

# Global variable initializer
global_init = tf.global_variables_initializer()

# Node that evaluates the MSE and serializes it as a TensorBoard summary
# (a binary log string).
mse_summary = tf.summary.scalar('MSE', mse)

# Batch size and the total number of batches per epoch
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

# FileWriter writes summaries to log files in the log directory.
# The first argument is the log directory path; the second, optional one is
# the graph to visualize.
file_writer = tf.summary.FileWriter(getLogDir(), tf.get_default_graph())

# The running phase. try/finally makes sure the FileWriter is closed even if
# training fails part-way through.
try:
    with tf.Session() as sess:
        # Initialize all variables
        sess.run(global_init)

        for epoch in range(n_epochs):

            for batch_index in range(n_batches):

                # Fetch the mini-batches one by one
                X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
                feed = {X: X_batch, y: y_batch}

                # Every 10 mini-batches, evaluate the MSE summary on the current
                # batch and write it to the log, using the global step count as
                # the x-axis value.
                if batch_index % 10 == 0:
                    summary_str = mse_summary.eval(feed_dict=feed)
                    step = epoch * n_batches + batch_index
                    file_writer.add_summary(summary_str, step)

                # One optimization step on the current mini-batch
                sess.run(training_op, feed_dict=feed)

        # Read the trained parameters while the session is still open
        best_theta = theta.eval()
finally:
    # Close the FileWriter to free resources.
    file_writer.close()

In [7]:
# Show the learned parameters: shape (n + 1, 1), with the bias weight first
best_theta

array([[ 2.049927  ],
       [ 0.8267504 ],
       [ 0.11433333],
       [-0.23890038],
       [ 0.31248516],
       [ 0.03146332],
       [-1.4988426 ],
       [-0.87141562],
       [-0.83012849]], dtype=float32)

From the terminal, type this:
$ tensorboard --logdir=path/to/log-directory

# Back to basics again

In [34]:
lesson_six.reset_graph()

# Variables: the weight W and the bias b, each with an initial value
W = tf.Variable([0.3], dtype=tf.float32)
b = tf.Variable([-0.3], dtype=tf.float32)

# A placeholder is a promise to provide a value later.
x = tf.placeholder(tf.float32)

# The linear model: multiply the input by W and add b
linear_model = W * x + b

In [35]:
# Op that initializes all global variables (here, W and b)
init = tf.global_variables_initializer()

# Open a session
sess = tf.Session()

# Run the initializer so that W and b receive their initial values
sess.run(init)

In [36]:

# This is where the computation actually happens.
# linear_model refers to W, x and b. W and b already have values, but x does not:
# it is a placeholder, so its value is supplied here through the feed dict.
# The model is then applied to every element of x.
print(sess.run(linear_model, feed_dict={x: [1, 2, 3, 4]}))


[ 0.          0.30000001  0.60000002  0.90000004]


In [37]:
# y holds the target values, often called the labels
y = tf.placeholder(tf.float32)

# Squared difference between the model output and the target, element by element
squared_deltas = tf.square(linear_model - y)

# Sum the squared differences into a single loss value
loss = tf.reduce_sum(squared_deltas)

# Evaluate the loss for concrete x and y values
print(sess.run(loss, feed_dict={x: [1, 2, 3, 4], y: [0, -1, -2, -3]}))

23.66


In [38]:
# tf.assign builds an op that changes the value of a variable when it is run
fixW = tf.assign(W, [-1.0])
fixb = tf.assign(b, [1.0])

# Run both assignments; like running an initializer, this stores values in W and b
sess.run([fixW, fixb])


[array([-1.], dtype=float32), array([ 1.], dtype=float32)]

In [40]:
# Re-evaluate the loss for the same x and y, now using the reassigned W and b
print(sess.run(loss, feed_dict={x: [1, 2, 3, 4], y: [0, -1, -2, -3]}))

0.0


Consequently, TensorFlow can automatically produce derivatives given only a description of the model using the function tf.gradients. For simplicity, optimizers typically do this for you.

In [41]:
# Gradient descent optimizer; the argument is the learning rate
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

# minimize() returns the op that performs one optimization step on the loss
train = optimizer.minimize(loss)


In [47]:
# Reset W and b to their initial (incorrect) values
sess.run(init)

# Run 1000 training steps on the same four data points
for i in range(1000):
    sess.run(train, feed_dict={x: [1, 2, 3, 4], y: [0, -1, -2, -3]})


In [48]:
# Fetch the trained W and b together with the loss on the training data
curr_W, curr_b, curr_loss = sess.run(
    [W, b, loss],
    feed_dict={x: [1, 2, 3, 4], y: [0, -1, -2, -3]},
)
print("W: %s b: %s loss: %s" % (curr_W, curr_b, curr_loss))

W: [-0.9999969] b: [ 0.99999082] loss: 5.69997e-11


### From the documentation:

tf.contrib.learn is a high-level TensorFlow library that simplifies the mechanics of machine learning, including the following:

1. running training loops
2. running evaluation loops
3. managing data sets
4. managing feeding


In [49]:
# Reset the graph before building the next model (reset_graph is defined in lesson_six, not shown here)
lesson_six.reset_graph()

In [50]:
import tensorflow as tf
# NumPy is often used to load, manipulate and preprocess data.
import numpy as np

# List of feature columns. There is a single real-valued feature, "x".
# Other, more complicated column types exist as well.
features = [tf.contrib.layers.real_valued_column("x", dimension=1)]

# An estimator is the front end for training (fitting) and evaluation
# (inference). Many predefined estimators exist, such as linear regression,
# logistic regression, linear classification, logistic classification, and
# many neural network classifiers and regressors. This one does linear
# regression.
estimator = tf.contrib.learn.LinearRegressor(feature_columns=features)

# Two small data sets: one for training and one for evaluation.
x_train = np.array([1., 2., 3., 4.])
y_train = np.array([0., -1., -2., -3.])
x_eval = np.array([2., 5., 8., 1.])
y_eval = np.array([-1.01, -4.1, -7, 0.])

# numpy_input_fn is a helper that turns NumPy arrays into an input function.
# It needs to know how many passes over the data to make (num_epochs)
# and how big each batch should be (batch_size).
input_fn = tf.contrib.learn.io.numpy_input_fn(
    {"x": x_train},
    y_train,
    batch_size=4,
    num_epochs=1000,
)
eval_input_fn = tf.contrib.learn.io.numpy_input_fn(
    {"x": x_eval},
    y_eval,
    batch_size=4,
    num_epochs=1000,
)

# Run 1000 training steps by calling the fit method with the training
# input function.
estimator.fit(input_fn=input_fn, steps=1000)

# Evaluate the model on the training data and on the evaluation data.
train_loss = estimator.evaluate(input_fn=input_fn)
eval_loss = estimator.evaluate(input_fn=eval_input_fn)
print("train loss: %r" % train_loss)
print("eval loss: %r" % eval_loss)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11a4c30b8>, '_master': '', '_num_ps_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/v8/qd7dlmk