# Linear regression with TensorFlow

In [1]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
from datetime import datetime


In [None]:
from sklearn.preprocessing import StandardScaler

# Fetch the California housing dataset and record its dimensions:
# m rows (samples) and n columns (features).
housing = fetch_california_housing()
m, n = housing.data.shape

# Scale the features with StandardScaler, then prepend a column of ones so
# that the first model weight plays the role of the bias term.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.hstack([np.ones((m, 1)), scaled_housing_data])

### Normal equation

$$ \theta = (\mathrm{X}^T \mathrm{X})^{-1} \mathrm{X}^T \mathrm{y} $$

In [None]:
# Training matrix (including the bias column) and targets (reshaped to a
# single column), stored as float32 constants in the graph.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)

# Normal equation: theta = (X^T X)^-1 X^T y
theta = tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), tf.matmul(XT, y))

In [None]:
# Evaluate the closed-form solution in a short-lived session; the `with`
# block closes the session on exit.
with tf.Session() as sess:
    theta_value = sess.run(theta)

In [None]:
# Display the parameter vector computed from the normal equation.
theta_value

### Gradient descent (manual)

In [None]:
# Hyperparameters for batch gradient descent: number of passes over the
# training set and the step size of each update.
n_epochs, learning_rate = 1000, 0.01

In [None]:
# Start from an empty default graph so the nodes built by earlier cells
# (e.g. the normal-equation ops) are not carried over; this matches the other
# gradient-descent cells, which all reset the graph before building.
tf.reset_default_graph()

# Training data as float32 constants: features with a leading bias column,
# and targets reshaped to a single column.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

# Parameter column vector (n feature weights + 1 bias), initialized uniformly
# at random between -1.0 and 1.0.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

# Hand-derived gradient of the MSE with respect to theta: (2/m) * X^T * error.
gradients = 2 / m * tf.matmul(tf.transpose(X), error)

# One batch gradient-descent step: theta <- theta - learning_rate * gradients.
training_op = tf.assign(theta, theta - learning_rate * gradients)

In [None]:
# Op that, when run, initializes the variables defined in the graph.
init = tf.global_variables_initializer()

In [None]:
# Open an interactive session; the next cell relies on it being the default
# session when it calls init.run() and .eval() without passing a session.
sess = tf.InteractiveSession()

In [None]:
# Initialize theta, run n_epochs batch gradient-descent steps (reporting the
# MSE every 100 epochs) and keep the final parameters in best_theta.
try:
    init.run()
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
finally:
    # The interactive session is not context-managed, so close it explicitly;
    # otherwise it stays active while later cells reset the default graph.
    # Re-run the cell that creates `sess` before re-running this cell.
    sess.close()

In [None]:
# Display the parameters reached by the manual gradient-descent loop.
best_theta

### Gradient descent (autodiff)

In [None]:
# Rebuild the same linear-regression graph from scratch, this time letting
# TensorFlow derive the gradient instead of writing it by hand.
tf.reset_default_graph()
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n+1,1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# tf.gradients returns one gradient tensor per variable in the list; take the
# single entry corresponding to theta.
gradients = tf.gradients(mse, [theta])[0]
# One gradient-descent step: theta <- theta - learning_rate * gradients.
training_op = tf.assign(theta, theta - learning_rate * gradients)

In [None]:
init = tf.global_variables_initializer()

# Train inside a context-managed session so it is closed when the block
# exits (an InteractiveSession would stay open until closed explicitly).
# Within the block `sess` is the default session, so .eval() works unchanged.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # Report the training MSE every 100 epochs.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    # Read the trained parameters out while the session is still open.
    best_theta = theta.eval()

### Using an optimizer

In [None]:
# Rebuild the linear-regression graph and delegate both the gradient
# computation and the parameter update to a TensorFlow optimizer.
tf.reset_default_graph()
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n+1,1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Plain gradient descent; the commented-out line below is an alternative
# optimizer (momentum) that can be swapped in.

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
# optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)

# minimize() returns the op that performs one training step on mse.
training_op = optimizer.minimize(mse)

In [None]:
init = tf.global_variables_initializer()

# Train inside a context-managed session so it is closed when the block
# exits (an InteractiveSession would stay open until closed explicitly).
# Within the block `sess` is the default session, so .eval() works unchanged.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # Report the training MSE every 100 epochs.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    # Read the trained parameters out while the session is still open.
    best_theta = theta.eval()

## Feeding Data to the training algorithm (Mini Batch)

`placeholder()`: Nodes that output the data you tell them to output at runtime

In [None]:
# Minimal placeholder example: A accepts any number of rows with 3 columns,
# and B adds 5 to whatever is fed into A.
tf.reset_default_graph()
A = tf.placeholder(tf.float32, shape=(None,3))
B = A + 5

In [None]:

# Evaluate B twice, feeding A first a single row and then two rows.
with tf.Session() as sess:
    B_val_1 = sess.run(B, feed_dict={A: [[1, 2, 3]]})
    B_val_2 = sess.run(B, feed_dict={A: [[4, 5, 6], [7, 8, 9]]})

In [None]:
# Result of evaluating B with the single-row feed.
print(B_val_1)

In [None]:
# Result of evaluating B with the two-row feed.
print(B_val_2)

### Mini-batch gradient descent
To implement mini-batch gradient descent, let's change the definition of `X` and `y` in the construction phase to make them placeholders.

In [None]:
# Hyperparameters for the mini-batch section. n_epochs is reassigned in a
# later cell before training starts.
n_epochs, learning_rate = 1000, 0.01

In [None]:
# X and y become placeholders so a different mini-batch can be fed at each
# training step; None leaves the number of rows unspecified.
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None,1), name="y")

In [None]:
# Linear model, MSE loss and gradient-descent training op built on the
# placeholders. The random initializer is seeded (seed=42).
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [None]:
# Override the earlier n_epochs value: each epoch now runs n_batches
# training steps, so the mini-batch run uses 10 epochs.
n_epochs = 10

In [None]:
# Number of rows per mini-batch, and the number of batches per epoch
# (rounded up so the last partial batch still counts).
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

In [None]:
def fetch_batch(epoch, batch_index, batch_size):
    """Return one pseudo-random mini-batch of the scaled housing data.

    Rows are drawn uniformly at random from the m training rows; the same row
    may appear more than once in a batch. The draw is seeded from
    (epoch, batch_index), so a given pair always yields the same batch.

    Reads the notebook-level names m, n_batches,
    scaled_housing_data_plus_bias and housing.

    Args:
        epoch: Index of the current epoch.
        batch_index: Index of the batch within the epoch.
        batch_size: Number of rows to draw.

    Returns:
        A tuple (X_batch, y_batch): the selected feature rows (bias column
        included) and the matching targets as a single-column array.
    """
    # A private RandomState seeded per batch produces the same index stream
    # as seeding the global generator, without reseeding NumPy's global RNG
    # as a side effect of every call.
    rng = np.random.RandomState(epoch * n_batches + batch_index)
    indices = rng.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

In [None]:
# Mini-batch training: for every epoch, fetch each batch in turn and run one
# optimizer step on it; the session is closed when the block exits.
with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            training_op.run(feed_dict={X: X_batch, y: y_batch})

    # Read the trained parameters out while the session is still open.
    best_theta = sess.run(theta)

In [None]:
# Display the parameters reached by mini-batch gradient descent.
best_theta

## Visualizing the graph and curves with TensorBoard


In [None]:
tf.reset_default_graph()

# Each run logs to its own directory under tf_logs, named after the current
# UTC timestamp (down to the second).
root_logdir = "tf_logs"
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
logdir = root_logdir + "/run-" + now + "/"

In [None]:
# Same mini-batch linear-regression graph as before, with the loss ops
# grouped under a "loss" name scope. n_epochs is reassigned in a later cell
# before training starts.
n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

# Ops created inside this block get the "loss/" name prefix.
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [None]:
# Summary op that records the MSE under the tag 'MSE', and a writer that
# stores summaries (plus the current graph definition) in logdir.
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [None]:
# Training-loop settings for the logged run; this n_epochs replaces the value
# assigned when the graph was built. n_batches is rounded up so the last
# partial batch still counts.
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

In [None]:
# Mini-batch training with TensorBoard logging: every 10th batch, the MSE on
# that batch is evaluated (before the training step) and written to the event
# file under a global step number.
with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                step = epoch * n_batches + batch_index
                summary_str = sess.run(mse_summary, feed_dict={X: X_batch, y: y_batch})
                file_writer.add_summary(summary_str, step)
            training_op.run(feed_dict={X: X_batch, y: y_batch})

    # Read the trained parameters out while the session is still open.
    best_theta = sess.run(theta)

In [None]:
# Close the summary writer now that training has finished.
file_writer.close()

In [None]:
# Display the parameters reached by the logged mini-batch run.
best_theta

In [None]:
# Show the name of the op that produces `error`, which was created inside
# the "loss" name scope.
print(error.op.name)

## Modularity

Create a graph that adds the output of two rectified linear units (ReLU):

$$ h_{\mathbf{w}, b}(\mathbf{X}) = \max(\mathbf{X} \cdot \mathbf{w} + b,\ 0) $$

The following code does the job but it's quite repetitive:

In [None]:
# Deliberately repetitive version: two ReLU units built by hand on the same
# input placeholder, each with its own weights and bias, then summed.
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name='X')

# One weight column per unit, initialized from a random normal distribution.
w1 = tf.Variable(tf.random_normal((n_features, 1)), name='weights1')
w2 = tf.Variable(tf.random_normal((n_features, 1)), name='weights2')

# Scalar biases, initialized to zero.
b1 = tf.Variable(0.0, name='bias1')
b2 = tf.Variable(0.0, name='bias2')

# Linear part of each unit: X . w + b
z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z2")

# Rectification: max(z, 0)
relu1 = tf.maximum(z1, 0., name='relu1')
relu2 = tf.maximum(z2, 0., name='relu2')

output = tf.add(relu1, relu2, name='output')

Let's stay **DRY** (Don't Repeat Yourself). The following code creates 5 ReLUs and outputs their sum:

In [2]:
# tf.reset_default_graph()

def relu(X):
    """Build one ReLU unit on top of X and return its output tensor.

    Inside a "relu" name scope, creates a weight variable of shape
    (number of columns of X, 1) initialized from a random normal
    distribution and a scalar bias variable initialized to 0.0, then
    returns max(X . weights + bias, 0).
    """
    with tf.name_scope("relu"):
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        linear = tf.add(tf.matmul(X, weights), bias, name="z")
        return tf.maximum(linear, 0., name="relu")

In [3]:
# Width of the input placeholder. The placeholder takes its width from
# n_features instead of a separate hard-coded literal; the value is 1 because
# a later cell feeds single-column rows into X.
n_features = 1
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")

# Five independent ReLU units on the same input, summed element-wise.
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name="output")

In [7]:
# Per-run log directory named after the current UTC timestamp. Seconds are
# included (as in the earlier TensorBoard section) so two runs started within
# the same minute do not write into the same directory.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

In [5]:
# Write the current default graph to logdir so it can be inspected in
# TensorBoard.
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

# The initializer op is created but not run in this cell; the session below
# only evaluates the placeholder X with the values fed into it.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    X_output = sess.run(X, feed_dict={X: [[1], [2], [3]]})

In [6]:
# Close the summary writer used to export the graph.
file_writer.close()