# TensorflowML

---


In [1]:
import numpy as np
import tensorflow as tf

### Loss function

In [2]:
GRAPH = tf.Graph()
SESSION = tf.InteractiveSession(graph=GRAPH)


# ----------------------------------------
# MSE
# ----------------------------------------
X = tf.constant([[2, 4], [6, 8]],
                dtype=tf.float32)
X_HAT = tf.constant([[1, 2], [3, 4]],
                    dtype=tf.float32)
MSE = tf.nn.l2_loss(X - X_HAT)

# ----------------------------------------
# Cross entropy
# ----------------------------------------
Y = tf.constant([[1, 0, 0, 1], [0, 1, 1, 1]],
                dtype=tf.float32)
Y_HAT = tf.constant([[0, 1, 0, 1], [1, 1, 1, 1]],
                    dtype=tf.float32)
CE = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=Y_HAT, labels=Y
))
# ----------------------------------------
# Eval session values
# ----------------------------------------
(MSE_EVAL,
CE_EVAL) = SESSION.run([MSE, CE])

print(MSE_EVAL)
print(CE_EVAL)

15.0
3.5858502


### Example: Stochastic Gradient Descent

Now we'll go through a full example. We'll use *housing* data from california and we'll train a linear regression model in order to predict the price. 



In [3]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing

# Fetch data
housing = fetch_california_housing()
m, n = housing.data.shape

Remember to scale the data whenever using gradient related methods

In [9]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(housing.data)
scaled_housing_data = scaler.transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), 
                                      scaled_housing_data]

Constant nodes to hold the data

In [10]:
X = tf.constant(scaled_housing_data_plus_bias, 
               dtype=tf.float32, 
                name="X")
y = tf.constant(housing.target.reshape(-1, 1),
               dtype=tf.float32, 
                name="y")

We define $\theta$ (as a variable since it's going to be updated every iteration). Notice the dimension is $n+1$ since we are adding the bias term

In [11]:
theta = tf.Variable(tf.random_uniform([n + 1, 1],
                                     -1.0, 
                                     1.0),
                   name="theta")

Since we are performing linear regression, our predictions are as follows $\hat{y}=X\theta + b$ => $\hat{y}=X\theta$

In [12]:
y_pred = tf.matmul(X, theta, name="predictions")

We compute the residuals as follows:  $y-\hat{y}$ and our loss function: $mse=\frac{1}{m}\sum_{i=0}^m\|y_i - \hat{y}_i\|^2$

In [13]:
error = y_pred - y 
mse = tf.reduce_mean(tf.square(error), 
                     name="mse")

Since our loss function is as follows $mse=\frac{1}{m}\sum_{i=0}^m\|y_i - \hat{y}_i\|^2=\frac{1}{m}\sum_{i=0}^m\|y_i - X_i'\theta\|^2$ it's clear that the gradients are given by the following formula $\nabla_{\theta}mse=\frac{2}{m}\sum_{i=0}^m X_i'\|y_i - \hat{y}_i\|$

In [15]:
gradients = 2/m * tf.matmul(tf.transpose(X),
                            error,
                            name="mse")

We update our parameters: $\theta_{t+1} = \theta_{t} - \eta\nabla_{\theta_t}mse$ where $\eta$ is the learning rate.

In [16]:
learning_rate = 0.01
training_op = tf.assign(theta, 
                        theta - learning_rate * gradients)

Finally we are ready to initialize our variables and start running our process

In [17]:
n_epochs = 1000 
init = tf.global_variables_initializer()

with tf.Session() as sess: 
    sess.run(init)
     
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch ", 
                  epoch, 
                  " MSE = ", 
                  mse.eval())
        sess.run(training_op)
        
    final_theta = theta.eval()
    
print('Best theta')
print(final_theta)

Epoch  0  MSE =  7.4170833
Epoch  100  MSE =  0.7461083
Epoch  200  MSE =  0.63706625
Epoch  300  MSE =  0.6047505
Epoch  400  MSE =  0.58231485
Epoch  500  MSE =  0.5661528
Epoch  600  MSE =  0.55449426
Epoch  700  MSE =  0.54608625
Epoch  800  MSE =  0.54002035
Epoch  900  MSE =  0.53564537
Best theta
[[ 2.0685525 ]
 [ 0.77509713]
 [ 0.14420804]
 [-0.09578247]
 [ 0.13598455]
 [ 0.00546521]
 [-0.04031712]
 [-0.765513  ]
 [-0.7265943 ]]


#### Gradients of complex functions

In our last example we were able to compute the gradient explicitly. But what happens when we have an arbitrary complex function $\hat{y}=f(x)$? How could we compute the gradient of a function such as my_func? hint: do not try to do it manually!

```
def my_func(a, b):
   z = 0
   for i in range(100):
       z = a * np.cos(z + i) + z * np.sin(b - i)
   return z
```

Luckily, TensorFlow can perform automatic differentiation, hence give us the gradient of arbitrarily complex functions. For example, if we want to know the value of the gradient of my_func at $a=.2$ y $b = .3$ we'll perform the following. 


In [18]:
def my_func(a, b):
    z = 0
    for i in range(100):
        z = a * np.cos(z + i) + z * np.sin(b - i)
    return z

In [19]:
my_func(.2, .3)

-0.21253923284754916

We define a, b, z and the gradient

In [20]:
a = tf.Variable(0.2, name="a")
b = tf.Variable(0.3, name="b")
z = tf.constant(0.0, name="z0")
## my_func
for i in range(100):
    z = a * tf.cos(z + i) + z * tf.sin(b - i)

grads = tf.gradients(z, [a, b])

We initialize variables and execute our function

In [21]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    print(z.eval())
    print(sess.run(grads))

-0.21253741
[-1.1388494, 0.19671395]


Back to our linear regression example, we can replace the exact formula for the gradient by: 

```gradients = tf.gradients(mse, [theta])[0]```

Notice that the arguments are, a function and a list of variables according to which compute the gradient. 


In [22]:
gradients = tf.gradients(mse, [theta])[0]

n_epochs = 1000 
init = tf.global_variables_initializer()

with tf.Session() as sess: 
    sess.run(init)
     
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch ", 
                  epoch, 
                  " MSE = ", 
                  mse.eval())
        sess.run(training_op)
        
    final_theta = theta.eval()

print('Best theta')
print(final_theta)

Epoch  0  MSE =  7.41384
Epoch  100  MSE =  0.7347319
Epoch  200  MSE =  0.60109985
Epoch  300  MSE =  0.57916105
Epoch  400  MSE =  0.5647832
Epoch  500  MSE =  0.554306
Epoch  600  MSE =  0.54661655
Epoch  700  MSE =  0.54095954
Epoch  800  MSE =  0.53678596
Epoch  900  MSE =  0.53369814
Best theta
[[ 2.0685523e+00]
 [ 7.3350579e-01]
 [ 1.2789957e-01]
 [-3.3120632e-02]
 [ 9.1169663e-02]
 [ 7.2845651e-05]
 [-3.8050886e-02]
 [-9.2345887e-01]
 [-8.8046956e-01]]


#### Different optimizers

If we wan't to use an optimization method different from simple gradient descent we can replace the ```training_op``` method. 

In [23]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess: 
    sess.run(init)
     
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch ", 
                  epoch, 
                  " MSE = ", 
                  mse.eval())
        sess.run(training_op)
        
    final_theta = theta.eval()
    
print('Best theta')
print(final_theta)

Epoch  0  MSE =  13.034775
Epoch  100  MSE =  0.8876547
Epoch  200  MSE =  0.64736056
Epoch  300  MSE =  0.61111987
Epoch  400  MSE =  0.5872408
Epoch  500  MSE =  0.5699978
Epoch  600  MSE =  0.5575122
Epoch  700  MSE =  0.5484657
Epoch  800  MSE =  0.54190624
Epoch  900  MSE =  0.537146
Best theta
[[ 2.0685525 ]
 [ 0.8085636 ]
 [ 0.15110648]
 [-0.157846  ]
 [ 0.18679295]
 [ 0.00754053]
 [-0.0416143 ]
 [-0.6838008 ]
 [-0.6487208 ]]


#### Batch training

Let's suppose that we want to train our model using data batches instead of the whole dataset. In this case, we should define X and y as placeholders instead of constants. 


In [30]:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X" ) 
y = tf.placeholder(tf.float32, shape=(None, 1), name="y" )

The rest of the code remains the same

In [31]:
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

We define our batch generator

In [32]:
n_epochs = 10
batch_size = 100 
n_batches = int(np.ceil(m / batch_size ))

def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)  
    indices = np.random.randint(m, size=batch_size)  
    X_batch = scaled_housing_data_plus_bias[indices] 
    y_batch = housing.target.reshape(-1, 1)[indices] 
    return X_batch, y_batch


And we run our code

In [33]:
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        print('''Epoch: {epoch}'''.format(epoch=epoch))
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    final_theta = theta.eval()
              
print(final_theta)

Epoch: 0
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
[[ 2.070016  ]
 [ 0.8204561 ]
 [ 0.1173173 ]
 [-0.22739051]
 [ 0.3113402 ]
 [ 0.00353193]
 [-0.01126994]
 [-0.91643935]
 [-0.8795008 ]]


In order to save our models we call ```tf.train.Saver()```

In [34]:
n_epochs = 1000                                                                       
learning_rate = 0.01                                                                  

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")            
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")            
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")                                      
error = y_pred - y                                                                    
mse = tf.reduce_mean(tf.square(error), name="mse")                                    
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)            
training_op = optimizer.minimize(mse)                                                 

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())                                
            save_path = saver.save(sess, "/tmp/my_model.ckpt")
        sess.run(training_op)
    
    final_theta = theta.eval()
    print(final_theta)
    save_path = saver.save(sess, "/tmp/my_model_final.ckpt")

Epoch 0 MSE = 2.7544272
Epoch 100 MSE = 0.63222194
Epoch 200 MSE = 0.5727796
Epoch 300 MSE = 0.5585005
Epoch 400 MSE = 0.54906934
Epoch 500 MSE = 0.5422877
Epoch 600 MSE = 0.5373788
Epoch 700 MSE = 0.5338219
Epoch 800 MSE = 0.5312427
Epoch 900 MSE = 0.52937055
[[ 2.06855249e+00]
 [ 7.74078071e-01]
 [ 1.31192386e-01]
 [-1.17845066e-01]
 [ 1.64778143e-01]
 [ 7.44078017e-04]
 [-3.91945094e-02]
 [-8.61356676e-01]
 [-8.23479772e-01]]


Finally, to load our model

In [35]:
with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt")
    final_theta_restored = theta.eval() 

print(final_theta_restored)

INFO:tensorflow:Restoring parameters from /tmp/my_model_final.ckpt
[[ 2.06855249e+00]
 [ 7.74078071e-01]
 [ 1.31192386e-01]
 [-1.17845066e-01]
 [ 1.64778143e-01]
 [ 7.44078017e-04]
 [-3.91945094e-02]
 [-8.61356676e-01]
 [-8.23479772e-01]]
