In [60]:
import tensorflow as tf

In [61]:
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")

f = x*x*y + y + 2

In [64]:
# init = tf.compat.v1.global_variables_initializer()

# with tf.compat.v1.Session() as sess:
#     init.run()
#     result = f.eval()

In [66]:
# result

## Our first model

In [9]:
import numpy as np
# import pandas as pd
from sklearn.datasets import fetch_california_housing

In [4]:
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [23]:
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name = "X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name = "y") \
# -1  means figure it out.

We've defined our $X$ and $y$ clearly.

Let's revise some theory about linear regression before creating the model. A multiple linear regressor is given by:

$$\hat{y} = \theta_0 + \theta_1x_1 + \theta_2x_2 + ... + \theta_nx_n = \theta^T\cdot\textbf{x}$$

And we minimise the MSE (Mean Squared Error) for $\theta$ to get the model with the smallest error. The MSE is given by:

$$\textbf{MSE}(\textbf{X}, h_\theta) = \frac{1}{m}\sum^m_{i=1}(\hat{y}_i - y_i)^2 = \frac{1}{m}\sum^m_{i=1}(\theta^T\cdot\textbf{x}_i - y_i)$$

This cost function has a closed-form solution in other words we can solve for $\hat{\theta}$. This is called the Normal Equation and is given by:

$$\hat{\theta} = (\textbf{X}^T\cdot\textbf{X})^{-1}\cdot\textbf{X}^T\cdot \textbf{y}$$

This means we can also solve for $\hat{\theta}$ in tensorflow!

In [33]:
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.linalg.inv(tf.matmul(XT, X)), XT), y)

In [40]:
with tf.compat.v1.Session() as sess:
    theta_value = theta.numpy()

In [41]:
theta_value

array([[-3.7111828e+01],
       [ 4.3621695e-01],
       [ 9.4042774e-03],
       [-1.0677948e-01],
       [ 6.4308453e-01],
       [-4.0602099e-06],
       [-3.7817818e-03],
       [-4.2313379e-01],
       [-4.3650228e-01]], dtype=float32)

In [47]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), sc.fit_transform(housing.data)]

So we found our optimal regressor. Nothing new here. But let's see how a neural network works if we didn't have a Normal Equation to find the exact optimal solution. Lets approximate it with GRADIENT DESCENT!

In [67]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random.uniform([n + 1, 1], -1.0, 1.0), name = "theta")
y_pred = tf.matmul(X, theta, name = "predictions")

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = "mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)

training_op = tf.compat.v1.assign(theta, theta - learning_rate * gradients)

init = tf.compat.v1.global_variables_initializer()

with tf.compat.v1.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.numpy()

RuntimeError: The Session graph is empty.  Add operations to the graph before calling run().