In [25]:
import warnings
warnings.filterwarnings('ignore')

# Широкий экран
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

In [23]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

**First graph and some types of initialization**

In [4]:
# setting up variables
x = tf.Variable(3, name = 'x') #Any node is automatically added to the default graph!
y = tf.Variable(4, name = 'y')
f = x*x*y + y + 2

In [6]:
# manually initialization
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
sess.close()
print(result)

42


In [7]:
# auto initiallization
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    result = f.eval()
sess.close()
print(result)

42


In [8]:
# auto initialization with interactive session (only in Jupyter)
# automatically percepts current session as default session
init = tf.global_variables_initializer()

sess = tf.InteractiveSession()
init.run()
result = f.eval()
sess.close()

print(result)


42


**Linear Regression in tf**

**Without gradient descent**

$$\Theta = (X^{T}\cdot X)^{-1}\cdot (X^{T}\cdot y)$$

In [16]:
# straightforward approach through normal equation without gradient descent
# https://www.geeksforgeeks.org/ml-normal-equation-in-linear-regression/
reset_graph()

housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data] # add bias for linear algorithm. x0=1

X = tf.constant(housing_data_plus_bias, dtype = tf.float32, name = 'X')
y = tf.constant(housing.target.reshape(-1, 1), dtype  = tf.float32, name = 'y') #reshape create column matrix
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as sess:
    theta_value = theta.eval() # get out matrix of weight for linear regression
sess.close()
print(theta_value)

[[-3.7225266e+01]
 [ 4.3568176e-01]
 [ 9.3872147e-03]
 [-1.0598953e-01]
 [ 6.3939309e-01]
 [-4.1104349e-06]
 [-3.7780963e-03]
 [-4.2437303e-01]
 [-4.3785891e-01]]


In [18]:
# The same in pure numpy
X = housing_data_plus_bias
y = housing.target.reshape(-1, 1)
theta_value_numpy = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
print(theta_value_numpy)

[[-3.69419202e+01]
 [ 4.36693293e-01]
 [ 9.43577803e-03]
 [-1.07322041e-01]
 [ 6.45065694e-01]
 [-3.97638942e-06]
 [-3.78654265e-03]
 [-4.21314378e-01]
 [-4.34513755e-01]]


**With gradient descent**

In [21]:
# prepare data for gradient descent
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [48]:
# manually computing gradients
reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = 'X')
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = 'y')
theta = tf.Variable(tf.random_uniform([n+1, 1], -1, 1, seed = 42), name = 'theta') # initialize a node with random numbers and fixed shape
y_pred = tf.matmul(X, theta, name = 'predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = 'mse') # just mean. 'reduce' in this finction from functional  programming style. Read more here: https://www.python-course.eu/lambda.php
gradients = 2/m*tf.matmul(tf.transpose(X), error) # check formula here: https://towardsdatascience.com/gradient-descent-in-python-a0d07285742f # alternative: gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate*gradients) # we can do a for-loop through this variable, reassign it

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch {}, MSE {}'.format(epoch, mse.eval()))
#             print(error.eval().sum())
        sess.run(training_op)        
    best_theta = theta.eval()

Epoch 0, MSE 9.161541938781738
Epoch 100, MSE 0.7145004272460938
Epoch 200, MSE 0.5667048692703247
Epoch 300, MSE 0.5555717945098877
Epoch 400, MSE 0.5488111972808838
Epoch 500, MSE 0.5436363220214844
Epoch 600, MSE 0.5396291017532349
Epoch 700, MSE 0.5365092158317566
Epoch 800, MSE 0.5340677499771118
Epoch 900, MSE 0.5321472883224487


In [49]:
best_theta 

array([[ 2.0685523 ],
       [ 0.8874027 ],
       [ 0.14401656],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614529 ],
       [-0.6375279 ]], dtype=float32)

In [52]:
# Using GradientDescentOptimizer
reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype = tf.float32, name = 'X')
y = tf.constant(housing.target.reshape(-1, 1), dtype = tf.float32, name = 'y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1, 1, seed = 42), name = 'theta') # initialize a node with random numbers and fixed shape
y_pred = tf.matmul(X, theta, name = 'predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = 'mse')
#----------------------
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learing_rate)
training_op = optimizer.minimize(mse)
#----------------------

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print('Epoch {}, MSE {}'.format(epoch, mse.eval()))
        sess.run(training_op)
    best_theta = theta.eval()    

Epoch 0, MSE 9.161541938781738
Epoch 100, MSE 0.5305594801902771
Epoch 200, MSE 0.5251553654670715
Epoch 300, MSE 0.5244484543800354
Epoch 400, MSE 0.5243411660194397
Epoch 500, MSE 0.5243241786956787
Epoch 600, MSE 0.5243214964866638
Epoch 700, MSE 0.5243210792541504
Epoch 800, MSE 0.5243210196495056
Epoch 900, MSE 0.5243209600448608


In [53]:
best_theta

array([[ 2.0685577 ],
       [ 0.8296404 ],
       [ 0.11875556],
       [-0.2655667 ],
       [ 0.30572918],
       [-0.00450185],
       [-0.03932704],
       [-0.8998373 ],
       [-0.87049514]], dtype=float32)

**With batch gradient descent**

In [75]:
### In this example batch iterations leads to gradient explosion! ###

In [54]:
def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)
    indices = np.random.randint(m, size = batch_size)
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

In [73]:
reset_graph()

n_epochs = 3
learning_rate = 0.0001
#-----------------------
batch_size = 100
n_batches = int(np.ceil(m / batch_size))
#-----------------------

#------------------------------
X = tf.placeholder(tf.float32, shape = (None, n+1), name = 'X')
y = tf.placeholder(tf.float32, shape = (None, 1), name = 'y')
#-----------------------------

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1, 1, seed = 42), name = 'theta') # initialize a node with random numbers and fixed shape
y_pred = tf.matmul(X, theta, name = 'predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name = 'mse')
#----------------------
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learing_rate)
training_op = optimizer.minimize(mse)
#----------------------

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
#         if epoch % 100 == 0 and epoch!=0:
#             print('Epoch {}, MSE {}'.format(epoch, mse.eval()))
#----------------------
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
#---------------------
    best_theta = theta.eval()        

In [74]:
best_theta 

array([[ 4.2043988e+02],
       [-7.0654650e+00],
       [ 8.0287799e+02],
       [ 6.5292413e+02],
       [ 2.5552672e+02],
       [ 2.3535125e+03],
       [ 3.6404062e+04],
       [-8.4642719e+02],
       [-1.4592883e+03]], dtype=float32)