Load Tensorflow

In [1]:
import tensorflow as tf
tf.__version__

'2.2.0-rc2'

Load Data

In [0]:
#The data set is available with TF library
(train_x,train_y),(_,_) = tf.keras.datasets.boston_housing.load_data(test_split=0)

In [3]:
type(train_x)

numpy.ndarray

In [4]:
#Check how many training examples we have
train_x.shape

(506, 13)

In [5]:
train_x.dtype

dtype('float64')

In [0]:
#ML works with floating point numbers

In [7]:
#Data type of features
train_x.dtype

dtype('float64')

In [0]:
#In this case each floating point numbers uses 64 bits to store in memory
#By default Numpy uses float64
#To reduce the memory requirement. TF and DL Libraries use float32

In [0]:
#Convert x and y to 32 bits
train_x = train_x.astype('float32')
train_y = train_y.astype('float32')

In [10]:
train_x[0:5]

array([[1.23247e+00, 0.00000e+00, 8.14000e+00, 0.00000e+00, 5.38000e-01,
        6.14200e+00, 9.17000e+01, 3.97690e+00, 4.00000e+00, 3.07000e+02,
        2.10000e+01, 3.96900e+02, 1.87200e+01],
       [2.17700e-02, 8.25000e+01, 2.03000e+00, 0.00000e+00, 4.15000e-01,
        7.61000e+00, 1.57000e+01, 6.27000e+00, 2.00000e+00, 3.48000e+02,
        1.47000e+01, 3.95380e+02, 3.11000e+00],
       [4.89822e+00, 0.00000e+00, 1.81000e+01, 0.00000e+00, 6.31000e-01,
        4.97000e+00, 1.00000e+02, 1.33250e+00, 2.40000e+01, 6.66000e+02,
        2.02000e+01, 3.75520e+02, 3.26000e+00],
       [3.96100e-02, 0.00000e+00, 5.19000e+00, 0.00000e+00, 5.15000e-01,
        6.03700e+00, 3.45000e+01, 5.98530e+00, 5.00000e+00, 2.24000e+02,
        2.02000e+01, 3.96900e+02, 8.01000e+00],
       [3.69311e+00, 0.00000e+00, 1.81000e+01, 0.00000e+00, 7.13000e-01,
        6.37600e+00, 8.84000e+01, 2.56710e+00, 2.40000e+01, 6.66000e+02,
        2.02000e+01, 3.91430e+02, 1.46500e+01]], dtype=float32)

Normalize input features



In [0]:
from sklearn.preprocessing import Normalizer

transformer = Normalizer()
train_x = transformer.fit_transform(train_x)

In [12]:
train_x[0]

array([0.0024119 , 0.        , 0.01592969, 0.        , 0.00105285,
       0.01201967, 0.17945358, 0.00778265, 0.00782786, 0.60078794,
       0.04109624, 0.776719  , 0.03663436], dtype=float32)

Build Model

Define weights and bias

We need weights and bias

In [0]:
#Create weights and bias and initialize with Zero
w = tf.zeros(shape =(13,1))
b= tf.zeros(shape = (1))

Define a function to calculate prediction

In [0]:
#Calculate Y
#1. Multiply x and w (Matrix mulitplication)
#2. Shape of X [506,13]. Shape of Y [13,1]. Shape of the  product will be [560,1]
#3. Add the bias

In [0]:
def prediction(x,w,b):
    xw_matrixMultiplication = tf.matmul(x,w)
    y = tf.add(xw_matrixMultiplication, b)
    return y


Calculate Loss

In [0]:
#Function to calculate Loss (Mean Squared Error)

def loss(y_actual, y_predicted):
  diff= y_actual - y_predicted
  sqr = tf.square(diff)
  avg = tf.reduce_mean(sqr)

  return avg

Calculate Gradient Descent in TF


Function to train the model
1. Record all the mathematical steps to calculate Loss. The steps will be recorded using GradientTape

2. Calculate Gradients of Loss w.r.t weights and bias

3. Update weights and bias based on gradients and learning rate

In [0]:
def train(x, y_actual, w, b, learning_rate=0.01):
    
    #Record mathematical operations on 'tape' to calculate loss
    with tf.GradientTape() as t:
        
        t.watch([w,b])
        
        current_prediction = prediction(x, w, b)
        current_loss = loss(y_actual, current_prediction)
    
    #Calculate Gradients for Loss with respect to Weights and Bias
    dw, db = t.gradient(current_loss,[w, b])
    
    #Update Weights and Bias
    w = w - learning_rate*dw
    b = b - learning_rate*db
    
    return w, b

In [18]:
#Train for 100 Steps
for i in range(100):
    
    w, b = train(train_x, train_y, w, b, learning_rate=0.01)
    print('Current Loss on iteration', i, 
          loss(train_y, prediction(train_x, w, b)).numpy())

Current Loss on iteration 0 553.7515
Current Loss on iteration 1 518.26166
Current Loss on iteration 2 485.45786
Current Loss on iteration 3 455.13657
Current Loss on iteration 4 427.1098
Current Loss on iteration 5 401.20413
Current Loss on iteration 6 377.25894
Current Loss on iteration 7 355.12595
Current Loss on iteration 8 334.66785
Current Loss on iteration 9 315.75797
Current Loss on iteration 10 298.27924
Current Loss on iteration 11 282.1232
Current Loss on iteration 12 267.1898
Current Loss on iteration 13 253.38655
Current Loss on iteration 14 240.62785
Current Loss on iteration 15 228.83476
Current Loss on iteration 16 217.93404
Current Loss on iteration 17 207.85832
Current Loss on iteration 18 198.54506
Current Loss on iteration 19 189.9366
Current Loss on iteration 20 181.9796
Current Loss on iteration 21 174.62473
Current Loss on iteration 22 167.82646
Current Loss on iteration 23 161.54263
Current Loss on iteration 24 155.73433
Current Loss on iteration 25 150.36557
Cu

In [19]:
#Check Weights and Bias
print('Weights:\n', w.numpy())
print('Bias:\n',b.numpy())

Weights:
 [[6.27857521e-02]
 [2.58572161e-01]
 [2.17821732e-01]
 [1.46146421e-03]
 [1.13587305e-02]
 [1.31812572e-01]
 [1.38818812e+00]
 [8.23873580e-02]
 [1.76484972e-01]
 [7.99328279e+00]
 [3.82081807e-01]
 [7.41107655e+00]
 [2.53885090e-01]]
Bias:
 [11.476417]


In [20]:
train_x[0]

array([0.0024119 , 0.        , 0.01592969, 0.        , 0.00105285,
       0.01201967, 0.17945358, 0.00778265, 0.00782786, 0.60078794,
       0.04109624, 0.776719  , 0.03663436], dtype=float32)

If the input values are of different ranges. They are not normalized or scaled. we get the result as nan

Why Normalization is important.

Consider two actual values of y as 300 and 0.1 and the predicted values be 290 and 0.4. 

The loss will be 100 amd 0.09 respectively.  0.3% and 300% respectively

In this case during Gradient Descent, the importance will be given to higher value (100). So, the model will focus on larger or higher values and providing wrong results.

Hence normalization is important.