In [22]:
import numpy as np
import matplotlib.pyplot as plt
import math, copy

In [5]:
# Training data: every row of x_train is one example, every column one feature;
# y_train holds the target value for the row with the same index.
x_train = np.array(
    [
        [2104, 5, 1, 45],
        [1416, 3, 2, 40],
        [852, 2, 1, 35],
    ]
)
y_train = np.array([460, 232, 178])

In [6]:
# Inspect the training data: shape and container type of the feature matrix
# and of the target vector, followed by their raw values.
print(f"X Shape: {x_train.shape}, X type: {type(x_train)}")
print(x_train)
print(f"Y Shape: {y_train.shape}, Y type: {type(y_train)}")
print(y_train)

X Shape: (3, 4), X type: <class 'numpy.ndarray'>
[[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]
Y Shape: (3,), Y type: <class 'numpy.ndarray'>
[460 232 178]


In [8]:
# Reference parameters used throughout this notebook: a scalar bias and one
# weight per feature column. They fit the training data almost exactly (the
# cost evaluated at them further down is on the order of 1e-12).
b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


## Single prediction for one row using vector notation

In [9]:
def predict(x, w, b):
    """Evaluate the linear model for a single example.

    Args:
        x: feature values of one example (same length as ``w``).
        w: model weights, one per feature.
        b: model bias (scalar).

    Returns:
        The model output ``np.dot(w, x) + b``.
    """
    return np.dot(w, x) + b

In [10]:
# Take the first training example (row 0) as a 1-D feature vector
x_vec = x_train[0, :]
print(f"shape: {x_vec.shape}, value: {x_vec}")

# Predict for that single example with the reference parameters; the result
# is a NumPy scalar, which is why its shape prints as ()
f_wb = predict(x_vec, w_init, b_init)
print(f"Shape: {f_wb.shape}, value: {f_wb}")

shape: (4,), value: [2104    5    1   45]
Shape: (), value: 459.9999976194083


## Computing cost for multiple rows and features

In [13]:
def compute_cost(x, y, w, b):
    """Squared-error cost of the linear model over a whole data set.

    Computes ``J(w, b) = 1 / (2 * m) * sum_i (np.dot(w, x[i]) + b - y[i]) ** 2``
    where ``m`` is the number of rows in ``x``.

    Args:
        x: array of examples, one row per example (``m`` rows).
        y: the ``m`` target values; flattened before use so a column-shaped
            ``y`` cannot silently broadcast against the predictions.
        w: model weights, one per feature column of ``x``.
        b: model bias (scalar).

    Returns:
        The scalar cost.

    Raises:
        ValueError: if ``x`` contains no examples (the mean would be 0 / 0).
    """
    m = x.shape[0]
    if m == 0:
        raise ValueError("compute_cost requires at least one training example")

    # A single matrix-vector product yields the prediction for every row at
    # once, replacing the per-row Python loop.
    errors = np.dot(x, w) + b - np.ravel(y)
    return np.sum(errors ** 2) / (2 * m)

In [14]:
# Evaluate the squared-error cost (sum of squared errors divided by 2m, i.e.
# half the mean squared error) on the training set at w_init, b_init
cost = compute_cost(x_train, y_train, w_init, b_init)
print(f"Cost at optimal w: {cost}")

Cost at optimal w: 1.5578904428966628e-12


## Computing the gradient (partial derivatives of the cost)

In [20]:
def compute_gradient(x, y, w, b):
    """Gradient of the squared-error cost with respect to ``w`` and ``b``.

    With ``err[i] = np.dot(w, x[i]) + b - y[i]`` and ``m`` rows in ``x``:

        dj_dw[j] = 1 / m * sum_i err[i] * x[i, j]
        dj_db    = 1 / m * sum_i err[i]

    Args:
        x: array of examples, one row per example (``m`` rows, ``n`` columns).
        y: the ``m`` target values; flattened before use so a column-shaped
            ``y`` cannot silently broadcast against the predictions.
        w: model weights, one per feature column of ``x``.
        b: model bias (scalar).

    Returns:
        A tuple ``(dj_dw, dj_db)``: the gradient with respect to the weights
        (one entry per feature) and the scalar gradient with respect to the
        bias.

    Raises:
        ValueError: if ``x`` contains no examples (the mean would be 0 / 0).
    """
    m = x.shape[0]
    if m == 0:
        raise ValueError("compute_gradient requires at least one training example")

    # Residual of every example in one matrix-vector product.
    errors = np.dot(x, w) + b - np.ravel(y)

    # errors . x sums err[i] * x[i, j] over the examples for every feature j,
    # which is exactly what the nested example/feature loops accumulated.
    dj_dw = np.dot(errors, x) / m
    dj_db = np.sum(errors) / m

    return dj_dw, dj_db

In [21]:
# Evaluate the gradient on the training set at the reference parameters
# w_init, b_init and display both components
tmp_dj_dw, tmp_dj_db = compute_gradient(x_train, y_train, w_init, b_init)
print(f"dj_dw: {tmp_dj_dw}")
print(f"dj_db: {tmp_dj_db}")

dj_dw: [-2.72623577e-03 -6.27197263e-06 -2.21745578e-06 -6.92403391e-05]
dj_db: -1.6739251501955248e-06


## Gradient Descent with multiple variables

In [23]:
def gradient_descent(x, y, w_in, b_in, gradient_function, L, epochs):
    """Fit ``w`` and ``b`` with a fixed number of batch gradient-descent steps.

    Args:
        x: training examples, handed unchanged to ``gradient_function``.
        y: training targets, handed unchanged to ``gradient_function``.
        w_in: starting weights; deep-copied, so the caller's object is never
            modified.
        b_in: starting bias.
        gradient_function: callable ``(x, y, w, b) -> (dj_dw, dj_db)``.
        L: learning rate (step size) applied to both gradients.
        epochs: number of update steps to perform.

    Returns:
        A tuple ``(w, b)`` holding the parameters after the last step.
    """
    weights, bias = copy.deepcopy(w_in), b_in

    for _ in range(epochs):
        grad_w, grad_b = gradient_function(x, y, weights, bias)
        # Both parameters are updated from gradients taken at the same point.
        weights = weights - L * grad_w
        bias = bias - L * grad_b

    return weights, bias

In [28]:
# Start from all-zero weights (same shape and dtype as w_init) and a zero bias
initial_w = np.zeros_like(w_init)
initial_b = 0

# Gradient descent settings: number of update steps and learning rate.
# NOTE(review): the learning rate is presumably this small because the
# features are unscaled (the first column is in the thousands) — confirm.
epochs = 1000
L = 5.0e-7

# Run gradient descent on the full training set
w_final, b_final = gradient_descent(x_train, y_train, initial_w, initial_b, compute_gradient, L, epochs)

print(f"Final w and b accordingly are: {w_final, b_final}")

m = x_train.shape[0]

# Compare the fitted model's prediction with the target, one training example per line
for i in range(m):
    print(f"Prediction: {predict(x_train[i], w_final, b_final):0.2f}\t True value: {y_train[i]}")

Final w and b accordingly are: (array([ 0.20396569,  0.00374919, -0.0112487 , -0.0658614 ]), -0.002235407530932535)
Prediction: 426.19	 True value: 460
Prediction: 286.17	 True value: 232
Prediction: 171.47	 True value: 178
