In [1]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Training set: 3 examples (rows) with 4 features each, plus one target per example.
x_train = np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35],
])
y_train = np.array([460, 232, 178])

In [42]:
# Reference parameters (one weight per feature, plus a bias) used to
# sanity-check the model, cost and gradient functions defined below.
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
b_init = 785.1811367994083

*Model: the prediction for one example is the dot product of the weights and the features, plus the bias*
$$f_{\vec{w}, b}(\vec{x}) = \vec{w}\cdot\vec{x} + b$$

In [70]:
def compute_model(x, w, b):
    """
    Evaluate the linear model f_wb(x) = w . x + b for every example in x.

    Args:
      x (array-like (m,n)): feature matrix, one example per row
      w (ndarray (n,))    : model weights, one per feature
      b (scalar)          : model bias

    Returns:
      f_wb (ndarray (m,)): float64 prediction for each example
    """
    # Cast up front so the result is always float64, as it was when the
    # predictions were written into an np.zeros buffer one row at a time.
    x = np.asarray(x, dtype=float)
    # A single matrix-vector product replaces the per-row Python loop.
    return np.dot(x, w) + b

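*Cost function: half the mean squared error over all $m$ training examples*
$$J(\vec{w}, b) = \frac{1}{2m}\sum_{i=1}^m \left(\hat{y}^{(i)}-y^{(i)}\right)^2, \qquad \hat{y}^{(i)} = f_{\vec{w}, b}(\vec{x}^{(i)})$$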

In [71]:
def compute_cost(x, y, w, b):
    """
    Squared-error cost J(w,b) = 1/(2m) * sum((f_wb - y)^2) of the linear model.

    Args:
      x : feature matrix, one example per row (passed through to compute_model)
      y : target value for each example
      w : model weights
      b : model bias

    Returns:
      The sum of squared prediction errors divided by twice the number of
      predictions returned by compute_model.
    """
    predictions = compute_model(x, w, b)
    n_examples = predictions.shape[0]
    residuals = predictions - y
    return np.sum(residuals ** 2) / (2 * n_examples)

In [72]:
# Sanity check: predictions for the training set using the reference parameters.
print(compute_model(x_train, w_init, b_init))

[459.99999762 231.99999837 177.99999899]


In [73]:
# Sanity check: cost on the training set using the reference parameters.
print(compute_cost(x_train, y_train, w_init, b_init))

1.5578904880036537e-12


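*Calculation of the derivatives*
$$\frac{\partial J}{\partial w_j} = \frac{1}{m}\sum_{i=1}^{m}\left(\hat{y}^{(i)} - y^{(i)}\right)x_j^{(i)}$$
Stacking the partial derivatives into a vector,
$$\frac{dJ}{dw} = \begin{bmatrix} \frac{\partial J}{\partial w_1} \\ \frac{\partial J}{\partial w_2} \\ \vdots \\ \frac{\partial J}{\partial w_n} \end{bmatrix}$$
which can also be written as
$$\frac{dJ}{dw} = \frac{1}{m}\sum_{i=1}^{m}\left(\hat{y}^{(i)} - y^{(i)}\right)\begin{bmatrix} x_1^{(i)} \\ x_2^{(i)} \\ \vdots \\ x_n^{(i)} \end{bmatrix}$$
and, for the bias,
$$\frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\left(\hat{y}^{(i)} - y^{(i)}\right)$$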

In [80]:
def compute_gradient(x, y, w, b):
    """
    Gradient of the squared-error cost of the linear model w . x + b.

    Args:
      x (ndarray (m,n)): feature matrix, one example per row
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model weights
      b (scalar)       : model bias

    Returns:
      dj_db (scalar)       : derivative of the cost with respect to b
      dj_dw (ndarray (n,)) : derivative of the cost with respect to each weight

    Raises:
      ZeroDivisionError: if x contains no examples (m == 0).
    """
    m, n = x.shape           # (number of examples, number of features)
    if m == 0:
        # Averaging over zero examples is undefined; fail loudly instead of
        # letting NumPy return NaN gradients.
        raise ZeroDivisionError("compute_gradient needs at least one training example")

    # Prediction error for every example at once, shape (m,).
    err = np.dot(x, w) + b - y
    # Each weight's derivative is the error-weighted average of its feature
    # column: (err @ x)[j] == sum_i err[i] * x[i, j].
    dj_dw = np.dot(err, x) / m
    dj_db = np.sum(err) / m

    return dj_db, dj_dw

In [81]:
# Sanity check: display (dj_db, dj_dw) on the training set at the reference parameters.
compute_gradient(x_train, y_train, w_init, b_init)

(-1.673925169143331e-06,
 array([-2.72623581e-03, -6.27197272e-06, -2.21745580e-06, -6.92403399e-05]))

*Gradient descent for multiple linear regression: repeat the two updates below for a fixed number of iterations*
$$\vec{w} = \vec{w} - \alpha \frac{\partial}{\partial \vec{w}}J$$
$${b} = {b} - \alpha \frac{\partial}{\partial b}J$$

In [84]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, max_history=100000):
    """
    Run batch gradient descent for num_iters steps with learning rate alpha.

    Args:
      X                 : training features, passed through to the callbacks
      y                 : training targets, passed through to the callbacks
      w_in              : initial weights (copied, never modified)
      b_in              : initial bias
      cost_function     : callable (X, y, w, b) -> cost
      gradient_function : callable (X, y, w, b) -> (dj_db, dj_dw)
      alpha (float)     : learning rate
      num_iters (int)   : number of update steps to perform
      max_history (int) : only the first max_history costs are stored, to
                          bound memory on very long runs

    Returns:
      w         : weights after the final update
      b         : bias after the final update
      J_history : list of the cost after each of the first max_history updates
    """
    J_history = []
    w = copy.deepcopy(w_in)  # avoid modifying the caller's w within the function
    b = b_in

    # Report about ten times over the run (every iteration if num_iters < 10).
    print_every = math.ceil(num_iters / 10)

    for i in range(num_iters):
        # Calculate the gradient at the current parameters
        dj_db, dj_dw = gradient_function(X, y, w, b)

        # Update both parameters from the same gradient evaluation
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        record = i < max_history          # prevent resource exhaustion
        report = i % print_every == 0
        if record or report:
            # Evaluate the cost whenever it is stored or printed, so a report
            # made after the history cap shows the current cost rather than
            # the last stored one.
            cost = cost_function(X, y, w, b)
            if record:
                J_history.append(cost)
            if report:
                print(f"Iteration {i:4d}: Cost {cost:8.2f}   ")

    return w, b, J_history  # final w, b and the cost history for graphing

In [91]:
# Gradient descent settings.
alpha = 5.0e-7
iterations = 1000

# Start the search from all-zero parameters shaped like w_init.
initial_b = 0.
initial_w = np.zeros_like(w_init)

# Fit the model on the training data.
w_final, b_final, J_hist = gradient_descent(
    X=x_train, y=y_train, w_in=initial_w, b_in=initial_b,
    cost_function=compute_cost, gradient_function=compute_gradient,
    alpha=alpha, num_iters=iterations,
)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")

# Compare the fitted model's prediction with the target for each training example.
m,_ = x_train.shape
for i in range(m):
    print(f"prediction: {np.dot(x_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

Iteration    0: Cost  2529.46   
Iteration  100: Cost   695.99   
Iteration  200: Cost   694.92   
Iteration  300: Cost   693.86   
Iteration  400: Cost   692.81   
Iteration  500: Cost   691.77   
Iteration  600: Cost   690.73   
Iteration  700: Cost   689.71   
Iteration  800: Cost   688.70   
Iteration  900: Cost   687.69   
b,w found by gradient descent: -0.00,[ 0.20396569  0.00374919 -0.0112487  -0.0658614 ] 
prediction: 426.19, target value: 460
prediction: 286.17, target value: 232
prediction: 171.47, target value: 178
