In [36]:
import math
import copy
import matplotlib.pyplot as plt
import numpy as np

In [37]:
# Preset model parameters: a weight vector with one entry per feature
# (four features) and a scalar bias.
# NOTE(review): presumably reference values from a known solution — confirm.
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
b_init = 785.1811367994083

THE COST FUNCTION FORMULA

In [38]:
def compute_cost(X, y, w, b):
    """
    Compute the squared-error cost of a linear model f(x) = w . x + b.

    Args:
      X (ndarray (m,n)) : Data, m examples with n features
      y (ndarray (m,))  : Target values
      w (ndarray (n,))  : Model weights (one per feature)
      b (scalar)        : Model bias

    Returns:
      cost (scalar): sum over examples of (f(x_i) - y_i)^2, divided by 2*m

    Raises:
      ValueError: if X contains no examples (the cost would divide by zero)
    """
    # Work in floating point so integer inputs cannot overflow when squared.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    m = X.shape[0]
    if m == 0:
        raise ValueError("compute_cost requires at least one training example")

    # Residual f(x_i) - y_i for every example at once (no Python-level loop).
    residuals = np.dot(X, w) + b - y

    return np.sum(residuals ** 2) / (2 * m)

GRADIENT CALC ALGO

In [39]:
def compute_gradient(X, y, w, b):
    """
    Compute the gradient of the squared-error cost J for a linear model
    f(x) = w . x + b.

    Args:
      X (ndarray (m,n)) : Data, m examples with n features
      y (ndarray (m,))  : Target values
      w (ndarray (n,))  : Model weights (one per feature)
      b (scalar)        : Model bias

    Returns (in this order):
      dj_db (scalar)       : The gradient of the cost w.r.t. the parameter b.
      dj_dw (ndarray (n,)) : The gradient of the cost w.r.t. the parameters w.

    Raises:
      ValueError: if X contains no examples (the mean would divide by zero)
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    m, _ = X.shape   # unpacking also enforces that X is 2-D
    if m == 0:
        raise ValueError("compute_gradient requires at least one training example")

    # Residual f(x_i) - y_i for every example at once.
    residuals = np.dot(X, w) + b - y

    # dJ/dw_j = mean_i(residual_i * x_ij); dJ/db = mean_i(residual_i)
    dj_dw = np.dot(X.T, residuals) / m
    dj_db = np.sum(residuals) / m

    # The bias gradient comes FIRST; callers unpack as `dj_db, dj_dw = ...`.
    return dj_db, dj_dw
    

GRADIENT DESCENT ALGO

In [40]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, max_history=100000): 
    """
    Run batch gradient descent for num_iters steps with learning rate alpha,
    updating w and b from the gradients returned by gradient_function.

    Args:
      X (ndarray (m,n))   : Data, m examples with n features
      y (ndarray (m,))    : target values
      w_in (ndarray (n,)) : initial model parameters  
      b_in (scalar)       : initial model parameter
      cost_function       : function to compute cost, called as
                            cost_function(X, y, w, b)
      gradient_function   : function to compute the gradient, called as
                            gradient_function(X, y, w, b) and expected to
                            return (dj_db, dj_dw) in that order
      alpha (float)       : Learning rate
      num_iters (int)     : number of iterations to run gradient descent
      max_history (int)   : maximum number of per-iteration costs to keep in
                            J_history (prevents resource exhaustion on long runs)

    Returns:
      w (ndarray (n,)) : Updated values of parameters 
      b (scalar)       : Updated value of parameter 
      J_history (list) : Cost after each of the first max_history iterations
      """

    # Cost J at each iteration, primarily for graphing later
    J_history = []
    w = copy.deepcopy(w_in)  # avoid modifying the caller's w within function
    b = b_in

    # Report roughly 10 times over the run (every iteration if num_iters < 10).
    # Loop-invariant, so computed once; never used when num_iters is 0.
    print_every = math.ceil(num_iters / 10)

    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db, dj_dw = gradient_function(X, y, w, b)

        # Update parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        record = i < max_history
        report = i % print_every == 0

        # Evaluate the cost at most once per iteration, and only when needed.
        # Computing it here (rather than reading J_history[-1]) keeps the
        # printed cost current even after the history cap has been reached.
        if record or report:
            cost = cost_function(X, y, w, b)

        if record:
            J_history.append(cost)

        if report:
            print(f"Iteration {i:4d}: Cost {cost:8.2f}   ")

    return w, b, J_history  # final w, b and J history for graphing

MAIN FUNC

In [41]:
def main(X_train, y_train, compute_cost, compute_gradient, alpha, iterations):
    """
    Fit a linear model to the training data with gradient descent, starting
    from all-zero parameters, then print the learned parameters and the
    prediction for every training example next to its target value.

    Args:
      X_train (ndarray (m,n)) : Data, m examples with n features
      y_train (ndarray (m,))  : target values
      compute_cost            : function to compute cost
      compute_gradient        : function to compute the gradient
      alpha (float)           : Learning rate
      iterations (int)        : number of gradient descent iterations
    """
    # Size the weight vector from the data itself so that any number of
    # features works, rather than relying on the shape of a global array.
    m, n = X_train.shape
    initial_w = np.zeros(n)
    initial_b = 0.

    # run gradient descent (the cost history is not needed here)
    w_final, b_final, _ = gradient_descent(X_train, y_train, initial_w, initial_b,
                                           compute_cost, compute_gradient,
                                           alpha, iterations)
    print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
    for i in range(m):
        print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}") 

In [42]:
if __name__ == "__main__":
    # Three training examples with four features each, and their targets;
    # run 10000 gradient descent steps with learning rate 5.0e-7.
    main(
        np.array([
            [2104, 5, 1, 45],
            [1416, 3, 2, 40],
            [852, 2, 1, 35],
        ]),
        np.array([460, 232, 178]),
        compute_cost,
        compute_gradient,
        5.0e-7,
        10000,
    )

Iteration    0: Cost  2700.91   
Iteration 1000: Cost  1014.56   
Iteration 2000: Cost  1014.56   
Iteration 3000: Cost  1014.56   
Iteration 4000: Cost  1014.56   
Iteration 5000: Cost  1014.56   
Iteration 6000: Cost  1014.56   
Iteration 7000: Cost  1014.56   
Iteration 8000: Cost  1014.56   
Iteration 9000: Cost  1014.56   
b,w found by gradient descent: 0.00,[0.21863113 0.         0.         0.        ] 
prediction: 460.00, target value: 460
prediction: 309.58, target value: 232
prediction: 186.27, target value: 178
