In [1]:
import math, copy
import numpy as np
import matplotlib.pyplot as plt



In [2]:
x_train = np.array([1.0, 2.0,3.0]) 
y_train = np.array([4.5, 6.0,6.5]) 

In [3]:
def compute_cost(x, y, w, b):
   
    m = x.shape[0] 
    cost = 0
    
    for i in range(m):
        f_wb = w * x[i] + b
        cost = cost + (f_wb - y[i])**2
    total_cost = 1 / (2 * m) * cost

    return total_cost

In [4]:
def compute_cost2(x, y, w, b):
   
    m = x.shape[0] 
    cost = 0
    
    for i in range(m):
        
        cost = cost + abs( (-w*x[i]+y[i]-b))/(math.sqrt(w**2+1))  
    total_cost = (1 / 2*m) * cost

    return total_cost

In [5]:
#compute_cost(x_train, y_train, 1, 1)

In [6]:
#compute_cost2(x_train, y_train, 1, 1)

In [7]:
def compute_gradient(x, y, w, b): 

    
    # Number of training examples
    m = x.shape[0]    
    dj_dw = 0
    dj_db = 0
    
    for i in range(m):  
        f_wb = w * x[i] + b 
        dj_dw_i = (f_wb - y[i]) * x[i] 
        dj_db_i = f_wb - y[i] 
        dj_db += dj_db_i
        dj_dw += dj_dw_i 
    dj_dw = dj_dw / m 
    dj_db = dj_db / m 
        
    return dj_dw, dj_db

In [8]:
def compute_gradient2(x, y, w, b): 

    
    # Number of training examples
    m = x.shape[0]    
    dj_dw = 0
    dj_db = 0
    
    for i in range(m):  
        dj_dw_i= ((w**2+1)*x[i]*(b+w*x[i]-y[i])-w*(b+w*x[i]-y[i])**2)/(((w**2+1)**(3/2))*abs(b+w*x[i]-y[i]))
        dj_db_i=(b+w*x[i]-y[i])/((math.sqrt(w**2+1))*(abs(b+w*x[i]-y[i])))
        dj_db += dj_db_i
        dj_dw += dj_dw_i 
    dj_dw = dj_dw / 2*m 
    dj_db = dj_db / 2*m 
        
    return dj_dw, dj_db

In [9]:
compute_gradient(x_train, y_train, 1, 1)

(-5.333333333333333, -2.6666666666666665)

In [10]:
compute_gradient2(x_train, y_train, 1, 1)

(-10.60660171779821, -3.181980515339464)

In [11]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function): 
    
    
    
    w = copy.deepcopy(w_in) # avoid modifying global w_in
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    p_history = []
    b = b_in
    w = w_in
    
    for i in range(num_iters):
        # Calculate the gradient and update the parameters using gradient_function
        dj_dw, dj_db = gradient_function(x, y, w , b)     

        # Update Parameters using equation (3) above
        b = b - alpha * dj_db                            
        w = w - alpha * dj_dw                            

        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion 
            J_history.append( cost_function(x, y, w , b))
            p_history.append([w,b])
        # Print cost every at intervals 10 times or as many iterations if < 10
        if i% math.ceil(num_iters/10) == 0:
            print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e} ",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")
 
    return w, b, J_history, p_history #return w and J,w history for graphing

In [12]:
w_final, b_final, J_hist, p_hist = gradient_descent(x_train ,y_train, 1, 1, 0.01, 
                                                    10000, compute_cost, compute_gradient)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

Iteration    0: Cost 3.24e+00  dj_dw: -5.333e+00, dj_db: -2.667e+00   w:  1.053e+00, b: 1.02667e+00
Iteration 1000: Cost 6.00e-02  dj_dw:  3.549e-02, dj_db: -8.067e-02   w:  1.295e+00, b: 2.99632e+00
Iteration 2000: Cost 3.07e-02  dj_dw:  1.066e-02, dj_db: -2.423e-02   w:  1.089e+00, b: 3.46530e+00
Iteration 3000: Cost 2.80e-02  dj_dw:  3.202e-03, dj_db: -7.279e-03   w:  1.027e+00, b: 3.60618e+00
Iteration 4000: Cost 2.78e-02  dj_dw:  9.618e-04, dj_db: -2.186e-03   w:  1.008e+00, b: 3.64850e+00
Iteration 5000: Cost 2.78e-02  dj_dw:  2.889e-04, dj_db: -6.568e-04   w:  1.002e+00, b: 3.66121e+00
Iteration 6000: Cost 2.78e-02  dj_dw:  8.679e-05, dj_db: -1.973e-04   w:  1.001e+00, b: 3.66503e+00
Iteration 7000: Cost 2.78e-02  dj_dw:  2.607e-05, dj_db: -5.926e-05   w:  1.000e+00, b: 3.66617e+00
Iteration 8000: Cost 2.78e-02  dj_dw:  7.831e-06, dj_db: -1.780e-05   w:  1.000e+00, b: 3.66652e+00
Iteration 9000: Cost 2.78e-02  dj_dw:  2.352e-06, dj_db: -5.347e-06   w:  1.000e+00, b: 3.66662e+00


In [13]:
w_final, b_final, J_hist, p_hist = gradient_descent(x_train ,y_train, 1, 1, 0.01, 
                                                    10000, compute_cost2, compute_gradient2)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

Iteration    0: Cost 7.31e+00  dj_dw: -1.061e+01, dj_db: -3.182e+00   w:  1.106e+00, b: 1.03182e+00
Iteration 1000: Cost 5.46e-01  dj_dw: -2.711e-01, dj_db: -1.057e+00   w:  1.010e+00, b: 3.49745e+00
Iteration 2000: Cost 6.24e-01  dj_dw:  1.838e+00, dj_db:  1.056e+00   w:  9.911e-01, b: 3.48918e+00
Iteration 3000: Cost 6.58e-01  dj_dw: -6.704e+00, dj_db: -3.197e+00   w:  1.058e+00, b: 3.52342e+00
Iteration 4000: Cost 6.21e-01  dj_dw:  1.830e+00, dj_db:  1.051e+00   w:  9.991e-01, b: 3.47331e+00
Iteration 5000: Cost 6.55e-01  dj_dw: -6.679e+00, dj_db: -3.185e+00   w:  1.065e+00, b: 3.50749e+00
Iteration 6000: Cost 6.17e-01  dj_dw:  1.725e+00, dj_db:  1.027e+00   w:  1.048e+00, b: 3.49937e+00
Iteration 7000: Cost 5.76e-01  dj_dw:  1.763e+00, dj_db:  1.036e+00   w:  1.030e+00, b: 3.49137e+00
Iteration 8000: Cost 5.45e-01  dj_dw:  1.802e+00, dj_db:  1.045e+00   w:  1.011e+00, b: 3.48326e+00
Iteration 9000: Cost 5.51e-01  dj_dw: -2.714e-01, dj_db: -1.055e+00   w:  1.014e+00, b: 3.49608e+00
