In [1]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt


In [6]:
X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

In [14]:
# def predict_single_loop(x, w, b): 
#     n = x.shape[0];
#     p = 0;
#     for i in range(n):
#         p += x[i] * w[i]
#     p += b;
#     return p;


def predict(x, w, b):
    p = np.dot(x,w) + b;
    return p;


In [15]:

b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])

temp = X_train[0, :];
pred = predict(temp , w_init, b_init);
print(pred)

459.9999976194082


In [18]:
def compute_cost(X, y, w, b) : 
    n = X.shape[0];
    cost = 0.0
    for i in range(n): 
        f_wb = np.dot(X[i] , w) + b;
        cost += (f_wb - y[i]) ** 2;
    cost = cost / (2 * n); 
    return cost;

In [19]:
cost = compute_cost(X_train, y_train, w_init, b_init)
print(f'Cost at optimal w : {cost}')

Cost at optimal w : 1.5578904330213735e-12


In [23]:
def compute_gradient(X,y, w , b):
    m,n = X.shape;
    dj_dw = np.zeros((n,));
    dj_db  = 0.
    
    for i in range(m): 
        err = (np.dot(X[i], w) + b) - y[i];
        for j in range(n) : 
            dj_dw += err * X[i, j];
        dj_db += err;
        
    dj_dw = dj_dw / m                                
    dj_db = dj_db / m 
    return dj_db, dj_dw

In [24]:
tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w_init, b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

dj_db at initial w,b: -1.6739251122999121e-06
dj_dw at initial w,b: 
 [-0.00280397 -0.00280397 -0.00280397 -0.00280397]


In [25]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters): 
    J_history = []
    w = copy.deepcopy(w_in)
    b = b_in
    
    for i in range(num_iters):
        dj_db,dj_dw = gradient_function(X, y, w, b)

        w = w - alpha * dj_dw 
        b = b - alpha * dj_db

        if i<100000:
            J_history.append( cost_function(X, y, w, b))
        if i% math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]:8.2f}   ")
    return w, b, J_history

In [26]:
initial_w = np.zeros_like(w_init)
initial_b = 0.
iterations = 1000
alpha = 5.0e-7
w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b,
                                                    compute_cost, compute_gradient, 
                                                    alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

Iteration    0: Cost  4073.16   
Iteration  100: Cost   729.91   
Iteration  200: Cost   729.90   
Iteration  300: Cost   729.90   
Iteration  400: Cost   729.90   
Iteration  500: Cost   729.90   
Iteration  600: Cost   729.90   
Iteration  700: Cost   729.90   
Iteration  800: Cost   729.90   
Iteration  900: Cost   729.89   
b,w found by gradient descent: -0.00,[0.1966389 0.1966389 0.1966389 0.1966389] 
prediction: 423.75, target value: 460
prediction: 287.29, target value: 232
prediction: 175.01, target value: 178
