In [17]:
import numpy as np
import copy, math
import matplotlib.pyplot as plt

In [18]:
# Training inputs: each row is one example, each column one feature.
X_train = np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35],
])
# Training targets: one value per row of X_train.
y_train = np.array([460, 232, 178])

In [19]:
# Reference weight vector (one entry per feature column) and scalar bias
# used by the prediction and cost cells further down.
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
b_init = 785.1811367994083
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


In [20]:
def predict(X, w, b):
    """
    Evaluate the linear model ``np.dot(X, w) + b``.

    Args:
      X: a single feature vector or a matrix with one example per row.
      w: weight vector.
      b: bias term added to every prediction.

    Returns:
      The dot product of X and w, shifted by b.
    """
    linear_part = np.dot(X, w)
    return linear_part + b

In [21]:
# Pick the first training example as a 1-D feature vector.
x_vec = X_train[0]
print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")

# Run the model on that single example with the reference parameters.
f_wb = predict(X=x_vec, w=w_init, b=b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

x_vec shape (4,), x_vec value: [2104    5    1   45]
f_wb shape (), prediction: 459.9999976194083


In [22]:
def compute_cost(X, y, w, b):
    """
    Compute the halved mean squared error of the linear model on a data set.

    The cost is ``sum((X @ w + b - y) ** 2) / (2 * m)``, where ``m`` is the
    number of rows in ``X``.

    Args:
      X: 2-D array with one example per row.
      y: 1-D array of targets, one per row of X.
      w: 1-D weight vector, one entry per column of X.
      b: scalar bias.

    Returns:
      The scalar cost.

    Raises:
      ValueError: if X contains no rows (the mean would be undefined).
    """
    m = X.shape[0]
    if m == 0:
        raise ValueError("compute_cost requires at least one training example")

    # Residuals for all examples at once instead of a Python-level loop.
    residuals = X @ w + b - y

    return np.sum(residuals ** 2) / (2 * m)

In [23]:
# Evaluate the cost at the reference parameters (w_init, b_init) and show it.
cost = compute_cost(X=X_train, y=y_train, w=w_init, b=b_init)
print(f'Cost at optimal w : {cost}')

Cost at optimal w : 1.5578904428966628e-12


In [28]:
def compute_gradient(X, y, w, b):
    """
    Compute the gradient of the halved mean squared error cost.

    With ``errors = X @ w + b - y`` and ``m`` rows in ``X``:
      dj_dw = X.T @ errors / m
      dj_db = sum(errors) / m

    Args:
      X: 2-D array of shape (m, n) with one example per row.
      y: 1-D array of m targets.
      w: 1-D weight vector of length n.
      b: scalar bias.

    Returns:
      dj_dw: array of shape (n,), gradient with respect to w.
      dj_db: scalar, gradient with respect to b.

    Raises:
      ValueError: if X is not 2-D, or if it contains no rows.
    """
    m, _ = X.shape  # unpacking enforces a 2-D input, as before
    if m == 0:
        raise ValueError("compute_gradient requires at least one training example")

    # Prediction error of every example, computed in one vectorised step.
    errors = X @ w + b - y

    # X.T @ errors sums error_i * X[i, j] over i for every feature j,
    # replacing the nested loops over examples and features.
    dj_dw = X.T @ errors / m
    dj_db = np.sum(errors) / m

    return dj_dw, dj_db



In [29]:
def gradient_descent(X, y, w_in, b_in, n_iters, alpha, cost_function, gradient_compute):
    """
    Run batch gradient descent for a fixed number of update steps.

    Args:
      X, y: training data, passed through unchanged to ``gradient_compute``.
      w_in: initial weights; deep-copied so the caller's object is not modified.
      b_in: initial bias.
      n_iters: number of update steps to perform.
      alpha: learning rate that scales each gradient step.
      cost_function: accepted but never called by this function.
      gradient_compute: callable ``(X, y, w, b) -> (dj_dw, dj_db)``.

    Returns:
      Tuple ``(w, b)`` holding the parameters after ``n_iters`` updates.
    """
    weights = copy.deepcopy(w_in)
    bias = b_in

    for _ in range(n_iters):
        grad_w, grad_b = gradient_compute(X, y, weights, bias)

        # Both parameters are updated from the same gradient evaluation.
        weights, bias = weights - alpha * grad_w, bias - alpha * grad_b

    return weights, bias


In [30]:
# Start from all-zero parameters (weights shaped like w_init).
initial_b = 0.0
initial_w = np.zeros_like(w_init)
# Gradient descent hyperparameters: learning rate and number of steps.
alpha = 5e-7
iterations = 30_000
# Fit the model.
w_final, b_final = gradient_descent(
    X=X_train,
    y=y_train,
    w_in=initial_w,
    b_in=initial_b,
    n_iters=iterations,
    alpha=alpha,
    cost_function=compute_cost,
    gradient_compute=compute_gradient,
)

print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
# Optional per-example comparison of predictions and targets (left disabled):
# m,_ = X_train.shape
# for i in range(m):
#     print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

b,w found by gradient descent: -0.04,[ 0.23250243  0.09224319 -0.29424344 -1.1915986 ] 
