In [36]:
import numpy as np
import copy, math
import matplotlib.pyplot as plt

In [37]:
# Apply the matplotlib style sheet referenced by a relative path; the file must
# be reachable from the kernel's working directory.
plt.style.use("./deeplearning.mplstyle")
# Show NumPy floats with 2 digits of precision when arrays are printed.
np.set_printoptions(precision=2)

In [38]:
# Training set: 3 examples (rows) with 4 features each, plus one target per row.
xtr = np.array(
    [
        [2104, 5, 1, 45],
        [1416, 3, 2, 40],
        [852, 2, 1, 35],
    ]
)
ytr = np.array([460, 232, 178])

In [39]:
# Reference bias and weight vector (one weight per feature column) used to
# sanity-check the prediction, cost and gradient functions below.
b_init = 785.1811367994083
w_init = np.array(
    [
        0.39133535,
        18.75376741,
        -53.36032453,
        -26.42131618,
    ]
)

In [47]:
def predict_single_elem(x, w, b):
    """Predict one example with an explicit element-by-element loop.

    Args:
        x: Array for a single example; ``x.shape[0]`` sets how many terms
            are accumulated.
        w: Indexable weights, read at the same positions as ``x``.
        b: Bias added after all products have been summed.

    Returns:
        The sum of ``w[k] * x[k]`` over every position of ``x``, plus ``b``.
    """
    acc = 0
    for k in range(x.shape[0]):
        acc += w[k] * x[k]
    acc += b
    return acc

In [49]:
# First training example (row 0 of the feature matrix), used for spot checks.
x_vec = xtr[0]

In [51]:
# Loop-based prediction for the first training example (its target is 460).
predict_single_elem(x_vec, w_init, b_init)

np.float64(459.9999976194083)

In [59]:
def predict_single_vec(x, w, b):
    """Predict one example using a single dot product.

    Args:
        x: Feature values for one example.
        w: Weight array; its ``dot`` method is applied to ``x``.
        b: Bias added to the dot product.

    Returns:
        ``w.dot(x) + b``.
    """
    weighted_sum = w.dot(x)
    return weighted_sum + b

In [60]:
# Same prediction through the dot-product implementation, for comparison with
# the loop-based result.
predict_single_vec(x_vec, w_init, b_init)

np.float64(459.9999976194083)

In [61]:
def compute_cost(x, y, w, b, pred=predict_single_vec):
    """Accumulate the halved mean squared error over all examples.

    Args:
        x: Examples; ``x.shape[0]`` is the example count and ``x[k]`` is
            handed to ``pred`` one example at a time.
        y: Targets, indexed in step with ``x``.
        w: Weights forwarded to ``pred``.
        b: Bias forwarded to ``pred``.
        pred: Callable ``(x_row, w, b) -> prediction``; defaults to
            ``predict_single_vec``.

    Returns:
        The sum over examples of ``(prediction - target)**2 / (2 * m)``,
        where ``m`` is the example count (``0`` when there are no examples).
    """
    n_examples = x.shape[0]
    total = 0
    for k in range(n_examples):
        residual = pred(x[k], w, b) - y[k]
        total += residual**2 / (2*n_examples)
    return total

In [62]:
# Cost of the reference parameters over the whole training set.
compute_cost(xtr, ytr, w_init, b_init)

np.float64(1.557890442896663e-12)

In [94]:
def compute_gradient(x, y, w, b):
    """Compute the cost gradient with respect to the weights and the bias.

    Args:
        x: 2-D array of examples (unpacked as ``m, n = x.shape``).
        y: Targets, indexed per example.
        w: Weight array; ``w.dot(x[i])`` produces each prediction.
        b: Bias.

    Returns:
        Tuple ``(dj_dw, dj_db)``: an array of ``n`` per-feature averages of
        ``error * feature`` and the average error, both taken over the ``m``
        examples.
    """
    n_examples, n_features = x.shape
    grad_w = np.zeros((n_features,))
    grad_b = 0
    for row in range(n_examples):
        # Signed prediction error for this example.
        residual = (w.dot(x[row]) + b) - y[row]
        for col in range(n_features):
            grad_w[col] += residual * x[row, col]
        grad_b += residual
    return grad_w / n_examples, grad_b / n_examples

In [95]:
# Gradient evaluated at the reference parameters.
compute_gradient(xtr, ytr, w_init, b_init)

(array([-2.73e-03, -6.27e-06, -2.22e-06, -6.92e-05]),
 np.float64(-1.6739251501955248e-06))

In [98]:
def gradient_descent(x, y, w, b, compute_cost, compute_gradient, alpha, iters):
    """Run batch gradient descent for a fixed number of iterations.

    Args:
        x: Examples, forwarded unchanged to ``compute_cost`` and
            ``compute_gradient``.
        y: Targets, forwarded unchanged to the same callables.
        w: Initial weights. The caller's object is never modified.
        b: Initial bias.
        compute_cost: Callable ``(x, y, w, b) -> cost``.
        compute_gradient: Callable ``(x, y, w, b) -> (dj_dw, dj_db)``.
        alpha: Learning rate applied to both gradients.
        iters: Number of update steps to perform.

    Returns:
        Tuple ``(w, b, cost_history, parameter_history)``. ``cost_history``
        holds the cost after each recorded step and ``parameter_history``
        holds ``[w, b]`` snapshots for the same steps; only the first
        100000 steps are recorded.
    """
    max_history = 100000  # cap on recorded steps so the histories stay bounded

    cost_history = []
    parameter_history = []

    # Private copy so the caller's initial weights are never mutated.
    w_in = copy.deepcopy(w)
    b_in = b

    # Progress is printed roughly ten times over the run. With iters == 0 the
    # loop body never executes, so the zero interval is never used as a divisor.
    log_every = math.ceil(iters / 10)

    for i in range(iters):
        dw, db = compute_gradient(x, y, w_in, b_in)
        # Rebind rather than update in place: each step yields a fresh array,
        # so stored snapshots stay distinct and integer-typed initial weights
        # are promoted instead of triggering an in-place casting error.
        w_in = w_in - alpha * dw
        b_in -= alpha * db

        record = i < max_history
        report = i % log_every == 0
        if record or report:
            # Evaluate the cost for the current parameters so the progress
            # line is accurate even after recording has stopped.
            cost = compute_cost(x, y, w_in, b_in)

        if record:
            cost_history.append(cost)
            parameter_history.append([w_in, b_in])

        if report:
            print(f"Iterations: {i}, cost: {cost}, Parameters: W={w_in} B={b_in}")

    return w_in, b_in, cost_history, parameter_history

In [99]:
# Train from all-zero weights and zero bias with a small fixed learning rate.
wf, bf, ch, ph = gradient_descent(
    xtr,
    ytr,
    np.zeros_like(w_init),
    0,
    compute_cost,
    compute_gradient,
    alpha=5.0e-7,
    iters=1000,
)
print(f"Final Parameters: {wf}, {bf}")

Iterations: 0, cost: 2529.46295223163, Parameters: W=[2.41e-01 5.59e-04 1.84e-04 6.03e-03] B=0.000145
Iterations: 100, cost: 695.9903158352029, Parameters: W=[ 0.2  0.  -0.  -0. ] B=-0.0001198596187768893
Iterations: 200, cost: 694.9206979323061, Parameters: W=[ 0.2   0.   -0.   -0.01] B=-0.00035965781839536286
Iterations: 300, cost: 693.8604297851192, Parameters: W=[ 0.2   0.   -0.   -0.02] B=-0.0005983240279392168
Iterations: 400, cost: 692.8094286135914, Parameters: W=[ 0.2   0.   -0.   -0.02] B=-0.0008358632706869382
Iterations: 500, cost: 691.7676123706057, Parameters: W=[ 0.2   0.   -0.01 -0.03] B=-0.0010722805476294612
Iterations: 600, cost: 690.7348997354997, Parameters: W=[ 0.2   0.   -0.01 -0.04] B=-0.0013075808375690545
Iterations: 700, cost: 689.7112101076162, Parameters: W=[ 0.2   0.   -0.01 -0.04] B=-0.0015417690972177696
Iterations: 800, cost: 688.6964635999459, Parameters: W=[ 0.2   0.   -0.01 -0.05] B=-0.001774850261295446
Iterations: 900, cost: 687.6905810327946, Para

In [100]:
# Print the trained model's prediction next to the target for every training
# example.
# NOTE(review): assigning to `_` at cell level rebinds the name IPython uses
# for the last cell output — confirm nothing later relies on it.
m, _ = xtr.shape
for i in range(m):
    print(f"Prediction: {predict_single_vec(xtr[i], wf, bf)}, Target: {ytr[i]}")

Prediction: 426.18530497189204, Target: 460
Prediction: 286.1674720078562, Target: 232
Prediction: 171.46763087132317, Target: 178
