In [1]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(precision=2)

In [2]:
x_train = np.array([[2345, 5,78, 90], [2367, 980, 45, 9], [4500, 8900, 89, 76]])
y_train = np.array([560, 789, 900])

In [10]:
print(f'shape of x_train is: ', {x_train.shape}, 'Type is: ', {type(x_train)})
print(x_train)
print(f"shape of y_train is: ", {y_train.shape}, "Type is: ", {type(y_train)})
print(y_train)

shape of x_train is:  {(3, 4)} Type is:  {<class 'numpy.ndarray'>}
[[2345    5   78   90]
 [2367  980   45    9]
 [4500 8900   89   76]]
shape of y_train is:  {(3,)} Type is:  {<class 'numpy.ndarray'>}
[560 789 900]


In [12]:
b_init = 450.023421352315
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])

In [13]:
def predict_single_loop(x, w, b):
    n = x.shape[0]
    p = 0
    for i in range(n):
        p_i = x[i] * w[i]
        p = p + p_i
    p = p + b
    return p

In [14]:
# get a row from our training data
x_vec = x_train[0, :]
print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")

# make a prediction
f_wb = predict_single_loop(x_vec, w_init, b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

x_vec shape (4,), x_vec value: [2345    5   78   90]
f_wb shape (), prediction: -18705536.81199665


In [15]:
def predict(x, w, b):
    p = np.dot(x, w) + b
    return p

In [18]:
# get a row from our training data
x_vec = x_train[0, :]
print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")

# make a prediction
f_wb = predict(x_vec, w_init, b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

x_vec shape (4,), x_vec value: [2345    5   78   90]
f_wb shape (), prediction: -18705536.81199665


In [19]:
def compute_cost(X, y, w, b):

    m = X.shape[0]
    cost = 0.0
    for i in range(m):
        f_wb_i = np.dot(X[i], w) + b  # (n,)(n,) = scalar (see np.dot)
        cost = cost + (f_wb_i - y[i]) ** 2  # scalar
    cost = cost / (2 * m)  # scalar
    return cost

In [21]:
# Compute and display cost using our pre-chosen optimal parameters.
cost = compute_cost(x_train, y_train, w_init, b_init)
print(f"Cost at optimal w : {cost}")

Cost at optimal w : 111946576120849.62


In [22]:
def compute_gradient(X, y, w, b):
    m, n = X.shape  # (number of examples, number of features)
    dj_dw = np.zeros((n,))
    dj_db = 0.0

    for i in range(m):
        err = (np.dot(X[i], w) + b) - y[i]
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err * X[i, j]
        dj_db = dj_db + err
    dj_dw = dj_dw / m
    dj_db = dj_db / m

    return dj_db, dj_dw

In [23]:
# Compute and display gradient
tmp_dj_db, tmp_dj_dw = compute_gradient(x_train, y_train, w_init, b_init)
print(f"dj_db at initial w,b: {tmp_dj_db}")
print(f"dj_dw at initial w,b: \n {tmp_dj_dw}")

dj_db at initial w,b: -11410480.96057265
dj_dw at initial w,b: 
 [-3.95e+10 -5.21e+10 -9.80e+08 -1.01e+09]


In [24]:
def gradient_descent(
    X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters
):

    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    w = copy.deepcopy(w_in)  # avoid modifying global w within function
    b = b_in

    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db, dj_dw = gradient_function(X, y, w, b)  ##None

        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw  ##None
        b = b - alpha * dj_db  ##None

        # Save cost J at each iteration
        if i < 100000:  # prevent resource exhaustion
            J_history.append(cost_function(X, y, w, b))

        # Print cost every at intervals 10 times or as many iterations if < 10
        if i % math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]:8.2f}   ")

    return w, b, J_history  # return final w,b and J history for graphing

In [27]:
# initialize parameters
initial_w = np.zeros_like(w_init)
initial_b = 0.0
# some gradient descent settings
iterations = 1000
alpha = 5.0e-7
# run gradient descent
w_final, b_final, J_hist = gradient_descent(
    x_train,
    y_train,
    initial_w,
    initial_b,
    compute_cost,
    compute_gradient,
    alpha,
    iterations,
)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m, _ = x_train.shape
for i in range(m):
    print(
        f"prediction: {np.dot(x_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}"
    )

Iteration    0: Cost 54296064.12   
Iteration  100: Cost 90545809926674674862239361805871311054392069603777570120319256170366337962092542505904327806797827652193091241979179496002502225244808011545951480094879053274875210611055829530358267712399711899472073273600778661556407484084872512657571166340218093568.00   
Iteration  200: Cost      inf   
Iteration  300: Cost      nan   
Iteration  400: Cost      nan   
Iteration  500: Cost      nan   
Iteration  600: Cost      nan   
Iteration  700: Cost      nan   
Iteration  800: Cost      nan   
Iteration  900: Cost      nan   
b,w found by gradient descent: nan,[nan nan nan nan] 
prediction: nan, target value: 560
prediction: nan, target value: 789
prediction: nan, target value: 900


  cost = cost + (f_wb_i - y[i]) ** 2  # scalar
  dj_dw[j] = dj_dw[j] + err * X[i, j]
  w = w - alpha * dj_dw  ##None
