In [None]:
import numpy as np
import matplotlib.pyplot as plt
import copy

In [2]:
def predict(x, w, b):
    return np.dot(x, w) + b

In [3]:
def compute_cost(X, y, w, b):
    m = X.shape[0]
    cost = 0.0
    for i in range(m):
        f_wb_i = predict(X[i], w, b)
        cost += (f_wb_i - y[i]) ** 2
    total_cost = cost / (2 * m)
    return total_cost

In [4]:
def compute_gradients(X, y, w, b):
    m, n= X.shape
    
    dj_dw = np.zeros((n, ))
    dj_db = 0.0
    
    for i in range(m):
        error = predict(X[i], w, b) - y[i]
        
        for j in range(n):
            dj_dw[j] = dj_dw[j] + error * X[i, j]
        
        dj_db = dj_db + error
        
    dj_db /= m
    dj_dw /= m
    
    return dj_dw, dj_db


In [5]:
def gradient_descent(X, y, w_in, b_in, cost_fun, gradient_fun, learning_rate, num_of_iterations):
    m, n = X.shape
    w = copy.deepcopy(w_in)
    b = b_in
    
    
    for i in range(num_of_iterations):
        dj_dw, dj_db = gradient_fun(X, y, w, b)
        temp_w = w - (learning_rate * dj_dw)
        temp_b = b - (learning_rate * dj_db)
        w = temp_w
        b = temp_b
        
        cost = cost_fun(X, y, w, b)
        if i % 5 == 0:
            print(f"Iteration {i}: Cost = {cost}")
    return w, b

In [6]:
# Testing
X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

In [7]:
# data is stored in numpy array/matrix
print(f"X Shape: {X_train.shape}, X Type:{type(X_train)})")
print(X_train)
print(f"y Shape: {y_train.shape}, y Type:{type(y_train)})")
print(y_train)

X Shape: (3, 4), X Type:<class 'numpy.ndarray'>)
[[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]
y Shape: (3,), y Type:<class 'numpy.ndarray'>)
[460 232 178]


In [8]:
b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


In [10]:
# get a row from our training data
x_vec = X_train[0,:]
print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")

# make a prediction
f_wb = predict(x_vec,w_init, b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

x_vec shape (4,), x_vec value: [2104    5    1   45]
f_wb shape (), prediction: 459.9999976194083


In [11]:
# Compute and display cost using our pre-chosen optimal parameters. 
cost = compute_cost(X_train, y_train, w_init, b_init)
print(f'Cost at optimal w : {cost}')

Cost at optimal w : 1.5578904428966628e-12


In [12]:
#Compute and display gradient 
tmp_dj_db, tmp_dj_dw = compute_gradients(X_train, y_train, w_init, b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

dj_db at initial w,b: [-2.72623577e-03 -6.27197263e-06 -2.21745578e-06 -6.92403391e-05]
dj_dw at initial w,b: 
 -1.6739251501955248e-06


In [27]:
# initialize parameters
initial_w = np.zeros((X_train.shape[1], ))
initial_b = 0.
# some gradient descent settings
iterations = 10000
alpha = 5.0e-7
# run gradient descent 
w_final, b_final = gradient_descent(X_train, y_train, initial_w, initial_b,
                                                    compute_cost, compute_gradients, 
                                                    alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {predict(X_train[i], w_final, b_final):0.2f}, target value: {y_train[i]}")

Iteration 0: Cost = 2529.4629522316304
Iteration 5: Cost = 697.0153256006653
Iteration 10: Cost = 696.9610350024587
Iteration 15: Cost = 696.9069046611058
Iteration 20: Cost = 696.852798081742
Iteration 25: Cost = 696.7987152537909
Iteration 30: Cost = 696.7446561666906
Iteration 35: Cost = 696.6906208098934
Iteration 40: Cost = 696.6366091728472
Iteration 45: Cost = 696.5826212450056
Iteration 50: Cost = 696.5286570158306
Iteration 55: Cost = 696.4747164747836
Iteration 60: Cost = 696.4207996113372
Iteration 65: Cost = 696.3669064149657
Iteration 70: Cost = 696.3130368751441
Iteration 75: Cost = 696.2591909813583
Iteration 80: Cost = 696.2053687230979
Iteration 85: Cost = 696.1515700898511
Iteration 90: Cost = 696.0977950711173
Iteration 95: Cost = 696.0440436563995
Iteration 100: Cost = 695.990315835203
Iteration 105: Cost = 695.9366115970407
Iteration 110: Cost = 695.8829309314256
Iteration 115: Cost = 695.8292738278801
Iteration 120: Cost = 695.7756402759309
Iteration 125: Cost = 6

Iteration 655: Cost = 690.1707589672602
Iteration 660: Cost = 690.1196083265821
Iteration 665: Cost = 690.0684801232328
Iteration 670: Cost = 690.0173743472325
Iteration 675: Cost = 689.966290988616
Iteration 680: Cost = 689.9152300374167
Iteration 685: Cost = 689.8641914836725
Iteration 690: Cost = 689.8131753174239
Iteration 695: Cost = 689.7621815287215
Iteration 700: Cost = 689.711210107616
Iteration 705: Cost = 689.6602610441663
Iteration 710: Cost = 689.6093343284291
Iteration 715: Cost = 689.5584299504688
Iteration 720: Cost = 689.5075479003609
Iteration 725: Cost = 689.4566881681752
Iteration 730: Cost = 689.4058507439913
Iteration 735: Cost = 689.3550356178912
Iteration 740: Cost = 689.3042427799625
Iteration 745: Cost = 689.253472220296
Iteration 750: Cost = 689.2027239289891
Iteration 755: Cost = 689.1519978961411
Iteration 760: Cost = 689.1012941118597
Iteration 765: Cost = 689.0506125662513
Iteration 770: Cost = 688.9999532494286
Iteration 775: Cost = 688.9493161515121
Ite