In [None]:
import numpy as np
import matplotlib.pyplot as plt
import copy
from Z_Score import zscore_normalize_features

## Prediction function
$$ f_{\mathbf{w},b}(\mathbf{x}) = \mathbf{w} \cdot \mathbf{x} + b \tag{2} $$

In [None]:
def predict(x, w, b):
    """
    Evaluate the linear model f_wb(x) = w . x + b.

    Args:
        x: feature vector of one example (a matrix with one example per row
           also works, because the product is delegated to np.dot).
        w: weight vector.
        b: bias term added to the dot product.

    Returns:
        The model output np.dot(x, w) + b.
    """
    weighted_sum = np.dot(x, w)
    return weighted_sum + b

## Cost function
$$ J(\mathbf{w}, b) = \frac{1}{2m} \sum\limits_{i=0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 $$

In [None]:
def compute_cost(X, y, w, b):
    """
    Squared-error cost of the linear model over a data set.

    Computes J(w, b) = 1 / (2m) * sum_i (w . x_i + b - y_i)^2 with a single
    matrix-vector product instead of a Python loop over the examples.

    Args:
        X: 2-D array with one example per row (m rows).
        y: 1-D array of m target values. A column vector of shape (m, 1)
           would broadcast against the predictions, so pass a flat array.
        w: 1-D weight vector with one entry per column of X.
        b: scalar bias.

    Returns:
        The scalar cost.

    Raises:
        ValueError: if X contains no examples (the mean would be undefined).
    """
    m = X.shape[0]
    if m == 0:
        raise ValueError("compute_cost requires at least one training example")

    # Residual of every example at once: prediction minus target.
    residuals = np.dot(X, w) + b - y
    # Dot product of the residual vector with itself is the sum of squares.
    return np.dot(residuals, residuals) / (2 * m)

## Computing the Gradients with Respect to `w` and `b`
$$\frac{\partial J(\mathbf{w},b)}{\partial w_j}  = \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)}\newline
\frac{\partial J(\mathbf{w},b)}{\partial b} = \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) $$

In [None]:
def compute_gradients(X, y, w, b):
    """
    Gradient of the squared-error cost with respect to the weights and bias.

    dJ/dw_j = 1/m * sum_i (w . x_i + b - y_i) * x_ij
    dJ/db   = 1/m * sum_i (w . x_i + b - y_i)

    Both sums are evaluated with matrix operations instead of nested Python
    loops over examples and features.

    Args:
        X: 2-D array with one example per row (m rows, n columns).
        y: 1-D array of m target values. A column vector of shape (m, 1)
           would broadcast against the predictions, so pass a flat array.
        w: 1-D weight vector of length n.
        b: scalar bias.

    Returns:
        (dj_dw, dj_db): the gradient with respect to w (array of shape (n,))
        followed by the gradient with respect to b (scalar), in that order.

    Raises:
        ValueError: if X contains no examples (the mean would be undefined).
    """
    m, _ = X.shape  # also enforces that X is 2-D, as the original unpacking did
    if m == 0:
        raise ValueError("compute_gradients requires at least one training example")

    # Residual of every example at once: prediction minus target.
    residuals = np.dot(X, w) + b - y

    # X^T @ residuals sums residual_i * x_ij over the examples for every feature j.
    dj_dw = np.dot(X.T, residuals) / m
    dj_db = np.sum(residuals) / m

    return dj_dw, dj_db


## Gradient Descent Implementation
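$$ w_j = w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} 
\newline b = b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b} $$

Both updates are applied simultaneously for every $j = 0, \dots, n-1$: all gradients are computed from the current $\mathbf{w}$ and $b$ before any parameter is changed.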

In [None]:
def gradient_descent(X, y, w_in, b_in, cost_fun, gradient_fun, learning_rate, num_of_iterations, log_every=5):
    """
    Run batch gradient descent and return the fitted parameters.

    Args:
        X: training features, passed through unchanged to cost_fun / gradient_fun.
        y: training targets, passed through unchanged to cost_fun / gradient_fun.
        w_in: initial weights; copied, so the caller's array is never modified.
        b_in: initial bias.
        cost_fun: callable (X, y, w, b) -> cost, used only for progress logging.
        gradient_fun: callable (X, y, w, b) -> (dj_dw, dj_db).
        learning_rate: step size alpha applied to both gradients.
        num_of_iterations: number of update steps to perform.
        log_every: print the cost on every iteration whose index is a multiple
            of this value (default 5, the previously hard-coded interval).
            Pass 0 or None to disable logging.

    Returns:
        (w, b): the parameters after the last update step.
    """
    w = copy.deepcopy(w_in)  # keep the caller's initial weights intact
    b = b_in

    for i in range(num_of_iterations):
        # Both gradients come from the current (w, b), so the two updates
        # below are effectively simultaneous.
        dj_dw, dj_db = gradient_fun(X, y, w, b)
        w = w - learning_rate * dj_dw
        b = b - learning_rate * dj_db

        # The cost is needed only for the log line, so it is evaluated only
        # on iterations that are actually logged.
        if log_every and i % log_every == 0:
            cost = cost_fun(X, y, w, b)
            print(f"Iteration {i}: Cost = {cost}")
    return w, b

## Testing Example

In [None]:
# Toy training set: three examples (rows) with four features each, and one target per example.
X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

# Replace the raw features with the helper's normalized output; from here on X_train
# no longer holds the raw values above.
# NOTE(review): mue / sigma are presumably the per-feature mean and standard deviation
# returned by Z_Score.zscore_normalize_features - confirm against that module.
X_train, mue, sigma = zscore_normalize_features(X_train)

In [None]:
# Inspect the training data: print the shape, Python type and contents of
# X_train and y_train (both are NumPy arrays).
# The stray ")" that used to end both messages has been removed.
print(f"X Shape: {X_train.shape}, X Type:{type(X_train)}")
print(X_train)
print(f"y Shape: {y_train.shape}, y Type:{type(y_train)}")
print(y_train)

In [None]:
# Hand-picked parameters used by the following cells to exercise predict,
# compute_cost and compute_gradients: one weight per feature column plus a scalar bias.
# NOTE(review): these look like values fitted to the raw, un-normalized features;
# X_train was replaced by its normalized version above, so confirm they are still
# the intended test values.
b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

In [None]:
# Take the first training example (row 0 of X_train) as a single feature vector.
x_vec = X_train[0,:]
print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")

# Predict for that one example with the w_init / b_init parameters.
f_wb = predict(x_vec,w_init, b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

In [None]:
# Compute and display the cost at the hand-picked parameters w_init, b_init.
# The message previously called these "optimal"; it now says "initial", matching
# the variable names and the wording of the gradient check.
# NOTE(review): X_train has been normalized, so these parameters are presumably not a
# minimum of the cost on the current features - confirm before reading this as a near-zero check.
cost = compute_cost(X_train, y_train, w_init, b_init)
print(f'Cost at initial w,b: {cost}')

In [None]:
# Compute and display the gradient at w_init, b_init.
# compute_gradients returns (dj_dw, dj_db) in that order; unpacking in the same
# order keeps each printed label attached to the right value.
tmp_dj_dw, tmp_dj_db = compute_gradients(X_train, y_train, w_init, b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

In [None]:
# Start from all-zero parameters: one weight per feature column and a scalar bias.
initial_w = np.zeros((X_train.shape[1], ))
initial_b = 0.
# Gradient descent settings.
# gradient_descent prints a line on every 5th iteration, so the previous
# 1,000,000 iterations produced 200,000 lines of output and made a full
# re-run of the notebook impractical.
# NOTE(review): 1,000 steps should be ample for three normalized examples at this
# learning rate - confirm that the printed cost has stopped changing well before the end.
iterations = 1000
alpha = 4.1e-1
# Run gradient descent.
w_final, b_final = gradient_descent(X_train, y_train, initial_w, initial_b,
                                    compute_cost, compute_gradients,
                                    alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
# Compare the fitted model's prediction with the target for every training example.
m, _ = X_train.shape
for i in range(m):
    print(f"prediction: {predict(X_train[i], w_final, b_final):0.2f}, target value: {y_train[i]}")