In [1]:
import numpy as np
X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

## Define a function to predict the values of y given w, b and x

In [2]:
def predict(x, w, b):
    """Predicts value of output based on the input data row, features weights and bias

    Args:
        x (ndarray (m,)): Example row of input
        w (ndarray (n,)): input parameters
        b (scalar): input parameter
        
    Returns:
        y (scalar): predicted value
    """
    y_hat = np.dot(x, w) + b
    return y_hat
        

In [6]:
w_init = np.array([100, 200, 300, 400])
b_init = 3
m = X_train.shape[0]
for i in range(m):
    y_hat = predict(X_train[i], w_init, b_init)
    print(f"""Actual : {y_train[i]}, Predicted : {y_hat}""")

Actual : 460, Predicted : 229703
Actual : 232, Predicted : 158803
Actual : 178, Predicted : 99903


## Find the cost, given the weights and bias of the prediction function

In [7]:
def compute_cost(X, y, w, b):
    """Computes the cost, given the weights and bias of the function

    Args:
        X (ndarray (m,n)): Training data
        y (ndarray (m,)): Target Value
        w (ndarray (n,)): Input parameters
        b (scalar): Input parameter
        
    Return:
        J (scalar): Cost
    """
    m = X.shape[0]
    cost = 0
    for i in range(m):
        err = predict(X[i], w, b) - y[i]
        cost += err ** 2
    cost = cost / (2*m)
    
    return cost

In [9]:
print(f"""Cost : {compute_cost(X_train, y_train, w_init, b_init)}""")

Cost : 14607031785.833334


## Since the cost is high, we need to find the right values of weights and bias to reduce the cost. That's where gradient descent comes in picture

### 1. Computing the gradient

In [39]:
def find_gradients(X, y, w, b):
    """Computes the gradient, given the weights and bias of the function

    Args:
    X (ndarray (m,n)): Training data
    y (ndarray (m,)): Target Value
    w (ndarray (n,)): Input parameters
    b (scalar): Input parameter

    Return:
    dj_dw (ndarray (n,)): Gradient w.r.t. w
    dj_db (scalar): Gradient w.r.t. b
    """   
    m = X.shape[0]
    n = X.shape[1]
    dj_dw = np.zeros(n)
    dj_db = 0
    for i in range(m):
        err = predict(X[i], w, b) - y[i]
        for j in range(n):
            print(dj_dw)
            dj_dw[j] += (predict(X[i], w, b) - y[i]) * X[i, j]
        dj_db += (predict(X[i], w, b) - y[i])
    dj_dw = dj_dw / m
    dj_db = dj_db / m
    
    return dj_dw, dj_db

In [40]:
b_init = 785.1811367994083
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
print(find_gradients(X_train, y_train, w_init, b_init))

[0. 0. 0. 0.]
[-0.00500876  0.          0.          0.        ]
[-5.00876493e-03 -1.19029585e-05  0.00000000e+00  0.00000000e+00]
[-5.00876493e-03 -1.19029585e-05 -2.38059170e-06  0.00000000e+00]
[-5.00876493e-03 -1.19029585e-05 -2.38059170e-06 -1.07126626e-04]
[-7.31768305e-03 -1.19029585e-05 -2.38059170e-06 -1.07126626e-04]
[-7.31768305e-03 -1.67947342e-05 -2.38059170e-06 -1.07126626e-04]
[-7.31768305e-03 -1.67947342e-05 -5.64177549e-06 -1.07126626e-04]
[-7.31768305e-03 -1.67947342e-05 -5.64177549e-06 -1.72350302e-04]
[-8.17870722e-03 -1.67947342e-05 -5.64177549e-06 -1.72350302e-04]
[-8.17870722e-03 -1.88159177e-05 -5.64177549e-06 -1.72350302e-04]
[-8.17870722e-03 -1.88159177e-05 -6.65236723e-06 -1.72350302e-04]
(array([-2.72623574e-03, -6.27197255e-06, -2.21745574e-06, -6.92403377e-05]), np.float64(-1.6739251122999121e-06))


### 2. Now we can implement gradient descent to get the optimal values of w and b

In [32]:
def gradient_descent(X, y, w, b, alpha, n_iters):
    
    m = X.shape[0]
    w_copy = w.copy()
    b_copy = b
    
    for i in range(n_iters):
        dj_dw, dj_db = find_gradients(X, y, w_copy, b_copy)
        print(dj_dw)
        w_copy = w_copy - alpha * dj_dw
        b_copy = b_copy - alpha * dj_db
        
    return w_copy, b_copy

In [38]:
print(gradient_descent(X_train, y_train, w_init, b_init, 0.000000000025, 10))

[-2.72623574e-03 -6.27197255e-06 -2.21745574e-06 -6.92403377e-05]
[-2.72607306e-03 -6.27159818e-06 -2.21732421e-06 -6.92362210e-05]
[-2.72591049e-03 -6.27122408e-06 -2.21719275e-06 -6.92321072e-05]
[-2.72574778e-03 -6.27084963e-06 -2.21706118e-06 -6.92279891e-05]
[-2.72558514e-03 -6.27047533e-06 -2.21692968e-06 -6.92238736e-05]
[-2.72542262e-03 -6.27010134e-06 -2.21679822e-06 -6.92197602e-05]
[-2.72526007e-03 -6.26972727e-06 -2.21666680e-06 -6.92156467e-05]
[-2.72509764e-03 -6.26935347e-06 -2.21653545e-06 -6.92115361e-05]
[-2.72493507e-03 -6.26897937e-06 -2.21640399e-06 -6.92074224e-05]
[-2.72477264e-03 -6.26860556e-06 -2.21627264e-06 -6.92033118e-05]
(array([  0.39133535,  18.75376741, -53.36032453, -26.42131618]), np.float64(785.1811367994083))


# I Made it Work!