In [1]:
import numpy as np
X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

## Initializing the weights and bias parameters

In [4]:
b_init = 785.1811367994083
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])

In [None]:
def predict_single_loop(x, w, b):
    """single predict using linear regression

    Args:
        x (ndarray): Shape (n,) example with multiple features
        w (ndarray): Shape (n,) model parameters
        b (scalar): model parameter
        
    Returns:
        p (scalar): prediction
    """
    n = x.shape[0]
    p=0
    for i in range(n):
        p_i = x[i] * w[i]
        p = p + p_i
    p = p + b
    return p

In [None]:
predict_single_loop(X_train, w_init, b_init)

In [None]:
x_vec = X_train[0,:]
f_wb = predict_single_loop(x_vec, w_init, b_init)
print(f_wb)

In [None]:
x_vec

In [None]:
w_init

In [None]:
np.dot(x_vec, w_init) + b_init

In [None]:
def predict(x, w, b):
    """single predict using linear regression

    Args:
        x (ndarray): Shape (n,) example with multiple features
        w (ndarray): Shape (n,) model parameters
        b (scalar): model parameter
        
    Return:
        p (scalar): prediction
    """
    p = np.dot(x, w) + b
    return p

In [None]:
print(predict(x_vec, w_init, b_init))

## Compute Cost with Multiple Variables

In [None]:
def compute_cost(X, y, w, b):
    """compute cost

    Args:
        X (ndarray (m,n)): Data, m examples with n features
        y (ndarray (m,)): target values
        w (ndarray (n,)): model parameters
        b (scalar): model parameter
        
    Returns:
        cost(scalar): cost
    """
    m = X.shape[0]
    cost = 0.0
    for i in range(m):
        y_hat = np.dot(X[i], w) + b
        cost = cost + (y_hat - y[i])**2
    cost = cost/(2*m)
    return cost

In [None]:
cost = compute_cost(X_train, y_train, w_init, b_init)
print(cost)

In [None]:
X_train

## Compute gradient with multiple Variables

In [2]:
def compute_gradient(X, y, w, b):
    """Computes the gradient for linear regression

    Args:
        X (X (ndarray (m, n))): Data, m examples with n features
        y (ndarray (m,)): target values
        w (ndarray (n,)): model parameters
        b (scalar): model paramerter
        
    Returns:
        dj_dw : (ndarray (n,)): The gradient of the cost w.r.t. the parameters w.
        dj_db (scalar):         The gradient of the cost w.r.t. the parameter b.
    """
    m, n = X.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.
    
    print(dj_dw)
    for i in range(m):
        err = (np.dot(X[i],w) + b) - y[i]
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err * X[i, j]
            print(dj_dw)
        dj_db = dj_db + err
        
    dj_dw = dj_dw / m
    dj_db = dj_db / m
        
    return dj_db, dj_dw

In [5]:
tmp_dj_db, tmp_dj_dw =  compute_gradient(X_train, y_train, w_init, b_init)
print(tmp_dj_db)
print(tmp_dj_dw)

[0. 0. 0. 0.]
[-0.00500876  0.          0.          0.        ]
[-5.00876493e-03 -1.19029585e-05  0.00000000e+00  0.00000000e+00]
[-5.00876493e-03 -1.19029585e-05 -2.38059170e-06  0.00000000e+00]
[-5.00876493e-03 -1.19029585e-05 -2.38059170e-06 -1.07126626e-04]
[-7.31768305e-03 -1.19029585e-05 -2.38059170e-06 -1.07126626e-04]
[-7.31768305e-03 -1.67947342e-05 -2.38059170e-06 -1.07126626e-04]
[-7.31768305e-03 -1.67947342e-05 -5.64177549e-06 -1.07126626e-04]
[-7.31768305e-03 -1.67947342e-05 -5.64177549e-06 -1.72350302e-04]
[-8.17870722e-03 -1.67947342e-05 -5.64177549e-06 -1.72350302e-04]
[-8.17870722e-03 -1.88159177e-05 -5.64177549e-06 -1.72350302e-04]
[-8.17870722e-03 -1.88159177e-05 -6.65236723e-06 -1.72350302e-04]
[-8.17870722e-03 -1.88159177e-05 -6.65236723e-06 -2.07721013e-04]
-1.6739251122999121e-06
[-2.72623574e-03 -6.27197255e-06 -2.21745574e-06 -6.92403377e-05]


## Gradient Descent with Multiple Variables

In [None]:
import copy, math
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """Performs batch gradient descent to learn theta. Updates theta by taking
    num_iters gradient steps with learning rate alpha

    Args:
        X (ndarray(m,n)): Data, m examples with n features
        y (ndarray (m,)): target values
        w_in (ndarray (n,)): initial model parameters
        b_in (scalar): initial model
        cost_function: function to compute cost
        gradient_function: function to compute the gradient
        alpha (float): learning rate
        num_iters (int): number of iterations to run gradient descent
        
    Returns:
        w (ndarray(n,)): Updated values of parameters
        b (scalar): Updated value of parameter
    """
    
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    w = copy.deepcopy(w_in) # avoid modifying global w within function
    b = b_in
    
    for i in range(num_iters):
        
        # Calculate the gradient and update the parameters
        dj_db, dj_dw = gradient_function(X, y, w, b)
        
        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw 
        b = b - alpha * dj_db
        
        #Save cost J at each iteration
        if i < 100000:
            J_history.append(cost_function(X, y, w, b))
            
        # Print cost at every intervals 10 times or as many iterations if < 10
        if math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]:8.2f}   ")
            
    return w, b, J_history

In [None]:
initial_w = np.zeros_like(w_init)
initial_b = 0.
iterations = 1000
alpha = 5.0e-7
w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b, compute_cost, compute_gradient, alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f}, {w_final}")
m, _ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

In [None]:
X[1, 0]