In [24]:
import math
import copy
import numpy as np
import matplotlib.pyplot as plt

<a name="toc_15456_2"></a>
# 2 Problem Statement

You will use the motivating example of housing price prediction. The training dataset contains three examples with four features (size, bedrooms, floors, and age) shown in the table below. Note that, unlike the earlier labs, size is in sqft rather than 1000 sqft. This causes an issue, which you will solve in the next lab!

| Size (sqft) | Number of Bedrooms  | Number of floors | Age of  Home | Price (1000s dollars)  |   
| ----------------| ------------------- |----------------- |--------------|-------------- |  
| 2104            | 5                   | 1                | 45           | 460           |  
| 1416            | 3                   | 2                | 40           | 232           |  
| 852             | 2                   | 1                | 35           | 178           |  

You will build a linear regression model using these values so you can then predict the price for other houses, for example a 40-year-old house with 1200 sqft, 3 bedrooms, and 1 floor.  

Please run the following code cell to create your `X_train` and `y_train` variables.

In [25]:
# Training features: one row per house; columns are
# size (sqft), number of bedrooms, number of floors, age of home.
X_train = np.array(
    [
        [2104, 5, 1, 45],
        [1416, 3, 2, 40],
        [852, 2, 1, 35],
    ]
)
# Training targets: price in 1000s of dollars, one entry per row of X_train.
y_train = np.array([460, 232, 178])

# Only the value of the last expression in a cell is displayed, so of the
# four inspections below just y_train.shape shows up in the cell output.
X_train.shape
X_train
y_train
y_train.shape

(3,)

In [26]:
# Preset model parameters: one weight per feature (four in total) and a scalar bias.
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
b_init = 785.1811367994083
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


In [27]:
def predict_dot(x,w,b):
    """Predict with a linear model using a single dot product.

    Args:
        x: feature values of one example, in a form accepted by ``np.dot``.
        w: model weights, paired element by element with ``x``.
        b: bias term added to the dot product.

    Returns:
        The value of ``np.dot(x, w) + b``.
    """
    return np.dot(x, w) + b

In [28]:
# Take the first training example (row 0 of X_train) ...
x_vec = X_train[0]
# ... and evaluate the model on it with the preset parameters.
f_wb = predict_dot(x=x_vec, w=w_init, b=b_init)
f_wb

np.float64(459.9999976194083)

In [29]:
def compute_cost(X,y,w,b):
    """Compute the squared-error cost of a linear model over a data set.

    For every row of ``X`` the prediction ``np.dot(w, row) + b`` is compared
    with the matching entry of ``y``; the squared differences are summed and
    divided by ``2 * m``, where ``m`` is the number of rows in ``X``.

    Args:
        X: 2-D array of examples, one example per row.
        y: target values, indexed in step with the rows of ``X``.
        w: model weights, one per column of ``X``.
        b: model bias.

    Returns:
        The cost as a scalar.
    """
    n_examples = X.shape[0]
    squared_error_sum = 0.0
    for idx, features in enumerate(X):
        prediction = np.dot(w, features) + b
        squared_error_sum = squared_error_sum + (prediction - y[idx]) ** 2
    return squared_error_sum / (2 * n_examples)

In [30]:
# Evaluate the cost of the preset parameters on the training set.
cost = compute_cost(X=X_train, y=y_train, w=w_init, b=b_init)
cost

np.float64(1.5578904428966628e-12)

In [31]:
def compute_gradient(X,y,w,b):
    """Compute the gradient of the squared-error cost for a linear model.

    Args:
        X: 2-D array of shape ``(m, n)``, one example per row.
        y: target values, indexed in step with the rows of ``X``.
        w: model weights, one per column of ``X``.
        b: model bias.

    Returns:
        A tuple ``(dj_dw, dj_db)``: the gradient with respect to the weights
        (array of shape ``(n,)``) and with respect to the bias (scalar),
        both averaged over the ``m`` examples.
    """
    n_examples, n_features = X.shape
    grad_w = np.zeros((n_features,))
    grad_b = 0.
    for idx in range(n_examples):
        # residual = prediction - target = (w . x_i + b) - y_i
        residual = (np.dot(X[idx], w) + b) - y[idx]
        # Every feature's partial derivative picks up residual * x_ij.
        grad_w += residual * X[idx]
        grad_b += residual
    grad_w /= n_examples
    grad_b /= n_examples
    return grad_w, grad_b

In [36]:
def gradient_descent(X,y,w_in,b_in,cost_fun,gradient_fun,alpha,num):
    """Run batch gradient descent for ``num`` steps and return the result.

    Args:
        X: training examples, passed through to ``cost_fun``/``gradient_fun``.
        y: training targets, passed through to ``cost_fun``/``gradient_fun``.
        w_in: initial weights; copied, so the caller's object is not modified.
        b_in: initial bias.
        cost_fun: callable ``cost_fun(X, y, w, b)`` returning the cost.
        gradient_fun: callable ``gradient_fun(X, y, w, b)`` returning
            ``(dj_dw, dj_db)``.
        alpha: learning rate.
        num: number of gradient steps to take.

    Returns:
        A tuple ``(w, b, J_history)``: the final weights, the final bias and
        the cost after each of the first 100000 steps (for graphing).
    """
    max_history = 100000  # cap on stored costs, to prevent resource exhaustion
    J_history = []
    w = copy.deepcopy(w_in)  # avoid modifying the caller's w within the function
    b = b_in
    # Report roughly 10 times over the run (every step when num < 10).
    # Loop-invariant, so computed once; when num == 0 the loop never runs and
    # the zero interval is never used as a divisor.
    report_every = math.ceil(num / 10)
    for i in range(num):
        # Calculate the gradient and update the parameters
        dj_dw, dj_db = gradient_fun(X, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        record = i < max_history
        report = i % report_every == 0
        # Evaluate the cost whenever it is stored OR printed. Reading
        # J_history[-1] for the printout would show a stale value once the
        # history stops growing at max_history steps.
        if record or report:
            cost = cost_fun(X, y, w, b)
        if record:
            J_history.append(cost)
        if report:
            print(f"Iteration {i:4d}: Cost {cost:8.2f}   ")

    return w, b, J_history  # final w, b and the cost history for graphing

In [37]:
# Try out gradient descent on the training data.
# Start from all-zero weights (same shape as w_init) and a zero bias.
initial_b = 0.
initial_w = np.zeros_like(w_init)
# Gradient descent settings: learning rate and number of update steps.
alpha = 5.0e-7
iterations = 1000
# Run gradient descent; it prints the cost at intervals while it runs.
w_final, b_final, J_hist = gradient_descent(
    X=X_train, y=y_train,
    w_in=initial_w, b_in=initial_b,
    cost_fun=compute_cost, gradient_fun=compute_gradient,
    alpha=alpha, num=iterations,
)
b_final , w_final

Iteration    0: Cost  2529.46   
Iteration  100: Cost   695.99   
Iteration  200: Cost   694.92   
Iteration  300: Cost   693.86   
Iteration  400: Cost   692.81   
Iteration  500: Cost   691.77   
Iteration  600: Cost   690.73   
Iteration  700: Cost   689.71   
Iteration  800: Cost   688.70   
Iteration  900: Cost   687.69   


(np.float64(-0.002235407530932535),
 array([ 0.20396569,  0.00374919, -0.0112487 , -0.0658614 ]))

In [38]:
# Compare the trained model's predictions with the training targets.
# The row count is read from shape[0] instead of unpacking into `_`:
# assigning to `_` would shadow IPython's `_` (the last cell output)
# for the rest of the session.
m = X_train.shape[0]
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

prediction: 426.19, target value: 460
prediction: 286.17, target value: 232
prediction: 171.47, target value: 178
