In [83]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt
# apply the lab's matplotlib style sheet; it is loaded by relative path, so the
# .mplstyle file must be in the notebook's working directory
plt.style.use('./deeplearning.mplstyle')
np.set_printoptions(precision=2) # reduced display precision on numpy arrays (affects printing only)

You will use the motivating example of housing price prediction. The training dataset contains three examples with four features (size, bedrooms, floors, and age) shown in the table below. Note that, unlike the earlier labs, size is in sqft rather than 1000 sqft. This causes an issue, which you will solve in the next lab!

You will build a linear regression model using these values so you can then predict the price for other houses, for example a house with 1200 sqft, 3 bedrooms, and 1 floor that is 40 years old.

In [84]:
# Training set: one row per house, columns are [size (sqft), bedrooms, floors, age]
x_train = np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35],
])
# Target price for each house (row) of x_train
y_train = np.array([460, 232, 178])

In [85]:
# the features are stored in a 2D matrix: one row per example, one column per feature
# NOTE: the trailing ')' inside the f-strings below is printed literally
print(f"x Shape: {x_train.shape}, x Type:{type(x_train)})")
print(x_train)
# the targets are stored in a 1D vector with one entry per example
print(f"y Shape: {y_train.shape}, y Type:{type(y_train)})")
print(y_train)

x Shape: (3, 4), X Type:<class 'numpy.ndarray'>)
[[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]
y Shape: (3,), y Type:<class 'numpy.ndarray'>)
[460 232 178]


In [86]:
# For this lab the starting parameters are pre-selected to sit close to the optimum:
# one weight per feature column of x_train, plus a scalar bias.
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
b_init = 785.1811367994083
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


In [87]:
# The multivariate linear model is: f_wb(x) = w1*x1 + ... + wn*xn + b
# x and w are vectors; matching elements are multiplied and the products are summed.
def predict_single_loop(x,w,b):
    """
    Predict a single example by accumulating the feature/weight products in a loop.

    Args:
      x : 1-D array of n feature values (its `.shape[0]` sets the loop length)
      w : sequence of n weights, indexed in step with x
      b : scalar bias, added once after the loop

    Returns:
      The sum of x[i]*w[i] over all i, plus b.

    Side effect:
      Prints the running sum after every term (before the bias is added).
    """
    total = 0
    for idx in range(x.shape[0]):
        total = total + x[idx] * w[idx]
        print(total) # show the partial sum growing term by term
    return total + b

In [88]:
# with this function, we can take one row of the training data and make a prediction
x_vec = x_train[0,:] # first row only, i.e. the first training example
print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")
# predict_single_loop also prints the running sum after each feature term
f_wb = predict_single_loop(x_vec, w_init, b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

x_vec shape (4,), x_vec value: [2104    5    1   45]
823.3695764
917.13841345
863.7780889200001
-325.18113917999995
f_wb shape (), prediction: 459.9999976194083


In [89]:
def predict(x, w, b):
    """
    Linear-model prediction using a single dot product.

    Args:
      x : feature values (a 1-D vector for one example)
      w : weights, one per feature
      b : scalar bias

    Returns:
      np.dot(x, w) + b
    """
    return np.dot(x, w) + b

In [90]:
# with this function, we can take one row of the training data and make a prediction
# np.dot replaces the explicit Python loop, which gives shorter code and faster processing
x_vec = x_train[0,:] # first row only, i.e. the first training example
print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")
f_wb = predict(x_vec, w_init, b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

x_vec shape (4,), x_vec value: [2104    5    1   45]
f_wb shape (), prediction: 459.9999976194082


In [91]:
# The multivariate cost has the same form as the single-feature cost; the only
# difference is that x[i] and w are vectors rather than scalars, which lets the
# model handle several features at once.
def compute_cost(x, y, w, b):
    """
    Squared-error cost of the linear model over all examples.

    Args:
      x : 2-D array, one row per example
      y : targets, one per row of x
      w : weights, one per feature
      b : scalar bias

    Returns:
      Sum over examples of (np.dot(x[i], w) + b - y[i])**2, divided by 2*m,
      where m is the number of rows in x.
    """
    n_examples = x.shape[0]
    squared_error_sum = 0.0
    for idx in range(n_examples):
        residual = np.dot(x[idx], w) + b - y[idx]
        squared_error_sum = squared_error_sum + residual ** 2
    return squared_error_sum / (2 * n_examples)

In [92]:
# we can now compute and display the cost using our pre-chosen, near-optimal parameters
# remember: the cost is closest to 0 at the optimal w and b
cost = compute_cost(x_train, y_train, w_init, b_init)
print(f'Cost at optimal w : {cost}')

Cost at optimal w : 1.5578904330213735e-12


In [93]:
def compute_gradient(x, y, w, b):
    """
    Gradient of the squared-error cost with respect to w and b.

    Args:
      x : 2-D array of shape (m, n): m examples, n features
      y : targets, one per row of x
      w : weights, one per feature
      b : scalar bias

    Returns:
      (dj_db, dj_dw): the bias gradient first, then the length-n weight gradient.
      Both are averaged over the m examples.
    """
    m, n = x.shape
    grad_w = np.zeros((n,)) # running sum for dJ/dw, one slot per feature
    grad_b = 0.             # running sum for dJ/db
    for i in range(m):
        # prediction error of example i
        residual = (np.dot(x[i], w) + b) - y[i]
        # each feature's slot receives residual * x[i, j]
        grad_w += residual * x[i]
        grad_b = grad_b + residual
    # turn the sums into averages over the m examples
    grad_w = grad_w / m
    grad_b = grad_b / m
    return grad_b, grad_w

In [94]:
# we can now compute and display the gradient over the whole training set,
# evaluated at the pre-chosen w_init and b_init
tmp_dj_db, tmp_dj_dw = compute_gradient(x_train, y_train, w_init, b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

dj_db at initial w,b: -1.6739251122999121e-06
dj_dw at initial w,b: 
 [-2.73e-03 -6.27e-06 -2.22e-06 -6.92e-05]


In [95]:
def gradient_descent(x, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, max_history=100000):
    """
    Run batch gradient descent for a fixed number of update steps.

    Args:
      x, y              : training data, passed through unchanged to the two callbacks
      w_in              : initial weights (deep-copied, so the caller's object is not modified)
      b_in              : initial bias
      cost_function     : callable (x, y, w, b) -> cost
      gradient_function : callable (x, y, w, b) -> (dj_db, dj_dw)
      alpha             : learning rate
      num_iters         : number of update steps to perform
      max_history       : maximum number of cost values stored in J_history;
                          caps memory use on very long runs (default 100000)

    Returns:
      (w, b, J_history): the final weights, the final bias, and the list of
      recorded costs (one per iteration, up to max_history entries).

    Side effect:
      Prints the current cost roughly ten times over the run.
    """
    J_history = [] # cost per iteration, kept for graphing
    w = copy.deepcopy(w_in) # avoid modifying the caller's w within the function
    b = b_in
    # Loop-invariant logging interval; only used inside the loop, so a value of 0
    # (when num_iters is 0) is never used as a modulus.
    log_every = math.ceil(num_iters / 10)

    for i in range(num_iters):
        # compute the gradient at the current parameters and take one descent step
        dj_db, dj_dw = gradient_function(x, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        cost = None
        if i < max_history: # stop recording past the cap to prevent resource exhaustion
            cost = cost_function(x, y, w, b)
            J_history.append(cost)
        if i % log_every == 0:
            # Past the history cap, evaluate the cost here so the log line never
            # reports a stale value from the last recorded iteration.
            if cost is None:
                cost = cost_function(x, y, w, b)
            print(f"Iteration {i:4d}: Cost {cost:8.2f}   ")
    return w, b, J_history

In [98]:
initial_w = np.zeros_like(w_init) # start w at all zeros, with the same shape and dtype as w_init
initial_b = 0. # start b at 0
iterations = 1000 # number of gradient descent steps (setting from the lab)
alpha = 5.0e-7 # learning rate (setting from the lab)
# run gradient descent; notice the cost is decreasing but the predictions are still inaccurate
w_final, b_final, J_hist = gradient_descent(x_train, y_train, initial_w, initial_b, compute_cost, compute_gradient, alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m,_ = x_train.shape # m = number of training examples
for i in range(m):
    # compare each prediction with its target: the predictions are inconsistently off even though the cost decreases
    print(f"prediction: {np.dot(x_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

Iteration    0: Cost  2529.46   
Iteration  100: Cost   695.99   
Iteration  200: Cost   694.92   
Iteration  300: Cost   693.86   
Iteration  400: Cost   692.81   
Iteration  500: Cost   691.77   
Iteration  600: Cost   690.73   
Iteration  700: Cost   689.71   
Iteration  800: Cost   688.70   
Iteration  900: Cost   687.69   
b,w found by gradient descent: -0.00,[ 0.2   0.   -0.01 -0.07] 
prediction: 426.19, target value: 460
prediction: 286.17, target value: 232
prediction: 171.47, target value: 178


This issue, where the cost decreases while the predictions remain inconsistent, will be addressed in the next lab.