In [3]:
import copy,math
import numpy as np
import matplotlib.pyplot as plt

np.set_printoptions(precision=2)  # reduced display precision on numpy arrays


| Size (sqft) | Number of Bedrooms  | Number of floors | Age of  Home | Price (1000s dollars)  |   
| ----------------| ------------------- |----------------- |--------------|-------------- |  
| 2104            | 5                   | 1                | 45           | 460           |  
| 1416            | 3                   | 2                | 40           | 232           |  
| 852             | 2                   | 1                | 35           | 178           |  

We will build a linear regression model using these values so we can then predict the price for other houses. For example, a house with 1200 sqft, 3 bedrooms, 1 floor, 40 years old.  

In [4]:
# set the data 

X_train  =  np.array([[2104,5,1,45],[1416,3,2,40],[852,2,1,35]])
Y_train  = np.array([460,232,178])

In [5]:
print (f"X train =\n {X_train}")
print (f"\n Y train =\n {Y_train}")

X train =
 [[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]

 Y train =
 [460 232 178]


In [6]:
Y_train.shape

(3,)

In [7]:
X_train.shape

(3, 4)

For demonstration, $\mathbf{w}$ and $b$ will be loaded with some initial selected values that are near the optimal. $\mathbf{w}$ is a 1-D NumPy vector.

In [8]:
b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


Single Prediction element by element

In [9]:
def predict_single_loop(x,w,b):
    """ 
     single predict using linear regression 

     Args :
        x (ndarray) : Shape (n,) example with multible feature 
        w (ndarray) : Shape (n,) model parameters 
        b (scalar)  : model parameters 

    return 
    P is a scalar 
    """

    n = x.shape[0]
    p = 0
    for i in range(n):
        p_i = x[i] * w[i]
        p = p + p_i
    p = p + b

    return p

In [10]:
# get a row from our training data 
x_vec = X_train[0,:]
print (f"x_vec shap = {x_vec.shape} and x_vec value = {x_vec}")
# make a prediction 
f_wb = predict_single_loop(x_vec , w_init,b_init)

print (f" our prediction is {f_wb:.3f} and it's shape is {f_wb.shape}")

x_vec shap = (4,) and x_vec value = [2104    5    1   45]
 our prediction is 460.000 and it's shape is ()


Single prediction vector 

In [11]:
def prediction_vector (x,w,b):
    """ Single predict using liner regression 
    Args :
        x (ndarray) shape (n,) example with multible feature 
        w (ndarray) shape (n,) example with multible feature 
        b (scalar)
    """

    p = np.dot(x,w) + b

    return p

In [12]:
# get a row from our data set

x_vec = X_train[0,:]
print (f"x_vec shape is {x_vec.shape} ,x_vex value is {x_vec}")

f_wb = prediction_vector (x_vec, w_init , b_init)

print (f"Our prediction value is {f_wb:.4f} and it's shape is {f_wb.shape}")

x_vec shape is (4,) ,x_vex value is [2104    5    1   45]
Our prediction value is 460.0000 and it's shape is ()


Same results so we will always use one statment code for this prediction 

Compute cost with Multiple variable 

In [13]:
def compute_cost (x,y,w,b):
    """
    compue cost
    Argu:
        x (ndarray(m,n)) : Data, m exapmle with n feature 
        y (ndarray(m,))  : target values
        w (ndarray(n,))  : model parameters 
        b (scalar)       : model parameter 
    """
    m = x.shape[0]
    cost = 0.0
    for i in range (m):
        f_wb_i = np.dot(x[i],w)+b
        cost = cost + (f_wb_i - y[i])**2
        cost = cost / (2 * m)
    
    return cost


In [14]:
cost = compute_cost(X_train , Y_train, w_init , b_init)

print (f"cost at optimal b = {cost}")

cost at optimal b = 2.7030944788097125e-13


In [15]:
def compute_gradient (x,y,w,b):
    """  function descreption 
    Compute the gradient for linear regression 

        Args :
     x (ndarray (m,n)) Data, m examples with n features
     y (ndarray (m,))  target values 
     w (ndarray)       model parameters 
     b                 model parameter

     return 
         dj_dw (ndarray(n,)) The gradient of the cost w.r.t. the parameters w.
         dj_db (scalar)      The gradiant of the cost w.r.tt the parameter b.
    """

    m,n = x.shape
    dj_dw = np.zeros((n,))
    dj_db = 0 

    for i in range (m):
        err = (np.dot(x[i] , w) + b) - y[i]
        for j in range (n):
            dj_dw[j] = dj_dw[j]+ err * x[i,j] 
        dj_db = dj_db+err

    dj_dw = dj_dw / m
    dj_db = dj_db / m 

    return dj_db ,dj_dw



In [16]:
tmp_dj_db, tmp_dj_dw = compute_gradient ( X_train,Y_train  ,w_init,b_init)

print (f"dj_dw at initial w,b =\n {tmp_dj_dw}")
print (f"dj_db at intial w,b = \n {tmp_dj_db}")

dj_dw at initial w,b =
 [-2.73e-03 -6.27e-06 -2.22e-06 -6.92e-05]
dj_db at intial w,b = 
 -1.6739251122999121e-06


In [33]:
def gradient_descent (x,y,w_in,b_in,cost_function,gradient_function,alpha,num_iters):
    """ function help

    this function to performs batch graient descent to learn w and b, 
    updates w and b by taking num_iters gradient steps with learning rate alpha

    Args :
        x (ndarray(m,n))    : Data m examples and n is features 
        y (ndarray ((m,))   : target values
        w_in (ndarray (n,)) : intial model parameters 
        b_in scalar         : intial model parameter
        cost function       : function to compute cost
        gradiant function   : finction to compute gradiant 
        alpha (float)       : learning rate 
        num_iters           : number of iterations to run gradiant descent 

        return:
        w (ndarray (n,)) : updated value of parameters 
        b (scalar)       : updated value of paramete
    """

    j_history=[] # an array to store cost j and w's at each iteration primarily for graphing later
    w = copy.deepcopy(w_in)
    b = b_in
    for i in range (num_iters) :
        #gradiant calulate and update the parameters 
        dj_db,dj_dw = gradient_function(x,y,w,b)
        #update parameters using alpha and gradiant 
        w = w - alpha * dj_dw 
        b = b - alpha * dj_db

        #if i <100000:
            #j_history = j_history.append(cost_function(x,y,w,b)) # Save cost J at each iteration

        #if i% math.ceil(num_iters / 10) == 0:
            #print(f"Iteration {i:4d} : cost {j_history[-1]:8.2f} ")
        
    return w,b,j_history

In [34]:
initial_w = np.zeros_like(w_init)
intial_b = 0.

iterations = 1000
alpha = 5.0e-7

w_final, b_final,j_hist = gradient_descent(X_train,Y_train,initial_w,intial_b,
                                            compute_cost,compute_gradient,
                                            alpha, iterations)

In [36]:
# initialize parameters
initial_w = np.zeros_like(w_init)
initial_b = 0.
# some gradient descent settings
iterations = 1000
alpha = 5.0e-7
# run gradient descent 
w_final, b_final, J_hist = gradient_descent(X_train, Y_train, initial_w, initial_b,
                                                    compute_cost, compute_gradient, 
                                                    alpha, iterations)


In [37]:
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m,_ = X_train.shape

b,w found by gradient descent: -0.00,[ 0.2   0.   -0.01 -0.07] 


In [39]:
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {Y_train[i]}")

prediction: 426.19, target value: 460
prediction: 286.17, target value: 232
prediction: 171.47, target value: 178


In [41]:
# initialize parameters
initial_w = np.zeros_like(w_init)
initial_b = 0.
# some gradient descent settings
iterations = 1000
alpha = 5.0e-7
# run gradient descent 
w_final, b_final, J_hist = gradient_descent(X_train, Y_train, initial_w, initial_b,
                                                    compute_cost, compute_gradient, 
                                                    alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {Y_train[i]}")

b,w found by gradient descent: -0.00,[ 0.2   0.   -0.01 -0.07] 
prediction: 426.19, target value: 460
prediction: 286.17, target value: 232
prediction: 171.47, target value: 178
