# Import Libraries

In [4]:
import numpy as np
import matplotlib.pyplot as plt 
import math, copy

# Initialization

In [5]:
x_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

# Prediction By Vectorization

In [6]:
def prediction(x, w, b):
    p = np.dot(w, x) + b
    return p

In [11]:
x_vec = x_train[0,:]
f_wb = prediction(x_vec, w_final, b_final)
print(f"Prediction: {f_wb}")

NameError: name 'w_final' is not defined

# Cost Function

In [13]:
def cost_function(x, y, w, b):
    m = x.shape[0]
    cost = 0.0

    for i in range(m):
        cost = cost + (np.dot(x[i], w) + b - y[i]) ** 2
    cost = (cost / 2*m)

    print(f"Cost at optimal W: {cost}")
    
    return cost

# Calculate Gradient

In [14]:
def compute_gradient(x, y, w, b):
    m, n = x.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.
    
    for i in range(m):                             
        err = (np.dot(x[i], w) + b) - y[i]   
        for j in range(n):                         
            dj_dw[j] = dj_dw[j] + err * x[i, j]    
        dj_db = dj_db + err                        

    dj_dw = dj_dw / m                                
    dj_db = dj_db / m     

    return dj_dw, dj_db   # ✅ correct


# Calculate Gradient Descent

In [15]:
def gradient_descent(x, y, w_in, b_in, alpha, cost_function, gradient_function, num_iters):
    J_history = []
    w = copy.deepcopy(w_in)
    b = b_in

    for i in range(num_iters):
        dj_dw, dj_db = gradient_function(x, y, w, b)

        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        if i<100000:      # prevent resource exhaustion 
            J_history.append( cost_function(x, y, w, b))

    return w, b, J_history
    

# Initialize Parameters


In [16]:
# initialize parameters
m,n = x_train.shape
initial_w = np.zeros(n)
initial_b = 0.
# some gradient descent settings
iterations = 100000
alpha = 5.0e-7
# run gradient descent 
w_final, b_final, J_hist = gradient_descent(x_train, y_train, initial_w, initial_b, alpha, cost_function, compute_gradient, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
for i in range(m):
    print(f"prediction: {np.dot(x_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

Cost at optimal W: 22765.16657008467
Cost at optimal W: 6892.50314695769
Cost at optimal W: 6296.661252189402
Cost at optimal W: 6274.203683060785
Cost at optimal W: 6273.266932609294
Cost at optimal W: 6273.137930405988
Cost at optimal W: 6273.039253625178
Cost at optimal W: 6272.941723287022
Cost at optimal W: 6272.844244217301
Cost at optimal W: 6272.746775310194
Cost at optimal W: 6272.649315022129
Cost at optimal W: 6272.551863294433
Cost at optimal W: 6272.454420124191
Cost at optimal W: 6272.356985510536
Cost at optimal W: 6272.259559452713
Cost at optimal W: 6272.162141949953
Cost at optimal W: 6272.064733001524
Cost at optimal W: 6271.967332606626
Cost at optimal W: 6271.869940764545
Cost at optimal W: 6271.772557474462
Cost at optimal W: 6271.6751827356775
Cost at optimal W: 6271.57781654738
Cost at optimal W: 6271.480458908833
Cost at optimal W: 6271.383109819288
Cost at optimal W: 6271.285769277958
Cost at optimal W: 6271.188437284118
Cost at optimal W: 6271.091113836953
Co

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=False, figsize=(12, 4))
ax1.plot(J_hist)
ax2.plot(100 + np.arange(len(J_hist[100:])), J_hist[100:])
ax1.set_title("Cost vs. iteration");  ax2.set_title("Cost vs. iteration (tail)")
ax1.set_ylabel('Cost')             ;  ax2.set_ylabel('Cost') 
ax1.set_xlabel('iteration step')   ;  ax2.set_xlabel('iteration step') 
plt.show()