In [2]:
import matplotlib.pyplot as plt
import numpy as np
import copy, math

In [3]:
# Training set: three input values (X_train) and their matching targets (y_train).
# NOTE(review): the prediction cell multiplies both the input and the prediction
# by 1000 when printing "sqft" / "dollars", so these are presumably sizes in
# 1000 sqft and prices in 1000s of dollars — TODO confirm.
X_train = np.array([1.0, 2.0, 3.0])
y_train = np.array([300.0, 500.0, 700.0])

In [4]:
def compute_cost(x, y, w, b):
  """Return the halved mean squared error of the line f(x) = w * x + b.

  Args:
    x: indexable inputs exposing ``.shape``; ``x.shape[0]`` is taken as the
       number of examples.
    y: targets, indexed in step with ``x``.
    w: slope of the model.
    b: intercept of the model.

  Returns:
    ``(1 / (2 * m)) * sum((w * x[i] + b - y[i]) ** 2)`` over the ``m`` examples.

  Raises:
    ZeroDivisionError: if ``x.shape[0]`` is 0.
  """
  n_examples = x.shape[0]
  squared_error_sum = 0

  # Accumulate in index order so the floating-point result is reproducible.
  for idx in range(n_examples):
    residual = (w * x[idx] + b) - y[idx]
    squared_error_sum = squared_error_sum + residual ** 2

  scale = 1 / (2 * n_examples)
  return scale * squared_error_sum

In [5]:
def compute_gradient(x, y, w, b):
  """Return the gradient of the squared-error cost for f(x) = w * x + b.

  Args:
    x: indexable inputs exposing ``.shape``; ``x.shape[0]`` is taken as the
       number of examples.
    y: targets, indexed in step with ``x``.
    w: slope of the model.
    b: intercept of the model.

  Returns:
    A pair ``(dj_dw, dj_db)``: the per-example errors ``w * x[i] + b - y[i]``
    weighted by ``x[i]`` and averaged, and the plain average of those errors.

  Raises:
    ZeroDivisionError: if ``x.shape[0]`` is 0.
  """
  n_examples = x.shape[0]
  grad_w = 0
  grad_b = 0

  for idx in range(n_examples):
    error = (w * x[idx] + b) - y[idx]
    grad_w = grad_w + error * x[idx]
    grad_b = grad_b + error

  # Turn the accumulated sums into averages over the examples.
  return grad_w / n_examples, grad_b / n_examples

In [6]:
def gradient_descent(x, y, w_initial, b_initial, alpha, num_iterations, cost_function, gradient_function, max_history=100000):
  """Fit w and b by repeatedly stepping against the gradient.

  Args:
    x: inputs, passed through unchanged to the two callbacks.
    y: targets, passed through unchanged to the two callbacks.
    w_initial: starting value of w (deep-copied, so the caller's object is
      never aliased).
    b_initial: starting value of b.
    alpha: learning rate multiplying each gradient step.
    num_iterations: number of update steps to run.
    cost_function: callable ``(x, y, w, b) -> cost``.
    gradient_function: callable ``(x, y, w, b) -> (dj_dw, dj_db)``.
    max_history: only the first ``max_history`` iterations are recorded in the
      returned histories, which bounds memory use on long runs.

  Returns:
    ``(w, b, costs_history, parameters_history)`` where ``costs_history[k]`` is
    the cost after update ``k`` and ``parameters_history[k]`` is ``[w, b]``
    after that update.

  Side effects:
    Prints a progress line roughly ten times over the run.
  """
  w = copy.deepcopy(w_initial)
  b = b_initial
  costs_history = []
  parameters_history = []

  # Loop-invariant, so computed once. It is 0 only when the loop body never
  # runs, so the modulo below cannot divide by zero.
  log_every = math.ceil(num_iterations / 10)

  for i in range(num_iterations):
    dj_dw, dj_db = gradient_function(x, y, w, b)
    w = w - alpha * dj_dw
    b = b - alpha * dj_db

    # Cap the history by iteration index (not by the total iteration count),
    # so long runs still keep their first max_history entries.
    record = i < max_history
    report = i % log_every == 0

    # The cost is evaluated whenever it is needed, so the progress line never
    # depends on the history being non-empty or up to date.
    if record or report:
      cost = cost_function(x, y, w, b)

    if record:
      costs_history.append(cost)
      parameters_history.append([w, b])

    if report:
      print(f"Iteration {i:4}: Cost {cost:0.2e}",
            f"dj_dw: {dj_dw:0.3e}, dj_db: {dj_db:0.3e}",
            f"w: {w:0.3e}, b: {b:0.3e}")

  return w, b, costs_history, parameters_history


In [7]:
# Start both parameters at zero.
w_initial, b_initial = 0, 0

# Gradient descent settings: number of update steps and learning rate.
iterations = 10000
alpha = 0.1

w_final, b_final, costs_history, parameters_history = gradient_descent(
    X_train,
    y_train,
    w_initial,
    b_initial,
    alpha,
    iterations,
    compute_cost,
    compute_gradient,
)

print(f"(w,b) found by gradient descent: ({w_final:0.4f}, {b_final:0.4f})")

Iteration    0: Cost 2.74e+04 dj_dw: -1.133e+03, dj_db: -5.000e+02 w: 1.133e+02, b: 5.000e+01
Iteration 1000: Cost 2.23e-10 dj_dw: 2.983e-06, dj_db: -6.780e-06 w: 2.000e+02, b: 1.000e+02
Iteration 2000: Cost 6.99e-21 dj_dw: 1.664e-11, dj_db: -3.801e-11 w: 2.000e+02, b: 1.000e+02
Iteration 3000: Cost 2.85e-26 dj_dw: 7.579e-14, dj_db: -5.684e-14 w: 2.000e+02, b: 1.000e+02
Iteration 4000: Cost 2.85e-26 dj_dw: 7.579e-14, dj_db: -5.684e-14 w: 2.000e+02, b: 1.000e+02
Iteration 5000: Cost 2.85e-26 dj_dw: 7.579e-14, dj_db: -5.684e-14 w: 2.000e+02, b: 1.000e+02
Iteration 6000: Cost 2.85e-26 dj_dw: 7.579e-14, dj_db: -5.684e-14 w: 2.000e+02, b: 1.000e+02
Iteration 7000: Cost 2.85e-26 dj_dw: 7.579e-14, dj_db: -5.684e-14 w: 2.000e+02, b: 1.000e+02
Iteration 8000: Cost 2.85e-26 dj_dw: 7.579e-14, dj_db: -5.684e-14 w: 2.000e+02, b: 1.000e+02
Iteration 9000: Cost 2.85e-26 dj_dw: 7.579e-14, dj_db: -5.684e-14 w: 2.000e+02, b: 1.000e+02
(w,b) found by gradient descent: (200.0000, 100.0000)


In [8]:
# Evaluate the fitted line f(x) = w_final * x + b_final at one new input.
x_pred = 2
y_pred = w_final * x_pred + b_final
# Both the input and the prediction are scaled by 1000 for display
# (presumably the data is in 1000 sqft / 1000s of dollars — TODO confirm).
print(f"With a size of {x_pred * 1000} sqft, it is predicted to be {y_pred * 1000:0.0f} dollars")

With a size of 2000 sqft, it is predicted to be 500000 dollars
