<a href="https://colab.research.google.com/github/Kai0421/PyTorchLearning/blob/main/LinearRegressionLab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Squared Error Function
Using the below equation to find the cost of function J
\begin{align}
  f_{(w,b)} &= w x + b \\
  \hat{y} &= w x + b \\
  J_{(w,b)} &= {\frac 1 {2m}} \sum_{n=1}^m (\hat{y}^{(i)} - y )^2 \\
  J_{(w,b)} &= {\frac 1 {2m}} \sum_{n=1}^m (f_{(w,b)}(x^{(i)}) - y )^2 \\
  J_{(w,b)} &= {\frac 1 {2m}} \sum_{n=1}^m ( (wx^{(i)} + b ) - y )^2
\end{align}

In [7]:
import numpy as np

x_train = np.array([1, 2, 3, 4])
y_train = np.array([100, 200, 300, 400])

def compute_cost(x, y, w, b):

  m = x.shape[0]
  cost_sum = 0
  for i in range(m):
    f_w_b = (w * x[i]) + b
    cost = (f_w_b - y[i])**2
    cost_sum = cost_sum + cost

  square_error = (1/2*m) * cost_sum

  return square_error;

# Gradient Descent 
Gradient descent finds the mins cost of function J:

\begin{align}
  W &= w - α ⋅ \boxed{{\frac ∂ {∂w}} J(w,b)} \\
  W &= w - α ⋅ \boxed{{\frac 1 m} \sum_{n=1}^m (f_{(w,b)}(x^{(i)}) - y^{(i)} )x^{(i)} } \\
  B &= b - α ⋅ \boxed{{\frac ∂ {∂b}} J(w,b)} \\
  B &= b - α ⋅ \boxed{{\frac 1 m} \sum_{n=1}^m (f_{(w,b)}(x^{(i)}) - y^{(i)} )}
\end{align}

In [12]:
import math

def compute_gradient(x, y, w, b):
  m = x.shape[0]

  w_cost_calc = 0
  b_cost_calc = 0

  for i in range(m):
    # W calculation
    f_w_b = (x[i] * w) + b
    cost = f_w_b - y[i]
    w_cost_calc = w_cost_calc + (cost * x[i])
    b_cost_calc = b_cost_calc + cost

  dj_dw = w_cost_calc/m
  dj_db = b_cost_calc/m
  
  return dj_dw, dj_db; 

def gradient_descent(x, y, w_input, b_input, iteration, alpha, cost_function, gradient_function):

  J_history=[]
  p_history=[]

  w = w_input
  b = b_input

  for i in range(iteration):
    dj_dw, dj_db = gradient_function(x, y, w, b)

    w = w - alpha * dj_dw
    b = b - alpha * dj_db

    # Save cost J at each iteration
    if i<100000:      # prevent resource exhaustion 
        J_history.append( cost_function(x, y, w , b))
        p_history.append([w,b])
    # Print cost every at intervals 10 times or as many iterations if < 10
    if i% math.ceil(iteration/10) == 0:
        print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e} ",
              f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
              f"w: {w: 0.3e}, b:{b: 0.5e}")
 
  return w, b, J_history, p_history #return w and J,w history for graphing

In [13]:
# Initialize value
w_init = 0
b_init = 0

# Gradient Descent initialization
iteration = 10000
tmp_alpha = 1.0e-2

w_final, b_final, j_history, p_history = gradient_descent( x_train, y_train, w_init, b_init, iteration, tmp_alpha, compute_cost, compute_gradient)

print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

Iteration    0: Cost 5.04e+05  dj_dw: -7.500e+02, dj_db: -2.500e+02   w:  7.500e+00, b: 2.50000e+00
Iteration 1000: Cost 6.19e+01  dj_dw: -3.470e-01, dj_db:  1.020e+00   w:  9.769e+01, b: 6.80516e+00
Iteration 2000: Cost 3.09e+00  dj_dw: -7.758e-02, dj_db:  2.281e-01   w:  9.948e+01, b: 1.52137e+00
Iteration 3000: Cost 1.55e-01  dj_dw: -1.734e-02, dj_db:  5.099e-02   w:  9.988e+01, b: 3.40117e-01
Iteration 4000: Cost 7.72e-03  dj_dw: -3.877e-03, dj_db:  1.140e-02   w:  9.997e+01, b: 7.60368e-02
Iteration 5000: Cost 3.86e-04  dj_dw: -8.668e-04, dj_db:  2.548e-03   w:  9.999e+01, b: 1.69988e-02
Iteration 6000: Cost 1.93e-05  dj_dw: -1.938e-04, dj_db:  5.697e-04   w:  1.000e+02, b: 3.80027e-03
Iteration 7000: Cost 9.64e-07  dj_dw: -4.332e-05, dj_db:  1.274e-04   w:  1.000e+02, b: 8.49591e-04
Iteration 8000: Cost 4.82e-08  dj_dw: -9.685e-06, dj_db:  2.847e-05   w:  1.000e+02, b: 1.89935e-04
Iteration 9000: Cost 2.41e-09  dj_dw: -2.165e-06, dj_db:  6.366e-06   w:  1.000e+02, b: 4.24620e-05
