In [47]:
import numpy as np
import matplotlib.pyplot as plt

In [48]:
# Small two-example data set: x_train holds the input values and y_train the
# matching target values (same length, paired by position).
x_train = np.array([1.0, 2.0], dtype=np.float64)
y_train = np.array([100.0, 200.0], dtype=np.float64)

### What is Gradient Descent?
+ Gradient descent is an optimization algorithm used to minimize the cost function $J(w, b)$ by updating the parameters $w$ and $b$ in the direction that reduces the cost the most.
### Update rule
+ In each iteration, gradient descent performs the following updates:
$$
\begin{aligned}
w = w - \alpha \frac{\partial J(w,b)}{\partial w} \\
b = b - \alpha \frac{\partial J(w,b)}{\partial b}
\end{aligned}
$$
+ The cost function is defined as:
$$
J(w, b) = \frac{1}{2m} \sum_{i = 0}^{m - 1}(f_{w, b}(x^{(i)}) - y^{(i)})^2
$$
$$
\Leftrightarrow J(w, b) = \frac{1}{2m} \sum_{i = 0}^{m - 1}(wx^{(i)} + b - y^{(i)})^2
$$
+ We want to compute:
$$
    \frac{\partial J(w, b)}{\partial w}
$$
$$
    \frac{\partial J(w, b)}{\partial w} = \frac{1}{2m} \sum_{i = 0}^{m - 1} 2(wx^{(i)} + b - y^{(i)})\cdot x^{(i)} = \frac{1}{m} \sum_{i = 0}^{m - 1} (wx^{(i)} + b - y^{(i)})\, x^{(i)}
$$
+ We want to compute:
$$
    \frac{\partial J(w, b)}{\partial b}
$$
$$
    \frac{\partial J(w, b)}{\partial b} = \frac{1}{2m} \sum_{i = 0}^{m - 1} 2(wx^{(i)} + b - y^{(i)})\cdot 1 = \frac{1}{m} \sum_{i = 0}^{m - 1} (wx^{(i)} + b - y^{(i)})
$$




In [49]:
def compute_loss_value(x, y, w, b = 0):
    """
    Sum the squared residuals of the linear model ``w * x + b`` over a data set.

    Args:
        x : Input feature values (must support ``len`` and integer indexing)
        y : Actual target values, read at the same positions as ``x``
        w : Weight parameter of the linear model
        b : Bias parameter of the linear model (defaults to 0)

    Returns:
        The accumulated squared error between predictions and targets.
        The sum is returned as-is: it is not averaged and no 1/2 factor
        is applied.
    """
    squared_error_sum = 0
    for idx in range(len(x)):
        prediction = w * x[idx] + b
        residual = prediction - y[idx]
        squared_error_sum += residual ** 2
    return squared_error_sum

In [50]:
def compute_gradient(x, y, w, b):
    """
    Compute the partial derivatives of the cost with respect to ``w`` and ``b``
    for the linear model ``w * x + b``.

    Args:
        x : Input feature values (must support ``len`` and integer indexing)
        y : Actual target values, read at the same positions as ``x``
        w : Weight parameter of the linear model
        b : Bias parameter of the linear model

    Returns:
        tuple: ``(dj_dw, dj_db)`` where ``dj_dw`` is the mean over all examples
        of ``(w * x[i] + b - y[i]) * x[i]`` and ``dj_db`` is the mean of
        ``(w * x[i] + b - y[i])``.
    """
    n_examples = len(x)
    grad_w = 0
    grad_b = 0
    for idx in range(n_examples):
        # Prediction error for this example; shared by both partial derivatives.
        residual = w * x[idx] + b - y[idx]
        grad_b += residual
        grad_w += residual * x[idx]
    # Turn the accumulated sums into averages over the data set.
    grad_b /= n_examples
    grad_w /= n_examples
    return grad_w, grad_b

In [51]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    """
    Run gradient descent for a fixed number of iterations.

    On every iteration the gradient is evaluated at the current parameters and
    both parameters are updated simultaneously:
    ``w = w - alpha * dj_dw`` and ``b = b - alpha * dj_db``.

    Args:
        x : Input feature values, passed through to ``gradient_function``
        y : Actual target values, passed through to ``gradient_function``
        w_in : Initial value of the weight parameter
        b_in : Initial value of the bias parameter
        alpha : Learning rate (step size) applied to each gradient
        num_iters : Number of update steps to perform
        cost_function : Accepted for interface compatibility; it is not
            called by this function
        gradient_function : Callable ``(x, y, w, b) -> (dj_dw, dj_db)`` used to
            evaluate the gradient. If ``None``, the notebook-level
            ``compute_gradient`` is used.

    Returns:
        tuple: ``(w, b)`` after ``num_iters`` updates. With ``num_iters == 0``
        the initial values are returned unchanged.
    """
    if gradient_function is None:
        # Keep working for callers that passed a placeholder while this
        # argument was still being ignored.
        gradient_function = compute_gradient

    w_out = w_in
    b_out = b_in
    for _ in range(num_iters):
        dj_dw, dj_db = gradient_function(x, y, w_out, b_out)
        # Rebind instead of using `-=`: an in-place update would modify the
        # caller's w_in / b_in when they are mutable (e.g. NumPy arrays).
        w_out = w_out - alpha * dj_dw
        b_out = b_out - alpha * dj_db
    return w_out, b_out

