In [12]:
# This notebook implements the gradient descent algorithm for linear regression
import numpy as np
import matplotlib.pyplot as plt
import math

In [2]:
# Training set: two examples, with inputs in X_train and the matching targets in y_train
X_train = np.array([1.0, 2.0])
y_train = np.array([300.0, 500.0])

In [3]:
# Parameters of the linear model f(x) = w*x + b.
# With the training data above these values fit both points exactly:
# 200*1 + 100 = 300 and 200*2 + 100 = 500.
w = 200
b = 100

In [4]:
def compute_cost(x, y, w, b):
    """
    Evaluate the squared-error cost of the linear model f(x) = w*x + b.

    Args:
        x (ndarray(m,)): Input features, one entry per training example
        y (ndarray(m,)): Target values, aligned with x
        w (float): Weight parameter
        b (float): Bias parameter

    Returns:
        float: Sum of squared prediction errors over all m examples,
               divided by 2*m.
    """
    n_examples = x.shape[0]

    # Squared residual of every example, accumulated in index order from 0.0
    squared_error_total = sum(
        (((w * x[idx] + b) - y[idx]) ** 2 for idx in range(n_examples)),
        0.0,
    )

    # Scale by 1/(2m) to obtain the final cost
    return (1 / (2 * n_examples)) * squared_error_total


In [5]:
# Evaluate the cost on the training data for the current w and b
cost = compute_cost(X_train, y_train, w, b)

# Report the parameters alongside the resulting cost
print(f"cost at w = {w}, b = {b} : {cost} ")

cost at w = 200, b = 100 : 0.0 


In [8]:
# Gradient descent determines the values of w and b that minimize the cost function.
# First we compute the gradient, then we use it in gradient descent.

def compute_gradient(x, y, w, b):
    """
    Evaluate the partial derivatives of the linear-regression cost.

    Args:
        x (ndarray (m,)) : Data, m examples.
        y (ndarray (m,)) : Target values.
        w, b (scalar)    : Model parameters.

    Returns:
        d_w (scalar): The gradient of the cost w.r.t. the parameter w.
        d_b (scalar): The gradient of the cost w.r.t. the parameter b.
    """
    n_examples = len(x)

    # Prediction error (f_wb - y) for each training example
    residuals = [w * x[k] + b - y[k] for k in range(n_examples)]

    # The w-derivative weights each error by its input; the b-derivative does not
    weighted_error_total = sum(residuals[k] * x[k] for k in range(n_examples))
    error_total = sum(residuals)

    # Average both sums over the number of examples
    scale = 1 / n_examples
    return scale * weighted_error_total, scale * error_total

        

        

In [11]:
# This implements the gradient descent loop, which repeatedly updates w and b

def gradient_descent(w_in, alpa, b_in, X, y, cost, gradient, num_iters=None):
    """
    Runs batch gradient descent to fit the parameters w and b.

    Args:
        w_in (scalar)       : Initial value of the weight parameter.
        alpa (scalar)       : Learning rate (alpha). The parameter keeps its
                              original spelling so existing keyword callers
                              continue to work.
        b_in (scalar)       : Initial value of the bias parameter.
        X (ndarray (m,))    : Data, m examples.
        y (ndarray (m,))    : Target values.
        cost (callable)     : Called as cost(X, y, w, b); returns the cost.
        gradient (callable) : Called as gradient(X, y, w, b); returns the
                              pair (dj_dw, dj_db).
        num_iters (int, optional): Number of update steps to run. When
                              omitted it falls back to len(X), which is the
                              iteration count the original loop used; pass an
                              explicit value for real training runs.

    Returns:
        w (scalar)       : Value of w after the final update.
        b (scalar)       : Value of b after the final update.
        J_history (list) : Cost after each update (first 100000 steps only).
        p_history (list) : [w, b] after each update (first 100000 steps only).
    """
    max_history = 100000  # cap on recorded steps so long runs do not exhaust memory

    alpha = alpa
    if num_iters is None:
        num_iters = len(X)

    # Print roughly ten progress lines; never let the interval drop to zero
    log_every = max(1, math.ceil(num_iters / 10))

    J_history = []
    p_history = []

    b = b_in
    w = w_in

    for i in range(num_iters):
        # Use the injected gradient function rather than a hard-coded global
        dj_dw, dj_db = gradient(X, y, w, b)

        # Both updates use gradients computed from the same (w, b) pair
        b = b - alpha * dj_db
        w = w - alpha * dj_dw

        should_record = i < max_history
        should_log = i % log_every == 0

        # Evaluate the cost once per step, and only when it is actually needed,
        # so the logged value is never a stale entry from the capped history
        if should_record or should_log:
            current_cost = cost(X, y, w, b)

        if should_record:
            J_history.append(current_cost)
            p_history.append([w, b])

        if should_log:
            print(f"Iteration {i:4} : Cost {current_cost : 0.2e}",
                  f"dj_dw : {dj_dw : 0.3e}, dj_db : {dj_db : 0.3e}",
                  f"w : {w : 0.3e}, b : {b : 0.5e}")

    return w, b, J_history, p_history
        

In [None]:
# This demonstrates how math.ceil is used to space out progress checkpoints

num_iters = 77   # total iterations
for i in range(num_iters + 1):   # loop from 0 to num_iters (77) inclusive
    # math.ceil(77/10) == 8, so a checkpoint is printed on every 8th iteration
    if i % math.ceil(num_iters/10) == 0:
        print(f"Checkpoint at iteration {i}")


Checkpoint at iteration 0
Checkpoint at iteration 8
Checkpoint at iteration 16
Checkpoint at iteration 24
Checkpoint at iteration 32
Checkpoint at iteration 40
Checkpoint at iteration 48
Checkpoint at iteration 56
Checkpoint at iteration 64
Checkpoint at iteration 72
spaced: 0
spaced: 1
spaced: 2
spaced: 3
spaced: 4
spaced: 5
spaced: 6
spaced: 7
spaced: 8
spaced: 9
spaced:10
spaced:11
spaced:12
spaced:13
spaced:14
spaced:15
spaced:16
spaced:17
spaced:18
spaced:19
1.23e-06


In [None]:
# The format spec ":2" pads each integer to a minimum width of 2 characters,
# so single-digit values are printed with a leading space
for i in range(20):
    print(f"spaced:{i:2}")


spaced: 0
spaced: 1
spaced: 2
spaced: 3
spaced: 4
spaced: 5
spaced: 6
spaced: 7
spaced: 8
spaced: 9
spaced:10
spaced:11
spaced:12
spaced:13
spaced:14
spaced:15
spaced:16
spaced:17
spaced:18
spaced:19


In [None]:
# Small example value for demonstrating the "e" format type
value = 0.00000123456
print(f"{value:0.2e}") # scientific notation (mantissa times a power of 10), with 2 digits after the decimal point


1.23e-06
