## Import Packages

In [1]:
import numpy as np
from tqdm import tqdm


## 4A

In [2]:
# Synthetic linear-regression data:
#     y = 1*x0 - 2*x1 + 3*x2 - 4*x3 + eps
# Features are drawn uniformly from [0, 1) and eps is standard-normal noise.
# A locally seeded generator makes the data (and therefore every fitted weight
# printed further down) identical on each Restart & Run All, without touching
# NumPy's global random state.
N_SAMPLES = 10000
TRUE_WEIGHTS = np.array([1.0, -2.0, 3.0, -4.0])
rng = np.random.default_rng(42)

X = rng.random((N_SAMPLES, TRUE_WEIGHTS.size))
eps = rng.normal(0, 1, N_SAMPLES)
# Vectorized targets: one matrix-vector product instead of a Python loop.
# Y is a 1-D array; it supports the zip(X, Y) / len(Y) uses in the cells below.
Y = X @ TRUE_WEIGHTS + eps

## 4B

In [3]:
def rss_derivative_at_point(X, Y, Y_hat):
    """Gradient of one sample's squared error with respect to the weights.

    The loss for a single point is ``(Y - Y_hat) ** 2``. Its derivative with
    respect to the prediction is ``-2 * (Y - Y_hat)``; multiplying by ``X``
    applies the chain rule, assuming ``Y_hat`` is the linear prediction
    ``np.dot(X, weights)`` as computed by the callers in this notebook.

    Parameters
    ----------
    X : feature vector of the sample.
    Y : true target of the sample.
    Y_hat : predicted target of the sample.

    Returns
    -------
    ``-2 * (Y - Y_hat) * X`` (same shape as ``X`` when ``X`` is an array).
    """
    residual = Y - Y_hat
    return (-2 * residual) * X

def GD(lr, init_weights=[1, 1, 1, 1], der_func=rss_derivative_at_point, epochs_num=500):
    """Fit linear weights with full-batch gradient descent.

    Every epoch evaluates ``der_func`` on each ``(sample, target)`` pair of the
    notebook-level ``X`` / ``Y``, averages the per-sample gradients and moves
    the weights one step of size ``lr`` against that mean gradient.

    Parameters
    ----------
    lr : float
        Constant step size.
    init_weights : sequence of numbers
        Starting weights, one per feature. The value is copied into a fresh
        float array, so the (mutable) default list is never modified.
    der_func : callable
        ``der_func(sample, true_Y, Y_hat)`` returning the per-sample gradient.
    epochs_num : int
        Number of passes over the data (default 500).

    Returns
    -------
    str
        ``str()`` of the final weight array.
    """
    curr_weights = np.array(init_weights, dtype=float)
    n_samples = len(Y)
    for _ in tqdm(range(epochs_num)):
        # Accumulator sized from the weights, so any feature count works.
        derivative = np.zeros_like(curr_weights)
        for sample, true_Y in zip(X, Y):
            Y_hat = np.dot(sample, curr_weights)
            derivative += der_func(sample, true_Y, Y_hat)
        derivative = derivative / n_samples
        curr_weights = curr_weights - lr * derivative
    return str(curr_weights)

# Run plain gradient descent with a constant step size of 0.1 and report the result.
weights = GD(lr=0.1)
print(f'Final weights are: {weights}')

100%|██████████| 500/500 [00:24<00:00, 20.22it/s]

Final weights are: [ 0.9911024  -2.00665401  2.99345626 -3.99589024]





## 4C

### Exponential Decay Of The Step-Size

In [4]:
def GD_lr_decay(lr0, init_weights=[1, 1, 1, 1], der_func=rss_derivative_at_point,
                decay_rate=0.01, epochs_num=500):
    """Full-batch gradient descent with an exponentially decaying step size.

    The step used in (1-based) epoch ``t`` is ``lr0 * exp(-decay_rate * t)``.
    Each epoch averages ``der_func`` over every ``(sample, target)`` pair of
    the notebook-level ``X`` / ``Y`` and steps against that mean gradient.

    Parameters
    ----------
    lr0 : float
        Base step size before decay.
    init_weights : sequence of numbers
        Starting weights, one per feature. Copied into a fresh float array, so
        the (mutable) default list is never modified.
    der_func : callable
        ``der_func(sample, true_Y, Y_hat)`` returning the per-sample gradient.
    decay_rate : float
        Exponential decay constant (default 0.01).
    epochs_num : int
        Number of passes over the data (default 500).

    Returns
    -------
    str
        ``str()`` of the final weight array.
    """
    curr_weights = np.array(init_weights, dtype=float)
    n_samples = len(Y)
    for epoch in tqdm(range(epochs_num)):
        # epoch + 1: the very first step is already decayed once.
        lr = lr0 * np.exp(-decay_rate * (epoch + 1))
        # Accumulator sized from the weights, so any feature count works.
        derivative = np.zeros_like(curr_weights)
        for sample, true_Y in zip(X, Y):
            Y_hat = np.dot(sample, curr_weights)
            derivative += der_func(sample, true_Y, Y_hat)
        derivative = derivative / n_samples
        curr_weights = curr_weights - lr * derivative
    return str(curr_weights)

# Run gradient descent with an exponentially decaying step size (base step 0.1).
weights = GD_lr_decay(lr0=0.1)
print(f'Final weights are: {weights}')

100%|██████████| 500/500 [00:24<00:00, 20.37it/s]

Final weights are: [ 0.70712898 -1.70027927  2.29243596 -3.32095071]





### Stochastic Gradient Descent

In [5]:
def SGD(lr, init_weights=[1, 1, 1, 1], der_func=rss_derivative_at_point,
        epochs_num=10, batch_size=64):
    """Fit linear weights with mini-batch gradient descent.

    The notebook-level ``X`` / ``Y`` are walked in their stored order (no
    shuffling). After every ``batch_size`` samples, and once more at the end
    of the data for a trailing partial batch, the weights are moved by ``lr``
    times the mean gradient of the samples accumulated since the last update.

    Parameters
    ----------
    lr : float
        Constant step size.
    init_weights : sequence of numbers
        Starting weights, one per feature. Copied into a fresh float array, so
        the (mutable) default list is never modified.
    der_func : callable
        ``der_func(sample, true_Y, Y_hat)`` returning the per-sample gradient.
    epochs_num : int
        Number of passes over the data (default 10).
    batch_size : int
        Samples per weight update (default 64). Must be at least 1.

    Returns
    -------
    str
        ``str()`` of the final weight array.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    curr_weights = np.array(init_weights, dtype=float)
    n_samples = len(Y)
    for _ in tqdm(range(epochs_num)):
        derivative = np.zeros_like(curr_weights)
        in_batch = 0  # samples accumulated since the last weight update
        for i, (sample, true_Y) in enumerate(zip(X, Y), start=1):
            Y_hat = np.dot(sample, curr_weights)
            derivative += der_func(sample, true_Y, Y_hat)
            in_batch += 1
            if in_batch == batch_size or i == n_samples:
                # Divide by the number of samples actually accumulated.
                # Using len(Y) % batch_size for the last batch is 0 when the
                # dataset size is an exact multiple of batch_size, which
                # turned the weights into inf/nan.
                curr_weights = curr_weights - lr * (derivative / in_batch)
                derivative = np.zeros_like(curr_weights)
                in_batch = 0
    return str(curr_weights)

# Run mini-batch gradient descent with a constant step size of 0.1.
weights = SGD(lr=0.1)
print(f'Final weights are: {weights}')

100%|██████████| 10/10 [00:00<00:00, 18.26it/s]

Final weights are: [ 0.99671626 -2.00365257  3.03031944 -3.97313679]





### Momentum

In [6]:
def GD_with_mometum(lr, init_weights=[1, 1, 1, 1], der_func=rss_derivative_at_point,
                    gamma=0.8, epochs_num=500):
    """Full-batch gradient descent with momentum.

    Each epoch averages ``der_func`` over every ``(sample, target)`` pair of
    the notebook-level ``X`` / ``Y`` and then applies::

        momentum = gamma * momentum + lr * mean_gradient
        weights  = weights - momentum

    Parameters
    ----------
    lr : float
        Step size applied to the current mean gradient.
    init_weights : sequence of numbers
        Starting weights, one per feature. Copied into a fresh float array, so
        the (mutable) default list is never modified.
    der_func : callable
        ``der_func(sample, true_Y, Y_hat)`` returning the per-sample gradient.
    gamma : float
        Fraction of the previous update carried into the next one (default 0.8).
    epochs_num : int
        Number of passes over the data (default 500).

    Returns
    -------
    str
        ``str()`` of the final weight array.
    """
    curr_weights = np.array(init_weights, dtype=float)
    momentum = np.zeros_like(curr_weights)
    n_samples = len(Y)
    for _ in tqdm(range(epochs_num)):
        # Accumulator sized from the weights, so any feature count works.
        derivative = np.zeros_like(curr_weights)
        for sample, true_Y in zip(X, Y):
            Y_hat = np.dot(sample, curr_weights)
            derivative += der_func(sample, true_Y, Y_hat)
        derivative = derivative / n_samples
        # Plain scalar scaling; np.dot with a scalar did the same thing less clearly.
        momentum = gamma * momentum + lr * derivative
        curr_weights = curr_weights - momentum
    return str(curr_weights)

# Run gradient descent with momentum, using a step size of 0.1.
weights = GD_with_mometum(lr=0.1)
print(f'Final weights are: {weights}')

100%|██████████| 500/500 [00:24<00:00, 20.33it/s]

Final weights are: [ 0.99142645 -2.00712527  2.99445293 -3.99673396]



