In [2]:
import numpy as np
from matplotlib import pyplot as plt


def loss(actual_values, calculated_values):
    """Return the signed residual: prediction minus target.

    Works on anything that supports ``-`` (plain numbers, NumPy scalars,
    NumPy arrays); the result has whatever type that subtraction yields.
    A positive value means the prediction overshoots the target.
    """
    residual = calculated_values - actual_values
    return residual

def stochastic_grad_desc(X_data,
                         y_data,
                         lr_schedule,
                         learning_rate = 1e-2,
                         eps = 1e-4,
                         batch_size = 1,
                         max_iter = 1000):
    """Fit linear weights by stochastic gradient descent and record the path.

    The weights start at zero. Every outer iteration performs ``batch_size``
    consecutive single-sample updates (each on a row drawn uniformly at
    random, with replacement, via ``np.random.randint``), then stores the
    weights and asks ``lr_schedule`` for the next learning rate.

    Parameters
    ----------
    X_data : 2-D array, one sample per row (``X_data.shape[1]`` features).
    y_data : array of targets; ``y_data.size`` is used as the sample count.
    lr_schedule : callable ``(learning_rate, iteration_index) -> learning_rate``
        invoked once at the end of every outer iteration.
    learning_rate : step size used until ``lr_schedule`` replaces it.
    eps : the search stops as soon as the absolute residual of a drawn
        sample is below this threshold.
    batch_size : number of single-sample updates per outer iteration.
    max_iter : maximum number of outer iterations.

    Returns
    -------
    2-D array of shape ``(k, n_features)``. Row 0 is the zero starting point
    and row ``t`` holds the weights after the ``t``-th completed outer
    iteration. When the ``eps`` test fires, the history collected so far is
    returned; updates made inside the interrupted outer iteration are not
    part of it.
    """
    n = y_data.size
    current = np.zeros(X_data.shape[1])
    # Snapshot with copy(): `current` is updated in place below, so storing a
    # bare reference would make the "initial" row silently follow the updates.
    history = [current.copy()]
    for i in range(max_iter):
        for _ in range(batch_size):
            index = np.random.randint(0, n)
            sample_x = X_data[index]
            sample_y = y_data[index]
            hypothesis = np.dot(sample_x, current)
            error = loss(sample_y, hypothesis)
            if np.abs(error) < eps:
                # Always hand back a 2-D history, even on a very early stop.
                return np.array(history)
            # NOTE(review): sample_x is a single row of X_data, so this
            # divides by the number of features rather than by a batch size.
            # Kept unchanged to preserve the step scaling; confirm the intent.
            grad = np.dot(sample_x.T, error) / len(sample_x)
            current -= learning_rate * grad
        # Collect snapshots in a list and convert once at the end instead of
        # re-copying the whole history with np.vstack on every iteration.
        history.append(current.copy())
        learning_rate = lr_schedule(learning_rate, i)
    return np.array(history)

In [17]:
def const_lr(learning_rate, iter_num):
    """Constant schedule: hand the current learning rate back unchanged.

    ``iter_num`` is accepted only so the signature matches the other
    schedules; it is not used.
    """
    return learning_rate

def step_lr(learning_rate,
            iter_num,
            initial_lr = 1e-2,
            drop = 1.2,
            frequency = 10):
    """Step schedule: rescale ``initial_lr`` once every ``frequency`` iterations.

    Computes ``initial_lr * drop ** floor((iter_num + 1) / frequency)``.
    The incoming ``learning_rate`` is ignored; the result depends only on
    ``iter_num`` and the keyword parameters. Because the factor is applied
    multiplicatively, a ``drop`` above 1 (including the default 1.2) makes
    the rate grow at each step, while a value below 1 makes it shrink.
    """
    # Number of full `frequency`-sized steps reached, counting from iteration 1.
    steps_taken = np.floor((iter_num + 1) / frequency)
    scale = np.power(drop, steps_taken)
    return initial_lr * scale

def exponential_lr(learning_rate,
                   iter_num,
                   initial_lr = 1e-1,
                   k = 1e-2):
    """Exponential schedule: ``initial_lr * exp(-k * iter_num)``.

    The incoming ``learning_rate`` is ignored; the result depends only on
    ``iter_num``, ``initial_lr`` and the rate constant ``k``.
    """
    decay_factor = np.exp(-k * iter_num)
    return initial_lr * decay_factor
    
    

In [18]:
# Toy regression problem: six samples with two features each.
X = np.array([
    [4, 1],
    [2, 8],
    [1, 0],
    [3, 2],
    [1, 4],
    [6, 7]
])

y = np.array([
    2,
    -14,
    1,
    -1,
    -7,
    -8
])

# The optimiser draws samples with np.random, so fix the seed to make the
# printed numbers reproducible when the cell is re-run.
SEED = 42
np.random.seed(SEED)

# One experiment per schedule, run in this order.
lr_schedules = (
    const_lr,        # constant learning rate
    step_lr,         # learning rate with step
    exponential_lr,  # exponential learning rate
)

for lr_schedule in lr_schedules:
    print()  # blank line separating the output of each schedule
    for batch_size in range(1, len(y) + 1):
        result = stochastic_grad_desc(X, y, lr_schedule, batch_size=batch_size)
        values = result[-1]
        print(values)
        # len(result) is the number of rows in the returned history array.
        print(f'iterations: {len(result)}, batch_size: {batch_size}')


[ 0.99987544 -1.99994731]
iterations: 544, batch_size: 1
[ 0.99966042 -1.99972169]
iterations: 233, batch_size: 2
[ 0.9998582  -1.99988004]
iterations: 171, batch_size: 3
[ 0.99449527 -1.99571855]
iterations: 79, batch_size: 4
[ 0.99915549 -1.99936395]
iterations: 88, batch_size: 5
[ 0.99979628 -1.99983989]
iterations: 81, batch_size: 6

[ 1.00009903 -2.00005975]
iterations: 141, batch_size: 1
[ 0.99994424 -2.00000446]
iterations: 103, batch_size: 2
[ 0.99862425 -1.99966943]
iterations: 73, batch_size: 3
[ 0.99584491 -1.99849332]
iterations: 54, batch_size: 4
[ 0.99983805 -1.99986116]
iterations: 61, batch_size: 5
[ 0.99945856 -1.99970747]
iterations: 51, batch_size: 6

[ 0.99851385 -1.99873287]
iterations: 168, batch_size: 1
[ 0.99992436 -2.00003333]
iterations: 69, batch_size: 2
[ 0.99993629 -2.00002597]
iterations: 39, batch_size: 3
[ 0.99985513 -2.00004064]
iterations: 33, batch_size: 4
[ 0.99967749 -2.00058234]
iterations: 44, batch_size: 5
[ 1.00371846 -2.00182808]
iterations: 3