# Batch Gradient Descent

In [1]:
# Batch Gradient Descent
import numpy as np

In [2]:
eta = 0.1 # learning rate
n_iterations = 1000
m = 100

In [3]:
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

In [4]:
X_b = np.c_[np.ones((100, 1)), X]  # add x0 = 1 to each instance
theta = np.random.randn(2,1) #random initilization

In [5]:
for iteration in range(n_iterations):
    gradients = 2/m * X_b.T.dot(X_b.dot(theta)-y)
    theta = theta - eta * gradients
print(theta)

[[3.99522529]
 [3.20173637]]


# Stochastic Gradient Descent

In [6]:
# Stochastic Gradient Descent
n_epochs = 50
t0, t1 = 5, 50 #learning schedule hyperparameters
m = 100

In [7]:
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

In [8]:
X_b = np.c_[np.ones((100, 1)), X]  # add x0 = 1 to each instance
theta = np.random.randn(2,1) #random initilization

In [9]:
def learning_schedule(t):
    return t0/(t+t1)

for epoch in range(n_epochs):
    for i in range(m):
        random_index = np.random.randint(m)
        xi = X_b[random_index:random_index+1]
        yi = y[random_index:random_index+1]
        gradients = 2 * xi.T.dot(xi.dot(theta)-yi)
        eta = learning_schedule(epoch * m + i)
        theta = theta - eta * gradients
print(theta)

[[3.73531004]
 [3.09580748]]


# Mini-batch Gradient Descent

In [10]:
# Mini-batch Gradient Descent
n_iterations = 50
minibatch_size = 20
t0, t1 = 200, 1000
m = 100

In [11]:
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

In [12]:
X_b = np.c_[np.ones((100, 1)), X]  # add x0 = 1 to each instance
np.random.seed(42)
theta = np.random.randn(2,1)  # random initialization

In [13]:
def learning_schedule(t):
    return t0 / (t + t1)

t = 0
for epoch in range(n_iterations):
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, minibatch_size):
        t += 1
        xi = X_b_shuffled[i:i+minibatch_size]
        yi = y_shuffled[i:i+minibatch_size]
        gradients = 2/minibatch_size * xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(t)
        theta = theta - eta * gradients
print(theta)

[[3.87565983]
 [3.05054404]]
