In [43]:
import numpy as np

def gradient_descent(X, y, weights, learning_rate, n_iterations, batch_size=1, method='batch'):
    match method:
        case 'batch':
            batches = [X]
            labels = [y]
        case 'stochastic':
            batches = np.split(X, X.shape[0])
            labels = np.split(y, X.shape[0])
        case 'mini_batch':
            batches = np.split(X, batch_size)
            labels = np.split(y, batch_size)
    
    for _ in range(n_iterations):
        for i in range(len(batches)):
            # get the batches and labels
            batch = batches[i]
            label = labels[i]

            # forward pass
            pred = batch@weights
            loss = np.mean((pred - label)**2) # MSE

            # backwards pass
            grad = (2/len(label)) * batch.T @ (pred - label)

            # update the weights
            weights -= grad * learning_rate

        # Logging
        if _ % 100 == 0:
            print(f'Loss is: {loss}')
            print(f'Pred is {pred}')
            print('----------')
    return weights



In [44]:
# Sample data
X = np.array([[1, 1], [2, 1], [3, 1], [4, 1]])
y = np.array([2, 3, 4, 5])

# Parameters
learning_rate = 0.01
n_iterations = 1000
batch_size = 2

# Initialize weights
weights = np.zeros(X.shape[1])

In [45]:
gradient_descent(X, y, weights, learning_rate, n_iterations, method='batch')

batches=[array([[1, 1],
       [2, 1],
       [3, 1],
       [4, 1]])]
labels=[array([2, 3, 4, 5])]
Loss is: 13.5
Pred is [0. 0. 0. 0.]
----------
Loss is: 0.032074295101586096
Pred is [1.71082014 2.85987253 4.00892492 5.15797731]
----------
Loss is: 0.01760840354360213
Pred is [1.78573589 2.89617436 4.00661284 5.11705132]
----------
Loss is: 0.009666802477571085
Pred is [1.84124374 2.92307172 4.0048997  5.08672768]
----------
Loss is: 0.005306958686457833
Pred is [1.88237157 2.94300097 4.00363037 5.06425977]
----------
Loss is: 0.002913456705577233
Pred is [1.91284471 2.9577673  4.00268988 5.04761246]
----------
Loss is: 0.0015994528084292622
Pred is [1.9354234  2.96870822 4.00199303 5.03527784]
----------
Loss is: 0.0008780804195562414
Pred is [1.95215279 2.97681475 4.00147671 5.02613867]
----------
Loss is: 0.0004820556249891878
Pred is [1.96454822 2.98282119 4.00109415 5.01936711]
----------
Loss is: 0.00026464275982962456
Pred is [1.97373246 2.98727158 4.0008107  5.01434982]
-----

array([1.01003164, 0.97050576])

In [46]:
gradient_descent(X, y, weights, learning_rate, n_iterations, method='stochastic')

batches=[array([[1, 1]]), array([[2, 1]]), array([[3, 1]]), array([[4, 1]])]
labels=[array([2]), array([3]), array([4]), array([5])]
Loss is: 0.00017435016374913232
Pred is [5.01320417]
----------
Loss is: 9.357504360231605e-06
Pred is [5.003059]
----------
Loss is: 7.468982453722015e-07
Pred is [5.00086423]
----------
Loss is: 5.96160009614058e-08
Pred is [5.00024416]
----------
Loss is: 4.7584360958187265e-09
Pred is [5.00006898]
----------
Loss is: 3.798093416453241e-10
Pred is [5.00001949]
----------
Loss is: 3.031566108966591e-11
Pred is [5.00000551]
----------
Loss is: 2.4197385571769832e-12
Pred is [5.00000156]
----------
Loss is: 1.9313894109541683e-13
Pred is [5.00000044]
----------
Loss is: 1.5415983622284932e-14
Pred is [5.00000012]
----------


array([1.00000003, 0.9999999 ])

In [47]:
gradient_descent(X, y, weights, learning_rate, n_iterations, batch_size, method='mini_batch')

batches=[array([[1, 1],
       [2, 1]]), array([[3, 1],
       [4, 1]])]
labels=[array([2, 3]), array([4, 5])]
Loss is: 4.525293951906432e-16
Pred is [4.         5.00000003]
----------
Loss is: 1.8288081205262217e-16
Pred is [4.         5.00000002]
----------
Loss is: 5.263460502655009e-17
Pred is [4.         5.00000001]
----------
Loss is: 1.514866875281022e-17
Pred is [4.         5.00000001]
----------
Loss is: 4.359911143640006e-18
Pred is [4. 5.]
----------
Loss is: 1.2548193822190541e-18
Pred is [4. 5.]
----------
Loss is: 3.611467909927828e-19
Pred is [4. 5.]
----------
Loss is: 1.0394109469342545e-19
Pred is [4. 5.]
----------
Loss is: 2.991517913309273e-20
Pred is [4. 5.]
----------
Loss is: 8.609874063511644e-21
Pred is [4. 5.]
----------


array([1., 1.])

In [9]:
weights

array([0., 0.])

In [24]:
np.split(X, X.shape[0])

[array([[1, 1]]), array([[2, 1]]), array([[3, 1]]), array([[4, 1]])]