# Approximation of a linear function using stochastic gradient descent

In [56]:
import torch
from tqdm import tqdm
%pylab
%matplotlib inline

Using matplotlib backend: MacOSX
Populating the interactive namespace from numpy and matplotlib


### $f(x_1, x_2) = k_1 x_1 + k_2 x_2 + b + n$, where $n$ is standard normal noise

In [65]:
def dataloader(M: int, k1: float, k2: float, b: float, batch_size=10, shuffle=True) -> zip:
    """Generate a noisy synthetic linear dataset and serve it in mini-batches.

    Draws ``M`` points with two features each, uniformly from [0, 100), and
    computes targets ``k1 * x1 + k2 * x2 + b`` plus standard normal noise.

    Args:
        M: Number of samples to generate.
        k1: Coefficient of the first feature.
        k2: Coefficient of the second feature.
        b: Intercept.
        batch_size: Maximum number of samples per batch; the last batch may
            be smaller.
        shuffle: If true, apply one random permutation to features and
            targets together before batching.

    Returns:
        A ``zip`` iterator yielding ``(X_batch, y_batch)`` pairs, where
        ``X_batch`` has two columns and ``y_batch`` is one-dimensional.
    """
    features = torch.rand(M, 2) * 100
    noise = torch.randn(M)
    targets = k1 * features[:, 0] + k2 * features[:, 1] + b + noise

    if shuffle:
        # One shared permutation keeps each row paired with its own target.
        order = torch.randperm(features.shape[0])
        features, targets = features[order], targets[order]

    return zip(features.split(batch_size), targets.split(batch_size))

In [59]:
def model(k1k2b, X_batch):
    """Evaluate the two-feature linear model on a batch.

    Args:
        k1k2b: Indexable parameters where ``[0]`` and ``[1]`` are the feature
            coefficients and ``[2]`` is the intercept.
        X_batch: 2-D batch whose columns 0 and 1 hold the two features.

    Returns:
        Per-sample predictions
        ``k1k2b[0] * x1 + k1k2b[1] * x2 + k1k2b[2]``.
    """
    first_feature = X_batch[:, 0]
    second_feature = X_batch[:, 1]
    weighted_sum = k1k2b[0] * first_feature + k1k2b[1] * second_feature
    return weighted_sum + k1k2b[2]

In [58]:
def clip_grad(grad: torch.Tensor, max_grad_len: float) -> tuple:
    """Limit the norm of a gradient to ``max_grad_len``.

    Args:
        grad: Gradient tensor to clip.
        max_grad_len: Largest norm the returned gradient may have.

    Returns:
        A ``(clipped, norm)`` tuple. ``norm`` is ``torch.norm(grad)`` of the
        input. ``clipped`` is ``grad`` itself when its norm is below
        ``max_grad_len``; otherwise it is ``grad`` rescaled to have norm
        ``max_grad_len``.
    """
    norm = torch.norm(grad)
    # Direction is preserved; only the magnitude is capped.
    clipped = grad if norm < max_grad_len else (grad / norm) * max_grad_len
    return clipped, norm

In [63]:
def main(k1: float, k2: float, b: float, M: int, epochs=1, alpha=.1, step=1, max_grad_len=1., batch_size=7) -> tuple:
    """Fit ``k1 * x1 + k2 * x2 + b`` by mini-batch SGD with gradient clipping.

    Every epoch draws a fresh synthetic dataset of ``M`` noisy samples from
    ``dataloader`` and takes one clipped gradient step per mini-batch on the
    mean squared error. Parameters start from the guess ``[1., 1., 0.]``.

    Args:
        k1: True coefficient of the first feature used to generate data.
        k2: True coefficient of the second feature used to generate data.
        b: True intercept used to generate data.
        M: Number of samples generated per epoch.
        epochs: Number of epochs to run.
        alpha: Learning rate.
        step: Extra multiplier applied to the learning rate.
        max_grad_len: Gradient-norm threshold passed to ``clip_grad``.
        batch_size: Mini-batch size passed to ``dataloader``.

    Returns:
        The estimated ``(k1, k2, b)`` as a tuple of Python floats.
    """
    k1k2b = torch.tensor([1., 1., 0.], dtype=torch.float, requires_grad=True)
    for _epoch in tqdm(range(epochs)):
        # Use the caller's arguments so the fitted target matches the request.
        for X_batch, y_batch in dataloader(M, k1, k2, b, batch_size):
            yy = model(k1k2b, X_batch)
            mse = ((yy - y_batch) ** 2).mean()
            mse.backward()
            clipped_grad, _grad_len = clip_grad(k1k2b.grad, max_grad_len)
            # Update the leaf tensor in place without recording the step in
            # the autograd graph.
            with torch.no_grad():
                k1k2b -= clipped_grad * alpha * step
            # Drop the gradient so the next backward() starts from scratch
            # instead of accumulating.
            k1k2b.grad = None
    return k1k2b[0].item(), k1k2b[1].item(), k1k2b[2].item()

In [64]:
# Estimate k1=1, k2=2, b=3 from 100 noisy samples per epoch over 10000 epochs;
# the displayed tuple is the fitted (k1, k2, b).
main(1., 2., 3., 100, epochs=10000)

100%|██████████| 10000/10000 [00:35<00:00, 281.82it/s]


(0.950163722038269, 1.973562479019165, 3.0207550525665283)