In [1]:
import torch
import random

In [2]:
# Ground-truth parameters of the linear model: two weights and a scalar bias.
W = torch.tensor([2.0, -3.0], dtype=torch.float32)
b = torch.tensor([2.0])

In [41]:
# Seed torch's global RNG so the synthetic dataset (and everything sampled
# after it) is identical on every Restart & Run All.
torch.manual_seed(0)
# 1000 examples with 2 features each, drawn uniformly from [0, 1).
X = torch.rand((1000, 2))
# Noise-free targets of the linear model X @ W + b, one per example.
y = torch.matmul(X, W) + b
# Perturb the targets with Gaussian noise (mean 0, std 0.01).
y += torch.normal(0.0, 0.01, y.shape)

In [28]:
# Trainable parameters tracked by autograd: weights start uniformly random in
# [0, 1) with W's shape/dtype, the bias starts at zero with b's shape/dtype.
train_W = torch.rand_like(W).requires_grad_(True)
train_b = torch.zeros_like(b).requires_grad_(True)

In [24]:
def data_iter(batch_size, X, y):
    """Yield mini-batches of ``(X, y)`` in a freshly shuffled order.

    A new random permutation of the example indices is drawn on every call
    and each batch is gathered through it, so rows of ``X`` stay paired with
    their entries in ``y`` while the visiting order is random.

    Args:
        batch_size: Maximum number of examples per batch; the last batch is
            smaller when the number of examples is not a multiple of it.
        X: Tensor whose first dimension indexes the examples.
        y: Tensor with one entry per example, aligned with ``X``.

    Yields:
        ``(X_batch, y_batch)`` tuples that together cover every example
        exactly once.
    """
    num_examples = X.shape[0]
    # Batches are gathered through this permutation so they are really
    # shuffled. torch.randperm is used instead of random.shuffle because
    # shuffling a tensor in place swaps through views and can duplicate entries.
    indices = torch.randperm(num_examples)

    for start in range(0, num_examples, batch_size):
        batch_indices = indices[start:start + batch_size]
        yield X[batch_indices], y[batch_indices]

In [27]:
# Sanity-check the batch shapes. The loop variables deliberately do NOT reuse
# the names X / y: doing so would rebind the notebook-level dataset to the
# last batch and silently shrink the data seen by every later cell.
for batch_X, batch_y in data_iter(200, X, y):
    print(batch_X.shape)

torch.Size([200, 2])


In [35]:
def loss(y_true, y_pred):
    """Element-wise halved squared error, ``0.5 * (y_pred - y_true) ** 2``.

    No reduction is applied; the result has the broadcast shape of the inputs.
    """
    residual = y_pred - y_true
    return 0.5 * residual ** 2
loss(torch.tensor([10.5]), torch.tensor([2]))

tensor([36.1250])

In [45]:
def linrg(X):
    """Linear-regression forward pass: ``X @ train_W + train_b``.

    Reads the notebook-level trainable tensors ``train_W`` and ``train_b``.
    """
    weighted_sum = X @ train_W
    return weighted_sum + train_b

In [66]:
def sgd(params, lr, batch_size):
    """Apply one in-place mini-batch SGD step, then clear the gradients.

    Each parameter is updated as ``param -= (lr / batch_size) * param.grad``
    and its gradient is zeroed afterwards so the next ``backward()`` call
    starts from a clean accumulator.

    Args:
        params: Iterable of tensors to update in place. Parameters whose
            ``.grad`` is ``None`` are left untouched.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient; with a summed batch loss
            this turns the step into one along the per-example mean gradient.
    """
    step_size = lr / batch_size
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # No backward pass has reached this parameter, so there is
                # nothing to apply or reset.
                continue
            param -= step_size * param.grad
            param.grad.zero_()

In [69]:
# Training hyperparameters.
epochs = 3        # number of full passes over the dataset
batch_size = 10   # examples per mini-batch
lr = 0.1          # learning rate passed to sgd

In [75]:
# Mini-batch SGD: forward pass, summed batch loss, backward pass, parameter update.
for epoch in range(epochs):
    for train_X, train_y in data_iter(batch_size, X, y):
        y_pred = linrg(train_X)
        l = loss(train_y, y_pred)
        # Reduce to a scalar once; it drives backward() and is reused for logging.
        batch_loss = l.sum()
        batch_loss.backward()
        sgd([train_W, train_b], lr, batch_size)
        print(f'Loss: {batch_loss:.4f}, train_W: {train_W}, train_b: {train_b}')

Loss: 0.0005, train_W: tensor([ 2.0004, -2.9994], requires_grad=True), train_b: tensor([1.9999], requires_grad=True)
Loss: 0.0006, train_W: tensor([ 2.0004, -2.9995], requires_grad=True), train_b: tensor([2.0000], requires_grad=True)
Loss: 0.0006, train_W: tensor([ 2.0006, -2.9994], requires_grad=True), train_b: tensor([2.0001], requires_grad=True)
Loss: 0.0001, train_W: tensor([ 2.0005, -2.9995], requires_grad=True), train_b: tensor([2.0000], requires_grad=True)
Loss: 0.0006, train_W: tensor([ 2.0002, -2.9993], requires_grad=True), train_b: tensor([2.0000], requires_grad=True)
Loss: 0.0006, train_W: tensor([ 2.0004, -2.9991], requires_grad=True), train_b: tensor([2.0005], requires_grad=True)
Loss: 0.0005, train_W: tensor([ 2.0001, -2.9994], requires_grad=True), train_b: tensor([2.0001], requires_grad=True)
Loss: 0.0006, train_W: tensor([ 2.0000, -2.9994], requires_grad=True), train_b: tensor([2.0000], requires_grad=True)
Loss: 0.0005, train_W: tensor([ 1.9999, -2.9995], requires_grad=