In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader

from datasets import LinearDataset, BostonDataset

In [2]:
# Run configuration -- the knobs a reader is expected to tune.
batch_size: int = 1      # samples per batch handed to the DataLoader
unfold_length: int = 10  # number of unrolled steps; sizes the per-step parameter lists
epochs: int = 500        # NOTE(review): not referenced by any cell visible here -- confirm it is still needed

In [3]:
# Learnable step size consumed by the manual parameter-update rule.
# NOTE(review): in the cells visible here `lr` is only read inside a
# torch.no_grad() block, so no gradient is ever recorded for it -- confirm intent.
lr = torch.tensor([1e-3]).requires_grad_()

In [4]:
# Instantiate the project dataset and wrap it in a loader that yields
# `batch_size` samples at a time (all other DataLoader options left at defaults).
dataset = LinearDataset()
train_loader = DataLoader(dataset=dataset, batch_size=batch_size)

In [5]:
def loss_fn(y, y_pred):
    """Return the element-wise squared error between `y` and `y_pred`.

    No reduction is applied here; any averaging is left to the caller.
    """
    residual = y - y_pred
    return residual ** 2


In [6]:
# Pull a single batch from the loader and show the tensor shapes it yields.
X, y = next(iter(train_loader))
X, y = X.float(), y.float()  # cast both to float32
print(X.shape, y.shape, sep="\n")

torch.Size([1, 1])
torch.Size([1])


### Shape of Parameters
Parameters over time are stored as plain Python lists, one entry per unrolled step. Each list has length `unfold_length`.
At the n-th step, we use `weights[n]` and `bias[n]` for the feedforward pass.

In [7]:
# One slot per unrolled step; only slot 0 is populated up front.
weights = [None] * unfold_length
bias = [None] * unfold_length

# Random single-element starting values (weight is drawn before bias).
weights[0] = nn.Parameter(torch.rand(1, dtype=torch.float32))
bias[0] = nn.Parameter(torch.rand(1, dtype=torch.float32))

In [21]:
# Unrolled gradient descent: step i runs forward/backward with weights[i] and
# bias[i], then writes the updated values into slot i + 1.
# The parameter lists hold exactly `unfold_length` slots, so stop after that
# many steps even if the loader yields more batches; otherwise the write to
# slot i + 1 would run past the end of the lists.
n_steps = min(unfold_length, len(train_loader))

for i, (X, y) in enumerate(train_loader):
    print(f"Batch: {i}")
    X, y = X.to(dtype=torch.float32), y.to(dtype=torch.float32)

    # Drop any gradient left on this slot by an earlier run of this cell;
    # backward() accumulates into .grad rather than overwriting it.
    weights[i].grad = None
    bias[i].grad = None

    pred = torch.matmul(X, weights[i]) + bias[i]
    loss = loss_fn(y, pred).mean()

    print(f"MSE: {loss.item()}")

    loss.backward()

    # The final step has no successor slot to write into.
    if i == n_steps - 1:
        break

    # Plain SGD update, computed outside the autograd graph.
    with torch.no_grad():
        weights[i + 1] = weights[i] - weights[i].grad * lr
        bias[i + 1] = bias[i] - bias[i].grad * lr

    # Tensors created under no_grad do not require grad; switch it back on so
    # the next step's backward() can populate their .grad.
    weights[i + 1].requires_grad_()
    bias[i + 1].requires_grad_()


Batch: 0
MSE: 0.02533680759370327
Batch: 1
MSE: 0.02752051316201687
Batch: 2
MSE: 0.04290696606040001
Batch: 3
MSE: 0.012292025610804558
Batch: 4
MSE: 0.037217654287815094
Batch: 5
MSE: 0.5550603270530701
Batch: 6
MSE: 0.2331543266773224
Batch: 7
MSE: 0.3621459901332855
Batch: 8
MSE: 0.5448423624038696
Batch: 9
MSE: 0.9242441654205322


In [22]:
# Carry the parameters from the last unrolled step over to slot 0 so the next
# unroll starts where this one ended.
# - Index -1 is the final slot of the list, so this keeps working if
#   `unfold_length` changes (no hard-coded step index).
# - detach().clone() yields a fresh leaf tensor with an empty .grad. Rebinding
#   slot 0 to the last-step tensor itself would also carry over the gradient
#   already stored on it, which the next backward() would accumulate onto.
# Assumes the last slot was populated by the training loop.
weights[0] = weights[-1].detach().clone().requires_grad_()
bias[0] = bias[-1].detach().clone().requires_grad_()

# Display the carried-over bias as the cell output.
bias[0]

tensor([1.1586], requires_grad=True)