In [1]:
# Import the torch modules
import torch
import torch.optim as optim
import torch.nn as nn

# Subclassing nn.Module

Another way to build models is to **subclass nn.Module**, which can do more complex things than nn.Sequential which just applies one layer after another.

In order to **subclass nn.Module**, at a minimum we **need to define a forward function that takes the inputs to the module and returns the output**. This is where we define our module’s computation.

Note: With PyTorch, if we use standard torch operations, autograd will take care of the backward pass automatically; and indeed, an nn.Module never comes with a backward

In [2]:
class SubclassModel(nn.Module):
    def __init__(self):
        super().__init__()  # <1>
        self.hidden_linear = nn.Linear(1, 13)
        self.hidden_activation = nn.Tanh()
        self.output_linear = nn.Linear(13, 1)

    def forward(self, input):
        hidden_t = self.hidden_linear(input)
        activated_t = self.hidden_activation(hidden_t)
        output_t = self.output_linear(activated_t)
        return output_t

subclass_model = SubclassModel()
subclass_model

SubclassModel(
  (hidden_linear): Linear(in_features=1, out_features=13, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=13, out_features=1, bias=True)
)

In [3]:
# prepare datasets
t_c = [0.5,  14.0, 15.0, 28.0, 11.0,  8.0,  3.0, -4.0,  6.0, 13.0, 21.0]
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]

# To accommodate multiple samples, modules expect the zeroth dimension of the input to be the number of samples in the batch
# we would need to add an extra dimension to turn that 1D tensor into a matrix with samples in the rows and features in the columns.
t_c = torch.tensor(t_c).unsqueeze(1) # Adds the extra dimension at axis 1
t_u = torch.tensor(t_u).unsqueeze(1) #

print('t_u.shape -->', t_u.shape)

n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

t_u_train = t_u[train_indices]
t_c_train = t_c[train_indices]

t_u_val = t_u[val_indices]
t_c_val = t_c[val_indices]

t_un_train = 0.1 * t_u_train
t_un_val = 0.1 * t_u_val

print('t_un_train.shap -->', t_un_train.shape)
print('t_un_val.shape -->', t_un_val.shape)

t_u.shape --> torch.Size([11, 1])
t_un_train.shap --> torch.Size([9, 1])
t_un_val.shape --> torch.Size([2, 1])


In [4]:
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val,
                  t_c_train, t_c_val):
    for epoch in range(1, n_epochs + 1):
        t_p_train = model(t_u_train) # <1>
        loss_train = loss_fn(t_p_train, t_c_train)

        with torch.no_grad():
          t_p_val = model(t_u_val) # <1>
          loss_val = loss_fn(t_p_val, t_c_val)

        optimizer.zero_grad()
        loss_train.backward() # <2>
        optimizer.step()

        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")

In [6]:
optimizer = optim.SGD(subclass_model.parameters(), lr=1e-2)

training_loop(
    n_epochs = 3000,
    optimizer = optimizer,
    model = subclass_model,
    loss_fn = nn.MSELoss(), # note: we are no longer using our hand-written loss function from earlier
    t_u_train = t_un_train,
    t_u_val = t_un_val,
    t_c_train = t_c_train,
    t_c_val = t_c_val)

print()

list(subclass_model.named_parameters())

Epoch 1, Training loss 3.9689, Validation loss 25.5023
Epoch 1000, Training loss 52.3371, Validation loss 225.1799
Epoch 2000, Training loss 34.1107, Validation loss 214.7565
Epoch 3000, Training loss 52.3920, Validation loss 226.8432



[('hidden_linear.weight',
  Parameter containing:
  tensor([[-0.0605],
          [ 3.1941],
          [ 4.8323],
          [-4.6373],
          [-0.3479],
          [ 0.5380],
          [-3.2319],
          [ 1.1431],
          [-1.3278],
          [-0.4167],
          [ 3.5310],
          [-1.7390],
          [ 1.2383]], requires_grad=True)),
 ('hidden_linear.bias',
  Parameter containing:
  tensor([ -4.7709,  -4.3739,  -2.2809,   1.4888,   9.7830,  -9.8573,   0.8267,
          -18.4324,  18.9109,  -4.1175,  -1.0677,   1.6804,   1.8900],
         requires_grad=True)),
 ('output_linear.weight',
  Parameter containing:
  tensor([[-1.2746,  5.1810,  5.7557, -5.4474, -2.1109,  0.7308, -4.6444,  6.9543,
           -7.2767,  0.9431,  4.5173, -2.8148, -2.5398]], requires_grad=True)),
 ('output_linear.bias',
  Parameter containing:
  tensor([4.1430], requires_grad=True))]