In [2]:
%matplotlib inline
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

torch.set_printoptions(edgeitems=2, linewidth=75)

In [21]:
t_c = [0.5,  14.0, 15.0, 28.0, 11.0,  8.0,  3.0, -4.0,  6.0, 13.0, 21.0]
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
t_c = torch.tensor(t_c).unsqueeze(1) # <1>
t_u = torch.tensor(t_u).unsqueeze(1) # <1>

t_u.shape

torch.Size([11, 1])

In [22]:
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices

(tensor([ 2,  3,  4,  0,  1, 10,  9,  6,  8]), tensor([5, 7]))

In [23]:
t_u_train = t_u[train_indices]
t_c_train = t_c[train_indices]

t_u_val = t_u[val_indices]
t_c_val = t_c[val_indices]

t_un_train = 0.1 * t_u_train
t_un_val = 0.1 * t_u_val

In [24]:
t_un_val

tensor([[4.8900],
        [2.1800]])

Let's use `__call__` instead of forward. All subclasses of nn.Module have their `__call__` method defined.

In [8]:
linear_model = nn.Linear(1, 1)
linear_model(t_un_val)

tensor([[1.6697],
        [0.5563]], grad_fn=<AddmmBackward0>)

In [9]:
linear_model.weight

Parameter containing:
tensor([[0.2320]], requires_grad=True)

In [10]:
linear_model.bias

Parameter containing:
tensor([-0.2301], requires_grad=True)

In [11]:
x = torch.ones(1)
linear_model(x)

tensor([0.0019], grad_fn=<AddBackward0>)

In [12]:
x = torch.ones(10, 1)
linear_model(x)

tensor([[0.0019],
        [0.0019],
        [0.0019],
        [0.0019],
        [0.0019],
        [0.0019],
        [0.0019],
        [0.0019],
        [0.0019],
        [0.0019]], grad_fn=<AddmmBackward0>)

In [12]:
t_c = torch.tensor([0.5,  14.0, 15.0, 28.0, 11.0,  8.0,  3.0, -4.0,  6.0, 13.0, 21.0])
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4])
t_u.shape

torch.Size([11])

In [13]:
t_u = t_u.unsqueeze(1)

In [15]:
t_u.shape

torch.Size([11, 1])

In [16]:
linear_model = nn.Linear(1, 1)
optimizer = optim.SGD(
    linear_model.parameters(),
    lr=1e-2
)

In [17]:
linear_model.parameters()

<generator object Module.parameters at 0x7f042c0a9ba0>

In [18]:
list(linear_model.parameters())

[Parameter containing:
 tensor([[0.5716]], requires_grad=True),
 Parameter containing:
 tensor([-0.7016], requires_grad=True)]

In [26]:
# training loop
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val, t_c_train, t_c_val):
    for epoch in range(1, n_epochs + 1):
        t_p_train = model(t_u_train)
        loss_train = loss_fn(t_p_train, t_c_train)

        t_p_val = model(t_u_val)
        loss_val = loss_fn(t_p_val, t_c_val)

        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        if epoch == 1 or epoch % 1000 == 0:
            print("Epoch {}, Training {}, Validation {}".format(epoch, loss_train, loss_val))

In [27]:
linear_model = nn.Linear(1, 1)
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=linear_model,
    loss_fn=nn.MSELoss(),
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val
)

print()
print(linear_model.weight)
print(linear_model.bias)

Epoch 1, Training 424.50665283203125, Validation 89.10836029052734
Epoch 1000, Training 5.040363788604736, Validation 1.9239487648010254
Epoch 2000, Training 3.2320590019226074, Validation 1.579161286354065
Epoch 3000, Training 3.0649147033691406, Validation 2.8070688247680664

Parameter containing:
tensor([[5.5431]], requires_grad=True)
Parameter containing:
tensor([-18.3195], requires_grad=True)


In [28]:
seq_model = nn.Sequential(
    nn.Linear(1, 13),
    nn.Tanh(),
    nn.Linear(13, 1)
)
seq_model

Sequential(
  (0): Linear(in_features=1, out_features=13, bias=True)
  (1): Tanh()
  (2): Linear(in_features=13, out_features=1, bias=True)
)

In [29]:
[param.shape for param in seq_model.parameters()]

[torch.Size([13, 1]), torch.Size([13]), torch.Size([1, 13]), torch.Size([1])]

In [30]:
for name, param in seq_model.named_parameters():
    print(name, param.shape)

0.weight torch.Size([13, 1])
0.bias torch.Size([13])
2.weight torch.Size([1, 13])
2.bias torch.Size([1])


Sequential also accepts an OrderedDict, in which we can name each module passed to Sequential.

In [31]:
from collections import OrderedDict

In [32]:
seq_model = nn.Sequential(
    OrderedDict(
        [
            ("hidden_linear", nn.Linear(1, 8)),
            ("hidden_activation", nn.Tanh()),
            ("output_linear", nn.Linear(8, 1))
        ]
    )
)

seq_model

Sequential(
  (hidden_linear): Linear(in_features=1, out_features=8, bias=True)
  (hidden_activation): Tanh()
  (output_linear): Linear(in_features=8, out_features=1, bias=True)
)

In [33]:
for name, params in seq_model.named_parameters():
    print(name, params.shape)

hidden_linear.weight torch.Size([8, 1])
hidden_linear.bias torch.Size([8])
output_linear.weight torch.Size([1, 8])
output_linear.bias torch.Size([1])


In [34]:
seq_model.output_linear.bias

Parameter containing:
tensor([-0.2449], requires_grad=True)

In [36]:
optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=seq_model,
    loss_fn=nn.MSELoss(),
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val
)

print()
print(linear_model.weight)
print(linear_model.bias)

Epoch 1, Training 213.4250030517578, Validation 38.41298294067383
Epoch 1000, Training 10.824342727661133, Validation 2.341477870941162
Epoch 2000, Training 6.3280792236328125, Validation 0.741100549697876
Epoch 3000, Training 3.70603084564209, Validation 1.1208921670913696
Epoch 4000, Training 2.70620059967041, Validation 1.7753510475158691
Epoch 5000, Training 2.342121124267578, Validation 2.2711496353149414

Parameter containing:
tensor([[0.8501]], requires_grad=True)
Parameter containing:
tensor([-0.7922], requires_grad=True)
