In [1]:
import torch
import numpy as np

In [2]:
t_c = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0])
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4])
t_un = 0.1 * t_u

In [3]:
def model(t_u, w, b):
    return w * t_u + b

In [4]:
def loss_fn(t_p, t_c):
    squared_diffs = (t_p - t_c) ** 2
    return squared_diffs.mean()

In [5]:
import torch.optim as optim

dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_functional',
 '_multi_tensor',
 'lr_scheduler',
 'swa_utils']

In [13]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-5
optimizer = optim.SGD([params], lr=learning_rate)
print(params)
a = [params]
a[0]

tensor([1., 0.], requires_grad=True)


tensor([1., 0.], requires_grad=True)

In [15]:
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
loss.backward()
optimizer.step()
params

tensor([ 0.9097, -0.0017], requires_grad=True)

In [18]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
optimizer.zero_grad()
loss.backward()

optimizer.step()
params

tensor([-44.1730,  -0.8260], requires_grad=True)

In [24]:
def training_loop(epochs, optimizer, t_u, t_c, params):
    for epoch in range(epochs):
        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 500 == 0:
            print("Epoch %d, Loss %f" % (epoch, float(loss)))
    return params

In [25]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1.0e-2
optimizer = optim.SGD([params], lr=learning_rate)
training_loop(epochs=5000, optimizer=optimizer, params=params, t_u=t_un, t_c=t_c)  # <1>

Epoch 0, Loss 80.364342
Epoch 500, Loss 7.843377
Epoch 1000, Loss 3.825483
Epoch 1500, Loss 3.091630
Epoch 2000, Loss 2.957596
Epoch 2500, Loss 2.933116
Epoch 3000, Loss 2.928646
Epoch 3500, Loss 2.927829
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652


tensor([  5.3671, -17.3012], requires_grad=True)

In [27]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-1
optimizer = optim.Adam([params], lr=learning_rate)  # <1>

training_loop(epochs=2000, optimizer=optimizer, params=params, t_u=t_u, t_c=t_c)  # <2>

Epoch 0, Loss 1763.884766
Epoch 500, Loss 7.588875
Epoch 1000, Loss 3.085365
Epoch 1500, Loss 2.928568


tensor([  0.5367, -17.3021], requires_grad=True)

In [43]:
n_samples = t_u.shape[0]

val_test_samples = int(0.2 * n_samples)

shuffled_index = torch.randperm(n_samples)
n_samples, val_test_samples, shuffled_index

train_test = t_u[shuffled_index[:-val_test_samples]]
val_test = t_u[shuffled_index[-val_test_samples:]]
t_c_train = t_c[shuffled_index[:-val_test_samples]]
t_c_val = t_c[shuffled_index[-val_test_samples:]]
train_test, val_test
train_test = 0.1 * train_test
val_test = 0.1 * val_test
train_test, val_test

(tensor([3.3900, 5.8200, 6.8400, 2.1800, 5.6300, 4.8900, 5.5900, 8.1900, 6.0400]),
 tensor([3.5700, 4.8400]))

In [52]:
def training_loop(epochs, params, train_dataset, test_dataset, t_c, optimizer):
    for epoch in range(epochs):
        t_p = model(train_dataset, *params)
        loss = loss_fn(t_p, t_c_train)
        with torch.no_grad():
            t_p_test = model(test_dataset, *params)
            loss_test = loss_fn(t_p_test, t_c_val)
            assert loss_test.requires_grad == False  # <2>
            if epoch % 500 == 0:
                print(
                    f"epoch : {epoch}, train loss : {loss.item()}, test loss :{loss_test.item()}"
                )
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return params

In [53]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    epochs=5000,
    params=params,
    train_dataset=train_test,
    test_dataset=val_test,
    optimizer=optimizer,
    t_c=t_c,
)

epoch : 0, train loss : 97.02635955810547, test loss :5.385250091552734
epoch : 500, train loss : 6.512426376342773, test loss :21.57388687133789
epoch : 1000, train loss : 3.0708529949188232, test loss :12.303930282592773
epoch : 1500, train loss : 2.446415424346924, test loss :9.25805950164795
epoch : 2000, train loss : 2.3331196308135986, test loss :8.124459266662598
epoch : 2500, train loss : 2.3125622272491455, test loss :7.671308994293213
epoch : 3000, train loss : 2.308833599090576, test loss :7.483676910400391
epoch : 3500, train loss : 2.308157205581665, test loss :7.404728412628174
epoch : 4000, train loss : 2.3080332279205322, test loss :7.371284484863281
epoch : 4500, train loss : 2.308011054992676, test loss :7.357072830200195


tensor([  5.2097, -16.0034], requires_grad=True)