# Autograd

In [1]:
from matplotlib import pyplot as plt
import torch
import torch.optim as optim

In [2]:
t_c = [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]

In [3]:
t_c = torch.tensor(t_c)
t_u = torch.tensor(t_u)
print(t_c.shape)
print(t_u.shape)

torch.Size([11])
torch.Size([11])


In [4]:
# linear model
def model(t_u, w, b):
    return w * t_u + b

In [5]:
# loss function
def loss_fn(t_p, t_c):
    square_diffs = (t_p - t_c)**2
    return square_diffs.mean()

In [6]:
params = torch.tensor([1.0, 0.0], requires_grad=True)

In [7]:
params.grad is None

True

In [8]:
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()

In [9]:
params.grad

tensor([4517.2969,   82.6000])

In [10]:
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()

In [11]:
# производные накапливаются
params.grad

tensor([9034.5938,  165.2000])

In [12]:
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()
params.grad

tensor([13551.8906,   247.8000])

In [13]:
# надо на каждой итерации обнулять градиенты
params.grad.zero_()
params.grad

tensor([0., 0.])

In [14]:
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()
params.grad

tensor([4517.2969,   82.6000])

## Training loop

In [15]:
def training_loop(n_epochs, learning_rate, params, t_u, t_c):
    for epoch in range(1, n_epochs + 1):
        if params.grad is not None:
            params.grad.zero_()

        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)
        loss.backward()

        with torch.no_grad():
            params -= learning_rate * params.grad

        if epoch % 500 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))

    return params            

In [16]:
t_un = 0.1 * t_u

In [17]:
training_loop(
    n_epochs = 5000,
    learning_rate = 1e-2,
    params = torch.tensor([1.0, 0.0], requires_grad=True),
    t_u = t_un, 
    t_c = t_c
)

Consider using tensor.detach() first. (Triggered internally at /pytorch/torch/csrc/autograd/generated/python_variable_methods.cpp:835.)
  print('Epoch %d, Loss %f' % (epoch, float(loss)))


Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

## Using optimizers

In [18]:
dir(optim)[:15]

['ASGD',
 'Adadelta',
 'Adafactor',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam']

In [20]:
# Creating optimizer
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-5
optimizer = optim.SGD([params], lr=learning_rate)

In [21]:
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
loss.backward()

In [22]:
optimizer.step()

In [23]:
params

tensor([ 9.5483e-01, -8.2600e-04], requires_grad=True)

In [24]:
params.grad

tensor([4517.2969,   82.6000])

In [25]:
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
optimizer.zero_grad()
loss.backward()
optimizer.step()

In [26]:
params

tensor([ 0.9123, -0.0016], requires_grad=True)

In [27]:
params.grad

tensor([4251.5220,   77.9184])

In [28]:
t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)
optimizer.zero_grad()
loss.backward()
optimizer.step()

In [29]:
params

tensor([ 0.8723, -0.0023], requires_grad=True)

In [30]:
params.grad

tensor([4001.3838,   73.5123])

## Training loop

In [31]:
def training_loop(n_epochs, optimizer, params, t_u, t_c):
    for epoch in range(1, n_epochs + 1):
        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 500 == 0:
            print('Epoch %d, Loss %f' % (epoch, float(loss)))

    return params

In [32]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs = 5000,
    optimizer = optimizer,
    params = params,
    t_u = t_un,
    t_c = t_c
)

Epoch 500, Loss 7.860120
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [33]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.Adam([params], lr=learning_rate)

training_loop(
    n_epochs = 5000,
    optimizer = optimizer,
    params = params,
    t_u = t_u,
    t_c = t_c
)

Epoch 500, Loss 25.590321
Epoch 1000, Loss 22.958574
Epoch 1500, Loss 19.641720
Epoch 2000, Loss 16.026117
Epoch 2500, Loss 12.481897
Epoch 3000, Loss 9.332447
Epoch 3500, Loss 6.805122
Epoch 4000, Loss 4.996394
Epoch 4500, Loss 3.867487
Epoch 5000, Loss 3.274242


tensor([  0.5017, -15.3177], requires_grad=True)

## Разбиение набора данных

In [34]:
t_u.shape

torch.Size([11])

In [35]:
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)

In [36]:
shuffled_indices = torch.randperm(n_samples)
shuffled_indices

tensor([ 3,  0, 10,  6,  7,  1,  5,  8,  4,  9,  2])

In [37]:
train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

In [38]:
train_indices, val_indices

(tensor([ 3,  0, 10,  6,  7,  1,  5,  8,  4]), tensor([9, 2]))

In [39]:
train_t_u = t_u[train_indices]
train_t_c = t_c[train_indices]

val_t_u = t_u[val_indices]
val_t_c = t_c[val_indices]

In [40]:
train_t_un = 0.1 * train_t_u
val_t_un = 0.1 * val_t_u

In [41]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u, train_t_c, val_t_c):
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        val_t_p = model(val_t_u, *params)
        val_loss = loss_fn(val_t_p, val_t_c)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Train loss {train_loss.item():.4f},"
                  f" Valivdation loss {val_loss.item():.4f}")

    return params

In [42]:
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs = 3000,
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_un,
    val_t_u = val_t_un,
    train_t_c = train_t_c,
    val_t_c = val_t_c
)

Epoch 1, Train loss 83.4771, Valivdation loss 66.3570
Epoch 2, Train loss 44.3109, Valivdation loss 14.4402
Epoch 3, Train loss 37.3050, Valivdation loss 4.5136


Epoch 500, Train loss 7.1618, Valivdation loss 2.2583
Epoch 1000, Train loss 3.4705, Valivdation loss 2.7107
Epoch 1500, Train loss 3.0020, Valivdation loss 2.9045
Epoch 2000, Train loss 2.9426, Valivdation loss 2.9777
Epoch 2500, Train loss 2.9350, Valivdation loss 3.0042
Epoch 3000, Train loss 2.9340, Valivdation loss 3.0138


tensor([  5.4100, -17.3964], requires_grad=True)