In [1]:
%matplotlib inline
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, linewidth=75)

In [2]:
t_c = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0,
                    8.0, 3.0, -4.0, 6.0, 13.0, 21.0])
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9,
                    33.9, 21.8, 48.4, 60.4, 68.4])
t_un = 0.1 * t_u

In [3]:
def model(t_u, w0, w1, b):
    return w0 * t_u**2 + w1 * t_u + b

In [4]:
def loss_fn(predicted, actual):
    squared_diffs = (predicted - actual)**2
    return squared_diffs.mean()

In [5]:
import torch.optim as optim

dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_functional',
 '_multi_tensor',
 'lr_scheduler',
 'swa_utils']

In [6]:
# Validate that the the new model is working

params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
learning_rate = 1e-4
optimizer = optim.SGD([params], lr=learning_rate)

t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)

loss.backward()
optimizer.step()

params

tensor([-2.3139e+03, -3.5181e+01, -5.9642e-01], requires_grad=True)

In [7]:
# Does zeroing the optimizer gradient at the begining make a difference 

params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
learning_rate = 1e-4
optimizer = optim.SGD([params], lr=learning_rate)

t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)

optimizer.zero_grad() # <1>

loss.backward()
optimizer.step()

params

tensor([-2.3139e+03, -3.5181e+01, -5.9642e-01], requires_grad=True)

In [8]:
# Significance of T_un

params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
learning_rate = 1e-4
optimizer = optim.SGD([params], lr=learning_rate)

t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)

loss.backward()
optimizer.step()

params

tensor([-2.3139e+03, -3.5181e+01, -5.9642e-01], requires_grad=True)

In [9]:
# Significance of T_un with zero gradient

params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
learning_rate = 1e-4
optimizer = optim.SGD([params], lr=learning_rate)

t_p = model(t_u, *params)
loss = loss_fn(t_p, t_c)

optimizer.zero_grad() # <1>

loss.backward()
optimizer.step()

params

tensor([-2.3139e+03, -3.5181e+01, -5.9642e-01], requires_grad=True)

In [10]:
rates_to_learn_at = [1/x for x in [10, 100, 1000, 10000]]
rates_to_learn_at

[0.1, 0.01, 0.001, 0.0001]

In [42]:
def training_loop(n_epochs, optimizer, params, t_u, t_c, epoch_report_val = 500):
    for epoch in range(1, n_epochs + 1):
        t_p = model(t_u, *params) 
        loss = loss_fn(t_p, t_c)
        
        optimizer.zero_grad()
        
        loss.backward()
        optimizer.step()

        if epoch % epoch_report_val == 0:
            print(f'Epoch {epoch}, Loss {float(loss)}')
            
    return params

## Deciding which optimizer to use

In [43]:
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
learning_rate = rates_to_learn_at[3]
optimizer = optim.SGD([params], lr=learning_rate) 

training_loop(
    n_epochs = 5000, 
    optimizer = optimizer,
    params = params, 
    t_u = t_un,
    t_c = t_c)

Epoch 500, Loss 10.708596229553223
Epoch 1000, Loss 8.642083168029785
Epoch 1500, Loss 7.1710052490234375
Epoch 2000, Loss 6.123476982116699
Epoch 2500, Loss 5.377227306365967
Epoch 3000, Loss 4.845284938812256
Epoch 3500, Loss 4.465787887573242
Epoch 4000, Loss 4.194724082946777
Epoch 4500, Loss 4.0008015632629395
Epoch 5000, Loss 3.8617441654205322


tensor([ 0.5570, -0.8881, -0.8753], requires_grad=True)

In [44]:
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
learning_rate = rates_to_learn_at[3]
optimizer = optim.Adam([params], lr=learning_rate) 

training_loop(
    n_epochs = 5000, 
    optimizer = optimizer,
    params = params,
    t_u = t_u, 
    t_c = t_c
    )

Epoch 500, Loss 10577728.0
Epoch 1000, Loss 9524402.0
Epoch 1500, Loss 8545122.0
Epoch 2000, Loss 7634292.5
Epoch 2500, Loss 6787368.0
Epoch 3000, Loss 6000706.0
Epoch 3500, Loss 5271407.5
Epoch 4000, Loss 4597170.0
Epoch 4500, Loss 3976134.25
Epoch 5000, Loss 3406753.75


tensor([ 0.5412,  0.5412, -0.4588], requires_grad=True)

In [14]:
n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_indices, val_indices  # <1>

(tensor([ 0,  3,  5,  4, 10,  2,  1,  9,  6]), tensor([8, 7]))

In [47]:
training_t_u = t_u[train_indices]
training_t_c = t_c[train_indices]

validation_t_u = t_u[val_indices]
validation_t_c = t_c[val_indices]

training_t_un = 0.1 * training_t_u
validation_t_un = 0.1 * validation_t_u

In [75]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params) # <1>
        train_loss = loss_fn(train_t_p, train_t_c)
                             
        val_t_p = model(val_t_u, *params) # <1>
        val_loss = loss_fn(val_t_p, val_t_c)
        
        optimizer.zero_grad()
        
        train_loss.backward() 
        optimizer.step()

        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")
            
    return params

In [76]:
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
learning_rate = 1e-4
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs = 5000, 
    optimizer = optimizer,
    params = params,
    train_t_u = training_t_un, # <1> 
    val_t_u = validation_t_un, # <1> 
    train_t_c = training_t_c,
    val_t_c = validation_t_c)

params

Epoch 1, Training loss 757.6071, Validation loss 307.6372
Epoch 2, Training loss 404.0941, Validation loss 200.9509
Epoch 3, Training loss 217.4629, Validation loss 138.7690
Epoch 500, Training loss 7.0935, Validation loss 31.3754
Epoch 1000, Training loss 5.8806, Validation loss 27.4340
Epoch 1500, Training loss 5.0004, Validation loss 24.2863
Epoch 2000, Training loss 4.3616, Validation loss 21.7569
Epoch 2500, Training loss 3.8980, Validation loss 19.7124
Epoch 3000, Training loss 3.5614, Validation loss 18.0503
Epoch 3500, Training loss 3.3171, Validation loss 16.6921
Epoch 4000, Training loss 3.1396, Validation loss 15.5765
Epoch 4500, Training loss 3.0107, Validation loss 14.6561
Epoch 5000, Training loss 2.9170, Validation loss 13.8935


tensor([ 0.5017, -0.5374, -0.6002], requires_grad=True)

In [77]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    for epoch in range(1, n_epochs + 1):
        train_t_p = model(train_t_u, *params)
        train_loss = loss_fn(train_t_p, train_t_c)

        with torch.no_grad(): # <1>
            val_t_p = model(val_t_u, *params)
            val_loss = loss_fn(val_t_p, val_t_c)
            assert val_loss.requires_grad == False # <2>
            
        optimizer.zero_grad()
        
        train_loss.backward()
        optimizer.step()
        
        if epoch <= 3 or epoch % 500 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

In [78]:
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
learning_rate = 1e-4
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs = 5000, 
    optimizer = optimizer,
    params = params,
    train_t_u = training_t_un, # <1> 
    val_t_u = validation_t_un, # <1> 
    train_t_c = training_t_c,
    val_t_c = validation_t_c)

params

Epoch 1, Training loss 757.6071, Validation loss 307.6372
Epoch 2, Training loss 404.0941, Validation loss 200.9509
Epoch 3, Training loss 217.4629, Validation loss 138.7690
Epoch 500, Training loss 7.0935, Validation loss 31.3754
Epoch 1000, Training loss 5.8806, Validation loss 27.4340
Epoch 1500, Training loss 5.0004, Validation loss 24.2863
Epoch 2000, Training loss 4.3616, Validation loss 21.7569
Epoch 2500, Training loss 3.8980, Validation loss 19.7124
Epoch 3000, Training loss 3.5614, Validation loss 18.0503
Epoch 3500, Training loss 3.3171, Validation loss 16.6921
Epoch 4000, Training loss 3.1396, Validation loss 15.5765
Epoch 4500, Training loss 3.0107, Validation loss 14.6561
Epoch 5000, Training loss 2.9170, Validation loss 13.8935


tensor([ 0.5017, -0.5374, -0.6002], requires_grad=True)

In [51]:
def calc_forward(t_u, t_c, is_train):
    with torch.set_grad_enabled(is_train):
        t_p = model(t_u, *params)
        loss = loss_fn(t_p, t_c)
    return loss