In [1]:
import torch

In [2]:
# Paired measurements used throughout the notebook: t_u is fed to the model
# and t_c is the target the predictions are compared against.
t_c = torch.tensor([0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0])
t_u = torch.tensor([35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4])

In [3]:
def model(t_u, w, b):
    """Linear model: multiply the input by ``w`` and add ``b``.

    Args:
        t_u: Input value(s); the notebook passes a 1-D tensor.
        w: Weight multiplied with the input.
        b: Bias added to the scaled input.

    Returns:
        The prediction ``w * t_u + b``.
    """
    scaled = w * t_u
    return scaled + b

In [4]:
def loss_fn(t_p, t_c):
    """Mean squared error between predictions and targets.

    Args:
        t_p: Predicted values.
        t_c: Target values the predictions are compared against.

    Returns:
        The mean of the element-wise squared differences.
    """
    squared_errors = (t_p - t_c) ** 2
    return squared_errors.mean()

In [5]:
# Trainable parameters (unpacked as w, b by the model calls); autograd tracks
# operations on this tensor so that backward() can populate params.grad.
params = torch.tensor([1.0, 0.0]).requires_grad_(True)

In [10]:
# Check whether a gradient has been stored on params yet.
params.grad is None

True

In [11]:
# Forward pass on the raw inputs, then backpropagate so that autograd stores
# d(loss)/d(params) in params.grad.
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()

# Display the gradient that backward() stored on the parameter tensor.
params.grad

tensor([4517.2969,   82.6000])

In [12]:
# Reset the stored gradient in place; the guard skips the reset when no
# gradient has been stored yet (params.grad is None).
if params.grad is not None:
    params.grad.zero_()

In [14]:
# Display the stored gradient to confirm it has been zeroed.
params.grad

tensor([0., 0.])

In [15]:
def training_loop(n_epochs, learning_rate, params, t_u, t_c):
    """Fit ``params`` with hand-written gradient descent.

    Each epoch resets any stored gradient, runs the forward pass through
    ``model`` and ``loss_fn``, backpropagates, and then updates ``params``
    in place outside of autograd tracking.

    Args:
        n_epochs: Number of update steps to run.
        learning_rate: Factor applied to the gradient in each update.
        params: Tensor with ``requires_grad=True``; it is modified in place.
        t_u: Model inputs.
        t_c: Targets the predictions are compared against.

    Returns:
        The same ``params`` tensor that was passed in, after training.
    """
    for epoch in range(1, n_epochs + 1):
        # Gradients accumulate across backward() calls, so clear the old one.
        stale_grad = params.grad
        if stale_grad is not None:
            stale_grad.zero_()

        loss = loss_fn(model(t_u, *params), t_c)
        loss.backward()

        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            params.sub_(learning_rate * params.grad)

        if not epoch % 500:
            print('Epoch %d \t Loss %f' % (epoch, float(loss)))

    return params

In [16]:
# Scaled copy of the inputs (each value multiplied by 0.1); the training runs
# below that use the 1e-2 learning rate are fed this tensor instead of t_u.
t_un = 0.1 * t_u

In [17]:
# Train on the scaled inputs, starting from a fresh parameter tensor.
training_loop(
    n_epochs=5000,
    learning_rate=1e-2,
    params=torch.tensor([1.0, 0.0], requires_grad=True),
    t_u=t_un,
    t_c=t_c,
)

Epoch 500 	 Loss 7.860115
Epoch 1000 	 Loss 3.828538
Epoch 1500 	 Loss 3.092191
Epoch 2000 	 Loss 2.957698
Epoch 2500 	 Loss 2.933134
Epoch 3000 	 Loss 2.928648
Epoch 3500 	 Loss 2.927830
Epoch 4000 	 Loss 2.927679
Epoch 4500 	 Loss 2.927652
Epoch 5000 	 Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [18]:
import torch.optim as optim

In [19]:
# List the names available in torch.optim.
dir(optim)

['ASGD',
 'Adadelta',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'Optimizer',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_functional',
 '_multi_tensor',
 'lr_scheduler',
 'swa_utils']

In [26]:
# One optimisation step driven by optim.SGD instead of a hand-written update.
params = torch.tensor([1.0, 0.0], requires_grad=True)

learning_rate = 1e-2

# The optimizer is handed the parameter tensor it is responsible for updating.
optimizer = optim.SGD([params], lr=learning_rate)

# Forward pass on the scaled inputs.
t_p = model(t_un, *params)

loss = loss_fn(t_p, t_c)

# Clear any previously stored gradients before backpropagating.
optimizer.zero_grad()

loss.backward()

# Apply the update to params using the gradient that backward() just stored.
optimizer.step()

# Display the parameters after the single step.
params

tensor([1.7761, 0.1064], requires_grad=True)

In [28]:
def training_loop(n_epochs, optimizer, params, x, y):
    """Fit ``params`` by letting ``optimizer`` perform the updates.

    Args:
        n_epochs: Number of optimisation steps to run.
        optimizer: Optimizer object providing ``zero_grad()`` and ``step()``;
            the notebook constructs it over ``[params]``.
        params: Parameter tensor unpacked into ``model``.
        x: Model inputs.
        y: Targets the predictions are compared against.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        loss = loss_fn(model(x, *params), y)

        # Standard step: clear old gradients, backpropagate, apply the update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if not epoch % 500:
            print('Epoch: %d \t Loss: %f' % (epoch, float(loss)))

    return params

In [29]:
# Full training run with plain SGD on the scaled inputs.
learning_rate = 1e-2
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    x=t_un,
    y=t_c,
)

Epoch: 500 	 Loss: 7.860115
Epoch: 1000 	 Loss: 3.828538
Epoch: 1500 	 Loss: 3.092191
Epoch: 2000 	 Loss: 2.957698
Epoch: 2500 	 Loss: 2.933134
Epoch: 3000 	 Loss: 2.928648
Epoch: 3500 	 Loss: 2.927830
Epoch: 4000 	 Loss: 2.927679
Epoch: 4500 	 Loss: 2.927652
Epoch: 5000 	 Loss: 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [30]:
# Same training run with Adam and a larger learning rate. Note that this run
# is fed the unscaled inputs (t_u), not the scaled t_un used for SGD.
learning_rate = 1e-1
params = torch.tensor([1.0, 0.0], requires_grad=True)
optimizer = optim.Adam([params], lr=learning_rate)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    params=params,
    x=t_u,
    y=t_c,
)

Epoch: 500 	 Loss: 7.612900
Epoch: 1000 	 Loss: 3.086700
Epoch: 1500 	 Loss: 2.928579
Epoch: 2000 	 Loss: 2.927644
Epoch: 2500 	 Loss: 2.927645
Epoch: 3000 	 Loss: 2.927646
Epoch: 3500 	 Loss: 2.927645
Epoch: 4000 	 Loss: 2.927646
Epoch: 4500 	 Loss: 2.927646
Epoch: 5000 	 Loss: 2.927645


tensor([  0.5368, -17.3048], requires_grad=True)

In [31]:
# Hold out 20% of the samples (rounded down) for validation.
n_samples = t_u.shape[0]

n_val = int(0.2 * n_samples)

# Shuffle with a dedicated, seeded generator so that Restart & Run All always
# reproduces the same train/validation split, without touching the global RNG
# state. Outputs recorded from an earlier unseeded run will differ.
SPLIT_SEED = 42
shuffled_indices = torch.randperm(
    n_samples, generator=torch.Generator().manual_seed(SPLIT_SEED)
)

In [35]:
# Split at an explicit boundary instead of using negative slicing: when
# n_val == 0, `[:-n_val]` is `[:0]` (an empty training set) and `[-n_val:]` is
# the whole permutation, which silently swaps the two sets.
n_train = shuffled_indices.shape[0] - n_val

train_indices = shuffled_indices[:n_train]

val_indices = shuffled_indices[n_train:]

In [36]:
# Display the two index tensors produced by the split.
train_indices, val_indices

(tensor([ 5,  9,  6,  1,  4,  7,  2, 10,  3]), tensor([0, 8]))

In [40]:
# Index the inputs and the targets with the same index tensors so that each
# input stays paired with its target in both subsets.
train_t_u, val_t_u = t_u[train_indices], t_u[val_indices]
train_t_c, val_t_c = t_c[train_indices], t_c[val_indices]

# Scaled inputs (multiplied by 0.1) for each subset.
train_t_un, val_t_un = 0.1 * train_t_u, 0.1 * val_t_u

In [49]:
def model(t_u, w1, w2, b):
    """Quadratic model: ``w1 * t_u**2 + w2 * t_u + b``.

    Defining this rebinds the name ``model``, so any later call that looks up
    ``model`` (including the training loops) uses this three-parameter version.

    Args:
        t_u: Input value(s).
        w1: Coefficient of the squared term.
        w2: Coefficient of the linear term.
        b: Constant offset.

    Returns:
        The quadratic prediction for ``t_u``.
    """
    quadratic_term = w1 * t_u ** 2
    linear_term = w2 * t_u
    return quadratic_term + linear_term + b

In [47]:
def training_loop(n_epochs, optimizer, params, train_x, train_y, val_x, val_y):
    """Train ``params`` on the training set while monitoring validation loss.

    Only the training loss is backpropagated. The validation loss is computed
    with autograd disabled, so it never contributes to the gradients.

    Args:
        n_epochs: Number of optimisation steps to run.
        optimizer: Optimizer object providing ``zero_grad()`` and ``step()``.
        params: Parameter tensor unpacked into ``model``.
        train_x: Training inputs.
        train_y: Training targets.
        val_x: Validation inputs.
        val_y: Validation targets.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        train_loss = loss_fn(model(train_x, *params), train_y)

        # Evaluate on held-out data without building an autograd graph.
        with torch.no_grad():
            val_loss = loss_fn(model(val_x, *params), val_y)
            assert not val_loss.requires_grad

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Report the first three epochs, then every 500th.
        should_report = epoch <= 3 or epoch % 500 == 0
        if should_report:
            print(f"Epoch {epoch}, Training loss {train_loss.item():.4f},"
                  f" Validation loss {val_loss.item():.4f}")

    return params

In [52]:
# Train the three-parameter model on the scaled training inputs and track the
# loss on the held-out validation samples.
learning_rate = 1e-4
params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    params=params,
    train_x=train_t_un,
    train_y=train_t_c,
    val_x=val_t_un,
    val_y=val_t_c,
)

Epoch 1, Training loss 743.0966, Validation loss 372.9341
Epoch 2, Training loss 400.7076, Validation loss 240.3446
Epoch 3, Training loss 218.3127, Validation loss 162.1796
Epoch 500, Training loss 8.5577, Validation loss 24.7192
Epoch 1000, Training loss 7.2084, Validation loss 20.8830
Epoch 1500, Training loss 6.2215, Validation loss 17.8609
Epoch 2000, Training loss 5.4992, Validation loss 15.4654
Epoch 2500, Training loss 4.9702, Validation loss 13.5549
Epoch 3000, Training loss 4.5823, Validation loss 12.0221


tensor([ 0.4579, -0.2419, -0.5777], requires_grad=True)