In [1]:
import torch 
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Paired samples: t_u[i] is the model input and t_c[i] the target it is fitted
# against in the cells below (11 pairs).
# presumably t_c is degrees Celsius and t_u a reading in an unknown unit - TODO confirm
t_c = [0.5, 14, 15, 28, 11, 8, 3, -4, 6, 13, 21]
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]

In [3]:
# Rebind both names from Python lists to 1-D tensors so the later cells can do
# element-wise arithmetic and fancy indexing on them.
t_c = torch.tensor(t_c)
t_u = torch.tensor(t_u)

In [4]:
# Rescaled copy of the inputs (x0.1); the training calls below are fed t_un, not t_u.
t_un = 0.1*t_u

In [5]:
t_un

tensor([3.5700, 5.5900, 5.8200, 8.1900, 5.6300, 4.8900, 3.3900, 2.1800, 4.8400,
        6.0400, 6.8400])

In [6]:
# Initial [w, b] = [1.0, 0.0] (unpacked into model() as *params).
# requires_grad=True lets backward() populate params.grad.
params = torch.tensor([1.0, 0.0], requires_grad = True)

In [7]:
def model(t_u, w, b):
    """Linear model: scale the input ``t_u`` by ``w`` and shift it by ``b``."""
    scaled = w*t_u
    return scaled + b

In [8]:
# also define a loss function - this would be the mse

def loss_mse(t_p, t_c):
    """Mean squared error between predictions ``t_p`` and targets ``t_c``.

    The difference is squared element-wise and then averaged over every
    element via ``.mean()``.
    """
    return ((t_p - t_c)**2).mean()

In [9]:
params.grad is None

True

In [10]:
# Loss at the initial parameters. Note: this uses the raw t_u, not the rescaled t_un.
loss = loss_mse(model(t_u, *params), t_c)

In [11]:
loss

tensor(1763.8848, grad_fn=<MeanBackward0>)

In [12]:
# Backpropagate from the loss; this is what fills params.grad (it was None before).
loss.backward()

In [13]:
params.grad

tensor([4517.2969,   82.6000])

In [14]:
def training_loop(n_epochs, learning_rate, params, t_u, t_c):
    """Fit ``params`` with hand-written gradient descent on ``loss_mse``.

    Every epoch clears any gradient left on ``params``, runs ``model`` and
    ``loss_mse`` forward, backpropagates, and updates ``params`` in place.
    The loss is printed every 500 epochs.

    Args:
        n_epochs: number of update steps to run.
        learning_rate: factor applied to the gradient on each update.
        params: tensor unpacked as the ``(w, b)`` arguments of ``model``.
        t_u: inputs passed to ``model``.
        t_c: targets passed to ``loss_mse``.

    Returns:
        The same ``params`` tensor that was passed in, after the updates.
    """
    for step in range(n_epochs):
        epoch = step + 1

        # backward() adds into .grad, so wipe the previous epoch's value first
        stale_grad = params.grad
        if stale_grad is not None:
            stale_grad.zero_()

        current_loss = loss_mse(model(t_u, *params), t_c)
        current_loss.backward()

        # the parameter update itself must not be tracked by autograd
        with torch.no_grad():
            params.sub_(learning_rate * params.grad)

        if epoch % 500 == 0:
            message = 'Epoch %d, Loss %f' % (epoch, float(current_loss))
            print(message)

    return params

In [15]:
# Run the manual gradient-descent loop from a fresh [w, b] = [1.0, 0.0].
training_loop(
    n_epochs = 5000, 
    learning_rate = 1e-2, 
    params = torch.tensor([1.0, 0.0], requires_grad=True), # new tensor, so no gradient carried over
    t_u = t_un, # the rescaled inputs, not the raw t_u
    t_c = t_c)

Epoch 500, Loss 7.860115
Epoch 1000, Loss 3.828538
Epoch 1500, Loss 3.092191
Epoch 2000, Loss 2.957698
Epoch 2500, Loss 2.933134
Epoch 3000, Loss 2.928648
Epoch 3500, Loss 2.927830
Epoch 4000, Loss 2.927679
Epoch 4500, Loss 2.927652
Epoch 5000, Loss 2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [16]:
# Using different optimizers

In [17]:
# PyTorch provides multiple optimizers, just like TensorFlow

In [18]:
import torch.optim as optim

In [19]:
dir(optim)

['ASGD',
 'Adadelta',
 'Adafactor',
 'Adagrad',
 'Adam',
 'AdamW',
 'Adamax',
 'LBFGS',
 'NAdam',
 'Optimizer',
 'RAdam',
 'RMSprop',
 'Rprop',
 'SGD',
 'SparseAdam',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_adafactor',
 '_functional',
 'lr_scheduler',
 'swa_utils']

In [20]:
# The parameter tensors are the first argument to the optimizer constructor

In [21]:
# Every optimizer has two methods: zero_grad and step

In [22]:
# Let's define the training loop with one of these optimizers

In [23]:
# Use SGD for now

In [24]:
params_optim = torch.tensor([1.0, 0], requires_grad = True)

In [25]:
lr = 1e-2

In [26]:
# SGD receives the list of tensors it should update, plus the learning rate.
optimizer = optim.SGD([params_optim], lr = lr)

In [27]:
n_eps = 5000

In [28]:
# define the training loop

def train_loop(n_epochs, optimizer, params, t_u, t_c):
    """Fit ``params`` using a ``torch.optim`` optimizer.

    Each epoch runs ``model`` and ``loss_mse`` forward, clears the gradients
    held by the optimizer, backpropagates, and lets the optimizer apply the
    update. The loss is printed every 500 epochs.

    Args:
        n_epochs: number of optimizer steps to run.
        optimizer: optimizer whose ``zero_grad()``/``step()`` are called; it is
            expected to have been built over ``params`` (not checked here).
        params: tensor unpacked as the ``(w, b)`` arguments of ``model``.
        t_u: inputs passed to ``model``.
        t_c: targets passed to ``loss_mse``.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        t_p = model(t_u, *params)
        loss = loss_mse(t_p, t_c)

        # gradients accumulate across backward() calls, so reset before each one
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 500 == 0:
            # 'Loss %f' with a space, matching the manual training_loop's log line
            print('Epoch %d, Loss %f' % (epoch, float(loss)))

    return params

In [29]:
train_loop(n_eps, optimizer, params_optim, t_un, t_c)

Epoch 500, Loss7.860120
Epoch 1000, Loss3.828538
Epoch 1500, Loss3.092191
Epoch 2000, Loss2.957698
Epoch 2500, Loss2.933134
Epoch 3000, Loss2.928648
Epoch 3500, Loss2.927830
Epoch 4000, Loss2.927679
Epoch 4500, Loss2.927652
Epoch 5000, Loss2.927647


tensor([  5.3671, -17.3012], requires_grad=True)

In [30]:
# Now what if we have a set of validation data?

n_samples = t_u.shape[0]

In [31]:
# Hold out 20% of the samples for validation; int() truncates, so 11 samples -> 2.
n_val = int(0.2*n_samples)

In [32]:
n_val

2

In [33]:
# Random permutation of the sample indices; the train/validation split is cut from it.
# NOTE(review): no seed is set in the visible cells, so this permutation (and the
# splits and losses derived from it) will differ on every run - consider
# calling torch.manual_seed(...) before this line.
shuffled_samps = torch.randperm(n_samples)

In [34]:
shuffled_samps

tensor([ 2,  4,  1,  8,  5,  0,  6,  9, 10,  3,  7])

In [35]:
# Everything except the last n_val shuffled indices is used for training;
# the last n_val indices are held out for validation.
train_indices = shuffled_samps[:-n_val]
valid_indices = shuffled_samps[-n_val:]

In [36]:
train_indices, valid_indices

(tensor([ 2,  4,  1,  8,  5,  0,  6,  9, 10]), tensor([3, 7]))

In [37]:
# Okay, now create the train and validation data splits

In [38]:
train_t_c = t_c[train_indices]
valid_t_c = t_c[valid_indices]

In [39]:
train_t_c

tensor([15.0000, 11.0000, 14.0000,  6.0000,  8.0000,  0.5000,  3.0000, 13.0000,
        21.0000])

In [40]:
valid_t_c

tensor([28., -4.])

In [41]:
train_t_u = t_u[train_indices]
valid_t_u = t_u[valid_indices]

In [42]:
train_t_u

tensor([58.2000, 56.3000, 55.9000, 48.4000, 48.9000, 35.7000, 33.9000, 60.4000,
        68.4000])

In [43]:
valid_t_u

tensor([81.9000, 21.8000])

In [44]:
# normalize t_u

In [45]:
train_t_un = 0.1*train_t_u

In [46]:
valid_t_un = 0.1*valid_t_u

In [47]:
train_t_un

tensor([5.8200, 5.6300, 5.5900, 4.8400, 4.8900, 3.5700, 3.3900, 6.0400, 6.8400])

In [48]:
valid_t_un

tensor([8.1900, 2.1800])

In [49]:
# Okay, now we can write out the new train loop

In [50]:
# Notice that we only call loss.backward() on the training loss; it is never called on the validation loss. Also remember that PyTorch autograd accumulates gradients across backward() calls, so we call optimizer.zero_grad() before each train_loss.backward() to make sure only the gradient of the current step is used.

In [51]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Train on the training split and report the loss on both splits.

    Each epoch evaluates ``model``/``loss_mse`` on the training and the
    validation data with the current ``params``, then backpropagates the
    training loss only and lets ``optimizer`` apply the update. Losses are
    printed for the first three epochs and every 500th epoch.

    Args:
        n_epochs: number of optimizer steps to run.
        optimizer: optimizer whose ``zero_grad()``/``step()`` are called.
        params: tensor unpacked as the ``(w, b)`` arguments of ``model``.
        train_t_u, train_t_c: training inputs and targets.
        val_t_u, val_t_c: validation inputs and targets.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        # forward pass on both splits; the validation pass is not wrapped in
        # torch.no_grad() in this version
        loss_on_train = loss_mse(model(train_t_u, *params), train_t_c)
        loss_on_val = loss_mse(model(val_t_u, *params), val_t_c)

        # optimizer.zero_grad() resets the gradients, so no manual zero_() is
        # needed; only the training loss is backpropagated
        optimizer.zero_grad()
        loss_on_train.backward()
        optimizer.step()

        should_report = epoch <= 3 or epoch % 500 == 0
        if should_report:
            print(f"Epoch {epoch}, Training loss {loss_on_train.item():.4f},"
                  f" Validation loss {loss_on_val.item():.4f}")

    return params

In [52]:
# define the optimizers here - along with the learning rate
# Fresh [w, b] = [1.0, 0.0], the learning rate, and an SGD optimizer that
# updates exactly this params tensor.
params = torch.tensor([1.0, 0.0], requires_grad=True)
learning_rate = 1e-2
optimizer = optim.SGD([params], lr=learning_rate)

In [53]:
# Train on the training split and report the validation loss alongside it.
training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_un, # rescaled training inputs
    val_t_u = valid_t_un, # rescaled validation inputs
    train_t_c = train_t_c,
    val_t_c = valid_t_c)

Epoch 1, Training loss 50.3755, Validation loss 215.3142
Epoch 2, Training loss 22.5148, Validation loss 136.3782
Epoch 3, Training loss 17.5463, Validation loss 111.6176
Epoch 500, Training loss 9.1372, Validation loss 43.9237
Epoch 1000, Training loss 5.7752, Validation loss 20.3462
Epoch 1500, Training loss 4.2485, Validation loss 10.1084
Epoch 2000, Training loss 3.5553, Validation loss 5.7752
Epoch 2500, Training loss 3.2406, Validation loss 4.0202
Epoch 3000, Training loss 3.0976, Validation loss 3.3665


tensor([  5.1257, -16.3117], requires_grad=True)

In [54]:
# But written this way, the training loop still tracks the validation forward pass in the autograd graph, even though we never call backward() on the validation loss. We do not need that, so we can rewrite the function with the validation pass inside torch.no_grad(), as below.

In [60]:
def training_loop(n_epochs, optimizer, params, train_t_u, val_t_u,
                  train_t_c, val_t_c):
    """Train on the training split; evaluate the validation split untracked.

    Same loop as a plain optimizer-driven fit, except that the validation
    forward pass runs inside ``torch.no_grad()`` and the resulting loss is
    asserted to not require grad. Losses are printed for the first three
    epochs and every 500th epoch.

    Args:
        n_epochs: number of optimizer steps to run.
        optimizer: optimizer whose ``zero_grad()``/``step()`` are called.
        params: tensor unpacked as the ``(w, b)`` arguments of ``model``.
        train_t_u, train_t_c: training inputs and targets.
        val_t_u, val_t_c: validation inputs and targets.

    Returns:
        The ``params`` object that was passed in.
    """
    for epoch in range(1, n_epochs + 1):
        loss_on_train = loss_mse(model(train_t_u, *params), train_t_c)

        # validation is only monitored, so keep it out of the autograd graph
        with torch.no_grad():
            loss_on_val = loss_mse(model(val_t_u, *params), val_t_c)
            # sanity check that no_grad really switched tracking off
            assert not loss_on_val.requires_grad

        optimizer.zero_grad()
        loss_on_train.backward()
        optimizer.step()

        should_report = epoch <= 3 or epoch % 500 == 0
        if should_report:
            print(f"Epoch {epoch}, Training loss {loss_on_train.item():.4f},"
                  f" Validation loss {loss_on_val.item():.4f}")

    return params

In [61]:
# NOTE(review): `params` and `optimizer` are the objects created in cell [52] and
# already trained for 3000 epochs by cell [53]; they are not re-created here, so
# this call continues from the trained values rather than from [1.0, 0.0]
# (the epoch-1 training loss below is 2.9798, not 50.3755). Re-create both
# before this call if a fresh, comparable run is intended.
training_loop(
    n_epochs = 3000, 
    optimizer = optimizer,
    params = params,
    train_t_u = train_t_un, # rescaled training inputs
    val_t_u = valid_t_un, # rescaled validation inputs
    train_t_c = train_t_c,
    val_t_c = valid_t_c)

Epoch 1, Training loss 2.9798, Validation loss 3.2881
Epoch 2, Training loss 2.9798, Validation loss 3.2882
Epoch 3, Training loss 2.9798, Validation loss 3.2882
Epoch 500, Training loss 2.9792, Validation loss 3.3114
Epoch 1000, Training loss 2.9790, Validation loss 3.3281
Epoch 1500, Training loss 2.9788, Validation loss 3.3398
Epoch 2000, Training loss 2.9788, Validation loss 3.3479
Epoch 2500, Training loss 2.9788, Validation loss 3.3534
Epoch 3000, Training loss 2.9787, Validation loss 3.3572


tensor([  5.4392, -18.0020], requires_grad=True)