## Hyperparameter Tuning and Experimenting

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

# Autograd is already enabled by default; this call only makes that explicit.
torch.set_grad_enabled(True)
# Allow up to 120 characters per line when tensors are printed.
torch.set_printoptions(linewidth=120)

from torch.utils.tensorboard import SummaryWriter

In [2]:
# Show the library versions this notebook was run with (one per line).
print(torch.__version__, torchvision.__version__, sep="\n")

1.8.1
0.9.1


In [3]:
def get_num_correct(preds, labels):
    """Count how many rows of ``preds`` have their largest score at the labelled index.

    Args:
        preds: tensor of per-class scores; the class is taken as the argmax
            along dimension 1.
        labels: tensor of target class indices compared element-wise with
            the predicted classes.

    Returns:
        The number of matching predictions as a Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    hits = predicted_classes.eq(labels)
    return hits.sum().item()

In [4]:
class Network(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    ``fc1`` expects 12*4*4 = 192 features per sample, which is what a
    1-channel 28x28 input yields after two (5x5 conv -> 2x2 pool) stages.
    The final layer emits 10 raw scores (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor: 1 -> 6 -> 12 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Fully connected head: 192 -> 120 -> 60 -> 10.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run a batch ``t`` through the network and return the output scores."""
        # Stage 1: conv -> ReLU -> 2x2 max-pool.
        features = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)
        # Stage 2: same pattern with the second convolution.
        features = F.max_pool2d(F.relu(self.conv2(features)), kernel_size=2, stride=2)

        # Flatten to rows of 192 values before the linear layers.
        flat = features.reshape(-1, 12 * 4 * 4)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))

        return self.out(hidden)

In [5]:
# Training split of Fashion-MNIST, stored under ./FashionMNIST (download is
# enabled); each sample's image is passed through ToTensor().
train_set = torchvision.datasets.FashionMNIST(
    root='./FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

### Using TensorBoard to track different hyperparameters

In [None]:
# Hyperparameter grid: every (batch_size, lr) pair gets its own training run
# and its own TensorBoard run directory.
batch_size_list = [100, 1000, 10000]
lr_list = [.01, .001, .0001, .00001]

for batch_size in batch_size_list:
    for lr in lr_list:

        # Fresh model, loader and optimizer so runs do not share any state.
        network = Network()
        train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
        optimizer = optim.Adam(network.parameters(), lr=lr)

        # One sample batch: used for the image grid and as the graph-tracing input.
        images, labels = next(iter(train_loader))
        grid = torchvision.utils.make_grid(images) # to make a grid of images

        # The comment string distinguishes this run from the others in TensorBoard.
        comment = f" batch_size={batch_size} lr={lr}"
        tb = SummaryWriter(comment=comment)

        # try/finally guarantees the writer is closed even if training raises
        # or the cell is interrupted part-way through a run.
        try:
            tb.add_image('image', grid)
            tb.add_graph(network, images) # adding the model and this grid on the TensorBoard

            for epoch in range(10):

                total_loss = 0
                total_correct = 0

                for batch in train_loader: # Get Batch
                    images, labels = batch

                    preds = network(images) # Pass Batch
                    loss = F.cross_entropy(preds, labels) # Calculate Loss (mean over the batch)

                    optimizer.zero_grad() # zeros the gradients (as gradients are accumulated)

                    loss.backward() # Calculate Gradients
                    optimizer.step() # Update Weights

                    # Weight the mean loss by the number of samples actually in
                    # this batch; the final batch is smaller than `batch_size`
                    # whenever the dataset size is not a multiple of it.
                    total_loss += loss.item() * labels.size(0)
                    total_correct += get_num_correct(preds, labels)

                # Per-epoch scalars for this run.
                tb.add_scalar('Loss', total_loss, epoch)
                tb.add_scalar('Number Correct', total_correct, epoch)
                tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

                # Histograms of every named parameter and of its gradient
                # (the gradient left over from the last batch of the epoch).
                for name, weight in network.named_parameters():
                    tb.add_histogram(name, weight, epoch)
                    tb.add_histogram(f"{name}.grad", weight.grad, epoch)

                print(f"epoch: {epoch} | total_correct: {total_correct} | loss: {total_loss}")
        finally:
            tb.close() # close the summary writer

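 But what if we want to tune 5 hyperparameters? <br>
 => That would need 5 nested loops! <br>
 A better way is to generate every combination with `itertools.product` and iterate over them in a single loop: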

In [7]:
from itertools import product

In [8]:
# Candidate values for each hyperparameter; the key order (lr, batch_size,
# shuffle) is the order in which the values are later unpacked.
parameters = {
    "lr": [.01, .001],
    "batch_size": [100, 1000],
    "shuffle": [True, False],
}

In [10]:
# One list of candidate values per hyperparameter, in the dict's key order.
param_values = list(parameters.values())
param_values

[[0.01, 0.001], [100, 1000], [True, False]]

In [11]:
# product(*param_values) yields every combination of the value lists,
# so a single loop replaces one nested loop per hyperparameter.
for lr, batch_size, shuffle in product(*param_values):
    print(f"{lr} {batch_size} {shuffle}")

0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


In [None]:
# One training run per hyperparameter combination. `param_values` holds the
# value lists in the order (lr, batch_size, shuffle), which is the order the
# loop unpacks them in.
for lr, batch_size, shuffle in product(*param_values):

    # Fresh model, loader and optimizer so runs do not share any state.
    network = Network()
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=shuffle)
    optimizer = optim.Adam(network.parameters(), lr=lr)

    # One sample batch: used for the image grid and as the graph-tracing input.
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images) # to make a grid of images

    # The comment string distinguishes this run from the others in TensorBoard.
    comment = f" batch_size={batch_size} lr={lr} shuffle={shuffle}"
    tb = SummaryWriter(comment=comment)

    # try/finally guarantees the writer is closed even if training raises
    # or the cell is interrupted part-way through a run.
    try:
        tb.add_image('image', grid)
        tb.add_graph(network, images) # adding the model and this grid on the TensorBoard

        for epoch in range(10):

            total_loss = 0
            total_correct = 0

            for batch in train_loader: # Get Batch
                images, labels = batch

                preds = network(images) # Pass Batch
                loss = F.cross_entropy(preds, labels) # Calculate Loss (mean over the batch)

                optimizer.zero_grad() # zeros the gradients (as gradients are accumulated)

                loss.backward() # Calculate Gradients
                optimizer.step() # Update Weights

                # Weight the mean loss by the number of samples actually in
                # this batch; the final batch is smaller than `batch_size`
                # whenever the dataset size is not a multiple of it.
                total_loss += loss.item() * labels.size(0)
                total_correct += get_num_correct(preds, labels)

            # Per-epoch scalars for this run.
            tb.add_scalar('Loss', total_loss, epoch)
            tb.add_scalar('Number Correct', total_correct, epoch)
            tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

            # Histograms of every named parameter and of its gradient
            # (the gradient left over from the last batch of the epoch).
            for name, weight in network.named_parameters():
                tb.add_histogram(name, weight, epoch)
                tb.add_histogram(f"{name}.grad", weight.grad, epoch)

            print(f"epoch: {epoch} | total_correct: {total_correct} | loss: {total_loss}")
    finally:
        tb.close() # close the summary writer