### Comparing different models on Tensorboard

#### Importing packages

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
                                            
torch.set_printoptions(linewidth=120)    # Display options for output
torch.set_grad_enabled(True)             # Already by default

from torch.utils.tensorboard import SummaryWriter

#### Building our Network

In [2]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Two convolutional layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        
        # Three fully-connected layers
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120) 
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)
        
    def forward(self,t):
        
        # Forward Propagation
        
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t,kernel_size=2, stride=2)
        
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t,kernel_size=2, stride=2)
        
        t = t.reshape(-1,12*4*4)
        t = F.relu(self.fc1(t))
        
        t = F.relu(self.fc2(t))
        
        t = self.out(t)
        
        return t

#### Dataset

In [3]:
train_set = torchvision.datasets.FashionMNIST(
    root='.data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

#### Different Learning rates and Batch size for different models

In [5]:
from itertools import product

parameter = dict(
    lr = [.01, .001],
    batch_size =[10,100,1000],
    shuffle = [True,False]
)

parameter_values = [v for v in parameter.values()]

parameter_values

[[0.01, 0.001], [10, 100, 1000], [True, False]]

Below are the different hyper-parameters for different models to compare

In [6]:
for lr,batch_size,shuffle in product(*parameter_values):
    print(lr,batch_size,shuffle)

0.01 10 True
0.01 10 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 10 True
0.001 10 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


#### To find number of correct predictions

In [4]:
def no_of_correct(preds,labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

#### Building Network

In [7]:
for lr,batch_size,shuffle in product(*parameter_values):
    
    comment = f'batch_size={batch_size} lr={lr} shuffle={shuffle}'
    
    network = Network()
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
    optimizer = optim.Adam(network.parameters(), lr=lr)

    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)

    tb = SummaryWriter(comment=comment)
    tb.add_image('images', grid)
    tb.add_graph(network, images)

    for epoch in range(5):

        total_loss = 0
        total_correct = 0

        for batch in train_loader: # Get Batch

            images, labels = batch

            preds = network(images)
            loss = F.cross_entropy(preds,labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            total_correct += no_of_correct(preds,labels)

        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

        tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
        tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
        tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)

        print(
            "epoch", epoch, 
            "total_correct:", total_correct, 
            "loss:", total_loss
        )

    tb.close()

epoch 0 total_correct: 45855 loss: 3761.5932630929165
epoch 1 total_correct: 48558 loss: 3183.9299962569494
epoch 2 total_correct: 48901 loss: 3124.7426017308608
epoch 3 total_correct: 48668 loss: 3199.1840930195176
epoch 4 total_correct: 48518 loss: 3217.3085998054594
epoch 0 total_correct: 44342 loss: 4099.078906334005
epoch 1 total_correct: 47163 loss: 3503.6912494851276
epoch 2 total_correct: 47643 loss: 3415.998385605868
epoch 3 total_correct: 47912 loss: 3381.634083751589
epoch 4 total_correct: 48088 loss: 3324.5636867750436
epoch 0 total_correct: 48211 loss: 313.9032698273659
epoch 1 total_correct: 51749 loss: 223.5267741382122
epoch 2 total_correct: 52256 loss: 205.25677658617496
epoch 3 total_correct: 52444 loss: 201.18659153580666
epoch 4 total_correct: 52859 loss: 192.4270580112934
epoch 0 total_correct: 47341 loss: 336.9858583807945
epoch 1 total_correct: 51501 loss: 230.28303688764572
epoch 2 total_correct: 52156 loss: 211.0113084614277
epoch 3 total_correct: 52678 loss: 1

#### Accuracy of Model

In [11]:
accuracy = total_correct/len(train_set)
print('Accuracy of the model = ', accuracy*100, '%')

Accuracy of the model =  87.58666666666667 %


In [17]:
for name,weight in network.named_parameters():
    print(f'{name}.grad   ',weight.grad.shape)

conv1.weight.grad    torch.Size([6, 1, 5, 5])
conv1.bias.grad    torch.Size([6])
conv2.weight.grad    torch.Size([12, 6, 5, 5])
conv2.bias.grad    torch.Size([12])
fc1.weight.grad    torch.Size([120, 192])
fc1.bias.grad    torch.Size([120])
fc2.weight.grad    torch.Size([60, 120])
fc2.bias.grad    torch.Size([60])
out.weight.grad    torch.Size([10, 60])
out.bias.grad    torch.Size([10])
