### TensorBoard Integration with PyTorch

**Date:** 15/11/2021  
**Author:** Murad Popattia

In [1]:
# import libraries
import torch
import torch.nn as nn # all the layers
import torch.optim as optim # all the optimization algos
import torch.nn.functional as F # all the activation functions
from torch.utils.data import DataLoader # helps to create mini-batches etc.
import torchvision.datasets as datasets # for getting all datasets
import torchvision.transforms as transforms # for transformations on the dataset
from torchvision import utils
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score

from torch.utils.tensorboard import SummaryWriter # to print tensorboard

# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [2]:
# creating our CNN
class CNN(nn.Module):
    """Small convolutional classifier for 28x28 images (e.g. MNIST).

    Layout: conv 3x3 (8 filters) -> ReLU -> 2x2 max-pool
            -> conv 3x3 (16 filters) -> ReLU -> 2x2 max-pool
            -> flatten -> fully connected layer producing ``num_classes`` scores.

    Args:
        in_channels: number of channels of the input images (1 for grayscale).
        num_classes: number of output scores (one per class).

    Note:
        ``fc1`` is sized as ``16 * 7 * 7``, i.e. it assumes 28x28 inputs
        (28 -> 14 -> 7 after the two pooling steps).
    """

    def __init__(self, in_channels = 1, num_classes = 10):
        super().__init__()
        # "same" convolution: 3x3 kernel, stride 1, padding 1 keeps height/width unchanged.
        # The input channel count comes from the constructor argument instead of a literal 1.
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=8, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        # 2x2 pooling with stride 2 halves height and width; the same module is reused twice
        self.pool = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        # spatial size: 28 -> 14 -> 7, with 16 channels after conv2
        self.fc1 = nn.Linear(16*7*7, num_classes)

    def forward(self, x):
        """Return the unnormalized class scores for a batch ``x`` of images."""
        x = F.relu(self.conv1(x)) # adding non-linearity for filters
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x) # reusing the same pooling
        x = x.reshape(x.shape[0], -1) # keep the mini-batches and flatten the rest out
        x = self.fc1(x)

        return x

In [3]:
# hyper params
# -- model shape: input image channels and number of target classes
in_channels, num_classes = 1, 10

# -- optimisation: learning rate, mini-batch size, passes over the training set
lr = 1e-3
batch_size = 64
num_epochs = 1

In [6]:
# initialize the network
# Build the CNN from the declared hyper-parameters (rather than relying on the
# constructor defaults happening to match them) and move it to the selected device.
model = CNN(in_channels=in_channels, num_classes=num_classes).to(device) # sending model to device

In [5]:
# Print a per-layer table of output shapes and parameter counts for the model,
# using a single-channel 28x28 input as the reference shape.
from torchsummary import summary

summary(model, input_size=(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
         MaxPool2d-2            [-1, 8, 14, 14]               0
            Conv2d-3           [-1, 16, 14, 14]           1,168
         MaxPool2d-4             [-1, 16, 7, 7]               0
            Linear-5                   [-1, 10]           7,850
Total params: 9,098
Trainable params: 9,098
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.09
Params size (MB): 0.03
Estimated Total Size (MB): 0.13
----------------------------------------------------------------


In [7]:
# loss and optimizer
# cross-entropy between the model's class scores and the integer targets
criterion = nn.CrossEntropyLoss()
# Adam over all of the model's parameters, with the configured learning rate
optimizer = optim.Adam(params=model.parameters(), lr=lr)

In [4]:
# MNIST train / test splits, downloaded under ../datasets/ if missing and converted to tensors.
mnist_root = '../datasets/'
to_tensor = transforms.ToTensor()

train_dataset = datasets.MNIST(root=mnist_root, train=True, transform=to_tensor, download=True)
# training batches are reshuffled every epoch
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(root=mnist_root, train=False, transform=to_tensor, download=True)
# evaluation does not benefit from shuffling; a fixed order keeps the per-batch
# validation numbers comparable from one run to the next
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Baseline training run: for every epoch, train on all mini-batches of train_loader,
# then evaluate on test_loader and attach the validation averages to the progress bar.
for epoch in range(num_epochs):  # 1 epoch means the model has seen all the images
    n_batches = len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs}')
    pbar = tf.keras.utils.Progbar(target=n_batches)

    # Training
    model.train()

    for idx, (data, target) in enumerate(train_loader):  # enumerating to see the batch idx

        # get data to cuda if possible
        data = data.to(device)
        target = target.to(device)

        # forward
        scores = model(data)
        loss = criterion(scores, target)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent step
        optimizer.step()

        # per-batch metrics as plain Python floats, so no autograd graph is kept alive
        batch_loss = loss.item()
        _, preds = scores.max(1)
        batch_acc = (preds == target).sum().item() / data.shape[0]

        # idx is 0-based, so the bar stays one step short of its target while training;
        # it is completed by the final update below, once validation metrics are known
        pbar.update(idx, values=[("loss", batch_loss), ("acc: ", batch_acc)])

    # for every epoch calculate test loss
    # validation
    model.eval()

    val_loss_sum = 0.0
    val_correct = 0
    val_seen = 0

    with torch.no_grad():  # no computation for gradients
        for data, target in test_loader:
            # get data to cuda
            data = data.to(device)
            target = target.to(device)

            scores = model(data)
            n_samples = data.shape[0]

            # criterion returns the batch mean; weight it by batch size so a smaller
            # last batch does not skew the epoch average
            val_loss_sum += criterion(scores, target).item() * n_samples

            _, preds = scores.max(1)
            val_correct += (preds == target).sum().item()
            val_seen += n_samples

    # Report validation once, together with completing the bar. Updating the bar per
    # validation batch would restart its position from 0 and rewind the display.
    val_values = None
    if val_seen:
        val_values = [
            ("validation loss", val_loss_sum / val_seen),
            ("validation acc", val_correct / val_seen),
        ]
    pbar.update(n_batches, values=val_values)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5

### Adding batch sizes and learning rates

In [5]:
# Grid explored by the sweep below: every batch size is paired with every learning rate.
batch_sizes = [8, 64] # we don't change the bz for test
learning_rates = [1e-3, 1e-4]
# human-readable names of the ten digit classes, indexed by label
classes = [str(digit) for digit in range(10)]

In [6]:
# Hyper-parameter sweep: train a fresh model for every (batch size, learning rate) pair
# and log scalars, the fc1 weight histogram and an hparams summary to TensorBoard.
for batch_size in batch_sizes:
    for lr in learning_rates:
        print(f'Running for Batch_size = {batch_size}, Learning_rate = {lr}')

        # using altering batch_sizes for the train_loader (test_loader is reused unchanged)
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

        # initialize the network: new weights, loss and optimizer for every configuration
        model = CNN(in_channels=in_channels, num_classes=num_classes).to(device) # sending model to device

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        # one log directory per configuration; the context manager closes the writer
        # (flushing pending events) even if training raises
        with SummaryWriter(f'runs/MNIST/MiniBatchSize {batch_size}, LR {lr}') as writer:

            # Visualize model in TensorBoard (disabled):
            # images, _ = next(iter(train_loader))
            # writer.add_graph(model, images.to(device))

            # global step counters live outside the epoch loop so that points logged in
            # later epochs extend the curves instead of overwriting earlier steps
            step = 0
            val_step = 0

            # training metrics of the most recent epoch (used for the hparams summary)
            losses = []
            accuracies = []

            for epoch in range(num_epochs):  # 1 epoch means the model has seen all the images
                losses = []
                accuracies = []

                n_batches = len(train_loader)
                print(f'Epoch {epoch+1}/{num_epochs}')
                pbar = tf.keras.utils.Progbar(target=n_batches)

                # Training
                model.train()

                for idx, (data, target) in enumerate(train_loader):  # enumerating to see the batch idx

                    # get data to cuda if possible
                    data = data.to(device)
                    target = target.to(device)

                    # forward
                    scores = model(data)
                    loss = criterion(scores, target)

                    # backward
                    optimizer.zero_grad()
                    loss.backward()

                    # gradient descent step
                    optimizer.step()

                    # per-batch metrics as plain floats
                    batch_loss = loss.item()
                    _, preds = scores.max(1)
                    running_train_acc = (preds == target).sum().item() / data.shape[0]

                    losses.append(batch_loss)
                    accuracies.append(running_train_acc)

                    writer.add_scalar('Training loss', batch_loss, global_step=step)
                    writer.add_scalar('Training accuracy', running_train_acc, global_step=step)

                    # plotting the weight; the step is passed so the histogram is tracked over training
                    writer.add_histogram("fc1", model.fc1.weight, global_step=step)

                    # idx is 0-based, so the bar stays one step short of its target while
                    # training; it is completed by the final update after validation
                    pbar.update(idx, values=[("loss", batch_loss), ("acc: ", running_train_acc)])

                    # Optional embedding projector (disabled: `features` must first be
                    # produced, e.g. from the activations that are fed into fc1):
                    # if idx == 230:
                    #     writer.add_embedding(
                    #         features,
                    #         metadata=[classes[label] for label in preds],
                    #         label_img=data,
                    #         global_step=step,
                    #     )

                    step += 1

                # for every epoch calculate test loss
                # validation
                model.eval()

                val_loss_sum = 0.0
                val_correct = 0
                val_seen = 0

                with torch.no_grad():  # no computation for gradients
                    for data, target in test_loader:
                        # get data to cuda
                        data = data.to(device)
                        target = target.to(device)

                        scores = model(data)
                        val_loss = criterion(scores, target).item()

                        _, preds = scores.max(1)
                        n_samples = data.shape[0]
                        n_correct = (preds == target).sum().item()
                        running_val_acc = n_correct / n_samples

                        writer.add_scalar('Validation loss', val_loss, global_step=val_step)
                        writer.add_scalar('Validation accuracy', running_val_acc, global_step=val_step)
                        val_step += 1

                        # sample-weighted sums for the epoch averages shown on the bar
                        val_loss_sum += val_loss * n_samples
                        val_correct += n_correct
                        val_seen += n_samples

                # Report validation once, together with completing the bar. Updating the bar
                # per validation batch would restart its position from 0 and rewind the display.
                val_values = None
                if val_seen:
                    val_values = [
                        ("validation loss", val_loss_sum / val_seen),
                        ("validation acc", val_correct / val_seen),
                    ]
                pbar.update(n_batches, values=val_values)

            # One hparams entry per configuration, summarizing the last epoch's mean
            # training accuracy and loss. Skipped when nothing was trained.
            if losses:
                writer.add_hparams(
                    {"lr": lr, "bsize": batch_size},
                    {
                        "accuracy": sum(accuracies) / len(accuracies),
                        "loss": sum(losses) / len(losses),
                    },
                )

Running for Batch_size = 8, Learning_rate = 0.001
Epoch 1/1
Running for Batch_size = 8, Learning_rate = 0.0001
Epoch 1/1
Running for Batch_size = 64, Learning_rate = 0.001
Epoch 1/1
Running for Batch_size = 64, Learning_rate = 0.0001
Epoch 1/1


Run the following command from the notebook's directory to view the logged results in TensorBoard:

```
> tensorboard --logdir runs
```