## Using TensorBoard in PyTorch

In [1]:
# Import the standard libraries you will use most of the time
import torch 
import torch.nn as nn
import matplotlib.pyplot as plt
from torchinfo import summary
from torchvision import transforms, datasets

# You should first run this in your environment:
# conda install -c conda-forge tensorboard
from torch.utils.tensorboard import SummaryWriter

print("PyTorch version:", torch.__version__)

PyTorch version: 1.13.1


In [2]:
# Bring back the MNIST dataset

# Transform pipeline: turn each PIL image into a PyTorch tensor
data_transform = transforms.Compose([transforms.ToTensor()])

# Training split (downloaded into ./data when not already present)
mnist_train = datasets.MNIST(
    root='./data',
    train=True,
    transform=data_transform,
    download=True,
)
# Test split, same transform
mnist_test = datasets.MNIST(
    root='./data',
    train=False,
    transform=data_transform,
    download=True,
)

# Batch the data: training batches are reshuffled, test batches keep their order
train_dataloader = torch.utils.data.DataLoader(dataset=mnist_train, batch_size=64, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=mnist_test, batch_size=64, shuffle=False)

In [3]:
# Bring back the MNIST classifier model
class NeuralNet(nn.Module):
    """Two-layer fully connected classifier for 28x28 inputs with 10 classes.

    ``forward`` returns raw, unnormalised class scores (logits). This is what
    ``nn.CrossEntropyLoss`` expects, because that loss applies log-softmax
    internally; feeding it softmax outputs would normalise twice and squash
    the gradients. Use ``predict_proba`` when probabilities are needed.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1     = nn.Linear(28 * 28, 128)
        self.relu    = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.fc2     = nn.Linear(128, 10)
        # Kept as an attribute for backward compatibility; only used by
        # predict_proba, never inside forward.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the logits, one row of 10 scores per input sample."""
        x = self.flatten(x) # Flatten everything after the batch dimension
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        # No softmax here: the loss function normalises the logits itself.
        return self.fc2(x)

    def predict_proba(self, x):
        """Return class probabilities (softmax over the logits of ``x``)."""
        return self.softmax(self(x))
    
# Use the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Instantiate the network, then move it onto the chosen device
model = NeuralNet()
model = model.to(device)

# Display the layer-by-layer summary
summary(model, input_size=(1,28,28))

Layer (type:depth-idx)                   Output Shape              Param #
NeuralNet                                [1, 10]                   --
├─Flatten: 1-1                           [1, 784]                  --
├─Linear: 1-2                            [1, 128]                  100,480
├─ReLU: 1-3                              [1, 128]                  --
├─Dropout: 1-4                           [1, 128]                  --
├─Linear: 1-5                            [1, 10]                   1,290
├─Softmax: 1-6                           [1, 10]                   --
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
Total mult-adds (M): 0.10
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.41
Estimated Total Size (MB): 0.41

In [4]:
# Let's compare the optimizers' performance and log the results to TensorBoard

# Number of epochs used when fitting the model
num_epochs = 5

# Criterion used to score the model's predictions
CEloos = nn.CrossEntropyLoss()

In [5]:
def train_model(epochs, writer, optimizer):
    """Train the global ``model`` and log per-epoch metrics through ``writer``.

    Relies on names defined in earlier cells: ``model``, ``device``,
    ``CEloos``, ``train_dataloader``, ``test_dataloader``, ``mnist_train``
    and ``mnist_test``.

    Args:
        epochs: Number of passes over the training data.
        writer: Object exposing ``add_scalar(tag, value, step)`` and
            ``flush()`` (e.g. a TensorBoard ``SummaryWriter``).
        optimizer: Optimizer built over the parameters of the global ``model``.

    Logs three scalars per epoch: ``"Train Acc:"``, ``"Test  Acc:"`` and
    ``"Loss/train"`` (mean of the per-batch losses). The model is left in
    evaluation mode when the function returns.
    """
    # Iterate over the requested number of epochs (not the global num_epochs)
    for epoch in range(epochs):
        # Keep track of network progress
        train_loss    = 0.0
        train_correct = 0
        test_correct  = 0

        # Training mode: layers such as dropout are active
        model.train()

        # Visit each training batch once
        for images, labels in train_dataloader:
            images = images.to(device)
            labels = labels.to(device)

            # Compute model output and loss
            outputs = model(images)
            loss    = CEloos(outputs, labels)

            # Backpropagate
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate the batch loss
            train_loss    += loss.item()
            # Count correct predictions; the comparison stays on `device`
            train_correct += (outputs.argmax(dim=1) == labels).sum().item()

        # Evaluation mode: dropout is disabled so test accuracy is deterministic
        model.eval()

        # Test loop (once per epoch)
        with torch.no_grad():
            for images, labels in test_dataloader:
                outputs = model(images.to(device))
                test_correct += (outputs.argmax(dim=1) == labels.to(device)).sum().item()

        # Compute accuracy (train & test)
        train_acc = train_correct / len(mnist_train)
        test_acc  = test_correct  / len(mnist_test)

        # Log on TensorBoard using writer
        writer.add_scalar("Train Acc:", train_acc, epoch)
        writer.add_scalar("Test  Acc:", test_acc, epoch)
        # train_loss is a sum of per-batch means, so average over the number
        # of batches rather than the number of samples.
        writer.add_scalar("Loss/train", train_loss / len(train_dataloader), epoch)

    # Make sure buffered events reach disk before the caller reads the logs
    writer.flush()


### TensorBoard

In [6]:
# Create one SummaryWriter per optimizer. An explicit log directory is passed,
# so events are written under ./logs/ (instead of the default ./runs/).
writer_adam = SummaryWriter("logs/adam")
writer_sgd  = SummaryWriter("logs/sgd")

# Same seed before each run so both optimizers start from identical weights
SEED = 0

# Train with Adam. Build a fresh model here so that re-running this cell never
# continues training a model left over from a previous run.
torch.manual_seed(SEED)
model = NeuralNet().to(device)
optimizer_adam = torch.optim.Adam(model.parameters(), lr=0.001)

train_model(num_epochs, writer_adam, optimizer_adam)
writer_adam.close()  # flush pending events and release the event file

# IMPORTANT: reset the model before training again with SGD
torch.manual_seed(SEED)
model = NeuralNet().to(device)
optimizer_sgd  = torch.optim.SGD(model.parameters(),  lr=0.1)

train_model(num_epochs, writer_sgd,  optimizer_sgd)
writer_sgd.close()

In [7]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

# Start TensorBoard on logs/, the parent of the logs/adam and logs/sgd run directories
%tensorboard --logdir logs/

## Further reading

* More PyTorch + TensorBoard examples at: https://pytorch.org/docs/stable/tensorboard.html
* TensorBoard alternative (highly recommended): https://wandb.ai/site
