> **Topic:** Introduction to Neural Network using PyTorch
>
> **Module:** TensorBoard, Checkpoint, K-Fold cross validation
>
> **Presenter:** Industry Sandbox and AI Computing (ISAIC)
>
> **Date:** 

## TensorBoard

TensorBoard is a visualization tool from the TensorFlow project for tracking metrics while a machine learning model is running. We can visualize as many metrics as we want (e.g. loss, accuracy) in real time while the model is training.

We need to launch TensorBoard in the Jupyter notebook before starting the training process in order to get real-time updates on the metrics.

Before going into tensorboard, let's reload everything.

In [3]:
#import pytorch modules
import torch
from torch.utils.data import Dataset
from torchvision import datasets
#from torchvision.transforms import ToTensor
from torchvision import transforms
from torch.utils.data import DataLoader

#import numpy module
import numpy as np

#import plotting modules
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rc('figure', dpi=350)
%matplotlib inline


# Pre-processing applied to every image: force a single grayscale channel,
# convert to a tensor, then normalize with mean 0.5 and std 0.5.
pre_processing = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# Build the training split first, then the test split, from the local
# 'FMNIST' folder (download=False: the files must already be on disk).
train_data, test_data = (
    datasets.FashionMNIST(
        root='FMNIST',
        train=is_train,
        download=False,
        transform=pre_processing,
    )
    for is_train in (True, False)
)

# Wrap both splits in shuffled mini-batch loaders of 32 samples each.
train_loader, test_loader = (
    DataLoader(split, batch_size=32, shuffle=True)
    for split in (train_data, test_data)
)


In [4]:
#load necessary modules
import torch.nn as nn
import torch.nn.functional as F

#define the model class
class ImageClass(nn.Module):
    '''
    Fully-connected image classifier (inherits from nn.Module).

    The network flattens each input sample and passes it through a stack of
    hidden blocks (Linear -> [BatchNorm1d] -> ReLU -> [Dropout]) followed by
    a final Linear layer and a log-softmax over the class dimension.

    Input - input_dim (int): Input dimension for each (flattened) sample
            hidden_dim (list or tuple) [optional]: Each element denotes the
                            dimension of one hidden layer. Defaults to
                            [64, 64, 64]. An empty sequence builds a model
                            with no hidden layer (input -> output directly).
            output_dim (int): Output dimension (in our example case, 10 for
                            total 10 classes of image)
            dropout_rate (float) [optional]: Rate at which Dropout regularization
                            is applied; 0.0 disables dropout
            use_batchnorm (bool) [optional]: Whether to use batch normalization after
                            each hidden layer affine transformation
            **kwargs: Additional keyword arguments forwarded to nn.Module
    Output (forward) - log-probabilities, one row per sample and one column
            per class
    '''

    def __init__(self, input_dim, hidden_dim=None, output_dim=10,
                 dropout_rate=0.0, use_batchnorm=False, **kwargs):
        super().__init__(**kwargs)

        # None stands in for the default architecture so that no mutable list
        # is shared between calls; list(...) also lets callers pass a tuple.
        hidden_dim = [64, 64, 64] if hidden_dim is None else list(hidden_dim)

        # container holding the layers in the order they are applied
        self.linear_model = nn.ModuleList()

        # first flatten our 2D image data into one dimension
        self.linear_model.append(nn.Flatten())

        # build the hidden layers one by one, tracking the width fed into the
        # next layer (starts at the input width)
        in_channel = input_dim
        for out_channel in hidden_dim:
            # affine transformation
            self.linear_model.append(nn.Linear(in_channel, out_channel, bias=True))
            if use_batchnorm:
                # batch normalization, if turned on for the model
                self.linear_model.append(nn.BatchNorm1d(out_channel))
            # activation function
            self.linear_model.append(nn.ReLU())
            if dropout_rate:
                # dropout, if this regularization is turned on
                self.linear_model.append(nn.Dropout(dropout_rate))
            in_channel = out_channel

        # output layer: in_channel is the last hidden width, or input_dim when
        # there are no hidden layers
        self.linear_model.append(nn.Linear(in_channel, output_dim, bias=True))
        self.linear_model.append(nn.LogSoftmax(dim=1))

    def forward(self, x):
        '''Apply every layer in order and return the log-probabilities.'''
        for layer in self.linear_model:
            x = layer(x)
        return x

In [5]:
#necessary python modules (provides the optimizer library)
import torch.optim as optim


######  Hyperparameter settings  #######

# ---- Training schedule ----
N_EPOCHS = 20  # number of epochs to train for

# ---- Loss ----
# negative log-likelihood, applied to the log-softmax output of the model
loss_function = F.nll_loss

# ---- Network architecture ----
input_dim = 28*28        # pixel size for our dataset
hidden_dim = [64,64,64]  # dimension of each hidden layer, in order
output_dim = 10          # dimension of the output layer

# ---- Regularization ----
dropout_rate = 0.3
use_batchnorm = True

# build the model from the settings above
model = ImageClass(
    input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    dropout_rate=dropout_rate,
    use_batchnorm=use_batchnorm,
)

# IMPORTANT: move the model to the GPU (when one is available) *before*
# constructing the optimizer
model = model.cuda() if torch.cuda.is_available() else model

# ---- Optimizer ----
learning_rate = 0.001
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=1e-6,
)

# Alternative optimizer (SGD with momentum), left here for experimentation:
#momentum = 0.9
#optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)


In [11]:
from sklearn.metrics import accuracy_score
def evaluate(model, dataloader, loss_function):
    '''
    Run the model over every batch of a dataloader and report accuracy and loss.

    Only the samples actually yielded by the dataloader are counted, so the
    result is correct for loaders that use a sampler (e.g. a K-fold subset)
    or whose last batch is smaller than the others.

    Input - model (nn.Module): model to evaluate; it is switched to eval mode
            dataloader: iterable yielding (inputs, integer labels) batches
            loss_function (callable): called as loss_function(output, labels)
    Output - tuple (y_true, y_pred, acc, loss):
            y_true (float64 array): labels in the order they were yielded
            y_pred (float64 array): predicted class (arg-max over dim 1)
            acc (float): fraction of samples with y_pred == y_true
            loss (float): mean of the per-batch loss values
            acc and loss are NaN when the dataloader yields no batch.
    '''
    model.eval()

    # Evaluate on whichever device holds the model (CPU if it has no parameters)
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    true_batches = []
    pred_batches = []
    total_loss = 0.0
    n_batches = len(dataloader)

    # No gradients are needed for evaluation; skipping them saves memory/time
    with torch.no_grad():
        for batch_idx, (X, y) in enumerate(dataloader):
            print(f'\rEvaluating {batch_idx + 1} / {n_batches}', end='\r')
            X, y = X.to(device), y.to(device)
            y_i = model(X)
            total_loss += loss_function(y_i, y).item()
            # bring results back to the CPU before converting to numpy
            true_batches.append(y.cpu().numpy())
            pred_batches.append(y_i.argmax(dim=1).cpu().numpy())

    if true_batches:
        y_true = np.concatenate(true_batches).astype(np.float64)
        y_pred = np.concatenate(pred_batches).astype(np.float64)
        acc = float(np.mean(y_true == y_pred))
        loss = total_loss / len(true_batches)
    else:
        y_true = np.zeros(0)
        y_pred = np.zeros(0)
        acc = float('nan')
        loss = float('nan')

    print (f'Running Metrics: Accuracy Score: {acc}, Loss: {loss}')
    return (y_true, y_pred, acc, loss)

def train(model, train_loader, loss_function, optimizer, nepoch=20, book_tb=False,
          tb_writer=None, val_loader=None):
    '''
    Train the model for a number of epochs, validating after every epoch.

    Input - model (nn.Module): model to train (updated in place)
            train_loader: iterable yielding (inputs, integer labels) batches
            loss_function (callable): called as loss_function(output, labels)
            optimizer: optimizer built on model.parameters()
            nepoch (int) [optional]: number of epochs to run
            book_tb (bool) [optional]: when True, log per-epoch "Loss" and
                            "Accuracy" scalars to TensorBoard
            tb_writer (sequence) [optional]: [training writer, validation
                            writer]; required when book_tb is True
            val_loader [optional]: loader used for the per-epoch validation.
                            When omitted, the module-level `test_loader` is
                            used, which is what this function always did.
    Output - None
    Raises - ValueError if book_tb is True but two writers were not supplied
    '''
    # Fail before training starts rather than after the first full epoch
    if book_tb and (tb_writer is None or len(tb_writer) < 2):
        raise ValueError('book_tb=True requires tb_writer=[train_writer, test_writer]')

    # Feed batches to whichever device holds the model (CPU if no parameters)
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(nepoch):
        #print the running epoch number
        print (f'\nEpoch {epoch+1}/{nepoch}')
        #set the model to training mode
        model.train()
        #running totals for this epoch
        epoch_loss = 0.0
        n_correct = 0
        n_seen = 0
        n_batches = 0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_pred = model(X)
            loss = loss_function(y_pred, y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            # count correct predictions so that a smaller last batch is
            # weighted by its actual size
            n_correct += (y_pred.argmax(dim=1) == y).sum().item()
            n_seen += y.size(0)
            n_batches += 1

        #print the model performance using the validation data
        print ('Validation###')
        loader = val_loader if val_loader is not None else test_loader
        _, _, test_accu, test_loss = evaluate(model, loader, loss_function)

        if book_tb:
            # max(..., 1) keeps an empty train_loader from dividing by zero
            epoch_loss /= max(n_batches, 1)
            tb_writer[0].add_scalar("Loss", epoch_loss, epoch+1)

            epoch_accu = n_correct / max(n_seen, 1)
            tb_writer[0].add_scalar("Accuracy", epoch_accu, epoch+1)

            tb_writer[1].add_scalar("Loss", test_loss, epoch+1)
            tb_writer[1].add_scalar("Accuracy", test_accu, epoch+1)
    return None
 

- **Launching TensorBoard**

In [12]:
import tensorboard

%load_ext tensorboard
%tensorboard --logdir=runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 22097), started 0:02:02 ago. (Use '!kill 22097' to kill it.)

To visualize the metrics, we need to first initialize a summary writer.

We then add our metrics to the writer object so that they can be viewed in TensorBoard.

In [13]:
from torch.utils.tensorboard import SummaryWriter

# One writer per curve family: the "Training" and "Testing" comments are
# appended to the run directory names so the two show up as separate runs.
# Using the writers as context managers closes them (flushing pending events
# to disk) once training ends, even if training raises.
with SummaryWriter(comment="Training") as train_writer, \
        SummaryWriter(comment="Testing") as test_writer:
    # pass the configured epoch count instead of relying on train()'s default
    train(model, train_loader, loss_function, optimizer, nepoch=N_EPOCHS,
          book_tb=True, tb_writer=[train_writer, test_writer])


Epoch 1/20
Validation###
Running Metrics: Accuracy Score: 0.8535, Loss: 0.40758793591596065

Epoch 2/20
Validation###
Running Metrics: Accuracy Score: 0.856, Loss: 0.3979589608721078

Epoch 3/20
Validation###
Running Metrics: Accuracy Score: 0.8614, Loss: 0.3809892482841358

Epoch 4/20
Validation###
Running Metrics: Accuracy Score: 0.8593, Loss: 0.3770973405089622

Epoch 5/20
Validation###
Running Metrics: Accuracy Score: 0.8613, Loss: 0.3768467872192304

Epoch 6/20
Validation###
Running Metrics: Accuracy Score: 0.8681, Loss: 0.36850538842689495

Epoch 7/20
Validation###
Running Metrics: Accuracy Score: 0.8675, Loss: 0.36696951465008737

Epoch 8/20
Validation###
Running Metrics: Accuracy Score: 0.87, Loss: 0.3637193247747307

Epoch 9/20
Validation###
Running Metrics: Accuracy Score: 0.872, Loss: 0.3535024970293807

Epoch 10/20
Validation###
Running Metrics: Accuracy Score: 0.8707, Loss: 0.3567437940655044

Epoch 11/20
Validation###
Running Metrics: Accuracy Score: 0.8741, Loss: 0.3478

## Checkpoint

While the model trains iteratively through the epochs, we can save each state of the model as a checkpoint and later load the desired checkpoint, either to resume the training process or to investigate the model at a particular epoch.

The following script shows the general pattern for saving and loading model checkpoints.

In [15]:
#saving a checkpoint

# where the checkpoint file is written
PATH = "model.pt"

# additional bookkeeping stored next to the weights
EPOCH = 20
LOSS = 0.3963

# bundle the model weights, the optimizer state and the bookkeeping values
# into a single dictionary and serialize it to PATH
torch.save(
    obj={
        'epoch': EPOCH,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': LOSS,
    },
    f=PATH,
)


In [16]:
#loading a checkpoint

#pick the device to restore onto (GPU when available, otherwise CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#re-create the model with the same architecture that was saved
model = ImageClass(input_dim, hidden_dim=hidden_dim, output_dim=output_dim,
                  dropout_rate=dropout_rate, use_batchnorm=use_batchnorm)

#****IMPORTANT: move the model to its device before constructing the optimizer
model = model.to(device)


#set the learning rate to use for the optimizer
learning_rate = 0.001
#initialize the optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-6)


#map_location remaps the stored tensors onto `device`, so a checkpoint saved
#on a GPU can still be loaded on a CPU-only machine (and vice versa)
checkpoint = torch.load(PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

#choose the mode that matches what comes next:
#eval() for inference, train() to resume training
model.eval()
# - or -
model.train()

ImageClass(
  (linear_model): ModuleList(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=64, bias=True)
    (2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): Dropout(p=0.3, inplace=False)
    (5): Linear(in_features=64, out_features=64, bias=True)
    (6): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): Dropout(p=0.3, inplace=False)
    (9): Linear(in_features=64, out_features=64, bias=True)
    (10): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Dropout(p=0.3, inplace=False)
    (13): Linear(in_features=64, out_features=10, bias=True)
    (14): LogSoftmax(dim=1)
  )
)

## K-fold Cross Validation

In [17]:
from sklearn.model_selection import KFold
from torch.utils.data import ConcatDataset, SubsetRandomSampler

k_folds = 5

# Cross validation uses all available samples: merge the train and test splits
dataset = ConcatDataset([train_data, test_data])

# Define the K-fold Cross Validator
kfold = KFold(n_splits=k_folds, shuffle=True)

# K-fold Cross Validation model evaluation
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):

    # Print
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Sample elements randomly from a given list of ids, no replacement.
    train_subsampler = SubsetRandomSampler(train_ids)
    test_subsampler = SubsetRandomSampler(test_ids)

    # Define data loaders for training and testing data in this fold.
    # NOTE: train() validates on the module-level `test_loader`, so this
    # assignment is what selects the held-out fold for validation.
    train_loader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=10, sampler=train_subsampler)
    test_loader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=10, sampler=test_subsampler)

    # Start every fold from a freshly initialized model and optimizer.
    # Reusing the previous fold's model would mean validating on samples it
    # has already been trained on, which makes the fold scores meaningless.
    model = ImageClass(input_dim, hidden_dim=hidden_dim, output_dim=output_dim,
                      dropout_rate=dropout_rate, use_batchnorm=use_batchnorm)
    if torch.cuda.is_available():
        model = model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-6)

    train(model, train_loader, loss_function, optimizer)

FOLD 0
--------------------------------
FOLD 1
--------------------------------
FOLD 2
--------------------------------
FOLD 3
--------------------------------
FOLD 4
--------------------------------


We can save a model checkpoint at the end of training for each fold and load them later to compare the model performance across all K folds and choose the best-performing model for our use.