# In [None]:  (Jupyter cell prompt left over from the notebook export)
# CS 499 - Deep Learning (Fall 2020)
# Written by Junhyeok Jeong
# Assignment 2 - CNNs and TensorBoard

import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


# helper function to show an image
# (used in the `plot_classes_preds` function below)
def matplotlib_imshow(img, one_channel=False):
    '''
    Draws a normalized image tensor on the current matplotlib axes.

    Args:
        img: image tensor laid out as (channels, height, width); its values
            are expected to have been normalized with mean 0.5 and std 0.5,
            which is undone before plotting.
        one_channel: if True, the channel axis is averaged away and the
            result is drawn as a greyscale image; otherwise the channels are
            moved last and drawn as a colour image.
    '''
    if one_channel:
        # Average over the channel axis (dim 0) so (C, H, W) becomes (H, W).
        # Averaging over dim 1 would collapse the image rows instead.
        img = img.mean(dim=0)
    img = img / 2 + 0.5     # unnormalize
    # matplotlib needs host memory: drop autograd tracking and copy to CPU.
    npimg = img.detach().cpu().numpy()
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        # (C, H, W) -> (H, W, C), the layout imshow expects for colour images
        plt.imshow(np.transpose(npimg, (1, 2, 0)))

# 2) Add a dropout layer to the network and try to train the model with different dropout values.
# Show the training loss of each dropout value you tried with TensorBoard.
class Net(nn.Module):
    '''
    Two-convolution classifier with dropout, producing 10 log-probabilities.

    The layer sizes fit single-channel 28x28 inputs: each 5x5 convolution
    followed by 2x2 max-pooling shrinks the map 28 -> 24 -> 12 -> 8 -> 4,
    which gives the 16 * 4 * 4 features expected by the first linear layer.

    Args:
        p: dropout probability shared by both dropout applications.
    '''

    def __init__(self, p=0.0):
        super().__init__()
        self.p = p
        # convolutional feature extractor (one pooling module, used twice)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # fully connected classifier head
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.dropout = nn.Dropout(p)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        '''Maps a batch of images to per-class log-probabilities.'''
        stage1 = self.pool(F.relu(self.conv1(x)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        # one row of 16 * 4 * 4 features per image
        flat = stage2.view(-1, 16 * 4 * 4)
        # dropout is applied to the flattened features ...
        hidden = F.relu(self.fc1(self.dropout(flat)))
        hidden = F.relu(self.fc2(hidden))
        # ... and once more right before the output layer
        logits = self.fc3(self.dropout(hidden))
        return F.log_softmax(logits, dim=1)

# 3) Try to add a convolutional layer to the network (think about where do you think an extra layer would help). 
# Train it again to check how does the performance change? Visualize the loss with TensorBoard.
# Answer: The two networks' losses differ during the early and middle epochs, but by the end of training both loss values are similar (see TensorBoard).
class Net2(nn.Module):
    '''
    Wider variant of the classifier with a third convolutional layer (Q3).

    Spatial sizes for single-channel 28x28 inputs:
        conv1 (5x5) + pool : 28 -> 24 -> 12
        conv2 (5x5) + pool : 12 ->  8 ->  4
        conv3 (5x5, padding 2) + pool : 4 -> 4 -> 2
    so the classifier head receives 64 * 2 * 2 features per image.

    Args:
        p: dropout probability shared by both dropout applications.
    '''

    def __init__(self, p=0.0):
        super().__init__()
        self.p = p
        self.conv1 = nn.Conv2d(1, 16, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        # Q3) new layer. padding=2 keeps the 4x4 map from pool2 at 4x4;
        # without it the 5x5 kernel would be larger than its input.
        self.conv3 = nn.Conv2d(32, 64, 5, padding=2)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(64 * 2 * 2, 120)
        self.dropout = nn.Dropout(p)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        '''Maps a batch of images to per-class log-probabilities.'''
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # The extra layer must actually be applied here; previously it was
        # registered in __init__ but never called.
        x = self.pool3(F.relu(self.conv3(x)))
        x = x.view(-1, 64 * 2 * 2)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

# helper function
def select_n_random(data, labels, n=100):
    '''
    Selects n random datapoints and their corresponding labels from a dataset

    Args:
        data: indexable tensor of datapoints (first dimension = samples).
        labels: tensor of labels with the same length as data.
        n: number of samples to draw; if n exceeds the dataset size, every
            sample is returned (in random order).

    Returns:
        A (data, labels) pair indexed by the same random selection, so row i
        of the returned data still matches entry i of the returned labels.
    '''
    assert len(data) == len(labels)

    # Trim the permutation first so only n rows are gathered, instead of
    # materializing a shuffled copy of the whole dataset and then slicing it.
    chosen = torch.randperm(len(data))[:n]
    return data[chosen], labels[chosen]

# train model
    # 1) Train the model for at least 20 epochs.
    # Make tensorboard to display the training curve (training loss vs. epoch) as well as some images from the training set.
def train(model, device, train_loader, optimizer, epoch, writer, is_dropout):
    '''
    Trains the model for one epoch and returns the epoch's mean training loss.

    Args:
        model: network to train; it is switched to training mode.
        device: device every batch is moved to before the forward pass.
        train_loader: loader yielding (data, target) batches; it must expose
            `dataset` and `__len__` (used for the progress print).
        optimizer: optimizer stepping the model's parameters.
        epoch: zero-based epoch index (printed and logged as epoch + 1).
        writer: TensorBoard writer; only used when is_dropout == "no".
        is_dropout: the string "no" enables logging of the Q1 scalar; any
            other value skips the logging.

    Returns:
        The cross-entropy loss averaged over every sample seen in this epoch.

    Raises:
        ValueError: if the loader yields no samples.
    '''
    model.train()
    criterion = nn.CrossEntropyLoss()

    loss_sum = 0.0
    samples_seen = 0
    for batch_id, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # forward pass, calculate loss and backprop!
        optimizer.zero_grad()
        preds = model(data)
        loss = criterion(preds, target)
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch
        # size to get a per-sample average over the whole epoch.
        batch_loss = loss.item()
        loss_sum += batch_loss * len(data)
        samples_seen += len(data)

        if batch_id % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, batch_id * len(data), len(train_loader.dataset),
                       100. * batch_id / len(train_loader), batch_loss))

    if samples_seen == 0:
        raise ValueError("train_loader produced no samples")

    # A single mini-batch loss is a noisy stand-in for the epoch, so the
    # curve (and the return value) use the mean over the whole epoch.
    epoch_loss = loss_sum / samples_seen

    # Record loss into the writer
    if is_dropout == "no":
        writer.add_scalar('Q1: training loss vs. epoch', epoch_loss, epoch + 1)
        writer.flush()

    return epoch_loss

def q3_train(model, device, train_loader, optimizer, epoch, writer):
    '''
    Trains the model for one epoch (Q3 comparison) without logging anything.

    Args:
        model: network to train; it is switched to training mode.
        device: device every batch is moved to before the forward pass.
        train_loader: loader yielding (data, target) batches; it must expose
            `dataset` and `__len__` (used for the progress print).
        optimizer: optimizer stepping the model's parameters.
        epoch: zero-based epoch index (printed as epoch + 1).
        writer: accepted for signature compatibility; not used here, the
            caller logs the returned loss itself.

    Returns:
        The cross-entropy loss averaged over every sample seen in this epoch.

    Raises:
        ValueError: if the loader yields no samples.
    '''
    model.train()
    criterion = nn.CrossEntropyLoss()

    loss_sum = 0.0
    samples_seen = 0
    for batch_id, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # forward pass, calculate loss and backprop!
        optimizer.zero_grad()
        preds = model(data)
        loss = criterion(preds, target)
        loss.backward()
        optimizer.step()

        # Weight the batch mean by the batch size so the epoch figure is a
        # per-sample average rather than one arbitrary mini-batch.
        batch_loss = loss.item()
        loss_sum += batch_loss * len(data)
        samples_seen += len(data)

        if batch_id % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, batch_id * len(data), len(train_loader.dataset),
                       100. * batch_id / len(train_loader), batch_loss))

    if samples_seen == 0:
        raise ValueError("train_loader produced no samples")

    return loss_sum / samples_seen

def test(model, device, testloader, epoch, writer, dropout):
    '''
    Evaluates the model on a test loader and logs loss/accuracy (Q4).

    Args:
        model: network returning per-class log-probabilities (the loss is
            computed with F.nll_loss); it is switched to eval mode.
        device: device every batch is moved to before the forward pass.
        testloader: loader yielding (data, target) batches.
        epoch: step value under which both scalars are logged.
        writer: TensorBoard writer receiving the two Q4 scalars.
        dropout: dropout value, only used to build the scalar tags.

    Returns:
        (test_loss, accuracy): mean negative log-likelihood per sample and
        accuracy in percent, both as Python floats.

    Raises:
        ValueError: if the loader yields no samples.
    '''
    model.eval()  # SWITCH TO TEST MODE
    loss_sum = 0.0
    num_correct = 0
    num_samples = 0

    with torch.no_grad():
        for data, target in testloader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum (not mean) of the per-sample losses in this batch
            loss_sum += F.nll_loss(output, target, reduction='sum').item()
            pred = output.max(1)[1]  # get the index of the max log-probability
            # .item() keeps the counter a plain int instead of a tensor
            num_correct += pred.eq(target).sum().item()
            num_samples += len(data)

    if num_samples == 0:
        raise ValueError("testloader produced no samples")

    # The batch losses were summed per sample, so the average has to divide
    # by the number of samples (dividing by the number of batches inflates
    # the value by roughly the batch size).
    test_loss = loss_sum / num_samples
    accuracy = 100. * num_correct / num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, num_correct, num_samples,
        accuracy))

    # 4) Try to add visualizations of testing error and testing loss with TensorBoard.
    writer.add_scalar('Q4: Testing loss vs. Epoch (dropout : %.2f)' % dropout, test_loss, epoch)
    writer.add_scalar('Q4: Accuracy vs. Epoch (dropout : %.2f)' % dropout, accuracy, epoch)
    writer.flush()

    return test_loss, accuracy


# Despite its name this is an evaluation helper, not a unit test: opt out of
# pytest collection in case this module is ever imported by a test suite.
test.__test__ = False

# helper functions

def images_to_probs(net, images):
    '''
    Generates predictions and corresponding probabilities from a trained
    network and a list of images

    Args:
        net: callable mapping a batch of images to per-class scores of shape
            (batch, classes).
        images: batch of images accepted by net.

    Returns:
        (preds, probs): preds is a 1-D numpy array with the predicted class
        index of every image; probs is a list with the softmax probability
        the network assigns to each of those predicted classes.
    '''
    # inference only: no need to build an autograd graph
    with torch.no_grad():
        output = net(images)
        # convert output probabilities to predicted class
        _, preds_tensor = torch.max(output, 1)
        # Softmax preserves ordering, so the largest probability in each row
        # is the probability of the predicted class.
        top_probs, _ = torch.max(F.softmax(output, dim=1), 1)
    # preds_tensor is already 1-D. Squeezing it would turn a one-image batch
    # into a 0-d array that cannot be iterated or indexed by the caller.
    # .cpu() makes the conversion work for GPU tensors as well.
    preds = preds_tensor.cpu().numpy()
    return preds, top_probs.tolist()


def plot_classes_preds(net, images, labels):
    '''
    Generates matplotlib Figure using a trained network, along with images
    and labels from a batch, that shows the network's top prediction along
    with its probability, alongside the actual label, coloring this
    information based on whether the prediction was correct or not.
    Uses the "images_to_probs" function.

    At most the first four images of the batch are drawn; class names are
    looked up in the module-level `classes` tuple.
    '''
    preds, probs = images_to_probs(net, images)
    # plot the images in the batch, along with predicted and true labels
    fig = plt.figure(figsize=(12, 48))
    # The grid has four slots; batches with fewer images fill only the first
    # ones instead of raising an IndexError.
    num_shown = min(4, len(images))
    for idx in range(num_shown):
        ax = fig.add_subplot(1, 4, idx+1, xticks=[], yticks=[])
        # plotting happens on the host, so hand over a CPU copy of the image
        matplotlib_imshow(images[idx].detach().cpu(), one_channel=True)
        ax.set_title("{0}, {1:.1f}%\n(label: {2})".format(
            classes[preds[idx]],
            probs[idx] * 100.0,
            classes[labels[idx]]),
                    color=("green" if preds[idx]==labels[idx].item() else "red"))
    return fig

# helper function
def add_pr_curve_tensorboard(class_index, test_probs, test_preds, global_step=0):
    '''
    Writes the precision-recall curve of one class to TensorBoard.

    Args:
        class_index: index into the module-level `classes` tuple (0 to 9).
        test_probs: 2-D tensor of class probabilities; column class_index is
            used as the score of this class.
        test_preds: 1-D tensor of class indices; entries equal to class_index
            form the positive set of the curve.
        global_step: step value stored with the curve.

    NOTE(review): the boolean mask built from test_preds is passed as the
    `labels` argument of SummaryWriter.add_pr_curve, which is meant to hold
    ground truth -- confirm that callers pass true labels here.

    The module-level `writer` is closed after the curve has been added.
    '''
    is_target_class = test_preds == class_index
    class_scores = test_probs[:, class_index]
    curve_tag = classes[class_index]

    writer.add_pr_curve(curve_tag, is_target_class, class_scores,
                        global_step=global_step)
    writer.close()



if __name__ == '__main__':
    # Script entry point: loads FashionMNIST, logs sample images and an
    # embedding, runs the dropout sweep (Q1/Q2/Q4), compares Net with Net2
    # (Q3) and finally writes one precision-recall curve per class.

    # transforms
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5,), (0.5,))])

    # datasets
    trainset = torchvision.datasets.FashionMNIST('./data',
        download=True,
        train=True,
        transform=transform)
    testset = torchvision.datasets.FashionMNIST('./data',
        download=True,
        train=False,
        transform=transform)

    # dataloaders
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                              shuffle=True, num_workers=2)
    testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                             shuffle=False, num_workers=2)

    # constant for classes
    classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

    # tensor board part

    # NOTE(review): this looks like the usual workaround for add_embedding
    # failing when TensorFlow is installed next to TensorBoard. Without
    # TensorFlow there is nothing to patch, so a missing package is not an
    # error here.
    try:
        import tensorflow as tf
        import tensorboard as tb
        tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
    except ImportError:
        pass

    # Load the TensorBoard notebook extension. The `%load_ext` / `%tensorboard`
    # magics only exist inside IPython/Jupyter, so go through the shell object
    # when there is one and skip the step when run as a plain script.
    try:
        ipython_shell = get_ipython()
    except NameError:
        ipython_shell = None
    if ipython_shell is not None:
        ipython_shell.run_line_magic('load_ext', 'tensorboard')
        ipython_shell.run_line_magic('tensorboard', '--logdir=runs')
        # (use run_line_magic('reload_ext', 'tensorboard') to reload instead)

    from torch.utils.tensorboard import SummaryWriter

    # default `log_dir` is "runs" - we'll be more specific here
    writer = SummaryWriter('runs/fashion_mnist_experiment_1')

    dataiter = iter(trainloader)

    for count in range(10):
        # built-in next(): loader iterators no longer provide a .next() method
        images, labels = next(dataiter)
        # create grid of images
        img_grid = torchvision.utils.make_grid(images)
        writer.add_image('images from trainset %d' % (count + 1), img_grid)

    # select random images and their target indices
    images, labels = select_n_random(trainset.data, trainset.targets)

    # get the class labels for each image
    class_labels = [classes[lab] for lab in labels]

    # log embeddings
    features = images.view(-1, 28 * 28)
    writer.add_embedding(features,
                         metadata=class_labels,
                         label_img=images.unsqueeze(1))

    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # 2) Add a dropout layer to the network and try to train the model with different dropout values.
    # Show the training loss of each dropout value you tried with TensorBoard.

    dropouts = [0.0, 0.2, 0.4]
    for dropout in dropouts:
        print("Dropout %f" % dropout)
        net = Net(dropout).to(device)
        print(net)

        optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

        # train() logs the Q1 curve only for the run without dropout
        is_dropout = 'yes' if dropout > 0.0 else 'no'

        for epoch in range(0, 20):
            print("Epoch %d" % (epoch + 1))
            last_loss = train(net, device, trainloader, optimizer, epoch, writer, is_dropout)
            test(net, device, testloader, epoch, writer, dropout)

        # I multiplied 10 to dropout because of step size on tensorboard
        writer.add_scalar('Q2: training loss vs. dropout (x10)', last_loss, dropout*10)

        writer.flush()

    # Q3) performance comparison between Net and Net2
    q3_net = Net(0.25).to(device)
    q3_net2 = Net2(0.25).to(device)

    print(q3_net)
    print(q3_net2)

    q3_optimizer = optim.SGD(q3_net.parameters(), lr=0.001, momentum=0.9)
    q3_optimizer2 = optim.SGD(q3_net2.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(0, 20):
        print("Q3: Epoch %d" % (epoch + 1))
        net1_loss = q3_train(q3_net, device, trainloader, q3_optimizer, epoch, writer)
        net2_loss = q3_train(q3_net2, device, trainloader, q3_optimizer2, epoch, writer)
        writer.add_scalars('Q3: training loss net 1 & net 2 vs. epoch', {'net1_loss' : net1_loss, 'net2_loss' : net2_loss}, epoch)
        writer.flush()

    # The writer is still needed for the precision-recall curves below, so
    # only flush here and close it at the very end.
    writer.flush()

    # 1. gets the probability predictions in a test_size x num_classes Tensor
    # 2. gets the true labels in a test_size Tensor
    # takes ~10 seconds to run
    # `net` is the last model of the dropout sweep above (dropout 0.4).
    net.eval()
    class_probs = []
    true_labels = []
    with torch.no_grad():
        for data, target in testloader:
            output = net(data.to(device))
            # batched softmax over the class dimension; moved to the CPU so
            # probabilities and labels live on the same device
            class_probs.append(F.softmax(output, dim=1).cpu())
            true_labels.append(target)

    test_probs = torch.cat(class_probs)
    test_labels = torch.cat(true_labels)

    # plot all the pr curves
    # A precision-recall curve compares the scores against the ground truth,
    # so the true labels (not the model's own predictions) are passed as the
    # third argument.
    for i in range(len(classes)):
        add_pr_curve_tensorboard(i, test_probs, test_labels)

    writer.close()