In [0]:
import torch
import torchvision
import torchvision.datasets as tdata
import torchvision.transforms as tTrans
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optm
import matplotlib.pyplot as plt
import argparse
# Python Imaging Library
import PIL
import numpy as np
import sys as sys

In [0]:
# Global Parameters
# Compute target shared by the whole notebook: the first CUDA GPU when PyTorch
# can see one, the CPU otherwise.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# ========================================================================================================================
# Functions and Network Template
# ========================================================================================================================
def load_data(bSize = 32, mean = 0.2860, std = 0.3530):
    """Build the Fashion-MNIST train and test data loaders.

    The dataset is downloaded to ``./data`` on first use. Every image is
    converted from a PIL image to a tensor and then normalised with the given
    single-channel ``mean`` and ``std``.

    Args:
        bSize: Number of samples per batch for both loaders.
        mean:  Mean used by the normalisation transform. The default is the
               commonly cited Fashion-MNIST training-set mean (the previous
               hard-coded 0.1307 is the statistic of the MNIST *digits* set).
        std:   Standard deviation used by the normalisation transform; the
               default is the commonly cited Fashion-MNIST value.

    Returns:
        ``(trainLoader, testLoader)``. The training loader reshuffles every
        epoch on every device; the test loader keeps the dataset order.
    """
    # A background worker and pinned (page-locked) host memory only pay off
    # when batches are copied to a GPU, so request them only when CUDA exists.
    # Shuffling is deliberately NOT part of this dict: it used to be, which
    # silently disabled shuffling of the training set on CPU-only machines.
    comArgs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {}

    # Transforms run in the listed order: PIL image -> tensor -> normalised tensor.
    transform = tTrans.Compose([tTrans.ToTensor(), tTrans.Normalize((mean,), (std,))])

    # Each dataset item is an (image, label) pair.
    mnistTrainset = tdata.FashionMNIST(root='./data', train=True, download=True, transform=transform)
    mnistTestset = tdata.FashionMNIST(root='./data', train=False, download=True, transform=transform)

    # DataLoader adds the batching (and, for training, shuffling) logic.
    trainLoader = torch.utils.data.DataLoader(mnistTrainset, batch_size = bSize, shuffle = True, **comArgs)
    # Evaluation visits every sample exactly once, so its order is irrelevant.
    testLoader = torch.utils.data.DataLoader(mnistTestset, batch_size = bSize, shuffle = False, **comArgs)

    return trainLoader, testLoader
 

In [0]:
# Model Definition
class Net(nn.Module):
    """Several Fashion-MNIST classifier variants bundled in one module.

    ``__init__`` declares the layers of every variant; each ``forward*`` /
    ``*Network`` method wires a subset of them into one architecture. All
    variants take a batch of single-channel 28x28 images and return
    log-probabilities of shape ``[batchSize, 10]``, which is what
    ``F.nll_loss`` (used by ``train`` and ``test``) expects.

    ``forward`` delegates to ``robustNetwork``, so ``model(x)`` runs the same
    architecture that ``train`` and ``test`` use.

    Attributes:
        accuracy:  Fraction of correctly classified samples from the last ``test``.
        trainLoss: Loss (Python float) of the last batch of the last ``train`` epoch.
        testLoss:  Mean per-sample loss from the last ``test``.
    """

    # Class-level defaults for the measures; train()/test() overwrite them on
    # the instance.
    accuracy = 0
    trainLoss= 0
    testLoss = 0

    def __init__(self):
        """Declare the building blocks (layers) used by all variants."""
        super(Net, self).__init__()
        # NOTE: a module with no layers at all makes the optimizer fail with
        # "ValueError: optimizer got an empty parameter list".

        # Fully connected variants (input is the flattened 28*28 = 784 image).
        self.simple1 = nn.Linear(784, 392)
        self.simple2 = nn.Linear(392, 10)
        self.double1 = nn.Linear(784, 392)
        self.double2 = nn.Linear(392,64)
        self.double3 = nn.Linear(64, 10)
        self.triple1 = nn.Linear(784, 256)
        self.triple2 = nn.Linear(256, 128)
        self.triple3 = nn.Linear(128, 64)
        self.triple4 = nn.Linear(64, 10)

        # Single conv block: 28x28 -> conv(k5, s2, p1) -> 13x13 -> pool -> 6x6, 8 channels.
        self.simpleBlock = nn.Sequential(
            nn.Conv2d(1,8,kernel_size = 5, stride = 2, padding = 1),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )
        self.simpleBlockFC = nn.Linear(6*6*8,10)

        # Two stacked conv blocks; padding 2 keeps the size, each pool halves it:
        # 28x28 -> 14x14 (16 channels) -> 7x7 (64 channels).
        self.doubleBlock1 = nn.Sequential(
            nn.Conv2d(1,16,kernel_size = 5, stride = 1, padding = 2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )
        self.doubleBlock2 = nn.Sequential(
            nn.Conv2d(16,64,kernel_size = 5, stride = 1, padding = 2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )
        # doubleBlock2 emits 64 channels of 7x7, so the classifier must take
        # 7*7*64 features (it was 7*7*32, which made forwardDoubleBlock fail).
        self.doubleBlockFC = nn.Linear(7*7*64,10)
        self.fullFc1 = nn.Linear(7*7*64, 256)
        self.drop = nn.Dropout(0.3)
        self.fullFc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Default forward pass: the architecture used by ``train`` and ``test``."""
        return self.robustNetwork(x)

    # Architectures -----------------------------------------------------------
    # TIP: the tensor shape changes after every layer, especially after the
    # convolutional ones; print(x.shape) helps when a layer complains.

    def forwardSimple(self, x):
        """MLP with one hidden layer (784 -> 392 -> 10)."""
        x = x.view(-1,784)
        x = F.relu(self.simple1(x))
        # No ReLU on the output layer: clamping the logits at zero would stop
        # the network from pushing any class score below the others' floor.
        x = self.simple2(x)
        # log_softmax (not softmax) so the output can be fed to F.nll_loss.
        return F.log_softmax(x, dim=1)

    def forwardDouble(self, x):
        """MLP with two hidden layers (784 -> 392 -> 64 -> 10)."""
        x = x.view(-1,784)
        x = F.relu(self.double1(x))
        x = F.relu(self.double2(x))
        x = self.double3(x)
        return F.log_softmax(x, dim=1)

    def forwardTriple(self, x):
        """MLP with three hidden layers (784 -> 256 -> 128 -> 64 -> 10)."""
        x = x.view(x.shape[0],-1)
        x = F.relu(self.triple1(x))
        x = F.relu(self.triple2(x))
        x = F.relu(self.triple3(x))
        x = self.triple4(x)
        return F.log_softmax(x, dim=1)

    def forwardSimpleBlock(self, x):
        """One conv block followed by a linear classifier."""
        x = self.simpleBlock(x)
        x = x.view(x.shape[0],-1)
        x = self.simpleBlockFC(x)
        return F.log_softmax(x, dim =1)

    def forwardDoubleBlock(self, x):
        """Two conv blocks followed by a linear classifier."""
        x = self.doubleBlock1(x)
        x = self.doubleBlock2(x)
        x = x.view(x.shape[0],-1)
        x = self.doubleBlockFC(x)
        return F.log_softmax(x, dim =1)

    def fullNetwork(self, x):
        """Two conv blocks followed by two linear layers."""
        x = self.doubleBlock1(x)
        x = self.doubleBlock2(x)
        x = x.view(x.shape[0],-1)
        # NOTE(review): there is no non-linearity between fullFc1 and fullFc2,
        # so the two layers compose into a single affine map. Left unchanged
        # to keep the architecture as designed.
        x = self.fullFc1(x)
        x = self.fullFc2(x)
        return F.log_softmax(x, dim =1)

    def robustNetwork(self, x):
        """``fullNetwork`` with dropout between the two linear layers."""
        x = self.doubleBlock1(x)
        x = self.doubleBlock2(x)
        x = x.view(x.shape[0],-1)
        x = self.fullFc1(x)
        x = self.drop(x)
        x = self.fullFc2(x)
        return F.log_softmax(x, dim =1)

    # Training / evaluation ---------------------------------------------------

    def train(self, args = True, device = None, indata = None, optim = None, verbose = True):
        """Run one training epoch, or switch the train/eval mode.

        This method shadows ``nn.Module.train``. To keep ``model.eval()``,
        ``model.train()`` and ``model.train(False)`` working, a call without
        ``device``, ``indata`` and ``optim`` is treated as a mode switch and
        ``args`` is interpreted as the boolean mode.

        Args:
            args:    Epoch identifier, used only in the progress messages
                     (or the boolean mode for a mode-switch call).
            device:  Device the batches are moved to.
            indata:  Iterable of ``(images, labels)`` batches supporting ``len()``.
            optim:   Optimizer holding this model's parameters.
            verbose: When true, print the loss every 20 batches.

        Returns:
            ``None`` after a training epoch; ``self`` for a mode-switch call.

        Raises:
            TypeError: if only some of ``device``, ``indata``, ``optim`` are given.
        """
        if device is None and indata is None and optim is None:
            return nn.Module.train(self, bool(args))
        if device is None or indata is None or optim is None:
            raise TypeError("train() needs device, indata and optim to run a training epoch")

        # Dropout and BatchNorm must be in training mode for the epoch.
        nn.Module.train(self, True)
        lastLoss = None
        for idx, (img, label) in enumerate(indata):
            data, label = img.to(device), label.to(device)
            # Forward pass and loss (negative log-likelihood on log-probabilities).
            output = self(data)
            loss = F.nll_loss(output, label)

            # Backpropagation: zero the old gradients, compute new ones, update weights.
            optim.zero_grad()
            loss.backward()
            optim.step()

            # Keep a plain number; holding the tensor would keep its autograd
            # graph alive.
            lastLoss = loss.item()

            # Training progress report for sanity purposes!
            if verbose and idx % 20 == 0:
                print("Epoch: {}->Batch: {} / {}. Loss = {}".format(args, idx, len(indata), lastLoss))

        # Log the loss of the last batch; an empty loader leaves the old value.
        if lastLoss is not None:
            self.trainLoss = lastLoss

    def test(self, device, testLoader):
        """Evaluate on ``testLoader`` and store ``accuracy`` and ``testLoss``.

        The model is put in evaluation mode for the duration of the call
        (dropout disabled, BatchNorm using its running statistics) and the
        previous mode is restored afterwards.

        Args:
            device:     Device the batches are moved to.
            testLoader: Loader yielding ``(images, labels)`` batches and
                        exposing the underlying ``dataset`` for its length.
        """
        print("In Testing Function!")
        loss = 0.0
        true = 0
        wasTraining = self.training
        nn.Module.train(self, False)
        try:
            # Gradients are not needed while evaluating.
            with torch.no_grad():
                for data, label in testLoader:
                    data, label = data.to(device), label.to(device)
                    output = self(data)
                    # Accumulate the summed loss of the batch as a Python number.
                    loss += F.nll_loss(output, label, reduction = 'sum').item()
                    # The index of the largest output is the predicted class;
                    # it can be compared with the label directly.
                    pred = output.argmax(dim = 1)
                    true += pred.eq(label.view_as(pred)).sum().item()
        finally:
            nn.Module.train(self, wasTraining)

        nSamples = len(testLoader.dataset)
        # Guard against an empty dataset instead of dividing by zero.
        acc = true / nSamples if nSamples else 0.0
        self.accuracy = acc
        # Mean per-sample loss, so it is comparable with the (mean) training loss.
        self.testLoss = loss / nSamples if nSamples else 0.0
        # Print accuracy report!
        print("Accuracy: {} ({} / {})".format(acc, true, nSamples))

    def report(self):
        """Print the measures stored by the last ``train`` and ``test`` calls."""
        print("Current stats of MNIST_NET:")
        print("Accuracy:      {}" .format(self.accuracy))
        print("Training Loss: {}" .format(self.trainLoss))
        print("Test Loss:     {}" .format(self.testLoss))

In [0]:
def parse_args(argv = None):
    """Parse the training hyper-parameters from the command line.

    Options that are not supplied fall back to the defaults listed below.
    Unrecognised arguments are reported on stderr and ignored rather than
    aborting: inside a Jupyter kernel ``sys.argv`` carries the kernel's own
    ``-f <connection file>`` option, which would otherwise terminate the call.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) parses
              ``sys.argv[1:]``.

    Returns:
        Tuple ``(lr, m, bSize, epochs)`` after sanitising:
        ``lr`` outside the open interval (0, 1) becomes 0.1, ``m`` outside
        (0, 1) becomes 0, and ``bSize`` below 1 becomes 32.
    """
    parser = argparse.ArgumentParser(prog='Fashion MNIST Network building!')
    # Tell parser to accept the following arguments, along with default vals.
    parser.add_argument('--lr',    type = float,metavar = 'lr',   default=0.001,  help="Learning rate for the oprimizer.")
    parser.add_argument('--m',     type = float,metavar = 'float',default=0.0,    help="Momentum for the optimizer, if any.")
    parser.add_argument('--bSize', type = int,  metavar = 'bSize',default=32,     help="Batch size of data loader, in terms of samples. a size of 32 means 32 images for an optimization step.")
    parser.add_argument('--epochs',type = int,  metavar = 'e',    default=12   ,  help="Number of training epochs. One epoch is to perform an optimization step over every sample, once.")

    # parse_known_args returns the leftovers instead of exiting on them.
    args, unknown = parser.parse_known_args(argv)
    if unknown:
        print("parse_args: ignoring unrecognised arguments: {}".format(unknown), file=sys.stderr)
    lr, m, bSize, epochs = args.lr, args.m, args.bSize, args.epochs

    # Sanitize input. The chained comparisons are also False for NaN, so NaN
    # falls back to the safe value as well.
    m = m if 0 < m < 1 else 0
    lr = lr if 0 < lr < 1 else 0.1
    bSize = bSize if bSize >= 1 else 32
    # It is standard in larger projects to return a dictionary instead of a myriad of values:
    # return {'lr': lr, 'm': m, 'bSize': bSize, 'epochs': epochs}
    return lr, m , bSize, epochs

In [90]:
def main(bSize = 32, epochs = 24, lr = 0.001, m = 0):
    """Train and evaluate the Fashion-MNIST network, then print a final report.

    The hyper-parameters are keyword arguments so a notebook cell can tune a
    run without editing this function; the defaults reproduce the previously
    hard-coded values. (From a console, the tuple returned by ``parse_args()``
    can be passed in instead.)

    Args:
        bSize:  Batch size handed to the data loaders.
        epochs: Number of train-then-test passes over the data.
        lr:     Learning rate of the SGD optimizer.
        m:      Momentum of the SGD optimizer.
    """
    # Load data, initialize model and optimizer!
    trainLoader, testLoader = load_data(bSize=bSize)
    model = Net().to(device) # send model to appropriate computing device (CPU or CUDA)
    print(model)
    optim = optm.SGD(model.parameters(), lr=lr, momentum=m) # Instantiate optimizer with the model's parameters.
    print("######### Initiating Fashion MNIST network training #########\n")
    print("Parameters: lr:{}, momentum:{}, batch Size:{}, epochs:{}".format(lr,m,bSize,epochs))
    for e in range(epochs):
        print("Epoch: {} start ------------\n".format(e))
        # The epoch index is only used to label the progress messages.
        model.train(e, device, trainLoader, optim)
        model.test(device, testLoader)

    # Final report
    model.report()
 
# Entry point: run main() only when this code is executed as the top-level
# program (__name__ is then '__main__'), not when it is imported as a module.
if __name__ == '__main__':
    main()

Net(
  (simple1): Linear(in_features=784, out_features=392, bias=True)
  (simple2): Linear(in_features=392, out_features=10, bias=True)
  (double1): Linear(in_features=784, out_features=392, bias=True)
  (double2): Linear(in_features=392, out_features=64, bias=True)
  (double3): Linear(in_features=64, out_features=10, bias=True)
  (triple1): Linear(in_features=784, out_features=256, bias=True)
  (triple2): Linear(in_features=256, out_features=128, bias=True)
  (triple3): Linear(in_features=128, out_features=64, bias=True)
  (triple4): Linear(in_features=64, out_features=10, bias=True)
  (simpleBlock): Sequential(
    (0): Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (simpleBlockFC): Linear(in_features=288, out_features=10, bias=True)
  (doubleBlock1): Sequential(
    (0): Conv