In [3]:
#import the required packages
import matplotlib.pyplot as plt
import numpy as np
from torch import nn, optim
from torch.autograd import Variable
import torch
from torchvision import datasets, transforms

In [2]:
#Load the helper script
%run helper_script.py

True

In [3]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# Download and load the training data
trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data
testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

In [4]:
#construct iterator
images, label = next(iter(testloader))

# Flatten MNIST images into a 784 long vector
images = images.view(images.shape[0], -1)

In [5]:
#Define the NN architectire
model = nn.Sequential(nn.Linear(784, 256),
                      nn.ReLU(),
                      nn.Dropout(0.2),
                      nn.Linear(256, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Dropout(0.2),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))
#Define the loss function and the optimizer
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

In [6]:
#verify model structure
model

Sequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.2)
  (3): Linear(in_features=256, out_features=128, bias=True)
  (4): ReLU()
  (5): Linear(in_features=128, out_features=64, bias=True)
  (6): ReLU()
  (7): Dropout(p=0.2)
  (8): Linear(in_features=64, out_features=10, bias=True)
  (9): LogSoftmax()
)

**Dropout**

During training, randomly zeroes some of the elements of the input tensor with probability :attr:`p` using samples from a Bernoulli distribution. The elements to zero are randomized on every forward call.

This has proven to be an effective technique for regularization and preventing the co-adaptation of neurons as described in the paper Improving neural networks by preventing co-adaptation of feature detectors`.

Furthermore, the outputs are scaled by a factor of :math:`\frac{1}{1-p}` during training. This means that during evaluation the module simply computes an identity function.

Args:
p: probability of an element to be zeroed. Default: 0.5
inplace: If set to ``True``, will do this operation in-place. Default: ``False``

Shape:
- Input: `Any`. Input can be of any shape
- Output: `Same`. Output is of the same shape as input

During training,dropout is used to prevent overfitting, but during inference it is important to use the entire network. So, dropout needs to be turned off during validation, testing, and whenever the network will be used to make predictions. To do this, use model.eval(). This sets the model to evaluation mode where the dropout probability is 0. Dropout can be turned back on by setting the model to train mode with model.train(). In general, the pattern for the validation loop will look like this, turn off gradients, set the model to evaluation mode, calculate the validation loss and metric, then set the model back to train mode.

*NB:*


loss.backward() computes dloss/dx for every parameter x which has requires_grad=True. These are accumulated into x.grad for every parameter x. In pseudo-code:

x.grad += dloss/dx

optimizer.step updates the value of x using the gradient x.grad. For example, the SGD optimizer performs:

x += -lr * x.grad

optimizer.zero_grad() clears x.grad for every parameter x in the optimizer. It’s important to call this before loss.backward(), otherwise you’ll accumulate the gradients from multiple passes.

In [None]:
#Define the number of epochs and Steps for the optimizer
epochs = 20
steps = 0

#Create empty list to record the training and test errors
train_losses, test_losses = [], []

#Use a 'for' loop to move through each epoch
for e in range(epochs):
    #The running loss is set to null at first
    running_loss = 0
    #For each image and their associated label in the training iterator
    for images, labels in trainloader:
        #Flatten the image
        images = images.view(images.shape[0], -1)
        #Clear the previous gradient computations
        optimizer.zero_grad()
        #Given that the cross entropy function is used, raw output is the logit
        log_ps = model(images)
        loss = criterion(log_ps, labels)
        #run the backpropagation algorithm to update the weights
        loss.backward()
        #performs a parameter update based on the current gradient
        optimizer.step()
        #update the running error for the training data
        running_loss += loss.item()
        
    else:
        test_loss = 0
        accuracy = 0
        
        # Turn off gradients for validation, saves memory and computations
        with torch.no_grad():
            #Turn on evaluation for validation (because of dropout)
            model.eval()
            for images, labels in testloader:
                #Flatten the images
                images = images.view(images.shape[0], -1)
                #Run the model on the above images from the test iterator
                log_ps = model(images)
                test_loss += criterion(log_ps, labels)
                #Obtain probabilties from the logits obtained from the test data fit
                ps = torch.exp(log_ps)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                #Compute the accuracy metric
                accuracy += torch.mean(equals.type(torch.FloatTensor))
                running_loss = 0
            #Turn of evaluation for training(because of dropout)
        model.train()
                
        #Append each result to the train and test losses        
        train_losses.append(running_loss/len(trainloader))
        test_losses.append(test_loss/len(testloader))

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(trainloader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(testloader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))

Epoch: 1/20..  Training Loss: 0.000..  Test Loss: 0.460..  Test Accuracy: 0.829
Epoch: 2/20..  Training Loss: 0.000..  Test Loss: 0.426..  Test Accuracy: 0.843
Epoch: 3/20..  Training Loss: 0.000..  Test Loss: 0.411..  Test Accuracy: 0.853
Epoch: 4/20..  Training Loss: 0.000..  Test Loss: 0.404..  Test Accuracy: 0.856
Epoch: 5/20..  Training Loss: 0.000..  Test Loss: 0.378..  Test Accuracy: 0.864
Epoch: 6/20..  Training Loss: 0.000..  Test Loss: 0.397..  Test Accuracy: 0.859
Epoch: 7/20..  Training Loss: 0.000..  Test Loss: 0.391..  Test Accuracy: 0.861
Epoch: 8/20..  Training Loss: 0.000..  Test Loss: 0.376..  Test Accuracy: 0.866
Epoch: 9/20..  Training Loss: 0.000..  Test Loss: 0.372..  Test Accuracy: 0.868


In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt

plt.plot(train_losses, label='Training loss')
plt.plot(test_losses, label='Validation loss')
#Fplt.legend(frameon=False)

In [None]:
#Find when does the minimum validation error occurs 
print("Epoch Number: {} at Error rate: {} ".format(np.argmin(test_losses),np.min(test_losses)))

In [None]:
## run the model evaluation
model.eval()
#select the testing data
dataiter = iter(testloader)
images, labels = dataiter.next()
img = images[0]
# Convert 2D image to 1D vector
img = img.view(1, 784)

# Calculate the class probabilities (softmax) for img
with torch.no_grad():
    output = model.forward(img)
#Obtain the probabilties from the logits
ps = torch.exp(output)

# Plot the image and probabilities
view_classify(img.view(1, 28, 28), ps)