In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms

In [2]:
# Convert each image to a tensor, then shift/scale it with (x - 0.5) / 0.5.
# FashionMNIST images have a single grayscale channel, so Normalize must get
# exactly one mean and one std. Passing three-channel statistics makes recent
# torchvision releases fail with a broadcast shape mismatch.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])
# Download and load the training data
trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
trainloaders = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data.
# Evaluation gains nothing from shuffling; a fixed order keeps the batch
# composition (and therefore the per-batch metrics) identical between runs.
testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
testloaders = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [3]:
class FashionMnistModel(nn.Module):
    def __init__(self, input_size, output_size, hidden_layers, drop_p=0.5):
        ''' Fully connected classifier with a configurable stack of hidden layers.

            Arguments
            ---------
            input_size: integer, number of features fed to the first layer
            output_size: integer, number of classes produced by the last layer
            hidden_layers: non-empty sequence of integers, width of each hidden layer
            drop_p: float, dropout probability applied after every hidden layer

        '''
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size

        # Chain the widths so that consecutive pairs describe one Linear layer
        # each: (input, h0), (h0, h1), ..., created in that order.
        widths = [input_size, *hidden_layers]
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:])]
        )

        # The classifier head reads from the last hidden layer.
        self.output = nn.Linear(hidden_layers[-1], output_size)

        self.dropout = nn.Dropout(p=drop_p)

    def forward(self, x):
        ''' Runs x through every hidden layer (ReLU then dropout) and returns
            the log-probabilities computed over dimension 1.
        '''
        activations = x
        for layer in self.hidden_layers:
            activations = self.dropout(F.relu(layer(activations)))

        logits = self.output(activations)
        return F.log_softmax(logits, dim=1)
    
    

In [4]:
def validation(model, testloader, criterion):
    ''' Evaluates the model on every batch of a loader with dropout disabled.

        Arguments
        ---------
        model: nn.Module that maps a (batch, features) tensor to log-probabilities
        testloader: iterable yielding (images, labels) batches
        criterion: callable computing the loss from (log_probabilities, labels)

        Returns
        -------
        (test_loss, accuracy): the batch loss and the batch accuracy, each
        summed over all batches. Divide by len(testloader) to get averages.
        Both are the integer 0 when the loader yields no batches.
    '''
    # Remember the caller's mode so it can be restored afterwards.
    was_training = model.training
    test_loss = 0
    accuracy = 0
    # Eval mode must hold for the whole pass. Switching back to train mode
    # inside the loop would re-enable dropout from the second batch onwards.
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in testloader:
                # Flatten each image without mutating the loader's tensor.
                flat = images.reshape(images.shape[0], -1)
                log_prob = model(flat)
                test_loss += criterion(log_prob, labels)
                # exp() is monotonic, so the largest log-probability already
                # identifies the predicted class.
                predictions = log_prob.argmax(dim=1)
                # Fraction of correct predictions in this batch.
                accuracy += (predictions == labels).float().mean()
    finally:
        model.train(was_training)

    return test_loss, accuracy
        
def train(model, trainloader, testloader, criterion, optimizer, epochs=5, print_every=40):
    ''' Trains the model and periodically reports train/test metrics.

        Arguments
        ---------
        model: nn.Module that maps a (batch, features) tensor to log-probabilities
        trainloader: iterable yielding (images, labels) training batches
        testloader: iterable yielding (images, labels) batches used for validation
        criterion: callable computing the loss from (log_probabilities, labels)
        optimizer: optimizer already bound to the model's parameters
        epochs: integer, number of passes over trainloader
        print_every: integer, number of training steps between reports

        Every print_every steps (counted from the start of each epoch) the mean
        training loss since the previous report is printed, together with the
        mean test loss and mean test accuracy returned by validation().
    '''
    for e in range(epochs):
        cum_train_loss = 0
        for steps, (images, labels) in enumerate(trainloader, start=1):
            # Make sure dropout is active for every optimisation step, whatever
            # mode the caller (or a validation pass) left the model in.
            model.train()

            # Flatten each image without mutating the loader's tensor.
            flat = images.reshape(images.shape[0], -1)

            optimizer.zero_grad()
            log_prob = model(flat)
            loss = criterion(log_prob, labels)
            loss.backward()
            optimizer.step()
            cum_train_loss += loss.item()

            if steps % print_every == 0:
                # validation() disables gradient tracking itself.
                test_loss, accuracy = validation(model, testloader, criterion)
                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}.. ".format(cum_train_loss/print_every),
                      "Test Loss: {:.3f}.. ".format(test_loss/len(testloader)),
                      "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))
                # Start a fresh running average for the next report.
                cum_train_loss = 0
            
            
            
            

In [5]:
# 784 input features -> hidden layers of 256, 128 and 64 units -> 10 outputs.
finalModel = FashionMnistModel(
    input_size=784,
    output_size=10,
    hidden_layers=[256, 128, 64],
)
# The model's forward pass ends in log_softmax, which is what NLLLoss consumes.
criterion = nn.NLLLoss()
optimizer = optim.Adam(finalModel.parameters(), lr=1e-3)

In [None]:
# Three passes over the training set; print_every keeps its default of 40 steps.
train(
    model=finalModel,
    trainloader=trainloaders,
    testloader=testloaders,
    criterion=criterion,
    optimizer=optimizer,
    epochs=3,
)

In [15]:
# Show the module structure, then the names of the entries in its state dict
# (one weight and one bias per Linear layer; dropout contributes no entries).
print("Our model: \n\n", finalModel, '\n')
print("The state dict keys: \n\n", finalModel.state_dict().keys())

Our model: 

 FashionMnistModel(
  (hidden_layers): ModuleList(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=128, bias=True)
    (2): Linear(in_features=128, out_features=64, bias=True)
  )
  (output): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.5)
) 

The state dict keys: 

 odict_keys(['hidden_layers.0.weight', 'hidden_layers.0.bias', 'hidden_layers.1.weight', 'hidden_layers.1.bias', 'hidden_layers.2.weight', 'hidden_layers.2.bias', 'output.weight', 'output.bias'])


In [24]:
#finalModel.state_dict()['hidden_layers.0.weight']

## Saving the model

In [25]:
# Store the constructor arguments next to the weights so the architecture can
# be rebuilt before the state dict is loaded. The values are read from the
# model itself rather than repeated as literals, so the checkpoint cannot
# drift from the network that was actually trained.
checkpoint = {'input_size': finalModel.input_size,
              'output_size': finalModel.output_size,
              'hidden_layers': [each.out_features for each in finalModel.hidden_layers],
              # Dropout has no parameters, so its rate is not in the state dict.
              'drop_p': finalModel.dropout.p,
              'state_dict': finalModel.state_dict()}

torch.save(checkpoint, 'checkpoint.pth')

In [27]:
def load_checkpoint(filepath):
    ''' Rebuilds a FashionMnistModel from a checkpoint file.

        Arguments
        ---------
        filepath: path of a file written with torch.save, holding a dict with
                  the keys 'input_size', 'output_size', 'hidden_layers' and
                  'state_dict', plus optionally 'drop_p'

        Returns
        -------
        A FashionMnistModel with the saved architecture and weights.
    '''
    # NOTE: torch.load unpickles the file, so only open checkpoints from a
    # trusted source.
    # map_location lets a checkpoint written on a GPU load on a CPU-only machine.
    checkpoint = torch.load(filepath, map_location='cpu')
    model = FashionMnistModel(checkpoint['input_size'],
                              checkpoint['output_size'],
                              checkpoint['hidden_layers'],
                              # Checkpoints without a dropout rate fall back to
                              # the model's default.
                              drop_p=checkpoint.get('drop_p', 0.5))
    model.load_state_dict(checkpoint['state_dict'])

    return model

## Load the model

In [28]:
# Rebuild the network from the checkpoint file and print its layer structure
# to compare against the model that was saved.
model = load_checkpoint('checkpoint.pth')
print(model)

FashionMnistModel(
  (hidden_layers): ModuleList(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=128, bias=True)
    (2): Linear(in_features=128, out_features=64, bias=True)
  )
  (output): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.5)
)


## Scratch pad

In [7]:
# Pull a single (images, labels) batch from the training loader for inspection.
imgs, lbls = next(iter(trainloaders))

In [8]:
import numpy as np
# Sample hidden-layer sizes for trying out the slicing/zip pairing that
# FashionMnistModel uses to build its layers.
hid_layer = np.array([256, 128, 64, 32])

In [9]:
# Echo the full array of sizes.
hid_layer

array([256, 128,  64,  32])

In [10]:
# Every size except the last: the left-hand element of each consecutive pair.
hid_layer[:-1]

array([256, 128,  64])

In [11]:
# Every size except the first: the right-hand element of each consecutive pair.
hid_layer[1:]

array([128,  64,  32])

In [12]:
# The final size on its own (the model uses this as the output layer's input width).
hid_layer[-1]

32

In [13]:
# Pair each size with its successor: (256, 128), (128, 64), (64, 32).
lay = zip(hid_layer[:-1], hid_layer[1:])

In [14]:
# Print every consecutive pair. zip() returns a one-shot iterator, so running
# this cell a second time without recreating `lay` prints nothing.
for h1, h2 in lay:
    print(f'h1 = {h1} h2 = {h2}')

h1 = 256 h2 = 128
h1 = 128 h2 = 64
h1 = 64 h2 = 32
