In [2]:
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms

In [3]:
#Hyperparameter definitions
seed = 42             # Seed for PyTorch's global random number generator
learning_rate = 1e-3  # Optimizer step size
input_dim = 28*28     # Number of features once a 28x28 image is flattened
output_dim = 10       # Number of output units (one per class)
batch_size = 100      # Samples per batch for the data loaders
epochs = 20           # Passes over the training set per training run

# Seed PyTorch's global RNG so random draws made through it (e.g. weight
# initialisation, and loader shuffling when no generator is passed) repeat after
# a kernel restart. NOTE: some GPU kernels can still be nondeterministic.
torch.manual_seed(seed)

In [6]:
#Import FashionMNIST training and test datasets
# Both splits share the same storage folder, tensor conversion and download flag.
dataset_options = dict(root='data', transform=transforms.ToTensor(), download=True)
training_data = datasets.FashionMNIST(train=True, **dataset_options)
test_data = datasets.FashionMNIST(train=False, **dataset_options)

print(f'Training set size = {len(training_data)}')
print(f'Test set size = {len(test_data)}')

#Create loaders for our datasets
# Only the training batches are shuffled; the test set is read in a fixed order.
training_loader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

Training set size = 60000
Test set size = 10000


In [7]:
#Define our multilayer perceptron model by extending the nn.Module class
class MNIST_MLP(nn.Module):
  """Multilayer perceptron: flatten -> two ReLU hidden layers -> linear output layer.

  The output layer has no activation, so forward() returns raw per-class scores
  (logits) rather than probabilities.

  Args:
    input_dim: Number of features per sample after flattening.
    output_dim: Number of output units.
    hidden_dim: Width of both hidden layers. Defaults to 512, the value that
      was previously hard-coded.
  """

  def __init__(self, input_dim, output_dim, hidden_dim=512):
    super().__init__()

    # Collapses every dimension after the batch dimension into one feature axis.
    self.flatten = nn.Flatten()

    self.h0 = nn.Sequential(
      nn.Linear(input_dim, hidden_dim, bias=True),
      nn.ReLU()
    )
    self.h1 = nn.Sequential(
      nn.Linear(hidden_dim, hidden_dim, bias=True),
      nn.ReLU()
    )
    # Kept inside a Sequential so the parameter names (h2.0.weight, h2.0.bias)
    # stay compatible with previously saved state dicts.
    self.h2 = nn.Sequential(
      nn.Linear(hidden_dim, output_dim),
    )

  def forward(self, x):
    """Return the output-layer activations for a batch `x`.

    Args:
      x: Batch tensor whose first dimension indexes samples; the remaining
        dimensions are flattened and must multiply to `input_dim`.

    Returns:
      Tensor with one row per sample and `output_dim` columns.
    """
    flat_x = self.flatten(x)
    h0_outputs = self.h0(flat_x)
    h1_outputs = self.h1(h0_outputs)
    h2_outputs = self.h2(h1_outputs)

    return h2_outputs

# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
print('Using {} device'.format(device))

# Build the network and move its parameters to the selected device.
model = MNIST_MLP(input_dim, output_dim)
model = model.to(device)
print(model)

Using cuda device
MNIST_MLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (h0): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
  )
  (h1): Sequential(
    (0): Linear(in_features=512, out_features=512, bias=True)
    (1): ReLU()
  )
  (h2): Sequential(
    (0): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [8]:
#Define our loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
# Adam driven by the notebook's learning_rate hyperparameter; every other Adam
# setting is left at its default. (optim.SGD takes the same lr argument if you
# want to compare optimizers.)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
#Define training and test loops
def training_loop(dataloader, _model, loss_fn, optimizer, log_every=60):
  """Train `_model` for one pass over `dataloader`, printing the loss periodically.

  Args:
    dataloader: Iterable of (inputs, targets) batches; `dataloader.dataset`
      must support len().
    _model: Module to train. It is switched to training mode and left in it.
    loss_fn: Callable taking (predictions, targets) and returning a scalar
      tensor that supports backward().
    optimizer: Optimizer that updates `_model`'s parameters.
    log_every: Print the current batch loss every `log_every` batches
      (default 60). A falsy value (0 or None) disables printing.

  Returns:
    None.
  """
  size = len(dataloader.dataset)

  # Send batches to wherever the model's parameters live instead of relying on
  # a global `device` variable; a parameter-free module falls back to the CPU.
  first_param = next(_model.parameters(), None)
  model_device = first_param.device if first_param is not None else torch.device('cpu')

  # Make sure layers such as dropout / batch norm behave in training mode,
  # even if an earlier evaluation left the model in eval mode.
  _model.train()

  for batch_number, (X, y) in enumerate(dataloader):
    #Send batch tensors to device
    X = X.to(model_device)
    y = y.to(model_device)

    # Run forward prop and calculate loss
    pred = _model(X)
    loss = loss_fn(pred, y)

    # Run backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if log_every and batch_number % log_every == 0:
      # Separate name so the `loss` tensor is not replaced by a Python float.
      loss_value, current_sample = loss.item(), batch_number * len(X)
      print(f"loss: {loss_value:>7f}   [{current_sample:>5d}/{size:>5d}]")


def test_loop(dataloader, _model, loss_fn):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  test_loss, correct = 0,0

  with torch.no_grad():
    for X, y in dataloader:
      #Send batch tensors to device
      X = X.to(device)
      y = y.to(device)

      #Run inference, calculate loss and determine if prediction is correct
      pred = _model(X)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()

  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg. loss: {test_loss:>8f} \n")

In [10]:
# Run training and test loops
# Each epoch is one full training pass followed by an evaluation on the test set.
for epoch_number in range(1, epochs + 1):
  print(f'Epoch: {epoch_number}\n-----------------------------------')
  training_loop(training_loader, model, loss_fn, optimizer)
  test_loop(test_loader, model, loss_fn)

print('Done!')

Epoch: 1
-----------------------------------
loss: 2.302933   [    0/60000]
loss: 0.613497   [ 6000/60000]
loss: 0.671353   [12000/60000]
loss: 0.603223   [18000/60000]
loss: 0.393405   [24000/60000]
loss: 0.486514   [30000/60000]
loss: 0.459886   [36000/60000]
loss: 0.461749   [42000/60000]
loss: 0.444568   [48000/60000]
loss: 0.413948   [54000/60000]
Test Error: 
 Accuracy: 85.5%, Avg. loss: 0.404855 

Epoch: 2
-----------------------------------
loss: 0.346717   [    0/60000]
loss: 0.319498   [ 6000/60000]
loss: 0.344431   [12000/60000]
loss: 0.583753   [18000/60000]
loss: 0.462203   [24000/60000]
loss: 0.300186   [30000/60000]
loss: 0.459999   [36000/60000]
loss: 0.408995   [42000/60000]
loss: 0.287063   [48000/60000]
loss: 0.273903   [54000/60000]
Test Error: 
 Accuracy: 86.5%, Avg. loss: 0.383439 

Epoch: 3
-----------------------------------
loss: 0.214381   [    0/60000]
loss: 0.299496   [ 6000/60000]
loss: 0.323769   [12000/60000]
loss: 0.252697   [18000/60000]
loss: 0.317236 

In [11]:
# Save the model
# Only the parameters (the state dict) are written, not the model object itself.
model_filename = 'helloMNIST_model.pth'
trained_weights = model.state_dict()
torch.save(trained_weights, model_filename)

In [None]:
# Create a new network, load the saved weights into it and train for another `epochs` epochs
new_model = MNIST_MLP(input_dim, output_dim)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
new_model.load_state_dict(torch.load(model_filename, map_location=device))
new_model.to(device)

# Create a new optimizer for new model's parameters.
# Only the model weights were saved, so Adam starts again with fresh internal state.
new_optimizer = optim.Adam(new_model.parameters(), lr=learning_rate)

for i in range(epochs):
  print(f'Epoch: {i+1}\n-----------------------------------')
  training_loop(training_loader, new_model, loss_fn, new_optimizer)
  test_loop(test_loader, new_model, loss_fn)

print('Done!')