In [1]:
import math
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data.dataset import Subset

from torchvision import transforms
from torchvision.datasets.mnist import MNIST

from poutyne.framework import Model, ModelCheckpoint, CSVLogger
from poutyne import torch_to_numpy
from poutyne.layers import Flatten

In [2]:
# Fix the NumPy and PyTorch random seeds so the data split and the
# weight initialization are repeatable from one run to the next.
np.random.seed(42)
torch.manual_seed(42)

In [3]:
# Training hyperparameters

# Index of the GPU to use; falls back to the CPU when CUDA is unavailable.
cuda_device = 0
if torch.cuda.is_available():
    device = torch.device("cuda:{}".format(cuda_device))
else:
    device = torch.device("cpu")

# Fraction of the official training set kept for training
# (the remainder is used for validation).
train_split_percent = 0.8

batch_size = 32
learning_rate = 0.01
n_epoch = 5

# Size of the networks' output layer.
num_classes = 10

In [4]:
# Load MNIST, carve a validation split out of the official training set,
# and wrap each split in a DataLoader.

# The official training set is instantiated twice: once as the base of the
# training subset and once as the base of the validation subset.
train_dataset = MNIST(
    './mnist/', train=True, download=True, transform=transforms.ToTensor()
)
valid_dataset = MNIST(
    './mnist/', train=True, download=True, transform=transforms.ToTensor()
)
test_dataset = MNIST(
    './mnist/', train=False, download=True, transform=transforms.ToTensor()
)

# Number of examples that go to the training subset.
num_data = len(train_dataset)
split = math.floor(train_split_percent * num_data)

# Shuffle the example indices (driven by the NumPy global seed) so that the
# two subsets are a random, disjoint partition of the training set.
indices = list(range(num_data))
np.random.shuffle(indices)

train_indices, valid_indices = indices[:split], indices[split:]
train_dataset = Subset(train_dataset, train_indices)
valid_dataset = Subset(valid_dataset, valid_indices)

# Only the training loader reshuffles its examples.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
valid_loader = torch.utils.data.DataLoader(
    valid_dataset, batch_size=batch_size
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size
)

In [5]:
def create_fully_connected_network():
    """Build a multi-layer perceptron classifier.

    The input is flattened, passed through three Linear + ReLU stages
    (784 -> 256 -> 128 -> 64) and finally projected to `num_classes`
    outputs with no activation on the last layer.

    Returns:
        nn.Sequential: the assembled, freshly initialized network.
    """
    layer_sizes = [28 * 28, 256, 128, 64]

    layers = [Flatten()]
    # Hidden stages: one Linear followed by a ReLU for each consecutive
    # pair of widths.
    for in_features, out_features in zip(layer_sizes[:-1], layer_sizes[1:]):
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())

    # Output layer.
    layers.append(nn.Linear(layer_sizes[-1], num_classes))

    return nn.Sequential(*layers)

def create_convolutional_network():
    """Build a small convolutional classifier.

    Two 3x3 convolutions (1 -> 32 -> 64 channels, padding 1) with ReLU are
    followed by 2x2 max pooling and dropout; the result is flattened and
    fed to a 128-unit hidden layer and a `num_classes`-way output layer.

    Returns:
        nn.Sequential: the assembled, freshly initialized network.
    """
    convolutional_layers = [
        nn.Conv2d(1, 32, 3, padding=1),
        nn.ReLU(),
        nn.Conv2d(32, 64, 3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2),
        nn.Dropout(0.25),
    ]

    dense_layers = [
        Flatten(),
        # 64 channels of 14x14 feature maps; this size assumes 28x28 inputs
        # (the padded convolutions keep the size, the pooling halves it).
        nn.Linear(64 * 14 * 14, 128),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(128, num_classes),
    ]

    return nn.Sequential(*(convolutional_layers + dense_layers))

In [6]:
def train(name, pytorch_module):
    """Train a network with Poutyne, then print its test loss and accuracy.

    Relies on the notebook-level `train_loader`, `valid_loader`,
    `test_loader`, `device`, `learning_rate` and `n_epoch`.

    Args:
        name: Prefix of the files written during training:
            `<name>_last_epoch.ckpt`, `<name>_best_epoch_{epoch}.ckpt`
            and `<name>_log.tsv`.
        pytorch_module: The network to optimize.
    """
    # Wrap the network, an SGD optimizer, the cross-entropy loss and the
    # accuracy metric into a Poutyne Model, then send it to the device.
    sgd = optim.SGD(pytorch_module.parameters(), lr=learning_rate, weight_decay=0.001)
    model = Model(pytorch_module, sgd, nn.CrossEntropyLoss(), metrics=['accuracy'])
    model.to(device)

    # Keeps the latest weights so the optimization can be continued later
    # for more epochs.
    last_checkpoint = ModelCheckpoint(
        name + '_last_epoch.ckpt',
        temporary_filename='last_epoch.ckpt.tmp',
    )

    # Writes the weights to a new file whenever the validation accuracy
    # beats every previous epoch.
    best_checkpoint = ModelCheckpoint(
        name + '_best_epoch_{epoch}.ckpt',
        monitor='val_acc',
        mode='max',
        save_best_only=True,
        restore_best=True,
        verbose=False,
        temporary_filename='best_epoch.ckpt.tmp',
    )

    # Records the per-epoch losses and accuracies in a tab-separated file.
    epoch_logger = CSVLogger(name + '_log.tsv', separator='\t')

    model.fit_generator(
        train_loader,
        valid_loader,
        epochs=n_epoch,
        callbacks=[last_checkpoint, best_checkpoint, epoch_logger],
    )

    # Evaluate on the held-out test set and report the result.
    test_loss, test_acc = model.evaluate_generator(test_loader)
    print('Test:\n\tLoss: {}\n\tAccuracy: {}'.format(test_loss, test_acc))

In [7]:
# Build the fully connected network and print its layer structure.
fc_net = create_fully_connected_network()
print(fc_net)

# Train it; 'fc' is the prefix train() gives to the checkpoint and log
# files it writes for this run.
train('fc', fc_net)

Sequential(
  (0): Flatten()
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=128, bias=True)
  (4): ReLU()
  (5): Linear(in_features=128, out_features=64, bias=True)
  (6): ReLU()
  (7): Linear(in_features=64, out_features=10, bias=True)
)
Epoch 1/5 9.23s Step 1500/1500: loss: 2.038025, acc: 37.295833, val_loss: 1.054108, val_acc: 68.633333
Epoch 2/5 9.30s Step 1500/1500: loss: 0.613594, acc: 81.795833, val_loss: 0.438431, val_acc: 87.200000
Epoch 3/5 10.32s Step 1500/1500: loss: 0.390026, acc: 88.685417, val_loss: 0.351080, val_acc: 89.925000
Epoch 4/5 9.55s Step 1500/1500: loss: 0.322791, acc: 90.743750, val_loss: 0.305179, val_acc: 91.133333
Epoch 5/5 9.81s Step 1500/1500: loss: 0.277492, acc: 92.050000, val_loss: 0.265470, val_acc: 92.533333
Test:
	Loss: 0.25376319630146027
	Accuracy: 92.93


In [8]:
# Build the convolutional network and print its layer structure.
conv_net = create_convolutional_network()
print(conv_net)

# Train it; 'conv' is the prefix train() gives to the checkpoint and log
# files it writes for this run.
train('conv', conv_net)

Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU()
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Dropout(p=0.25)
  (6): Flatten()
  (7): Linear(in_features=12544, out_features=128, bias=True)
  (8): ReLU()
  (9): Dropout(p=0.5)
  (10): Linear(in_features=128, out_features=10, bias=True)
)
Epoch 1/5 11.26s Step 1500/1500: loss: 0.617333, acc: 81.035417, val_loss: 0.266608, val_acc: 91.850000
Epoch 2/5 11.45s Step 1500/1500: loss: 0.320636, acc: 90.189583, val_loss: 0.179926, val_acc: 94.291667
Epoch 3/5 11.40s Step 1500/1500: loss: 0.268208, acc: 91.954167, val_loss: 0.159907, val_acc: 95.075000
Epoch 4/5 11.36s Step 1500/1500: loss: 0.241208, acc: 92.604167, val_loss: 0.139435, val_acc: 95.816667
Epoch 5/5 11.33s Step 1500/1500: loss: 0.220420, acc: 93.366667, val_loss: 0.129128, val_acc: 96.175000
Test:
	Loss: 