In [None]:
import math
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data.dataset import Subset

from torchvision import transforms
from torchvision.datasets.mnist import MNIST

from pytoune.framework import Model, ModelCheckpoint, CSVLogger
from pytoune import torch_to_numpy
from pytoune.layers import Flatten

In [None]:
# Fix the random seeds up front so the NumPy shuffle used for the train/valid
# split and PyTorch's weight initialisation are repeatable on a fresh kernel.
np.random.seed(42)
torch.manual_seed(42)

In [None]:
# Training configuration

# Index of the CUDA device to use when one is available; otherwise fall back
# to the CPU.
cuda_device = 0
if torch.cuda.is_available():
    device = torch.device("cuda:{}".format(cuda_device))
else:
    device = torch.device("cpu")

# Fraction of the shuffled training examples kept for training; the rest is
# held out for validation.
train_split_percent = 0.8

# Mini-batch size shared by the train, validation and test loaders.
batch_size = 32

# Step size handed to the SGD optimizer.
learning_rate = 0.01

# Number of epochs requested from the training loop.
n_epoch = 5

# Size of the final layer of both networks (one output per class).
num_classes = 10

In [None]:
# Load MNIST and build the train / validation / test DataLoaders.
#
# The official training split is instantiated twice so that the training and
# validation subsets each wrap their own dataset object; the official test
# split is kept untouched for the final evaluation.
mnist_root = './mnist/'
train_dataset = MNIST(mnist_root, train=True, download=True, transform=transforms.ToTensor())
valid_dataset = MNIST(mnist_root, train=True, download=True, transform=transforms.ToTensor())
test_dataset = MNIST(mnist_root, train=False, download=True, transform=transforms.ToTensor())

# Shuffle every example index once (NumPy's global RNG), then cut the
# permutation at the train/validation boundary.
num_data = len(train_dataset)
indices = list(range(num_data))
np.random.shuffle(indices)

split = math.floor(train_split_percent * num_data)
train_indices, valid_indices = indices[:split], indices[split:]

# Restrict each dataset object to its own, disjoint slice of the permutation.
train_dataset = Subset(train_dataset, train_indices)
valid_dataset = Subset(valid_dataset, valid_indices)

# Only the training loader reshuffles its examples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size)

In [None]:
def create_fully_connected_network():
    """Build the fully connected classifier.

    The network flattens its input to 28*28 features and passes it through
    three hidden ``Linear`` + ``ReLU`` stages of width 256, 128 and 64,
    followed by a final ``Linear`` layer with ``num_classes`` outputs.

    Returns:
        nn.Sequential: a freshly initialised, untrained network.
    """
    hidden_sizes = (256, 128, 64)

    layers = [Flatten()]
    in_features = 28 * 28
    # Layers are created in forward order so that seeded weight
    # initialisation consumes the RNG in a fixed sequence.
    for out_features in hidden_sizes:
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())
        in_features = out_features

    # Output layer: one unit per class, no activation.
    layers.append(nn.Linear(in_features, num_classes))
    return nn.Sequential(*layers)

def create_convolutional_network():
    """Build the convolutional classifier.

    Two 3x3 convolutions (1 -> 32 -> 64 channels, ``padding=1``), each
    followed by ``ReLU``, then a 2x2 max-pool and dropout. The result is
    flattened and fed to a 128-unit hidden layer with dropout and a final
    ``Linear`` layer with ``num_classes`` outputs.

    The first ``Linear`` layer expects 64*14*14 features, which corresponds
    to 64 channels of 14x14 maps after the pooling step.

    Returns:
        nn.Sequential: a freshly initialised, untrained network.
    """
    feature_layers = [
        nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),
        nn.Dropout(p=0.25),
    ]
    classifier_layers = [
        Flatten(),
        nn.Linear(64 * 14 * 14, 128),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(128, num_classes),
    ]
    # Splat both groups into a single flat Sequential so module indices
    # stay 0..10.
    return nn.Sequential(*feature_layers, *classifier_layers)

In [None]:
def train(name, pytorch_module):
    """Train ``pytorch_module`` with PyToune and print its test loss and accuracy.

    The function relies on notebook-level state defined in earlier cells:
    ``train_loader``, ``valid_loader``, ``test_loader``, ``learning_rate``,
    ``n_epoch`` and ``device``.

    Args:
        name: Prefix for every file written by the callbacks
            (``<name>_last_epoch.ckpt``, ``<name>_best_epoch_<epoch>.ckpt``,
            ``<name>_log.tsv``) and for their temporary files.
        pytorch_module: The ``nn.Module`` to optimise. Its parameters are
            handed to the optimizer and the module is moved to ``device``.
    """
    # One nice feature of Pytoune is callbacks.
    # Temporary filenames carry the `name` prefix so that runs with different
    # names never share a scratch file.
    callbacks = [
        # Save the latest weights to be able to continue the optimization at the end for more epochs.
        ModelCheckpoint(
            name + '_last_epoch.ckpt',
            temporary_filename=name + '_last_epoch.ckpt.tmp',
        ),

        # Save the weights in a new file when the current model is better than all previous models
        # (highest validation accuracy).
        ModelCheckpoint(
            name + '_best_epoch_{epoch}.ckpt',
            monitor='val_acc',
            mode='max',
            save_best_only=True,
            restore_best=True,
            verbose=False,
            temporary_filename=name + '_best_epoch.ckpt.tmp',
        ),

        # Save the losses and accuracies for each epoch in a TSV.
        CSVLogger(name + '_log.tsv', separator='\t'),
    ]

    # Optimizer and loss function
    optimizer = optim.SGD(pytorch_module.parameters(), lr=learning_rate, weight_decay=0.001)
    loss_function = nn.CrossEntropyLoss()

    # Pytoune Model wrapping the network, optimizer, loss and accuracy metric
    model = Model(pytorch_module, optimizer, loss_function, metrics=['accuracy'])

    # Move the model to the selected device (GPU when available, otherwise CPU)
    model.to(device)

    # Train, validating on the held-out validation loader
    model.fit_generator(train_loader, valid_loader, epochs=n_epoch, callbacks=callbacks)

    # Finally, evaluate on the test set and output the final test loss and accuracy.
    test_loss, test_acc = model.evaluate_generator(test_loader)
    print('Test:\n\tLoss: {}\n\tAccuracy: {}'.format(test_loss, test_acc))

In [None]:
# Build the fully connected network and print its layer structure.
fc_net = create_fully_connected_network()
print(fc_net)

# Train it; the files written by the callbacks are prefixed with 'fc'.
train('fc', fc_net)

In [None]:
# Build the convolutional network and print its layer structure.
cnn = create_convolutional_network()
print(cnn)

# Train it; the files written by the callbacks are prefixed with 'cnn'.
train('cnn', cnn)