In [3]:
import torch
from torchvision.transforms import ToTensor, Normalize, Compose
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn.functional as F

In [4]:
import torch.nn as nn
class MLPModel(nn.Module):
    '''
    Fully connected classifier: three hidden Linear+ReLU layers followed by a
    linear output layer that produces raw (un-normalized) logits.

    Args:
        input_dim: number of features per sample after flattening
            (e.g. 28*28 for a single-channel 28x28 image).
        hidden_dim: width of each of the three hidden layers.
        output_dim: number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    '''

    def __init__(self, input_dim, hidden_dim, output_dim=10):
        super().__init__()
        # The attribute name `layers` and the module order are kept as-is so
        # state_dict keys (layers.0.weight, ..., layers.6.bias) do not change.
        self.layers = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, input):
        '''
        Flattens every dimension after the first and applies the MLP.

        Args:
            input: tensor whose first dimension is the batch; the remaining
                dimensions must multiply out to `input_dim`.

        Returns:
            Tensor of shape (batch, output_dim) holding the logits.
        '''
        # reshape() behaves like view() for contiguous tensors but also
        # accepts non-contiguous ones (e.g. after transpose/permute), where
        # view() raises a RuntimeError.
        flat = input.reshape(input.size(0), -1)
        return self.layers(flat)

In [5]:
import numpy as np
import torch

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train(model, train_loader, optimizer, loss_fn, print_every=100):
    '''
    Trains the model for one epoch.

    Args:
        model: the nn.Module to optimize; it is switched to train mode.
        train_loader: iterable yielding (images, labels) batches.
        optimizer: optimizer that holds the parameters of `model`.
        loss_fn: callable mapping (output, labels) to a scalar loss tensor.
        print_every: currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        (mean_loss, accuracy): the unweighted mean of the per-batch losses and
        the percentage (0-100) of correctly classified samples among the
        samples actually iterated over. Returns (nan, 0.0) if the loader
        yields no batches.
    '''
    # Move batches to wherever the model lives instead of relying solely on
    # the notebook-level `device`; that global is only the fallback for
    # models without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    model.train()
    losses = []
    n_correct = 0
    n_seen = 0
    for images, labels in train_loader:
        images = images.to(target_device)
        labels = labels.to(target_device)
        optimizer.zero_grad()
        output = model(images)
        loss = loss_fn(output, labels)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        n_correct += torch.sum(output.argmax(1) == labels).item()
        # Count what was really processed: len(train_loader.dataset) is wrong
        # with drop_last=True or a subset sampler, and is unavailable on
        # plain iterables of batches.
        n_seen += labels.size(0)

    if not losses:
        # Empty loader: avoid np.mean([]) (RuntimeWarning) and a 0/0 division.
        return float('nan'), 0.0

    accuracy = 100.0 * n_correct / n_seen
    return np.mean(np.array(losses)), accuracy
            
def test(model, test_loader, loss_fn):
    '''
    Tests the model on data from test_loader
    '''
    model.eval()
    test_loss = 0
    n_correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            output = model(images)
            loss = loss_fn(output, labels)
            test_loss += loss.item()
            n_correct += torch.sum(output.argmax(1) == labels).item()

    average_loss = test_loss / len(test_loader)
    accuracy = 100.0 * n_correct / len(test_loader.dataset)
#     print('Test average loss: {:.4f}, accuracy: {:.3f}'.format(average_loss, accuracy))
    return average_loss, accuracy


def fit(train_dataloader, val_dataloader, model, optimizer, loss_fn, n_epochs, scheduler=None):
    '''
    Runs `n_epochs` rounds of training followed by validation and prints one
    summary line per epoch.

    Args:
        train_dataloader: batches passed to train() each epoch.
        val_dataloader: batches passed to test() each epoch.
        model: the model being trained and evaluated.
        optimizer: optimizer handed to train().
        loss_fn: loss callable handed to train() and test().
        n_epochs: number of epochs to run.
        scheduler: optional learning-rate scheduler, stepped once per epoch
            after validation. A ReduceLROnPlateau scheduler is stepped with
            the epoch's validation loss; any other scheduler is stepped
            without arguments.

    Returns:
        (train_losses, train_accuracies, val_losses, val_accuracies): four
        lists with one entry per epoch.
    '''
    train_losses, train_accuracies = [], []
    val_losses, val_accuracies = [], []

    for epoch in range(n_epochs):
        train_loss, train_accuracy = train(model, train_dataloader, optimizer, loss_fn)
        val_loss, val_accuracy = test(model, val_dataloader, loss_fn)
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        if scheduler is not None:
            # ReduceLROnPlateau.step() requires the monitored metric; calling
            # it without one raises, so give it the validation loss.
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_loss)
            else:
                scheduler.step()

        print('Epoch {}/{}: train_loss: {:.4f}, train_accuracy: {:.4f}, val_loss: {:.4f}, val_accuracy: {:.4f}'.format(
            epoch + 1, n_epochs, train_loss, train_accuracy, val_loss, val_accuracy))

    return train_losses, train_accuracies, val_losses, val_accuracies

In [9]:
# Normalization constants and batch size shared by both splits.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST pixel mean / std —
# confirm before reusing this cell for another dataset.
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)
BATCH_SIZE = 100

# One preprocessing pipeline for train and test so the two cannot drift apart.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD)
])

train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=mnist_transform),
        batch_size=BATCH_SIZE, shuffle=True)
# download=True here means the test split does not depend on the train split
# having been fetched first. Shuffling is only useful for training, so the
# evaluation loader iterates in dataset order.
test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, download=True,
                       transform=mnist_transform),
        batch_size=BATCH_SIZE, shuffle=False)


In [10]:
# Peek at the first batch to check the image tensor layout. `tensor` is the
# whole batch as yielded by the loader; element 0 holds the images.
tensor = next(iter(train_loader), None)
if tensor is not None:
    print(tensor[0].shape)

torch.Size([100, 1, 28, 28])


In [11]:
# Seed PyTorch's global RNG before the model is built so weight
# initialization and the loaders' shuffling order are repeatable across
# Restart & Run All (GPU kernels may still add small nondeterminism).
SEED = 0
torch.manual_seed(SEED)

# 28*28 input features per flattened image, 128 units per hidden layer.
model_mlp = MLPModel(28*28, 128)
model_mlp = model_mlp.to(device)
learning_rate = 0.001
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=learning_rate)
n_epochs = 25
loss_fn = nn.CrossEntropyLoss()

In [12]:
# Train the MLP, validating on the test split after every epoch; the result
# is the tuple of per-epoch curves returned by fit().
curves_mlp = fit(
    train_dataloader=train_loader,
    val_dataloader=test_loader,
    model=model_mlp,
    optimizer=optimizer,
    loss_fn=loss_fn,
    n_epochs=n_epochs,
)

Epoch 1/25: train_loss: 0.3037, train_accuracy: 90.9533, val_loss: 0.1428, val_accuracy: 95.4300
Epoch 2/25: train_loss: 0.1149, train_accuracy: 96.3850, val_loss: 0.1028, val_accuracy: 96.6800
Epoch 3/25: train_loss: 0.0825, train_accuracy: 97.3967, val_loss: 0.0934, val_accuracy: 97.1700
Epoch 4/25: train_loss: 0.0636, train_accuracy: 97.9750, val_loss: 0.0888, val_accuracy: 97.1700
Epoch 5/25: train_loss: 0.0510, train_accuracy: 98.4000, val_loss: 0.0964, val_accuracy: 97.2700
Epoch 6/25: train_loss: 0.0413, train_accuracy: 98.6833, val_loss: 0.0802, val_accuracy: 97.7600
Epoch 7/25: train_loss: 0.0349, train_accuracy: 98.8633, val_loss: 0.0843, val_accuracy: 97.6200
Epoch 8/25: train_loss: 0.0311, train_accuracy: 98.9400, val_loss: 0.1028, val_accuracy: 97.0400
Epoch 9/25: train_loss: 0.0293, train_accuracy: 99.0667, val_loss: 0.0784, val_accuracy: 97.9200
Epoch 10/25: train_loss: 0.0231, train_accuracy: 99.2167, val_loss: 0.0965, val_accuracy: 97.6800
Epoch 11/25: train_loss: 0.02