In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from zoo.dataset.mnist import MNISTloader

# Build the train / validation / test loaders. With train_val_split=0.1 the
# loader reports 54000 training and 6000 validation samples (see the output
# of this cell), i.e. presumably 10% of the training data is held out.
train_loader, val_loader, test_loader = MNISTloader(train_val_split=0.1).load()

Image Shape:    (1, 32, 32)

Training Set:   54000 samples
Validation Set: 6000 samples
Test Set:       10000 samples


In [18]:
import torch
from torchinfo import summary
from zoo.model.mnist.simplenet_bn import SimpleNetBN

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The model is only instantiated here; it is not moved to `device` in this cell.
model = SimpleNetBN()
# Per-layer summary for an input of size (1, 1, 32, 32): one sample with the
# (1, 32, 32) image shape reported by the data loader.
summary(model, input_size=(1, 1, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
SimpleNetBN                              --                        --
├─Conv2d: 1-1                            [1, 6, 14, 14]            150
├─BatchNorm2d: 1-2                       [1, 6, 14, 14]            12
├─Conv2d: 1-3                            [1, 16, 5, 5]             2,400
├─BatchNorm2d: 1-4                       [1, 16, 5, 5]             32
├─Linear: 1-5                            [1, 120]                  48,000
├─BatchNorm1d: 1-6                       [1, 120]                  240
├─Linear: 1-7                            [1, 84]                   10,080
├─BatchNorm1d: 1-8                       [1, 84]                   168
├─Linear: 1-9                            [1, 10]                   840
├─BatchNorm1d: 1-10                      [1, 10]                   20
Total params: 61,942
Trainable params: 61,942
Non-trainable params: 0
Total mult-adds (M): 0.15
Input size (MB): 0.00
Forward/backward pass 

In [24]:
def train(device, lr, model, optimizer, criterion, train_loader):
    """Run one training epoch over ``train_loader`` and return its metrics.

    Args:
        device: Device (e.g. a ``torch.device``) that the model and every
            batch are moved to before the forward pass.
        lr: Unused by this function; kept so existing callers keep working.
            The learning rate in effect is the one ``optimizer`` was built with.
        model: Network to train; its parameters are updated in place.
        optimizer: Optimizer that steps ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The per-batch loss is weighted by the batch size, which assumes
            the criterion returns a batch mean -- confirm if a different
            reduction is used.
        train_loader: Iterable of ``(inputs, labels)`` batches that exposes a
            ``sampler`` whose length is the number of samples per epoch.

    Returns:
        A ``(train_loss, train_acc)`` tuple: the sample-weighted mean loss as
        a Python float, and the fraction of correctly classified samples
        (a 0-dim tensor whenever at least one batch was processed).
    """
    train_loss_running, train_acc_running = 0, 0

    # Put the model on the device the caller asked for. Deciding this from
    # torch.cuda.is_available() instead would move the model to the GPU even
    # when `device` is the CPU, leaving model and batches on different devices.
    model.to(device)
    model.train()

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)

        # Predicted class = index of the largest output along dim 1.
        _, predictions = torch.max(outputs, dim=1)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the epoch average is
        # correct even when the last batch is smaller.
        train_loss_running += loss.item() * inputs.shape[0]
        train_acc_running += torch.sum(predictions == labels)

    num_samples = len(train_loader.sampler)
    train_loss = train_loss_running / num_samples
    train_acc = train_acc_running / num_samples

    return train_loss, train_acc
    
def evaluate(device, model, criterion, val_loader):
    """Evaluate ``model`` on ``val_loader`` without updating its parameters.

    Args:
        device: Device (e.g. a ``torch.device``) that the model and every
            batch are moved to before the forward pass.
        model: Network to evaluate; it is switched to eval mode.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The per-batch loss is weighted by the batch size, which assumes
            the criterion returns a batch mean -- confirm if a different
            reduction is used.
        val_loader: Iterable of ``(inputs, labels)`` batches that exposes a
            ``sampler`` whose length is the number of samples evaluated.

    Returns:
        A ``(val_loss, val_acc)`` tuple: the sample-weighted mean loss as a
        Python float, and the fraction of correctly classified samples
        (a 0-dim tensor whenever at least one batch was processed).
    """
    val_loss_running, val_acc_running = 0, 0

    # Put the model on the device the caller asked for. Deciding this from
    # torch.cuda.is_available() instead would move the model to the GPU even
    # when `device` is the CPU, leaving model and batches on different devices.
    model.to(device)
    model.eval()

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Predicted class = index of the largest output along dim 1.
            _, predictions = torch.max(outputs, dim=1)
            val_loss_running += loss.item() * inputs.shape[0]
            val_acc_running += torch.sum(predictions == labels)

    num_samples = len(val_loader.sampler)
    val_loss = val_loss_running / num_samples
    val_acc = val_acc_running / num_samples

    return val_loss, val_acc

In [25]:
import torch.optim as optim
import torch.nn as nn

# Hyper-parameters for this run.
lr = 0.001
num_epochs = 3
# Move the model to the selected device before the optimizer is given its parameters.
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr)
criterion = nn.CrossEntropyLoss()

# Each epoch: one pass over the training set, then a pass over the validation
# set, then print the loss and accuracy of both.
for epoch in range(num_epochs):
    train_loss, train_acc = train(device, lr, model, optimizer, criterion, train_loader)
    val_loss, val_acc = evaluate(device, model, criterion, val_loader)
    info = "Epoch: {:3}/{} \t train_Loss: {:.3f} \t train_acc: {:.3f} \t val_loss: {:.3f} \t val_acc: {:.3f}"
    # `epoch` is zero-based, so report epoch + 1.
    print(info.format(epoch + 1, num_epochs, train_loss, train_acc, val_loss, val_acc))

Epoch:   1/3 	 train_Loss: 0.317 	 train_acc: 0.956 	 val_loss: 0.129 	 val_acc: 0.982
Epoch:   2/3 	 train_Loss: 0.126 	 train_acc: 0.980 	 val_loss: 0.082 	 val_acc: 0.984
Epoch:   3/3 	 train_Loss: 0.076 	 train_acc: 0.987 	 val_loss: 0.063 	 val_acc: 0.985


In [26]:
 # Evaluate the trained model on the test loader; the cell displays the
 # (loss, accuracy) tuple returned by evaluate().
 evaluate(device, model, criterion, test_loader)

(0.06022411012500525, tensor(0.9872))