In [1]:
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms

In [2]:


# MNIST train / held-out splits, converted to float tensors by ToTensor().
# `download=True` fetches the files into ./ds only when they are missing, so
# the cell also works on a fresh checkout instead of raising.
trainset = torchvision.datasets.MNIST(
    root='./ds', train=True, download=True, transform=transforms.ToTensor()
)
# NOTE: this is MNIST's `train=False` split, used here as the validation set.
valset = torchvision.datasets.MNIST(
    root='./ds', train=False, download=True, transform=transforms.ToTensor()
)

BATCH_SIZE = 32
# Shuffle only the training data; validation order does not matter.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False)

# Number of batches per epoch for each loader (displayed as the cell output).
len(trainloader), len(valloader)

(1875, 313)

In [None]:
import torch.nn.functional as F

class MNIST_NET(nn.Module):
    """Small CNN classifier for 28x28 images (MNIST by default).

    Two ``Conv3x3 -> BatchNorm -> ReLU -> MaxPool2x2`` stages followed by a
    single fully connected layer that outputs raw class logits (no softmax).

    Args:
        in_channels: Number of channels in the input images. Defaults to 1.
        num_classes: Number of output logits. Defaults to 10.

    Note:
        The final linear layer is sized for 28x28 inputs (two 2x2 poolings
        give a 7x7 feature map); other spatial sizes will fail in ``fc``.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()

        # Layer names and creation order are kept stable so existing
        # checkpoints (state_dict keys) and seeded initialisation still match.
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)

        self.conv2 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(16)

        # 16 channels * 7 * 7 spatial positions after the second pooling.
        self.fc = nn.Linear(7 * 7 * 16, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, in_channels, 28, 28)``.
        """
        # Stage 1: padding=1 keeps the spatial size, pooling halves it (28 -> 14).
        x = self.conv1(x)
        x = torch.relu(self.bn1(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        # Stage 2: 14 -> 7.
        x = self.conv2(x)
        x = torch.relu(self.bn2(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        # Flatten everything except the batch dimension for the classifier.
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [7]:
# Seed the global RNG so weight initialisation and the shuffling order drawn
# from it are repeatable from one run to the next.
SEED = 0
torch.manual_seed(SEED)

device = 'cpu'

# Move the model to the same device the batches are sent to below; without
# this, switching `device` away from 'cpu' would fail on the first forward pass.
mnist_net = MNIST_NET().to(device)

optimizer = optim.Adam(mnist_net.parameters(), lr=1e-3)
# Divide the learning rate by 10 every 5 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
loss_fn = nn.CrossEntropyLoss()

epoch = 20  # total number of training epochs

for epoch_idx in range(1, epoch + 1):

    current_lr = optimizer.param_groups[0]['lr']
    print(f"Learning Rate: {current_lr}")

    # ---- training pass ----
    mnist_net.train()
    train_running_loss = 0.0
    train_seen = 0

    for imgs, labels in trainloader:

        imgs, labels = imgs.to(device), labels.to(device)

        pred = mnist_net(imgs)
        loss = loss_fn(pred, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` is a per-batch mean; weight it by the batch size so the epoch
        # figure is a true per-sample mean even when the last batch is short.
        batch_size = labels.size(0)
        train_running_loss += loss.item() * batch_size
        train_seen += batch_size

    # ---- validation pass (no gradients, BatchNorm in inference mode) ----
    mnist_net.eval()
    val_running_loss = 0.0
    val_seen = 0

    with torch.no_grad():
        for imgs, labels in valloader:

            imgs, labels = imgs.to(device), labels.to(device)

            pred = mnist_net(imgs)
            loss = loss_fn(pred, labels)

            batch_size = labels.size(0)
            val_running_loss += loss.item() * batch_size
            val_seen += batch_size

    train_el = train_running_loss / train_seen
    val_el = val_running_loss / val_seen

    # Step the LR schedule once per epoch, after the optimizer updates.
    scheduler.step()

    print("Epoch {}, t-loss {:3.4f}, v-loss {:3.4f}".format(epoch_idx, train_el, val_el))
    print()
    

Learning Rate: 0.001
Epoch 1, t-loss 0.1277, v-loss 0.0513

Learning Rate: 0.001
Epoch 2, t-loss 0.0511, v-loss 0.0395

Learning Rate: 0.001
Epoch 3, t-loss 0.0385, v-loss 0.0367

Learning Rate: 0.001
Epoch 4, t-loss 0.0329, v-loss 0.0370

Learning Rate: 0.001
Epoch 5, t-loss 0.0282, v-loss 0.0433

Learning Rate: 0.0001
Epoch 6, t-loss 0.0148, v-loss 0.0270

Learning Rate: 0.0001
Epoch 7, t-loss 0.0121, v-loss 0.0277

Learning Rate: 0.0001
Epoch 8, t-loss 0.0110, v-loss 0.0266

Learning Rate: 0.0001
Epoch 9, t-loss 0.0101, v-loss 0.0277

Learning Rate: 0.0001
Epoch 10, t-loss 0.0093, v-loss 0.0279

Learning Rate: 1e-05
Epoch 11, t-loss 0.0081, v-loss 0.0272

Learning Rate: 1e-05
Epoch 12, t-loss 0.0079, v-loss 0.0272

Learning Rate: 1e-05
Epoch 13, t-loss 0.0077, v-loss 0.0272

Learning Rate: 1e-05
Epoch 14, t-loss 0.0077, v-loss 0.0272

Learning Rate: 1e-05
Epoch 15, t-loss 0.0077, v-loss 0.0276

Learning Rate: 1.0000000000000002e-06
Epoch 16, t-loss 0.0075, v-loss 0.0273

Learning Ra

In [8]:
# Save only the learned parameters/buffers (state_dict), not the pickled module.
MODEL_PATH = Path('./models/mnist-1012.pt')
# torch.save does not create missing parent directories, so make sure
# ./models exists before writing the checkpoint.
MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
torch.save(mnist_net.state_dict(), MODEL_PATH)

In [None]:
# Scratch cell: constructs a ReLU and a PReLU activation module without
# assigning either to a name; nothing in the visible cells uses them.
# NOTE(review): looks like leftover experimentation — consider removing this cell.
nn.ReLU()
nn.PReLU()