In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Hyper-parameters for the MNIST experiment.
input_size = 28      # side length of the square input images
num_classes = 10     # number of target classes
num_epochs = 3       # full passes over the training set
batch_size = 64      # samples per mini-batch

# Training split; downloaded into ./data on first use and converted to tensors.
train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)

# Held-out test split, read from the same root directory.
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor())

# Shuffle the training batches each epoch; keep the evaluation order fixed,
# since shuffling adds nothing to an accuracy measurement and only makes
# the evaluation pass non-deterministic in batch order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

### Construct the model

In [7]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Three convolutional stages (all 5x5 kernels with padding 2, so spatial
    size is preserved by each convolution) followed by one linear layer.
    The two max-pool layers reduce 28x28 -> 14x14 -> 7x7, which is why the
    linear layer expects ``7 * 7 * 64`` input features.

    Args:
        num_classes: Number of output scores per sample. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding=2),     # [b, 1, 28, 28] -> [b, 16, 28, 28]
            nn.ReLU(),
            nn.MaxPool2d(2),                                # [b, 16, 28, 28] -> [b, 16, 14, 14]
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),    # [b, 16, 14, 14] -> [b, 32, 14, 14]
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=5, padding=2),    # [b, 32, 14, 14] -> [b, 32, 14, 14]
            nn.ReLU(),
            nn.MaxPool2d(2),                                # [b, 32, 14, 14] -> [b, 32, 7, 7]
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, padding=2),    # [b, 32, 7, 7] -> [b, 64, 7, 7]
            nn.ReLU(),
        )
        self.fc = nn.Linear(7 * 7 * 64, num_classes)        # [b, 7*7*64] -> [b, num_classes]

    def forward(self, x):
        """Return raw class scores (logits) of shape ``[b, num_classes]``.

        No softmax is applied here: the notebook trains with
        ``nn.CrossEntropyLoss``, which expects unnormalised logits.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # Collapse channel and spatial dimensions, keeping the batch dimension.
        x = x.flatten(start_dim=1)
        return self.fc(x)

### Accuracy as the evaluation metric

In [8]:
def accuracy(y_pred: torch.Tensor, y_true: torch.Tensor) -> tuple:
    """Count correct predictions in one batch.

    Args:
        y_pred: Per-class scores; the predicted class is the index of the
            largest score along dimension 1.
        y_true: Target class indices, one per row of ``y_pred``.

    Returns:
        A ``(correct, total)`` tuple of Python ints, where ``total`` is the
        size of ``y_true`` along dimension 0. Returning counts rather than a
        ratio lets callers aggregate over batches of different sizes.
    """
    # argmax is not differentiable, so no autograd graph is recorded here;
    # there is no need to go through the legacy `.data` attribute.
    predicted = y_pred.argmax(dim=1)
    total = y_true.size(0)
    correct = (predicted == y_true).sum().item()
    return correct, total

### Training the model

In [9]:
# Objective: cross-entropy computed on the raw class scores the network returns.
criterion = nn.CrossEntropyLoss()
# Fresh, untrained model instance.
net = CNN()


In [10]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(num_epochs):
    # (correct, total) pairs for every training batch seen so far this epoch.
    train_rights = []
    net.train()

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        outputs = net(data)
        loss = criterion(outputs, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        correct = accuracy(outputs, target)
        train_rights.append(correct)

        # Every 100 batches, evaluate on the full test set and report progress.
        if batch_idx % 100 == 0:
            net.eval()
            val_right = []

            # Evaluation needs no gradients. The validation batches use their
            # own names so the training batch (`data`, `target`, `outputs`)
            # is still intact for the progress report below.
            with torch.no_grad():
                for val_data, val_target in test_loader:
                    val_data, val_target = val_data.to(device), val_target.to(device)
                    val_outputs = net(val_data)
                    val_right.append(accuracy(val_outputs, val_target))

            # Switch back to training mode before the next optimisation step.
            net.train()

            train_r = (sum(tup[0] for tup in train_rights), sum(tup[1] for tup in train_rights))
            val_r = (sum(tup[0] for tup in val_right), sum(tup[1] for tup in val_right))

            print('Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tTrain Acc: {:.3f}%\tVal Acc: {:.3f}%'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item(),
                100. * train_r[0] / train_r[1],
                100. * val_r[0] / val_r[1]
            ))

