Device configuration:

In [1]:
import torch

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

Hyper-paramters:

In [6]:
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.001

MNIST dataset:

In [4]:
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [18]:
train_dataset = torchvision.datasets.MNIST(
    root=r'D:\pytorch_data', train=True, download=True, transform=transforms.ToTensor()
)

test_dataset = torchvision.datasets.MNIST(
    root=r'D:\pytorch_data', train=False, download=True, transform=transforms.ToTensor()
)

In [19]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Build convolutional neural network:

In [8]:
import torch.nn as nn

In [29]:
class ConvNet(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(num_features=16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc = nn.Linear(in_features=32*7*7, out_features=num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        return self.fc(out)

In [30]:
model = ConvNet(num_classes=num_classes).to(device)

Loss and optimizer:

In [13]:
import torch.optim as optim

In [31]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate) 

Train model:

In [32]:
total = len(train_dataset)
model.train()
for epoch in range(num_epochs):
    current_items = 0
    for step, (images, labels) in enumerate(train_loader):
        # cast data to device
        images = images.to(device)
        labels = labels.to(device)

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # update parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        current_items += labels.size(0)
        print(f'Epoch {epoch+1}/{num_epochs}, item {current_items}/{total}, loss: {loss.item()}')

Epoch 1/5, item 100/60000, loss: 2.596728563308716
Epoch 1/5, item 200/60000, loss: 2.411627769470215
Epoch 1/5, item 300/60000, loss: 2.1073029041290283
Epoch 1/5, item 400/60000, loss: 2.0531539916992188
Epoch 1/5, item 500/60000, loss: 1.5883256196975708
Epoch 1/5, item 600/60000, loss: 1.6314201354980469
Epoch 1/5, item 700/60000, loss: 1.5498285293579102
Epoch 1/5, item 800/60000, loss: 1.591380000114441
Epoch 1/5, item 900/60000, loss: 1.4928380250930786
Epoch 1/5, item 1000/60000, loss: 1.229426383972168
Epoch 1/5, item 1100/60000, loss: 1.0513721704483032
Epoch 1/5, item 1200/60000, loss: 1.045264720916748
Epoch 1/5, item 1300/60000, loss: 0.9074149131774902
Epoch 1/5, item 1400/60000, loss: 1.0196408033370972
Epoch 1/5, item 1500/60000, loss: 0.6913506984710693
Epoch 1/5, item 1600/60000, loss: 0.7270841002464294
Epoch 1/5, item 1700/60000, loss: 0.7313469648361206
Epoch 1/5, item 1800/60000, loss: 0.7448035478591919
Epoch 1/5, item 1900/60000, loss: 0.7110031247138977
Epoch 1

Evaluate:

In [33]:
model.eval()
total = len(test_dataset)
correct = 0
for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)

    # forward pass
    outputs = model(images)
    _, predict = torch.max(outputs, 1)
    correct += (predict == labels).sum()

print(f'Number of images: {total}, accuracy: {100*correct/total:.2f}')

Number of images: 10000, accuracy: 99.07


Save model:

In [34]:
torch.save(model.state_dict(), '../weights/conv_net.ckpt')