In [8]:
import torch.nn
import torchvision

In [9]:
class Net(torch.nn.Module):
    """Small convolutional classifier: two 3x3 convs, one 2x2 max-pool, two dense layers.

    The dense head expects 128 * 14 * 14 features per sample. The conv stack
    produces exactly that for single-channel 28x28 inputs: both convolutions
    use kernel_size=3 with padding=1 (spatial size preserved) and the pooling
    layer halves height and width.
    """

    # (channels, height, width) the conv stack must produce for the dense head.
    _FEATURE_SHAPE = (128, 14, 14)

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 64, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 128, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(stride=2, kernel_size=2)
        )
        self.dense = torch.nn.Sequential(
            torch.nn.Linear(128 * 14 * 14, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(1024, 10)
        )

    def forward(self, x):
        """Run the conv stack, flatten each sample and apply the dense head.

        Args:
            x: image tensor whose conv features come out as (..., 128, 14, 14),
                i.e. single-channel 28x28 images.

        Returns:
            Tensor with one row of 10 raw class scores per sample.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield
                128x14x14 conv features.
        """
        x = self.conv1(x)
        # Guard before flattening: with view(-1, ...) a wrong-sized input whose
        # element count happens to be a multiple of 128*14*14 would otherwise
        # be reshaped silently, changing the batch size and mixing samples.
        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise RuntimeError(
                'expected conv features of shape %s per sample, got %s'
                % (self._FEATURE_SHAPE, tuple(x.shape[-3:])))
        x = x.view(-1, 128 * 14 * 14)
        x = self.dense(x)
        return x


In [11]:
# Both MNIST splits are stored under the current directory (downloaded when
# missing) and every image goes through the ToTensor transform.
to_tensor = torchvision.transforms.ToTensor()

train_dataset = torchvision.datasets.MNIST(
        root='.', train=True, transform=to_tensor, download=True)
test_dataset = torchvision.datasets.MNIST(
        root='.', train=False, transform=to_tensor, download=True)

batch_size = 100

# Reshuffle the training samples every epoch so the optimizer does not see
# the same batches in the same order each time.
train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Sample order does not influence the accuracy computation, so the test
# split keeps its fixed order.
test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [12]:
# Use the GPU when one is available and fall back to the CPU otherwise, so
# the cell also runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = Net().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters())

num_epochs = 5
# Ask the loader for its length instead of recomputing it from the dataset
# size, which would be off when the size is not a multiple of the batch size.
steps_per_epoch = len(train_loader)

# Make training mode explicit so Dropout is active even if the model was
# previously switched to eval mode.
net.train()

for epoch in range(num_epochs):
    for idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        preds = net(images)
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()

        # Report the loss of the current batch every 100 steps.
        if idx % 100 == 0:
            # .item() yields a plain Python float for the format string.
            print('epoch [%d/%d], Step [%d/%d], Loss = {%.4f}'
                % (epoch + 1, num_epochs, idx,
                steps_per_epoch, loss.item()))

epoch [1/5], Step [0/600], Loss = {2.2999}
epoch [1/5], Step [100/600], Loss = {0.1963}
epoch [1/5], Step [200/600], Loss = {0.2332}
epoch [1/5], Step [300/600], Loss = {0.1053}
epoch [1/5], Step [400/600], Loss = {0.0544}
epoch [1/5], Step [500/600], Loss = {0.0415}
epoch [2/5], Step [0/600], Loss = {0.0880}
epoch [2/5], Step [100/600], Loss = {0.0391}
epoch [2/5], Step [200/600], Loss = {0.0515}
epoch [2/5], Step [300/600], Loss = {0.1384}
epoch [2/5], Step [400/600], Loss = {0.0127}
epoch [2/5], Step [500/600], Loss = {0.0492}
epoch [3/5], Step [0/600], Loss = {0.0255}
epoch [3/5], Step [100/600], Loss = {0.0432}
epoch [3/5], Step [200/600], Loss = {0.0186}
epoch [3/5], Step [300/600], Loss = {0.0686}
epoch [3/5], Step [400/600], Loss = {0.0124}
epoch [3/5], Step [500/600], Loss = {0.0022}
epoch [4/5], Step [0/600], Loss = {0.0426}
epoch [4/5], Step [100/600], Loss = {0.0193}
epoch [4/5], Step [200/600], Loss = {0.0423}
epoch [4/5], Step [300/600], Loss = {0.0074}
epoch [4/5], Step 

In [15]:
correct = 0
total = 0

# Evaluate on whatever device the model currently lives on.
eval_device = next(net.parameters()).device

# Switch to eval mode so Dropout is disabled; otherwise the reported accuracy
# is measured on randomly thinned activations. The model stays in eval mode
# after this cell.
net.eval()

# No gradients are needed for scoring, so skip building the autograd graph.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(eval_device)
        labels = labels.to(eval_device)
        outputs = net(images)
        # The predicted class is the index of the highest score in each row.
        pred = torch.argmax(outputs, 1)
        total += labels.size(0)
        correct += (pred == labels).sum().item()

accuracy = correct / total
print('Accuracy of the network on the 10000 test images: %d %%'
    % (100 * accuracy))

Accuracy of the network on the 10000 test images: 98 %
