In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

# Pick the compute device: CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Define the neural network architecture
class Net(nn.Module):
    """Small convolutional classifier that outputs 10-way log-probabilities.

    Two 5x5 convolutions (each followed by 2x2 max-pooling and ReLU) feed a
    two-layer fully connected head. ``fc1`` expects 320 features per sample,
    which is what a single-channel 28x28 image yields
    (28 -> 24 -> 12 -> 8 -> 4 spatially, times 20 channels = 320).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities for the given images.

        Args:
            x: Image tensor whose spatial size reduces to 20 x 4 x 4 after
                the two conv/pool stages (e.g. ``(batch, 1, 28, 28)``).

        Returns:
            Tensor of shape ``(rows, 10)`` containing the log-softmax over
            dim 1 of the class scores.

        Raises:
            RuntimeError: If the per-sample feature count after the
                convolutional stages is not 320.
        """
        x = nn.functional.relu(nn.functional.max_pool2d(self.conv1(x), 2))
        x = nn.functional.relu(nn.functional.max_pool2d(self.conv2(x), 2))
        # Guard the reshape below: ``view(-1, 320)`` would otherwise silently
        # regroup features across the batch axis for a wrongly sized input
        # whenever the total element count happens to be divisible by 320.
        features = x.size(-3) * x.size(-2) * x.size(-1)
        if features != 320:
            raise RuntimeError(
                f'Net expects 320 features per sample after the conv layers, '
                f'got {features} (tensor shape {tuple(x.shape)})'
            )
        x = x.view(-1, 320)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return nn.functional.log_softmax(x, dim=1)

# Load the MNIST dataset (download=True fetches it into data/ when needed)
train_dataset = MNIST(root='data/', train=True, transform=ToTensor(), download=True)
test_dataset = MNIST(root='data/', train=False, transform=ToTensor(), download=True)

# Set the batch size and the number of epochs
batch_size = 128
num_epochs = 10

# Create data loaders for the training and test datasets.
# Only the training data is shuffled; evaluation sums over the whole test
# set, so sample order does not matter there and shuffling is wasted work.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Initialize the network and move it to the selected device (GPU or CPU)
net = Net().to(device)

# Define the loss function and the optimizer.
# Net.forward already returns log-probabilities (log_softmax), so NLLLoss is
# the matching criterion; CrossEntropyLoss would apply log-softmax a second
# time to the same values.
criterion = nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.5)

# Run num_epochs passes. Each pass does one optimisation sweep over the
# training data followed by one gradient-free evaluation sweep over the
# test data, then prints a one-line summary.
for epoch in range(num_epochs):
    # ---- optimisation sweep ----
    net.train()
    train_loss = 0.0
    for data, target in train_loader:
        data = data.to(device)
        target = target.to(device)

        output = net(data)
        loss = criterion(output, target)

        # Clear gradients left over from the previous step before
        # back-propagating this batch.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so that dividing by the
        # dataset size below gives a per-sample average (assumes criterion
        # returns a per-batch mean -- confirm against its definition).
        train_loss += data.size(0) * loss.item()
    train_loss = train_loss / len(train_loader.dataset)

    # ---- evaluation sweep ----
    net.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = net(data)
            test_loss += data.size(0) * criterion(output, target).item()
            # Predicted class = index of the largest score in each row.
            pred = output.argmax(dim=1, keepdim=True)
            correct += (pred == target.view_as(pred)).sum().item()
    num_test_samples = len(test_loader.dataset)
    test_loss = test_loss / num_test_samples
    accuracy = 100.0 * correct / num_test_samples
    print(f'Epoch {epoch}: train loss = {train_loss:.4f}, test loss = {test_loss:.4f}, accuracy = {accuracy:.2f}%')


Epoch 0: train loss = 1.2907, test loss = 0.3768, accuracy = 88.32%
Epoch 1: train loss = 0.3218, test loss = 0.2555, accuracy = 92.24%
Epoch 2: train loss = 0.2154, test loss = 0.1561, accuracy = 95.36%
Epoch 3: train loss = 0.1613, test loss = 0.1183, accuracy = 96.57%
Epoch 4: train loss = 0.1292, test loss = 0.0986, accuracy = 96.88%
Epoch 5: train loss = 0.1100, test loss = 0.0848, accuracy = 97.44%
Epoch 6: train loss = 0.0974, test loss = 0.0804, accuracy = 97.63%
Epoch 7: train loss = 0.0873, test loss = 0.0695, accuracy = 97.80%
Epoch 8: train loss = 0.0804, test loss = 0.0646, accuracy = 97.91%
Epoch 9: train loss = 0.0745, test loss = 0.0633, accuracy = 98.04%
