Device configuration:

In [2]:
import torch

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

Hyper-parameters:

In [14]:
input_size = 28 * 28
hidden1_size = 500
hidden2_size = 100
num_classes = 10
num_epochs = 5
batch_size = 64
learning_rate = 0.005

MNIST dataset:

In [4]:
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [27]:
train_dataset = torchvision.datasets.MNIST(
    root=r'D:\pytorch_data', train=True, download=True, transform=transforms.ToTensor()
)

test_dataset = torchvision.datasets.MNIST(
    root=r'D:\pytorch_data', train=False, download=True, transform=transforms.ToTensor()
)

In [28]:
# data loader 
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Build fully connected neural network:

In [7]:
import torch.nn as nn

In [29]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden1_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden1_size, out_features=hidden2_size)
        self.fc3 = nn.Linear(in_features=hidden2_size, out_features=num_classes)
        self.sequential = nn.Sequential(self.fc1, self.relu, self.fc2, self.relu, self.fc3)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        return self.fc3(out)

In [44]:
model = NeuralNetwork(input_size, num_classes).to(device)

Loss and optimizer:

In [10]:
import torch.optim as optim

In [45]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

Train model:

In [46]:
total = len(train_dataset)
model.train()
for epoch in range(num_epochs):
    current_items = 0
    for step, (images, labels) in enumerate(train_loader):
        # cast data to device
        images = images.view(images.size(0), -1).to(device)
        labels = labels.to(device)

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, labels) # CrossEntropyLoss automatically applies softmax on our outputs
        
        # update parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        current_items += labels.size(0)

        print(f'Epoch {epoch+1}/{num_epochs}, item {current_items}/{total}, loss: {loss.item():.2f}')

Epoch 1/5, item 64/60000, loss: 2.32
Epoch 1/5, item 128/60000, loss: 2.28
Epoch 1/5, item 192/60000, loss: 2.07
Epoch 1/5, item 256/60000, loss: 1.76
Epoch 1/5, item 320/60000, loss: 1.55
Epoch 1/5, item 384/60000, loss: 1.41
Epoch 1/5, item 448/60000, loss: 1.05
Epoch 1/5, item 512/60000, loss: 1.20
Epoch 1/5, item 576/60000, loss: 0.76
Epoch 1/5, item 640/60000, loss: 0.79
Epoch 1/5, item 704/60000, loss: 0.97
Epoch 1/5, item 768/60000, loss: 0.80
Epoch 1/5, item 832/60000, loss: 1.15
Epoch 1/5, item 896/60000, loss: 0.77
Epoch 1/5, item 960/60000, loss: 0.57
Epoch 1/5, item 1024/60000, loss: 0.72
Epoch 1/5, item 1088/60000, loss: 0.60
Epoch 1/5, item 1152/60000, loss: 0.94
Epoch 1/5, item 1216/60000, loss: 0.74
Epoch 1/5, item 1280/60000, loss: 0.61
Epoch 1/5, item 1344/60000, loss: 0.75
Epoch 1/5, item 1408/60000, loss: 0.56
Epoch 1/5, item 1472/60000, loss: 0.45
Epoch 1/5, item 1536/60000, loss: 0.48
Epoch 1/5, item 1600/60000, loss: 0.51
Epoch 1/5, item 1664/60000, loss: 0.70
Ep

Evaluate:

In [48]:
model.eval()
total = len(test_loader) * batch_size
correct = 0
for images, labels in test_loader:
    images = images.view(images.size(0), -1).to(device)
    labels = labels.to(device)

    outputs = model(images)
    _, predict = torch.max(outputs, 1)
    correct += (predict == labels).sum()

print(f'Number of images {total}, accuracy: {100 * correct / total:.2f}')

Number of images 10048, accuracy: 96.69


Save model:

In [49]:
torch.save(model.state_dict(), 'neural_network.ckpt')