In [35]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [36]:
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Hyperparameters shared by the cells below.
input_size = 28 * 28  # one flattened 28x28 image -> 784 features
num_classes = 10
batch_size = 100
learning_rate = 1e-3

In [37]:
# MNIST splits rooted at the current directory. Every sample is passed through
# ToTensor(). download=True is requested for the training split only; the test
# split below is constructed without it.
train_dataset = torchvision.datasets.MNIST(
    root='.',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

test_dataset = torchvision.datasets.MNIST(
    root='.',
    train=False,
    transform=transforms.ToTensor(),
)

# Mini-batch iterators over the two splits: the training batches are
# reshuffled, the test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [38]:
class FeedForwardNeuralNet(nn.Module):
    """Fully connected classifier: two hidden layers with LeakyReLU, linear output.

    Args:
        input_size: Number of features in each input sample (last dimension
            of the tensor passed to ``forward``).
        num_classes: Number of scores produced per sample.
        hidden_size: Width of both hidden layers. Defaults to 600, the value
            that was previously hard-coded, so existing two-argument calls
            build exactly the same architecture.
    """

    def __init__(self, input_size, num_classes, hidden_size=600):
        super().__init__()
        # Attribute names and creation order are kept stable: the names are
        # the keys of state_dict() (and therefore of saved checkpoints), and
        # the order determines which random draws initialise which layer.
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, num_classes)
        self.leakyRelu = nn.LeakyReLU()

    def forward(self, x):
        """Return raw, unnormalised class scores for ``x``.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``num_classes``.
        """
        x = self.leakyRelu(self.linear1(x))
        x = self.leakyRelu(self.linear2(x))
        # No activation or softmax on the output layer: callers receive the
        # plain linear scores.
        return self.linear3(x)

In [39]:
# Build the network, then move it to the selected device
# (nn.Module.to returns the module itself).
model = FeedForwardNeuralNet(input_size, num_classes)
model = model.to(device)

# Loss and optimizer: cross-entropy on the network's raw scores, Adam over
# every parameter of the model created above.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
)

In [40]:
def test_model(model, test_loader):
    # Test the model
    # In test phase, we don't need to compute gradients (for memory efficiency)
    with torch.no_grad():
        n_correct = 0
        n_samples = 0
        for images, labels in test_loader:
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)
            results = model(images)
            # max returns (value ,index)
            _, predicted = torch.max(results.data, 1)
            n_samples += labels.size(0)
            n_correct += (predicted == labels).sum().item()

        acc = n_correct / n_samples
        print(f'Accuracy of the model: {acc * 100.0:.4f} %')
        return acc

In [41]:
def train_model(train_loader, num_epochs, model):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    After every epoch the model is evaluated with ``test_model`` on the
    notebook-level ``test_loader`` and its ``state_dict`` is saved to a file
    in the current directory whose name embeds the epoch number and accuracy.

    Args:
        train_loader: Sized iterable yielding ``(images, labels)`` batches;
            each image batch is flattened to rows of 784 values.
        num_epochs: Number of full passes over ``train_loader``.
        model: ``nn.Module`` to optimise in place.

    NOTE(review): ``criterion``, ``optimizer`` and ``test_loader`` are read
    from notebook-level names. The optimizer only updates the parameters it
    was constructed with, so it must have been built from this same ``model``.
    """
    total_steps = len(train_loader)
    # Use the device the model's weights live on instead of a notebook global.
    train_device = next(model.parameters()).device

    for epoch in range(num_epochs):
        # Explicitly (re-)enter training mode each epoch so training does not
        # depend on whatever mode the evaluation step left the model in.
        model.train()
        for i, (images, labels) in enumerate(train_loader):
            # flatten images
            images = images.reshape(-1, 784).to(train_device)
            labels = labels.to(train_device)

            # forward pass
            results = model(images)
            loss = criterion(results, labels)

            # backwards and optimise
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report the current batch loss every 200 steps.
            if (i+1) % 200 == 0:
                print(f'Epoch: {epoch+1} out of {num_epochs}, Step: {i+1} out of {total_steps}, Loss: {loss.item():.4f}')

        acc = test_model(model, test_loader)
        # NOTE(review): the ':' characters in this file name are not valid in
        # Windows file names; the name is kept unchanged here in case other
        # cells load these checkpoints by name.
        torch.save(model.state_dict(), f'./epoch:{epoch+1}acc:{acc * 100.0:.3f}.pt')


In [42]:
# Run 20 training epochs; accuracy is reported and a checkpoint saved after each one.
train_model(train_loader=train_loader, num_epochs=20, model=model)

Epoch: 1 out of 20, Step: 200 out of 600, Loss: 0.2089
Epoch: 1 out of 20, Step: 400 out of 600, Loss: 0.1262
Epoch: 1 out of 20, Step: 600 out of 600, Loss: 0.0723
Accuracy of the model: 96.7700 %
Epoch: 2 out of 20, Step: 200 out of 600, Loss: 0.2343
Epoch: 2 out of 20, Step: 400 out of 600, Loss: 0.1108
Epoch: 2 out of 20, Step: 600 out of 600, Loss: 0.0154
Accuracy of the model: 97.7100 %
Epoch: 3 out of 20, Step: 200 out of 600, Loss: 0.0332
Epoch: 3 out of 20, Step: 400 out of 600, Loss: 0.0428
Epoch: 3 out of 20, Step: 600 out of 600, Loss: 0.0079
Accuracy of the model: 97.6500 %
Epoch: 4 out of 20, Step: 200 out of 600, Loss: 0.0427
Epoch: 4 out of 20, Step: 400 out of 600, Loss: 0.1241
Epoch: 4 out of 20, Step: 600 out of 600, Loss: 0.0668
Accuracy of the model: 98.1000 %
Epoch: 5 out of 20, Step: 200 out of 600, Loss: 0.0076
Epoch: 5 out of 20, Step: 400 out of 600, Loss: 0.0469
Epoch: 5 out of 20, Step: 600 out of 600, Loss: 0.0406
Accuracy of the model: 98.0700 %
Epoch: 6 o