In [9]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from tqdm.notebook import tqdm
import numpy as np

In [10]:
class MLP(nn.Module):
    """Two-layer perceptron: flatten -> linear -> ReLU -> linear.

    The defaults reproduce the original fixed architecture (784 -> 32 -> 10),
    so ``MLP()`` behaves exactly as before; the sizes are exposed so the same
    class can be reused for other input shapes or label counts.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_features: Width of the single hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=28 * 28, hidden_features=32, num_classes=10):
        super().__init__()
        self.layers = nn.Sequential(
            # Collapse every dimension except the batch dimension.
            nn.Flatten(),
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            # Raw logits; no softmax is applied inside the model.
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return one row of ``num_classes`` logits per sample in ``x``."""
        return self.layers(x)

In [11]:
## Load data
# Both MNIST splits live under ./datasets (downloaded on demand) and share one transform.
BATCH_SIZE = 100
to_tensor = transforms.ToTensor()

mnist_train = datasets.MNIST(root="./datasets", train=True, download=True, transform=to_tensor)
mnist_test = datasets.MNIST(root="./datasets", train=False, download=True, transform=to_tensor)

# Only the training batches are shuffled; the test loader keeps dataset order.
train_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    mnist_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [12]:
# Report whether PyTorch can see a usable CUDA GPU in this environment.
torch.cuda.is_available()

True

In [13]:
# Move tensors to device
def to_device(data, device):
    """Move ``data`` onto ``device``, recursing into lists and tuples.

    A list or tuple is returned as a new list whose elements have each been
    moved (nested containers included). Anything else is moved with its own
    ``.to(device, non_blocking=True)`` and that result is returned.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

In [14]:
class DeviceDataLoader:
    """Thin wrapper that moves every batch of a dataloader onto a device."""

    def __init__(self, dl, device):
        """Remember the wrapped dataloader ``dl`` and the target ``device``."""
        self.device = device
        self.dl = dl

    def __iter__(self):
        """Iterate over the wrapped loader, yielding each batch after ``to_device``."""
        yield from (to_device(batch, self.device) for batch in self.dl)

    def __len__(self):
        """Return the number of batches reported by the wrapped loader."""
        n_batches = len(self.dl)
        return n_batches

In [15]:
# Prefer the GPU, but fall back to the CPU so this cell also works on
# machines where CUDA is not available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Wrap both loaders so each batch they yield has been passed through to_device.
train_loader = DeviceDataLoader(train_loader, device)
test_loader = DeviceDataLoader(test_loader, device)

In [16]:
## Model
# Prefer the GPU, but fall back to the CPU so this cell also works on
# machines where CUDA is not available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

mlp = MLP()
# Kept as the cell's final expression so the notebook displays the module summary.
to_device(mlp, device)

MLP(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=32, bias=True)
    (2): ReLU()
    (3): Linear(in_features=32, out_features=10, bias=True)
  )
)

In [17]:
## Loss function and optimizer
LEARNING_RATE = 0.1

# Cross-entropy on the model's outputs, minimised with SGD over all of mlp's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    mlp.parameters(),
    lr=LEARNING_RATE,
)

In [18]:
## Train for a fixed number of passes over the training loader
NUM_EPOCHS = 5

for epoch in range(NUM_EPOCHS):
    # Announce the (1-based) epoch before its progress bar appears
    print(f'Starting epoch {epoch+1}')

    for images, labels in tqdm(train_loader):
        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass, then the loss against the true labels
        y = mlp(images)
        loss = criterion(y, labels)

        # Backpropagate and apply one optimizer update
        loss.backward()
        optimizer.step()

Starting epoch 1


  0%|          | 0/600 [00:00<?, ?it/s]

Starting epoch 2


  0%|          | 0/600 [00:00<?, ?it/s]

Starting epoch 3


  0%|          | 0/600 [00:00<?, ?it/s]

Starting epoch 4


  0%|          | 0/600 [00:00<?, ?it/s]

Starting epoch 5


  0%|          | 0/600 [00:00<?, ?it/s]

In [19]:
## Testing
# Switch to evaluation mode before measuring accuracy (standard practice; it
# only changes behavior for layers such as dropout or batch norm).
mlp.eval()

# Count hits as a plain Python int. Accumulating a float32 tensor instead makes
# the final ratio print with float32 rounding noise (0.9478999972343445).
correct = 0
total = len(mnist_test)

with torch.no_grad():
    # Iterate through the test set minibatches
    for images, labels in tqdm(test_loader):
        # Forward pass
        y = mlp(images)

        # Predicted class = index of the largest logit in each row
        predictions = torch.argmax(y, dim=1)

        # .item() turns the per-batch hit count into an exact Python int
        correct += (predictions == labels).sum().item()

  0%|          | 0/100 [00:00<?, ?it/s]

In [20]:
# Fraction of test samples that were classified correctly
accuracy = correct / total
print('Test accuracy: {}'.format(accuracy))

Test accuracy: 0.9478999972343445
