In [56]:
import torch
from torch import nn, optim
from torchsummary import summary
import torchvision
import matplotlib.pyplot as plt

In [57]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device: {}".format(device))

Device: cuda:0


In [58]:
batch_size_train = 64
batch_size_test = 64

train_dataset = torchvision.datasets.MNIST('/files/', train=True, download=True, transform=torchvision.transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True)
test_dataset = torchvision.datasets.MNIST('/files/', train=False, download=True, transform=torchvision.transforms.ToTensor())
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size_test, shuffle=True)

print(train_loader)

<torch.utils.data.dataloader.DataLoader object at 0x0000021E6567B050>


In [59]:
MLP = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28*28,256),
    nn.ReLU(),
    nn.Dropout(p = 0.2),
    nn.Linear(256,10),
    nn.Softmax(-1)
)

MLP = MLP.to(device)
summary(MLP, (1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1                  [-1, 784]               0
            Linear-2                  [-1, 256]         200,960
              ReLU-3                  [-1, 256]               0
           Dropout-4                  [-1, 256]               0
            Linear-5                   [-1, 10]           2,570
           Softmax-6                   [-1, 10]               0
Total params: 203,530
Trainable params: 203,530
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.78
Estimated Total Size (MB): 0.79
----------------------------------------------------------------


In [60]:
optimizer = optim.SGD(MLP.parameters(),momentum=0.9, lr=0.01, weight_decay= 5e-4)
loss_func = nn.CrossEntropyLoss(reduction='mean')
num_epochs = 100

In [None]:
for epoch in range(num_epochs):
    MLP.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = MLP(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    MLP.eval()
    test_loss = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = MLP(inputs)
            loss = loss_func(outputs, labels)
            test_loss += loss.item()
    print(
        f"Epoch {epoch + 1}/{num_epochs} - Training Loss: {train_loss / len(train_loader):.4f} - Validation Loss: {test_loss / len(test_loader):.4f}")


In [None]:
MLP.eval()
test_acc = 0
for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    predictions = MLP(images)
    accuracy = (torch.max(predictions, dim=-1, keepdim=True)[1].flatten() == labels).sum() / len(labels)
    test_acc += accuracy.item()
test_acc /= len(test_loader)
print(f"Test accuracy: {test_acc:.3f}")