In [18]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
import torch.nn.functional as F
from torchvision import datasets, transforms
import os

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

In [3]:
train_ds = datasets.MNIST("./data", train=True, download=True, transform=transform)
test_ds = datasets.MNIST("./data", train=True, download=False, transform=transform)

In [4]:
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=1000, shuffle=False)

In [12]:
class MLPModel(nn.Module):
    def __init__(self):
        super(MLPModel, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.relu(x)
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)

In [13]:
model = MLPModel()

loss_fn = nn.CrossEntropyLoss()
optim = Adam(model.parameters())

In [15]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}]  Loss: {loss.item():.6f}')

In [16]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += loss_fn(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)')

In [17]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(1, 11):
    train(model, device, train_loader, optim, epoch)
    test(model, device, test_loader)

Train Epoch: 1 [0/60000]  Loss: 2.336066
Train Epoch: 1 [6400/60000]  Loss: 0.486879
Train Epoch: 1 [12800/60000]  Loss: 0.398596
Train Epoch: 1 [19200/60000]  Loss: 0.296878
Train Epoch: 1 [25600/60000]  Loss: 0.171500
Train Epoch: 1 [32000/60000]  Loss: 0.119654
Train Epoch: 1 [38400/60000]  Loss: 0.139519
Train Epoch: 1 [44800/60000]  Loss: 0.166283
Train Epoch: 1 [51200/60000]  Loss: 0.180262
Train Epoch: 1 [57600/60000]  Loss: 0.161799
Test set: Average loss: 0.0001, Accuracy: 57408/60000 (96%)
Train Epoch: 2 [0/60000]  Loss: 0.078120
Train Epoch: 2 [6400/60000]  Loss: 0.214819
Train Epoch: 2 [12800/60000]  Loss: 0.159792
Train Epoch: 2 [19200/60000]  Loss: 0.126897
Train Epoch: 2 [25600/60000]  Loss: 0.133875
Train Epoch: 2 [32000/60000]  Loss: 0.115174
Train Epoch: 2 [38400/60000]  Loss: 0.131928
Train Epoch: 2 [44800/60000]  Loss: 0.070494
Train Epoch: 2 [51200/60000]  Loss: 0.098839
Train Epoch: 2 [57600/60000]  Loss: 0.140501
Test set: Average loss: 0.0001, Accuracy: 58417/60

In [20]:
torch.save(model.state_dict(), os.path.join("../../weights", "mlp_model.pth"))