In [22]:
from torchvision.datasets import MNIST
from torchvision import transforms
import os

train_data = MNIST(
    root=os.path.join("dataset", "mnist"),
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_data = MNIST(
    root=os.path.join("dataset", "mnist"),
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)


In [23]:
import torch
from torch import nn
from collections import OrderedDict

class Model(nn.Module):
    def __init__(self, in_features, out_features=10) -> None:
        super().__init__()
        self.in_features = in_features
        self.layer1 = nn.Linear(in_features=self.in_features, out_features=256)
        self.layer2 = nn.Linear(in_features=256, out_features=128)
        self.layer3 = nn.Linear(in_features=128, out_features=64)
        self.layer4 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x: torch.Tensor):
        x = x.view(-1, self.in_features)
        x = self.layer1(x)
        x = nn.functional.relu(x)
        x = self.layer2(x)
        x = nn.functional.relu(x)
        x = self.layer3(x)
        x = nn.functional.relu(x)
        x = self.layer4(x)
        return x


model = Model(28 * 28).to(device="cuda")
model


Model(
  (layer1): Linear(in_features=784, out_features=256, bias=True)
  (layer2): Linear(in_features=256, out_features=128, bias=True)
  (layer3): Linear(in_features=128, out_features=64, bias=True)
  (layer4): Linear(in_features=64, out_features=10, bias=True)
)

In [28]:
from torch import optim
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()


In [29]:
from torch.utils.data import DataLoader

data_loader = DataLoader(
    dataset=train_data, batch_size=64, shuffle=True, drop_last=True
)


In [30]:
epochs = 10
for epoch in range(1, epochs + 1):
    for X, y in data_loader:
        X = X.to(device="cuda")
        y = y.to(device="cuda")

        train_preds = model(X)
        loss = criterion(train_preds, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    print(f"epoch {epoch:3}: {loss.item()}")


epoch   1: 0.04490001127123833
epoch   2: 0.008067803457379341
epoch   3: 0.005276951938867569
epoch   4: 0.0014863050309941173
epoch   5: 0.047759849578142166
epoch   6: 2.9906799682066776e-05
epoch   7: 0.0006231588777154684
epoch   8: 2.867712828447111e-05
epoch   9: 0.00044782229815609753
epoch  10: 8.746065577724949e-05


In [37]:
test_preds = model(test_data.data.to(device="cuda", dtype=torch.float))
test_labels = test_data.targets
from sklearn.metrics import classification_report

print(
    classification_report(
        test_preds.to(device="cpu").detach().numpy().argmax(axis=1), test_labels,
        digits=4
    )
)


              precision    recall  f1-score   support

           0     0.9888    0.9828    0.9858       986
           1     0.9930    0.9809    0.9869      1149
           2     0.9797    0.9806    0.9801      1031
           3     0.9901    0.9606    0.9751      1041
           4     0.9796    0.9816    0.9806       980
           5     0.9753    0.9853    0.9803       883
           6     0.9833    0.9864    0.9848       955
           7     0.9825    0.9740    0.9782      1037
           8     0.9507    0.9893    0.9696       936
           9     0.9722    0.9790    0.9756      1002

    accuracy                         0.9798     10000
   macro avg     0.9795    0.9800    0.9797     10000
weighted avg     0.9800    0.9798    0.9798     10000



https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html