In [13]:
from torchvision.datasets import MNIST
from torchvision import transforms
import torch
import os

train_data = MNIST(
    root=os.path.join("dataset", "mnist"),
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_data = MNIST(
    root=os.path.join("dataset", "mnist"),
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [20]:
import torch
from torch import nn
from collections import OrderedDict


class Model(nn.Module):
    def __init__(self, in_features, out_features=10) -> None:
        super().__init__()
        self.in_features = in_features
        self.layer1 = nn.Linear(in_features=self.in_features, out_features=256)
        self.layer2 = nn.Linear(in_features=256, out_features=128)
        self.layer3 = nn.Linear(in_features=128, out_features=64)
        self.layer4 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x: torch.Tensor):
        x = x.view(-1, self.in_features)
        x = self.layer1(x)
        x = nn.functional.relu(x)
        x = self.layer2(x)
        x = nn.functional.relu(x)
        x = self.layer3(x)
        x = nn.functional.relu(x)
        x = self.layer4(x)
        return x


model = Model(28 * 28).to(device=device)
model


Model(
  (layer1): Linear(in_features=784, out_features=256, bias=True)
  (layer2): Linear(in_features=256, out_features=128, bias=True)
  (layer3): Linear(in_features=128, out_features=64, bias=True)
  (layer4): Linear(in_features=64, out_features=10, bias=True)
)

In [21]:
from torch import optim

optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()


In [22]:
from torch.utils.data import DataLoader

data_loader = DataLoader(
    dataset=train_data, batch_size=64, shuffle=True, drop_last=True
)


In [34]:
epochs = 3
for epoch in range(1, epochs + 1):
    for X, y in data_loader:
        X = X.to(device=device)
        y = y.to(device=device)

        train_preds = model(X)
        loss = criterion(train_preds, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    print(f"epoch {epoch:3}: {loss.item()}")


epoch   1: 0.009460562840104103
epoch   2: 0.04853010177612305
epoch   3: 0.04203240945935249


In [35]:
test_preds = model(test_data.data.to(device=device, dtype=torch.float))
test_labels = test_data.targets
from sklearn.metrics import classification_report

print(
    classification_report(
        test_preds.to(device="cpu").detach().numpy().argmax(axis=1),
        test_labels,
        digits=4,
    )
)


              precision    recall  f1-score   support

           0     0.9929    0.9838    0.9883       989
           1     0.9912    0.9903    0.9908      1136
           2     0.9835    0.9769    0.9802      1039
           3     0.9851    0.9595    0.9722      1037
           4     0.9817    0.9787    0.9802       985
           5     0.9641    0.9729    0.9685       884
           6     0.9666    0.9914    0.9789       934
           7     0.9864    0.9750    0.9807      1040
           8     0.9682    0.9843    0.9762       958
           9     0.9713    0.9820    0.9766       998

    accuracy                         0.9795     10000
   macro avg     0.9791    0.9795    0.9792     10000
weighted avg     0.9796    0.9795    0.9795     10000



https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html


In [49]:
def print_weights(*weights):
    for weight in weights:
        print("=" * 10)
        print("weight:", weight[0, :5])
        print("weight.grad:", weight.grad[0, :5])


for X, y in data_loader:
    X = X.to(device=device)
    y = y.to(device=device)

    train_preds = model(X)
    loss = criterion(train_preds, y)
    loss.backward()
    print_weights(
        model.layer1.weight,
        model.layer2.weight,
        model.layer3.weight,
        model.layer4.weight,
    )
    break


weight: tensor([-0.0327,  0.0330, -0.0311,  0.0176, -0.0032], grad_fn=<SliceBackward0>)
weight.grad: tensor([0., 0., 0., 0., 0.])
weight: tensor([-0.1657,  0.0672, -0.0361,  0.1261, -0.0182], grad_fn=<SliceBackward0>)
weight.grad: tensor([-1.4438e-06,  1.4810e-02, -1.4210e-05,  1.2742e-02,  0.0000e+00])
weight: tensor([ 0.0395,  0.0097, -0.0126, -0.0770, -0.0158], grad_fn=<SliceBackward0>)
weight.grad: tensor([-4.3883e-04, -2.3837e-03,  0.0000e+00, -4.3627e-06,  9.6070e-11])
weight: tensor([ 0.1217,  0.0398, -0.1383, -0.1216, -0.0594], grad_fn=<SliceBackward0>)
weight.grad: tensor([0.0906, 0.0168, 0.0180, 0.0000, 0.0048])
