In [2]:
import torch
from torch import nn
import torchvision as tv
from torchsummary import summary
import time

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(f' Device: {torch.cuda.get_device_name(0)}. Now using: {device.type}', '\n',
      f'Torch Version: {torch.version.__version__}')

 Device: NVIDIA GeForce RTX 4070 Ti. Now using: cuda 
 Torch Version: 2.2.1


In [79]:
BATCH_SIZE=512
train_dataset = tv.datasets.FashionMNIST('.', train=True, transform=tv.transforms.ToTensor(), download=False)
test_dataset = tv.datasets.FashionMNIST('.', train=False, transform=tv.transforms.ToTensor(), download=False)
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [98]:
model = nn.Sequential(
        nn.Conv2d(1, 32, kernel_size = 3, padding = 1),
        nn.BatchNorm2d(32),
        nn.Softmax(),
        nn.Conv2d(32, 32, kernel_size = 3, padding = 1),
        nn.BatchNorm2d(32),
        nn.Softmax(),
        nn.Conv2d(32, 64, kernel_size = 1),
        nn.BatchNorm2d(64),
        nn.Softmax(),
        nn.MaxPool2d(kernel_size=3, stride=3),
        nn.BatchNorm2d(64),
        nn.Conv2d(64, 32, kernel_size=3),
        nn.Softmax(),
        nn.Flatten(),
        nn.Linear(1568, 6000),
        nn.Dropout(0.5),
        nn.ReLU(),
        nn.Linear(6000, 2048),
        nn.Dropout(0.5),
        nn.ReLU(),
        nn.Linear(2048, 1024),
        nn.Dropout(0.5),
        nn.ReLU(),
        nn.Linear(1024, 10),
        )

model.to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
       BatchNorm2d-2           [-1, 32, 28, 28]              64
           Softmax-3           [-1, 32, 28, 28]               0
            Conv2d-4           [-1, 32, 28, 28]           9,248
       BatchNorm2d-5           [-1, 32, 28, 28]              64
           Softmax-6           [-1, 32, 28, 28]               0
            Conv2d-7           [-1, 64, 28, 28]           2,112
       BatchNorm2d-8           [-1, 64, 28, 28]             128
           Softmax-9           [-1, 64, 28, 28]               0
        MaxPool2d-10             [-1, 64, 9, 9]               0
      BatchNorm2d-11             [-1, 64, 9, 9]             128
           Conv2d-12             [-1, 32, 7, 7]          18,464
          Softmax-13             [-1, 32, 7, 7]               0
          Flatten-14                 [-

In [55]:
loss = torch.nn.CrossEntropyLoss()

In [64]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0, 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(device), y.to(device)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

def train(net, train_iter, test_iter, trainer, num_epochs):
    net.to(device)
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        
        for i, (X, y) in enumerate(train_iter):
            X, y = X.to(device), y.to(device)
            trainer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]

        test_acc = evaluate_accuracy(test_iter, net.to(device))
        print(f'epoch {epoch + 1}, loss {train_l_sum / n:.4f}, train acc {train_acc_sum / n:.4f}'
              f', test acc {test_acc:.4f}, time {time.time() - start:.1f} sec')

In [99]:
lr, num_epochs  = 0.001, 40
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, train_iter, test_iter, trainer, num_epochs)

epoch 1, loss 0.5924, train acc 0.7770, test acc 0.8687, time 8.5 sec
epoch 2, loss 0.2655, train acc 0.9031, test acc 0.8936, time 8.2 sec
epoch 3, loss 0.2067, train acc 0.9233, test acc 0.9001, time 7.9 sec
epoch 4, loss 0.1780, train acc 0.9337, test acc 0.9050, time 7.9 sec
epoch 5, loss 0.1592, train acc 0.9403, test acc 0.8952, time 7.9 sec
epoch 6, loss 0.1466, train acc 0.9450, test acc 0.9080, time 7.9 sec
epoch 7, loss 0.1240, train acc 0.9540, test acc 0.8988, time 7.8 sec
epoch 8, loss 0.1077, train acc 0.9601, test acc 0.9005, time 7.9 sec
epoch 9, loss 0.0986, train acc 0.9636, test acc 0.9117, time 7.9 sec
epoch 10, loss 0.0873, train acc 0.9670, test acc 0.9066, time 8.0 sec
epoch 11, loss 0.0770, train acc 0.9717, test acc 0.9084, time 8.0 sec
epoch 12, loss 0.0707, train acc 0.9738, test acc 0.9081, time 8.1 sec
epoch 13, loss 0.0552, train acc 0.9791, test acc 0.9115, time 8.1 sec
epoch 14, loss 0.0509, train acc 0.9816, test acc 0.9133, time 8.1 sec
epoch 15, loss 