In [1]:
import time
import torch
import torchvision as tv

In [2]:
# Mini-batch size shared by the training and evaluation loaders.
BATCH_SIZE = 256

# FashionMNIST is downloaded into the current directory on first use;
# ToTensor() converts each PIL image to a float tensor.
train_dataset = tv.datasets.FashionMNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.FashionMNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)

# Reshuffle the training set every epoch so the optimizer (and the BatchNorm
# batch statistics) do not see the exact same mini-batches in the exact same
# order on every pass. The evaluation loader keeps its fixed order.
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

100.0%
100.0%
100.0%
100.0%


In [3]:
# Inspect the first training sample: image tensor shape (after ToTensor) and its class label.
first_image, first_label = train_dataset[0]
print(first_image.shape, first_label)

torch.Size([1, 28, 28]) 9


In [4]:
# Fully connected classifier: Flatten -> 3 x (Linear -> ReLU -> BatchNorm1d) -> Linear(64, 10).
# Layer widths run 784 -> 2048 -> 512 -> 64 -> 10.
# Dropout(.5) after each hidden block is currently disabled.
_widths = (784, 2048, 512, 64)
_hidden = []
for _n_in, _n_out in zip(_widths, _widths[1:]):
    _hidden += [
        torch.nn.Linear(_n_in, _n_out),
        torch.nn.ReLU(),
        torch.nn.BatchNorm1d(_n_out),
    ]

model = torch.nn.Sequential(
    torch.nn.Flatten(),
    *_hidden,
    torch.nn.Linear(_widths[-1], 10),
)

In [5]:
# Training configuration: number of passes over the training set,
# the classification criterion, and the optimizer over all model parameters.
num_epochs = 26
loss_fn = torch.nn.CrossEntropyLoss()
trainer = torch.optim.Adam(params=model.parameters(), lr=1e-2)

In [9]:
def _run_epoch(loader, training):
    """Make one pass over ``loader`` and return ``(mean_batch_loss, accuracy)``.

    Uses the notebook-level ``model``, ``loss_fn`` and ``trainer``.

    Args:
        loader: iterable yielding ``(X, y)`` batches.
        training: if True, the model is put in train mode and the optimizer
            is stepped on every batch; if False, the model is put in eval
            mode and gradient tracking is disabled.

    Returns:
        A tuple of the average of the per-batch losses and the fraction of
        samples whose arg-max prediction equals the label. Both are 0.0 when
        ``loader`` yields no batches.
    """
    model.train(training)
    loss_sum, n_correct = 0., 0
    n_batches, n_samples = 0, 0
    # No autograd graph is needed for evaluation; skipping it saves memory and time.
    with torch.set_grad_enabled(training):
        for X, y in loader:
            if training:
                trainer.zero_grad()
            y_pred = model(X)
            loss = loss_fn(y_pred, y)
            if training:
                # compute gradients of the model parameters, then update them
                loss.backward()
                trainer.step()
            # accumulate results
            loss_sum += loss.item()
            n_correct += (y_pred.argmax(dim=1) == y).sum().item()
            n_batches += 1
            n_samples += len(X)
    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    return loss_sum / max(n_batches, 1), n_correct / max(n_samples, 1)


def train_model():
    """Train ``model`` for ``num_epochs`` epochs, evaluating after each one.

    Each epoch runs a training pass over ``train`` followed by an evaluation
    pass over ``test`` and prints one summary line with the epoch index, the
    wall-clock time of both passes, and the loss/accuracy of each pass.
    Relies on the notebook-level ``model``, ``trainer``, ``loss_fn``,
    ``train``, ``test`` and ``num_epochs``.
    """
    for ep in range(num_epochs):
        start = time.time()
        train_loss, train_acc = _run_epoch(train, training=True)
        test_loss, test_acc = _run_epoch(test, training=False)

        print("ep: {}, time: {:.3f}, trn_l: {:.6f}, trn_a: {:.6f}, tst_l: {:.6f}, tst_a: {:.6f}".format(
            ep, time.time() - start,
            train_loss, train_acc,
            test_loss, test_acc)
        )


In [10]:
train_model()

ep: 0, time: 11.173, trn_l: 0.190114, trn_a: 0.929067, tst_l: 0.767738, tst_a: 0.876900
ep: 1, time: 11.539, trn_l: 0.147753, trn_a: 0.943783, tst_l: 2.067132, tst_a: 0.878000
ep: 2, time: 11.759, trn_l: 0.132908, trn_a: 0.948833, tst_l: 2.032338, tst_a: 0.869800
ep: 3, time: 11.872, trn_l: 0.127515, trn_a: 0.951417, tst_l: 0.690243, tst_a: 0.865900
ep: 4, time: 11.996, trn_l: 0.129903, trn_a: 0.950250, tst_l: 2.299798, tst_a: 0.876700
ep: 5, time: 12.094, trn_l: 0.116718, trn_a: 0.955367, tst_l: 2.978481, tst_a: 0.880400
ep: 6, time: 12.168, trn_l: 0.111579, trn_a: 0.957567, tst_l: 6.962951, tst_a: 0.871000
ep: 7, time: 12.144, trn_l: 0.105514, trn_a: 0.959367, tst_l: 5.718908, tst_a: 0.876400
ep: 8, time: 12.124, trn_l: 0.106049, trn_a: 0.958883, tst_l: 5.078902, tst_a: 0.881300
ep: 9, time: 11.986, trn_l: 0.106538, trn_a: 0.959683, tst_l: 3.161078, tst_a: 0.875000
ep: 10, time: 12.195, trn_l: 0.100460, trn_a: 0.960683, tst_l: 2.634406, tst_a: 0.885400
ep: 11, time: 12.401, trn_l: 0.

In [None]:
# Test accuracy exceeded 0.88 at epochs 5, 8, 10, 14, 16, 19, 20, 22 and 23.