In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset, Subset
from torchinfo import summary


In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
])

In [3]:
dataset = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transform
)

In [4]:
subset_indices = list(range(32000))  # First 10,000 samples
subset = Subset(dataset, subset_indices)

In [5]:
dataloader = DataLoader(subset, batch_size=16, shuffle=True, num_workers=4)

In [6]:
for x, y in dataloader:
    print(x.shape)
    print(y.shape)
    break

torch.Size([16, 1, 28, 28])
torch.Size([16])


In [7]:
class Model(nn.Module):
    def __init__(self, num_fetures):
        super(Model, self).__init__()
        self.architecture = nn.Sequential(
            nn.Linear(num_fetures, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 10)
        )
    
    def forward(self, x):
        return self.architecture(x)

In [8]:
model = Model(28*28)
summary(model, input_size=(32, 28*28), device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [32, 10]                  --
├─Sequential: 1-1                        [32, 10]                  --
│    └─Linear: 2-1                       [32, 128]                 100,480
│    └─ReLU: 2-2                         [32, 128]                 --
│    └─Linear: 2-3                       [32, 64]                  8,256
│    └─ReLU: 2-4                         [32, 64]                  --
│    └─Linear: 2-5                       [32, 10]                  650
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 3.50
Input size (MB): 0.10
Forward/backward pass size (MB): 0.05
Params size (MB): 0.44
Estimated Total Size (MB): 0.59

In [9]:
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [10]:
epochs = 100
for epoch in range(epochs):
    for x, y in dataloader:
        x = x.reshape(-1, 28*28)
        y_hat = model(x)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    print(f'Epoch: {epoch+1}, Loss: {l.item()}')

Epoch: 1, Loss: 0.20664797723293304
Epoch: 2, Loss: 0.2782812714576721
Epoch: 3, Loss: 0.5583669543266296
Epoch: 4, Loss: 0.23579327762126923
Epoch: 5, Loss: 0.0799611508846283
Epoch: 6, Loss: 0.13393153250217438
Epoch: 7, Loss: 0.5386178493499756
Epoch: 8, Loss: 0.7153672575950623
Epoch: 9, Loss: 0.2215988039970398
Epoch: 10, Loss: 0.5621047616004944
Epoch: 11, Loss: 0.40200304985046387
Epoch: 12, Loss: 0.12228085100650787
Epoch: 13, Loss: 0.6429758071899414
Epoch: 14, Loss: 0.28159821033477783
Epoch: 15, Loss: 0.5115989446640015
Epoch: 16, Loss: 0.40377721190452576
Epoch: 17, Loss: 0.2825738787651062
Epoch: 18, Loss: 0.35609170794487
Epoch: 19, Loss: 0.23811422288417816
Epoch: 20, Loss: 0.14780987799167633
Epoch: 21, Loss: 0.2213563323020935
Epoch: 22, Loss: 0.2490115463733673
Epoch: 23, Loss: 0.3457556962966919
Epoch: 24, Loss: 0.10373054444789886
Epoch: 25, Loss: 0.1455027461051941
Epoch: 26, Loss: 0.20255503058433533
Epoch: 27, Loss: 0.43879804015159607
Epoch: 28, Loss: 0.51245129