In [None]:
import torch
import torchvision
from torch import nn


# Run on CUDA when PyTorch reports it as available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training split of FashionMNIST, stored under ./fashion and passed through ToTensor.
train_data = torchvision.datasets.FashionMNIST(
    root="fashion", train=True, transform=torchvision.transforms.ToTensor(), download=True
)

# Held-out split, configured the same way apart from train=False.
test_data = torchvision.datasets.FashionMNIST(
    root="fashion", train=False, transform=torchvision.transforms.ToTensor(), download=True
)

# Mini-batches of 64; only the training loader is shuffled.
train_dataload = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
test_dataload = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False)

# Pull one batch from each loader so the batch contents can be inspected.
train_features, train_labels = next(iter(train_dataload))
test_features, test_labels = next(iter(test_dataload))

100%|██████████| 26.4M/26.4M [00:00<00:00, 113MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 3.43MB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 58.0MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 10.5MB/s]


In [None]:
class MNIST(nn.Module):
    """Baseline classifier: flatten the input, then apply three stacked linear layers.

    There is no activation between the layers.

    Args:
        input_size: Number of features per sample after flattening.
        output_size: Number of output scores per sample.
        hidden_size: Width of the two intermediate linear layers.
    """

    def __init__(self, input_size=784, output_size=10, hidden_size=128):
        super().__init__()
        self.flatten = nn.Flatten()
        # Layers are created in input -> hidden -> hidden -> output order.
        layers = [
            nn.Linear(input_size, hidden_size),
            nn.Linear(hidden_size, hidden_size),
            nn.Linear(hidden_size, output_size),
        ]
        self.stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the output of the linear stack for the flattened batch `x`."""
        return self.stack(self.flatten(x))

In [None]:
# Build the baseline network with its default sizes and move it to `device`.
model = MNIST().to(device)

In [None]:
import requests
from pathlib import Path

# Download helper functions from Learn PyTorch repo (if not already downloaded)
# NOTE(review): this fetches code from the repository's `main` branch and the next
# cell imports it; consider pinning to a specific commit for reproducibility.
if Path("helper_functions.py").is_file():
  print("helper_functions.py already exists, skipping download")
else:
  print("Downloading helper_functions.py")
  # Note: you need the "raw" GitHub URL for this to work
  request = requests.get(
      "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py",
      timeout=30,  # do not hang the notebook forever on a stalled connection
  )
  # Stop on an HTTP error instead of saving the error page as helper_functions.py,
  # which would break the import and then be skipped by the "already exists" check.
  request.raise_for_status()
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)

Downloading helper_functions.py


In [None]:
from helper_functions import accuracy_fn

# Cross-entropy loss, and an Adam optimizer over `model`'s parameters (lr=0.001).
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
import timeit

def time_fn(start, end):
    """Print the time between `start` and `end` in seconds, to three decimals.

    Args:
        start: Timestamp taken before the timed work.
        end: Timestamp taken after the timed work.

    Returns:
        None; the message is written to stdout only.
    """
    elapsed = end - start
    print(f"Time to train: {elapsed:.3f} seconds")

In [None]:
import tqdm

epochs = 3

start_time = timeit.default_timer()

for epoch in tqdm.tqdm(range(epochs)):

    # Accumulate the per-batch loss as a plain Python float. Adding the loss
    # tensor itself would keep every batch's autograd history alive until the
    # end of the epoch.
    train_loss = 0.0

    # The evaluation pass below switches to eval mode, so switch back once per
    # epoch rather than on every batch.
    model.train()

    for batch, (X, y) in enumerate(train_dataload):
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            print(f"Epoch: {epoch} - Batch: {batch} - Loss: {loss.item():.4f}")

    # Mean loss per batch for this epoch.
    train_loss /= len(train_dataload)

    test_loss, test_acc = 0.0, 0.0
    model.eval()
    with torch.inference_mode():
        for X, y in test_dataload:
            X, y = X.to(device), y.to(device)
            test_pred = model(X)
            test_loss += loss_fn(test_pred, y).item()
            # accuracy_fn comes from helper_functions.py; presumably it returns a
            # percentage per batch (the print below appends "%") - TODO confirm.
            test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))

    # Both metrics are averaged over batches, not over individual samples.
    test_loss /= len(test_dataload)
    test_acc /= len(test_dataload)
    print(f"Train Loss: {train_loss:.4f} - Test Loss: {test_loss:.4f} - Test Accuracy: {test_acc:.2f}%")

end_time = timeit.default_timer()

time_fn(start_time, end_time)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 0 - Batch: 0 - Loss: 2.3230
Epoch: 0 - Batch: 100 - Loss: 0.6336
Epoch: 0 - Batch: 200 - Loss: 0.7441
Epoch: 0 - Batch: 300 - Loss: 0.4377
Epoch: 0 - Batch: 400 - Loss: 0.4664
Epoch: 0 - Batch: 500 - Loss: 0.4939
Epoch: 0 - Batch: 600 - Loss: 0.4149
Epoch: 0 - Batch: 700 - Loss: 0.4340
Epoch: 0 - Batch: 800 - Loss: 0.3453
Epoch: 0 - Batch: 900 - Loss: 0.4244


 33%|███▎      | 1/3 [00:10<00:20, 10.03s/it]

Train Loss: 0.5532 - Test Loss: 0.5089 - Test Accuracy: 82.31%
Epoch: 1 - Batch: 0 - Loss: 0.5370
Epoch: 1 - Batch: 100 - Loss: 0.4101
Epoch: 1 - Batch: 200 - Loss: 0.3658
Epoch: 1 - Batch: 300 - Loss: 0.3172
Epoch: 1 - Batch: 400 - Loss: 0.2524
Epoch: 1 - Batch: 500 - Loss: 0.4046
Epoch: 1 - Batch: 600 - Loss: 0.2123
Epoch: 1 - Batch: 700 - Loss: 0.5523
Epoch: 1 - Batch: 800 - Loss: 0.4022
Epoch: 1 - Batch: 900 - Loss: 0.4557


 67%|██████▋   | 2/3 [00:19<00:09,  9.79s/it]

Train Loss: 0.4576 - Test Loss: 0.4781 - Test Accuracy: 83.36%
Epoch: 2 - Batch: 0 - Loss: 0.4570
Epoch: 2 - Batch: 100 - Loss: 0.7887
Epoch: 2 - Batch: 200 - Loss: 0.3016
Epoch: 2 - Batch: 300 - Loss: 0.4094
Epoch: 2 - Batch: 400 - Loss: 0.3842
Epoch: 2 - Batch: 500 - Loss: 0.5246
Epoch: 2 - Batch: 600 - Loss: 0.3014
Epoch: 2 - Batch: 700 - Loss: 0.5198
Epoch: 2 - Batch: 800 - Loss: 0.3065
Epoch: 2 - Batch: 900 - Loss: 0.5907


100%|██████████| 3/3 [00:29<00:00,  9.77s/it]

Train Loss: 0.4400 - Test Loss: 0.4710 - Test Accuracy: 83.45%
Time to train: 29.318 seconds





In [None]:
def model_evaluation(
        dataloader,
        model,
        loss_fn,
        accuracy_fn,
        device=device
        ):
    """Evaluate `model` over every batch of `dataloader` and summarise the result.

    Args:
        dataloader: Sized iterable yielding `(X, y)` batches.
        model: Module to evaluate; it is put into eval mode.
        loss_fn: Callable invoked as `loss_fn(pred, y)`.
        accuracy_fn: Callable invoked with `y_true=` and `y_pred=` keywords.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A dict with the model's class name under "model", the batch-averaged
        loss as a Python number under "loss", and the batch-averaged accuracy
        under "accuracy".
    """
    running_loss = 0
    running_acc = 0

    model.eval()
    with torch.inference_mode():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            running_loss = running_loss + loss_fn(outputs, targets)
            running_acc = running_acc + accuracy_fn(y_true=targets, y_pred=outputs.argmax(dim=1))

        # Average over the number of batches, not the number of samples.
        n_batches = len(dataloader)
        mean_loss = running_loss / n_batches
        mean_acc = running_acc / n_batches

    summary = {
        "model": type(model).__name__,
        "loss": mean_loss.item(),
        "accuracy": mean_acc,
    }
    return summary

# Summarise the baseline `model` on the test loader as a dict of name, loss and accuracy.
model_results = model_evaluation(model=model,
                              dataloader=test_dataload,
                              loss_fn=loss_fn,
                              accuracy_fn=accuracy_fn)

In [None]:
# Shell out to nvidia-smi to show the GPU, driver version and current memory use.
!nvidia-smi

Thu Jul 24 22:14:06 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   69C    P0             31W /   70W |     160MiB /  15360MiB |      2%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
class MNIST1(nn.Module):
    """Classifier with ReLU activations: flatten, then Linear-ReLU-Linear-ReLU-Linear.

    Args:
        input_size: Number of features per sample after flattening.
        output_size: Number of output scores per sample.
        hidden_size: Width of the two hidden linear layers.
    """

    def __init__(self, input_size=784, output_size=10, hidden_size=128):
        super().__init__()
        self.flatten = nn.Flatten()
        # Positions 0, 2 and 4 hold the linear layers; 1 and 3 are the ReLUs.
        layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
        ]
        self.stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the stack's output for the flattened batch `x`."""
        return self.stack(self.flatten(x))

In [None]:
# Build the ReLU network with its default sizes and move it to `device`.
model_1 = MNIST1().to(device)

In [None]:
# Rebind the notebook-level `optimizer` and `loss_fn`: from here on the optimizer
# updates model_1's parameters, not the baseline model's.
optimizer = torch.optim.Adam(model_1.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

In [None]:
def train_step(model,
               loss_fn,
               optimizer,
               train_dataloader,
               device=device,
               accuracy_fn=accuracy_fn
               ):
    """Train `model` for one pass over `train_dataloader` and print the averages.

    Args:
        model: Module to optimise; it is put into train mode.
        loss_fn: Callable invoked as `loss_fn(y_pred, y)`; its result is backpropagated.
        optimizer: Optimizer stepped once per batch.
        train_dataloader: DataLoader yielding `(X, y)` batches.
        device: Device each batch is moved to before the forward pass.
        accuracy_fn: Callable invoked with `y_pred=` and `y_true=` keywords.

    Returns:
        None; progress and the batch-averaged loss/accuracy are printed.
    """
    # Plain floats: adding the loss tensor itself would keep every batch's
    # autograd history referenced until the epoch ends.
    train_loss, train_acc = 0.0, 0.0

    # Nothing inside the loop leaves train mode, so set it once.
    model.train()

    # Real number of samples; batch_size * number_of_batches over-counts when
    # the last batch is smaller than the others.
    total_samples = len(train_dataloader.dataset)

    for batch, (X, y) in enumerate(train_dataloader):
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()
        train_acc += accuracy_fn(y_pred=y_pred.argmax(dim=1), y_true=y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 500 == 0:
            print(f"{batch * train_dataloader.batch_size}/{total_samples} samples")

    # Both metrics are averaged over batches, not over individual samples.
    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)

    print(f"Train Loss: {train_loss:.5f} - Train Accuracy: {train_acc:.3f}")

In [None]:
def test_step(
    model,
    loss_fn,
    test_dataloader,
    device=device,
    accuracy_fn=accuracy_fn
    ):
    """Evaluate `model` on `test_dataloader` and print the batch-averaged metrics.

    Args:
        model: Module to evaluate; it is put into eval mode.
        loss_fn: Callable invoked as `loss_fn(test_pred, y)`.
        test_dataloader: Sized iterable yielding `(X, y)` batches.
        device: Device each batch is moved to before the forward pass.
        accuracy_fn: Callable invoked with `y_true=` and `y_pred=` keywords.

    Returns:
        None; the averaged test loss and accuracy are printed.
    """
    test_loss, test_acc = 0.0, 0.0
    model.eval()
    with torch.inference_mode():
        for X, y in test_dataloader:
            X, y = X.to(device), y.to(device)
            test_pred = model(X)
            test_loss += loss_fn(test_pred, y).item()
            test_acc += accuracy_fn(y_true=y, y_pred=test_pred.argmax(dim=1))

    # Average over the loader that was passed in, not a notebook-level global.
    n_batches = len(test_dataloader)
    test_loss /= n_batches
    test_acc /= n_batches

    # Only metrics computed here are reported. The train loss is not known to
    # this function; reading it from a global printed a stale value.
    print(f"Test Loss: {test_loss:.4f} - Test Accuracy: {test_acc:.3f}%")

In [None]:
# Three epochs for model_1: one training pass, then one evaluation pass per epoch.
for _ in range(3):
  train_step(
      model=model_1,
      loss_fn=loss_fn,
      train_dataloader=train_dataload,
      optimizer=optimizer
  )
  test_step(
      model=model_1,
      loss_fn=loss_fn,
      test_dataloader=test_dataload
  )

0/60032 samples
32000/60032 samples
Train Loss: 0.54045 - Train Accuracy: 80.769
Train Loss: 0.4400 - Test Loss: 0.4393 - Test Accuracy: 83.877%
0/60032 samples
32000/60032 samples
Train Loss: 0.37804 - Train Accuracy: 86.267
Train Loss: 0.4400 - Test Loss: 0.3958 - Test Accuracy: 85.768%
0/60032 samples
32000/60032 samples
Train Loss: 0.33839 - Train Accuracy: 87.555
Train Loss: 0.4400 - Test Loss: 0.3606 - Test Accuracy: 87.361%


In [None]:
# Summarise model_1 on the test loader; the bare name on the last line displays the dict.
# NOTE(review): the output saved under this cell (loss ~0.471, accuracy ~83.45) equals
# the first model's final test metrics, not model_1's last epoch (0.3606 / 87.361%).
# It looks stale - confirm with Restart Kernel -> Run All.
model_1_results = model_evaluation(model=model_1,
                              dataloader=test_dataload,
                              loss_fn=loss_fn,
                              accuracy_fn=accuracy_fn)

model_1_results

{'model': 'MNIST1', 'loss': 0.47096750140190125, 'accuracy': 83.44944267515923}

In [None]:
class MNIST2(nn.Module):
    """Small CNN: two conv blocks (Conv-ReLU-Conv-ReLU-MaxPool) and a linear classifier.

    Args:
        input_units: Number of channels in the input images.
        hidden_units: Number of channels produced by every convolution.
        output_units: Number of output scores per sample.
        image_size: Spatial size of the input images, either a single int for
            square images or a `(height, width)` pair. Defaults to 28, which
            reproduces the previous hard-coded `hidden_units*7*7` classifier.
    """

    def __init__(self, input_units, hidden_units, output_units, image_size=28):
        super().__init__()
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_units, out_channels=hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

        # The 3x3 convolutions with padding=1 keep the spatial size; each of the
        # two 2x2 max-pools halves it (rounding down), e.g. 28 -> 14 -> 7.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2

        # The classifier carries its own Flatten, so no separate flatten module is kept.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units * pooled_height * pooled_width,
                      out_features=output_units)
        )

    def forward(self, x):
        """Run both conv blocks and the classifier on the image batch `x`."""
        x = self.conv_block_2(self.conv_block_1(x))
        x = self.classifier(x)
        return x


In [None]:
# CNN with 1 input channel, 10 hidden channels and 10 outputs, moved to `device`.
model_2 = MNIST2(input_units=1, hidden_units=10, output_units=10).to(device)

# Rebind the notebook-level loss and optimizer; the optimizer now updates model_2.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_2.parameters(), lr=0.001)

In [None]:
# Ten epochs for model_2: one training pass, then one evaluation pass per epoch.
# `epoch` is not used inside the loop body.
for epoch in range(10):
  train_step(
      model=model_2,
      loss_fn=loss_fn,
      train_dataloader=train_dataload,
      optimizer=optimizer
  )
  test_step(
      model=model_2,
      loss_fn=loss_fn,
      test_dataloader=test_dataload
  )

0/60032 samples
32000/60032 samples
Train Loss: 0.32590 - Train Accuracy: 88.225
Train Loss: 0.4400 - Test Loss: 0.3415 - Test Accuracy: 87.609%
0/60032 samples
32000/60032 samples
Train Loss: 0.30625 - Train Accuracy: 88.884
Train Loss: 0.4400 - Test Loss: 0.3203 - Test Accuracy: 88.346%
0/60032 samples
32000/60032 samples
Train Loss: 0.29037 - Train Accuracy: 89.366
Train Loss: 0.4400 - Test Loss: 0.3122 - Test Accuracy: 88.744%
0/60032 samples
32000/60032 samples
Train Loss: 0.27817 - Train Accuracy: 89.895
Train Loss: 0.4400 - Test Loss: 0.3003 - Test Accuracy: 89.441%
0/60032 samples
32000/60032 samples
Train Loss: 0.26719 - Train Accuracy: 90.239
Train Loss: 0.4400 - Test Loss: 0.2928 - Test Accuracy: 89.640%
0/60032 samples
32000/60032 samples
Train Loss: 0.25843 - Train Accuracy: 90.607
Train Loss: 0.4400 - Test Loss: 0.2982 - Test Accuracy: 89.451%
0/60032 samples
32000/60032 samples
Train Loss: 0.25074 - Train Accuracy: 90.827
Train Loss: 0.4400 - Test Loss: 0.3065 - Test Acc