# Building Custom Image Classifier in PyTorch

Notebook inspired by [Hands-On Machine Learning with Scikit-Learn and PyTorch](https://www.oreilly.com/library/view/hands-on-machine-learning/9798341607972/).

## Load in Dataset

In [12]:
# set device depending on what's available
if torch.cuda.is_available():
  device = 'cuda'
elif torch.backends.mps.is_available():
  device = 'mps'
else:
  device = 'cpu'

In [13]:
import torch
import torchvision
import torchvision.transforms.v2 as T

# create tensor object we'll transform FashionMNIST data to
toTensor = T.Compose([T.ToImage(), T.ToDtype(torch.float32, scale = True)])

# bring in train, test, valid data
train_and_valid_data = torchvision.datasets.FashionMNIST(
    root = 'datasets',
    train = True,
    download = True,
    transform = toTensor
)

test_data = torchvision.datasets.FashionMNIST(
    root = 'datasets',
    train = False,
    download = True,
    transform = toTensor
)

# reproducibility
torch.manual_seed(42)

# save back 5_000 from train to be reserved for validation
train_data, valid_data = torch.utils.data.random_split(
    train_and_valid_data,
    [55_000, 5_000]
)

In [14]:
from torch.utils.data import DataLoader

# create data loaders
train_loader = DataLoader(train_data, batch_size = 32, shuffle = True)
valid_loader = DataLoader(valid_data, batch_size = 32)
test_loader = DataLoader(test_data, batch_size = 32)

In [15]:
# look at first image in training set
X_sample, y_sample = train_data[0]

X_sample.shape

torch.Size([1, 28, 28])

In [16]:
X_sample.dtype # check type

torch.float32

In [17]:
# check class of sample
train_and_valid_data.classes[y_sample]

'Ankle boot'

## Build Classifier

In [18]:
from torch import nn
# custom classification MLP w/ 2 hidden layers
class ImageClassifier(nn.Module):
  def __init__(self, n_inputs, n_hidden1, n_hidden2, n_classes):
    super().__init__()
    self.mlp = nn.Sequential(
        nn.Flatten(),
        nn.Linear(n_inputs, n_hidden1),
        nn.ReLU(),
        nn.Linear(n_hidden1, n_hidden2),
        nn.ReLU(),
        nn.Linear(n_hidden2, n_classes)
    )

  def forward(self, X):
    return self.mlp(X)

In [19]:
torch.manual_seed(42)

# create model instance
model = ImageClassifier(
    n_inputs = 28 * 28,
    n_hidden1 = 400,
    n_hidden2 = 200,
    n_classes = 10
)

# use cross entropy loss for multi-class classification loss
xentropy = nn.CrossEntropyLoss()

In [20]:
# set model training params
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
n_epochs = 100

In [21]:
# train function to implement mb gd
def train_mbgd(model, optimizer, criterion, train_loader, n_epochs):
  model.train() # set training mode
  for epoch in range(n_epochs):
    total_loss = 0
    for X_batch, y_batch in train_loader:
      # get batch
      X_batch, y_batch = X_batch.to(device), y_batch.to(device)
      # mod pred
      y_pred = model(X_batch)
      # calc loss and tally
      loss = criterion(y_pred, y_batch)
      total_loss += loss.item()
      # calc grads and do step
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()

    mean_loss = total_loss / len(train_loader)
    if epoch % 10 == 0: # every ten epochs, print out loss
      print(f'Epoch {epoch + 1}, Loss: {mean_loss}')

In [23]:
model.to(device) # Move model to the correct device
train_mbgd(model, optimizer, criterion, train_loader, n_epochs)

Epoch 1, Loss: 1.0793743771373567
Epoch 11, Loss: 0.3632791165247291
Epoch 21, Loss: 0.2936345736055682
Epoch 31, Loss: 0.24963734634849066
Epoch 41, Loss: 0.21416325257238328
Epoch 51, Loss: 0.1848576282665194
Epoch 61, Loss: 0.1579446381330707
Epoch 71, Loss: 0.13501447004576525
Epoch 81, Loss: 0.11510268070612054
Epoch 91, Loss: 0.09713919390233504


## Evaluate

In [29]:
## create evaluation function
def evaluate(model, data_loader, metric, aggregate = torch.mean):
  model.eval() # change model mode to evaluation (no gradient work)
  metrics = []

  with torch.no_grad():
    for X_batch, y_batch in data_loader:
      # move data to GPU / cuda
      X_batch, y_batch = X_batch.to(device), y_batch.to(device)
      y_pred = model(X_batch)
      metric_val = metric(y_pred, y_batch)
      metrics.append(metric_val)

  # retrun agg met over all batches
  return aggregate(torch.stack(metrics))

In [26]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.15.2-py3-none-any.whl (29 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.15.2 torchmetrics-1.8.2


In [31]:
# use accuracy metric to evaluate predictive ability
import torchmetrics
accuracy = torchmetrics.Accuracy(task = 'multiclass', num_classes = 10).to(device)

In [36]:
# accuracy on validation data
# calc batch-wise accuracy w/ lambda func
# get average of batches via aggregate
accuracy_val = evaluate(model, valid_loader,
                        lambda y_pred, y_batch: (y_pred.argmax(dim=1)
                        == y_batch).float().mean(),
                        aggregate = torch.mean)

print(f'Validation Accuracy: {accuracy_val.item()*100:.4f}%')

Validation Accuracy: 88.7938%


## Predict with New Images

In [38]:
# set model to evaluate mode (no training)
model.eval()

# get batch
X_new, y_new = next(iter(valid_loader))

# grab first three in batch
X_new = X_new[:3].to(device)

In [39]:
with torch.no_grad():
  y_pred_logits = model(X_new)

# grab index of the largets logit model predicts
y_pred = y_pred_logits.argmax(dim=1)

y_pred

tensor([7, 4, 2], device='cuda:0')

In [42]:
# grab labels predicted via list comp
[train_and_valid_data.classes[i] for i in y_pred]

['Sneaker', 'Coat', 'Pullover']

In [43]:
# get mods estimated probs
import torch.nn.functional as F

y_proba = F.softmax(y_pred_logits, dim = 1)

y_proba.round(decimals = 3)

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.9970, 0.0000,
         0.0030],
        [0.0000, 0.0000, 0.0200, 0.0000, 0.9800, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.9620, 0.0000, 0.0030, 0.0000, 0.0350, 0.0000, 0.0000,
         0.0000]], device='cuda:0')

In [44]:
# largest prob value predicted for each instance
[max(i) for i in y_proba]

[tensor(0.9968, device='cuda:0'),
 tensor(0.9795, device='cuda:0'),
 tensor(0.9619, device='cuda:0')]

In [49]:
# get largest predicted probability and index of that probability's pos in
# y_proba
for i in y_proba:
  print(i.max(), i.argmax())

tensor(0.9968, device='cuda:0') tensor(7, device='cuda:0')
tensor(0.9795, device='cuda:0') tensor(4, device='cuda:0')
tensor(0.9619, device='cuda:0') tensor(2, device='cuda:0')


In [50]:
# mod top-k preds
y_top4_logits, y_top4_indices = torch.topk(
    y_pred_logits,
    k = 4,
    dim = 1
)

# pass through softmax
y_top4_probas = F.softmax(y_top4_logits, dim = 1)

y_top4_probas.round(decimals = 3)

tensor([[0.9970, 0.0030, 0.0000, 0.0000],
        [0.9800, 0.0200, 0.0000, 0.0000],
        [0.9620, 0.0350, 0.0030, 0.0000]], device='cuda:0')

In [51]:
# indices of top 4 preds
y_top4_indices

tensor([[7, 9, 5, 8],
        [4, 2, 6, 8],
        [2, 6, 4, 0]], device='cuda:0')