In [26]:
import torchvision
import torchvision.transforms.v2 as T
import torch
import torch.nn as nn
import torchmetrics

In [9]:
torch.__version__

'2.9.1+cu128'

In [10]:
toTensor = T.Compose([T.ToImage(), T.ToDtype(torch.float32, scale=True)])

train_and_valid_data = torchvision.datasets.FashionMNIST(
    root="dataset", train=True, transform=toTensor,download=True
)

test_data = torchvision.datasets.FashionMNIST(
    root="dataset", train=False, transform=toTensor, download=True
)

torch.manual_seed(42)
train_data, valid_data = torch.utils.data.random_split(
    train_and_valid_data, [55000, 5000]
)


In [11]:
from torch.utils.data import DataLoader
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=32)
test_loader = DataLoader(test_data, batch_size=32)

In [12]:
X_sample, y_sample = train_data[0]
X_sample.shape, X_sample.dtype

(torch.Size([1, 28, 28]), torch.float32)

In [13]:
train_and_valid_data.classes

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

#### Building the Classifier

In [16]:
class ImageClassifier(nn.Module):
    def __init__(self, n_inputs, n_hidden1, n_hidden2, n_classes):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Flatten(),
            nn.Linear(n_inputs, n_hidden1),
            nn.ReLU(),
            nn.Linear(n_hidden1, n_hidden2),
            nn.ReLU(),
            nn.Linear(n_hidden2, n_classes)
        )

    def forward(self, X):
        return self.mlp(X)

In [30]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [None]:
torch.manual_seed(42)
model = ImageClassifier(n_inputs=1*28*28, n_hidden1=300, n_hidden2=100,
                        n_classes=10).to(device)

xentropy = nn.CrossEntropyLoss()

In [32]:
import torchmetrics

def evaluate_tm(model, data_Loader, metric):
    model.eval()
    metric.reset()
    with torch.inference_mode():
        for X_batch, y_batch in data_Loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_pred = model(X_batch)
            metric.update(y_pred, y_batch)
    return metric.compute()


In [40]:
def train2(model, optimizer, criterion, metric, train_loader, valid_loader, n_epochs):
    history = {"train_losses":[],
               "train_metrics":[],
               "valid_metrics": []}
    for epoch in range(n_epochs):
        total_loss = 0.
        metric.reset()
        for X_batch, y_batch in train_loader:
            model.train()
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            metric(y_pred, y_batch)
        mean_loss = total_loss / len(train_loader)
        history["train_losses"].append(mean_loss)
        history["train_metrics"].append(metric.compute().item())
        history["valid_metrics"].append(evaluate_tm(model, valid_loader, metric).item())

        print(f"Epoch: {epoch+1}/{n_epochs},",
              f"Train Loss: {history["train_losses"][-1]:.4f}",
              f"Train metric: {history["train_metrics"][-1]:.4f}",
              f"Valid Metrics: {history["valid_metrics"][-1]:.4f}")
    return history


Un comment the line to train the model and see metrics

In [None]:
optimizer = torch.optim.SGD(model.parameters(), lr = 0.1)
accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(device)
n_epochs = 20
# _ = train2(model, optimizer, xentropy, accuracy, train_loader, valid_loader, n_epochs)

Epoch: 1/20, Train Loss: 0.0000 Train metric: 0.9288 Valid Metrics: 0.8828
Epoch: 2/20, Train Loss: 0.0000 Train metric: 0.9301 Valid Metrics: 0.8892
Epoch: 3/20, Train Loss: 0.0000 Train metric: 0.9320 Valid Metrics: 0.8888
Epoch: 4/20, Train Loss: 0.0000 Train metric: 0.9319 Valid Metrics: 0.8870
Epoch: 5/20, Train Loss: 0.0000 Train metric: 0.9351 Valid Metrics: 0.8916
Epoch: 6/20, Train Loss: 0.0000 Train metric: 0.9372 Valid Metrics: 0.8772
Epoch: 7/20, Train Loss: 0.0000 Train metric: 0.9377 Valid Metrics: 0.8874
Epoch: 8/20, Train Loss: 0.0000 Train metric: 0.9393 Valid Metrics: 0.8926
Epoch: 9/20, Train Loss: 0.0000 Train metric: 0.9399 Valid Metrics: 0.8874
Epoch: 10/20, Train Loss: 0.0000 Train metric: 0.9427 Valid Metrics: 0.8912
Epoch: 11/20, Train Loss: 0.0000 Train metric: 0.9423 Valid Metrics: 0.8854
Epoch: 12/20, Train Loss: 0.0000 Train metric: 0.9456 Valid Metrics: 0.8804
Epoch: 13/20, Train Loss: 0.0000 Train metric: 0.9454 Valid Metrics: 0.8896
Epoch: 14/20, Train L

In [42]:
model.eval()
X_new, y_new = next(iter(valid_loader))
X_new = X_new[:3].to(device)
with torch.inference_mode():
    y_pred_logits = model(X_new)
y_pred = y_pred_logits.argmax(dim = 1)
y_pred

tensor([7, 4, 2], device='cuda:0')

In [43]:
[train_and_valid_data.classes[idx] for idx in y_pred]

['Sneaker', 'Coat', 'Pullover']

In [44]:
y_new[:3]

tensor([7, 4, 2])

In [45]:
import torch.nn.functional as F
y_proba = F.softmax(y_pred_logits, dim = 1)
if device == "mps":
    y_proba = y_proba.cpu()
y_proba.round(decimals=3)

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.8940, 0.0000,
         0.1060],
        [0.0000, 0.0000, 0.0030, 0.0000, 0.9970, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.8590, 0.0000, 0.1290, 0.0000, 0.0120, 0.0000, 0.0000,
         0.0000]], device='cuda:0')

In [47]:
y_top4_logits, y_top4_indices = torch.topk(y_pred_logits, k = 4, dim = 1)
y_top4_proba = F.softmax(y_top4_logits, dim = 1)
y_top4_proba.round(decimals = 3)

tensor([[0.8940, 0.1060, 0.0000, 0.0000],
        [0.9970, 0.0030, 0.0000, 0.0000],
        [0.8590, 0.1290, 0.0120, 0.0000]], device='cuda:0')

In [48]:
y_top4_indices

tensor([[7, 9, 5, 0],
        [4, 2, 6, 8],
        [2, 4, 6, 8]], device='cuda:0')

### Fine-tuning Hyperparameters with optuna