In [47]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [50]:
# dataloader
# Build the training split first, then the test split; both are downloaded
# into ./data on first use and converted to tensors with ToTensor().
training_data, test_data = (
    datasets.FashionMNIST(
        root="data", train=is_train, download=True, transform=ToTensor()
    )
    for is_train in (True, False)
)

# Wrap each split in a DataLoader that yields batches of 64 samples.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=64) for split in (training_data, test_data)
)


In [53]:
# define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 512 -> 512 -> 10 outputs.

    The input is flattened before being fed through three linear layers,
    with a ReLU after each of the first two. The final layer's raw outputs
    (logits) are returned without any normalization.
    """

    def __init__(self):
        super().__init__()
        n_inputs, n_hidden, n_outputs = 28 * 28, 512, 10
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(n_inputs, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_outputs),
        )

    def forward(self, x):
        """Flatten ``x`` and return the logits produced by the layer stack."""
        return self.linear_relu_stack(self.flatten(x))

# Instantiate the network that the later optimizer and train/test cells operate on.
model = NeuralNetwork()
# Bare expression as the cell's last line so the notebook displays the module's repr.
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

## Loss Function
When presented with some training data, our untrained network is unlikely to give the correct answer. A loss function measures how dissimilar the obtained result is from the target value, and it is this loss that we want to minimize during training. To calculate the loss, we make a prediction using the inputs of a given data sample and compare it against the true label value.

Common loss functions include `nn.MSELoss` (Mean Squared Error) for regression tasks and `nn.NLLLoss` (Negative Log Likelihood) for classification. `nn.CrossEntropyLoss` combines `nn.LogSoftmax` and `nn.NLLLoss`.

We pass our model’s output logits to nn.CrossEntropyLoss, which will normalize the logits and compute the prediction error.



In [57]:
# Initialize the loss function.
# CrossEntropyLoss takes the model's raw logits (it combines LogSoftmax and
# NLLLoss), so the network does not apply a softmax itself.
loss_fn = nn.CrossEntropyLoss()

## Optimizer
Inside the training loop, optimization happens in three steps:
* Call optimizer.zero_grad() to reset the gradients of model parameters. Gradients by default add up; to prevent double-counting, we explicitly zero them at each iteration.
* Backpropagate the prediction loss with a call to loss.backward(). PyTorch deposits the gradients of the loss w.r.t. each parameter.
* Once we have our gradients, we call optimizer.step() to adjust the parameters by the gradients collected in the backward pass.

In [58]:
# Define the step size in the same cell that consumes it, so the notebook
# runs top to bottom on a fresh kernel (no reliance on a later cell having
# been executed first).
learning_rate = 1e-3
# Plain stochastic gradient descent over all of the model's parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [59]:
def train_loop(data_loader, model, loss_fn, optimizer):
    """Run one training epoch over ``data_loader``, updating ``model`` in place.

    Args:
        data_loader: Iterable of ``(X, y)`` batches that exposes a ``.dataset``
            attribute whose length is used as the total sample count.
        model: ``nn.Module`` being trained; called as ``model(X)``.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a loss tensor that
            supports ``.backward()`` and ``.item()``.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            once per batch.

    Every 100th batch, prints the batch loss together with the number of
    samples consumed before that batch. Returns ``None``.
    """
    size = len(data_loader.dataset)
    # Switch to training mode explicitly so layers such as dropout or batch
    # norm behave correctly even if the model was previously put in eval mode.
    model.train()
    for batch, (X, y) in enumerate(data_loader):
        pred = model(X)
        loss = loss_fn(pred, y)

        # Gradients accumulate by default, so clear them before backprop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # Use a separate name so the loss tensor is not shadowed by a float.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")
            
            
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    

In [54]:
# define hyperparameters
# Step size handed to the SGD optimizer as its ``lr`` argument.
learning_rate = 1e-3
# NOTE(review): not read by the visible DataLoader calls, which hard-code batch_size=64.
batch_size = 64
# Number of iterations of the train/test loop below.
epochs = 5

In [60]:
# Run the train/evaluate cycle once per epoch.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # One pass of parameter updates over the training batches...
    train_loop(train_dataloader, model, loss_fn, optimizer)
    # ...followed by an accuracy / average-loss report on the test batches.
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.294921  [    0/60000]
loss: 2.293026  [ 6400/60000]
loss: 2.277829  [12800/60000]
loss: 2.277014  [19200/60000]
loss: 2.250424  [25600/60000]
loss: 2.223075  [32000/60000]
loss: 2.231553  [38400/60000]
loss: 2.191958  [44800/60000]
loss: 2.185193  [51200/60000]
loss: 2.175397  [57600/60000]
Test Error: 
 Accuracy: 40.2%, Avg loss: 2.156733 

Epoch 2
-------------------------------
loss: 2.153295  [    0/60000]
loss: 2.156245  [ 6400/60000]
loss: 2.102339  [12800/60000]
loss: 2.126546  [19200/60000]
loss: 2.075315  [25600/60000]
loss: 2.013240  [32000/60000]
loss: 2.043741  [38400/60000]
loss: 1.964818  [44800/60000]
loss: 1.957394  [51200/60000]
loss: 1.911524  [57600/60000]
Test Error: 
 Accuracy: 59.0%, Avg loss: 1.898060 

Epoch 3
-------------------------------
loss: 1.914265  [    0/60000]
loss: 1.900998  [ 6400/60000]
loss: 1.785880  [12800/60000]
loss: 1.835323  [19200/60000]
loss: 1.726408  [25600/60000]
loss: 1.671811  [32000/600