# Quickstart

### Imports

In [22]:
from pathlib import Path

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

## Data Manipulation

### Download Data

In [23]:
# Build both FashionMNIST splits from open datasets, storing files under ./data.
# The training split is created first, then the test split; each gets its own
# ToTensor transform.
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train_split,
        download=True,
        transform=ToTensor(),
    )
    for is_train_split in (True, False)
)

### Load Data

In [24]:
# Number of samples delivered in each mini-batch.
batch_size = 64

# Wrap each split in a DataLoader (training split first, then the test split).
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size)
    for split in (training_data, test_data)
)

# Look at the first test batch only, to confirm tensor shapes and the label dtype.
for x, y in test_dataloader:
    print(
        f"Shape of X [N, C, H, W]: {x.shape}",
        f"Shape of y: {y.shape} {y.dtype}",
        sep="\n",
    )
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


## Creating Models

### Select Device

In [25]:
# Pick the compute backend for training: CUDA if present, otherwise MPS,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


### Model Creation

In [26]:
# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: 28*28 input features -> 512 -> 512 -> 10 logits."""

    def __init__(self):
        super().__init__()
        input_features = 28 * 28
        hidden_features = 512
        num_classes = 10

        self.flatten = nn.Flatten()
        # Layer order matters: the positions inside the Sequential become the
        # numeric keys of the state dict ("linear_relu_stack.0.weight", ...).
        layers = [
            nn.Linear(input_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten each sample of the batch and return the unnormalized class logits."""
        return self.linear_relu_stack(self.flatten(x))

# Build the network, move it onto the selected device, and show its layer layout.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


## Optimization Pipeline

### Loss Function 

In [27]:
# Cross-entropy loss for classification. `train` and `test` call it with the
# model's output logits and the integer class labels. Other losses are available.
loss_fn = nn.CrossEntropyLoss()

### Optimizer

In [28]:
# Stochastic gradient descent over every parameter of the model; other
# optimizers from torch.optim can be swapped in here.
learning_rate = 1e-3
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

### Training Pipeline

Training takes the dataloader, model, loss function, and optimizer. The `train` function loops through the batches in the dataloader; for each batch it runs a forward pass through the model, computes the loss, and then uses backpropagation to adjust the weights and biases accordingly.

In [29]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``, updating ``model`` in place.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches. Its ``dataset``
            must support ``len()``; the total is used in the progress report.
        model: Network to optimize. Each batch is moved to the device that
            holds the model's parameters before the forward pass.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: Optimizer that updates the model's parameters.

    Prints the current loss and the number of samples processed so far on
    every 100th batch (starting with the first).
    """
    size = len(dataloader.dataset)

    # Use the model's own device rather than a notebook-level global, so the
    # function works no matter which cells have been run.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    seen = 0
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(model_device), y.to(model_device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Running total stays correct even when the final batch is shorter
        # than the others ((batch + 1) * len(X) would under-report it).
        seen += len(X)
        if batch % 100 == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

### Validation Pipeline

Validation takes the dataloader, model, and loss function and evaluates the performance of the now-trained model. It does not take an optimizer because no parameters are updated during validation. For the same reason, the evaluation loop runs inside `with torch.no_grad()`, which tells PyTorch not to track gradients.

In [30]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

### Execution

In [31]:
# One epoch is a full training pass over the training loader followed by an
# evaluation pass over the test loader; `epochs` is how many times that repeats.
epochs = 5
for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.293347  [   64/60000]
loss: 2.284591  [ 6464/60000]
loss: 2.271797  [12864/60000]
loss: 2.267611  [19264/60000]
loss: 2.241725  [25664/60000]
loss: 2.220431  [32064/60000]
loss: 2.226617  [38464/60000]
loss: 2.190961  [44864/60000]
loss: 2.187884  [51264/60000]
loss: 2.161678  [57664/60000]
Test Error: 
 Accuracy: 43.1%, Avg loss: 2.153034 

Epoch 2
-------------------------------
loss: 2.160940  [   64/60000]
loss: 2.149451  [ 6464/60000]
loss: 2.100042  [12864/60000]
loss: 2.111482  [19264/60000]
loss: 2.057364  [25664/60000]
loss: 2.007777  [32064/60000]
loss: 2.023328  [38464/60000]
loss: 1.952967  [44864/60000]
loss: 1.959490  [51264/60000]
loss: 1.880557  [57664/60000]
Test Error: 
 Accuracy: 56.5%, Avg loss: 1.882261 

Epoch 3
-------------------------------
loss: 1.919310  [   64/60000]
loss: 1.882825  [ 6464/60000]
loss: 1.778069  [12864/60000]
loss: 1.804026  [19264/60000]
loss: 1.693349  [25664/60000]
loss: 1.663537  [32064/600

### Saving and Loading Models

In [32]:
model_path = "saved_models/quickstart_model.pth"

# torch.save does not create missing folders, so make sure the target
# directory exists; otherwise a fresh checkout fails on the save below.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)

# Saving Model: only the learned parameters (state dict) are written.
torch.save(model.state_dict(), model_path)
print(f"Saved PyTorch Model State to {model_path}")

# Loading Model: rebuild the architecture, then restore the parameters.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU still loads on a CPU-only machine.
model = NeuralNetwork().to(device)
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
print(f"Loaded PyTorch Model State from {model_path}")

Saved PyTorch Model State to saved_models/quickstart_model.pth
Loaded PyTorch Model State from saved_models/quickstart_model.pth


### Making Predictions

In [33]:
# These classes are dataset dependent: position i is the label name for class index i.
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

# Using the model loaded in the previous step
model.eval()
x, y = test_data[0]

# Bypassing gradient tracking, as in validation
with torch.no_grad():
    # Add an explicit batch dimension. Passing the bare [C, H, W] sample only
    # works because its single channel axis is treated as the batch by nn.Flatten.
    batch = x.unsqueeze(0).to(device)
    pred = model(batch)
    # .item() turns the arg-max tensor into a plain int for list indexing.
    predicted, actual = classes[pred[0].argmax(0).item()], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"
