# Initialization

In [None]:
# Libraries
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchmetrics.classification import MulticlassAccuracy
import matplotlib.pyplot as plt
from pathlib import Path
from timeit import default_timer as timer
from tqdm.auto import tqdm
from helper_functions import train_step, test_step

# Device agnostic code: use CUDA when torch reports it as available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Directory that holds saved model weights; created here (including any missing
# parents) so a later save does not fail on a missing folder
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

print(f"PyTorch: {torch.__version__}")

# Set Up Data

In [None]:
# FashionMNIST training split, downloaded into ./data if missing and converted to tensors
train_data = datasets.FashionMNIST("data", train=True, transform=ToTensor(), download=True)

# FashionMNIST test split, prepared the same way
test_data = datasets.FashionMNIST("data", train=False, transform=ToTensor(), download=True)

# Sanity check: (train images, train labels, test images, test labels) counts
tuple(len(part) for split in (train_data, test_data) for part in (split.data, split.targets))

### Prepare DataLoader

In [None]:
# Number of samples per mini-batch for both loaders
BATCH_SIZE = 32

# Training batches are reshuffled on every pass over the data
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Test batches keep the dataset order
test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

### Visualize first batch

In [None]:
# Seed before creating the iterator so the shuffled first batch is reproducible
torch.manual_seed(42)

# Get random batch (train_dataloader shuffles, so the first batch is a random sample)
batch_features, batch_labels = next(iter(train_dataloader))

# Size the grid from the batch that was actually drawn rather than from BATCH_SIZE,
# so changing the batch size (or hitting a short batch) cannot overflow the grid.
cols = 8
rows = -(-len(batch_features) // cols)  # ceiling division without importing math

fig = plt.figure(figsize=(18, 9))
for i, (img, label) in enumerate(zip(batch_features, batch_labels)):
    ax = fig.add_subplot(rows, cols, i + 1)
    # squeeze() drops size-1 dimensions so imshow receives a 2-D image
    ax.imshow(img.squeeze(), cmap="gray")
    ax.set_title(train_data.classes[int(label)])
    ax.axis("off")

# Build Simple Model

In [None]:
class FashionMNISTModelV0(nn.Module):
    """Baseline classifier: flatten the input, then apply two linear layers.

    No activation sits between (or after) the linear layers, so the whole
    network is an affine map of the flattened input.

    Args:
        input_shape: Number of features per sample after flattening.
        hidden_units: Width of the intermediate linear layer.
        output_shape: Number of output logits (one per class).
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        # Order matters: Flatten is index 0, the linear layers are indices 1 and 2
        # (these indices appear in the state_dict keys).
        layers = [
            nn.Flatten(),
            nn.Linear(input_shape, hidden_units),
            nn.Linear(hidden_units, output_shape),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw logits produced by the layer stack for batch ``x``."""
        logits = self.layer_stack(x)
        return logits

In [None]:
# Fix the RNG so the initial weights are reproducible
torch.manual_seed(42)

# 784 input features (written as 28 * 28), a 10-unit hidden layer, one logit per class
model_0 = FashionMNISTModelV0(input_shape=28 * 28, hidden_units=10, output_shape=len(train_data.classes))
model_0 = model_0.to(device)

## Train Model

### Set up loss function, optimizer, and evaluation metrics

In [None]:
# Cross-entropy over the raw logits produced by the model
loss_fn = nn.CrossEntropyLoss()

# Plain SGD over the baseline model's parameters
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)

# Accuracy metric sized to the dataset's class count, placed on the same device as the model
# NOTE(review): relies on torchmetrics' default averaging mode — confirm it is the one intended
n_classes = len(train_data.classes)
accuracy_fn = MulticlassAccuracy(num_classes=n_classes).to(device)

### Training and testing loops

In [None]:
torch.manual_seed(42)
epochs = 3
start = timer()

for epoch in tqdm(range(epochs)):
    # Training: one call to the imported train_step helper per epoch
    train_step(model=model_0, dataloader=train_dataloader, loss_fn=loss_fn, optimizer=optimizer, device=device)

    # Testing: evaluated every epoch; only the last epoch's result is reported below
    result = test_step(model=model_0, dataloader=test_dataloader, loss_fn=loss_fn, accuracy_fn=accuracy_fn, device=device)

end = timer()
train_time = end - start

# Whole minutes are shown only once the run has lasted at least a minute
minutes_prefix = f"{int(train_time / 60)} min " if train_time >= 60 else ""

# NOTE(review): the '%' suffix assumes test_step reports accuracy on a 0-100 scale — confirm
print(f"Final Loss: {result['loss']:.5f} | Final Accuracy: {result['accuracy']:.2f}%")
print(f"Training Time: {minutes_prefix}{(train_time % 60):.3f} sec")

# Rebuild Model as Convolutional Neural Network

In [None]:
class FashionMNISTModelV1(nn.Module):
    """Small CNN: two conv blocks followed by a single linear classifier.

    Each conv block is Conv2d(5x5, stride 1, padding 2) -> BatchNorm2d -> ReLU
    -> MaxPool2d(2). The convolution keeps the spatial size and each pooling
    halves both spatial dimensions, so the two blocks divide the pixel count
    by 16.

    Args:
        input_shape: Number of channels of the input images.
        hidden1: Output channels of the first conv block.
        hidden2: Output channels of the second conv block.
        output_shape: Number of output logits (one per class).
        imgsize: Total pixel count of one input image (height * width). The
            classifier width is derived as ``hidden2 * imgsize / 16``, which
            matches the real feature count only when both spatial dimensions
            are divisible by 4.
    """

    def __init__(self, input_shape: int, hidden1: int, hidden2: int, output_shape: int, imgsize: int):
        super().__init__()
        self.block_1 = self._conv_block(input_shape, hidden1)
        self.block_2 = self._conv_block(hidden1, hidden2)

        # Features left after two 2x2 poolings: hidden2 channels * (pixels / 16)
        flat_features = int(hidden2 * imgsize / 16)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=flat_features, out_features=output_shape),
        )

        # The same three modules are also exposed as one sequential pipeline;
        # forward() runs through this attribute.
        self.layer_stack = nn.Sequential(self.block_1, self.block_2, self.classifier)

    @staticmethod
    def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
        """Build one Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d block."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for image batch ``x``."""
        logits = self.layer_stack(x)
        return logits

In [None]:
# Fix the RNG so the initial weights are reproducible
torch.manual_seed(42)

# Single-channel input, 16 then 32 conv channels, one logit per class, 28 * 28 pixels per image
model_1 = FashionMNISTModelV1(
    input_shape=1,
    hidden1=16,
    hidden2=32,
    output_shape=len(train_data.classes),
    imgsize=28 * 28,
).to(device)

### Update optimizer

In [None]:
# Rebind the optimizer to the CNN's parameters (same SGD settings as for the baseline model)
optimizer = torch.optim.SGD(model_1.parameters(), lr=0.1)

### Training and testing loops

In [None]:
torch.manual_seed(42)
epochs = 100
start = timer()

# Training: the epochs run back to back with no per-epoch evaluation
for epoch in tqdm(range(epochs)):
    train_step(model_1, train_dataloader, loss_fn, optimizer, device)

# Testing: a single evaluation once all epochs have finished (included in the timing)
# NOTE(review): accuracy_fn is the metric object already used for model_0 — confirm test_step resets it
result = test_step(model_1, test_dataloader, loss_fn, accuracy_fn, device)

end = timer()
train_time = end - start

# Whole minutes are shown only once the run has lasted at least a minute
minutes_prefix = f"{int(train_time / 60)} min " if train_time >= 60 else ""

print(f"Final Loss: {result['loss']:.5f} | Final Accuracy: {result['accuracy']:.2f}%")
print(f"Training Time: {minutes_prefix}{(train_time % 60):.3f} sec")

In [None]:
# File name and full path of the checkpoint inside the models directory
MODEL_NAME = "fashionMNIST_model.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

print(f"Saving model to: {MODEL_SAVE_PATH}")

# Only the state_dict is written, not the pickled module object
cnn_state = model_1.state_dict()
torch.save(cnn_state, MODEL_SAVE_PATH)