In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
print(torch.cuda.is_available())

False


In [3]:
def get_data_loader(training = True):
    
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])

    training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transform
    )

    test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=transform
    )

    DataLoader = torch.utils.data.DataLoader
    train_dataloader = DataLoader(training_data, batch_size=64)
    test_dataloader = DataLoader(test_data, batch_size=64)
    
    if training:
        return train_dataloader
    else:
        return test_dataloader

In [4]:
train_loader = get_data_loader()
print(type(train_loader))
print(train_loader.dataset)

<class 'torch.utils.data.dataloader.DataLoader'>
Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
           )


In [5]:
def build_model():
    model = nn.Sequential(
        nn.Flatten(),
        nn.Linear(28*28, 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 10),
        )
    return model

In [6]:
model = build_model()
print(model)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=128, bias=True)
  (2): ReLU()
  (3): Linear(in_features=128, out_features=64, bias=True)
  (4): ReLU()
  (5): Linear(in_features=64, out_features=10, bias=True)
)


In [7]:
def train_model(model, train_loader, criterion, T):
    def train_step(model, train_loader, criterion):
        # Set the model to training mode - important for batch normalization and dropout layers
        # Unnecessary in this situation but added for best practices
        model.train()

        correct = 0  # To keep track of correct predictions
        total = 0    # Total number of samples
        total_loss = 0

        otim = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
        for batch, (X, y) in enumerate(train_loader):
            batch_size = X.size(0)
            # Compute prediction and loss
            pred = model(X)

            otim.zero_grad()
            
            loss = criterion(pred, y)
            total_loss += loss.item()
            # include an optimizer
            
            # Backpropagation
            loss.backward()
            otim.step()
            
            # find the predicted class of each item in the batch:
            predicted = pred.argmax(1)
            # if it's the correct class, make it a 1. Then find the sum to see how many we got correct
            to_add = (predicted == y).sum()
            # update current to include the newly classified batch
            correct += to_add
            total += batch_size

        los = total_loss / len(train_loader)
            
        return los, correct, total
        #print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    size = len(train_loader.dataset)
    for epoch in range(T):
        loss, correct, total = train_step(model, train_loader, criterion)
        print(f"Train Epoch: {epoch}      Accuracy: {correct}/{total} ({correct/total*100:.2f}%)     Loss: {loss:.3f}")

In [8]:
criterion = nn.CrossEntropyLoss()
train_model(model, train_loader, criterion, 5)

Train Epoch: 0      Accuracy: 41866/60000 69.78%     Loss: 0.901
Train Epoch: 1      Accuracy: 49333/60000 82.22%     Loss: 0.510
Train Epoch: 2      Accuracy: 50434/60000 84.06%     Loss: 0.453
Train Epoch: 3      Accuracy: 50978/60000 84.96%     Loss: 0.423
Train Epoch: 4      Accuracy: 51483/60000 85.81%     Loss: 0.402


In [9]:
def evaluate_model(model, test_loader, criterion, show_loss = True):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(test_loader.dataset)
    num_batches = len(test_loader)
    loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in test_loader:
            pred = model(X)
            loss += criterion(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item() # the same thing as before just more compact

    loss /= num_batches
    correct /= size
    if show_loss:
        print(f"Average loss: {loss:.4f}\nAccuracy: {(100*correct):.2f}%")
    else:
        print(f"Accuracy: {(100*correct):.2f}%")

In [10]:
criterion = nn.CrossEntropyLoss()
test_loader = get_data_loader(training = False)

In [11]:
evaluate_model(model, test_loader, criterion, show_loss = True)
evaluate_model(model, test_loader, criterion, show_loss = False)

Average loss: 0.4286
Accuracy: 84.67%
Accuracy: 84.67%


In [12]:
def predict_label(model, test_images, index):
    logits = model(test_images[index])
    prob = F.softmax(logits, dim = 1)

    class_names = ['T-shirt/top','Trouser','Pullover','Dress','Coat','Sandal','Shirt','Sneaker','Bag','Ankle Boot']
    
    # find the top 3 values and match them to the classes
    listy = [p.item() for p in prob[0]]
    top_3 = sorted(listy)[-3:]
    both = [(value, class_names[listy.index(value)]) for value in top_3]
    print(both)
    print(f"{both[2][1]}: {both[2][0] * 100:.2f}%\n{both[1][1]}: {both[1][0] * 100:.2f}%\n{both[0][1]}: {both[0][0] * 100:.2f}%")

In [13]:
test_images = next(iter(test_loader))[0]
predict_label(model, test_images, 1)

[(0.009726941585540771, 'Coat'), (0.03216451406478882, 'Shirt'), (0.9576472640037537, 'Pullover')]
Pullover: 95.76%
Shirt: 3.22%
Coat: 0.97%
