In [1]:
# Notebook switch: when False the training loop runs and the resulting model is
# passed to tools.save_model; when True training is skipped and the model is
# restored from '2_vgg_net.pth' through tools.load_model.
skip_training = False

In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import tools

In [3]:
# Dataset root directory chosen by the project helper; it is passed as `root`
# to the FashionMNIST train/test datasets.
data_dir = tools.select_data_dir()

The data directory is ../data


In [4]:
# Torch device shared by every cell that references `device`; CPU by default.
device = torch.device('cpu')
# Alternative: uncomment to target the first CUDA GPU instead.
# device = torch.device('cuda:0')

In [5]:
# Per-image preprocessing: convert to a tensor, then normalise with mean 0.5 and
# std 0.5 so that pixel values end up in [-1, 1].
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Arguments common to the training and the test split.
_dataset_kwargs = dict(root=data_dir, download=True, transform=transform)
trainset = torchvision.datasets.FashionMNIST(train=True, **_dataset_kwargs)
testset = torchvision.datasets.FashionMNIST(train=False, **_dataset_kwargs)

# Human-readable names of the ten FashionMNIST classes, indexed by label.
classes = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

# Shuffled mini-batches of 32 for training; fixed-order batches of 5 for testing.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=32,
    shuffle=True,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=5,
    shuffle=False,
)

In [6]:
class VGGNet(nn.Module):
    """VGG-style convolutional classifier for single-channel 28x28 images.

    The network is nine conv -> batch-norm -> ReLU stages, registered as
    ``conv{i}``, ``batch{i}`` and ``relu{i}`` for i = 1..9. Stages 3 and 6 are
    followed by 2x2 max-pooling (``pool3``, ``pool6``), stage 9 by a 5x5
    average pool (``pool9``), and a final linear layer ``fc10`` maps the 20
    remaining features to 10 outputs.
    """

    # One row per stage: (index, in_channels, out_channels, kernel_size, padding).
    _STAGES = (
        # Layer 1: three padded 3x3 convolutions with 20 channels
        (1, 1, 20, 3, 1),
        (2, 20, 20, 3, 1),
        (3, 20, 20, 3, 1),
        # Layer 2: three padded 3x3 convolutions with 40 channels
        (4, 20, 40, 3, 1),
        (5, 40, 40, 3, 1),
        (6, 40, 40, 3, 1),
        # Layer 3: unpadded 3x3 convolution with 60 channels
        (7, 40, 60, 3, 0),
        # Layers 4 and 5: 1x1 convolutions reducing to 40, then 20 channels
        (8, 60, 40, 1, 0),
        (9, 40, 20, 1, 0),
    )

    # Indices of the stages that are followed by a 2x2 max-pool.
    _MAXPOOL_AFTER = (3, 6)

    def __init__(self):
        super().__init__()
        # Sub-modules are registered in the same order and under the same names
        # as a hand-written layer list, so state_dict keys stay stable.
        for idx, n_in, n_out, ksize, pad in self._STAGES:
            setattr(self, f'conv{idx}',
                    nn.Conv2d(in_channels=n_in, out_channels=n_out, kernel_size=ksize, padding=pad))
            setattr(self, f'batch{idx}', nn.BatchNorm2d(n_out))
            setattr(self, f'relu{idx}', nn.ReLU())
            if idx in self._MAXPOOL_AFTER:
                setattr(self, f'pool{idx}', nn.MaxPool2d(kernel_size=2, stride=2))

        self.pool9 = nn.AvgPool2d(kernel_size=5)

        # Layer 6
        self.fc10 = nn.Linear(in_features=20, out_features=10)

    def _stage(self, y, idx):
        """Apply convolution, batch normalisation and ReLU number ``idx`` to ``y``."""
        conv = getattr(self, f'conv{idx}')
        norm = getattr(self, f'batch{idx}')
        act = getattr(self, f'relu{idx}')
        return act(norm(conv(y)))

    def forward(self, x, verbose=True):
        """
        Args:
          x of shape (batch_size, 1, 28, 28): Input images.
          verbose: Accepted for interface compatibility; this implementation
            does not read it and prints nothing.

        Returns:
          y of shape (batch_size, 10): Outputs of the network.
        """
        y = x
        for idx, *_ in self._STAGES:
            y = self._stage(y, idx)
            if idx in self._MAXPOOL_AFTER:
                y = getattr(self, f'pool{idx}')(y)

        # For 28x28 inputs the feature map is 5x5 here, so the average pool
        # leaves a 1x1 map whose two trailing singleton axes are dropped.
        y = self.pool9(y)
        y = y.squeeze(-1).squeeze(-1)

        return self.fc10(y)

In [7]:
def test_VGGNet_shapes():
    """Smoke-test VGGNet by checking the output shape for one training batch.

    Builds a fresh network on `device`, feeds it the first batch drawn from
    `trainloader` and asserts that the result has shape
    (trainloader.batch_size, 10).

    Raises:
      AssertionError: If the network output has a different shape.
    """
    net = VGGNet()
    net.to(device)

    # Feed a batch of images from the training data to test the network
    with torch.no_grad():
        # Use the builtin next(): iterator objects are not guaranteed to expose
        # a Python-2-style .next() method.
        images, labels = next(iter(trainloader))
        images = images.to(device)
        print('Shape of the input tensor:', images.shape)

        y = net(images, verbose=True)
        assert y.shape == torch.Size([trainloader.batch_size, 10]), f"Bad y.shape: {y.shape}"

    print('Success')

test_VGGNet_shapes()

Shape of the input tensor: torch.Size([32, 1, 28, 28])
Success


In [8]:
# This function computes the accuracy on the test dataset
def compute_accuracy(net, testloader):
    """Return the fraction of samples in `testloader` that `net` classifies correctly.

    Each batch is moved to the device that holds the model's parameters, so the
    inputs always land next to the weights instead of depending on a
    notebook-level global. A model without parameters receives the batches
    unmoved.

    Args:
      net: Model called as `net(images)`; it must return one score per class
        along dimension 1.
      testloader: Iterable yielding `(images, labels)` tensor pairs.

    Returns:
      Number of correct predictions divided by the number of labels seen.

    Raises:
      ZeroDivisionError: If `testloader` yields no samples.

    Side effects:
      Switches `net` to evaluation mode and leaves it there.
    """
    net.eval()

    first_param = next(net.parameters(), None)
    target_device = None if first_param is None else first_param.device

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            if target_device is not None:
                images, labels = images.to(target_device), labels.to(target_device)
            outputs = net(images)
            # The index of the largest score along dim 1 is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

In [9]:
net = VGGNet()
# Place the model on the selected device before the optimizer captures its
# parameters; otherwise a non-CPU `device` is silently ignored during training.
net.to(device)
# Training loop
if not skip_training:
    optimizer = optim.Adam(net.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()
    epochs = 10
    # Running sum of the batch losses over all epochs (never reset).
    total_loss = 0.0

    # Make sure batch-norm layers use batch statistics while training.
    net.train()
    for epoch in range(epochs):
        print('Epoch: {}'.format(epoch))
        for idx, (train_x, train_label) in enumerate(trainloader):
            # Inputs and targets must live on the same device as the model.
            train_x = train_x.to(device).float()
            train_label = train_label.to(device).long()

            optimizer.zero_grad()
            predict_y = net(train_x)
            loss = criterion(predict_y, train_label)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            total_loss += batch_loss
            # Report the current batch loss every 100 iterations.
            if idx % 100 == 0:
                print('idx: {}, loss: {}'.format(idx, batch_loss))

Epoch: 0
idx: 0, loss: 2.3608996868133545
idx: 100, loss: 0.5725903511047363
idx: 200, loss: 0.8095713257789612
idx: 300, loss: 0.4059826731681824
idx: 400, loss: 0.4826407730579376
idx: 500, loss: 0.4274907112121582
idx: 600, loss: 0.47818905115127563
idx: 700, loss: 0.40309813618659973
idx: 800, loss: 0.4287862479686737
idx: 900, loss: 0.19556410610675812
idx: 1000, loss: 0.2674909830093384
idx: 1100, loss: 0.2615368962287903
idx: 1200, loss: 0.3028201162815094
idx: 1300, loss: 0.8580081462860107
idx: 1400, loss: 0.28564900159835815
idx: 1500, loss: 0.3364502787590027
idx: 1600, loss: 0.2108387053012848
idx: 1700, loss: 0.14377668499946594
idx: 1800, loss: 0.2606153190135956
Epoch: 1
idx: 0, loss: 0.376329243183136
idx: 100, loss: 0.31173932552337646
idx: 200, loss: 0.14687323570251465
idx: 300, loss: 0.17319948971271515
idx: 400, loss: 0.35542377829551697
idx: 500, loss: 0.2077597975730896
idx: 600, loss: 0.5888223052024841
idx: 700, loss: 0.3369978666305542
idx: 800, loss: 0.172428

In [10]:
# Restore the model from disk when training was skipped; otherwise save the
# freshly trained one.
if skip_training:
    net = VGGNet()
    tools.load_model(net, '2_vgg_net.pth', device)
else:
    tools.save_model(net, '2_vgg_net.pth')

Do you want to save the model (type yes to confirm)? yes
Model saved to 2_vgg_net.pth.


In [11]:
# Compute the accuracy on the test set
accuracy = compute_accuracy(net, testloader)
print(f'Accuracy of the VGG net on the test images: {accuracy: .3f}')
# Sanity threshold: stop the notebook unless more than 89% of the test images
# are classified correctly.
assert accuracy > 0.89, 'Poor accuracy'
print('Success')

Accuracy of the VGG net on the test images:  0.925
Success
