# VGG-16 Pytorch Implementation
### Code written following this tutorial blog: https://blog.paperspace.com/vgg-from-scratch-pytorch/
### VGG paper https://arxiv.org/pdf/1409.1556.pdf?ref=blog.paperspace.com

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler

# DirectML for AMD compatibility
import torch_directml

In [2]:
# Device configuration
device = torch_directml.device(torch_directml.default_device())
print(device)

privateuseone:0


## Data Loaders
### Some notes on this data:
  - These data loaders create 228x228 images whereas the original VGG paper states that the images inputted to the model were 224x224
  - The images from the CIFAR-10 dataset have only a 32x32 resolution. The original VGG model was trained with the ILSVRC dataset which had an average resolution of 469x387

In [3]:
# Create a data loader for handling the CIFAR-10 dataset
  # this version of CIFAR  60,000 images with 10 different animal classes
def data_loader(data_dir,
                batch_size,
                random_seed=42,
                valid_size=0.1,
                shuffle=True,
                test=False):

    # mean and std of the rgb values in the images
    normalize = transforms.Normalize( # tutorial claimed these values available online
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    transform = transforms.Compose([
            transforms.Resize((227,227)), # in vgg paper, images are 224x224, not sure why they chose 227 (228x228)
            transforms.ToTensor(),
            normalize,
    ])

    ### Test Data ###
    if test:
        dataset = datasets.CIFAR100(
          root=data_dir, train=False,
          download=True, transform=transform,
        )

        data_loader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle
        )

        return data_loader

    ### Training and Validation Data ###
    train_dataset = datasets.CIFAR100(
        root=data_dir, train=True,
        download=True, transform=transform,
    )

    valid_dataset = datasets.CIFAR100(
        root=data_dir, train=True,
        download=True, transform=transform,
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)

In [4]:
# Data loaders
train_loader, valid_loader = data_loader(data_dir='./data',
                                         batch_size=32)

test_loader = data_loader(data_dir='./data',
                              batch_size=32,
                              test=True)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


## VGG-16

Some notes on this implementation:
  - After each convolution, nn.BatchNorm2d() is used to normalize all the convolution channel outputs. However, the original paper explicitly states that local normalization does not improve performance.
  - The forward function uses out.reshape(out.size(0), -1) to change the data outputted by the convolution layers before they are sent to the linear/fully connected layers. However, there is no mention of any data transformation between these layers in the original VGG paper.

In [5]:
# Design the model's neural network
class VGG16(nn.Module): # all torch nn models must subclass nn.Module

    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()

        # Convolution layer: 3 input channels (rgb), 64 output channels, 3x3 kernel
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU())  # activation function essentially throws out values < 0

        # Convolution layer: 64 input, 64 output, 3x3 kernel
        # Max pooling into a 2x2 kernel
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))

        # Convolution layer: 64 input, 128 output, 3x3 kernel
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU())

        # Convolution layer: 128 input, 128 output, 3x3 kernel
        # Max pooling into a 2x2 kernel
        self.layer4 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))

        # Convolution layer: 128 input, 256 output, 3x3 kernel
        self.layer5 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU())

        # Convolution layer: 256 input, 256 output, 3x3 kernel
        self.layer6 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU())

        # Convolution layer: 256 input, 256 output, 3x3 kernel
        # Max pooling into a 2x2 kernel
        self.layer7 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))

        # Convolution layer: 256 input, 512 output, 3x3 kernel
        self.layer8 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU())

        # Convolution layer: 512 input, 512 output, 3x3 kernel
        self.layer9 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU())

        # Convolution layer: 512 input, 512 output, 3x3 kernel
        # Max pooling into a 2x2 kernel
        self.layer10 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))

        # Convolution layer: 512 input, 512 output, 3x3 kernel
        self.layer11 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU())

        # Convolution layer: 512 input, 512 output, 3x3 kernel
        self.layer12 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU())

        # Convolution layer: 512 input, 512 output, 3x3 kernel
        # Max pooling into a 2x2 kernel
        self.layer13 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))

        # Fully connected layers
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(7*7*512, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())

        # This is the layer that performs the classification
          # it takes the 4096 input channels from fc1 and outputs probabilities of each class in CIFAR
          # the outputs (num_classes) depend on if we classify super or fine classes in CIFAR-100 (10 or 100 classes)
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))

    # Define how the layers are connected
    def forward(self, x):
      out = self.layer1(x)
      out = self.layer2(out)
      out = self.layer3(out)
      out = self.layer4(out)
      out = self.layer5(out)
      out = self.layer6(out)
      out = self.layer7(out)
      out = self.layer8(out)
      out = self.layer9(out)
      out = self.layer10(out)
      out = self.layer11(out)
      out = self.layer12(out)
      out = self.layer13(out)
      out = out.reshape(out.size(0), -1) # Not fully sure what this is doing or if it is true to the original VGG
      out = self.fc(out)
      out = self.fc1(out)
      out = self.fc2(out)
      return out

## Training the VGG model

In [6]:
### Hyper Parameters ###

num_classes = 100
num_epochs = 2
learning_rate = 0.008

model = VGG16(num_classes).to(device) # use GPU


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)

total_step = len(train_loader)

In [7]:
# Train the model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
        
    # Validation
      # a validation dataset allows us to see model progress along the way while saving our true test data for the end
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

        print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total))

Epoch [1/2], Step [1/1407], Loss: 4.6057
Epoch [1/2], Step [2/1407], Loss: 4.6030
Epoch [1/2], Step [3/1407], Loss: 4.6065
Epoch [1/2], Step [4/1407], Loss: 4.6054
Epoch [1/2], Step [5/1407], Loss: 4.6051
Epoch [1/2], Step [6/1407], Loss: 4.6042
Epoch [1/2], Step [7/1407], Loss: 4.6053
Epoch [1/2], Step [8/1407], Loss: 4.6069
Epoch [1/2], Step [9/1407], Loss: 4.6015
Epoch [1/2], Step [10/1407], Loss: 4.6029
Epoch [1/2], Step [11/1407], Loss: 4.6054
Epoch [1/2], Step [12/1407], Loss: 4.6087
Epoch [1/2], Step [13/1407], Loss: 4.6076
Epoch [1/2], Step [14/1407], Loss: 4.6053
Epoch [1/2], Step [15/1407], Loss: 4.6025
Epoch [1/2], Step [16/1407], Loss: 4.6072
Epoch [1/2], Step [17/1407], Loss: 4.6048
Epoch [1/2], Step [18/1407], Loss: 4.6053
Epoch [1/2], Step [19/1407], Loss: 4.6067
Epoch [1/2], Step [20/1407], Loss: 4.6056
Epoch [1/2], Step [21/1407], Loss: 4.6014
Epoch [1/2], Step [22/1407], Loss: 4.6065
Epoch [1/2], Step [23/1407], Loss: 4.6043
Epoch [1/2], Step [24/1407], Loss: 4.6049
E

Epoch [1/2], Step [195/1407], Loss: 4.6093
Epoch [1/2], Step [196/1407], Loss: 4.6046
Epoch [1/2], Step [197/1407], Loss: 4.6053
Epoch [1/2], Step [198/1407], Loss: 4.6015
Epoch [1/2], Step [199/1407], Loss: 4.6151
Epoch [1/2], Step [200/1407], Loss: 4.6056
Epoch [1/2], Step [201/1407], Loss: 4.6015
Epoch [1/2], Step [202/1407], Loss: 4.6026
Epoch [1/2], Step [203/1407], Loss: 4.6057
Epoch [1/2], Step [204/1407], Loss: 4.6056
Epoch [1/2], Step [205/1407], Loss: 4.5989
Epoch [1/2], Step [206/1407], Loss: 4.6054
Epoch [1/2], Step [207/1407], Loss: 4.6035
Epoch [1/2], Step [208/1407], Loss: 4.6117
Epoch [1/2], Step [209/1407], Loss: 4.6097
Epoch [1/2], Step [210/1407], Loss: 4.6023
Epoch [1/2], Step [211/1407], Loss: 4.6099
Epoch [1/2], Step [212/1407], Loss: 4.6050
Epoch [1/2], Step [213/1407], Loss: 4.6057
Epoch [1/2], Step [214/1407], Loss: 4.6078
Epoch [1/2], Step [215/1407], Loss: 4.6051
Epoch [1/2], Step [216/1407], Loss: 4.6027
Epoch [1/2], Step [217/1407], Loss: 4.6037
Epoch [1/2]

Epoch [1/2], Step [386/1407], Loss: 4.6094
Epoch [1/2], Step [387/1407], Loss: 4.6092
Epoch [1/2], Step [388/1407], Loss: 4.6085
Epoch [1/2], Step [389/1407], Loss: 4.6072
Epoch [1/2], Step [390/1407], Loss: 4.6119
Epoch [1/2], Step [391/1407], Loss: 4.6052
Epoch [1/2], Step [392/1407], Loss: 4.6129
Epoch [1/2], Step [393/1407], Loss: 4.6036
Epoch [1/2], Step [394/1407], Loss: 4.6087
Epoch [1/2], Step [395/1407], Loss: 4.6002
Epoch [1/2], Step [396/1407], Loss: 4.6033
Epoch [1/2], Step [397/1407], Loss: 4.6060
Epoch [1/2], Step [398/1407], Loss: 4.5981
Epoch [1/2], Step [399/1407], Loss: 4.6085
Epoch [1/2], Step [400/1407], Loss: 4.6036
Epoch [1/2], Step [401/1407], Loss: 4.6106
Epoch [1/2], Step [402/1407], Loss: 4.6000
Epoch [1/2], Step [403/1407], Loss: 4.6061
Epoch [1/2], Step [404/1407], Loss: 4.6067
Epoch [1/2], Step [405/1407], Loss: 4.6035
Epoch [1/2], Step [406/1407], Loss: 4.6057
Epoch [1/2], Step [407/1407], Loss: 4.6025
Epoch [1/2], Step [408/1407], Loss: 4.6030
Epoch [1/2]

Epoch [1/2], Step [577/1407], Loss: 4.6053
Epoch [1/2], Step [578/1407], Loss: 4.6030
Epoch [1/2], Step [579/1407], Loss: 4.6025
Epoch [1/2], Step [580/1407], Loss: 4.6073
Epoch [1/2], Step [581/1407], Loss: 4.6096
Epoch [1/2], Step [582/1407], Loss: 4.6028
Epoch [1/2], Step [583/1407], Loss: 4.6066
Epoch [1/2], Step [584/1407], Loss: 4.6121
Epoch [1/2], Step [585/1407], Loss: 4.6051
Epoch [1/2], Step [586/1407], Loss: 4.6148
Epoch [1/2], Step [587/1407], Loss: 4.6124
Epoch [1/2], Step [588/1407], Loss: 4.6101
Epoch [1/2], Step [589/1407], Loss: 4.6115
Epoch [1/2], Step [590/1407], Loss: 4.6069
Epoch [1/2], Step [591/1407], Loss: 4.6054
Epoch [1/2], Step [592/1407], Loss: 4.6080
Epoch [1/2], Step [593/1407], Loss: 4.6104
Epoch [1/2], Step [594/1407], Loss: 4.6025
Epoch [1/2], Step [595/1407], Loss: 4.6016
Epoch [1/2], Step [596/1407], Loss: 4.6108
Epoch [1/2], Step [597/1407], Loss: 4.6043
Epoch [1/2], Step [598/1407], Loss: 4.6029
Epoch [1/2], Step [599/1407], Loss: 4.6063
Epoch [1/2]

Epoch [1/2], Step [768/1407], Loss: 4.6052
Epoch [1/2], Step [769/1407], Loss: 4.6092
Epoch [1/2], Step [770/1407], Loss: 4.6090
Epoch [1/2], Step [771/1407], Loss: 4.6064
Epoch [1/2], Step [772/1407], Loss: 4.6065
Epoch [1/2], Step [773/1407], Loss: 4.6103
Epoch [1/2], Step [774/1407], Loss: 4.6081
Epoch [1/2], Step [775/1407], Loss: 4.6052
Epoch [1/2], Step [776/1407], Loss: 4.6056
Epoch [1/2], Step [777/1407], Loss: 4.6090
Epoch [1/2], Step [778/1407], Loss: 4.6137
Epoch [1/2], Step [779/1407], Loss: 4.6084
Epoch [1/2], Step [780/1407], Loss: 4.6044
Epoch [1/2], Step [781/1407], Loss: 4.6042
Epoch [1/2], Step [782/1407], Loss: 4.6028
Epoch [1/2], Step [783/1407], Loss: 4.6150
Epoch [1/2], Step [784/1407], Loss: 4.6077
Epoch [1/2], Step [785/1407], Loss: 4.6080
Epoch [1/2], Step [786/1407], Loss: 4.6040
Epoch [1/2], Step [787/1407], Loss: 4.6051
Epoch [1/2], Step [788/1407], Loss: 4.6121
Epoch [1/2], Step [789/1407], Loss: 4.6101
Epoch [1/2], Step [790/1407], Loss: 4.6031
Epoch [1/2]

Epoch [1/2], Step [959/1407], Loss: 4.6023
Epoch [1/2], Step [960/1407], Loss: 4.6048
Epoch [1/2], Step [961/1407], Loss: 4.6064
Epoch [1/2], Step [962/1407], Loss: 4.6066
Epoch [1/2], Step [963/1407], Loss: 4.6082
Epoch [1/2], Step [964/1407], Loss: 4.5997
Epoch [1/2], Step [965/1407], Loss: 4.6053
Epoch [1/2], Step [966/1407], Loss: 4.6029
Epoch [1/2], Step [967/1407], Loss: 4.6098
Epoch [1/2], Step [968/1407], Loss: 4.6069
Epoch [1/2], Step [969/1407], Loss: 4.5990
Epoch [1/2], Step [970/1407], Loss: 4.6056
Epoch [1/2], Step [971/1407], Loss: 4.6071
Epoch [1/2], Step [972/1407], Loss: 4.6044
Epoch [1/2], Step [973/1407], Loss: 4.6051
Epoch [1/2], Step [974/1407], Loss: 4.6092
Epoch [1/2], Step [975/1407], Loss: 4.6056
Epoch [1/2], Step [976/1407], Loss: 4.6036
Epoch [1/2], Step [977/1407], Loss: 4.6062
Epoch [1/2], Step [978/1407], Loss: 4.6075
Epoch [1/2], Step [979/1407], Loss: 4.6062
Epoch [1/2], Step [980/1407], Loss: 4.6064
Epoch [1/2], Step [981/1407], Loss: 4.6064
Epoch [1/2]

Epoch [1/2], Step [1147/1407], Loss: 4.6092
Epoch [1/2], Step [1148/1407], Loss: 4.6102
Epoch [1/2], Step [1149/1407], Loss: 4.6069
Epoch [1/2], Step [1150/1407], Loss: 4.6054
Epoch [1/2], Step [1151/1407], Loss: 4.6086
Epoch [1/2], Step [1152/1407], Loss: 4.6113
Epoch [1/2], Step [1153/1407], Loss: 4.6111
Epoch [1/2], Step [1154/1407], Loss: 4.6085
Epoch [1/2], Step [1155/1407], Loss: 4.6067
Epoch [1/2], Step [1156/1407], Loss: 4.6099
Epoch [1/2], Step [1157/1407], Loss: 4.6097
Epoch [1/2], Step [1158/1407], Loss: 4.6077
Epoch [1/2], Step [1159/1407], Loss: 4.6117
Epoch [1/2], Step [1160/1407], Loss: 4.6140
Epoch [1/2], Step [1161/1407], Loss: 4.6059
Epoch [1/2], Step [1162/1407], Loss: 4.6056
Epoch [1/2], Step [1163/1407], Loss: 4.6084
Epoch [1/2], Step [1164/1407], Loss: 4.6057
Epoch [1/2], Step [1165/1407], Loss: 4.6081
Epoch [1/2], Step [1166/1407], Loss: 4.6044
Epoch [1/2], Step [1167/1407], Loss: 4.6071
Epoch [1/2], Step [1168/1407], Loss: 4.6097
Epoch [1/2], Step [1169/1407], L

Epoch [1/2], Step [1334/1407], Loss: 4.6050
Epoch [1/2], Step [1335/1407], Loss: 4.6115
Epoch [1/2], Step [1336/1407], Loss: 4.6043
Epoch [1/2], Step [1337/1407], Loss: 4.6073
Epoch [1/2], Step [1338/1407], Loss: 4.6031
Epoch [1/2], Step [1339/1407], Loss: 4.6113
Epoch [1/2], Step [1340/1407], Loss: 4.6024
Epoch [1/2], Step [1341/1407], Loss: 4.6071
Epoch [1/2], Step [1342/1407], Loss: 4.6012
Epoch [1/2], Step [1343/1407], Loss: 4.6084
Epoch [1/2], Step [1344/1407], Loss: 4.6016
Epoch [1/2], Step [1345/1407], Loss: 4.6039
Epoch [1/2], Step [1346/1407], Loss: 4.6099
Epoch [1/2], Step [1347/1407], Loss: 4.6077
Epoch [1/2], Step [1348/1407], Loss: 4.6075
Epoch [1/2], Step [1349/1407], Loss: 4.6098
Epoch [1/2], Step [1350/1407], Loss: 4.6025
Epoch [1/2], Step [1351/1407], Loss: 4.6031
Epoch [1/2], Step [1352/1407], Loss: 4.6063
Epoch [1/2], Step [1353/1407], Loss: 4.6052
Epoch [1/2], Step [1354/1407], Loss: 4.6078
Epoch [1/2], Step [1355/1407], Loss: 4.6080
Epoch [1/2], Step [1356/1407], L

Epoch [2/2], Step [117/1407], Loss: 4.5992
Epoch [2/2], Step [118/1407], Loss: 4.6032
Epoch [2/2], Step [119/1407], Loss: 4.6025
Epoch [2/2], Step [120/1407], Loss: 4.6072
Epoch [2/2], Step [121/1407], Loss: 4.6086
Epoch [2/2], Step [122/1407], Loss: 4.6098
Epoch [2/2], Step [123/1407], Loss: 4.6036
Epoch [2/2], Step [124/1407], Loss: 4.6047
Epoch [2/2], Step [125/1407], Loss: 4.6083
Epoch [2/2], Step [126/1407], Loss: 4.6048
Epoch [2/2], Step [127/1407], Loss: 4.6102
Epoch [2/2], Step [128/1407], Loss: 4.6008
Epoch [2/2], Step [129/1407], Loss: 4.6007
Epoch [2/2], Step [130/1407], Loss: 4.6041
Epoch [2/2], Step [131/1407], Loss: 4.6048
Epoch [2/2], Step [132/1407], Loss: 4.6034
Epoch [2/2], Step [133/1407], Loss: 4.6117
Epoch [2/2], Step [134/1407], Loss: 4.6052
Epoch [2/2], Step [135/1407], Loss: 4.5997
Epoch [2/2], Step [136/1407], Loss: 4.6065
Epoch [2/2], Step [137/1407], Loss: 4.6004
Epoch [2/2], Step [138/1407], Loss: 4.5993
Epoch [2/2], Step [139/1407], Loss: 4.6066
Epoch [2/2]

Epoch [2/2], Step [308/1407], Loss: 4.6085
Epoch [2/2], Step [309/1407], Loss: 4.6083
Epoch [2/2], Step [310/1407], Loss: 4.6129
Epoch [2/2], Step [311/1407], Loss: 4.6134
Epoch [2/2], Step [312/1407], Loss: 4.6071
Epoch [2/2], Step [313/1407], Loss: 4.6020
Epoch [2/2], Step [314/1407], Loss: 4.6095
Epoch [2/2], Step [315/1407], Loss: 4.6021
Epoch [2/2], Step [316/1407], Loss: 4.6109
Epoch [2/2], Step [317/1407], Loss: 4.6056
Epoch [2/2], Step [318/1407], Loss: 4.6082
Epoch [2/2], Step [319/1407], Loss: 4.6105
Epoch [2/2], Step [320/1407], Loss: 4.6130
Epoch [2/2], Step [321/1407], Loss: 4.6031
Epoch [2/2], Step [322/1407], Loss: 4.6035
Epoch [2/2], Step [323/1407], Loss: 4.6134
Epoch [2/2], Step [324/1407], Loss: 4.6057
Epoch [2/2], Step [325/1407], Loss: 4.6070
Epoch [2/2], Step [326/1407], Loss: 4.6069
Epoch [2/2], Step [327/1407], Loss: 4.6096
Epoch [2/2], Step [328/1407], Loss: 4.6046
Epoch [2/2], Step [329/1407], Loss: 4.6033
Epoch [2/2], Step [330/1407], Loss: 4.6071
Epoch [2/2]

Epoch [2/2], Step [499/1407], Loss: 4.6044
Epoch [2/2], Step [500/1407], Loss: 4.5973
Epoch [2/2], Step [501/1407], Loss: 4.6075
Epoch [2/2], Step [502/1407], Loss: 4.6129
Epoch [2/2], Step [503/1407], Loss: 4.5999
Epoch [2/2], Step [504/1407], Loss: 4.6085
Epoch [2/2], Step [505/1407], Loss: 4.6084
Epoch [2/2], Step [506/1407], Loss: 4.5997
Epoch [2/2], Step [507/1407], Loss: 4.6070
Epoch [2/2], Step [508/1407], Loss: 4.6112
Epoch [2/2], Step [509/1407], Loss: 4.6090
Epoch [2/2], Step [510/1407], Loss: 4.6010
Epoch [2/2], Step [511/1407], Loss: 4.6063
Epoch [2/2], Step [512/1407], Loss: 4.6077
Epoch [2/2], Step [513/1407], Loss: 4.6057
Epoch [2/2], Step [514/1407], Loss: 4.5995
Epoch [2/2], Step [515/1407], Loss: 4.6142
Epoch [2/2], Step [516/1407], Loss: 4.6052
Epoch [2/2], Step [517/1407], Loss: 4.6024
Epoch [2/2], Step [518/1407], Loss: 4.6060
Epoch [2/2], Step [519/1407], Loss: 4.6075
Epoch [2/2], Step [520/1407], Loss: 4.6059
Epoch [2/2], Step [521/1407], Loss: 4.6063
Epoch [2/2]

Epoch [2/2], Step [690/1407], Loss: 4.5992
Epoch [2/2], Step [691/1407], Loss: 4.6029
Epoch [2/2], Step [692/1407], Loss: 4.5998
Epoch [2/2], Step [693/1407], Loss: 4.6115
Epoch [2/2], Step [694/1407], Loss: 4.6109
Epoch [2/2], Step [695/1407], Loss: 4.6032
Epoch [2/2], Step [696/1407], Loss: 4.6020
Epoch [2/2], Step [697/1407], Loss: 4.6039
Epoch [2/2], Step [698/1407], Loss: 4.6064
Epoch [2/2], Step [699/1407], Loss: 4.6101
Epoch [2/2], Step [700/1407], Loss: 4.6029
Epoch [2/2], Step [701/1407], Loss: 4.6084
Epoch [2/2], Step [702/1407], Loss: 4.5973
Epoch [2/2], Step [703/1407], Loss: 4.6068
Epoch [2/2], Step [704/1407], Loss: 4.6173
Epoch [2/2], Step [705/1407], Loss: 4.6071
Epoch [2/2], Step [706/1407], Loss: 4.5998
Epoch [2/2], Step [707/1407], Loss: 4.6056
Epoch [2/2], Step [708/1407], Loss: 4.6032
Epoch [2/2], Step [709/1407], Loss: 4.6025
Epoch [2/2], Step [710/1407], Loss: 4.6096
Epoch [2/2], Step [711/1407], Loss: 4.6048
Epoch [2/2], Step [712/1407], Loss: 4.6106
Epoch [2/2]

Epoch [2/2], Step [881/1407], Loss: 4.6019
Epoch [2/2], Step [882/1407], Loss: 4.6066
Epoch [2/2], Step [883/1407], Loss: 4.6020
Epoch [2/2], Step [884/1407], Loss: 4.6029
Epoch [2/2], Step [885/1407], Loss: 4.6067
Epoch [2/2], Step [886/1407], Loss: 4.6079
Epoch [2/2], Step [887/1407], Loss: 4.6014
Epoch [2/2], Step [888/1407], Loss: 4.6039
Epoch [2/2], Step [889/1407], Loss: 4.6058
Epoch [2/2], Step [890/1407], Loss: 4.6002
Epoch [2/2], Step [891/1407], Loss: 4.6053
Epoch [2/2], Step [892/1407], Loss: 4.6012
Epoch [2/2], Step [893/1407], Loss: 4.6030
Epoch [2/2], Step [894/1407], Loss: 4.6015
Epoch [2/2], Step [895/1407], Loss: 4.6082
Epoch [2/2], Step [896/1407], Loss: 4.6084
Epoch [2/2], Step [897/1407], Loss: 4.6061
Epoch [2/2], Step [898/1407], Loss: 4.6093
Epoch [2/2], Step [899/1407], Loss: 4.6136
Epoch [2/2], Step [900/1407], Loss: 4.6094
Epoch [2/2], Step [901/1407], Loss: 4.6073
Epoch [2/2], Step [902/1407], Loss: 4.6055
Epoch [2/2], Step [903/1407], Loss: 4.6085
Epoch [2/2]

Epoch [2/2], Step [1070/1407], Loss: 4.6065
Epoch [2/2], Step [1071/1407], Loss: 4.6013
Epoch [2/2], Step [1072/1407], Loss: 4.6063
Epoch [2/2], Step [1073/1407], Loss: 4.6030
Epoch [2/2], Step [1074/1407], Loss: 4.6056
Epoch [2/2], Step [1075/1407], Loss: 4.6055
Epoch [2/2], Step [1076/1407], Loss: 4.6105
Epoch [2/2], Step [1077/1407], Loss: 4.6062
Epoch [2/2], Step [1078/1407], Loss: 4.6113
Epoch [2/2], Step [1079/1407], Loss: 4.6035
Epoch [2/2], Step [1080/1407], Loss: 4.6118
Epoch [2/2], Step [1081/1407], Loss: 4.6092
Epoch [2/2], Step [1082/1407], Loss: 4.6030
Epoch [2/2], Step [1083/1407], Loss: 4.6058
Epoch [2/2], Step [1084/1407], Loss: 4.6100
Epoch [2/2], Step [1085/1407], Loss: 4.6063
Epoch [2/2], Step [1086/1407], Loss: 4.6033
Epoch [2/2], Step [1087/1407], Loss: 4.6079
Epoch [2/2], Step [1088/1407], Loss: 4.6039
Epoch [2/2], Step [1089/1407], Loss: 4.6111
Epoch [2/2], Step [1090/1407], Loss: 4.6020
Epoch [2/2], Step [1091/1407], Loss: 4.6019
Epoch [2/2], Step [1092/1407], L

Epoch [2/2], Step [1257/1407], Loss: 4.6081
Epoch [2/2], Step [1258/1407], Loss: 4.5993
Epoch [2/2], Step [1259/1407], Loss: 4.6111
Epoch [2/2], Step [1260/1407], Loss: 4.6113
Epoch [2/2], Step [1261/1407], Loss: 4.6061
Epoch [2/2], Step [1262/1407], Loss: 4.6041
Epoch [2/2], Step [1263/1407], Loss: 4.6053
Epoch [2/2], Step [1264/1407], Loss: 4.6031
Epoch [2/2], Step [1265/1407], Loss: 4.6012
Epoch [2/2], Step [1266/1407], Loss: 4.6067
Epoch [2/2], Step [1267/1407], Loss: 4.6061
Epoch [2/2], Step [1268/1407], Loss: 4.6051
Epoch [2/2], Step [1269/1407], Loss: 4.6023
Epoch [2/2], Step [1270/1407], Loss: 4.6040
Epoch [2/2], Step [1271/1407], Loss: 4.6111
Epoch [2/2], Step [1272/1407], Loss: 4.6036
Epoch [2/2], Step [1273/1407], Loss: 4.6090
Epoch [2/2], Step [1274/1407], Loss: 4.6094
Epoch [2/2], Step [1275/1407], Loss: 4.6041
Epoch [2/2], Step [1276/1407], Loss: 4.6022
Epoch [2/2], Step [1277/1407], Loss: 4.6093
Epoch [2/2], Step [1278/1407], Loss: 4.6061
Epoch [2/2], Step [1279/1407], L

## Testing

In [8]:
# Evaluation on the test dataset
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    print('Accuracy of the network on the {} test images: {} %'.format(10000, 100 * correct / total))

Accuracy of the network on the 10000 test images: 1.0 %
