In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

### Data Config

In [3]:
# first and foremost we want data which is interprable by the network
# so we define a series of transformations to apply to our images
# to do this we define an Compose object from the torch transforms
# module which will take the images and apply 2 operations to them
# -first to convert them all to tensors
# -next normalize all the images by giving them the same mean and standard deviation
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# load the CIFAR10 dataset using our predefined transform object
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

# object to load data during the torch training process in this instance
# images are loaded in in batches of 4, and 2 subprocesses (workers)
# are used to handle loading the data
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

# this is analagous to the previous 2 steps
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


### Model Config

In [4]:
import torch.nn as nn
import torch.nn.functional as F

#### About Nets
In our net we got a couple of different objects being created.
- Conv2d
    - Conv2d(3, 6, 5) means we got 3 in channels, 6 out channels, a kernel size of 5. In addition it means we got a bunch of default things being set. Stride is 1, padding is 0, dilation is 1, groups is 1, bias is True and padding mode is zeros.
    - Conv2d will increase the number of channels but reduce the size of each channel. Eg
    ```
    torch.Size([3, 32, 32]) -(conv(3, 6, 5)-> torch.Size([6, 28, 28]) -(conv(6, 16, 5)->  torch.Size([16, 24, 24])
    ```

- MaxPool2d
    - MaxPool2d(2, 2) means we kernel size of 2 and a stride of 2, along with 0 padding, 1 dilation, indices not being returned and ceilings not being calculate.
    - Pool2d will keep the keep the same number of channels reduce the size of each channel. Eg
    ```
    torch.Size([6, 28, 28]) -(pool(2, 2)-> torch.Size([6, 14, 14]) -(pool(2, 2)-> torch.Size([6, 7, 7])
    ```

- Linear
    - Linear(16 * 5 * 5, 120) means we take an input of 16 * 5 * 5 features and we spit out 120 features.
    ```
    torch.Size([400]) -linear(400, 120)-> torch.Size([120])
    ```

Then we also got a 
- relu
    - sets all the negative elements in a tensor to 0
- view
    - x.view(-1, 16\*5\*5) takes the tensor x and reshapes it to have 16\*5\*5 columns
    ```
    torch.Size([16, 5, 5]) -view(-1, 16*5*5)-> torch.Size([400])
    ```

### Math
The output size of a 2d convolution is given by
$$\frac{\text{Width} - \text{Kernel Size} + 2*\text{Padding}}{\text{Stride}} + 1$$

In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

### Training Model

In [6]:
import torch.optim as optim

#### About training
- nn.CrossEntropyLoss() 
    - criterion(outputs, labels)
        - Calculates Cross Entropy Criterion (useful when training a classification problem with `C` classes)
    - loss.backward()
        - Calculates the gradient (a scalar) and multiplies the images by some scalar to get the output result
- optim.SGD()
    - optim.zero_grad()
        - loss.backward() accumulates gradientsm in between minibatches, the gradient must be cleared to avoid this issue
    - optim.step()
        - All optimizers implement a step() method, that updates the parameters after loss.backward is calculated

In [7]:
def train(device, num_epochs=2):
    net = Net()
    net = net.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    
    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(device), data[1].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
    return net

In [8]:
device = torch.device("cuda:0")
net = train(device)

[1,  2000] loss: 2.168
[1,  4000] loss: 1.834
[1,  6000] loss: 1.671
[1,  8000] loss: 1.587
[1, 10000] loss: 1.527
[1, 12000] loss: 1.454
[2,  2000] loss: 1.398
[2,  4000] loss: 1.356
[2,  6000] loss: 1.339
[2,  8000] loss: 1.297
[2, 10000] loss: 1.315
[2, 12000] loss: 1.274


### Testing the Model

In [10]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        print(predicted)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

tensor([3, 1, 8, 8], device='cuda:0')
tensor([4, 6, 1, 6], device='cuda:0')
tensor([3, 1, 0, 9], device='cuda:0')
tensor([6, 7, 9, 8], device='cuda:0')
tensor([5, 3, 8, 4], device='cuda:0')
tensor([7, 0, 2, 9], device='cuda:0')
tensor([2, 6, 4, 4], device='cuda:0')
tensor([9, 6, 3, 2], device='cuda:0')
tensor([2, 3, 9, 2], device='cuda:0')
tensor([4, 9, 9, 5], device='cuda:0')
tensor([0, 6, 3, 6], device='cuda:0')
tensor([0, 9, 3, 9], device='cuda:0')
tensor([4, 4, 9, 2], device='cuda:0')
tensor([6, 6, 8, 8], device='cuda:0')
tensor([7, 6, 3, 3], device='cuda:0')
tensor([7, 5, 4, 2], device='cuda:0')
tensor([6, 6, 1, 0], device='cuda:0')
tensor([3, 7, 2, 6], device='cuda:0')
tensor([8, 8, 9, 2], device='cuda:0')
tensor([7, 3, 3, 8], device='cuda:0')
tensor([8, 1, 1, 2], device='cuda:0')
tensor([2, 7, 2, 8], device='cuda:0')
tensor([8, 9, 0, 4], device='cuda:0')
tensor([8, 6, 4, 6], device='cuda:0')
tensor([6, 2, 0, 7], device='cuda:0')
tensor([5, 3, 6, 3], device='cuda:0')
tensor([1, 1

tensor([3, 0, 3, 7], device='cuda:0')
tensor([2, 4, 9, 4], device='cuda:0')
tensor([8, 7, 4, 4], device='cuda:0')
tensor([9, 4, 6, 4], device='cuda:0')
tensor([8, 2, 5, 5], device='cuda:0')
tensor([4, 1, 4, 2], device='cuda:0')
tensor([2, 1, 6, 4], device='cuda:0')
tensor([5, 4, 4, 2], device='cuda:0')
tensor([8, 8, 4, 3], device='cuda:0')
tensor([7, 5, 6, 9], device='cuda:0')
tensor([1, 6, 7, 2], device='cuda:0')
tensor([9, 1, 7, 5], device='cuda:0')
tensor([3, 9, 3, 4], device='cuda:0')
tensor([4, 3, 4, 0], device='cuda:0')
tensor([3, 2, 9, 2], device='cuda:0')
tensor([2, 3, 5, 2], device='cuda:0')
tensor([3, 6, 2, 9], device='cuda:0')
tensor([2, 3, 3, 3], device='cuda:0')
tensor([2, 4, 5, 5], device='cuda:0')
tensor([6, 9, 5, 2], device='cuda:0')
tensor([1, 4, 3, 9], device='cuda:0')
tensor([4, 4, 3, 3], device='cuda:0')
tensor([6, 1, 1, 3], device='cuda:0')
tensor([7, 0, 2, 6], device='cuda:0')
tensor([8, 1, 2, 2], device='cuda:0')
tensor([4, 3, 2, 9], device='cuda:0')
tensor([2, 0

tensor([3, 2, 9, 0], device='cuda:0')
tensor([6, 8, 4, 2], device='cuda:0')
tensor([6, 7, 2, 4], device='cuda:0')
tensor([7, 7, 4, 9], device='cuda:0')
tensor([0, 7, 0, 6], device='cuda:0')
tensor([7, 2, 0, 3], device='cuda:0')
tensor([2, 7, 5, 8], device='cuda:0')
tensor([0, 6, 8, 7], device='cuda:0')
tensor([5, 0, 6, 6], device='cuda:0')
tensor([4, 9, 1, 8], device='cuda:0')
tensor([4, 3, 9, 4], device='cuda:0')
tensor([9, 3, 5, 4], device='cuda:0')
tensor([5, 3, 7, 3], device='cuda:0')
tensor([4, 0, 8, 4], device='cuda:0')
tensor([8, 9, 3, 2], device='cuda:0')
tensor([1, 9, 4, 3], device='cuda:0')
tensor([8, 9, 1, 8], device='cuda:0')
tensor([1, 1, 0, 8], device='cuda:0')
tensor([9, 9, 8, 7], device='cuda:0')
tensor([6, 9, 1, 9], device='cuda:0')
tensor([2, 9, 1, 2], device='cuda:0')
tensor([8, 9, 9, 6], device='cuda:0')
tensor([8, 9, 6, 1], device='cuda:0')
tensor([7, 2, 7, 6], device='cuda:0')
tensor([3, 9, 8, 3], device='cuda:0')
tensor([0, 6, 7, 7], device='cuda:0')
tensor([5, 8

tensor([4, 4, 5, 1], device='cuda:0')
tensor([6, 8, 4, 8], device='cuda:0')
tensor([3, 2, 2, 3], device='cuda:0')
tensor([9, 7, 1, 4], device='cuda:0')
tensor([7, 4, 6, 1], device='cuda:0')
tensor([2, 4, 0, 0], device='cuda:0')
tensor([6, 2, 8, 6], device='cuda:0')
tensor([4, 0, 3, 0], device='cuda:0')
tensor([9, 4, 8, 2], device='cuda:0')
tensor([9, 1, 5, 4], device='cuda:0')
tensor([6, 4, 8, 2], device='cuda:0')
tensor([7, 1, 2, 6], device='cuda:0')
tensor([6, 4, 3, 8], device='cuda:0')
tensor([2, 3, 6, 4], device='cuda:0')
tensor([6, 0, 4, 6], device='cuda:0')
tensor([9, 8, 2, 9], device='cuda:0')
tensor([3, 9, 3, 9], device='cuda:0')
tensor([4, 1, 6, 9], device='cuda:0')
tensor([9, 5, 5, 9], device='cuda:0')
tensor([0, 1, 3, 9], device='cuda:0')
tensor([3, 9, 0, 9], device='cuda:0')
tensor([7, 3, 4, 4], device='cuda:0')
tensor([6, 6, 6, 4], device='cuda:0')
tensor([1, 9, 0, 6], device='cuda:0')
tensor([5, 1, 8, 6], device='cuda:0')
tensor([7, 8, 6, 5], device='cuda:0')
tensor([1, 3

tensor([5, 6, 5, 5], device='cuda:0')
tensor([7, 2, 4, 1], device='cuda:0')
tensor([2, 7, 6, 3], device='cuda:0')
tensor([9, 4, 8, 6], device='cuda:0')
tensor([2, 2, 2, 4], device='cuda:0')
tensor([8, 6, 4, 0], device='cuda:0')
tensor([2, 5, 5, 0], device='cuda:0')
tensor([2, 4, 4, 4], device='cuda:0')
tensor([9, 2, 0, 3], device='cuda:0')
tensor([3, 2, 8, 2], device='cuda:0')
tensor([4, 4, 7, 9], device='cuda:0')
tensor([4, 7, 6, 4], device='cuda:0')
tensor([5, 1, 9, 9], device='cuda:0')
tensor([6, 4, 3, 1], device='cuda:0')
tensor([2, 1, 6, 3], device='cuda:0')
tensor([2, 2, 9, 5], device='cuda:0')
tensor([6, 9, 0, 9], device='cuda:0')
tensor([4, 3, 8, 8], device='cuda:0')
tensor([8, 5, 5, 2], device='cuda:0')
tensor([9, 9, 9, 4], device='cuda:0')
tensor([0, 4, 0, 4], device='cuda:0')
tensor([4, 6, 5, 3], device='cuda:0')
tensor([2, 9, 6, 1], device='cuda:0')
tensor([1, 7, 7, 7], device='cuda:0')
tensor([0, 8, 4, 2], device='cuda:0')
tensor([7, 7, 4, 4], device='cuda:0')
tensor([0, 3

tensor([4, 6, 8, 6], device='cuda:0')
tensor([3, 0, 8, 0], device='cuda:0')
tensor([2, 1, 9, 4], device='cuda:0')
tensor([2, 8, 6, 6], device='cuda:0')
tensor([0, 7, 9, 0], device='cuda:0')
tensor([0, 0, 0, 9], device='cuda:0')
tensor([6, 0, 2, 7], device='cuda:0')
tensor([9, 6, 8, 8], device='cuda:0')
tensor([4, 4, 9, 4], device='cuda:0')
tensor([5, 2, 3, 1], device='cuda:0')
tensor([5, 3, 0, 2], device='cuda:0')
tensor([5, 2, 6, 6], device='cuda:0')
tensor([8, 4, 4, 4], device='cuda:0')
tensor([9, 9, 4, 1], device='cuda:0')
tensor([7, 0, 7, 2], device='cuda:0')
tensor([6, 0, 2, 9], device='cuda:0')
tensor([4, 5, 0, 3], device='cuda:0')
tensor([3, 3, 0, 6], device='cuda:0')
tensor([0, 9, 4, 7], device='cuda:0')
tensor([9, 9, 0, 7], device='cuda:0')
tensor([2, 6, 7, 1], device='cuda:0')
tensor([2, 3, 7, 4], device='cuda:0')
tensor([6, 8, 6, 6], device='cuda:0')
tensor([8, 4, 9, 8], device='cuda:0')
tensor([3, 1, 0, 2], device='cuda:0')
tensor([5, 6, 6, 5], device='cuda:0')
tensor([8, 3

tensor([3, 8, 7, 4], device='cuda:0')
tensor([2, 8, 6, 2], device='cuda:0')
tensor([4, 3, 1, 0], device='cuda:0')
tensor([4, 2, 8, 8], device='cuda:0')
tensor([4, 9, 3, 4], device='cuda:0')
tensor([3, 3, 9, 2], device='cuda:0')
tensor([0, 6, 8, 9], device='cuda:0')
tensor([4, 4, 8, 3], device='cuda:0')
tensor([5, 0, 5, 7], device='cuda:0')
tensor([6, 6, 5, 4], device='cuda:0')
tensor([6, 3, 3, 9], device='cuda:0')
tensor([0, 1, 5, 3], device='cuda:0')
tensor([3, 9, 6, 0], device='cuda:0')
tensor([4, 2, 2, 5], device='cuda:0')
tensor([6, 3, 1, 9], device='cuda:0')
tensor([5, 6, 0, 4], device='cuda:0')
tensor([5, 0, 6, 7], device='cuda:0')
tensor([2, 8, 1, 2], device='cuda:0')
tensor([0, 7, 9, 8], device='cuda:0')
tensor([0, 1, 1, 1], device='cuda:0')
tensor([5, 0, 0, 8], device='cuda:0')
tensor([7, 1, 4, 4], device='cuda:0')
tensor([2, 2, 7, 4], device='cuda:0')
tensor([2, 2, 6, 6], device='cuda:0')
tensor([0, 1, 6, 9], device='cuda:0')
tensor([8, 3, 7, 8], device='cuda:0')
tensor([8, 4

In [None]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

### Model Improvements

#### vgg19

In [None]:
model = torch.hub.load('pytorch/vision:v0.6.0', 'vgg11', pretrained=True)