Refer to the PyTorch tutorial for more details: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import pandas as pd

## Load data

The training data is a standard image benchmark from torchvision.

The test data is downloaded from the Kaggle competition; it is a subset of the benchmark test set.

In [None]:
# Load and transform the training data from the standard torchvision source.
# With mean = std = 0.5 on every channel the normalised pixels land in [-1, 1].
# Alternative considered by the author: ImageNet-style statistics,
# mean (0.485, 0.456, 0.406) / std (0.229, 0.224, 0.225) -- the original note
# read 0.255 for the last std value -- and resizing the images to 224x224.
mean = (0.5, 0.5, 0.5)
std = (0.5, 0.5, 0.5)

preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
]
transform = transforms.Compose(preprocessing_steps)

trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# Mini-batch loader used for SGD training; batches are reshuffled every epoch.
batch_size = 8
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Human-readable names of the 10 classes, indexed by integer label.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

In [None]:
# load test data (note that the data has been transformed already)
# NOTE(review): the file's contents are not visible here; it is later passed
# straight to `model(...)`, so it presumably holds one image tensor shaped
# like the training batches -- confirm against the competition data.
test_images = torch.load('./data/test_image.pt')

## Take a look at your image data

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# helper to display an image tensor
def imshow(img):
    """Undo the 0.5/0.5 normalisation of `img` and display it with matplotlib.

    `img` is treated as a channel-first tensor: axis 0 is moved to the end
    before plotting, which is the layout `plt.imshow` is given here.
    """
    unnormalized = img / 2 + 0.5  # inverse of (x - 0.5) / 0.5
    channels_last = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()


# Pull one mini-batch from the (shuffled) training loader for a visual check.
dataiter = iter(trainloader)
images, labels = next(dataiter)

print(images.shape, labels)

# Show the whole batch as a single image grid.
imshow(torchvision.utils.make_grid(images))
# Print the class name of every image in the batch. Iterating over the labels
# that were actually returned (instead of range(batch_size)) keeps this correct
# even when a batch holds fewer than `batch_size` samples.
print(' '.join(f'{classes[label]:5s}' for label in labels))

## Build a simple CNN model

Your tasks:
* Check the PyTorch documentation for Conv2d, MaxPool2d and Linear to understand the meaning of their arguments
* Tune the model hyperparameters

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# YOUR TASK: set the hyperparameters of CNN
class CNN(nn.Module):
    """Small LeNet-style classifier: two conv/pool stages and three linear layers.

    All hyperparameters are constructor arguments so they can be tuned without
    editing the class; the defaults reproduce the original fixed architecture
    (3 -> 6 -> 16 channels, 5x5 kernels, 400 -> 120 -> 84 -> 10 units).

    Experiment notes kept from earlier runs (average loss over the first 6000
    mini-batches of epoch 1, using only 2 filters per conv layer and the
    smaller 50 -> 64 -> 32 -> 10 head):
      * conv1 kernel 5: loss fell from 2.307 to 2.072
      * conv1 kernel 3: loss stayed at about 2.30
      * conv1 kernel 4: loss stayed at about 2.30

    Args:
        conv1_channels: number of filters in the first convolution.
        conv2_channels: number of filters in the second convolution.
        kernel_size: side length of the square kernels of both convolutions.
        fc1_units: width of the first fully connected layer.
        fc2_units: width of the second fully connected layer.
        num_classes: number of output logits.
        image_size: side length of the (square) 3-channel input images.

    Raises:
        ValueError: if the inputs are too small for the chosen kernel size,
            i.e. no spatial positions remain after the second pooling stage.
    """

    def __init__(self, conv1_channels=6, conv2_channels=16, kernel_size=5,
                 fc1_units=120, fc2_units=84, num_classes=10, image_size=32):
        super().__init__()

        # Spatial side length after each conv (no padding, stride 1) followed
        # by a 2x2 max-pool with stride 2 (which floors odd sizes).
        side_after_stage1 = (image_size - kernel_size + 1) // 2
        feature_side = (side_after_stage1 - kernel_size + 1) // 2
        if feature_side < 1:
            raise ValueError(
                f'image_size={image_size} is too small for kernel_size={kernel_size}'
            )

        # Layers are created in the same order as before so that a fixed RNG
        # seed yields the same initial weights for the default configuration.
        self.conv1 = nn.Conv2d(3, conv1_channels, kernel_size)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(conv1_channels, conv2_channels, kernel_size)
        self.fc1 = nn.Linear(conv2_channels * feature_side * feature_side, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.fc3 = nn.Linear(fc2_units, num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images `x`."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # no softmax here: the loss is applied to these scores
        return x


# Instantiate the network with the default hyperparameters of CNN.__init__.
model = CNN()

In [None]:
# Attribute access only (the method is not called); as the last expression of
# a notebook cell its repr is displayed.
model.parameters

## Set up optimization method

Your tasks:
* Check the PyTorch documentation for SGD to understand the optimization method
* Tune the optimization hyperparameters

In [None]:
import torch.optim as optim

# YOUR TASK: observe the convergence speed and tune the learning rate
# Cross-entropy loss on the model's class scores, minimised with SGD + momentum.
# (A learning rate of 0.0005 was tried before settling on 0.001.)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.001,
    momentum=0.9,
)

## Train CNN on training data via mini-batch SGD

Your tasks:
* Check how loss changes
* Based on its convergence, tune the optimizer and epoch number
* Try adjusting the model to see if the loss can further decrease to a lower value

In [None]:
# YOUR TASK: observe the loss change and set the right epoch number
total_epoch = 10
for epoch in range(total_epoch):  # loop over the dataset 'total_epoch' times
    running_loss = 0.0              # loss summed over the current 1000-batch window
    accumulated_running_loss = 0.0  # loss summed over the whole epoch
    num_batches = 0                 # mini-batches processed in this epoch
    for i, data in enumerate(trainloader, 0):  # for each batch of data
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)            # forward pass
        loss = criterion(outputs, labels)  # calc loss
        loss.backward()                    # back propagation
        optimizer.step()                   # one step gradient descent

        # accumulate statistics
        batch_loss = loss.item()
        running_loss += batch_loss
        accumulated_running_loss += batch_loss
        num_batches += 1

        if i % 1000 == 999:    # print average loss every 1000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 1000:.3f}')
            running_loss = 0.0

    # Epoch average: divide by the number of batches, not by the last index
    # (which is one smaller and is 0 for a single-batch loader).
    if num_batches:
        print(f'[Accumulated => {epoch + 1}, {num_batches:5d}] loss: {accumulated_running_loss / num_batches:.3f}')


print('Finished Training')

In [None]:
#HC to try lr(s)
import torch.optim as optim

# YOUR TASK: observe the convergence speed and tune the learning rate

#optimizer = optim.SGD(model.parameters(), lr=0.0005, momentum=0.9)
#for lr_ in (0.003, 0.002, 0.001, 0.0009, 0.0007):
# Train for one epoch at each learning rate in turn, printing the average loss
# every 1000 mini-batches.
# NOTE(review): `model` is not re-created inside this loop, so every learning
# rate continues from the weights left by the previous one (and by any earlier
# training cell) -- confirm this is intended before comparing the printed losses.
criterion = nn.CrossEntropyLoss()
for lr_ in (0.0007, 0.0005, 0.0003, 0.0001):
    print(f'lr= {lr_}')
    # A fresh optimizer per learning rate also starts with empty momentum state.
    optimizer = optim.SGD(model.parameters(), lr=lr_, momentum=0.9)

    # YOUR TASK: observe the loss change and set the right epoch number
    total_epoch = 1
    for epoch in range(total_epoch):
        running_loss = 0.0
        for batch_idx, (inputs, labels) in enumerate(trainloader):
            optimizer.zero_grad()              # clear gradients of the last step

            outputs = model(inputs)            # forward pass
            loss = criterion(outputs, labels)  # mini-batch loss
            loss.backward()                    # back propagation
            optimizer.step()                   # parameter update

            running_loss += loss.item()
            if batch_idx % 1000 == 999:        # report every 1000 mini-batches
                print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {running_loss / 1000:.3f}')
                running_loss = 0.0

    print("------------------------------------")

print('Finished Training')

## Make prediction on test images

In [None]:
# Inference only: run under no_grad so that no gradients are tracked.
with torch.no_grad():
    # class scores for every test image
    outputs = model(test_images)
    # the index of the highest score is the predicted class
    predicted = outputs.argmax(dim=1)

# translate integer class indices into class names
predicted = np.array([classes[class_idx] for class_idx in predicted])

print(predicted)
# show the first 32 test images as a grid
imshow(torchvision.utils.make_grid(test_images[:32]))

# One row per test image; the row index is written out as the 'id' column.
submission = pd.DataFrame({'label': predicted})
submission.to_csv("submission_tr-10.csv", index=True, index_label='id')
submission.head(32)

In [None]:
# Measure classification accuracy over the training set. The loop reads from
# `trainloader`, so the figure reported is training accuracy, and the message
# below says so.
correct = 0
total = 0

# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in trainloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest score is what we choose as prediction
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

if total:
    print(f'Accuracy of the network on training images: {100 * correct // total} %')
else:
    # An empty loader would otherwise raise ZeroDivisionError.
    print('Accuracy of the network on training images: n/a (no samples)')