<a href="https://colab.research.google.com/github/7201krap/Introduction_to_Pytorch/blob/main/pytorch_in_60_mins/CIFAR10_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim 

In [5]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )

trainset    = torchvision.datasets.CIFAR10(root='./data', train=True, 
                                        download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=8, 
                                          shuffle=True, num_workers=10)

testset     = torchvision.datasets.CIFAR10(root='./data', train=False, 
                                        download=True, transform=transform)

testloader  = torch.utils.data.DataLoader(trainset, batch_size=8, 
                                          shuffle=False, num_workers=10)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [11]:
trainset

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
           )

In [12]:
dataiter = iter(trainloader)
images, labels = dataiter.next()

In [13]:
print(f"Image size             : {images.size()}")
print(f"Its corresponding label: {labels}")

Image size             : torch.Size([8, 3, 32, 32])
Its corresponding label: tensor([3, 6, 7, 7, 3, 0, 1, 3])


In [14]:
classes = ('plane', 'car', 'bird', 'cat', 'dder', 'dog', 'frog', 'horse', 'ship', 'truck')

In [32]:
# Relationship between 'before' and 'after'

# Size of the convoluted image could be expressed by the following equation:
# ((W - F + 2P) / S) + 1
# W : width/height of 'before' (convoluted) image
# F : Filter size 
# P : Padding size. If there is no padding then P=0 (default)
# S : Stride (default stride is 1)

# In this example, the original image size is 32 X 32 
# By applying the first convoltional layer   : ((32 - 5 + 0) / 1) + 1 = 28
# By applying the first pooling layer        : 28 / 2 = 14 
# By applying the second convolutional layer : ((14 - 5 + 0) / 1) + 1 = 10
# By applying the second pooling layer       : 10 / 2 = 5 

# These above proecess indicates that now the final shape of the convoluted image 
# just before the Fully connected layer is: 16 * 5 * 5 
# 16 * 5 * 5 could be translated : 16 channels(depth) and 5 by 5 image 

# Build a model
class MyNet(nn.Module):

  # default setting
  def __init__(self):
    super(MyNet, self).__init__()

    # before channel size=3, after channel size=6, kernel size=5, no padding 
    self.conv1 = nn.Conv2d(3, 6, 5)

    # before chanel size=6, after channel size=16, kernel size=5, no padding 
    self.conv2 = nn.Conv2d(6, 16, 5)

    # max pooling kernel size=2
    self.pool = nn.MaxPool2d(2, 2)
    
    # fully connected layer 1
    self.fc1   = nn.Linear(16 * 5 * 5, 120)

    # fully connected layer 2
    self.fc2   = nn.Linear(120, 84)

    # fully connected layer 3
    self.fc3   = nn.Linear(84, 10)

  # default setting 
  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))

    # flatten. 1차원으로 펴준다. 
    x = x.view(-1, 16 * 5 * 5)

    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))

    # we do not apply relu at the final stage 
    x = self.fc3(x)

    return x

net = MyNet()

In [33]:
print(f"MyNet looks like this :\n {net}")

MyNet looks like this :
 MyNet(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [34]:
# Loss function
criterion = nn.CrossEntropyLoss()

# Stochastic Gradient Descent 
# net.parameters() : parameters that were used in MyNet
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [43]:
for epoch in range(5):  # loop over the dataset multiple times
  running_loss = 0.0
  for i, data in enumerate(trainloader, 0):
    inputs, labels = data 

    # zero the parameter gradients. This is neccessary 
    optimizer.zero_grad()

    # forward + backward + optimize 
    # foward 
    outputs = net(inputs)

    # calculate the loss 
    # batch 1개에 대한 loss = 이미지 8개에 대한 loss 
    loss    = criterion(outputs, labels)

    # backward 
    loss.backward()

    # optimize 
    optimizer.step()

    # print statistics 
    running_loss = running_loss + loss.item()
    if i % 2000 == 1999:
      # one epoch = one cycle through the full training dataset.

      print(f"epoch: {epoch+1}, iteration: {i+1}, loss: {running_loss/2000}")
      running_loss = 0.0

print('Training done')


epoch: 1, iteration: 2000, loss: 0.713645027528517
epoch: 1, iteration: 4000, loss: 0.7257232715301216
epoch: 1, iteration: 6000, loss: 0.7693922595698386
epoch: 2, iteration: 2000, loss: 0.6555384932989254
epoch: 2, iteration: 4000, loss: 0.7152977728694677
epoch: 2, iteration: 6000, loss: 0.7466560474745929
epoch: 3, iteration: 2000, loss: 0.6450035705287009
epoch: 3, iteration: 4000, loss: 0.6889512957660482
epoch: 3, iteration: 6000, loss: 0.7006388131072745
epoch: 4, iteration: 2000, loss: 0.6177340848101303
epoch: 4, iteration: 4000, loss: 0.6714210423505865
epoch: 4, iteration: 6000, loss: 0.6940041318517178
epoch: 5, iteration: 2000, loss: 0.5914084784854203
epoch: 5, iteration: 4000, loss: 0.6329249900691212
epoch: 5, iteration: 6000, loss: 0.6685748264398426
Training done


In [46]:
# save the model 
PATH = './cifar100.pth'
torch.save(net.state_dict(), PATH)

In [48]:
# load the trained model that we have just saved 
net = MyNet()
net.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [50]:
correct = 0
total   = 0

# We are now testing the testset; therefore, we do not need gradient update 
with torch.no_grad():
  for data in testloader:
    images, labels = data 

    # get output 
    outputs = net(images)

    # get predicted label 
    _, predicted = torch.max(outputs.data, 1)

    # find total number of total images 
    total = total + labels.size(0)

    # find only correctly labeled image and accumulate it to correct 
    correct = correct + (predicted==labels).sum().item()

print(f"accuracy : {(correct / total) * 100}")

accuracy : 81.004
