In [None]:
import matplotlib.pyplot as plt   # to plot
import torch
import torch.nn as nn    # contains all the functions to build NN
import torchvision       # contains datasets, some functions which can be applied on cv problem
import torchvision.transforms as transforms    # to transform data

In [None]:
# Choose where tensors and the model will live: the first CUDA GPU when PyTorch
# reports one as available, otherwise the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda:0')
else:
  device = torch.device('cpu')

Importing datasets from torchvision

Train data

In [None]:
# CIFAR-10 training split.
#   root      - directory where the dataset archive is stored / looked up
#   train     - True selects the training portion of the dataset
#   transform - ToTensor() turns every image into a torch tensor
#   download  - fetch the archive into `root` when it is not there yet
train_data = torchvision.datasets.CIFAR10(
    root='/data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting /data/cifar-10-python.tar.gz to /data


Test data

In [None]:
# CIFAR-10 test split: same storage directory and tensor conversion as the
# training split, but train=False selects the held-out images.
test_data = torchvision.datasets.CIFAR10(
    root='/data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

Files already downloaded and verified


All the images are stored as tensors in train_data and test_data

In [None]:
# The dataset is fairly large, so instead of pushing it through the network in
# one go it is served in mini-batches of `batch_size` images.
batch_size = 100

# Training batches are drawn in a freshly shuffled order on every epoch.
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=batch_size,
                                           shuffle=True)

# Evaluation only counts correct predictions, so the sample order cannot change
# the result; shuffling is turned off to keep the test pass deterministic.
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                          batch_size=batch_size,
                                          shuffle=False)


Setting the parameters

In [None]:
# Training hyper-parameters
num_classes = 10        # number of output categories the network predicts
num_epochs = 15         # how many full passes over the training set are made
learning_rate = 1e-3    # step size handed to the optimizer

In PyTorch, a neural network is defined as a class that inherits from `nn.Module`.

In [None]:
# Network definition
class Classify(nn.Module):
  """Convolutional image classifier: three conv stages followed by one linear layer.

  Every stage runs two 3x3 convolutions (stride 1, padding 1, so height and
  width are preserved), each followed by batch normalisation and ReLU, and ends
  with a 2x2 max-pool that halves height and width. The channel count grows
  3 -> 16 -> 32 -> 64 -> 128 -> 256 -> 512 across the stages.

  The linear layer expects 4*4*512 input features, which is what three
  halvings of a 32x32 image produce (32 -> 16 -> 8 -> 4).

  Args:
    num_classes: number of scores produced for each image.
  """

  @staticmethod
  def _stage(in_channels, mid_channels, out_channels):
    """Build one conv-BN-ReLU-conv-BN-ReLU-maxpool stage as an nn.Sequential."""
    return nn.Sequential(
        nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(mid_channels),
        nn.ReLU(),
        nn.Conv2d(mid_channels, out_channels, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),   # halves height and width
    )

  def __init__(self, num_classes = 10):
    super().__init__()

    # Colour images come in with 3 channels.
    self.layer1 = self._stage(3, 16, 32)       # 32x32 input -> 16x16 feature maps
    self.layer2 = self._stage(32, 64, 128)     # 16x16 -> 8x8
    self.layer3 = self._stage(128, 256, 512)   # 8x8 -> 4x4

    # Fully connected layer: 4*4 spatial positions times 512 channels in,
    # one score per class out.
    self.fc = nn.Linear(4 * 4 * 512, num_classes)

  def forward(self, x):
    """Run the three conv stages, flatten per sample, and return the class scores."""
    out = self.layer3(self.layer2(self.layer1(x)))
    # The stages return a 4-D tensor; keep the batch dimension (size(0)) and
    # let -1 collapse the remaining dimensions into one feature vector per sample.
    flat = out.reshape(out.size(0), -1)
    return self.fc(flat)

1. Don't keep kernel size too large, or a lot of information would be lost.
2. To find the size of the output image: $H_{out} = \left\lfloor \dfrac{H_{in} - \text{FilterSize} + 2 \cdot \text{Padding}}{\text{Stride}} \right\rfloor + 1$

In [None]:
# Build the classifier, then move it onto the selected device.
model = Classify(num_classes=num_classes)
model = model.to(device)

In [None]:
# Loss function and optimizer

# Cross-entropy is used because this is a classification problem.
criterion = nn.CrossEntropyLoss()

# Adam updates every parameter of the model; its default learning rate is 0.001.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)


Training Loop

In [None]:
total_step = len(train_loader)   # number of mini-batches per epoch, shown in the progress line

# Put the model in training mode explicitly. The evaluation cell calls
# model.eval(); without this line, re-running the training cell afterwards
# would train with BatchNorm still in inference mode.
model.train()

for epoch in range(num_epochs):   # outer loop: one pass over the training set per epoch

  for i, (images, labels) in enumerate(train_loader):   # inner loop: one mini-batch at a time
    # Move the batch of images and labels to the same device as the model.
    images = images.to(device)
    labels = labels.to(device)

    # Forward pass: compute the class scores and compare them with the labels.
    output = model(images)
    loss = criterion(output, labels)

    # Backward pass
    optimizer.zero_grad()   # clear gradients left over from the previous step
    loss.backward()         # compute gradients of the loss w.r.t. the parameters
    optimizer.step()        # update the parameters using those gradients

    # Report progress every 100 mini-batches. The loss is printed with four
    # decimals ('{:.4f}'); the earlier '{:4f}' only set a minimum width.
    if (i+1) % 100 == 0:
      print('Epoch [{} {}], Step [{} {}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))





Epoch [1 15], Step [100 500], Loss: 1.821341
Epoch [1 15], Step [200 500], Loss: 1.543175
Epoch [1 15], Step [300 500], Loss: 1.363162
Epoch [1 15], Step [400 500], Loss: 0.849861
Epoch [1 15], Step [500 500], Loss: 0.773534
Epoch [2 15], Step [100 500], Loss: 0.867153
Epoch [2 15], Step [200 500], Loss: 1.051483
Epoch [2 15], Step [300 500], Loss: 0.957628
Epoch [2 15], Step [400 500], Loss: 0.973563
Epoch [2 15], Step [500 500], Loss: 0.743706
Epoch [3 15], Step [100 500], Loss: 0.689746
Epoch [3 15], Step [200 500], Loss: 0.662957
Epoch [3 15], Step [300 500], Loss: 0.573839
Epoch [3 15], Step [400 500], Loss: 0.607144
Epoch [3 15], Step [500 500], Loss: 0.860277
Epoch [4 15], Step [100 500], Loss: 0.445273
Epoch [4 15], Step [200 500], Loss: 0.521645
Epoch [4 15], Step [300 500], Loss: 0.485138
Epoch [4 15], Step [400 500], Loss: 0.529458
Epoch [4 15], Step [500 500], Loss: 0.437997
Epoch [5 15], Step [100 500], Loss: 0.519241
Epoch [5 15], Step [200 500], Loss: 0.402658
Epoch [5 1

Test the model on test data

In [None]:
model.eval()   # switch the model to evaluation mode

correct = 0    # predictions that matched their label
total = 0      # labels seen so far

with torch.no_grad():   # no gradients are needed while testing
  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)

    # One row of class scores per image; the predicted class is the index
    # of the largest score in each row.
    output = model(images)
    predicted = torch.max(output, 1)[1]

    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print('Test accuracy : {} %'.format(100* correct/total))


Test accuracy : 81.01 %
