## imports

In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR, OneCycleLR

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

import seaborn as sns
import matplotlib.pyplot as plt
sns.set()

  import pandas.util.testing as tm


In [None]:
!pip install torchsummary
from torchsummary import summary



In [None]:
def getDeviceType():
  """Return the torch.device to run on: CUDA when it is available, else CPU."""
  backend = "cuda" if torch.cuda.is_available() else "cpu"
  return torch.device(backend)

In [None]:
# Show which device (cuda or cpu) getDeviceType() selects in this runtime.
print(getDeviceType())

cuda


## Define model - CIFAR-10 training model

input image size - 32x32x3

In [None]:
class Cifar10Model(nn.Module):
  """Small CNN classifier for 3-channel images (designed around 32x32 CIFAR-10).

  Layout: two 3x3 convs -> 1x1 transition + max-pool -> grouped ("depthwise")
  3x3 conv + 1x1 conv + max-pool -> dilated 3x3 conv + 3x3 conv + max-pool ->
  global average pooling -> two 1x1 convs acting as the classifier head.

  Args:
    dropout_rate: drop probability used by every dropout layer (default 0.1).
    num_classes: number of output classes (default 10).

  ``forward`` returns log-probabilities of shape ``(batch, num_classes)``, so
  the output pairs directly with an NLL loss (``F.nll_loss``).
  ``nn.CrossEntropyLoss`` also works: it applies log-softmax again, which
  leaves values that are already log-probabilities unchanged.

  The shape / receptive-field (RF) notes below assume a 32x32 input. Other
  input sizes are accepted because the head uses adaptive global pooling.

  Note: convblock7 applies BatchNorm to a 1x1 feature map, so in training mode
  a batch must contain more than one sample.
  """

  def __init__(self, dropout_rate=0.1, num_classes=10):
    super(Cifar10Model, self).__init__()

    # Input conv block
    in_ch = 3
    out_ch = 32
    self.convblock1 = nn.Sequential(
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=(3,3),
                      padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.Dropout(dropout_rate),
            nn.ReLU()
        ) # channels 3 -> 32, spatial 32 -> 32, RF = 3

    # Convolution block - 1
    in_ch = 32
    out_ch = 64
    self.convblock2 = nn.Sequential(
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=(3,3),
                      padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.Dropout(dropout_rate),
            nn.ReLU()
        ) # channels 32 -> 64, spatial 32 -> 32, RF = 5

    # Transition block - 1 (1x1 conv to shrink channels, then down-sample)
    in_ch = 64
    out_ch = 32
    self.convblock3 = nn.Sequential(
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=(1,1),
                      padding=0, bias=False),
        ) # channels 64 -> 32, spatial 32 -> 32, RF = 5
    self.pool1 = nn.MaxPool2d(2,2)
    # spatial 32 -> 16, RF = 6

    # Convolution block - 2
    # Depthwise convolution - 1: groups == in_ch, so every input channel is
    # filtered on its own and yields out_ch / in_ch = 2 output channels.
    in_ch = 32
    out_ch = 64
    self.depthwise1 = nn.Sequential(
          nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=(3,3),
                    padding=0, groups=in_ch, bias=False),
          nn.BatchNorm2d(out_ch),
          nn.Dropout(dropout_rate),
          nn.ReLU()
      ) # channels 32 -> 64, spatial 16 -> 14, RF = 10

    # Pointwise (1x1) conv that mixes the depthwise outputs across channels
    in_ch = 64
    out_ch = 128
    self.convblock4 = nn.Sequential(
        nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=(1,1),
                  padding=0, bias=False),
        nn.BatchNorm2d(num_features=out_ch),
        nn.Dropout2d(dropout_rate),
        nn.ReLU()
        ) # channels 64 -> 128, spatial 14 -> 14, RF = 10

    self.pool2 = nn.MaxPool2d(2,2)
    # spatial 14 -> 7, RF = 12

    # Convolution block - 3
    # Dilated convolution - 1: dilation=2 makes the 3x3 kernel cover 5x5;
    # padding=4 is more than "same" padding, so the map grows from 7 to 11.
    in_ch = 128
    out_ch = 256
    self.convblock5 = nn.Sequential(
      nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=(3,3),
                padding=4, dilation=2, bias=False),
      nn.BatchNorm2d(num_features=out_ch),
      nn.Dropout2d(dropout_rate),
      nn.ReLU()
        ) # channels 128 -> 256, spatial 7 -> 11, RF = 28

    in_ch = 256
    out_ch = 256
    self.convblock6 = nn.Sequential(
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=(3,3),
                      padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.Dropout(dropout_rate),
            nn.ReLU()
        ) # channels 256 -> 256, spatial 11 -> 11, RF = 36

    self.pool3 = nn.MaxPool2d(2,2)
    # spatial 11 -> 5, RF = 40

    # GAP: adaptive pooling always reduces to 1x1. For the 5x5 map produced by
    # a 32x32 input this equals AvgPool2d(kernel_size=5), but it also handles
    # other input sizes instead of leaving a spatial map behind.
    self.gap = nn.Sequential(
            nn.AdaptiveAvgPool2d(1)
        ) # spatial 5 -> 1, RF = 72 (whole image)

    # One more 1x1 layer after GAP
    in_ch = 256
    out_ch = 128
    self.convblock7 = nn.Sequential(
            nn.Conv2d(in_channels=in_ch, out_channels=out_ch,
                      kernel_size=(1, 1), padding=0, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.Dropout(dropout_rate),
            nn.ReLU()
        )

    # Output layer: 1x1 conv producing one score per class
    in_ch = 128
    self.convblock8 = nn.Sequential(
        nn.Conv2d(in_channels=in_ch, out_channels=num_classes,
                  kernel_size=(1, 1), padding=0, bias=False),
        )

    # Not used by forward(); kept so the `dropout` attribute that earlier
    # versions of this class exposed is still present.
    self.dropout = nn.Dropout(dropout_rate)


  def forward(self, x):
    """Map a (batch, 3, H, W) image tensor to (batch, num_classes) log-probabilities."""
    x = self.convblock1(x)
    x = self.convblock2(x)
    x = self.convblock3(x)
    x = self.pool1(x)
    x = self.depthwise1(x)
    x = self.convblock4(x)
    x = self.pool2(x)
    x = self.convblock5(x)
    x = self.convblock6(x)
    x = self.pool3(x)
    x = self.gap(x)
    x = self.convblock7(x)
    x = self.convblock8(x)

    # (batch, num_classes, 1, 1) -> (batch, num_classes). Flattening from dim 1
    # keeps the batch dimension fixed, so samples can never be mixed together.
    x = torch.flatten(x, 1)
    return F.log_softmax(x, dim=-1)


In [None]:
# Pick the compute device, build the network, then move its weights there.
device = getDeviceType()
cifarmodel = Cifar10Model()
cifarmodel = cifarmodel.to(device)

In [None]:
# Print a per-layer table of output shapes and parameter counts for a
# single 3x32x32 (channels, height, width) input.
summary(cifarmodel, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             864
       BatchNorm2d-2           [-1, 32, 32, 32]              64
           Dropout-3           [-1, 32, 32, 32]               0
              ReLU-4           [-1, 32, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]          18,432
       BatchNorm2d-6           [-1, 64, 32, 32]             128
           Dropout-7           [-1, 64, 32, 32]               0
              ReLU-8           [-1, 64, 32, 32]               0
            Conv2d-9           [-1, 32, 32, 32]           2,048
        MaxPool2d-10           [-1, 32, 16, 16]               0
           Conv2d-11           [-1, 64, 14, 14]             576
      BatchNorm2d-12           [-1, 64, 14, 14]             128
          Dropout-13           [-1, 64, 14, 14]               0
             ReLU-14           [-1, 64,

In [None]:
import torch.optim as optim

# Plain SGD with momentum over every parameter of the model.
optimizer = optim.SGD(cifarmodel.parameters(), lr=0.001, momentum=0.9)

# Two separate cross-entropy instances: `lossfunc` is the one handed to the
# Trainer below; `criterion` is only referenced by the commented-out manual loop.
lossfunc = nn.CrossEntropyLoss()
criterion = nn.CrossEntropyLoss()

In [None]:
from tqdm import tqdm

class Trainer():
  """Runs training / evaluation passes over a model and records metric history.

  Attributes (all plain Python lists):
    train_losses: one float per training batch (criterion + L1 penalty).
    test_losses:  one float per call to ``test`` (mean NLL per sample).
    train_acc:    running training accuracy in percent, one entry per batch.
    test_acc:     test accuracy in percent, one entry per call to ``test``.
  """

  def __init__(self):
    self.train_losses = []
    self.test_losses = []
    self.train_acc = []
    self.test_acc = []

  def train(self, model, device, train_loader, optimizer, loss_func, epoch, lambda_l1):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
      model: network to optimise; it is switched to training mode.
      device: device that every batch is moved to.
      train_loader: iterable yielding ``(data, target)`` batches.
      optimizer: optimizer that owns ``model``'s parameters.
      loss_func: callable ``loss_func(prediction, target)`` returning a scalar tensor.
      epoch: current epoch index (accepted for the caller's convenience; unused).
      lambda_l1: weight of the L1 penalty summed over all parameters;
        a falsy value (0 / None) disables the penalty.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for data, target in pbar:
      data, target = data.to(device), target.to(device)

      # PyTorch accumulates gradients across backward() calls, so clear them
      # before computing the gradients of this batch.
      optimizer.zero_grad()

      y_pred = model(data)
      loss = loss_func(y_pred, target)

      # Optional L1 regularisation; skipped entirely when it would add nothing.
      if lambda_l1:
        l1 = sum(p.abs().sum() for p in model.parameters())
        loss = loss + lambda_l1 * l1

      loss.backward()
      optimizer.step()

      # Store a plain float, not the tensor: keeping the tensor would pin device
      # memory (and its autograd references) for every batch ever trained, and
      # a tensor history cannot be plotted directly.
      self.train_losses.append(loss.item())

      pred = y_pred.argmax(dim=1)  # index of the highest score per sample
      correct += pred.eq(target.view_as(pred)).sum().item()
      processed += len(data)
      self.train_acc.append(100*correct/processed)

  def test(self, model, device, test_loader):
    """Evaluate ``model`` on ``test_loader``, print a summary and record it.

    The loss is ``F.nll_loss``, so ``model`` is expected to return
    log-probabilities. Appends the mean loss per sample to ``test_losses`` and
    the accuracy (percent) to ``test_acc``.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
      for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # Sum (not mean) per batch so the final division gives a true per-sample mean.
        test_loss += F.nll_loss(output, target, reduction='sum').item()
        pred = output.argmax(dim=1, keepdim=True)  # index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    self.test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    self.test_acc.append(100. * correct / len(test_loader.dataset))

  def getValues(self):
    """Return ``(train_losses, test_losses, train_acc, test_acc)``."""
    return (self.train_losses, self.test_losses, self.train_acc, self.test_acc)

  def get_misclassified(self, model, test_loader, device):
    """Collect every sample of ``test_loader`` that ``model`` gets wrong.

    Returns:
      A list of ``(image, predicted_label, true_label)`` tuples. ``image`` keeps
      the per-sample shape of the input batch (e.g. ``(C, H, W)``); the labels
      are 0-dim tensors. All tensors stay on ``device``. The list is empty when
      the loader yields no batches or nothing is misclassified.
    """
    wrong_images = []
    wrong_preds = []
    wrong_targets = []

    model.eval()
    with torch.no_grad():
      for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)

        # Keep predictions 1-D so the boolean mask below selects along the
        # batch dimension only; an (N, 1) mask would also try to index the
        # channel dimension and fails for multi-channel images.
        pred = output.argmax(dim=1)
        flat_target = target.view_as(pred)
        wrong = pred.ne(flat_target)

        wrong_images.append(data[wrong])
        wrong_preds.append(pred[wrong])
        wrong_targets.append(flat_target[wrong])

    # torch.cat rejects an empty list, which happens when the loader is empty.
    if not wrong_images:
      return []

    return list(zip(torch.cat(wrong_images),
                    torch.cat(wrong_preds),
                    torch.cat(wrong_targets)))

In [None]:
# Shared preprocessing: convert images to tensors, then normalise every channel
# with mean 0.5 and std 0.5.
transform = transforms.Compose([
    # Random rotation is left disabled, see
    # https://github.com/pytorch/vision/issues/1759
    #  transforms.RandomRotation((-10.0, 10.0), fill=(0,)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: downloaded into ./data on first use, reshuffled every epoch.
trainset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2)

# Test split: same preprocessing, fixed order.
testset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

# Human-readable class names, indexed by label id.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
EPOCHS = 20

trainer = Trainer()

for epoch in range(EPOCHS):
  print("EPOCH:", epoch)
  # One pass over the training data (L1 penalty weight 5e-4), then evaluate.
  trainer.train(
      model=cifarmodel,
      device=device,
      train_loader=trainloader,
      optimizer=optimizer,
      loss_func=lossfunc,
      epoch=epoch,
      lambda_l1=5e-4,
  )
  trainer.test(model=cifarmodel, device=device, test_loader=testloader)
  # No LR scheduler is configured; a per-epoch one (e.g. StepLR) would step here.

# Metric histories gathered by the trainer, unpacked for later use.
train_losses, test_losses, train_acc, test_acc = trainer.getValues()

  0%|          | 0/12500 [00:00<?, ?it/s]

EPOCH: 0


100%|██████████| 12500/12500 [03:41<00:00, 56.48it/s]
  0%|          | 0/12500 [00:00<?, ?it/s]


Test set: Average loss: 1.5476, Accuracy: 4376/10000 (43.76%)

EPOCH: 1


100%|██████████| 12500/12500 [03:38<00:00, 57.27it/s]
  0%|          | 0/12500 [00:00<?, ?it/s]


Test set: Average loss: 1.4273, Accuracy: 5103/10000 (51.03%)

EPOCH: 2


100%|██████████| 12500/12500 [03:33<00:00, 58.47it/s]
  0%|          | 0/12500 [00:00<?, ?it/s]


Test set: Average loss: 1.4537, Accuracy: 4919/10000 (49.19%)

EPOCH: 3


 39%|███▉      | 4873/12500 [01:28<02:16, 56.03it/s]

In [None]:
# Simple marker message; it does not itself check that training completed.
print('Finished Training')

In [None]:
# Evaluate the model on the test set once more. Trainer.test() also appends to
# trainer.test_losses / trainer.test_acc, so re-running this cell grows those lists.
trainer.test(cifarmodel, device, testloader)

In [None]:
# for epoch in range(2):  # loop over the dataset multiple times

#     running_loss = 0.0
#     for i, data in enumerate(trainloader, 0):
#         # get the inputs
#         inputs, labels = data

#         inputs, labels = inputs.to(device), labels.to(device)

#         # zero the parameter gradients
#         optimizer.zero_grad()

#         # forward + backward + optimize
#         outputs = cifarmodel(inputs)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()

#         # print statistics
#         running_loss += loss.item()
#         if i % 2000 == 1999:    # print every 2000 mini-batches
#             print('[%d, %5d] loss: %.3f' %
#                   (epoch + 1, i + 1, running_loss / 2000))
#             running_loss = 0.0

# print('Finished Training')

In [None]:
# dataiter = iter(testloader)
# images, labels = dataiter.next()

# # print images
# # imshow(torchvision.utils.make_grid(images))
# # print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))