The model is based on the ResNet architecture and I have implemented a 20 layer network (n=3) and trained it on the CIFAR-10 dataset. The architecture is given the original paper for the CIFAR-10 dataset. 

In [1]:
import numpy as np
import torch

In [2]:
gpu=torch.cuda.is_available()

In [3]:
from torchvision import datasets
from torchvision import transforms as transforms
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

traindata=datasets.CIFAR10('data',download=True,train=True,transform=transform)
trainloader=torch.utils.data.DataLoader(traindata,batch_size=64,shuffle=True)
testdata=datasets.CIFAR10('data',download=True,train=False,transform=transform)
testloader=torch.utils.data.DataLoader(traindata,batch_size=64,shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


In [4]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
  def __init__(self,n):
    super(Net,self).__init__()
    self.conv0=nn.Conv2d(3,16,3,padding=1)
    self.conv11=nn.Conv2d(16,16,3,padding=1)
    self.conv12=nn.Conv2d(16,16,3,padding=1)
    self.short1=nn.Identity()
    self.conv21=nn.Conv2d(16,32,3,2,1)
    self.conv22=nn.Conv2d(32,32,3,padding=1)
    self.short2=nn.Conv2d(16,32,1,2)
    self.conv31=nn.Conv2d(32,64,3,2,1)
    self.conv32=nn.Conv2d(64,64,3,padding=1)
    self.short3=nn.Conv2d(32,64,1,2)
    self.pool=nn.AvgPool2d(8)
    self.fc=nn.Linear(64,10)
    self.n=n

  def forward(self,x):
    n=self.n

    x=F.relu(self.conv0(x))

    for i in range(n):
      idx=self.short1(x)
      x=F.relu(self.conv11(x))
      x=self.conv12(x)
      x=F.relu(x+idx)
    
    idx=self.short2(x)
    x=F.relu(self.conv21(x))
    x=self.conv22(x)
    x=F.relu(x+idx)
    for i in range(n-1):
      idx=self.short1(x)
      x=F.relu(self.conv22(x))
      x=self.conv22(x)
      x=F.relu(x+idx)

    idx=self.short3(x)
    x=F.relu(self.conv31(x))
    x=self.conv32(x)
    x=F.relu(x+idx)
    for i in range(n-1):
      idx=self.short1(x)
      x=F.relu(self.conv32(x))
      x=self.conv32(x)
      x=F.relu(x+idx)

    x=self.pool(x)
    x=torch.flatten(x,1)
    #print(x.shape)
    x=self.fc(x)
    x=F.log_softmax(x,dim=1)

    return x

In [9]:
model=Net(3)
print(model)
if gpu:
  model.cuda()

Net(
  (conv0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv11): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv12): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (short1): Identity()
  (conv21): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv22): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (short2): Conv2d(16, 32, kernel_size=(1, 1), stride=(2, 2))
  (conv31): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv32): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (short3): Conv2d(32, 64, kernel_size=(1, 1), stride=(2, 2))
  (pool): AvgPool2d(kernel_size=8, stride=8, padding=0)
  (fc): Linear(in_features=64, out_features=10, bias=True)
)


In [10]:
from torch import optim as optim
criterion=nn.NLLLoss()
optimizer=optim.SGD(model.parameters(),lr=0.01)

In [11]:
epochs=10
for e in range(epochs):
  tloss=0
  model.train()
  for images,classes in trainloader:
    if gpu:
      images,classes=images.cuda(),classes.cuda()
    optimizer.zero_grad()
    output=model(images)
    loss=criterion(output,classes)
    loss.backward()
    optimizer.step()
    tloss+=loss.item()
    #print(loss.item())
  print("Training loss epoch",e+1,":",tloss/len(trainloader))

  ttloss=0
  model.eval()
  for images,classes in testloader:
    if gpu:
      images,classes=images.cuda(),classes.cuda()
    output=model(images)
    loss=criterion(output,classes)
    ttloss+=loss.item()
  print("Test loss epoch",e+1,":",ttloss/len(testloader))

Training loss epoch 1 : 2.301597967781984
Test loss epoch 1 : 2.2941754196610904
Training loss epoch 2 : 2.1588935669120923
Test loss epoch 2 : 2.139811635627161
Training loss epoch 3 : 1.9549921220525757
Test loss epoch 3 : 1.8665366684994125
Training loss epoch 4 : 1.8649653271031197
Test loss epoch 4 : 2.2369304782594255
Training loss epoch 5 : 1.765254335177829
Test loss epoch 5 : 1.9704207357238321
Training loss epoch 6 : 1.6606706896096544
Test loss epoch 6 : 1.7344280246578518
Training loss epoch 7 : 1.587932579962494
Test loss epoch 7 : 1.78180452609611
Training loss epoch 8 : 1.5188596404117087
Test loss epoch 8 : 1.524208110921523
Training loss epoch 9 : 1.4641558613313739
Test loss epoch 9 : 1.4793665642323701
Training loss epoch 10 : 1.4130890811495769
Test loss epoch 10 : 1.419819954411148


In [12]:
from torchsummary import summary
summary(model,(3,32,32),64)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [64, 16, 32, 32]             448
          Identity-2           [64, 16, 32, 32]               0
            Conv2d-3           [64, 16, 32, 32]           2,320
            Conv2d-4           [64, 16, 32, 32]           2,320
          Identity-5           [64, 16, 32, 32]               0
            Conv2d-6           [64, 16, 32, 32]           2,320
            Conv2d-7           [64, 16, 32, 32]           2,320
          Identity-8           [64, 16, 32, 32]               0
            Conv2d-9           [64, 16, 32, 32]           2,320
           Conv2d-10           [64, 16, 32, 32]           2,320
           Conv2d-11           [64, 32, 16, 16]             544
           Conv2d-12           [64, 32, 16, 16]           4,640
           Conv2d-13           [64, 32, 16, 16]           9,248
         Identity-14           [64, 32,

In [21]:
# track test loss
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

model.eval()
# iterate over test data
for data, target in testloader:
    # move tensors to GPU if CUDA is available
    if gpu:
        data, target = data.cuda(), target.cuda()
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    # calculate the batch loss
    loss = criterion(output, target)
    # update test loss 
    test_loss += loss.item()*data.size(0)
    # convert output probabilities to predicted class
    _, pred = torch.max(output, 1)    
    # compare predictions to true label
    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) if not gpu else np.squeeze(correct_tensor.cpu().numpy())
    # calculate test accuracy for each object class
    for i in range(target.shape[0]):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

# average test loss
test_loss = test_loss/len(testloader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 1.419690

Test Accuracy of tensor(5, device='cuda:0'): 63% (3168/5000)
Test Accuracy of tensor(6, device='cuda:0'): 69% (3491/5000)
Test Accuracy of tensor(9, device='cuda:0'): 31% (1579/5000)
Test Accuracy of tensor(7, device='cuda:0'): 33% (1693/5000)
Test Accuracy of tensor(1, device='cuda:0'): 24% (1222/5000)
Test Accuracy of tensor(3, device='cuda:0'): 18% (900/5000)
Test Accuracy of tensor(3, device='cuda:0'): 81% (4073/5000)
Test Accuracy of tensor(4, device='cuda:0'): 67% (3387/5000)
Test Accuracy of tensor(5, device='cuda:0'): 33% (1675/5000)
Test Accuracy of tensor(1, device='cuda:0'): 52% (2642/5000)

Test Accuracy (Overall): 47% (23830/50000)
