In [9]:
import numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cpu


In [3]:
# Dataset
# Each image is converted to a float tensor in [0, 1] and then standardised per
# RGB channel, so every channel enters the network with roughly zero mean and
# unit variance. The same transform is applied to both splits.
# NOTE(review): the constants are the commonly quoted CIFAR-10 training-set
# channel statistics; recompute them from `train` if exact values matter.
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2470, 0.2435, 0.2616)
cifar10_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])
train = datasets.CIFAR10('./data', train=True, download=True, transform=cifar10_transform)
test = datasets.CIFAR10('./data', train=False, download=True, transform=cifar10_transform)

100.0%


In [29]:
# Data Loader
# Both splits are served in mini-batches of 64; only the training split is
# reshuffled on every pass.
train_data_loader = torch.utils.data.DataLoader(
    dataset=train,
    batch_size=64,
    shuffle=True,
)
test_data_loader = torch.utils.data.DataLoader(
    dataset=test,
    batch_size=64,
    shuffle=False,
)

In [30]:
# Pull a single mini-batch from the training loader for inspection.
sample_batch = next(iter(train_data_loader))
images, labels = sample_batch

In [48]:
class ResNet_Block(torch.nn.Module):
    """Residual unit: two 3x3 conv + batch-norm stages wrapped by one skip connection.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by both convolutions.
        stride: stride of the first convolution only; the second convolution
            always uses stride 1.
        downsample: optional module applied to the input on the skip path.
            When ``None`` the input is added back unchanged.
    """

    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super().__init__()
        nn = torch.nn

        # Stage 1 may shrink the spatial size (via `stride`) and change the channel count.
        first_stage = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        self.convolution1 = nn.Sequential(*first_stage)

        # Stage 2 keeps the shape; its activation is applied after the skip addition.
        second_stage = [
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
        ]
        self.convolution2 = nn.Sequential(*second_stage)

        # Non-None when the skip path needs its own transformation of the input.
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(convolution2(convolution1(x)) + skip(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)
        residual = self.convolution2(self.convolution1(x))
        return self.relu(residual + shortcut)

        

In [60]:
class ResNet(torch.nn.Module):
    """Small ResNet classifier with a 7x7 stem, four residual stages and a 10-way head.

    Args:
        block: residual block class. It is called as
            ``block(in_channels, out_channels, stride, downsample)``.
        layers: sequence of four integers, the number of blocks in each stage.

    The spatial sizes quoted in the comments below are for 32 x 32 inputs and
    follow ``floor((size + 2 * padding - kernel) / stride) + 1``. Other input
    sizes are supported because the head pools adaptively to 1 x 1.
    """

    def __init__(self, block, layers):
        super(ResNet, self).__init__()
        self.block = block
        # Stem. Input: 3 channels, 32 x 32.
        self.convolution1 = torch.nn.Sequential(
                                torch.nn.Conv2d(3, 16, kernel_size = 7, stride = 2, padding = 3),
                                torch.nn.BatchNorm2d(16),
                                torch.nn.ReLU())
        # Output: 16 channels, floor((32 + 2*3 - 7) / 2) + 1 = 16 -> 16 x 16
        #---------------------------------------------
        self.maxpool = torch.nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        # Output: 16 channels, floor((16 + 2*1 - 3) / 2) + 1 = 8 -> 8 x 8
        #---------------------------------------------
        # Same channel count and stride 1: the skip path is the identity.
        self.layer0 = self.add_res_net_block(16, 16, layers[0], first_layer_stride = 1)
        # Output: 16 channels, 8 x 8
        #---------------------------------------------
        # 16 -> 32 channels with stride 2: the skip path is projected.
        self.layer1 = self.add_res_net_block(16, 32, layers[1], first_layer_stride = 2)
        # Output: 32 channels, floor((8 + 2*1 - 3) / 2) + 1 = 4 -> 4 x 4
        #---------------------------------------------
        # 32 -> 64 channels with stride 2: the skip path is projected.
        self.layer2 = self.add_res_net_block(32, 64, layers[2], first_layer_stride = 2)
        # Output: 64 channels, floor((4 + 2*1 - 3) / 2) + 1 = 2 -> 2 x 2
        #---------------------------------------------
        # 64 -> 128 channels with stride 2: the skip path is projected.
        self.layer3 = self.add_res_net_block(64, 128, layers[3], first_layer_stride = 2)
        # Output: 128 channels, floor((2 + 2*1 - 3) / 2) + 1 = 1 -> 1 x 1
        #---------------------------------------------
        # Adaptive pooling always yields 128 x 1 x 1, so the linear head receives
        # 128 features whatever the input resolution. For 32 x 32 inputs the map
        # is already 1 x 1 and the pooling is an identity.
        self.avgpool = torch.nn.AdaptiveAvgPool2d((1, 1))
        #---------------------------------------------
        self.fc = torch.nn.Linear(128, 10)

    def add_res_net_block(self, in_channels, out_channels, num_layers, first_layer_stride):
        """Build one stage of ``num_layers`` residual blocks as a ``Sequential``.

        Only the first block may change the channel count or apply
        ``first_layer_stride``; the remaining blocks map ``out_channels`` to
        ``out_channels`` with stride 1 and an identity skip path.
        """
        downsample = None
        # The skip path must match the main path's output shape, so it needs a
        # projection whenever the first block changes the channel count OR the
        # spatial size (stride != 1).
        if first_layer_stride != 1 or in_channels != out_channels:
            downsample = torch.nn.Sequential(
                torch.nn.Conv2d(in_channels, out_channels, kernel_size = 1, stride=first_layer_stride),
                torch.nn.BatchNorm2d(out_channels)
            )
        block_layers = []
        block_layers.append(self.block(in_channels, out_channels, first_layer_stride, downsample))
        for _ in range(num_layers-1):
            block_layers.append(self.block(out_channels, out_channels, 1, None))

        return torch.nn.Sequential(*block_layers)

    def forward(self, x):
        """Map a batch of 3-channel images to a ``(batch, 10)`` tensor of class scores."""
        x = self.convolution1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avgpool(x)      # (batch, 128, 1, 1)
        x = torch.flatten(x, 1)  # (batch, 128)
        x = self.fc(x)

        return x

In [57]:
# Network with 3, 3, 4 and 1 residual blocks in its four stages, moved to `device`.
model = ResNet(ResNet_Block, [3, 3, 4, 1])
model = model.to(device)

# Cross-entropy objective, optimised by SGD with momentum and L2 weight decay.
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.001,
    momentum=0.8,
    weight_decay=0.001,
)

In [58]:
NUM_EPOCHS = 30  # number of full passes over the training set


def _run_epoch(data_loader, training):
    """Run one pass over ``data_loader`` and return ``(mean_loss, accuracy_percent)``.

    Args:
        data_loader: iterable yielding ``(images, labels)`` batches.
        training: when True the model is put in train mode and the optimizer
            updates the weights after every batch; when False the model is put
            in eval mode and no gradients are tracked.

    The loss is averaged per sample: each batch's mean loss is weighted by the
    batch size, so a shorter final batch does not skew the epoch average.
    Uses the notebook-level ``model``, ``loss``, ``optimizer`` and ``device``.
    """
    model.train(training)  # train(False) is equivalent to eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.set_grad_enabled(training):
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)
            if training:
                optimizer.zero_grad()  # clear gradients left over from the previous batch
            pred = model(images)  # forward pass
            fit = loss(pred, labels)
            if training:
                fit.backward()  # backward pass
                optimizer.step()  # weight update
            batch_size = labels.size(0)
            # `fit` is the batch mean, so scale it back to a per-batch sum.
            loss_sum += fit.item() * batch_size
            correct += (pred.argmax(dim=1) == labels).sum().item()
            seen += batch_size
    return loss_sum / seen, 100 * correct / seen


train_loss = []
test_loss = []
train_accuracies = []
test_accuracies = []
for epoch in range(NUM_EPOCHS):
    trainloss, train_accuracy = _run_epoch(train_data_loader, training=True)
    testloss, test_accuracy = _run_epoch(test_data_loader, training=False)

    train_loss.append(trainloss)
    test_loss.append(testloss)
    test_accuracies.append(test_accuracy)
    train_accuracies.append(train_accuracy)

    print(f'Epoch: {epoch}, Train Loss: {trainloss:.2f}, Test loss: {testloss:.2f}, Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%')

KeyboardInterrupt: 

In [None]:
# Side-by-side learning curves: loss on the left, accuracy on the right.
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
panels = (
    (ax[0], "Loss", train_loss, test_loss),
    (ax[1], "Accuracy", train_accuracies, test_accuracies),
)
for axis, metric, train_curve, test_curve in panels:
    axis.plot(train_curve, label=f"Train {metric}")
    axis.plot(test_curve, label=f"Test {metric}")
    axis.set_xlabel("Epochs")
    axis.set_ylabel(metric)
    axis.legend()
plt.tight_layout()

In [None]:
#Parameters
'''
# self.convolution1 = torch.nn.Sequential(
#                         torch.nn.Conv2d(3, 16, kernel_size = 7, stride = 2, padding = 3),
#                         torch.nn.BatchNorm2d(16),
#                         torch.nn.ReLU()) 
Parameters
conv = 3 * 16 * 7 * 7
batch = 2 * 16
Total = 2384
# Output = 16 Channels, (input(32) - kernel(7) + 2*Padding(3))/Stride(2)) + 1 = 16 * 16
#---------------------------------------------
self.maxpool = torch.nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
# Output = 16 Channels, (input(16) - kernel(3) + 2*Padding(1))/Stride(2)) + 1 = 8 * 8
#---------------------------------------------
# Input = Output, no downsampling
self.layer0 = self.add_res_net_block(16, 16, layers[0], first_layer_stride = 1)
Parameters
conv1 = 16 * 16 * 3 * 3 = 2304
batchnorm1 = 16 * 2 = 32
conv2 = 16 * 16 * 3 * 3 = 2304
batchnorm2 = 16 * 2 = 32

Total = 4672 * 3 = 14016
# Output = 16 Channels, (input(8) - kernel(3) + 2*Padding(1))/Stride(1)) + 1 = 8 * 8
#---------------------------------------------
# Going from 16 channels to 32 channels, downsampling for the identity required
self.layer1 = self.add_res_net_block(16, 32, layers[1], first_layer_stride = 2)
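Parameters (conv biases not counted)
First block (16 -> 32 channels, stride 2, with downsampler):
Downsampler conv = 16 * 32 * 1 * 1 = 512
Downsampler batchnorm = 32 * 2 = 64
conv1 = 16 * 32 * 3 * 3 = 4608
batchnorm1 = 32 * 2 = 64
conv2 = 32 * 32 * 3 * 3 = 9216
batchnorm2 = 32 * 2 = 64
First block total = 14528
Each of the 2 remaining blocks (32 -> 32 channels, no downsampler):
conv1 = conv2 = 32 * 32 * 3 * 3 = 9216
batchnorm1 = batchnorm2 = 32 * 2 = 64
Block total = 2 * (9216 + 64) = 18560
Total = 14528 + 2 * 18560 = 51648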
# Output = 32 Channels, (input(8) - kernel(3) + 2*Padding(1))/Stride(2)) + 1 = 4 * 4
#---------------------------------------------
# Going from 32 channels to 64 channels, downsampling for the identity required
self.layer2 = self.add_res_net_block(32, 64, layers[2], first_layer_stride = 2)
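Parameters (conv biases not counted)
First block (32 -> 64 channels, stride 2, with downsampler):
Downsampler conv = 32 * 64 * 1 * 1 = 2048
Downsampler batchnorm = 64 * 2 = 128
conv1 = 32 * 64 * 3 * 3 = 18432
batchnorm1 = 64 * 2 = 128
conv2 = 64 * 64 * 3 * 3 = 36864
batchnorm2 = 64 * 2 = 128
First block total = 57728
Each of the 3 remaining blocks (64 -> 64 channels, no downsampler):
conv1 = conv2 = 64 * 64 * 3 * 3 = 36864
batchnorm1 = batchnorm2 = 64 * 2 = 128
Block total = 2 * (36864 + 128) = 73984
Total = 57728 + 3 * 73984 = 279680
# Output = 64 Channels, (input(4) - kernel(3) + 2*Padding(1))/Stride(2)) + 1 = 2 * 2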
#---------------------------------------------
# Going from 64 channels to 128 channels, downsampling for the identity required
self.layer3 = self.add_res_net_block(64, 128, layers[3], first_layer_stride = 2)
Parameters (conv biases not counted)
Downsampler conv = 64 * 128 * 1 * 1 = 8192
Downsampler batchnorm = 128 * 2 = 256
conv1 = 64 * 128 * 3 * 3 = 73728
batchnorm1 = 128 * 2 = 256
conv2 = 128 * 128 * 3 * 3 = 147456
batchnorm2 = 128 * 2 = 256
Total = 230144
# Output = 128 Channels, (input(2) - kernel(3) + 2*Padding(1))/Stride(2)) + 1 = 1 * 1
#---------------------------------------------
self.avgpool = torch.nn.AvgPool2d(7, stride=1)
#---------------------------------------------
self.fc = torch.nn.Linear(128, 10)
'''

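# Total Parameters = 2384 (stem) + 14016 (layer0) + 51648 (layer1) + 279680 (layer2) + 230144 (layer3) + 1280 (fc weights) = 579152
# This excludes the conv biases (1296) and the fc bias (10); including them gives 580458.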