# CIFAR-10

**Imports**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import random
random.seed(365)
# Seeding Python's `random` alone leaves NumPy and PyTorch unseeded; seed
# their generators too so that runs of this notebook are repeatable.
np.random.seed(365)
torch.manual_seed(365)

**Device configuration**

In [None]:
# Compute device: use the GPU when CUDA is usable, otherwise fall back to the CPU
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)

## CONV4

**Hyper-parameters**

In [None]:
# Training configuration for the CONV4 experiments
optimizer_name = 'Adam'  # which optimizer the model cells build ('SGD' or 'Adam')
learning_rate = 3e-4     # alternative value tried: 0.01
batch_size = 32          # samples per mini-batch for the data loaders
num_epochs = 10          # full passes over the training set
beta = 10                # passed to forward(); scales the gate pre-activations in the gated models

### DNN

**Model Building**

In [None]:
class ConvNet(nn.Module):
  """Plain ReLU CNN (the "DNN" baseline).

  Four unpadded 3x3 convolutions, global average pooling and a two-layer
  fully connected head that produces 10 logits.
  """

  def __init__(self):
    super().__init__()
    # Unpadded 3x3 convolutions: each one trims 2 pixels per spatial dimension.
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
    self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
    self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
    self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3)
    # Global average pooling collapses every feature map to a single value.
    self.gap = nn.AdaptiveAvgPool2d((1, 1))
    self.fc1 = nn.Linear(128, 256)
    self.fc2 = nn.Linear(256, 10)

  def forward(self, x,beta):
    """Return class logits of shape (n, 10) for an image batch `x`.

    `beta` is accepted so every model in this notebook can be called the same
    way; this network does not use it.
    """
    # For 32x32 inputs the spatial size shrinks 32 -> 30 -> 28 -> 26 -> 24.
    feats = x
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
      feats = F.relu(conv(feats))

    pooled = self.gap(feats)        # -> n, 128, 1, 1
    flat = pooled.view(-1, 128)     # -> n, 128
    hidden = F.relu(self.fc1(flat))  # -> n, 256
    return self.fc2(hidden)         # -> n, 10


# Instantiate the network on the selected device and set up loss + optimizer.
model = ConvNet().to(device)

criterion = nn.CrossEntropyLoss()
# Pick the optimizer by name. An unrecognised name now fails loudly instead of
# silently leaving `optimizer` undefined (or stale from a previously run cell).
if optimizer_name == 'SGD':
  optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
elif optimizer_name == 'Adam':
  # Adam uses a fixed step size of 3e-4 here, independent of `learning_rate`.
  optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
else:
  raise ValueError(f"Unsupported optimizer_name: {optimizer_name!r}")

### DGN

In [None]:
class ConvNet(nn.Module):
  """Deep gated network (DGN) with a gate network and a weight network.

  The gate network (`*_gt` layers) is a ReLU CNN fed with the image. Its
  pre-activations, scaled by `beta` and squashed by a sigmoid, act as soft
  gates that multiply the outputs of the weight network (`*_wt` layers),
  which is also fed with the image.
  """

  def __init__(self):
    super().__init__()
    # Gate network convolutions
    self.conv1_gt = nn.Conv2d(3, 64, kernel_size=3)
    self.conv2_gt = nn.Conv2d(64, 64, kernel_size=3)
    self.conv3_gt = nn.Conv2d(64, 128, kernel_size=3)
    self.conv4_gt = nn.Conv2d(128, 128, kernel_size=3)

    # Weight network convolutions (same shapes as the gate network)
    self.conv1_wt = nn.Conv2d(3, 64, kernel_size=3)
    self.conv2_wt = nn.Conv2d(64, 64, kernel_size=3)
    self.conv3_wt = nn.Conv2d(64, 128, kernel_size=3)
    self.conv4_wt = nn.Conv2d(128, 128, kernel_size=3)

    self.galu = nn.Sigmoid()
    self.gap = nn.AdaptiveAvgPool2d((1, 1))
    self.fc1 = nn.Linear(128, 256)
    self.fc2 = nn.Linear(256, 10)

  def forward(self, x, beta):
    """Return class logits of shape (n, 10).

    x: image batch, used as input to both the gate and the weight network.
    beta: factor applied to the gate pre-activations before the sigmoid.
    """
    stages = (
        (self.conv1_gt, self.conv1_wt),
        (self.conv2_gt, self.conv2_wt),
        (self.conv3_gt, self.conv3_wt),
        (self.conv4_gt, self.conv4_wt),
    )

    gate_feat = x    # running activation of the gate network
    weight_feat = x  # running activation of the weight network
    for gate_conv, weight_conv in stages:
      gate_pre = gate_conv(gate_feat)
      soft_gate = self.galu(beta * gate_pre)
      # The weight path has no non-linearity of its own; it is only gated.
      weight_feat = soft_gate * weight_conv(weight_feat)
      gate_feat = F.relu(gate_pre)

    # Fully connected stage.
    # NOTE(review): fc1 is applied to both paths, i.e. its weights are shared
    # between the gate and the weight network - confirm this is intended.
    gate_vec = self.gap(gate_feat).view(-1, 128)
    fc_gate = self.galu(beta * self.fc1(gate_vec))

    weight_vec = self.gap(weight_feat).view(-1, 128)
    gated_hidden = fc_gate * self.fc1(weight_vec)
    return self.fc2(gated_hidden)


# Instantiate the network on the selected device and set up loss + optimizer.
model = ConvNet().to(device)

criterion = nn.CrossEntropyLoss()
# Pick the optimizer by name. An unrecognised name now fails loudly instead of
# silently leaving `optimizer` undefined (or stale from a previously run cell).
if optimizer_name == 'SGD':
  optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
elif optimizer_name == 'Adam':
  # Adam uses a fixed step size of 3e-4 here, independent of `learning_rate`.
  optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
else:
  raise ValueError(f"Unsupported optimizer_name: {optimizer_name!r}")

### DLGN

In [None]:
class ConvNet(nn.Module):
  """Deep linearly gated network (DLGN).

  The gate network (`*_gt` layers) is a stack of convolutions without any
  non-linearity, fed with the image. Its pre-activations, scaled by `beta`
  and squashed by a sigmoid, gate the weight network (`*_wt` layers), whose
  input is an all-ones tensor instead of the image.
  """

  def __init__(self):
    super(ConvNet, self).__init__()
    # Gate network convolutions
    self.conv1_gt = nn.Conv2d(3, 64, 3)
    self.conv2_gt = nn.Conv2d(64, 64, 3)
    self.conv3_gt = nn.Conv2d(64, 128, 3)
    self.conv4_gt = nn.Conv2d(128, 128, 3)

    # Weight network convolutions (same shapes as the gate network)
    self.conv1_wt = nn.Conv2d(3, 64, 3)
    self.conv2_wt = nn.Conv2d(64, 64, 3)
    self.conv3_wt = nn.Conv2d(64, 128, 3)
    self.conv4_wt = nn.Conv2d(128, 128, 3)
    self.galu = nn.Sigmoid()
    self.gap = nn.AdaptiveAvgPool2d((1,1))
    self.fc1 = nn.Linear(128 * 1 * 1, 256)
    self.fc2 = nn.Linear(256, 10)

  def forward(self, x, beta):
    """Return class logits of shape (n, 10).

    x: image batch fed to the gate network.
    beta: factor applied to the gate pre-activations before the sigmoid.
    """
    # x_wt = 1 is the input for the bottom weight network. Building it with
    # ones_like keeps it on x's device with x's shape and dtype, so the model
    # no longer depends on a global `device`, on a NumPy round trip per call,
    # or on the input being exactly 3x32x32.
    x_wt = torch.ones_like(x)
    x_gt = x  # x_gt is for the top gate network

    stages = (
        (self.conv1_gt, self.conv1_wt),
        (self.conv2_gt, self.conv2_wt),
        (self.conv3_gt, self.conv3_wt),
        (self.conv4_gt, self.conv4_wt),
    )
    for conv_gt, conv_wt in stages:
      # The gate path stays linear: no ReLU between its convolutions.
      x_gt = conv_gt(x_gt)
      gate = self.galu(beta * x_gt)
      x_wt = gate * conv_wt(x_wt)

    # Fully connected stage.
    # NOTE(review): fc1 is applied to both paths, i.e. its weights are shared
    # between the gate and the weight network - confirm this is intended.
    x_gt = self.gap(x_gt).view(-1, 128 * 1 * 1)
    g5_fc = self.galu(beta * self.fc1(x_gt))

    x_wt = self.gap(x_wt).view(-1, 128 * 1 * 1)
    x_wt = g5_fc * self.fc1(x_wt)
    return self.fc2(x_wt)


# Instantiate the network on the selected device and set up loss + optimizer.
model = ConvNet().to(device)

criterion = nn.CrossEntropyLoss()
# Pick the optimizer by name. An unrecognised name now fails loudly instead of
# silently leaving `optimizer` undefined (or stale from a previously run cell).
if optimizer_name == 'SGD':
  optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
elif optimizer_name == 'Adam':
  # Adam uses a fixed step size of 3e-4 here, independent of `learning_rate`.
  optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
else:
  raise ValueError(f"Unsupported optimizer_name: {optimizer_name!r}")

### Result SGD

In [None]:
SGD
-------------
DNN
----
5-->57.13
10-->62.74
15-->67.74
20-->66.9
25-->71.95
30-->71.03
35-->71.59
40-->70.33
45-->70.46
50-->69.34
55-->70.8
75-->72.21
95-->72.54
DGN
----
5-->56.73
10-->65.06
15-->67.23
20-->71.62
25-->69.25
30-->70.48
35-->68.81
40-->68.73 
45-->70.61
50-->71.07
55-->70.07
75-->72.11
92-->72.58
DLGN
----
5-->62
10-->66.1
15--> 67.28
20-->66.36
25-->66.14
30-->66.45
35-->66.11
40-->65.3
45-->66.92
50-->66.81
55-->66.81
75-->66.33 
100-->66.17

### Result Adam

In [None]:
Adam
-------------
DNN
----
5--> 54.91
10-->61.81
15-->
20-->69.86
25-->
30-->74.5
35-->75.89
40-->77.0
45-->77.27
50-->78.64
55-->77.89
60-->78.49
62-->79.23
64-->79.28
65-->79.14
66-->79.54
69-->79.77
70-->78.7
75-->78.98
80-->78.38

DGN
----
5-->
10-->
15-->
20-->
25-->76.65
30-->
35-->
40-->
45-->
50-->76.69
55-->
75-->
92-->
DLGN
----
5-->
10-->
15-->
20-->
25-->72.46
30-->
35-->73.78
40-->
45-->73.29
50-->
55-->
75--> 
100-->

## VGG-16

In [None]:
# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions

def _vgg_config(convs_per_stage):
    """Build one VGG layer list from the number of convolutions in each stage.

    The five stages use 64, 128, 256, 512 and 512 output channels; every stage
    is followed by an "M" marker, which stands for a max-pooling layer.
    """
    stage_widths = (64, 128, 256, 512, 512)
    config = []
    for width, n_convs in zip(stage_widths, convs_per_stage):
        config += [width] * n_convs + ["M"]
    return config


# Layer lists per VGG variant: integers are conv output channels, "M" is a max-pool.
VGG_types = {
    "VGG11": _vgg_config((1, 1, 2, 2, 2)),
    "VGG13": _vgg_config((2, 2, 2, 2, 2)),
    "VGG16": _vgg_config((2, 2, 3, 3, 3)),
    "VGG19": _vgg_config((2, 2, 4, 4, 4)),
}


class VGG_net(nn.Module):
    """VGG classifier assembled from one of the layer lists in `VGG_types`.

    Args:
        in_channels: number of channels of the input images.
        num_classes: size of the output logit vector.
        architecture: key into `VGG_types` selecting the conv stack
            (defaults to "VGG16", the variant that was previously hard-coded).

    The first fully connected layer expects 512 * 7 * 7 features, so the conv
    stack must end with 7x7 feature maps (e.g. 224x224 inputs halved by the
    five max-pool layers of each configuration).
    """

    def __init__(self, in_channels=3, num_classes=1000, architecture="VGG16"):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG_types[architecture])

        self.fcs = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Run the conv stack, flatten per sample and apply the classifier head."""
        x = self.conv_layers(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

    def create_conv_layers(self, architecture):
        """Translate a layer list into an `nn.Sequential`.

        Each integer becomes Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d ->
        ReLU with that many output channels; each "M" becomes a 2x2 max-pool
        with stride 2.

        Raises:
            ValueError: if the list contains anything other than an int or "M"
                (such entries used to be skipped silently).
        """
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if isinstance(x, int):
                out_channels = x

                layers += [
                    nn.Conv2d(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),
                    ),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                ]
                # The next conv consumes what this one produced.
                in_channels = out_channels
            elif x == "M":
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]
            else:
                raise ValueError(f"Unknown VGG layer specification: {x!r}")

        return nn.Sequential(*layers)


if __name__ == "__main__":
    # Smoke check: build the 1000-class VGG on the available device and print
    # its layer structure. Note that `device` is rebound to a plain string here.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = VGG_net(in_channels=3, num_classes=1000)
    model = model.to(device)
    print(model)
    # To also exercise the forward pass with a mini-batch of N = 3 images:
    # x = torch.randn(3, 3, 224, 224).to(device)
    # print(model(x).shape)

### VGG-16

In [None]:
# Training configuration for the VGG-16 experiment
optimizer_name = 'SGD'  # which optimizer the model cell builds ('SGD' or 'Adam')
learning_rate = 0.01    # step size handed to SGD
batch_size = 128        # samples per mini-batch for the data loaders
num_epochs = 32         # full passes over the training set
beta = 10               # passed to forward(); the plain VGG model ignores it

In [None]:
class VGG_Net(nn.Module):
  """VGG-16-style ReLU network with a global-average-pooled 10-way head.

  Thirteen 3x3 convolutions (stride 1, padding 1, so the spatial size is kept)
  arranged in five groups of widths 64, 128, 256, 512 and 512. There is no
  pooling between the groups; a global average pool follows the last
  convolution, then three fully connected layers.
  """

  def __init__(self):
    super().__init__()

    def conv3x3(c_in, c_out):
      # Size-preserving 3x3 convolution shared by all groups.
      return nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1)

    # Group 1
    self.conv11 = conv3x3(3, 64)
    self.conv12 = conv3x3(64, 64)
    # Group 2
    self.conv21 = conv3x3(64, 128)
    self.conv22 = conv3x3(128, 128)
    # Group 3
    self.conv31 = conv3x3(128, 256)
    self.conv32 = conv3x3(256, 256)
    self.conv33 = conv3x3(256, 256)
    # Group 4
    self.conv41 = conv3x3(256, 512)
    self.conv42 = conv3x3(512, 512)
    self.conv43 = conv3x3(512, 512)
    # Group 5
    self.conv51 = conv3x3(512, 512)
    self.conv52 = conv3x3(512, 512)
    self.conv53 = conv3x3(512, 512)

    self.gap = nn.AdaptiveAvgPool2d((1, 1))
    self.fc1 = nn.Linear(512, 4096)
    self.fc2 = nn.Linear(4096, 4096)
    self.fc3 = nn.Linear(4096, 10)

  def forward(self, x,beta):
    """Return class logits of shape (n, 10).

    `beta` is accepted so every model in this notebook can be called the same
    way; this network does not use it.
    """
    conv_stack = (
        self.conv11, self.conv12,
        self.conv21, self.conv22,
        self.conv31, self.conv32, self.conv33,
        self.conv41, self.conv42, self.conv43,
        self.conv51, self.conv52, self.conv53,
    )
    feats = x
    for conv in conv_stack:
      feats = F.relu(conv(feats))

    flat = self.gap(feats).view(-1, 512)
    hidden = F.relu(self.fc1(flat))
    hidden = F.relu(self.fc2(hidden))
    return self.fc3(hidden)


# Instantiate the network on the selected device and set up loss + optimizer.
model = VGG_Net().to(device)

criterion = nn.CrossEntropyLoss()
# Pick the optimizer by name. An unrecognised name now fails loudly instead of
# silently leaving `optimizer` undefined (or stale from a previously run cell).
if optimizer_name == 'SGD':
  optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum= 0.9)
elif optimizer_name == 'Adam':
  # Adam uses a fixed step size of 3e-4 here, independent of `learning_rate`.
  optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
else:
  raise ValueError(f"Unsupported optimizer_name: {optimizer_name!r}")

## Dataloading, Training and Testing

**Dataset Preparation & Loading**

In [None]:
# The dataset yields PIL images; ToTensor scales them to [0, 1] and Normalize
# (mean 0.5, std 0.5 per channel) then maps them to the range [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR10: 60000 32x32 color images in 10 classes, with 6000 images per class.
# Both splits are downloaded into ./data when missing.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)

# Training batches are reshuffled on every pass; test batches keep a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

# Human-readable class names, indexed by label id
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

def imshow(img):
  """Display a channel-first image tensor that was normalized to [-1, 1].

  The tensor is mapped back to [0, 1], converted to a NumPy array and shown
  with matplotlib after moving the channel axis last.
  """
  img = img / 2 + 0.5  # unnormalize
  # detach()/cpu() make this work for GPU tensors and tensors tracking gradients;
  # for plain CPU tensors they are no-ops.
  npimg = img.detach().cpu().numpy()
  plt.imshow(np.transpose(npimg, (1, 2, 0)))
  plt.show()


# get some random training images
dataiter = iter(train_loader)
# Use the built-in next(): DataLoader iterators in current PyTorch releases no
# longer provide a `.next()` method.
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))

**Training**

In [None]:
n_total_steps = len(train_loader)
# Put the model in training mode explicitly so that layers such as dropout or
# batch norm (if the model has any) behave correctly, even after an evaluation.
model.train()
for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # images: [batch_size, 3, 32, 32] for CIFAR-10
    images = images.to(device)
    labels = labels.to(device)

    # Forward pass
    outputs = model(images,beta)
    loss = criterion(outputs, labels)

    # Backward and optimize: clear old gradients, backpropagate, update weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Progress report every 200 mini-batches
    if (i+1) % 200 == 0:
      print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

print('Finished Training')

**Saving the model**

In [None]:
# Persist only the model's state_dict (its parameters and buffers) to disk.
PATH = './cnn.pth'
_state = model.state_dict()
torch.save(_state, PATH)

**Testing the model**

In [None]:
# Evaluate overall and per-class accuracy on the test set.
num_classes = len(classes)
# Switch to evaluation mode for the measurement and restore the previous mode
# afterwards, so re-running the training cell is unaffected.
was_training = model.training
model.eval()
try:
  with torch.no_grad():
    n_correct = 0
    n_samples = 0
    class_correct = torch.zeros(num_classes, dtype=torch.long)
    class_samples = torch.zeros(num_classes, dtype=torch.long)
    for images, labels in test_loader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images,beta)
      # max returns (value ,index)
      _, predicted = torch.max(outputs, 1)

      # Count on the CPU, one vectorized update per batch instead of a Python
      # loop over individual samples.
      labels_cpu = labels.cpu()
      hits = (predicted == labels).cpu()
      n_samples += labels_cpu.size(0)
      n_correct += hits.sum().item()
      class_samples += torch.bincount(labels_cpu, minlength=num_classes)
      class_correct += torch.bincount(labels_cpu[hits], minlength=num_classes)
finally:
  model.train(was_training)

n_class_correct = class_correct.tolist()
n_class_samples = class_samples.tolist()

# nan is reported instead of raising when nothing was evaluated.
acc = 100.0 * n_correct / n_samples if n_samples else float('nan')
print(f'Accuracy of the network: {acc} %')

for i in range(num_classes):
  if n_class_samples[i]:
    acc = 100.0 * n_class_correct[i] / n_class_samples[i]
  else:
    acc = float('nan')  # class absent from the evaluated data
  print(f'Accuracy of {classes[i]}: {acc} %')
