<a href="https://colab.research.google.com/github/Sooryakiran/HARTS/blob/master/Harts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hardware Architectural Search (HARTS)
--------------------------------------

DISCLAIMER: This is not a binary neural network where the weights are zeros and ones.

This notebook illustrates the use of ideas from deep learning to design hardwares for predictive applications. The below code can generate architectures to classify MNIST handwritten digits using just logic gates.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import numpy as np

# Choice of gates between any two values

Here I have implimented 3 possible choices of gates, AND, OR and XOR. The choice has to be differentiable. So I used a weight value for each gate over which softmax will be taken to choose the gate. Since softmax is differentiable, the choice of gates become differentiable and we can use backpropagation to tune the weights to choose the gate that gives better results.


 1 represents on state
 
-1 represents off state

In [8]:
class BinLayer(nn.Module):
    """
    Class BinLayer

    """
    def __init__(self, in_neurons, temp = 0.01):
        """
        The class constructor

        @param in_neurons : int, bit size of input
        @param temp       : float, temperature of softmax

        Let us say that there are n bits as input. We have n x n possible input pairs.
        However this reduces by nearly a factor of 2 for commutative bitwise operations.
        In this module, we take the entire n x n as search domain, so that we can support
        bitwise operations that are not commutative in future.

        Assume we have an input bit vector x. We compute 3 output matrices,
        OR  (x', x) 
        AND (x', x)
        XOR (x', x)
        Where x' is the transpose of x.

        During forward pass, we choose one among theses 3 matrices element wise to
        create an nxn matrix. The choice is done by doing a softmax over the stored weights.
        The weight matrix is updated using backpropagation to improve the choice.

        """

        super(BinLayer, self).__init__()
        self.in_neurons = in_neurons

        self.and_ = AND()
        self.or_  = OR()
        self.xor_ = XOR()
        
        self.weights = torch.nn.Parameter(data = torch.Tensor(in_neurons*in_neurons, 3), requires_grad = True)
        self.softmax = nn.Softmax(dim = -1)
        self.temp    = temp

        self.weights.data.uniform_(-1, 1)

    def forward(self, x):
        """
        The forward pass

        @param x: input bit vector

        """
        and_output = self.and_(x).unsqueeze(-1)
        or_output  = self.or_(x).unsqueeze(-1)
        xor_output = self.xor_(x).unsqueeze(-1)

        soft_weights   = self.softmax(self.weights/self.temp)
        massive_input  = torch.cat([and_output, or_output, xor_output], -1)
        massive_weight = soft_weights.unsqueeze(0).repeat(x.size(0), 1, 1)
        massive_output = torch.mul(massive_input, massive_weight)

        return torch.sum(massive_output, axis = -1)

class SELECT(nn.Module):
    """
    The SELECT class

    """
    def __init__(self, in_neurons, out_neurons, temp = 0.01):
        """
        Since the BinLayer defined above is not scalable, i.e it produces nxn outputs
        for an input of size n, we define a module that subsamples the input. The sub
        sampling choices are also trainable using backpropagation, since we use 
        softmax over the stored weights to perform the choice.

        @param in_neurons  : int, input bit vector length
        @param out_neurons : int, subsampled output bit vector length

        """
        super(SELECT, self).__init__()
        self.weights = torch.nn.Parameter(data = torch.Tensor(in_neurons, out_neurons), requires_grad = True)
        self.softmax = nn.Softmax(dim = 0)
        self.temp    = temp

        self.weights.data.uniform_(-1, 1)

    def forward(self, x):
        """
        Selects the most suitable top out_neurons

        """
        soft_weights = self.softmax(self.weights/self.temp)
        return x @ soft_weights

class NotLayer(nn.Module):
    """
    The NOT layer

    """
    def __init__(self, in_neurons, temp = 0.01):
        """
        The NOT layer can be used as a possible 'activation function'. This layer 
        chooses whether to put a NOT gate or not on each elements in the input bit
        vector. Like above, the choice is differentiable.

        @param in_neurons : int, input bit vector length
        @param temp       : float, softmax temperature

        """
        super(NotLayer, self).__init__()
        self.weights = torch.nn.Parameter(data = torch.Tensor(in_neurons), requires_grad = True)
        self.sigmoid = nn.Sigmoid()
        self.temp    = temp

        self.weights.data.uniform_(-1, 1)

    def forward(self, x):
        """
        The forward pass

        """
        x_in            = x
        x_compliment_in = -x_in
        soft_weights    = self.sigmoid(self.weights/self.temp)
        return torch.mul(x_in, soft_weights) + torch.mul(x_compliment_in, 1 - selft_weights)

class XOR(nn.Module):
    """
    The XOR module

    """
    def __init__(self):
        super(XOR, self).__init__()
    
    def forward(self, x):
        x      = x.unsqueeze(-1)
        output = -x @ torch.transpose(x, 1, 2)

        return output.view(x.size(0), -1)


class AND(nn.Module):
    """
    The AND module

    """
    def __init__(self):
        super(AND, self).__init__()
        self.xor = XOR();

    def forward(self, x):
        x = (x+1)/2 
        return - self.xor(x)*2 -1

class OR(nn.Module):
    """
    The OR module
    
    """
    def __init__(self):
        super(OR, self).__init__()
        self.xor  =  XOR()
        self.and_ = AND();

    def forward(self, x):
        return self.xor(x) + self.and_(x) + 1

In [19]:
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()

        self.select_1 = SELECT(in_neurons = 784, out_neurons = 256)
        self.bin_1    = BinLayer(in_neurons = 256)

        self.select_2 = SELECT(in_neurons = 256*256, out_neurons = 256)
        self.not_2    = NotLayer(in_neurons = 256)
        self.bin_2    = BinLayer(in_neurons = 256)

        self.select_3 = SELECT(in_neurons = 256*256, out_neurons = 128)
        self.not_3    = NotLayer(in_neurons = 128)
        self.bin_3    = BinLayer(in_neurons = 128)

        self.select_4 = SELECT(in_neurons = 128*128, out_neurons= 64)
        self.not_4    = NotLayer(in_neurons = 64)
        self.bin_4    = BinLayer(in_neurons = 64)

        self.select_5 = SELECT(in_neurons = 64*64, out_neurons = 32)
        self.not_5    = NotLayer(in_neurons = 32)
        self.bin_5    = BinLayer(in_neurons = 32)

        self.select_6 = SELECT(in_neurons = 32*32, out_neurons = 10)

    def forward(self, x):
        x = self.select_1(x)
        x = self.bin_1(x)

        x = self.select_2(x)
        #x = self.not_2(x)
        x = self.bin_2(x)

        x = self.select_3(x)
        #x = self.not_3(x)
        x = self.bin_3(x)

        x = self.select_4(x)
        #x = self.not_4(x)
        x = self.bin_4(x)

        x = self.select_5(x)
        #x = self.not_5(x)
        x = self.bin_5(x)
        x = self.select_6(x)
        return x

# Hyperparameters

In [20]:
EPOCHS = 10
LEARNING_RATES = [1e-2, 1e-3]
MOMENTUM = 0.5
BATCH_SIZE = 512
LOG_INTERVAL = 5

In [21]:
class CustomTransform:
    def __init__(self):
        pass
    def __call__(self, x):
        x = x*2 - 1
        return x.view(-1)

TRAIN_LOADER = torch.utils.data.DataLoader(torchvision.datasets.MNIST('/files/', train=True, download=True,
                                           transform=torchvision.transforms.Compose([
                                           torchvision.transforms.ToTensor(),
                                           CustomTransform()])),
                                           batch_size=BATCH_SIZE, shuffle=True)

TEST_LOADER = torch.utils.data.DataLoader(torchvision.datasets.MNIST('/files/', train=False, download=True,
                                          transform=torchvision.transforms.Compose([
                                          torchvision.transforms.ToTensor(),
                                          CustomTransform()])),
                                          batch_size=BATCH_SIZE, shuffle=True)

In [22]:
def train(network, epoch):
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
        print("TRAINING ON GPU")

    network = network.to(device)
    criterion = nn.CrossEntropyLoss()
    if epoch < len(LEARNING_RATES):
        lr = LEARNING_RATES[epoch]
    else:
        lr = LEARNING_RATES[-1]
    optimizer = torch.optim.Adam(network.parameters(), lr=lr)
    for index, (data, target) in enumerate(TRAIN_LOADER):
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        output = network(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if index%LOG_INTERVAL == 0:
            pred = output.data.max(1, keepdim=True)[1]
            acc = pred.eq(target.data.view_as(pred)).sum()*1.0/BATCH_SIZE
        
            print("EPOCH: %d: BATCH (%d), \tLOSS = %0.3f, \tACC:%0.2f" %(epoch, index, loss.item(), acc.item()))

def test(network):
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
        print("TESTING ON GPU")

    network = network.to(device)
    criterion = nn.CrossEntropyLoss()
    

    for index, (data, target) in enumerate(TEST_LOADER):
        data = data.to(device)
        target = target.to(device)
        output = network(data)
        loss = criterion(output, target)
        loss.backward()

        if index%LOG_INTERVAL == 0:
            pred = output.data.max(1, keepdim=True)[1]
            acc = pred.eq(target.data.view_as(pred)).sum()*1.0/BATCH_SIZE
        
            print("BATCH (%d), \tLOSS = %0.3f, \tACC:%0.2f" %(index, loss.item(), acc.item()))


def train_multiple_epochs(network):
    for epoch in range(EPOCHS):
        train(network, epoch)


In [23]:
network = Network()
train_multiple_epochs(network)

TRAINING ON GPU
EPOCH: 0: BATCH (0), 	LOSS = 2.306, 	ACC:0.11
EPOCH: 0: BATCH (5), 	LOSS = 2.155, 	ACC:0.17
EPOCH: 0: BATCH (10), 	LOSS = 2.061, 	ACC:0.26
EPOCH: 0: BATCH (15), 	LOSS = 1.892, 	ACC:0.36
EPOCH: 0: BATCH (20), 	LOSS = 1.668, 	ACC:0.55
EPOCH: 0: BATCH (25), 	LOSS = 1.549, 	ACC:0.59
EPOCH: 0: BATCH (30), 	LOSS = 1.601, 	ACC:0.54
EPOCH: 0: BATCH (35), 	LOSS = 1.612, 	ACC:0.53
EPOCH: 0: BATCH (40), 	LOSS = 1.550, 	ACC:0.55
EPOCH: 0: BATCH (45), 	LOSS = 1.538, 	ACC:0.62
EPOCH: 0: BATCH (50), 	LOSS = 1.507, 	ACC:0.61
EPOCH: 0: BATCH (55), 	LOSS = 1.529, 	ACC:0.56
EPOCH: 0: BATCH (60), 	LOSS = 1.513, 	ACC:0.59
EPOCH: 0: BATCH (65), 	LOSS = 1.560, 	ACC:0.57
EPOCH: 0: BATCH (70), 	LOSS = 1.523, 	ACC:0.61
EPOCH: 0: BATCH (75), 	LOSS = 1.553, 	ACC:0.60
EPOCH: 0: BATCH (80), 	LOSS = 1.563, 	ACC:0.60
EPOCH: 0: BATCH (85), 	LOSS = 1.520, 	ACC:0.59
EPOCH: 0: BATCH (90), 	LOSS = 1.507, 	ACC:0.61
EPOCH: 0: BATCH (95), 	LOSS = 1.558, 	ACC:0.57
EPOCH: 0: BATCH (100), 	LOSS = 1.565, 	ACC:0.5

In [24]:
torch.save(network, "saved_model.pth")

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [25]:
test(network)

TESTING ON GPU
BATCH (0), 	LOSS = 1.371, 	ACC:0.71
BATCH (5), 	LOSS = 1.368, 	ACC:0.73
BATCH (10), 	LOSS = 1.327, 	ACC:0.76
BATCH (15), 	LOSS = 1.359, 	ACC:0.72


# Check the confidence of each choice so that we can freeze and convert them into gates.

In [26]:
for name, param in network.named_parameters():
    if param.requires_grad:
        if "select" in name:
            print("SELECT")
            softmax = nn.Softmax(dim = 0)
            weights = param.data / 0.01
            outs = torch.transpose(softmax(weights), 0, 1)
            outs, _ = torch.max(outs, dim = -1)
            print(torch.mean(outs))
            
        elif "bin" in name:
            print("BIN")
            softmax = nn.Softmax(dim = -1)
            weights = param.data / 0.01
            outs, _ = torch.max(softmax(weights), dim = -1)
            print(torch.mean(outs))
        else:
            print("UNKNOWN")
        #print(name, param.data)

SELECT
tensor(0.9382, device='cuda:0')
BIN
tensor(0.9980, device='cuda:0')
SELECT
tensor(0.9382, device='cuda:0')
UNKNOWN
BIN
tensor(0.9984, device='cuda:0')
SELECT
tensor(0.9388, device='cuda:0')
UNKNOWN
BIN
tensor(0.9980, device='cuda:0')
SELECT
tensor(0.9251, device='cuda:0')
UNKNOWN
BIN
tensor(0.9970, device='cuda:0')
SELECT
tensor(0.9166, device='cuda:0')
UNKNOWN
BIN
tensor(0.9934, device='cuda:0')
SELECT
tensor(0.9854, device='cuda:0')
