In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision

import torchvision.transforms as transforms
import torch.nn.functional as F

import matplotlib.pyplot as plt
import numpy as np
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# hyper parameters for MNIST
inputSize = 784
hiddenLayerSize = 100
numberClasses = 10
learningRate = 0.001

# Load MNIST dataset
trainDataset = torchvision.datasets.MNIST('./data', train = True, transform = transforms.ToTensor(), download = True)
testDataset = torchvision.datasets.MNIST('./data', train = False, transform = transforms.ToTensor())
trainLoader = torch.utils.data.DataLoader(dataset = trainDataset, batch_size=24, shuffle=False)
testLoader = torch.utils.data.DataLoader(dataset = testDataset, batch_size=24, shuffle=False)

In [8]:
class LinearFunction_3(torch.autograd.Function):
    @staticmethod
    # ctx is the first argument to forward
    def forward(ctx, input, weight, bias=None):
        logg = False
        # The forward pass can use ctx.
        #ctx.save_for_backward(input, weight, bias)
        count = torch.zeros_like(weight)
        for k in range(4):
            referenceParam = torch.rand_like(weight)
            if logg: print("fowd.ge", torch.ge(weight, referenceParam))
            count = torch.add( torch.ge(weight, referenceParam).float(), count)
            if logg: print("fowd.count", count)
        tempWeight = torch.ge(count, 2).float()
        if logg: print("fowd.tempWeight", tempWeight)
        output = input.mm(tempWeight.t())
        if bias is not None:
            count = torch.zeros_like(bias)
            for k in range(4):
                referenceParam = torch.rand_like(bias)
                count = torch.add( torch.ge(bias, referenceParam).float(), count)
            tempBias = torch.ge(count, 2).float()
            if logg: print("fowd.tempBias", tempBias)
            output += tempBias.unsqueeze(0).expand_as(output)
        ctx.save_for_backward(input, tempWeight, tempBias)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        logg = False
        input, weight, bias = ctx.saved_tensors
        grad_input = grad_weight = grad_bias = None
        
        grad_output = F.hardtanh(grad_output)
        
        if ctx.needs_input_grad[0]:
            grad_input = grad_output.mm(weight)
            # if logg: print("back.grad_input", grad_input)
        if ctx.needs_input_grad[1]:
            grad_weight = grad_output.t().mm(input)
            if logg: print("back.grad_weight", grad_weight)
        if bias is not None and ctx.needs_input_grad[2]:
            grad_bias = grad_output.sum(0)
            
        return grad_input, grad_weight, grad_bias

In [19]:
import torch
from torch.nn import functional as F
from torch.autograd import gradcheck

class TestLinear(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(TestLinear, self).__init__()
        self.weight = torch.nn.Parameter(torch.randn(output_dim, input_dim))
        self.bias = torch.nn.Parameter(torch.randn(output_dim))

    def forward(self, input):
        return LinearFunction_3.apply(input, self.weight, self.bias)

# Use double precision as gradcheck requires it to create accurate numerical gradients
x = torch.randn(20, 20, requires_grad=True)
model = TestLinear(20, 30)
optimizer = torch.optim.SGD(model.parameters(), lr=1)
# print weight before BP
print("weight before BP", model.weight[0])
res = model(x)
target = torch.randn(30)
loss_func = torch.nn.MSELoss()
loss = loss_func(res, target)
loss.backward()
optimizer.step()
# print weight after BP
print("weight after BP", model.weight[0])
print("loss", loss)


weight before BP tensor([ 0.6662,  0.5546,  1.5303,  1.7600, -0.8034, -0.3979, -0.6043, -0.4749,
        -1.5421,  0.4470, -2.0763,  2.0132,  0.5412, -0.8705, -2.0640,  1.5530,
        -0.2717,  0.2575, -0.2516, -0.2860], grad_fn=<SelectBackward0>)
back.grad_weight tensor([[ 3.8006e-02,  3.8738e-02,  1.3425e-02,  1.8539e-02, -2.0649e-02,
         -3.8879e-02,  1.1634e-02,  1.8019e-02, -2.4792e-02,  5.9863e-02,
         -3.5627e-02,  1.8798e-02,  1.4933e-02, -3.8263e-02,  1.1714e-02,
         -1.9371e-02,  8.8255e-04,  1.6703e-02,  7.1645e-03, -8.2428e-03],
        [-2.3888e-02, -2.0530e-02,  2.1598e-02, -4.5838e-02,  5.6860e-03,
          2.4632e-02,  1.5597e-02,  5.1192e-03,  5.5282e-02,  6.5036e-02,
         -3.0847e-02, -1.6248e-03,  5.6512e-02,  4.9141e-02, -2.6338e-02,
         -7.1078e-02,  5.6660e-02,  1.7842e-02,  1.6961e-02,  9.3998e-02],
        [ 5.5395e-02,  6.9032e-02,  4.8156e-02, -1.4181e-02, -2.0249e-03,
         -3.4189e-02,  6.3744e-03,  5.0665e-02, -3.7175e-02,  7.66

In [24]:
class CustomLinear_3(nn.Module):
    def __init__(self, input_features, output_features, bias=True):
        super().__init__()
        self.input_features = input_features
        self.output_features = output_features

        # nn.Parameter is a special kind of Tensor, that will get
        # automatically registered as Module's parameter once it's assigned
        # as an attribute. Parameters and buffers need to be registered, or
        # they won't appear in .parameters() (doesn't apply to buffers), and
        # won't be converted when e.g. .cuda() is called. You can use
        # .register_buffer() to register buffers.
        # nn.Parameters require gradients by default.
        self.weight = nn.Parameter(torch.empty(output_features, input_features))
        if bias:
            self.bias = nn.Parameter(torch.empty(output_features))
        else:
            # You should always register all possible parameters, but the
            # optional ones can be None if you want.
            self.register_parameter('bias', None)

        # Not a very smart way to initialize weights
        nn.init.uniform_(self.weight, -0.1, 0.1)
        if self.bias is not None:
            nn.init.uniform_(self.bias, -0.1, 0.1)

    def forward(self, input):
        # See the autograd section for explanation of what happens here
        return LinearFunction_3.apply(input, self.weight, self.bias)

    def extra_repr(self):
        # (Optional)Set the extra information about this module. You can test
        # it by printing an object of this class.
        return 'input_features={}, output_features={}, bias={}'.format(
            self.input_features, self.output_features, self.bias is not None
        )

In [27]:
# MODEL _ 2 _ 24
# Unipolar
# bitwidth = 1
# Voting Mechansim = 4

class NeuralNetwork(nn.Module):
    def __init__(self, inputSize, hiddenLayersSize, numberClasses):
        super(NeuralNetwork, self).__init__()
        self.linear_1 = CustomLinear_3(inputSize, hiddenLayersSize, bias = True)
        self.linear_2 = CustomLinear_3(hiddenLayersSize, numberClasses, bias = True)
        
        self.inputSize = inputSize
        self.hiddenLayersSize = hiddenLayersSize
        self.numberClasses = numberClasses
        
    def forward(self, x):
        output = self.linear_1(x)
        hid = output
        output = torch.clamp(output, 0, 1)
        activ = output
        output = self.linear_2(output)
        return output, hid, activ
    
model_2_24 = NeuralNetwork(inputSize, 500, numberClasses)
model_2_24_losses = []
model_2_24_acc = []

### train test

In [28]:
# loss

trainLoader = torch.utils.data.DataLoader(dataset = trainDataset, batch_size=24, shuffle=False, drop_last=True)
testLoader = torch.utils.data.DataLoader(dataset = testDataset, batch_size=24, shuffle=False, drop_last=True)

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model_2_24.parameters(), lr = 7e-3)

# training loop
n_total_steps = len(trainLoader)

for epoch in range(10):
    totalLoss = 0
    for i, (images, labels) in enumerate(trainLoader):
        
        # Change the input data format of the mnist dataset to stochastic form using reference random numbers
        referenceTrainData = torch.rand(24, 1, 28, 28)/20
        # samples = torch.zeros(24, 1, 28, 28)
        
        samples = torch.ge(images, referenceTrainData).float()
        # plot the samples, then break
        # plt.imshow(samples[i][0], cmap='gray')
        # plt.show()
                
        samples = samples.reshape(-1, 784)
             
        # forward
        outputs, _, _ = model_2_24(samples)
        loss = criterion(outputs, labels)
        totalLoss += loss.item()
        
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
            
        if (i+1) % 2500 == 0:
            model_2_24_losses.append(totalLoss/2500)
            print(f'epoch {epoch+1}/{300}, step {i+1} / {n_total_steps}, loss = {totalLoss/2500:.4f}')
    
    # Inference
    with torch.no_grad():
        numberCorrect = 0
        numberSamples = 0
        for z, (images, labels) in enumerate(testLoader):  

            # Change the input data format of the mnist dataset to stochastic form using reference random numbers
            samples = torch.zeros(24, 1, 28, 28)
            referenceTrainData = torch.rand(24, 1, 28, 28)/20
            
            samples = torch.ge(images, referenceTrainData).float()

            images = samples.reshape(-1, 784)

            outputs, hid, activ = model_2_24(images)

            # value and index of the correct predictions
            _, predictions = torch.max(outputs, 1)

            numberSamples += labels.shape[0]

            numberCorrect += (predictions == labels).sum().item()

            # if z == 415:
            #     break

        accuracy = 100.0 * (numberCorrect) / numberSamples
        print("hid", hid, '\n')
        print("activ", activ, '\n')

        print(f'accuracy = {accuracy}')
        model_2_24_acc.append(accuracy)

epoch 1/300, step 2500 / 2500, loss = 2.2801
hid tensor([[ 0.,  0.,  1.,  ...,  0.,  0.,  5.],
        [ 1.,  0.,  0.,  ...,  3.,  0.,  1.],
        [ 3.,  0.,  2.,  ...,  3.,  2., 11.],
        ...,
        [ 3.,  0.,  0.,  ...,  4.,  1.,  0.],
        [ 1.,  0.,  0.,  ...,  1.,  1.,  0.],
        [ 4.,  1.,  1.,  ...,  2.,  2., 13.]]) 

activ tensor([[0., 0., 1.,  ..., 0., 0., 1.],
        [1., 0., 0.,  ..., 1., 0., 1.],
        [1., 0., 1.,  ..., 1., 1., 1.],
        ...,
        [1., 0., 0.,  ..., 1., 1., 0.],
        [1., 0., 0.,  ..., 1., 1., 0.],
        [1., 1., 1.,  ..., 1., 1., 1.]]) 

accuracy = 78.26522435897436
epoch 2/300, step 2500 / 2500, loss = 1.2963
hid tensor([[0., 3., 2.,  ..., 0., 3., 1.],
        [0., 0., 1.,  ..., 3., 0., 0.],
        [2., 2., 2.,  ..., 1., 7., 9.],
        ...,
        [1., 0., 1.,  ..., 4., 2., 0.],
        [0., 0., 2.,  ..., 1., 1., 0.],
        [2., 4., 2.,  ..., 1., 7., 9.]]) 

activ tensor([[0., 1., 1.,  ..., 0., 1., 1.],
        [0., 0., 

KeyboardInterrupt: 

In [22]:
with torch.no_grad():
    numberCorrect = 0
    numberSamples = 0
    for z, (images, labels) in enumerate(testLoader):  

        # Change the input data format of the mnist dataset to stochastic form using reference random numbers
        samples = torch.zeros(24, 1, 28, 28)
        referenceTrainData = torch.rand(24, 1, 28, 28)/20
        
        samples = torch.ge(images, referenceTrainData).float()

        images = samples.reshape(-1, 784)

        outputs = model_2_24(images)
        print("outputs", outputs[0])
        break

hid:  tensor([[0., 0., 0.,  ..., 0., 0., 2.],
        [3., 0., 1.,  ..., 4., 1., 3.],
        [1., 0., 0.,  ..., 0., 0., 1.],
        ...,
        [0., 0., 1.,  ..., 2., 0., 4.],
        [3., 0., 0.,  ..., 0., 1., 4.],
        [0., 0., 1.,  ..., 0., 1., 3.]])
activ:  tensor([[0., 0., 0.,  ..., 0., 0., 1.],
        [1., 0., 1.,  ..., 1., 1., 1.],
        [1., 0., 0.,  ..., 0., 0., 1.],
        ...,
        [0., 0., 1.,  ..., 1., 0., 1.],
        [1., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 1.,  ..., 0., 1., 1.]]) 

outputs tensor([3., 4., 4., 4., 2., 2., 1., 3., 1., 1.])
