In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np


In [2]:

def build_masks(img_width, kernel_size, out_channels=16):
    masks = []
    for k in range(kernel_size**2):
        mask = []
        rows = 0   
        ## first row of mask, ll 0s if in first 3 kernels
        if k < kernel_size:
            for i in range(img_width):
                mask.append(0)
            rows += 1
        
        # middle rows, a and b are values of the left and rightmost columns
        a = int(k % kernel_size != 0)
        b = int((k+1) % kernel_size != 0)
        for j in range(img_width-rows):
            mask.append(a)
            for i in range(img_width-2):
                mask.append(1)
            mask.append(b)
            rows += 1
        
        # last row
        if k >= kernel_size*(kernel_size-1):
            for j in range(img_width):
                mask[(img_width**2)-1-j] = 0
                
        masks.append(np.tile(np.array(mask), out_channels))
    return masks
'''
def build_mask(starting_padding, ending_padding, window_length, max_length):
    mask = []
    for i in range(starting_padding):
        mask.append(0)
    while len(mask) < (max_length - ending_padding):
        for j in range(window_length):
            mask.append(1)
        mask.append(0)
        
    while len(mask) > max_length:
        mask.pop()
    while len(mask) < max_length:
        mask.append(0)
        
    for i in range(ending_padding):
        mask[max_length - i - 1] = 0
        
    return mask


def build_masks(img_width, kernel_size, channels_in):
    masks = []
    masks.append(np.tile(np.array(build_mask(img_width + 1, 0, img_width -1, img_width ** 2)), channels_in))
    masks.append(np.tile(np.array(build_mask(img_width, 0, img_width ** 2, img_width ** 2)), channels_in))
    masks.append(np.tile(np.array(build_mask(img_width, 0, img_width - 1, img_width ** 2)), channels_in))
    masks.append(np.tile(np.array(build_mask(1, 0, img_width - 1, img_width ** 2)), channels_in))
    masks.append(np.tile(np.array(build_mask(0, 0, img_width ** 2, img_width ** 2)), channels_in))
    masks.append(np.tile(np.array(build_mask(0, 1, img_width - 1, img_width ** 2)), channels_in))
    masks.append(np.tile(np.array(build_mask(1, img_width - 1, img_width - 1, img_width ** 2)), channels_in))
    masks.append(np.tile(np.array(build_mask(0, img_width, img_width ** 2, img_width ** 2)), channels_in))
    masks.append(np.tile(np.array(build_mask(0, img_width + 1, img_width - 1, img_width ** 2)), channels_in))
    return masks
'''    
# converts binary mask from stride 1 to stride s
def altalena(mask, img_width, stride=2):
    new_v = []
    for i in range(len(mask)):
        if i % stride != 0:
            new_v.append(0)
        elif i % (img_width*stride) >= img_width:
            new_v.append(0)
        else:
            new_v.append(mask[i])
    return new_v




# this block works fine for the masks discussed in the paper, need to see under what conditions this holds generally
# odd dimension kernel ? what size padding and stride ?

In [3]:

# Initial Layer
def initialLayer(img_width, kernel_size, channels_in, channels_out, conv_weight, bn, bin_masks):
    #calculate weight values for affine bn
    A = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    b = -(bn.weight * bn.running_mean / torch.sqrt(bn.running_var + bn.eps)) + bn.bias
    print(b)
    A = A.detach()
    for i in range(channels_out):
        kernels = [np.array([]) for z in range(kernel_size**2)]
        for j in range(channels_in):
            # specific to intial layer
            weights = conv_weight[i][j].reshape(kernel_size**2)
            for k in range(kernel_size**2):
                kernels[k] = np.append(kernels[k], np.repeat(weights[k].detach(), img_width**2))
        # fill remaining slots with 0s
        for j in range(channels_out-channels_in):
            for k in range(kernel_size**2):
                kernels[k] = np.append(kernels[k], np.repeat(0, img_width**2))
        # mask and save
        for k in range(kernel_size**2):
            kernels[k] = np.multiply(kernels[k], bin_masks[k])
            kernels[k] = np.multiply(kernels[k], np.repeat(A[i], channels_out*(img_width**2)))
            np.savetxt('../weights/conv1bn1-ch{}-k{}.bin'.format(i, k+1), kernels[k], delimiter=',')
    np.savetxt('../weights/conv1bn1-bias.bin', np.repeat(b.detach(), img_width**2), delimiter=',')

In [4]:
# convbn weight encoding
def convbn(img_width, kernel_size, channels_in, channels_out, conv_weight, bn, layerNum, convNum, bin_masks):
    A = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    b = -(bn.weight * bn.running_mean / torch.sqrt(bn.running_var + bn.eps)) + bn.bias
    A = A.detach()
    for i in range(channels_in):
        ## build repeated kernel weights
        kernels = [np.array([]) for z in range(kernel_size**2)]
        for j in range(channels_out):
            weights = conv_weight[j][(j+i)%channels_in].reshape(kernel_size**2)
            for k in range(kernel_size**2):
                kernels[k] = np.append(kernels[k], np.repeat(weights[k].detach(), img_width**2))
        
        ## apply binary masks to allow for padding
        for k in range(kernel_size**2):
            kernels[k] = np.multiply(kernels[k], bin_masks[k])
            kernels[k] = np.multiply(kernels[k], np.repeat(A, img_width**2))
            kernels[k] = np.roll(kernels[k], (img_width**2)*i)
            np.savetxt('../weights/layer{}-conv{}bn{}-ch{}-k{}.bin'.format(layerNum, convNum, convNum, i,k+1), kernels[k], delimiter=',')
        
    # save biases
    np.savetxt('../weights/layer{}-conv{}bn{}-bias.bin'.format(layerNum, convNum, convNum), np.repeat(b.detach(), img_width**2), delimiter=',')
    
        

In [5]:
def downsamplingConvbn(img_width, kernel_size, channels_in, channels_out, conv_weight, bn, layerNum, convNum, bin_masks):
    A = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    b = -(bn.weight * bn.running_mean / torch.sqrt(bn.running_var + bn.eps)) + bn.bias
    A = A.detach()
    for i in range(channels_in):
        ## build repeated kernel weights
        kernels = [np.array([]) for z in range(kernel_size**2)]
        for j in range(channels_out):
            weights = conv_weight[j][(j+i)%channels_in].reshape(kernel_size**2)
            for k in range(kernel_size**2):
                kernels[k] = np.append(kernels[k], np.repeat(weights[k].detach(), img_width**2))
        
        ## apply binary masks to allow for padding
        for k in range(kernel_size**2):
            kernels[k] = np.multiply(kernels[k], altalena(np.tile(bin_masks[k], 2), img_width))
            kernels[k] = np.multiply(kernels[k], np.repeat(A.numpy(), img_width**2))
            kernels[k] = np.add(kernels[k], np.roll(kernels[k], (img_width**2)*(channels_in*-1)+1))[:(img_width**2)*(channels_in)]
            np.savetxt('../weights/layer{}-conv{}bn{}-ch{}-k{}.bin'.format(layerNum, convNum, convNum, i,k+1), altalena(np.roll(kernels[k], (img_width**2)*i), img_width), delimiter=',')
            np.savetxt('../weights/layer{}-conv{}bn{}-ch{}-k{}.bin'.format(layerNum, convNum, convNum, i+channels_in,k+1), altalena(np.roll(kernels[k], (i*(img_width**2))-1), img_width), delimiter=',')
    bs = b.detach().numpy()
    bias_corrected1 = altalena(np.repeat(bs[:int(channels_out/2)], img_width**2),img_width)
    bias_corrected2 = altalena(np.roll(np.repeat(bs[int(channels_out/2):channels_out], img_width**2), -1), img_width)
    np.savetxt('../weights/layer{}-conv{}bn{}-bias1.bin'.format(layerNum, convNum, convNum), bias_corrected1, delimiter=',')
    np.savetxt('../weights/layer{}-conv{}bn{}-bias2.bin'.format(layerNum, convNum, convNum), bias_corrected2, delimiter=',')
                

In [6]:
def dx(img_width, channels_in, channels_out, downsample_weight, downsample_bias, layerNum, convNum, bin_masks):
    
    A = downsample_bias.weight / torch.sqrt(downsample_bias.running_var + downsample_bias.eps)
    b = -(downsample_bias.weight * downsample_bias.running_mean / torch.sqrt(downsample_bias.running_var + downsample_bias.eps)) + downsample_bias.bias
    A = A.detach()
    for i in range(channels_in):
        kernel = np.array([])
        for j in range(channels_out):
            weight = downsample_weight[j][(j+i)%channels_in].reshape(1)[0]
            kernel = np.append(kernel, np.repeat(weight.detach(), img_width**2))
        
        kernel = np.multiply(kernel, altalena(np.tile(bin_masks[4],2), img_width))
        kernel = np.multiply(kernel, np.repeat(A.numpy(), img_width**2))
        kernel = np.add(kernel, np.roll(kernel, (img_width**2)*(channels_in*-1)+1))[:(img_width**2)*(channels_in)]
        
        np.savetxt('../weights/layer{}dx-conv{}bn{}-ch{}-k1.bin'.format(layerNum, convNum, convNum, i), altalena(np.roll(kernel, (img_width**2)*i), img_width), delimiter=',')
        np.savetxt('../weights/layer{}dx-conv{}bn{}-ch{}-k1.bin'.format(layerNum, convNum, convNum, i+channels_in), altalena(np.roll(kernel, (i*(img_width**2))-1), img_width), delimiter=',')
    bs = b.detach().numpy()
    bias_corrected1 = altalena(np.repeat(bs[:int(channels_out/2)], img_width**2), img_width)
    bias_corrected2 = altalena(np.repeat(bs[int(channels_out/2):channels_out], img_width**2), img_width)
    np.savetxt('../weights/layer{}dx-conv{}bn{}-bias1.bin'.format(layerNum, convNum, convNum), bias_corrected1, delimiter=',')
    np.savetxt('../weights/layer{}dx-conv{}bn{}-bias2.bin'.format(layerNum, convNum, convNum), bias_corrected2, delimiter=',')

In [7]:
def fc(fc_weight, i, features_out):
    for j in range(features_out):
        np.savetxt('../weights/fc{}-f{}.bin'.format(i, j+1), fc_weight[j].reshape(-1).detach().numpy())
    

In [9]:
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn4 = nn.BatchNorm2d(64) 
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn5 = nn.BatchNorm2d(128)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn6 = nn.BatchNorm2d(128)

        self.fc1 = nn.Linear(128*4*4,100, bias =  False)
        self.fc2 = nn.Linear(100, 10, bias = False)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu (x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = self.bn4(x)
        x = F.relu(x)
        x = self.conv5(x)
        x = self.bn5(x)
        x = F.relu(x)
        x = self.conv6(x)
        x = self.bn6(x)
        x = F.relu(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.sigmoid(x)
        return x


model = AlexNet()
network_state_dict = torch.load('AlexNet.pth', map_location='cpu')
model.load_state_dict(network_state_dict)

model.eval()

AlexNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv6): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn6): 

In [13]:
img_width = 32
channels = 3
kernel_size = 3
bin_masks = build_masks(img_width, kernel_size=3, out_channels = 16)

initialLayer(img_width, kernel_size, channels_in=channels, channels_out=16, 
             conv_weight=model.conv1.weight, bn=model.bn1, bin_masks=bin_masks)


downsamplingConvbn(img_width, kernel_size, channels_in=16, channels_out=32,
                   conv_weight=model.conv2.weight, bn=model.bn2,
                   layerNum=2, convNum=1, bin_masks=bin_masks)

img_width = int(img_width/2)

bin_masks = build_masks(img_width, kernel_size=3, out_channels = 32)

downsamplingConvbn(img_width, kernel_size, channels_in=32, channels_out=64,
                   conv_weight=model.conv3.weight, bn=model.bn3,
                   layerNum=3, convNum=1, bin_masks=bin_masks)

img_width = int(img_width/2)

bin_masks = build_masks(img_width, kernel_size=3, out_channels = 64)

convbn(img_width, kernel_size, channels_in=64, channels_out=64, 
       conv_weight=model.conv4.weight, bn=model.bn4, 
       layerNum=4, convNum=1, bin_masks=bin_masks)


downsamplingConvbn(img_width, kernel_size, channels_in=64, channels_out=128,
                   conv_weight=model.conv5.weight, bn=model.bn5,
                   layerNum=5, convNum=1, bin_masks=bin_masks)

img_width = int(img_width/2)

bin_masks = build_masks(img_width, kernel_size=3, out_channels = 128)

convbn(img_width, kernel_size, channels_in=128, channels_out=128, 
       conv_weight=model.conv6.weight, bn=model.bn6, 
       layerNum=6, convNum=1, bin_masks=bin_masks)


# layer
fc(model.fc1.weight, 128*4*4, 100)
fc(model.fc2.weight, 100, 10)


tensor([ 0.8445,  0.5438,  0.4589,  0.2408,  0.1983,  0.0510,  0.0862, -0.3756,
        -0.0574,  0.6503, -0.0759,  0.2794,  0.0695, -0.6765,  0.1684,  0.1373],
       grad_fn=<AddBackward0>)


  kernels[k] = np.multiply(kernels[k], np.repeat(A[i], channels_out*(img_width**2)))
  kernels[k] = np.multiply(kernels[k], np.repeat(A, img_width**2))
