In [2]:
import torch
from torchvision import transforms
import torchvision
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import math

# Fetch the CIFAR-10 ResNet-20 definition and its pretrained weights through torch.hub
# (the hub keeps a local cache, so later runs reuse the downloaded repository).
model = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_resnet20", pretrained=True)

Using cache found in /Users/narger/.cache/torch/hub/chenyaofo_pytorch-cifar-models_master


In [3]:
# Switch the module to evaluation mode; eval() returns the module itself, so as the
# last expression of the cell it also displays the network structure.
model.eval()

CifarResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias

In [4]:
def build_mask(starting_padding, ending_padding, window_length, max_length):
    """Build a 0/1 mask with exactly ``max_length`` entries.

    Layout of the result:
      * the first ``starting_padding`` entries are 0;
      * after that, groups of ``window_length`` ones followed by a single 0
        repeat (the last group is cut off where the body ends);
      * the last ``ending_padding`` entries are forced to 0.

    Args:
        starting_padding: number of leading zeros (negative values count as 0).
        ending_padding: number of trailing entries forced to zero (negative
            values count as 0; values above ``max_length`` zero the whole mask).
        window_length: number of consecutive ones in each group (values <= 0
            yield a mask without any ones).
        max_length: length of the returned mask.

    Returns:
        A list of ``max_length`` ints, each 0 or 1.

    Raises:
        ValueError: if ``max_length`` is negative.
    """
    if max_length < 0:
        raise ValueError("max_length must be non-negative, got {}".format(max_length))

    lead = max(starting_padding, 0)
    # Clamp the tail so an oversized ending_padding cannot index outside the mask.
    tail = min(max(ending_padding, 0), max_length)
    window = max(window_length, 0)
    period = window + 1  # each group is `window` ones plus one separating zero
    body_end = max_length - tail

    mask = [0] * max_length
    # Only positions between the leading and trailing padding can be 1; the
    # range is empty when the paddings overlap.
    for pos in range(lead, body_end):
        if (pos - lead) % period < window:
            mask[pos] = 1
    return mask

In [7]:
img_width = 32
padding = 1

# One (starting_padding, ending_padding, window_length) triple per mask, in the
# order bin_mask1 .. bin_mask9. Reading each 1024-entry plane as a row-major
# 32x32 grid, the zeroed border of every mask is noted on the right.
_mask_args = (
    (img_width + 1, 0, img_width - 1),              # first row and first column
    (img_width, 0, img_width ** 2),                 # first row
    (img_width, 0, img_width - 1),                  # first row and last column
    (1, 0, img_width - 1),                          # first column
    (0, 0, img_width ** 2),                         # nothing (all ones)
    (0, 1, img_width - 1),                          # last column
    (1, img_width - 1, img_width - 1),              # last row and first column
    (0, img_width, img_width ** 2),                 # last row
    (0, img_width + 1, img_width - 1),              # last row and last column
)

# Each single-plane mask is repeated 16 times, giving one copy per 1024-entry block.
(bin_mask1, bin_mask2, bin_mask3,
 bin_mask4, bin_mask5, bin_mask6,
 bin_mask7, bin_mask8, bin_mask9) = [
    np.tile(np.array(build_mask(lead, tail, window, img_width ** 2)), 16)
    for lead, tail, window in _mask_args
]

## Initial Layer

This layer is a special case. If you are interested in how Algorithm 2 works, see the Conv1+Bn1 section instead.

In [24]:
# Fold the inference-time BatchNorm into an affine map y = A * x + b, using the
# layer's learned weight/bias and its stored running statistics.
A = model.bn1.weight / torch.sqrt(model.bn1.running_var + model.bn1.eps)
b = -(model.bn1.weight * model.bn1.running_mean / torch.sqrt(model.bn1.running_var + model.bn1.eps)) + model.bn1.bias
print("A: {}\n\nb: {}".format(A, b))

# Work on explicit float64 NumPy copies instead of handing torch tensors to
# NumPy functions. The 3x3 kernels are flattened so index 0..8 selects a tap;
# the weight layout is (16 output channels, 3 input channels, 9 taps).
conv_w = model.conv1.weight.detach().numpy().astype(np.float64).reshape(16, 3, 9)
bn_scale = A.detach().numpy().astype(np.float64)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)

# Only 3 of the 16 blocks of 1024 entries carry weights; the other 13 stay zero.
unused_blocks = np.zeros((16 - 3) * 1024)

# NOTE: np.savetxt does not create directories, so 'weights/' must already exist.
for i in range(16):
    for tap, tap_mask in enumerate(tap_masks):
        # Spread the tap weight of every input channel over its own 1024-entry block.
        k = np.concatenate((np.repeat(conv_w[i, :, tap], 1024), unused_blocks))
        # Zero the entries selected by the binary mask of this tap.
        k = np.multiply(k, tap_mask)
        # Scale the whole vector by the BatchNorm factor of output channel i.
        k = np.multiply(k, bn_scale[i])
        # Unlike the later layers, no np.roll is applied here (that step was
        # disabled in this cell).
        np.savetxt('weights/conv1bn1-ch{}-k{}.bin'.format(i, tap + 1), k, delimiter=',')

# Bias vector: each b[j] repeated over its 1024-entry block.
np.savetxt('weights/conv1bn1-bias.bin', np.repeat(b.detach().numpy(), 1024), delimiter=',')

A: tensor([0.1612, 0.0118, 0.3612, 0.1782, 0.4380, 0.3213, 0.3849, 0.2592, 0.4423,
        0.2112, 0.0057, 0.2565, 0.3944, 0.1887, 0.2364, 0.4103],
       grad_fn=<DivBackward0>)

b: tensor([ 1.5648e-01, -1.5094e-03,  2.5889e-01,  2.2570e-01,  6.0322e-01,
        -1.9860e-02,  2.1012e-01,  2.5123e-01,  2.7174e-01,  1.6638e-01,
        -9.3954e-05,  2.8412e-01, -5.0124e-01,  1.6165e-01,  1.4023e-01,
        -3.3344e-01], grad_fn=<AddBackward0>)


## Layer1[0]: Conv1+Bn1

In [12]:
# Fold the inference-time BatchNorm into an affine map y = A * x + b, using the
# layer's learned weight/bias and its stored running statistics.
A = model.layer1[0].bn1.weight / torch.sqrt(model.layer1[0].bn1.running_var + model.layer1[0].bn1.eps)
b = -(model.layer1[0].bn1.weight * model.layer1[0].bn1.running_mean / torch.sqrt(model.layer1[0].bn1.running_var + model.layer1[0].bn1.eps)) + model.layer1[0].bn1.bias
print("A: {}\n\nb: {}".format(A, b))

# Work on explicit float64 NumPy copies instead of handing torch tensors to
# NumPy functions. The 3x3 kernels are flattened so index 0..8 selects a tap;
# the weight layout is (16 output channels, 16 input channels, 9 taps).
conv_w = model.layer1[0].conv1.weight.detach().numpy().astype(np.float64).reshape(16, 16, 9)
# One BatchNorm factor per 1024-entry block: block j is scaled by A[j].
bn_scale = np.repeat(A.detach().numpy().astype(np.float64), 1024)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(16)

# NOTE: np.savetxt does not create directories, so 'weights/' must already exist.
for i in range(16):
    # For offset i, block j holds the weight that links output channel j to
    # input channel (j + i) % 16.
    in_ch = (out_ch + i) % 16
    for tap, tap_mask in enumerate(tap_masks):
        k = np.repeat(conv_w[out_ch, in_ch, tap], 1024)
        # The binary mask is required to put zeros in values coming from other channels and so on
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, bn_scale)
        # Cyclically shift the vector by i blocks of 1024 entries before saving.
        np.savetxt('weights/layer1-conv1bn1-ch{}-k{}.bin'.format(i, tap + 1), np.roll(k, 1024 * i), delimiter=',')

# Bias vector: each b[j] repeated over its 1024-entry block.
np.savetxt('weights/layer1-conv1bn1-bias.bin', np.repeat(b.detach().numpy(), 1024), delimiter=',')

A: tensor([1.2913, 1.4062, 1.5185, 1.3907, 1.2287, 1.1996, 2.3732, 1.5576, 1.2722,
        0.8757, 1.2385, 1.2919, 0.6880, 1.8140, 1.3442, 1.9891],
       grad_fn=<DivBackward0>)

b: tensor([ 0.1030, -0.5854,  0.2304,  0.1297,  0.6845,  0.6135, -0.9472,  0.2344,
         0.6070,  0.5252,  0.2973,  0.2070,  0.3914, -0.2960,  0.2170,  0.9015],
       grad_fn=<AddBackward0>)


## Layer1[0]: Conv2+Bn2

In [13]:
# Fold the inference-time BatchNorm into an affine map y = A * x + b, using the
# layer's learned weight/bias and its stored running statistics.
A = model.layer1[0].bn2.weight / torch.sqrt(model.layer1[0].bn2.running_var + model.layer1[0].bn2.eps)
b = -(model.layer1[0].bn2.weight * model.layer1[0].bn2.running_mean / torch.sqrt(model.layer1[0].bn2.running_var + model.layer1[0].bn2.eps)) + model.layer1[0].bn2.bias
print("A: {}\n\nb: {}".format(A, b))

# Work on explicit float64 NumPy copies instead of handing torch tensors to
# NumPy functions. The 3x3 kernels are flattened so index 0..8 selects a tap;
# the weight layout is (16 output channels, 16 input channels, 9 taps).
conv_w = model.layer1[0].conv2.weight.detach().numpy().astype(np.float64).reshape(16, 16, 9)
# One BatchNorm factor per 1024-entry block: block j is scaled by A[j].
bn_scale = np.repeat(A.detach().numpy().astype(np.float64), 1024)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(16)

# NOTE: np.savetxt does not create directories, so 'weights/' must already exist.
for i in range(16):
    # For offset i, block j holds the weight that links output channel j to
    # input channel (j + i) % 16.
    in_ch = (out_ch + i) % 16
    for tap, tap_mask in enumerate(tap_masks):
        k = np.repeat(conv_w[out_ch, in_ch, tap], 1024)
        # Zero the entries selected by the binary mask of this tap.
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, bn_scale)
        # Cyclically shift the vector by i blocks of 1024 entries before saving.
        np.savetxt('weights/layer1-conv2bn2-ch{}-k{}.bin'.format(i, tap + 1), np.roll(k, 1024 * i), delimiter=',')

# Bias vector: each b[j] repeated over its 1024-entry block.
np.savetxt('weights/layer1-conv2bn2-bias.bin', np.repeat(b.detach().numpy(), 1024), delimiter=',')

A: tensor([1.7887, 1.8403, 1.9545, 1.4659, 2.3055, 1.2812, 2.2585, 1.7085, 1.8006,
        1.1454, 2.3978, 1.1639, 1.8895, 1.8854, 1.6533, 2.3907],
       grad_fn=<DivBackward0>)

b: tensor([-0.1006,  0.3757,  0.0637,  0.2553, -0.0838,  0.1023, -0.3876,  0.2350,
        -0.3053, -0.1146,  0.2839,  0.0036,  0.0646,  0.2457,  0.2339, -0.1216],
       grad_fn=<AddBackward0>)


## Layer1[1]: Conv1+Bn1

In [14]:
# Fold the inference-time BatchNorm into an affine map y = A * x + b, using the
# layer's learned weight/bias and its stored running statistics.
A = model.layer1[1].bn1.weight / torch.sqrt(model.layer1[1].bn1.running_var + model.layer1[1].bn1.eps)
b = -(model.layer1[1].bn1.weight * model.layer1[1].bn1.running_mean / torch.sqrt(model.layer1[1].bn1.running_var + model.layer1[1].bn1.eps)) + model.layer1[1].bn1.bias
print("A: {}\n\nb: {}".format(A, b))

# Work on explicit float64 NumPy copies instead of handing torch tensors to
# NumPy functions. The 3x3 kernels are flattened so index 0..8 selects a tap;
# the weight layout is (16 output channels, 16 input channels, 9 taps).
conv_w = model.layer1[1].conv1.weight.detach().numpy().astype(np.float64).reshape(16, 16, 9)
# One BatchNorm factor per 1024-entry block: block j is scaled by A[j].
bn_scale = np.repeat(A.detach().numpy().astype(np.float64), 1024)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(16)

# NOTE: np.savetxt does not create directories, so 'weights/' must already exist.
# The files for model.layer1[1] are written under the 'layer2-' prefix.
for i in range(16):
    # For offset i, block j holds the weight that links output channel j to
    # input channel (j + i) % 16.
    in_ch = (out_ch + i) % 16
    for tap, tap_mask in enumerate(tap_masks):
        k = np.repeat(conv_w[out_ch, in_ch, tap], 1024)
        # Zero the entries selected by the binary mask of this tap.
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, bn_scale)
        # Cyclically shift the vector by i blocks of 1024 entries before saving.
        np.savetxt('weights/layer2-conv1bn1-ch{}-k{}.bin'.format(i, tap + 1), np.roll(k, 1024 * i), delimiter=',')

# Bias vector: each b[j] repeated over its 1024-entry block.
np.savetxt('weights/layer2-conv1bn1-bias.bin', np.repeat(b.detach().numpy(), 1024), delimiter=',')

A: tensor([0.9512, 0.9473, 0.9568, 0.8786, 0.9743, 0.7121, 0.8849, 0.8243, 0.8628,
        0.9009, 0.7942, 0.9439, 0.7756, 1.5203, 1.0352, 0.8761],
       grad_fn=<DivBackward0>)

b: tensor([ 0.1961,  0.4934,  0.4587,  0.0786,  0.1283,  0.1039,  0.3006,  0.4095,
         0.4549,  0.1759,  0.1904,  0.4729,  0.1916, -0.2593, -0.1272,  0.3947],
       grad_fn=<AddBackward0>)


## Layer1[1]: Conv2+Bn2

In [15]:
# Fold the inference-time BatchNorm into an affine map y = A * x + b, using the
# layer's learned weight/bias and its stored running statistics.
A = model.layer1[1].bn2.weight / torch.sqrt(model.layer1[1].bn2.running_var + model.layer1[1].bn2.eps)
b = -(model.layer1[1].bn2.weight * model.layer1[1].bn2.running_mean / torch.sqrt(model.layer1[1].bn2.running_var + model.layer1[1].bn2.eps)) + model.layer1[1].bn2.bias
print("A: {}\n\nb: {}".format(A, b))

# Work on explicit float64 NumPy copies instead of handing torch tensors to
# NumPy functions. The 3x3 kernels are flattened so index 0..8 selects a tap;
# the weight layout is (16 output channels, 16 input channels, 9 taps).
conv_w = model.layer1[1].conv2.weight.detach().numpy().astype(np.float64).reshape(16, 16, 9)
# One BatchNorm factor per 1024-entry block: block j is scaled by A[j].
bn_scale = np.repeat(A.detach().numpy().astype(np.float64), 1024)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(16)

# NOTE: np.savetxt does not create directories, so 'weights/' must already exist.
# The files for model.layer1[1] are written under the 'layer2-' prefix.
for i in range(16):
    # For offset i, block j holds the weight that links output channel j to
    # input channel (j + i) % 16.
    in_ch = (out_ch + i) % 16
    for tap, tap_mask in enumerate(tap_masks):
        k = np.repeat(conv_w[out_ch, in_ch, tap], 1024)
        # Zero the entries selected by the binary mask of this tap.
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, bn_scale)
        # Cyclically shift the vector by i blocks of 1024 entries before saving.
        np.savetxt('weights/layer2-conv2bn2-ch{}-k{}.bin'.format(i, tap + 1), np.roll(k, 1024 * i), delimiter=',')

# Bias vector: each b[j] repeated over its 1024-entry block.
np.savetxt('weights/layer2-conv2bn2-bias.bin', np.repeat(b.detach().numpy(), 1024), delimiter=',')

A: tensor([1.7232, 1.8680, 1.9703, 1.6121, 2.5556, 1.2564, 2.3420, 1.0104, 1.5695,
        1.5707, 2.2538, 0.8839, 2.6276, 2.3788, 0.9073, 2.1205],
       grad_fn=<DivBackward0>)

b: tensor([-0.1004,  0.0595, -0.0448,  0.0679, -0.0494,  0.1123, -0.0568,  0.1378,
        -0.0281,  0.0318,  0.1749,  0.1474,  0.2554,  0.3776,  0.0850, -0.1313],
       grad_fn=<AddBackward0>)


## Layer1[2]: Conv1+Bn1

In [19]:
# Fold the inference-time BatchNorm into an affine map y = A * x + b, using the
# layer's learned weight/bias and its stored running statistics.
A = model.layer1[2].bn1.weight / torch.sqrt(model.layer1[2].bn1.running_var + model.layer1[2].bn1.eps)
b = -(model.layer1[2].bn1.weight * model.layer1[2].bn1.running_mean / torch.sqrt(model.layer1[2].bn1.running_var + model.layer1[2].bn1.eps)) + model.layer1[2].bn1.bias
print("A: {}\n\nb: {}".format(A, b))

# Work on explicit float64 NumPy copies instead of handing torch tensors to
# NumPy functions. The 3x3 kernels are flattened so index 0..8 selects a tap;
# the weight layout is (16 output channels, 16 input channels, 9 taps).
conv_w = model.layer1[2].conv1.weight.detach().numpy().astype(np.float64).reshape(16, 16, 9)
# One BatchNorm factor per 1024-entry block: block j is scaled by A[j].
bn_scale = np.repeat(A.detach().numpy().astype(np.float64), 1024)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(16)

# NOTE: np.savetxt does not create directories, so 'weights/' must already exist.
# The files for model.layer1[2] are written under the 'layer3-' prefix.
for i in range(16):
    # For offset i, block j holds the weight that links output channel j to
    # input channel (j + i) % 16.
    in_ch = (out_ch + i) % 16
    for tap, tap_mask in enumerate(tap_masks):
        k = np.repeat(conv_w[out_ch, in_ch, tap], 1024)
        # Zero the entries selected by the binary mask of this tap.
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, bn_scale)
        # Cyclically shift the vector by i blocks of 1024 entries before saving.
        np.savetxt('weights/layer3-conv1bn1-ch{}-k{}.bin'.format(i, tap + 1), np.roll(k, 1024 * i), delimiter=',')

# Bias vector: each b[j] repeated over its 1024-entry block.
np.savetxt('weights/layer3-conv1bn1-bias.bin', np.repeat(b.detach().numpy(), 1024), delimiter=',')

A: tensor([0.4727, 0.6169, 0.6216, 0.6147, 0.5112, 0.4841, 0.6642, 0.9769, 0.6833,
        0.7435, 0.5036, 1.4373, 0.5373, 0.7193, 0.4917, 0.5333],
       grad_fn=<DivBackward0>)

b: tensor([ 0.0948, -0.0024,  0.1346,  0.3193, -0.0341, -0.0317,  0.0181, -0.0961,
         0.2751,  0.0100, -0.0352,  0.2013,  0.0923,  0.1866, -0.0487, -0.1360],
       grad_fn=<AddBackward0>)


## Layer1[2]: Conv2+Bn2

In [28]:
# Fold the inference-time BatchNorm into an affine map y = A * x + b, using the
# layer's learned weight/bias and its stored running statistics.
A = model.layer1[2].bn2.weight / torch.sqrt(model.layer1[2].bn2.running_var + model.layer1[2].bn2.eps)
b = -(model.layer1[2].bn2.weight * model.layer1[2].bn2.running_mean / torch.sqrt(model.layer1[2].bn2.running_var + model.layer1[2].bn2.eps)) + model.layer1[2].bn2.bias
print("A: {}\n\nb: {}".format(A, b))

# Work on explicit float64 NumPy copies instead of handing torch tensors to
# NumPy functions. The 3x3 kernels are flattened so index 0..8 selects a tap;
# the weight layout is (16 output channels, 16 input channels, 9 taps).
conv_w = model.layer1[2].conv2.weight.detach().numpy().astype(np.float64).reshape(16, 16, 9)
# One BatchNorm factor per 1024-entry block: block j is scaled by A[j].
bn_scale = np.repeat(A.detach().numpy().astype(np.float64), 1024)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(16)

# NOTE: np.savetxt does not create directories, so 'weights/' must already exist.
# The files for model.layer1[2] are written under the 'layer3-' prefix.
for i in range(16):
    # For offset i, block j holds the weight that links output channel j to
    # input channel (j + i) % 16.
    in_ch = (out_ch + i) % 16
    for tap, tap_mask in enumerate(tap_masks):
        k = np.repeat(conv_w[out_ch, in_ch, tap], 1024)
        # Zero the entries selected by the binary mask of this tap.
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, bn_scale)
        # Cyclically shift the vector by i blocks of 1024 entries before saving.
        np.savetxt('weights/layer3-conv2bn2-ch{}-k{}.bin'.format(i, tap + 1), np.roll(k, 1024 * i), delimiter=',')

# Bias vector: each b[j] repeated over its 1024-entry block.
np.savetxt('weights/layer3-conv2bn2-bias.bin', np.repeat(b.detach().numpy(), 1024), delimiter=',')

A: tensor([1.4420, 2.3079, 2.0004, 1.8164, 3.1310, 1.0375, 1.6597, 1.2105, 1.5634,
        1.9707, 2.7410, 0.9834, 3.0938, 2.3891, 1.0242, 1.6197],
       grad_fn=<DivBackward0>)

b: tensor([-0.0065, -0.3248, -0.2871,  0.1492, -0.2957,  0.0458, -0.0024,  0.0452,
         0.3127, -0.1194,  0.1744,  0.0773,  0.3441,  0.1627,  0.0258, -0.1078],
       grad_fn=<AddBackward0>)


## Layer2[0]: Conv1+Bn1 SX DownsamplingFast

In [21]:
def altalena(v):
    """Return a copy of ``v`` as a list with most positions replaced by 0.

    An entry is kept only when its index is even AND falls in the first 32
    positions of its 64-wide group (index % 64 < 32). Odd indices are always
    zeroed, and so is the second half of every group of 64.
    """
    return [
        v[pos] if (pos % 2 == 0 and pos % 64 < 32) else 0
        for pos in range(len(v))
    ]

In [23]:
# Single-rotation variant.
#
# Exports layer2[0].conv1 with layer2[0].bn1 folded in (BN(y) = A * y + b).
# Despite the .bin extension the files are plain text: np.savetxt writes one
# value per line for a 1-D array.
bn = model.layer2[0].bn1
bn_std = torch.sqrt(bn.running_var + bn.eps)
A = bn.weight / bn_std
b = -(bn.weight * bn.running_mean / bn_std) + bn.bias

n_in, n_out = 16, 32    # conv1 maps 16 input channels to 32 output channels
n_slots = 1024          # slots per channel block
half = n_in * n_slots   # 16384: length of one saved vector

# (out, in, tap) view of the 3x3 kernels; tap t is element t of the row-major kernel.
conv_w = model.layer2[0].conv1.weight.detach().numpy().reshape(n_out, n_in, 9)
out_ch = np.arange(n_out)

# Loop-invariant pieces, computed once instead of once per (shift, tap):
# the nine tap masks restricted by altalena, and the per-output-channel scale A.
tap_masks = [
    np.asarray(altalena(np.tile(mask, 2)))
    for mask in (bin_mask1, bin_mask2, bin_mask3, bin_mask4, bin_mask5,
                 bin_mask6, bin_mask7, bin_mask8, bin_mask9)
]
scale = np.repeat(A.detach().numpy(), n_slots)

for i in range(n_in):
    # Block j carries the weight linking output channel j to input channel (j + i) % 16.
    in_ch = (out_ch + i) % n_in
    for tap, tap_mask in enumerate(tap_masks, start=1):
        # Promote to float64 so all arithmetic and the saved text use double precision.
        k = np.repeat(conv_w[out_ch, in_ch, tap - 1].astype(np.float64), n_slots)
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, scale)

        # Place side by side CH1[0]-CH16[0]-CH1[1]-CH16[1]-CH1[2]...:
        # the second half (output channels 16..31) is shifted by one slot so it
        # lands in the odd slots that altalena zeroed, then only the first
        # 16384 entries are kept.
        k = np.add(k, np.roll(k, -half + 1))[:half]

        # Output channels 0..15 sit on even slots; rolling by one extra slot
        # (-1) brings channels 16..31 onto even slots before altalena filters.
        # NOTE(review): the 1024 * i roll presumably matches a rotation applied
        # by the consumer of these files -- confirm.
        np.savetxt('weights/layer4-conv1bn1-ch{}-k{}.bin'.format(i, tap),
                   altalena(np.roll(k, n_slots * i)), delimiter=',')
        np.savetxt('weights/layer4-conv1bn1-ch{}-k{}.bin'.format(i + n_in, tap),
                   altalena(np.roll(k, n_slots * i - 1)), delimiter=',')

bias = b.detach().numpy()
bias_corrected016 = altalena(np.repeat(bias[:16], 1024))
# N.B. (author's note) the 16..32 half must be rotated right by one.
# The roll by -1 below only changes the last slot of each 1024-run, which is an
# odd index and is zeroed by altalena, so it does not affect the saved values.
bias_corrected1632 = altalena(np.roll(np.repeat(bias[16:32], 1024), -1))

np.savetxt('weights/layer4-conv1bn1-bias1.bin', bias_corrected016, delimiter=',')
np.savetxt('weights/layer4-conv1bn1-bias2.bin', bias_corrected1632, delimiter=',')

## Layer2[0]: Conv1+Bn1

In [29]:
# Exports layer2[0].conv1 with layer2[0].bn1 folded in (BN(y) = A * y + b).
# This cell writes the same file names as the "DownsamplingFast" cell above it,
# so whichever of the two runs last determines the files on disk.
# Despite the .bin extension the files are plain text (np.savetxt, one value per line).
bn = model.layer2[0].bn1
bn_std = torch.sqrt(bn.running_var + bn.eps)
A = bn.weight / bn_std
b = -(bn.weight * bn.running_mean / bn_std) + bn.bias
print("A: {}\n\nb: {}".format(A, b))

n_in, n_out = 16, 32    # conv1 maps 16 input channels to 32 output channels
n_slots = 1024          # slots per channel block
half = n_in * n_slots   # 16384: length of one saved vector

# (out, in, tap) view of the 3x3 kernels; tap t is element t of the row-major kernel.
conv_w = model.layer2[0].conv1.weight.detach().numpy().reshape(n_out, n_in, 9)
out_ch = np.arange(n_out)

# Loop-invariant pieces, computed once instead of once per (shift, tap).
tap_masks = [
    np.asarray(altalena(np.tile(mask, 2)))
    for mask in (bin_mask1, bin_mask2, bin_mask3, bin_mask4, bin_mask5,
                 bin_mask6, bin_mask7, bin_mask8, bin_mask9)
]
scale = np.repeat(A.detach().numpy(), n_slots)

for i in range(n_in):
    # Block j carries the weight linking output channel j to input channel (j + i) % 16.
    in_ch = (out_ch + i) % n_in
    for tap, tap_mask in enumerate(tap_masks, start=1):
        # Promote to float64 so all arithmetic and the saved text use double precision.
        k = np.repeat(conv_w[out_ch, in_ch, tap - 1].astype(np.float64), n_slots)
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, scale)

        # Place side by side CH1[0]-CH16[0]-CH1[1]-CH16[1]-CH1[2]...:
        # output channels 16..31 are shifted by one slot into the odd slots
        # zeroed by altalena, then only the first 16384 entries are kept.
        k = np.add(k, np.roll(k, -half + 1))[:half]

        # Channels 0..15 are read from even slots; the extra -1 roll moves
        # channels 16..31 onto even slots before altalena filters them.
        np.savetxt('weights/layer4-conv1bn1-ch{}-k{}.bin'.format(i, tap),
                   altalena(np.roll(k, n_slots * i)), delimiter=',')
        np.savetxt('weights/layer4-conv1bn1-ch{}-k{}.bin'.format(i + n_in, tap),
                   altalena(np.roll(k, n_slots * i - 1)), delimiter=',')

bias = b.detach().numpy()
bias_corrected016 = altalena(np.repeat(bias[:16], 1024))
bias_corrected1632 = altalena(np.repeat(bias[16:32], 1024))

np.savetxt('weights/layer4-conv1bn1-bias1.bin', bias_corrected016, delimiter=',')
np.savetxt('weights/layer4-conv1bn1-bias2.bin', bias_corrected1632, delimiter=',')

A: tensor([0.8141, 0.6018, 0.8376, 0.6278, 0.8170, 0.6266, 0.6413, 1.0384, 0.7254,
        0.7201, 0.7055, 0.4483, 0.5056, 0.6135, 0.7686, 0.8012, 0.8914, 0.6620,
        0.7178, 0.7898, 0.4978, 0.8644, 0.5556, 0.5686, 0.8200, 0.8590, 0.8207,
        0.9914, 0.6215, 0.6291, 0.6780, 0.9323], grad_fn=<DivBackward0>)

b: tensor([-0.2723,  0.4193,  0.3852,  0.1376, -0.2815, -0.4210, -0.0659,  0.0707,
        -0.8956,  0.2584, -0.5191,  0.2679, -0.0894,  0.3139,  0.1139,  0.8343,
        -0.2597,  0.1324, -0.2290,  0.2976,  0.6561, -0.0629,  0.3382,  0.2354,
        -0.1413,  0.1835, -0.1575, -0.3741,  0.4320,  0.0388,  0.2430, -0.3959],
       grad_fn=<AddBackward0>)


## Layer2[0]: Downsample Conv+Bn (DX)

In [32]:
# Exports the downsample branch of layer2[0]: downsample[0] (1x1 convolution,
# 16 -> 32 channels) with downsample[1] (BatchNorm) folded in as A * y + b.
# Despite the .bin extension the files are plain text (np.savetxt, one value per line).
bn = model.layer2[0].downsample[1]
bn_std = torch.sqrt(bn.running_var + bn.eps)
A = bn.weight / bn_std
b = -(bn.weight * bn.running_mean / bn_std) + bn.bias
print("A: {}\n\nb: {}".format(A, b))

n_in, n_out = 16, 32    # channels in / out of the 1x1 convolution
n_slots = 1024          # slots per channel block
half = n_in * n_slots   # 16384: length of one saved vector

# The kernel is 1x1, so each (out, in) pair has exactly one weight.
conv_w = model.layer2[0].downsample[0].weight.detach().numpy().reshape(n_out, n_in)
out_ch = np.arange(n_out)

# Loop-invariant pieces, computed once instead of once per shift.
center_mask = np.asarray(altalena(np.tile(bin_mask5, 2)))
scale = np.repeat(A.detach().numpy(), n_slots)

for i in range(n_in):
    # Block j carries the weight linking output channel j to input channel (j + i) % 16.
    # Promote to float64 so all arithmetic and the saved text use double precision.
    k1 = np.repeat(conv_w[out_ch, (out_ch + i) % n_in].astype(np.float64), n_slots)
    k1 = np.multiply(k1, center_mask)
    k1 = np.multiply(k1, scale)

    # Place side by side CH1[0]-CH16[0]-CH1[1]-CH16[1]-CH1[2]...:
    # output channels 16..31 are shifted by one slot into the odd slots zeroed
    # by altalena, then only the first 16384 entries are kept.
    k1 = np.add(k1, np.roll(k1, -half + 1))[:half]

    print(k1[0])

    # Channels 0..15 are read from even slots; the extra -1 roll moves
    # channels 16..31 onto even slots before altalena filters them.
    np.savetxt('weights/layer4dx-conv1bn1-ch{}-k1.bin'.format(i),
               altalena(np.roll(k1, n_slots * i)), delimiter=',')
    np.savetxt('weights/layer4dx-conv1bn1-ch{}-k1.bin'.format(i + n_in),
               altalena(np.roll(k1, n_slots * i - 1)), delimiter=',')

bias = b.detach().numpy()
bias_corrected016 = altalena(np.repeat(bias[:16], 1024))
bias_corrected1632 = altalena(np.repeat(bias[16:32], 1024))

np.savetxt('weights/layer4dx-conv1bn1-bias1.bin', bias_corrected016, delimiter=',')
np.savetxt('weights/layer4dx-conv1bn1-bias2.bin', bias_corrected1632, delimiter=',')

A: tensor([1.6594, 1.0308, 1.2125, 1.3979, 1.4946, 1.1669, 0.9063, 1.0715, 1.4008,
        1.4876, 1.2260, 0.6966, 1.4486, 1.2567, 1.2241, 1.3491, 1.3268, 0.9128,
        1.1587, 1.1738, 1.6666, 1.2284, 0.9154, 0.8671, 1.3171, 1.2489, 0.8858,
        1.1073, 1.3800, 0.9875, 0.7897, 1.3719], grad_fn=<DivBackward0>)

b: tensor([-0.3772,  0.3567, -0.2218,  0.0857, -0.1227,  0.3010, -0.1443,  0.0978,
         0.0103, -0.1659,  0.2356,  0.0668, -0.0912, -0.3083, -0.2250,  0.1427,
        -0.1723, -0.0055,  0.0505,  0.4404, -0.0018, -0.1341,  0.4309,  0.0110,
         0.0985,  0.3431,  0.0982,  0.4294, -0.0392, -0.0426, -0.0612, -0.0200],
       grad_fn=<AddBackward0>)
0.05225483749636339
0.02010823338312462
0.17328202992647768
0.34971032783107603
0.10023729103920065
0.08835127783088126
0.12291725034051737
0.10110638386563231
-0.066929951827702
0.07851494665516423
-0.0862680785652774
-0.004445228822175962
-0.03972815183723721
0.15218021798123793
-0.18105619819658791
-0.08752643368645341


## Layer2[0]: Conv2+Bn2

In [33]:
# Tap masks for the 16-wide feature maps: one mask per element of the 3x3
# kernel (bin_mask1 .. bin_mask9), each built for img_width ** 2 slots and
# repeated 32 times (once per channel block).
# NOTE(review): presumably these zero the slots whose neighbour would fall
# outside the image border -- confirm against build_mask's consumer.
img_width = 16
padding = 1

n_pixels = img_width ** 2
row_window = img_width - 1

# (starting_padding, ending_padding, window_length) for masks 1..9, in order.
mask_specs = [
    (img_width + 1, 0, row_window),
    (img_width, 0, n_pixels),
    (img_width, 0, row_window),
    (1, 0, row_window),
    (0, 0, n_pixels),
    (0, 1, row_window),
    (1, img_width - 1, row_window),
    (0, img_width, n_pixels),
    (0, img_width + 1, row_window),
]

(bin_mask1, bin_mask2, bin_mask3,
 bin_mask4, bin_mask5, bin_mask6,
 bin_mask7, bin_mask8, bin_mask9) = [
    np.tile(np.array(build_mask(start_pad, end_pad, window, n_pixels)), 32)
    for start_pad, end_pad, window in mask_specs
]

In [35]:
# Exports layer2[0].conv2 with layer2[0].bn2 folded in (BN(y) = A * y + b).
# Despite the .bin extension the files are plain text: np.savetxt writes one
# value per line for a 1-D array.
bn = model.layer2[0].bn2
bn_std = torch.sqrt(bn.running_var + bn.eps)
A = bn.weight / bn_std
b = -(bn.weight * bn.running_mean / bn_std) + bn.bias

n_ch = 32       # the convolution has 32 input and 32 output channels
n_slots = 256   # slots per channel block (img_width ** 2 with img_width = 16)

# (out, in, tap) view of the 3x3 kernels; tap t is element t of the row-major kernel.
# Converted to NumPy explicitly, as the downsampling cells do, rather than
# passing torch tensors to np.repeat / np.multiply.
conv_w = model.layer2[0].conv2.weight.detach().numpy().reshape(n_ch, n_ch, 9)
out_ch = np.arange(n_ch)

tap_masks = (bin_mask1, bin_mask2, bin_mask3, bin_mask4, bin_mask5,
             bin_mask6, bin_mask7, bin_mask8, bin_mask9)
# Block j of every vector is multiplied by A[j]; loop-invariant, so built once.
scale = np.repeat(A.detach().numpy(), n_slots)

for i in range(n_ch):
    # Block j carries the weight linking output channel j to input channel (j + i) % 32.
    in_ch = (out_ch + i) % n_ch
    for tap, tap_mask in enumerate(tap_masks, start=1):
        # Promote to float64 so all arithmetic and the saved text use double precision.
        k = np.repeat(conv_w[out_ch, in_ch, tap - 1].astype(np.float64), n_slots)
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, scale)
        # NOTE(review): the 256 * i roll presumably matches a rotation applied
        # by the consumer of these files -- confirm.
        np.savetxt('weights/layer4-conv2bn2-ch{}-k{}.bin'.format(i, tap),
                   np.roll(k, n_slots * i), delimiter=',')

np.savetxt('weights/layer4-conv2bn2-bias.bin', np.repeat(b.detach().numpy(), n_slots), delimiter=',')

## Layer2[1]: Conv1+Bn1

In [36]:
# Exports layer2[1].conv1 with layer2[1].bn1 folded in (BN(y) = A * y + b).
# Despite the .bin extension the files are plain text: np.savetxt writes one
# value per line for a 1-D array.
bn = model.layer2[1].bn1
bn_std = torch.sqrt(bn.running_var + bn.eps)
A = bn.weight / bn_std
b = -(bn.weight * bn.running_mean / bn_std) + bn.bias

n_ch = 32       # the convolution has 32 input and 32 output channels
n_slots = 256   # slots per channel block (img_width ** 2 with img_width = 16)

# (out, in, tap) view of the 3x3 kernels; tap t is element t of the row-major kernel.
# Converted to NumPy explicitly, as the downsampling cells do, rather than
# passing torch tensors to np.repeat / np.multiply.
conv_w = model.layer2[1].conv1.weight.detach().numpy().reshape(n_ch, n_ch, 9)
out_ch = np.arange(n_ch)

tap_masks = (bin_mask1, bin_mask2, bin_mask3, bin_mask4, bin_mask5,
             bin_mask6, bin_mask7, bin_mask8, bin_mask9)
# Block j of every vector is multiplied by A[j]; loop-invariant, so built once.
scale = np.repeat(A.detach().numpy(), n_slots)

for i in range(n_ch):
    # Block j carries the weight linking output channel j to input channel (j + i) % 32.
    in_ch = (out_ch + i) % n_ch
    for tap, tap_mask in enumerate(tap_masks, start=1):
        # Promote to float64 so all arithmetic and the saved text use double precision.
        k = np.repeat(conv_w[out_ch, in_ch, tap - 1].astype(np.float64), n_slots)
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, scale)
        # NOTE(review): the 256 * i roll presumably matches a rotation applied
        # by the consumer of these files -- confirm.
        np.savetxt('weights/layer5-conv1bn1-ch{}-k{}.bin'.format(i, tap),
                   np.roll(k, n_slots * i), delimiter=',')

np.savetxt('weights/layer5-conv1bn1-bias.bin', np.repeat(b.detach().numpy(), n_slots), delimiter=',')

## Layer2[1]: Conv2+Bn2

In [38]:
# Exports layer2[1].conv2 with layer2[1].bn2 folded in (BN(y) = A * y + b).
# Despite the .bin extension the files are plain text: np.savetxt writes one
# value per line for a 1-D array.
bn = model.layer2[1].bn2
bn_std = torch.sqrt(bn.running_var + bn.eps)
A = bn.weight / bn_std
b = -(bn.weight * bn.running_mean / bn_std) + bn.bias

n_ch = 32       # the convolution has 32 input and 32 output channels
n_slots = 256   # slots per channel block (img_width ** 2 with img_width = 16)

# (out, in, tap) view of the 3x3 kernels; tap t is element t of the row-major kernel.
# Converted to NumPy explicitly, as the downsampling cells do, rather than
# passing torch tensors to np.repeat / np.multiply.
conv_w = model.layer2[1].conv2.weight.detach().numpy().reshape(n_ch, n_ch, 9)
out_ch = np.arange(n_ch)

tap_masks = (bin_mask1, bin_mask2, bin_mask3, bin_mask4, bin_mask5,
             bin_mask6, bin_mask7, bin_mask8, bin_mask9)
# Block j of every vector is multiplied by A[j]; loop-invariant, so built once.
scale = np.repeat(A.detach().numpy(), n_slots)

for i in range(n_ch):
    # Block j carries the weight linking output channel j to input channel (j + i) % 32.
    in_ch = (out_ch + i) % n_ch
    for tap, tap_mask in enumerate(tap_masks, start=1):
        # Promote to float64 so all arithmetic and the saved text use double precision.
        k = np.repeat(conv_w[out_ch, in_ch, tap - 1].astype(np.float64), n_slots)
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, scale)
        # NOTE(review): the 256 * i roll presumably matches a rotation applied
        # by the consumer of these files -- confirm.
        np.savetxt('weights/layer5-conv2bn2-ch{}-k{}.bin'.format(i, tap),
                   np.roll(k, n_slots * i), delimiter=',')

np.savetxt('weights/layer5-conv2bn2-bias.bin', np.repeat(b.detach().numpy(), n_slots), delimiter=',')

## Layer2[2]: Conv1+Bn1

In [40]:
# Exports layer2[2].conv1 with layer2[2].bn1 folded in (BN(y) = A * y + b).
# Despite the .bin extension the files are plain text: np.savetxt writes one
# value per line for a 1-D array.
bn = model.layer2[2].bn1
bn_std = torch.sqrt(bn.running_var + bn.eps)
A = bn.weight / bn_std
b = -(bn.weight * bn.running_mean / bn_std) + bn.bias

n_ch = 32       # the convolution has 32 input and 32 output channels
n_slots = 256   # slots per channel block (img_width ** 2 with img_width = 16)

# (out, in, tap) view of the 3x3 kernels; tap t is element t of the row-major kernel.
# Converted to NumPy explicitly, as the downsampling cells do, rather than
# passing torch tensors to np.repeat / np.multiply.
conv_w = model.layer2[2].conv1.weight.detach().numpy().reshape(n_ch, n_ch, 9)
out_ch = np.arange(n_ch)

tap_masks = (bin_mask1, bin_mask2, bin_mask3, bin_mask4, bin_mask5,
             bin_mask6, bin_mask7, bin_mask8, bin_mask9)
# Block j of every vector is multiplied by A[j]; loop-invariant, so built once.
scale = np.repeat(A.detach().numpy(), n_slots)

for i in range(n_ch):
    # Block j carries the weight linking output channel j to input channel (j + i) % 32.
    in_ch = (out_ch + i) % n_ch
    for tap, tap_mask in enumerate(tap_masks, start=1):
        # Promote to float64 so all arithmetic and the saved text use double precision.
        k = np.repeat(conv_w[out_ch, in_ch, tap - 1].astype(np.float64), n_slots)
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, scale)
        # NOTE(review): the 256 * i roll presumably matches a rotation applied
        # by the consumer of these files -- confirm.
        np.savetxt('weights/layer6-conv1bn1-ch{}-k{}.bin'.format(i, tap),
                   np.roll(k, n_slots * i), delimiter=',')

np.savetxt('weights/layer6-conv1bn1-bias.bin', np.repeat(b.detach().numpy(), n_slots), delimiter=',')

## Layer2[2]: Conv2+Bn2

In [41]:
# Exports layer2[2].conv2 with layer2[2].bn2 folded in (BN(y) = A * y + b).
# Despite the .bin extension the files are plain text: np.savetxt writes one
# value per line for a 1-D array.
bn = model.layer2[2].bn2
bn_std = torch.sqrt(bn.running_var + bn.eps)
A = bn.weight / bn_std
b = -(bn.weight * bn.running_mean / bn_std) + bn.bias

n_ch = 32       # the convolution has 32 input and 32 output channels
n_slots = 256   # slots per channel block (img_width ** 2 with img_width = 16)

# (out, in, tap) view of the 3x3 kernels; tap t is element t of the row-major kernel.
# Converted to NumPy explicitly, as the downsampling cells do, rather than
# passing torch tensors to np.repeat / np.multiply.
conv_w = model.layer2[2].conv2.weight.detach().numpy().reshape(n_ch, n_ch, 9)
out_ch = np.arange(n_ch)

tap_masks = (bin_mask1, bin_mask2, bin_mask3, bin_mask4, bin_mask5,
             bin_mask6, bin_mask7, bin_mask8, bin_mask9)
# Block j of every vector is multiplied by A[j]; loop-invariant, so built once.
scale = np.repeat(A.detach().numpy(), n_slots)

for i in range(n_ch):
    # Block j carries the weight linking output channel j to input channel (j + i) % 32.
    in_ch = (out_ch + i) % n_ch
    for tap, tap_mask in enumerate(tap_masks, start=1):
        # Promote to float64 so all arithmetic and the saved text use double precision.
        k = np.repeat(conv_w[out_ch, in_ch, tap - 1].astype(np.float64), n_slots)
        k = np.multiply(k, tap_mask)
        k = np.multiply(k, scale)
        # NOTE(review): the 256 * i roll presumably matches a rotation applied
        # by the consumer of these files -- confirm.
        np.savetxt('weights/layer6-conv2bn2-ch{}-k{}.bin'.format(i, tap),
                   np.roll(k, n_slots * i), delimiter=',')

np.savetxt('weights/layer6-conv2bn2-bias.bin', np.repeat(b.detach().numpy(), n_slots), delimiter=',')

## Layer3[0]: Conv1+Bn1 SX

In [42]:
# Tap masks for the 16-wide feature maps feeding layer3[0]: one mask per
# element of the 3x3 kernel (bin_mask1 .. bin_mask9), each built for
# img_width ** 2 slots and repeated 32 times (once per channel block).
# NOTE(review): presumably these zero the slots whose neighbour would fall
# outside the image border -- confirm against build_mask's consumer.
img_width = 16
padding = 1

n_pixels = img_width ** 2
row_window = img_width - 1

# (starting_padding, ending_padding, window_length) for masks 1..9, in order.
mask_specs = [
    (img_width + 1, 0, row_window),
    (img_width, 0, n_pixels),
    (img_width, 0, row_window),
    (1, 0, row_window),
    (0, 0, n_pixels),
    (0, 1, row_window),
    (1, img_width - 1, row_window),
    (0, img_width, n_pixels),
    (0, img_width + 1, row_window),
]

(bin_mask1, bin_mask2, bin_mask3,
 bin_mask4, bin_mask5, bin_mask6,
 bin_mask7, bin_mask8, bin_mask9) = [
    np.tile(np.array(build_mask(start_pad, end_pad, window, n_pixels)), 32)
    for start_pad, end_pad, window in mask_specs
]

In [43]:
def altalena2(v):
    """Return a list copy of ``v`` keeping only a sparse subset of positions.

    An element survives only when its index is even AND falls in the first
    half (offsets 0..15) of its 32-element group; every other position is
    replaced by the integer 0.

    Args:
        v: any indexable sequence supporting ``len`` (list, 1-D array, ...).

    Returns:
        A new Python list with the same length as ``v``.
    """
    return [
        v[pos] if pos % 2 == 0 and pos % 32 < 16 else 0
        for pos in range(len(v))
    ]

In [44]:
# Fold layer3[0].bn1 (inference statistics) into a per-channel affine map:
#   bn(x) = A * x + b
bn_layer = model.layer3[0].bn1
A = bn_layer.weight / torch.sqrt(bn_layer.running_var + bn_layer.eps)
b = -(bn_layer.weight * bn_layer.running_mean / torch.sqrt(bn_layer.running_var + bn_layer.eps)) + bn_layer.bias
print("A: {}\n\nb: {}".format(A, b))

# conv1 weights viewed as (out_channel, in_channel, tap); the 9 taps of each
# 3x3 kernel are in row-major order, so tap index t corresponds to file "k{t+1}".
conv_taps = model.layer3[0].conv1.weight.detach().numpy().reshape(64, 32, 9)

# BN scale expanded so every 256-slot output-channel segment gets its factor.
bn_scale = np.repeat(A.detach().numpy(), 256)

# Loop-invariant per-tap masks: the 32-segment border mask is duplicated to
# cover all 64 output segments, then thinned by altalena2. Computed once here
# instead of once per (rotation, tap) pair.
tap_masks = [
    np.asarray(altalena2(np.tile(border_mask, 2)))
    for border_mask in (bin_mask1, bin_mask2, bin_mask3,
                        bin_mask4, bin_mask5, bin_mask6,
                        bin_mask7, bin_mask8, bin_mask9)
]

out_ch = np.arange(64)
kernel_path = 'weights/layer7-conv1bn1-ch{}-k{}.bin'

for i in range(32):
    # For rotation i, output channel j is paired with input channel (j + i) % 32.
    in_ch = (out_ch + i) % 32

    for tap, tap_mask in enumerate(tap_masks):
        # One 256-slot segment per output channel, filled with that channel's
        # tap weight (float64, matching the dtype the files were written with).
        kernel = np.repeat(conv_taps[out_ch, in_ch, tap].astype(np.float64), 256)
        kernel = np.multiply(kernel, tap_mask)
        kernel = np.multiply(kernel, bn_scale)

        # Interleave CH1[0]-CH16[0]-CH1[1]-CH16[1]-CH1[2]...: the second half
        # of the vector is added to the first half shifted by one slot, so its
        # (even-indexed) values land in the odd slots of the first 8192 entries.
        kernel = np.add(kernel, np.roll(kernel, -8192 + 1))[:8192]

        # altalena2 keeps even slots only: without the extra shift that selects
        # the values coming from the first half (file ch{i}); shifted by -1 it
        # selects the values coming from the second half (file ch{i+32}).
        np.savetxt(kernel_path.format(i, tap + 1), altalena2(np.roll(kernel, 256 * i)), delimiter=',')
        np.savetxt(kernel_path.format(i + 32, tap + 1), altalena2(np.roll(kernel, 256 * i - 1)), delimiter=',')


bias_values = b.detach().numpy()
bias_thinned = altalena2(np.repeat(bias_values, 256))
# Interleaved full bias vector; not written to disk by this cell, kept in the
# namespace in case another cell reads it.
bias_corrected = np.add(bias_thinned, np.roll(bias_thinned, -8192 + 1))[:8192]
# Despite the "016"/"1632" suffixes these cover channels 0..31 and 32..63.
bias_corrected016 = altalena2(np.repeat(bias_values[:32], 256))
bias_corrected1632 = altalena2(np.roll(np.repeat(bias_values[32:64], 256), -1))

np.savetxt('weights/layer7-conv1bn1-bias1.bin', bias_corrected016, delimiter=',')
np.savetxt('weights/layer7-conv1bn1-bias2.bin', bias_corrected1632, delimiter=',')

A: tensor([0.7852, 0.6988, 0.7153, 0.5844, 0.7026, 0.5671, 0.7342, 0.7735, 0.6820,
        0.6612, 0.5970, 0.5450, 0.6615, 0.6018, 0.6589, 0.7381, 0.7584, 0.6420,
        0.7446, 0.6767, 0.8471, 0.6572, 0.7782, 0.6296, 0.7325, 0.5878, 0.7295,
        0.7023, 0.5688, 0.6702, 0.5775, 0.8381, 0.5502, 0.6512, 0.6194, 0.7252,
        0.6100, 0.7107, 0.6952, 0.4918, 0.6784, 0.7418, 0.8401, 0.6253, 0.6673,
        0.6501, 0.6103, 0.7182, 0.7388, 0.7017, 0.7574, 0.7854, 0.7315, 0.6495,
        0.7971, 0.6357, 0.6459, 0.5802, 0.7215, 0.6131, 0.7307, 0.6417, 0.7794,
        0.8286], grad_fn=<DivBackward0>)

b: tensor([-0.1588,  0.0817,  0.2918,  0.1824,  0.0592, -0.2113, -0.0125, -0.3414,
         0.0232, -0.0052, -0.2336, -0.1512,  0.1029,  0.3411,  0.2257, -0.0721,
         0.3679,  0.2816, -0.2067,  0.3021,  0.0033,  0.2673, -0.0598,  0.0198,
         0.2362,  0.0603,  0.0112,  0.1465,  0.8076,  0.4792, -0.0515, -0.0544,
         0.1555,  0.3247, -0.2614, -0.2994, -0.1041, -0.2443, -0.0653,  

## Layer3[0]: Conv1+Bn1 DX

In [46]:
# Fold the batch-norm of the layer3[0] downsample path (downsample[1], inference
# statistics) into a per-channel affine map: bn(x) = A * x + b
bn_layer = model.layer3[0].downsample[1]
A = bn_layer.weight / torch.sqrt(bn_layer.running_var + bn_layer.eps)
b = -(bn_layer.weight * bn_layer.running_mean / torch.sqrt(bn_layer.running_var + bn_layer.eps)) + bn_layer.bias
print("A: {}\n\nb: {}".format(A, b))

# downsample[0] holds a single weight per (out_channel, in_channel) pair.
shortcut_weights = model.layer3[0].downsample[0].weight.detach().numpy().reshape(64, 32)

# BN scale expanded so every 256-slot output-channel segment gets its factor.
bn_scale = np.repeat(A.detach().numpy(), 256)

# Loop-invariant mask: all-ones mask duplicated to 64 segments and thinned by
# altalena2. Computed once instead of on every iteration.
center_mask = np.asarray(altalena2(np.tile(bin_mask5, 2)))

out_ch = np.arange(64)

for i in range(32):
    # For rotation i, output channel j is paired with input channel (j + i) % 32.
    k1 = np.repeat(shortcut_weights[out_ch, (out_ch + i) % 32].astype(np.float64), 256)
    k1 = np.multiply(k1, center_mask)
    k1 = np.multiply(k1, bn_scale)

    # Interleave CH1[0]-CH16[0]-CH1[1]-CH16[1]-CH1[2]...: the second half of
    # the vector is added to the first half shifted by one slot.
    k1 = np.add(k1, np.roll(k1, -8192 + 1))[:8192]

    # Progress/sanity output, one value per rotation.
    print(k1[0])

    # Even slots without the extra shift go to file ch{i}; with the -1 shift
    # the values that sat in odd slots are selected for file ch{i+32}.
    np.savetxt('weights/layer7dx-conv1bn1-ch{}-k1.bin'.format(i), altalena2(np.roll(k1, 256 * i)), delimiter=',')
    np.savetxt('weights/layer7dx-conv1bn1-ch{}-k1.bin'.format(i+32), altalena2(np.roll(k1, 256 * i - 1)), delimiter=',')

bias_values = b.detach().numpy()
# Despite the "016"/"1632" suffixes these cover channels 0..31 and 32..63.
bias_corrected016 = altalena2(np.repeat(bias_values[:32], 256))
bias_corrected1632 = altalena2(np.roll(np.repeat(bias_values[32:64], 256), -1))

np.savetxt('weights/layer7dx-conv1bn1-bias1.bin', bias_corrected016, delimiter=',')
np.savetxt('weights/layer7dx-conv1bn1-bias2.bin', bias_corrected1632, delimiter=',')

A: tensor([0.9636, 0.8384, 0.6860, 0.8404, 0.9047, 0.9746, 0.7416, 0.7377, 0.9653,
        0.9845, 0.8502, 0.7280, 0.9929, 0.8763, 0.9123, 0.5750, 0.9231, 0.6414,
        0.7063, 0.6996, 0.8063, 0.6810, 0.9202, 0.8036, 1.0214, 0.8607, 0.7410,
        0.8437, 0.6967, 0.8093, 0.6429, 1.3858, 0.6277, 0.7279, 1.0041, 0.7585,
        0.8431, 0.5789, 0.6564, 0.7140, 0.7016, 0.7634, 0.8902, 0.8297, 0.9412,
        0.7452, 0.8048, 0.6787, 0.9225, 0.7063, 0.5947, 0.9477, 0.7277, 0.6615,
        0.8845, 0.8250, 0.7366, 0.8169, 0.8526, 1.0482, 1.0458, 0.7799, 0.8699,
        0.8760], grad_fn=<DivBackward0>)

b: tensor([-2.6746e-02, -5.8056e-02, -2.9641e-03,  2.9841e-02, -3.3699e-02,
        -2.0143e-02,  5.4205e-02,  7.4022e-06,  4.4684e-03, -8.3803e-02,
        -3.9280e-03, -1.4146e-02, -6.6965e-02, -5.8474e-02, -1.7255e-01,
        -4.2936e-02,  5.7749e-02, -1.0887e-01, -6.1774e-02, -1.0028e-01,
        -5.6817e-02, -1.2174e-01, -3.0950e-02,  1.1160e-02, -1.2527e-01,
        -2.3630e-02, -7.347

In [47]:
# Side length of the square feature maps that the following cells pack,
# img_width ** 2 = 64 slots per channel.
img_width = 8
padding = 1

# One border mask per 3x3 kernel tap (k1..k9, row-major). Each entry is
# (starting_padding, ending_padding, window_length) for build_mask; the
# resulting 0/1 vector of img_width ** 2 entries is tiled 64 times, i.e. one
# copy per 64-slot channel segment.
# NOTE(review): the zeros appear to mark positions where the tap would read
# outside the image (zero padding) - confirm against the consumer.
(bin_mask1, bin_mask2, bin_mask3,
 bin_mask4, bin_mask5, bin_mask6,
 bin_mask7, bin_mask8, bin_mask9) = (
    np.tile(np.array(build_mask(lead, trail, window, img_width ** 2)), 64)
    for lead, trail, window in (
        (img_width + 1, 0, img_width - 1),      # k1
        (img_width, 0, img_width ** 2),         # k2
        (img_width, 0, img_width - 1),          # k3
        (1, 0, img_width - 1),                  # k4
        (0, 0, img_width ** 2),                 # k5 (all ones)
        (0, 1, img_width - 1),                  # k6
        (1, img_width - 1, img_width - 1),      # k7
        (0, img_width, img_width ** 2),         # k8
        (0, img_width + 1, img_width - 1),      # k9
    )
)

## Layer3[0]: Conv2+Bn2

In [48]:
# Fold layer3[0].bn2 (inference statistics) into a per-channel affine map:
#   bn(x) = A * x + b
bn_layer = model.layer3[0].bn2
A = bn_layer.weight / torch.sqrt(bn_layer.running_var + bn_layer.eps)
b = -(bn_layer.weight * bn_layer.running_mean / torch.sqrt(bn_layer.running_var + bn_layer.eps)) + bn_layer.bias

# conv2 weights viewed as (out_channel, in_channel, tap); the 9 taps of each
# 3x3 kernel are in row-major order, so tap index t corresponds to file "k{t+1}".
conv_taps = model.layer3[0].conv2.weight.detach().numpy().reshape(64, 64, 9)

# BN scale expanded so every 64-slot output-channel segment gets its factor.
bn_scale = np.repeat(A.detach().numpy(), 64)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(64)

for i in range(64):
    # For rotation i, output channel j is paired with input channel (j + i) % 64.
    in_ch = (out_ch + i) % 64

    for tap, tap_mask in enumerate(tap_masks):
        # One 64-slot segment per output channel, filled with that channel's
        # tap weight (float64, matching the dtype the files were written with).
        kernel = np.repeat(conv_taps[out_ch, in_ch, tap].astype(np.float64), 64)
        kernel = np.multiply(kernel, tap_mask)   # zero the masked border slots
        kernel = np.multiply(kernel, bn_scale)   # fold the BN scale into the weights

        # Shift the whole vector by i segments before writing.
        # NOTE(review): presumably aligns with the rotation applied by the
        # consumer of these files - confirm.
        np.savetxt('weights/layer7-conv2bn2-ch{}-k{}.bin'.format(i, tap + 1), np.roll(kernel, 64 * i), delimiter=',')

# BN shift, repeated once per slot of each output-channel segment.
np.savetxt('weights/layer7-conv2bn2-bias.bin', np.repeat(b.detach().numpy(), 64), delimiter=',')

## Layer3[1]: Conv1+Bn1

In [51]:
# Fold layer3[1].bn1 (inference statistics) into a per-channel affine map:
#   bn(x) = A * x + b
bn_layer = model.layer3[1].bn1
A = bn_layer.weight / torch.sqrt(bn_layer.running_var + bn_layer.eps)
b = -(bn_layer.weight * bn_layer.running_mean / torch.sqrt(bn_layer.running_var + bn_layer.eps)) + bn_layer.bias

# conv1 weights viewed as (out_channel, in_channel, tap); the 9 taps of each
# 3x3 kernel are in row-major order, so tap index t corresponds to file "k{t+1}".
conv_taps = model.layer3[1].conv1.weight.detach().numpy().reshape(64, 64, 9)

# BN scale expanded so every 64-slot output-channel segment gets its factor.
bn_scale = np.repeat(A.detach().numpy(), 64)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(64)

for i in range(64):
    # For rotation i, output channel j is paired with input channel (j + i) % 64.
    in_ch = (out_ch + i) % 64

    for tap, tap_mask in enumerate(tap_masks):
        # One 64-slot segment per output channel, filled with that channel's
        # tap weight (float64, matching the dtype the files were written with).
        kernel = np.repeat(conv_taps[out_ch, in_ch, tap].astype(np.float64), 64)
        kernel = np.multiply(kernel, tap_mask)   # zero the masked border slots
        kernel = np.multiply(kernel, bn_scale)   # fold the BN scale into the weights

        # Shift the whole vector by i segments before writing.
        # NOTE(review): presumably aligns with the rotation applied by the
        # consumer of these files - confirm.
        np.savetxt('weights/layer8-conv1bn1-ch{}-k{}.bin'.format(i, tap + 1), np.roll(kernel, 64 * i), delimiter=',')

# BN shift, repeated once per slot of each output-channel segment.
np.savetxt('weights/layer8-conv1bn1-bias.bin', np.repeat(b.detach().numpy(), 64), delimiter=',')

## Layer3[1]: Conv2+Bn2

In [52]:
# Fold layer3[1].bn2 (inference statistics) into a per-channel affine map:
#   bn(x) = A * x + b
bn_layer = model.layer3[1].bn2
A = bn_layer.weight / torch.sqrt(bn_layer.running_var + bn_layer.eps)
b = -(bn_layer.weight * bn_layer.running_mean / torch.sqrt(bn_layer.running_var + bn_layer.eps)) + bn_layer.bias

# conv2 weights viewed as (out_channel, in_channel, tap); the 9 taps of each
# 3x3 kernel are in row-major order, so tap index t corresponds to file "k{t+1}".
conv_taps = model.layer3[1].conv2.weight.detach().numpy().reshape(64, 64, 9)

# BN scale expanded so every 64-slot output-channel segment gets its factor.
bn_scale = np.repeat(A.detach().numpy(), 64)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(64)

for i in range(64):
    # For rotation i, output channel j is paired with input channel (j + i) % 64.
    in_ch = (out_ch + i) % 64

    for tap, tap_mask in enumerate(tap_masks):
        # One 64-slot segment per output channel, filled with that channel's
        # tap weight (float64, matching the dtype the files were written with).
        kernel = np.repeat(conv_taps[out_ch, in_ch, tap].astype(np.float64), 64)
        kernel = np.multiply(kernel, tap_mask)   # zero the masked border slots
        kernel = np.multiply(kernel, bn_scale)   # fold the BN scale into the weights

        # Shift the whole vector by i segments before writing.
        # NOTE(review): presumably aligns with the rotation applied by the
        # consumer of these files - confirm.
        np.savetxt('weights/layer8-conv2bn2-ch{}-k{}.bin'.format(i, tap + 1), np.roll(kernel, 64 * i), delimiter=',')

# BN shift, repeated once per slot of each output-channel segment.
np.savetxt('weights/layer8-conv2bn2-bias.bin', np.repeat(b.detach().numpy(), 64), delimiter=',')

## Layer3[2]: Conv1+Bn1

In [53]:
# Fold layer3[2].bn1 (inference statistics) into a per-channel affine map:
#   bn(x) = A * x + b
bn_layer = model.layer3[2].bn1
A = bn_layer.weight / torch.sqrt(bn_layer.running_var + bn_layer.eps)
b = -(bn_layer.weight * bn_layer.running_mean / torch.sqrt(bn_layer.running_var + bn_layer.eps)) + bn_layer.bias

# conv1 weights viewed as (out_channel, in_channel, tap); the 9 taps of each
# 3x3 kernel are in row-major order, so tap index t corresponds to file "k{t+1}".
conv_taps = model.layer3[2].conv1.weight.detach().numpy().reshape(64, 64, 9)

# BN scale expanded so every 64-slot output-channel segment gets its factor.
bn_scale = np.repeat(A.detach().numpy(), 64)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(64)

for i in range(64):
    # For rotation i, output channel j is paired with input channel (j + i) % 64.
    in_ch = (out_ch + i) % 64

    for tap, tap_mask in enumerate(tap_masks):
        # One 64-slot segment per output channel, filled with that channel's
        # tap weight (float64, matching the dtype the files were written with).
        kernel = np.repeat(conv_taps[out_ch, in_ch, tap].astype(np.float64), 64)
        kernel = np.multiply(kernel, tap_mask)   # zero the masked border slots
        kernel = np.multiply(kernel, bn_scale)   # fold the BN scale into the weights

        # Shift the whole vector by i segments before writing.
        # NOTE(review): presumably aligns with the rotation applied by the
        # consumer of these files - confirm.
        np.savetxt('weights/layer9-conv1bn1-ch{}-k{}.bin'.format(i, tap + 1), np.roll(kernel, 64 * i), delimiter=',')

# BN shift, repeated once per slot of each output-channel segment.
np.savetxt('weights/layer9-conv1bn1-bias.bin', np.repeat(b.detach().numpy(), 64), delimiter=',')

## Layer3[2]: Conv2+Bn2

In [95]:
# Fold layer3[2].bn2 (inference statistics) into a per-channel affine map:
#   bn(x) = A * x + b
bn_layer = model.layer3[2].bn2
A = bn_layer.weight / torch.sqrt(bn_layer.running_var + bn_layer.eps)
b = -(bn_layer.weight * bn_layer.running_mean / torch.sqrt(bn_layer.running_var + bn_layer.eps)) + bn_layer.bias

# conv2 weights viewed as (out_channel, in_channel, tap); the 9 taps of each
# 3x3 kernel are in row-major order, so tap index t corresponds to file "k{t+1}".
conv_taps = model.layer3[2].conv2.weight.detach().numpy().reshape(64, 64, 9)

# BN scale expanded so every 64-slot output-channel segment gets its factor.
bn_scale = np.repeat(A.detach().numpy(), 64)

tap_masks = (bin_mask1, bin_mask2, bin_mask3,
             bin_mask4, bin_mask5, bin_mask6,
             bin_mask7, bin_mask8, bin_mask9)
out_ch = np.arange(64)

for i in range(64):
    # For rotation i, output channel j is paired with input channel (j + i) % 64.
    in_ch = (out_ch + i) % 64

    for tap, tap_mask in enumerate(tap_masks):
        # One 64-slot segment per output channel, filled with that channel's
        # tap weight (float64, matching the dtype the files were written with).
        kernel = np.repeat(conv_taps[out_ch, in_ch, tap].astype(np.float64), 64)
        kernel = np.multiply(kernel, tap_mask)   # zero the masked border slots
        kernel = np.multiply(kernel, bn_scale)   # fold the BN scale into the weights

        # Shift the whole vector by i segments before writing.
        # NOTE(review): presumably aligns with the rotation applied by the
        # consumer of these files - confirm.
        np.savetxt('weights/layer9-conv2bn2-ch{}-k{}.bin'.format(i, tap + 1), np.roll(kernel, 64 * i), delimiter=',')

# BN shift, repeated once per slot of each output-channel segment.
np.savetxt('weights/layer9-conv2bn2-bias.bin', np.repeat(b.detach().numpy(), 64), delimiter=',')

## Fully Connected Layer

In [8]:
# Export the classifier weight matrix: transposed, flattened row by row, and
# written one value per line (savetxt's default layout for a 1-D array).
# NOTE(review): only model.fc.weight is written here; confirm model.fc.bias
# is exported elsewhere if the consumer needs it.
fc_weights_flat = model.fc.weight.detach().t().reshape(-1).numpy()
np.savetxt('weights/fc.bin', fc_weights_flat)