In [1]:
import numpy as np
import torch.nn as nn
import torch
from SparsityAnalysis import extract_patterns, SparseConvArrays

### A Simple Demo Test

In [3]:
weight = np.array(
        [[[[0., 0., 0.],
           [0., 1., 0.],
           [1., 1., 1.]],

          [[0., 0., 0.],
           [0., 1., 0.],
           [1., 1., 1.]],

          [[0., 0., 0.],
           [0., 1., 0.],
           [1., 1., 1.]]]])
weight.shape

In [5]:
patterns = np.array(extract_patterns(weight))
patterns

array([[0, 0, 0, 0, 1, 0, 1, 1, 1]])

In [6]:
sparse_conv_arrays = SparseConvArrays(weight, patterns)

In [7]:
offset = sparse_conv_arrays.offset
reorder = sparse_conv_arrays.reorder
index = sparse_conv_arrays.index
stride = sparse_conv_arrays.stride
weight = sparse_conv_arrays.weight
ptset = sparse_conv_arrays.ptset
print(f"offset:{offset}\n\nreorder:{reorder}\n\nindex:{index}\n\nstride:{stride}\n\nweight:{weight}\n\nptset:\n{ptset}")

offset:[0 3]

reorder:[0]

index:[0 1 2]

stride:[0 3]

weight:[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]

ptset:
[[[1 1]
  [2 0]
  [2 1]
  [2 2]]]


### Load Actual Weights from Pattern-Pruned ResNet-34

In [2]:
path = 'resnet34_6_pattern_connectivity_pruning.pt'
state_dict = torch.load(path, map_location=torch.device('cpu'))

# residual_conv_dict = {k:v.cpu().numpy() for (k,v) in state_dict.items() if "layer" in k and "conv" in k}
residual_convs = [v.cpu().numpy() for (k, v) in state_dict.items() if "layer" in k and "conv" in k]
data_shapes = [
    [1, 64, 32, 32], [1, 64, 32, 32], [1, 64, 32, 32], [1, 64, 32, 32], [1, 64, 32, 32], [1, 64, 32, 32],
    [1, 64, 32, 32], [1, 128, 16, 16], [1, 128, 16, 16], [1, 128, 16, 16], [1, 128, 16, 16], [1, 128, 16, 16],
    [1, 128, 16, 16], [1, 128, 16, 16], [1, 128, 16, 16], [1, 256, 8, 8], [1, 256, 8, 8], [1, 256, 8, 8],
    [1, 256, 8, 8], [1, 256, 8, 8], [1, 256, 8, 8], [1, 256, 8, 8], [1, 256, 8, 8], [1, 256, 8, 8],
    [1, 256, 8, 8], [1, 256, 8, 8], [1, 256, 8, 8], [1, 512, 4, 4], [1, 512, 4, 4], [1, 512, 4, 4],
    [1, 512, 4, 4], [1, 512, 4, 4],
]

### Correctness Check - unit test

In [13]:
from conv_naive import Convolution
conv = Convolution()
ip = np.ones((1,64,32,32)).astype(np.float32)
mask = np.ones((128,64,3,3)).astype(np.float32)
output_1 = conv.conv_multiple_filters(ip, mask)[0]
output_gt = nn.functional.conv2d(torch.tensor(ip), torch.tensor(mask),padding=1)
output_gt = output_gt.cpu().numpy()
# print(output_gt)
# print(output_1)
print(f'conv_cuda|nnpack: {np.allclose(output_1,output_gt)}')

conv_cuda|nnpack: True


### Correctness Check - full model

In [12]:
from conv_naive import Convolution
import numpy as np
conv = Convolution()
for idx in range(len(residual_convs[:])):
    input_data = np.ones(data_shapes[idx]).astype(np.float32)
    conv_mask = residual_convs[idx].astype(np.float32)
    output_1 = conv.conv_multiple_filters(input_data, conv_mask)[0]
    output_gt = nn.functional.conv2d(torch.tensor(input_data), torch.tensor(conv_mask),padding=1)
    output_gt = output_gt.cpu().numpy()
    print('*'*10 + f' conv layer {idx} ' + '*'*10)
    print(f'conv_cuda|nnpack: {np.allclose(output_1,output_gt)}\n')


********** conv layer 0 **********
conv_cuda|nnpack: False

********** conv layer 1 **********
conv_cuda|nnpack: False

********** conv layer 2 **********
conv_cuda|nnpack: False

********** conv layer 3 **********
conv_cuda|nnpack: False

********** conv layer 4 **********
conv_cuda|nnpack: False

********** conv layer 5 **********
conv_cuda|nnpack: False

********** conv layer 6 **********
conv_cuda|nnpack: False

********** conv layer 7 **********
conv_cuda|nnpack: False

********** conv layer 8 **********
conv_cuda|nnpack: False

********** conv layer 9 **********
conv_cuda|nnpack: False

********** conv layer 10 **********
conv_cuda|nnpack: False

********** conv layer 11 **********
conv_cuda|nnpack: False

********** conv layer 12 **********
conv_cuda|nnpack: False

********** conv layer 13 **********
conv_cuda|nnpack: False

********** conv layer 14 **********
conv_cuda|nnpack: False

********** conv layer 15 **********
conv_cuda|nnpack: False

********** conv layer 16 *********

### Time Cost w/o memory transfer - nnpack

In [14]:
from conv_naive import Convolution
import numpy as np
from tqdm import tqdm
import time
conv = Convolution()
cuda0 = torch.device('cuda:0')
cpu = torch.device('cpu')
total_time = 0

for idx in tqdm(range(len(residual_convs[:]))):
    input_data = np.ones(data_shapes[idx]).astype(np.float32)
    conv_mask = residual_convs[idx].astype(np.float32)
    input_data_g = torch.tensor(input_data, device = cuda0)
    conv_mask_g = torch.tensor(conv_mask, device = cuda0)
    start = time.time()
    output_gt = nn.functional.conv2d(input_data_g, conv_mask_g,padding=1)
    #output_gt = nn.functional.conv2d(torch.tensor(input_data), torch.tensor(conv_mask),padding=1)
    end =  time.time()
    total_time += end - start

print(f'{round(total_time,3)}s')



  0%|          | 0/32 [00:00<?, ?it/s]

### Time Cost w/o memory transfer - conv_naive

In [5]:
from conv_naive import Convolution
import numpy as np
from tqdm import tqdm
import time
conv = Convolution()
cuda0 = torch.device('cuda:0')
total_time = 0

for idx in tqdm(range(len(residual_convs[:]))):
    input_data = np.ones(data_shapes[idx]).astype(np.float32)
    conv_mask = residual_convs[idx].astype(np.float32)
    output_1, time_ = conv.conv_multiple_filters(input_data, conv_mask)
    total_time += time_

total_time



100%|██████████| 32/32 [00:00<00:00, 208.53it/s]


0.08981484746932983

### Space Cost Analysis

In [15]:
for idx in range(len(residual_convs[-3:-1])):
    patterns = np.array(extract_patterns(residual_convs[idx]))
    sparse_conv_arrays = SparseConvArrays(residual_convs[idx], patterns)
    offset = sparse_conv_arrays.offset
    reorder = sparse_conv_arrays.reorder
    index = sparse_conv_arrays.index
    stride = sparse_conv_arrays.stride
    weight = sparse_conv_arrays.weight
    ptset = sparse_conv_arrays.ptset
    #print(f"offset:{offset}\n\nreorder:{reorder}\n\nindex:{index}\n\nstride:{stride}\n\nweight:{weight}\n\nptset:\n{ptset}")
    #print(conv_layer_weight)
    ### Space
    print('*'*10 + f' conv layer {idx} ' + '*'*10)
    print(f'Normal_conv_mask:\n{residual_convs[idx].nbytes}')
    print(f'FKW_conv_mask:\n{offset.nbytes+reorder.nbytes+index.nbytes+stride.nbytes+weight.nbytes+ptset.nbytes}\n')
    #print(patterns)

********** conv layer 0 **********
Normal_conv_mask:
147456
FKW_conv_mask:
6600

********** conv layer 1 **********
Normal_conv_mask:
147456
FKW_conv_mask:
6600



In [16]:
147456/6600

22.341818181818184