## 掛載雲端硬碟

In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

## 更改檔案所在路徑


In [2]:
# # Change to your own folder !!!
# %cd /content/drive/MyDrive/your own folder/

## 載入函式庫


In [3]:
import os

import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets, transforms
import numpy as np

from models.resnet import ResNet50

## 超參數設定

In [4]:
# Hyperparameters for the pruning run. Everything a user is expected to tune lives here.

# Dataset label (not referenced by the other cells visible in this notebook).
DATASET = 'cifar10'
# Batch size used by the evaluation DataLoader in test().
TEST_BATCH_SIZE = 1000
# Request GPU execution; the model-loading cell ANDs this with torch.cuda.is_available().
CUDA = True
# Fraction of all BatchNorm scale factors (sorted by absolute value) that fall below the pruning threshold.
PRUNE_PERCENT = 0.9 # Change your prune ratio!
# Checkpoint of the trained model that will be pruned.
WEIGHT_PATH = './model_best.pth' # Change to your own folder !!!
# Destination file for the pruned model's cfg and state_dict.
PRUNE_PATH = './model_prune.pth' # Change to your own folder !!!

## 載入模型

In [5]:
# Use the GPU only when it was requested AND is actually available.
CUDA = CUDA and torch.cuda.is_available()
if CUDA:
    # Selecting a CUDA device raises on CPU-only machines, so do it only
    # after the availability check above.
    torch.cuda.set_device(0)

model = ResNet50(num_classes=10)
if CUDA:
    model.cuda()

if WEIGHT_PATH:
    if os.path.isfile(WEIGHT_PATH):
        # map_location lets a checkpoint that was saved from a GPU run be
        # restored on whichever device this session is using.
        checkpoint = torch.load(WEIGHT_PATH, map_location='cuda' if CUDA else 'cpu')
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        print('LOADING CHECKPOINT {} @EPOCH={}, BEST_PREC1={}'.format(WEIGHT_PATH,checkpoint['epoch'],best_prec1))

    else:
        # Best-effort: the notebook continues with randomly initialised weights.
        print("NO CHECKPOINT FOUND")

print(model)

LOADING CHECKPOINT ./model_best.pth @EPOCH=34, BEST_PREC1=0.9039999842643738
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsamp

## 進行剪枝
#### 計算所有Batch Normalization中的scale factor絕對值大小並排序
#### 利用設定好的PRUNE_PERCENT來取得閾值

In [6]:
# Gather |gamma| (the BatchNorm scale factors) of every BatchNorm2d layer into
# one flat CPU float tensor, in module order.
bn_scales = [
    m.weight.data.abs().flatten().float().cpu()
    for m in model.modules()
    if isinstance(m, nn.BatchNorm2d)
]
total = sum(scales.numel() for scales in bn_scales)
if total == 0:
    raise ValueError('model contains no BatchNorm2d layers; nothing to prune')

bn = torch.cat(bn_scales)

# Ascending sort: the value at position total * PRUNE_PERCENT is the cut-off
# below which channels get pruned.
y = torch.sort(bn).values

# Clamp the index so that PRUNE_PERCENT >= 1.0 cannot index past the end and
# a negative value cannot silently wrap around to the largest scale factors.
threshold_index = min(max(int(total * PRUNE_PERCENT), 0), total - 1)
threshold = y[threshold_index]


## 根據Batch Normalization Layer資訊建立CONFIG
#### 1. 複製Batch Normalization Layer的weight(也就是scale factor γ)
#### 2. 建立mask：scale factor絕對值大於threshold的channel設成1，小於或等於threshold的channel設成0
#### 3. mask的值加總後，會是剪枝後Layer對應的輸出channel數
#### 4. 最後得到要建立剪枝模型的CONFIG

In [7]:
# Running count of channels removed across all BatchNorm layers.
pruned = 0
# cfg        -> channel counts used to build the pruned network
# cfg_mask   -> per-layer keep/drop masks used while copying weights
# cfg_origin -> channel counts of the unpruned layers
cfg, cfg_mask, cfg_origin = [], [], []

In [8]:
# (Re)create the accumulators in this cell so that running it more than once
# never appends duplicate entries to cfg / cfg_mask / cfg_origin.
pruned = 0        # number of channels removed so far (plain int)
cfg = []          # per-BN channel counts used to build the pruned network
cfg_mask = []     # per-BN keep(1.0)/drop(0.0) masks
cfg_origin = []   # per-BN channel counts of the unpruned network

min_keep = 3      # never leave a BN layer with fewer surviving channels than this

for k, m in enumerate(model.modules()):
    if not isinstance(m, nn.BatchNorm2d):
        continue

    weight_copy = m.weight.data.clone()
    # 1.0 where |gamma| is strictly greater than the threshold, else 0.0.
    # The mask stays on the same device as the BN weights, so this also works
    # when the model runs on the CPU.
    mask = weight_copy.abs().gt(threshold).float()

    # Guard against a layer losing (almost) all of its channels: force the
    # min_keep channels with the largest |gamma| to survive.
    remaining = int(torch.sum(mask))
    if remaining < min_keep:
        print(f'Warning: layer {k} has only {remaining} channel(s) left, keeping top {min_keep}...')
        _, sorted_idx = torch.sort(weight_copy.abs(), descending=True)
        mask[sorted_idx[:min_keep]] = 1.0
        remaining = int(torch.sum(mask))

    # Zero the scale and shift of the pruned channels in the original model.
    m.weight.data.mul_(mask)
    m.bias.data.mul_(mask)

    pruned += mask.shape[0] - remaining
    cfg.append(remaining)              # channels left in this BN layer
    cfg_mask.append(mask.clone())      # mask belonging to this BN layer
    cfg_origin.append(mask.shape[0])
    print('layer index: {:d} \t total channel: {:d} \t remaining channel: {:d}'.
        format(k, mask.shape[0], remaining))

pruned_ratio = pruned/total

print(f'PRUNE RATIO={pruned_ratio}')
print('PREPROCESSING SUCCESSFUL!')

print(f'cfg1: {cfg}')
# Any BN layer that is wider than the BN layer before it keeps its original
# width in the pruned network; its pruned channels were zeroed above and stay
# zeroed when the weights are copied.
for i in range(1, len(cfg)):
    if cfg_origin[i] > cfg_origin[i - 1]:
        cfg[i] = cfg_origin[i]
print(f'cfg2: {cfg}')
        


layer index: 2 	 total channel: 64 	 remaining channel: 64
layer index: 8 	 total channel: 64 	 remaining channel: 64
layer index: 10 	 total channel: 64 	 remaining channel: 64
layer index: 12 	 total channel: 256 	 remaining channel: 255
layer index: 18 	 total channel: 64 	 remaining channel: 64
layer index: 20 	 total channel: 64 	 remaining channel: 64
layer index: 22 	 total channel: 256 	 remaining channel: 218
layer index: 26 	 total channel: 64 	 remaining channel: 64
layer index: 28 	 total channel: 64 	 remaining channel: 64
layer index: 30 	 total channel: 256 	 remaining channel: 94
layer index: 35 	 total channel: 128 	 remaining channel: 117
layer index: 37 	 total channel: 128 	 remaining channel: 108
layer index: 39 	 total channel: 512 	 remaining channel: 150
layer index: 45 	 total channel: 128 	 remaining channel: 78
layer index: 47 	 total channel: 128 	 remaining channel: 117
layer index: 49 	 total channel: 512 	 remaining channel: 84
layer index: 53 	 total cha

## 建立剪枝模型

In [9]:
# Build the pruned network from the per-BN channel counts in cfg.
newmodel = ResNet50(num_classes=10, cfg=cfg)
# Honour the CUDA flag instead of moving to the GPU unconditionally, so the
# pruned model lives on the same device as the original one.
if CUDA:
    newmodel.cuda()
# Last expression of the cell: display the architecture.
newmodel

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

### 將原本的模型權重複製到剪枝的模型
#### 根據不同層決定要複製什麼權重
###### Batch Normalization Layer
1.   scale factor
2.   bias
3.   running mean
4.   running variance

###### Convolutional Layer
1.   weight

###### Linear Layer
1.   weight
2.   bias




In [10]:
def _nonzero_indices(mask):
    """Return the positions of the non-zero entries of a 1-D mask as a list of ints.

    The result is always a list (also for zero or one surviving channel), so
    indexing a tensor with it never drops a dimension.
    """
    return np.flatnonzero(mask.cpu().numpy()).tolist()


def _wider_than_previous(cfg_index):
    """True if BN layer `cfg_index` originally had more channels than the BN before it.

    Mirrors the rule used when cfg was built: such layers keep their original
    width. The first BN layer has no predecessor, so the answer is False for it
    (a plain `cfg_index - 1` would wrap around to the last layer).
    """
    return cfg_index > 0 and cfg_origin[cfg_index] > cfg_origin[cfg_index - 1]


def _narrower_than_previous(cfg_index):
    """True if BN layer `cfg_index` originally had fewer channels than the BN before it."""
    return cfg_index > 0 and cfg_origin[cfg_index] < cfg_origin[cfg_index - 1]


old_modules = list(model.modules())
new_modules = list(newmodel.modules())

layer_id_in_cfg = 0
start_mask = torch.ones(3)  # 3 input channels (R, G, B)
end_mask = cfg_mask[layer_id_in_cfg]
bn_count = 0

# Offset between the module lists of the old and the new model.
# NOTE(review): presumably the pruned ResNet50 inserts extra modules in places;
# the loop skips two modules whenever the types stop lining up -- confirm
# against models/resnet.py.
layer_id_offset = 0
for layer_id in range(len(old_modules)):

    m0 = old_modules[layer_id]
    m1 = new_modules[layer_id + layer_id_offset]

    if type(m0) != type(m1):
        layer_id_offset += 2
        m1 = new_modules[layer_id + layer_id_offset]

    if isinstance(m0, nn.BatchNorm2d):
        bn_count += 1

        # Indices of the channels that survive in this BN layer.
        idx1 = _nonzero_indices(end_mask)

        # Copy weight, bias, running mean and running variance.
        if _wider_than_previous(layer_id_in_cfg):
            # This layer keeps its original width: surviving channels are
            # copied, every other channel is left at zero.
            new_w = torch.zeros_like(m0.weight.data)
            new_b = torch.zeros_like(m0.bias.data)
            new_rm = torch.zeros_like(m0.running_mean)
            new_rv = torch.zeros_like(m0.running_var)
            new_w[idx1] = m0.weight.data[idx1]
            new_b[idx1] = m0.bias.data[idx1]
            new_rm[idx1] = m0.running_mean[idx1]
            new_rv[idx1] = m0.running_var[idx1]
            m1.weight.data = new_w
            m1.bias.data = new_b
            m1.running_mean = new_rm
            m1.running_var = new_rv
        else:
            # This layer is physically shrunk to the surviving channels.
            m1.weight.data = m0.weight.data[idx1].clone()
            m1.bias.data = m0.bias.data[idx1].clone()
            m1.running_mean = m0.running_mean[idx1].clone()
            m1.running_var = m0.running_var[idx1].clone()

        layer_id_in_cfg += 1
        start_mask = end_mask.clone()

        # After the last BN layer there is no further mask to advance to.
        if layer_id_in_cfg < len(cfg_mask):
            end_mask = cfg_mask[layer_id_in_cfg]

    elif isinstance(m0, nn.Conv2d):
        print("layer:", layer_id)

        # Conv2d weight shape: [out_channels, in_channels, kH, kW]
        w = m0.weight.data.clone()

        followed_by_bn = (
            layer_id + 1 < len(old_modules)
            and isinstance(old_modules[layer_id + 1], nn.BatchNorm2d)
        )

        if followed_by_bn:
            # Regular conv: its input/output channels follow the masks of the
            # surrounding BN layers.
            idx0 = _nonzero_indices(start_mask)
            idx1 = _nonzero_indices(end_mask)

            if _wider_than_previous(layer_id_in_cfg):
                # Output keeps its original width (pruned filters zeroed),
                # input channels are pruned.
                new_w = torch.zeros_like(m0.weight.data)
                new_w[idx1, :, :, :] = w[idx1, :, :, :]
                m1.weight.data = new_w[:, idx0, :, :].clone()
            elif _narrower_than_previous(layer_id_in_cfg):
                # Output channels are pruned, every input channel is kept.
                m1.weight.data = w[idx1, :, :, :].clone()
            else:
                # Both output and input channels are pruned.
                m1.weight.data = w[idx1, :, :, :][:, idx0, :, :].clone()

        else:
            # Conv without a BN right after it (the downsample conv in the
            # printed model): copied unchanged.
            # NOTE(review): this only fits if the new model's downsample conv
            # has the same shape as the old one -- verify when the layers
            # feeding it get pruned.
            print("downsample:")
            m1.weight.data = w.clone()

        print("m0: ", m0.weight.shape)
        print("m1: ", m1.weight.shape)

    elif isinstance(m0, nn.Linear):

        idx0 = _nonzero_indices(start_mask)

        # Copy the weight: same shape as the original layer, but only the
        # columns of surviving input channels are filled in.
        # NOTE(review): the columns of pruned channels are left at zero; if
        # those inputs still carry a residual signal, copying the full weight
        # may preserve accuracy better -- confirm.
        fc_w = torch.zeros_like(m0.weight.data)
        fc_w[:, idx0] = m0.weight.data[:, idx0]
        m1.weight.data = fc_w

        # Copy the bias.
        m1.bias.data = m0.bias.data.clone()


layer: 1
m0:  torch.Size([64, 3, 3, 3])
m1:  torch.Size([64, 3, 3, 3])
layer: 7
m0:  torch.Size([64, 64, 1, 1])
m1:  torch.Size([64, 64, 1, 1])
layer: 9
m0:  torch.Size([64, 64, 3, 3])
m1:  torch.Size([64, 64, 3, 3])
layer: 11
m0:  torch.Size([256, 64, 1, 1])
m1:  torch.Size([256, 64, 1, 1])
layer: 15
downsample:
m0:  torch.Size([256, 64, 1, 1])
m1:  torch.Size([256, 64, 1, 1])
layer: 17
m0:  torch.Size([64, 256, 1, 1])
m1:  torch.Size([64, 256, 1, 1])
layer: 19
m0:  torch.Size([64, 64, 3, 3])
m1:  torch.Size([64, 64, 3, 3])
layer: 21
m0:  torch.Size([256, 64, 1, 1])
m1:  torch.Size([256, 64, 1, 1])
layer: 25
m0:  torch.Size([64, 256, 1, 1])
m1:  torch.Size([64, 256, 1, 1])
layer: 27
m0:  torch.Size([64, 64, 3, 3])
m1:  torch.Size([64, 64, 3, 3])
layer: 29
m0:  torch.Size([256, 64, 1, 1])
m1:  torch.Size([256, 64, 1, 1])
layer: 34
m0:  torch.Size([128, 256, 1, 1])
m1:  torch.Size([117, 256, 1, 1])
layer: 36
m0:  torch.Size([128, 128, 3, 3])
m1:  torch.Size([108, 117, 3, 3])
layer: 38
m

## 測試函數




In [11]:
from tqdm import tqdm
def test(model):
    """Evaluate `model` on the CIFAR-10 test split and return its top-1 accuracy.

    The test set is read from ./data (downloaded if missing), normalised with
    the per-channel statistics below, and fed through the model in batches of
    TEST_BATCH_SIZE with gradients disabled.

    Args:
        model: network to evaluate; it is switched to eval mode.

    Returns:
        0-dim tensor holding correct / number_of_test_samples.
    """
    # Run on whatever device the model lives on; fall back to the CUDA flag
    # for a model without parameters.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if CUDA else 'cpu')

    kwargs = {'num_workers': 1, 'pin_memory': True} if device.type == 'cuda' else {}
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data', train=False, download=True, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])),
        # Order does not influence accuracy, so keep the evaluation deterministic.
        batch_size=TEST_BATCH_SIZE, shuffle=False, **kwargs)

    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Index of the highest logit = predicted class.
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).cpu().sum()

    num_samples = len(test_loader.dataset)
    print('\nTest set: Accuracy: {}/{} ({:.1f}%)\n'.format(
        correct, num_samples, 100. * correct / num_samples))
    return correct / float(num_samples)

## 儲存模型並印出結果，以及剪枝後的test acc


In [12]:
# Persist the pruned architecture (cfg) together with its weights.
torch.save({'cfg': cfg, 'state_dict': newmodel.state_dict()}, PRUNE_PATH)

print(newmodel)
# NOTE: from here on `model` refers to the PRUNED network; later cells rely on
# this rebinding. Re-run the model-loading cell to get the original back.
# Honour the CUDA flag instead of moving to the GPU unconditionally.
model = newmodel.cuda() if CUDA else newmodel
test(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Test set: Accuracy: 2183/10000 (21.8%)






tensor(0.2183)

In [13]:
from thop import profile
from torchsummary import summary

# Use thop to count the operations and parameters of the current `model`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
model = model.to(device)

# One CIFAR-sized sample (batch of 1, 3 x 32 x 32) drives the profiling pass.
dummy_input = torch.randn(1, 3, 32, 32).to(device)
# NOTE(review): thop's README names the first return value MACs -- confirm
# before reporting it as FLOPs.
flops, params = profile(model, inputs=(dummy_input, ))

print(f"FLOPs: {flops}")
print(f"Params: {params}")
# torchsummary defaults to device="cuda"; pass the device actually in use so
# the summary also works on CPU-only machines.
summary(model, input_size=(3, 32, 32), device=device.type)

Device: cuda
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
FLOPs: 130318480.0
Params: 3736356.0
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
         MaxPool2d-4           [-1, 64, 16, 16]           