In [1]:
import sys, os
sys.path.append(os.path.dirname(os.path.abspath('.')))

import torch
import torch.nn as nn
import torch.nn.functional as F
from copy import deepcopy

import torch_pruning as tp

# 搞个复杂的网络

In [2]:
class DeepFCN(nn.Module):
    """Fully connected classifier whose layers are registered in several ways.

    The sub-modules are attached through a plain attribute, ``nn.Sequential``,
    ``nn.ModuleList`` and ``nn.ModuleDict``.

    Args:
        input_size: Number of features of each input sample (in_features of fc1).
        num_classes: Number of output scores (out_features of fc5).
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.first_relu = nn.ReLU()
        self.fc2 = nn.Sequential(nn.Linear(256, 64), nn.ReLU())

        # Three identical 64 -> 64 Linear+ReLU blocks held in a ModuleList.
        hidden_blocks = nn.ModuleList()
        for _ in range(3):
            hidden_blocks.append(nn.Sequential(nn.Linear(64, 64), nn.ReLU()))
        self.fc3 = hidden_blocks

        self.fc4 = nn.ModuleDict({'fc4-1': nn.Linear(64, 32), 'relu': nn.ReLU()})
        self.fc5 = nn.Linear(32, num_classes)

    def forward(self, x):
        """Return the output of fc5 (no activation applied) for the batch ``x``."""
        hidden = self.fc2(self.first_relu(self.fc1(x)))
        for block in self.fc3:
            hidden = block(hidden)
        hidden = self.fc4['relu'](self.fc4['fc4-1'](hidden))
        return self.fc5(hidden)

# Reference network used by the rest of the notebook: 225 input features, 10 outputs.
model = DeepFCN(input_size=225, num_classes=10)
print(model)

DeepFCN(
  (fc1): Linear(in_features=225, out_features=256, bias=True)
  (first_relu): ReLU()
  (fc2): Sequential(
    (0): Linear(in_features=256, out_features=64, bias=True)
    (1): ReLU()
  )
  (fc3): ModuleList(
    (0): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
    )
    (1): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
    )
    (2): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
    )
  )
  (fc4): ModuleDict(
    (fc4-1): Linear(in_features=64, out_features=32, bias=True)
    (relu): ReLU()
  )
  (fc5): Linear(in_features=32, out_features=10, bias=True)
)


# 两个复制品上进行module_to_idxs规划
此处需要model, 静态层[model.fc5]

In [3]:
# Build two independent copies of the reference model and request a plan for
# each one.  The static-layer list has to reference the copy's own fc5,
# because deepcopy creates new module objects.
# The example input has 225 features so that it matches DeepFCN(225, 10).
# NOTE(review): 0.2 / 0.3 are presumably the pruning ratios - confirm against
# tp.planner.get_ordered_module_to_idxs.
model1 = deepcopy(model)
static_layers1 = [model1.fc5]
print(static_layers1)
module_to_idxs1 = tp.planner.get_ordered_module_to_idxs(
    model1, 0.2, nn.Linear, static_layers1, torch.randn(1, 225))

model2 = deepcopy(model)
static_layers2 = [model2.fc5]
print(static_layers2)
module_to_idxs2 = tp.planner.get_ordered_module_to_idxs(
    model2, 0.3, nn.Linear, static_layers2, torch.randn(1, 225))

[Linear(in_features=32, out_features=10, bias=True)]
[Linear(in_features=32, out_features=10, bias=True)]


# 看一个局部对比

In [4]:
# Show the entry stored for the Linear layer inside fc2, one plan per line.
print(module_to_idxs1[model1.fc2[0]], module_to_idxs2[model2.fc2[0]], sep='\n')

[55, 43, 42, 47, 37, 19, 12, 53, 11, 14, 51, 0]
[24, 28, 44, 51, 60, 1, 54, 26, 20, 34, 40, 22, 43, 3, 21, 36, 59, 33, 5]


# 随机生成交叉互换的指示向量

In [5]:
import random

# Draw a random non-empty window [s, e) over the plan entries and mark the
# positions inside it with 1, all others with 0.
num_entries = len(module_to_idxs1)
s = random.randint(0, num_entries - 1)
e = random.randint(s + 1, num_entries)
print(s, e)
indicate_vector = [int(s <= i < e) for i in range(num_entries)]
print(indicate_vector)

5 6
[0, 0, 0, 0, 0, 1]


# 为了便于看效果，我们手动指定一个indicate_vector，不用上面随机生成的

In [6]:
# Hand-written mask with one flag per plan entry (six entries here).
# NOTE(review): presumably entries flagged with 1 are exchanged between the
# two plans - the fc2 entries printed below are consistent with that; confirm
# against tp.planner.crossover.
indicate_vector = [0, 1, 0, 1, 0, 1]

# NOTE: both names are rebound, so running this cell again applies the
# crossover to the already crossed-over plans.
module_to_idxs1, module_to_idxs2 = tp.planner.crossover(
    module_to_idxs1, module_to_idxs2, indicate_vector)

print(module_to_idxs1[model1.fc2[0]], module_to_idxs2[model2.fc2[0]], sep='\n')

[24, 28, 44, 51, 60, 1, 54, 26, 20, 34, 40, 22, 43, 3, 21, 36, 59, 33, 5]
[55, 43, 42, 47, 37, 19, 12, 53, 11, 14, 51, 0]


# 可以看出第二个全连接层的module_to_idxs已经完全交换了

# 手动模拟遗传算法
所依赖的输入：model 本身，以及静态层 model1.fc5。那么问题就是：用户只给定 model 和 model.fc5，我们如何同时得到复制品 model1 及其对应的 model1.fc5？

解决方案：
用户传入model和model.fc5，还有种群大小population_size
我们先在model.fc5上面打标签，然后再进行deepcopy，这样所有的复制品的.fc5上面都有标签 do_not_prune

然后我们按照population_size进行多次复制，对每一次复制进行随机剪枝

In [7]:
# Inputs: model, [model.fc5] (layers that must stay unpruned), population_size.
static_layers = [model.fc5]
population_size = 10
target_type = nn.Linear
# 225 features, matching the input size DeepFCN was built with.
example_inputs = torch.randn(1, 225)

维护一个模型池 model_pool：池中的每个模型都可以查到它对应的（idxs，performance），将来还可以加入 categorical_performance。

In [8]:
from collections import OrderedDict

In [9]:
# Tag the static layers on the base model before any deepcopy is taken, so the
# copies made below carry the same `do_not_prune` marker.
for static_layer in static_layers:
    setattr(static_layer, 'do_not_prune', True)

def get_module_to_idxs(model, amount, target_type):
    """Pick random pruning indices for every eligible module of ``model``.

    A module is eligible when it is an instance of ``target_type`` and does
    not carry a ``do_not_prune`` attribute.

    Args:
        model: Module tree to scan; it is visited with ``model.apply``.
        amount: Passed unchanged as ``amount`` to ``RandomStrategy``.
        target_type: Class (or tuple of classes) tested with ``isinstance``.

    Returns:
        OrderedDict mapping each eligible module to the value returned by the
        strategy for its ``weight``, in ``model.apply`` visiting order.
    """
    selected = OrderedDict()

    def collect(module):
        if hasattr(module, 'do_not_prune') or not isinstance(module, target_type):
            return
        pick = tp.prune.strategy.RandomStrategy()
        selected[module] = pick(module.weight, amount=amount)

    model.apply(collect)
    return selected
    
# Build the initial population: every member is an independent deep copy of
# the base model, pruned with its own randomly drawn indices.
model_pool = []
for i in range(population_size):
    tmp_model = deepcopy(model)
    tmp_module_to_idxs = get_module_to_idxs(tmp_model, 0.2, nn.Linear)
    # Keep the plan on the model itself so it can be looked up later.
    tmp_model.module_to_idxs = tmp_module_to_idxs
    # Misspelled name from the original code, kept as an alias for compatibility.
    tmp_model.modult_to_idxs = tmp_module_to_idxs
    DG = tp.DependencyGraph()
    DG.build_dependency(tmp_model, example_inputs)
    # Collect every plan first, then execute them; the plan dict is ordered,
    # so the layers are handled in the order in which they were selected.
    pruning_plans = [
        DG.get_pruning_plan(m, tp.prune.prune_linear, idxs=idxs)
        for m, idxs in tmp_module_to_idxs.items()
    ]
    for plan in pruning_plans:
        plan.exec()
    model_pool.append(tmp_model)
    

In [10]:
# Display every pruned model in the pool (the output is long).
model_pool

[DeepFCN(
   (fc1): Linear(in_features=225, out_features=205, bias=True)
   (first_relu): ReLU()
   (fc2): Sequential(
     (0): Linear(in_features=205, out_features=52, bias=True)
     (1): ReLU()
   )
   (fc3): ModuleList(
     (0): Sequential(
       (0): Linear(in_features=52, out_features=52, bias=True)
       (1): ReLU()
     )
     (1): Sequential(
       (0): Linear(in_features=52, out_features=52, bias=True)
       (1): ReLU()
     )
     (2): Sequential(
       (0): Linear(in_features=52, out_features=52, bias=True)
       (1): ReLU()
     )
   )
   (fc4): ModuleDict(
     (fc4-1): Linear(in_features=52, out_features=26, bias=True)
     (relu): ReLU()
   )
   (fc5): Linear(in_features=26, out_features=10, bias=True)
 ),
 DeepFCN(
   (fc1): Linear(in_features=225, out_features=205, bias=True)
   (first_relu): ReLU()
   (fc2): Sequential(
     (0): Linear(in_features=205, out_features=52, bias=True)
     (1): ReLU()
   )
   (fc3): ModuleList(
     (0): Sequential(
       (0): L

In [14]:
# Number of models in the pool; one model is appended per population member.
len(model_pool)

10

## 可以看出前两个模型的 fc1 权重确实不同（这里只抽查了 model_pool[0] 和 model_pool[1] 这一对）

In [11]:
# Spot check on a single pair: do the first two pool members share the same fc1 weights?
model_pool[0].fc1.weight.equal(model_pool[1].fc1.weight)

False

## 把训练流程搬过来

In [12]:
import time
import torchvision

def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets'):
    """Create train/test DataLoaders over Fashion-MNIST with flattened images.

    Args:
        batch_size: Batch size used by both loaders.
        resize: Optional size handed to ``transforms.Resize``; skipped when falsy.
        root: Dataset directory passed to ``torchvision.datasets.FashionMNIST``
            (``download=True`` is always set).

    Returns:
        ``(train_iter, test_iter)``; only the training loader shuffles.
    """
    steps = [torchvision.transforms.Resize(size=resize)] if resize else []
    steps += [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Lambda(lambda x: torch.flatten(x)),
    ]
    transform = torchvision.transforms.Compose(steps)

    def make_loader(train):
        # The training split is shuffled, the test split is not.
        dataset = torchvision.datasets.FashionMNIST(
            root=root, train=train, download=True, transform=transform)
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=train, num_workers=0)

    return make_loader(True), make_loader(False)


def evaluate_accuracy(data_iter, net, device=None):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: Iterable of ``(X, y)`` batches. A prediction is the
            ``argmax(dim=1)`` of the network output and is compared with ``y``.
        net: A ``torch.nn.Module`` or a plain callable. A Module is switched
            to eval mode for the whole pass and afterwards restored to the
            mode it was in when this function was called. A plain callable
            is called with ``is_training=False`` when its code object lists
            a variable of that name; ``device`` is ignored for it.
        device: Device used for a Module. When ``None``, the device of the
            module's first parameter is used (CPU if it has no parameters).

    Returns:
        Number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    was_training = False
    takes_training_flag = False
    if is_module:
        if device is None:
            # Without an explicit device, follow the network's own parameters.
            first_param = next(net.parameters(), None)
            device = first_param.device if first_param is not None else torch.device('cpu')
        was_training = net.training
        net.eval()  # evaluation mode, this disables dropout
    else:
        # Hand-written model function: check whether it knows `is_training`.
        code = getattr(net, '__code__', None)
        takes_training_flag = code is not None and 'is_training' in code.co_varnames

    acc_sum, n = 0.0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                if is_module:
                    predictions = net(X.to(device)).argmax(dim=1)
                    acc_sum += (predictions == y.to(device)).float().sum().cpu().item()
                elif takes_training_flag:
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            # Restore the caller's mode instead of forcing train mode.
            net.train(was_training)
    return acc_sum / n


def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    """Train ``net`` with cross-entropy loss and track the best test accuracy.

    Args:
        net: Network to train; it is moved to ``device`` first.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable handed to ``evaluate_accuracy`` after every epoch.
        batch_size: Accepted for interface compatibility; not used in the body.
        optimizer: Optimizer stepped once per training batch.
        device: Device the network and every batch are moved to.
        num_epochs: Number of passes over ``train_iter``.

    Returns:
        Dict with ``'incumbent_test_accuracy'`` (highest test accuracy seen)
        and ``'incumbent_epoch'`` (the 0-based epoch index at which it was
        reached; the progress line printed per epoch is 1-based).
    """
    best_acc, best_epoch = 0, 0
    net = net.to(device)
    print("training on ", device)
    criterion = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        loss_total, hits, seen, batches = 0.0, 0.0, 0, 0
        epoch_start = time.time()
        for X, y in train_iter:
            X, y = X.to(device), y.to(device)
            logits = net(X)
            batch_loss = criterion(logits, y)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
            loss_total += batch_loss.cpu().item()
            hits += (logits.argmax(dim=1) == y).sum().cpu().item()
            seen += y.shape[0]
            batches += 1
        test_acc = evaluate_accuracy(test_iter, net)
        # Strict comparison: on a tie the earlier epoch stays the incumbent.
        if test_acc > best_acc:
            best_acc, best_epoch = test_acc, epoch
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, loss_total / batches, hits / seen, test_acc, time.time() - epoch_start))
    return {'incumbent_epoch': best_epoch, 'incumbent_test_accuracy': best_acc}


In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Settings and data loaders are identical for every pool member, so they are
# created once.  resize=15 yields 15 * 15 = 225 flattened features, matching
# the input size the models were built with.
batch_size = 128
lr, num_epochs = 0.001, 10
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=15)

# A dedicated loop name keeps the base `model` from being rebound to a pruned
# pool member, which would corrupt earlier cells when they are re-run.
for candidate in model_pool:
    optimizer = torch.optim.Adam(candidate.parameters(), lr=lr)
    res = train_ch5(candidate, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
    # Store the best test accuracy on the model itself.
    candidate.performance = res['incumbent_test_accuracy']

training on  cpu
epoch 1, loss 1.0456, train acc 0.581, test acc 0.719, time 8.4 sec
epoch 2, loss 0.6206, train acc 0.776, test acc 0.789, time 8.2 sec
epoch 3, loss 0.5306, train acc 0.812, test acc 0.818, time 8.3 sec
epoch 4, loss 0.4754, train acc 0.831, test acc 0.831, time 8.2 sec
epoch 5, loss 0.4373, train acc 0.845, test acc 0.840, time 8.7 sec
epoch 6, loss 0.4122, train acc 0.852, test acc 0.835, time 9.1 sec
epoch 7, loss 0.3928, train acc 0.857, test acc 0.851, time 8.6 sec
epoch 8, loss 0.3730, train acc 0.864, test acc 0.854, time 8.7 sec
epoch 9, loss 0.3647, train acc 0.867, test acc 0.856, time 8.6 sec
epoch 10, loss 0.3557, train acc 0.871, test acc 0.840, time 8.6 sec
training on  cpu
epoch 1, loss 1.0674, train acc 0.562, test acc 0.688, time 8.6 sec
epoch 2, loss 0.6362, train acc 0.764, test acc 0.796, time 8.7 sec
epoch 3, loss 0.5237, train acc 0.813, test acc 0.816, time 8.7 sec
epoch 4, loss 0.4797, train acc 0.829, test acc 0.825, time 8.6 sec
epoch 5, loss

In [15]:
# Print the best test accuracy stored on each pool member.  A dedicated loop
# name keeps the base `model` from being rebound to a pool member.
for candidate in model_pool:
    print(candidate.performance)

0.8562
0.8589
0.8657
0.858
0.8501
0.8605
0.8579
0.8668
0.8472
0.8611
