In [1]:
import sys, os
sys.path.append(os.path.dirname(os.path.abspath('.')))

import torch
import torch.nn as nn
import torch.nn.functional as F
from copy import deepcopy
import random

import torch_pruning as tp

# 搞个复杂的网络

In [2]:
class DeepFCN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(DeepFCN, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.add_module('first_relu', nn.ReLU())
        self.fc2 = nn.Sequential(
            nn.Linear(256,64),
            nn.ReLU()
        )
        self.fc3 = nn.ModuleList(
            [nn.Sequential(
            nn.Linear(64,64),
            nn.ReLU()) for i in range(3)
            ]
        )
        self.fc4 = nn.ModuleDict({
            'fc4-1': nn.Linear(64,32),
            'relu': nn.ReLU()
        })
        self.fc5 = nn.Linear(32, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.first_relu(x)
        x = self.fc2(x)
        for i, l in enumerate(self.fc3):
            x = l(x)
        x = self.fc4['fc4-1'](x)
        x = self.fc4['relu'](x)
        y_hat = self.fc5(x)
        return y_hat

base_model = DeepFCN(225, 10)

# 两个复制品上进行module_to_idxs规划
此处需要model, 静态层[model.fc5]

In [3]:
model1 = deepcopy(base_model)
static_layers1 = []
static_layers1.append(model1.fc5)
print(static_layers1)
module_to_idxs1 = tp.planner.get_ordered_module_to_idxs(model1, 0.2, nn.Linear, static_layers1, torch.randn(1,128))

model2 = deepcopy(base_model)
static_layers2 = []
static_layers2.append(model2.fc5)
print(static_layers2)
module_to_idxs2 = tp.planner.get_ordered_module_to_idxs(model2, 0.3, nn.Linear, static_layers2, torch.randn(1,128))

[Linear(in_features=32, out_features=10, bias=True)]
[Linear(in_features=32, out_features=10, bias=True)]


# 看一个局部对比

In [4]:
print(module_to_idxs1[model1.fc2[0]])

print(module_to_idxs2[model2.fc2[0]])

[43, 14, 45, 16, 2, 31, 53, 1, 27, 6, 44, 52]
[24, 3, 16, 33, 55, 28, 9, 12, 59, 42, 38, 32, 20, 34, 46, 25, 29, 26, 52]


# 随机生成交叉互换的指示向量

In [5]:
import random

s = random.randint(0, len(module_to_idxs1)-1)
e = random.randint(s+1, len(module_to_idxs1))
print(s,e)
indicate_vector = [1 if s<=i<e else 0 for i in range(len(module_to_idxs1))]
print(indicate_vector)

4 5
[0, 0, 0, 0, 1, 0]


# 为了便于看效果，我们制定一个indicate_vector，不用上面随机的

In [6]:
indicate_vector = [0,1,0,1,0,1]

module_to_idxs1, module_to_idxs2 = tp.planner.crossover(module_to_idxs1, module_to_idxs2, indicate_vector)

print(module_to_idxs1[model1.fc2[0]])

print(module_to_idxs2[model2.fc2[0]])

[24, 3, 16, 33, 55, 28, 9, 12, 59, 42, 38, 32, 20, 34, 46, 25, 29, 26, 52]
[43, 14, 45, 16, 2, 31, 53, 1, 27, 6, 44, 52]


# 可以看出第二个全连接层的module_to_idxs已经完全交换了

# 手动模拟遗传算法
所依赖的输入：model本身，静态层model1.fc5.那么问题就是我如何让用户给定model model.fc5的同时，搞定model1,model1.fc5

解决方案：
用户传入model和model.fc5，还有种群大小population_size
我们先在model.fc5上面打标签，然后再进行deepcopy，这样所有的复制品的.fc5上面都有标签 do_not_prune

然后我们按照population_size进行多次复制，对每一次复制进行随机剪枝

In [7]:
'input = model, [model.fc5], population_size'
print(base_model)
num_parameter_base = 0
for para in base_model.parameters():
    num_parameter_base += para.size().numel()
print(num_parameter_base)
static_layers = [base_model.fc5]
population_size = 10
target_type = nn.Linear
example_inputs = torch.randn(1,225)


DeepFCN(
  (fc1): Linear(in_features=225, out_features=256, bias=True)
  (first_relu): ReLU()
  (fc2): Sequential(
    (0): Linear(in_features=256, out_features=64, bias=True)
    (1): ReLU()
  )
  (fc3): ModuleList(
    (0): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
    )
    (1): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
    )
    (2): Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
    )
  )
  (fc4): ModuleDict(
    (fc4-1): Linear(in_features=64, out_features=32, bias=True)
    (relu): ReLU()
  )
  (fc5): Linear(in_features=32, out_features=10, bias=True)
)
89194


维护一个模型池model_pool，这个池子里可以通过模型找到（idxs，performance）将来还可以加入categorical_performance

In [8]:
from collections import OrderedDict

for layer in static_layers:
    layer.do_not_prune = True

def get_module_to_idxs(model, amount, target_type):
    module_to_idxs = OrderedDict()
    def init_strategy(m):
        strategy = tp.prune.strategy.RandomStrategy()
        if hasattr(m, 'do_not_prune'):
            return
        elif isinstance(m, target_type):
            module_to_idxs[m] = strategy(m.weight, amount=amount)
    model.apply(init_strategy)
    return module_to_idxs
    
model_pool = []
for i in range(population_size):
    tmp_model = deepcopy(base_model)
    tmp_module_to_idxs = get_module_to_idxs(tmp_model, 0.2, nn.Linear)
    tmp_model.module_to_idxs = tmp_module_to_idxs
    DG = tp.DependencyGraph()
    DG.build_dependency(tmp_model,example_inputs)
    pruning_plans = []
    def get_pruning_plans(m):
        if m in tmp_module_to_idxs:
            pruning_plans.append(DG.get_pruning_plan(m, tp.prune.prune_linear, idxs=tmp_module_to_idxs[m]))
    tmp_model.apply(get_pruning_plans)
    for plan in pruning_plans:
        plan.exec()
    model_pool.append(tmp_model)
    

In [9]:
model_pool

[DeepFCN(
   (fc1): Linear(in_features=225, out_features=205, bias=True)
   (first_relu): ReLU()
   (fc2): Sequential(
     (0): Linear(in_features=205, out_features=52, bias=True)
     (1): ReLU()
   )
   (fc3): ModuleList(
     (0): Sequential(
       (0): Linear(in_features=52, out_features=52, bias=True)
       (1): ReLU()
     )
     (1): Sequential(
       (0): Linear(in_features=52, out_features=52, bias=True)
       (1): ReLU()
     )
     (2): Sequential(
       (0): Linear(in_features=52, out_features=52, bias=True)
       (1): ReLU()
     )
   )
   (fc4): ModuleDict(
     (fc4-1): Linear(in_features=52, out_features=26, bias=True)
     (relu): ReLU()
   )
   (fc5): Linear(in_features=26, out_features=10, bias=True)
 ),
 DeepFCN(
   (fc1): Linear(in_features=225, out_features=205, bias=True)
   (first_relu): ReLU()
   (fc2): Sequential(
     (0): Linear(in_features=205, out_features=52, bias=True)
     (1): ReLU()
   )
   (fc3): ModuleList(
     (0): Sequential(
       (0): L

In [10]:
len(model_pool)

10

## 可以看出他们确实是两两不同的

In [11]:
model_pool[0].fc1.weight.equal(model_pool[1].fc1.weight)

False

## 把训练流程搬过来

In [12]:
import time
import torchvision

def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    trans.append(torchvision.transforms.Lambda(lambda x: torch.flatten(x)))
    transform = torchvision.transforms.Compose(trans)
    
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)

    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=0)

    return train_iter, test_iter


def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # 如果没指定device就使用net的device
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval() # 评估模式, 这会关闭dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train() # 改回训练模式
            else: # 自定义的模型, 3.13节之后不会用到, 不考虑GPU
                if('is_training' in net.__code__.co_varnames): # 如果有is_training这个参数
                    # 将is_training设置成False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() 
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() 
            n += y.shape[0]
    return acc_sum / n


def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    incumbent_test_accuracy = 0
    incumbent_epoch = 0
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        if incumbent_test_accuracy < test_acc:
            incumbent_test_accuracy = test_acc
            incumbent_epoch = epoch
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
    return {'incumbent_epoch': incumbent_epoch, 'incumbent_test_accuracy': incumbent_test_accuracy}


## 进行训练

In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for model in model_pool:
    batch_size = 128
    train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=15)
    lr, num_epochs = 0.001, 2
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    res = train_ch5(model, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
    model.performance = res['incumbent_test_accuracy']

training on  cpu
epoch 1, loss 0.9557, train acc 0.627, test acc 0.751, time 8.8 sec
epoch 2, loss 0.5867, train acc 0.784, test acc 0.802, time 8.3 sec
training on  cpu
epoch 1, loss 0.9967, train acc 0.601, test acc 0.737, time 8.2 sec
epoch 2, loss 0.6112, train acc 0.767, test acc 0.786, time 8.2 sec
training on  cpu
epoch 1, loss 0.9497, train acc 0.631, test acc 0.765, time 8.2 sec
epoch 2, loss 0.5896, train acc 0.781, test acc 0.788, time 8.3 sec
training on  cpu
epoch 1, loss 0.9874, train acc 0.606, test acc 0.755, time 8.2 sec
epoch 2, loss 0.6065, train acc 0.777, test acc 0.794, time 8.3 sec
training on  cpu
epoch 1, loss 1.0041, train acc 0.605, test acc 0.728, time 8.2 sec
epoch 2, loss 0.6177, train acc 0.774, test acc 0.779, time 8.3 sec
training on  cpu
epoch 1, loss 0.9385, train acc 0.637, test acc 0.751, time 8.3 sec
epoch 2, loss 0.6073, train acc 0.776, test acc 0.779, time 8.7 sec
training on  cpu
epoch 1, loss 0.9572, train acc 0.629, test acc 0.755, time 8.2 s

## 我们看一下模型池子里的模型都有多少参数

In [14]:
for model in model_pool:
    num_parameter = 0
    for para in model.parameters():
        num_parameter += para.size().numel()
    print(num_parameter)

66958
66958
66958
66958
66958
66958
66958
66958
66958
66958


## 以及他们的最佳performance

In [15]:
for model in model_pool:
    print(model.performance)

0.8018
0.7857
0.7881
0.7944
0.7795
0.7788
0.7774
0.78
0.7984
0.781


## 计算模型池中模型的fitness

In [16]:
for model in model_pool:
    num_parameter = 0
    for para in model.parameters():
        num_parameter += para.size().numel()
    model.fitness = model.performance - 0.2 * (num_parameter/num_parameter_base)
    print(model.fitness)

0.6516598560441285
0.6355598560441285
0.6379598560441285
0.6442598560441285
0.6293598560441285
0.6286598560441286
0.6272598560441285
0.6298598560441285
0.6482598560441285
0.6308598560441285


## 遗传迭代
* 保留fitness最高的
* 循环population-1次：
  * 摇骰子，随机到[0,s1)则从亲代中抽样
  * 摇骰子，随机到[s1,s1+s2)则随机选择两个亲代（不能相同）进行交叉互换
  * 摇骰子，随机到[s1+s2, 1]则随机选择一个亲代和一个纯随机模型进行局部交叉互换，保留前者的孩子（模拟突变）

In [38]:
s1 = 0.3
s2 = 0.55
s3 = 1-s1-s2

In [39]:
class ModulePool():
    def __init__(self):
        self.pool = []
        self.population = 10
        self.selection_mark = [0 for i in range(self.population)]
        self.fitness = []
    
    def inherit(self):
        if len(self.fitness) < self.population:
            for i in range(self.population):
                self.fitness.append(self.pool[i].fitness)
        incumbent = self.pool[0].fitness
        incumbent_flag = 0
        for i in range(1, self.population):
            if self.pool[i].fitness > incumbent:
                incumbent = self.pool[i].fitness
                incumbent_flag = i
        self.selection_mark[incumbent_flag] = 1
        print('[', incumbent_flag, ']')
        return
    
    def selection(self):
        # 根据fitness进行选择，标记在selection_mark。无重复选择。
        # 注意pool的长度是会在过程中变化的，但population不会
        mylist = [i for i in range(self.population)]
        weight = [0 if self.selection_mark[i]==1 else self.fitness[i] for i in range(self.population)]
        print(weight)
        choice = random.choices(mylist, weights=weight, k=1)[0]
        print('[', choice, ']')
        self.selection_mark[choice] = 1
        return
    
    def crossover(self):
        # 根据fitness进行选择, 为了选两个不一样的，决定调两次random；进行随机片段交换,保留其中较好的孩子
        mylist = [i for i in range(self.population)]
        weight = [self.fitness[i] for i in range(self.population)]
        choices = random.choices(mylist, weights=weight, k=1)
        weight[choices[0]] = 0
        choices.extend(random.choices(mylist, weights=weight, k=1))
        p1 = deepcopy(self.pool[choices[0]])
        p2 = deepcopy(self.pool[choices[1]])
#         print(choices[0],'【p1.module_to_idxs.items()】', p1.module_to_idxs.items())
#         print(choices[1],'【p2.module_to_idxs.items()】', p2.module_to_idxs.items()) 
        s = random.randint(0, len(p1.module_to_idxs)-1)
        e = random.randint(s+1, len(p1.module_to_idxs))
        indicate_vector = [1 if s<=i<e else 0 for i in range(len(module_to_idxs1))]
#         print('indicae_vector', indicate_vector)
        
        for i, ((k1, v1), (k2, v2)) in enumerate(zip(*[p1.module_to_idxs.items(), p2.module_to_idxs.items()])):
            if indicate_vector[i] == 1:
                tmp = p1.module_to_idxs[k1]
                p1.module_to_idxs[k1] = p2.module_to_idxs[k2]
                p2.module_to_idxs[k2] = tmp
#         # 对比用
#         p1 = self.pool[choices[0]]
#         p2 = self.pool[choices[1]]
#         print('【p1.module_to_idxs.items()】', p1.module_to_idxs.items())
#         print('【p2.module_to_idxs.items()】', p2.module_to_idxs.items()) 
        child1 = deepcopy(base_model)
        child1.module_to_idxs = get_module_to_idxs(child1, 0.2, nn.Linear)
        for i, ((k1, v1), (k2, v2)) in enumerate(zip(*[p1.module_to_idxs.items(), child1.module_to_idxs.items()])):
                child1.module_to_idxs[k2] = p1.module_to_idxs[k1]
        DG1 = tp.DependencyGraph()
        DG1.build_dependency(child1,example_inputs)
        pruning_plans1 = []
        def get_pruning_plans(m):
            if m in child1.module_to_idxs:
                pruning_plans1.append(DG1.get_pruning_plan(m, tp.prune.prune_linear, idxs=child1.module_to_idxs[m]))
        child1.apply(get_pruning_plans)
        for plan in pruning_plans1:
            plan.exec()
            
        
        self.pool.append(child1)
        
        return
    
    def mutation(self):
        # 根据fitness进行选择，选一个，另一个则产生一个随机模型，进行片段交换，保留其中较好的孩子
        return
    
    def age(self):
        # 吧没有被标记的模型从pool中删除，把新模型加上
        return
    
mypool = ModulePool()    

In [40]:
mypool.pool = model_pool
mypool.population = population_size
mypool.selection_mark = [0 for i in range(mypool.population)]

## 进行一次更新迭代（mypool尚未完善）

In [41]:
def evolve(mypool:ModulePool):
    for i in range(population_size):
        if i == 0:
            print(i, 'inherit')
            mypool.inherit()
            continue
        dice = random.random()
        if dice < s1:
            print(i, 'selection')
            mypool.selection()
        elif dice < s1+s2:
            print(i, 'crossover')
            mypool.crossover()
        else:
            print(i, 'mutation')
            mypool.mutation()
    mypool.age()

In [42]:
evolve(mypool)

0 inherit
[ 0 ]
1 crossover


KeyError: Linear(in_features=225, out_features=205, bias=True)

In [44]:
net = mypool.pool[0]

In [49]:
net.module_to_idxs.items()

odict_items([(Linear(in_features=225, out_features=205, bias=True), [121, 171, 73, 161, 249, 145, 19, 233, 162, 51, 111, 222, 172, 156, 40, 126, 199, 86, 187, 61, 232, 211, 197, 115, 13, 253, 206, 167, 119, 166, 224, 96, 20, 9, 24, 91, 59, 124, 177, 236, 3, 132, 181, 242, 141, 239, 155, 205, 109, 52, 252]), (Linear(in_features=205, out_features=52, bias=True), [4, 34, 7, 15, 6, 51, 56, 44, 1, 45, 22, 55]), (Linear(in_features=52, out_features=52, bias=True), [43, 12, 36, 60, 49, 23, 38, 17, 31, 21, 39, 22]), (Linear(in_features=52, out_features=52, bias=True), [55, 16, 62, 28, 7, 8, 58, 30, 15, 40, 27, 1]), (Linear(in_features=52, out_features=52, bias=True), [10, 35, 38, 44, 62, 13, 54, 55, 32, 48, 17, 58]), (Linear(in_features=52, out_features=26, bias=True), [17, 26, 21, 7, 15, 16])])